runtime: add tracing of runtime events

Add tracing of the interesting runtime events: goroutine blocking and
unblocking (channels, select, sync, sleep, netpoll), goroutine creation,
end and preemption, GC and sweep phases, heap size changes,
syscall enter/exit/block, and P start/stop/GOMAXPROCS changes.
Part of the larger tracing functionality:
https://docs.google.com/document/u/1/d/1FP5apqzBgr7ahCCgFO-yoVhk4YZrNIDNf9RybngBc14/pub
Full change:
https://codereview.appspot.com/146920043
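
For context (not part of this change), a minimal, hypothetical usage
sketch, assuming the user-facing runtime/trace package that accompanies
this tracing work; the resulting file is meant for "go tool trace".
The channel receive below parks the main goroutine, which with this
change is recorded as a GoBlockRecv event (traceEvGoBlockRecv in the
diff below):

package main

import (
	"os"
	"runtime/trace"
	"time"
)

func main() {
	// Write the binary trace to a file for later inspection with
	// "go tool trace trace.out".
	f, err := os.Create("trace.out")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	if err := trace.Start(f); err != nil {
		panic(err)
	}
	defer trace.Stop()

	done := make(chan struct{})
	go func() {
		time.Sleep(10 * time.Millisecond) // keep main blocked for a moment
		close(done)
	}()
	<-done // parks main; recorded as a chan-receive blocking event
}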

Change-Id: Icccf54aea54e09350bb698ba6bf11532f9fbe6d3
Reviewed-on: https://go-review.googlesource.com/1451
Reviewed-by: Russ Cox <rsc@golang.org>
diff --git a/src/runtime/chan.go b/src/runtime/chan.go
index abe73e7..56560f9 100644
--- a/src/runtime/chan.go
+++ b/src/runtime/chan.go
@@ -96,7 +96,7 @@
 		if !block {
 			return false
 		}
-		gopark(nil, nil, "chan send (nil chan)")
+		gopark(nil, nil, "chan send (nil chan)", traceEvGoStop)
 		throw("unreachable")
 	}
 
@@ -178,7 +178,7 @@
 		mysg.selectdone = nil
 		gp.param = nil
 		c.sendq.enqueue(mysg)
-		goparkunlock(&c.lock, "chan send")
+		goparkunlock(&c.lock, "chan send", traceEvGoBlockSend)
 
 		// someone woke us up.
 		if mysg != gp.waiting {
@@ -217,7 +217,7 @@
 		mysg.elem = nil
 		mysg.selectdone = nil
 		c.sendq.enqueue(mysg)
-		goparkunlock(&c.lock, "chan send")
+		goparkunlock(&c.lock, "chan send", traceEvGoBlockSend)
 
 		// someone woke us up - try again
 		if mysg.releasetime > 0 {
@@ -340,7 +340,7 @@
 		if !block {
 			return
 		}
-		gopark(nil, nil, "chan receive (nil chan)")
+		gopark(nil, nil, "chan receive (nil chan)", traceEvGoStop)
 		throw("unreachable")
 	}
 
@@ -414,7 +414,7 @@
 		mysg.selectdone = nil
 		gp.param = nil
 		c.recvq.enqueue(mysg)
-		goparkunlock(&c.lock, "chan receive")
+		goparkunlock(&c.lock, "chan receive", traceEvGoBlockRecv)
 
 		// someone woke us up
 		if mysg != gp.waiting {
@@ -471,7 +471,7 @@
 		mysg.selectdone = nil
 
 		c.recvq.enqueue(mysg)
-		goparkunlock(&c.lock, "chan receive")
+		goparkunlock(&c.lock, "chan receive", traceEvGoBlockRecv)
 
 		// someone woke us up - try again
 		if mysg.releasetime > 0 {
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 69eb090..f827b9c 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -368,6 +368,12 @@
 	if force == 0 {
 		gctimer.cycle.sweepterm = nanotime()
 	}
+
+	if trace.enabled {
+		traceGoSched()
+		traceGCStart()
+	}
+
 	// Pick up the remaining unswept/not being swept spans before we STW
 	for gosweepone() != ^uintptr(0) {
 		sweep.nbgsweep++
@@ -423,6 +429,11 @@
 		gccheckmark_m(startTime, eagersweep)
 	})
 
+	if trace.enabled {
+		traceGCDone()
+		traceGoStart()
+	}
+
 	// all done
 	mp.gcing = 0
 
diff --git a/src/runtime/malloc1.go b/src/runtime/malloc1.go
index 7b5907b..4d0754b 100644
--- a/src/runtime/malloc1.go
+++ b/src/runtime/malloc1.go
@@ -71,6 +71,9 @@
 	h := &mheap_
 	memstats.heap_alloc += uint64(c.local_cachealloc)
 	c.local_cachealloc = 0
+	if trace.enabled {
+		traceHeapAlloc()
+	}
 	memstats.tinyallocs += uint64(c.local_tinyallocs)
 	c.local_tinyallocs = 0
 	memstats.nlookup += uint64(c.local_nlookup)
diff --git a/src/runtime/mfinal.go b/src/runtime/mfinal.go
index 3cc9a4e..28afa0d 100644
--- a/src/runtime/mfinal.go
+++ b/src/runtime/mfinal.go
@@ -127,7 +127,7 @@
 			fing = gp
 			fingwait = true
 			gp.issystem = true
-			goparkunlock(&finlock, "finalizer wait")
+			goparkunlock(&finlock, "finalizer wait", traceEvGoBlock)
 			gp.issystem = false
 			continue
 		}
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 6436a3f..8cc060c 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -1003,6 +1003,11 @@
 		print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
 		throw("MSpan_Sweep: bad span state")
 	}
+
+	if trace.enabled {
+		traceGCSweepStart()
+	}
+
 	cl := s.sizeclass
 	size := s.elemsize
 	res := false
@@ -1132,6 +1137,10 @@
 		res = mCentral_FreeSpan(&mheap_.central[cl].mcentral, s, int32(nfree), head, end, preserve)
 		// MCentral_FreeSpan updates sweepgen
 	}
+	if trace.enabled {
+		traceGCSweepDone()
+		traceNextGC()
+	}
 	return res
 }
 
@@ -1212,12 +1221,20 @@
 	_g_.m.traceback = 2
 	gchelperstart()
 
+	if trace.enabled {
+		traceGCScanStart()
+	}
+
 	// parallel mark for over GC roots
 	parfordo(work.markfor)
 	if gcphase != _GCscan {
 		scanblock(0, 0, nil) // blocks in getfull
 	}
 
+	if trace.enabled {
+		traceGCScanDone()
+	}
+
 	nproc := work.nproc // work.nproc can change right after we increment work.ndone
 	if xadd(&work.ndone, +1) == nproc-1 {
 		notewakeup(&work.alldone)
@@ -1540,6 +1557,10 @@
 		gp.gcworkdone = false // set to true in gcphasework
 	}
 
+	if trace.enabled {
+		traceGCScanStart()
+	}
+
 	parforsetup(work.markfor, work.nproc, uint32(_RootCount+allglen), nil, false, markroot)
 	if work.nproc > 1 {
 		noteclear(&work.alldone)
@@ -1572,6 +1593,10 @@
 		notesleep(&work.alldone)
 	}
 
+	if trace.enabled {
+		traceGCScanDone()
+	}
+
 	shrinkfinish()
 
 	cachestats()
@@ -1581,6 +1606,9 @@
 	// conservatively set next_gc to high value assuming that everything is live
 	// concurrent/lazy sweep will reduce this number while discovering new garbage
 	memstats.next_gc = memstats.heap_alloc + memstats.heap_alloc*uint64(gcpercent)/100
+	if trace.enabled {
+		traceNextGC()
+	}
 
 	t4 := nanotime()
 	atomicstore64(&memstats.last_gc, uint64(unixnanotime())) // must be Unix time to make sense to user
diff --git a/src/runtime/mgc0.go b/src/runtime/mgc0.go
index 5959396..bbd786d 100644
--- a/src/runtime/mgc0.go
+++ b/src/runtime/mgc0.go
@@ -67,7 +67,7 @@
 		gcwork(0)
 		lock(&bggc.lock)
 		bggc.working = 0
-		goparkunlock(&bggc.lock, "Concurrent GC wait")
+		goparkunlock(&bggc.lock, "Concurrent GC wait", traceEvGoBlock)
 	}
 }
 
@@ -88,6 +88,6 @@
 			continue
 		}
 		sweep.parked = true
-		goparkunlock(&gclock, "GC sweep wait")
+		goparkunlock(&gclock, "GC sweep wait", traceEvGoBlock)
 	}
 }
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 1ff661c..11bc809 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -217,6 +217,9 @@
 			}
 		}
 	}
+	if trace.enabled {
+		traceHeapAlloc()
+	}
 	unlock(&h.lock)
 	return s
 }
@@ -440,6 +443,9 @@
 			memstats.heap_objects--
 		}
 		mHeap_FreeSpanLocked(h, s, true, true)
+		if trace.enabled {
+			traceHeapAlloc()
+		}
 		unlock(&h.lock)
 	})
 }
diff --git a/src/runtime/netpoll.go b/src/runtime/netpoll.go
index 3ef4506..8ea0878 100644
--- a/src/runtime/netpoll.go
+++ b/src/runtime/netpoll.go
@@ -71,7 +71,7 @@
 
 var (
 	netpollInited uint32
-	pollcache pollCache
+	pollcache     pollCache
 )
 
 //go:linkname net_runtime_pollServerInit net.runtime_pollServerInit
@@ -333,7 +333,7 @@
 	// this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl
 	// do the opposite: store to closing/rd/wd, membarrier, load of rg/wg
 	if waitio || netpollcheckerr(pd, mode) == 0 {
-		gopark(netpollblockcommit, unsafe.Pointer(gpp), "IO wait")
+		gopark(netpollblockcommit, unsafe.Pointer(gpp), "IO wait", traceEvGoBlockNet)
 	}
 	// be careful to not lose concurrent READY notification
 	old := xchguintptr(gpp, 0)
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 03ba00f..3d84968 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -93,7 +93,7 @@
 	// let the other goroutine finish printing the panic trace.
 	// Once it does, it will exit. See issue 3934.
 	if panicking != 0 {
-		gopark(nil, nil, "panicwait")
+		gopark(nil, nil, "panicwait", traceEvGoStop)
 	}
 
 	exit(0)
@@ -117,7 +117,7 @@
 			throw("forcegc: phase error")
 		}
 		atomicstore(&forcegc.idle, 1)
-		goparkunlock(&forcegc.lock, "force gc (idle)")
+		goparkunlock(&forcegc.lock, "force gc (idle)", traceEvGoBlock)
 		// this goroutine is explicitly resumed by sysmon
 		if debug.gctrace > 0 {
 			println("GC forced")
@@ -136,7 +136,7 @@
 
 // Puts the current goroutine into a waiting state and calls unlockf.
 // If unlockf returns false, the goroutine is resumed.
-func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason string) {
+func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason string, traceEv byte) {
 	mp := acquirem()
 	gp := mp.curg
 	status := readgstatus(gp)
@@ -146,6 +146,7 @@
 	mp.waitlock = lock
 	mp.waitunlockf = *(*unsafe.Pointer)(unsafe.Pointer(&unlockf))
 	gp.waitreason = reason
+	mp.waittraceev = traceEv
 	releasem(mp)
 	// can't do anything that might move the G between Ms here.
 	mcall(park_m)
@@ -153,8 +154,8 @@
 
 // Puts the current goroutine into a waiting state and unlocks the lock.
 // The goroutine can be made runnable again by calling goready(gp).
-func goparkunlock(lock *mutex, reason string) {
-	gopark(parkunlock_c, unsafe.Pointer(lock), reason)
+func goparkunlock(lock *mutex, reason string, traceEv byte) {
+	gopark(parkunlock_c, unsafe.Pointer(lock), reason, traceEv)
 }
 
 func goready(gp *g) {
diff --git a/src/runtime/proc1.go b/src/runtime/proc1.go
index 39e8094..fcff605 100644
--- a/src/runtime/proc1.go
+++ b/src/runtime/proc1.go
@@ -196,6 +196,10 @@
 
 // Mark gp ready to run.
 func ready(gp *g) {
+	if trace.enabled {
+		traceGoUnpark(gp)
+	}
+
 	status := readgstatus(gp)
 
 	// Mark runnable.
@@ -263,6 +267,7 @@
 			throw("gcprocs inconsistency")
 		}
 		mp.helpgc = n
+		mp.p = allp[pos]
 		mp.mcache = allp[pos].mcache
 		pos++
 		notewakeup(&mp.park)
@@ -604,6 +609,11 @@
 		p := allp[i]
 		s := p.status
 		if s == _Psyscall && cas(&p.status, s, _Pgcstop) {
+			if trace.enabled {
+				traceGoSysBlock(p)
+				traceProcStop(p)
+			}
+			p.syscalltick++
 			sched.stopwait--
 		}
 	}
@@ -1052,6 +1062,7 @@
 		gchelper()
 		_g_.m.helpgc = 0
 		_g_.m.mcache = nil
+		_g_.m.p = nil
 		goto retry
 	}
 	acquirep(_g_.m.nextp)
@@ -1234,6 +1245,10 @@
 		resetcpuprofiler(hz)
 	}
 
+	if trace.enabled {
+		traceGoStart()
+	}
+
 	gogo(&gp.sched)
 }
 
@@ -1272,6 +1287,9 @@
 	if gp := netpoll(false); gp != nil { // non-blocking
 		injectglist(gp.schedlink)
 		casgstatus(gp, _Gwaiting, _Grunnable)
+		if trace.enabled {
+			traceGoUnpark(gp)
+		}
 		return gp
 	}
 
@@ -1355,6 +1373,9 @@
 				acquirep(_p_)
 				injectglist(gp.schedlink)
 				casgstatus(gp, _Gwaiting, _Grunnable)
+				if trace.enabled {
+					traceGoUnpark(gp)
+				}
 				return gp
 			}
 			injectglist(gp)
@@ -1391,6 +1412,11 @@
 	if glist == nil {
 		return
 	}
+	if trace.enabled {
+		for gp := glist; gp != nil; gp = gp.schedlink {
+			traceGoUnpark(gp)
+		}
+	}
 	lock(&sched.lock)
 	var n int
 	for n = 0; glist != nil; n++ {
@@ -1426,18 +1452,28 @@
 	}
 
 	var gp *g
-	// Check the global runnable queue once in a while to ensure fairness.
-	// Otherwise two goroutines can completely occupy the local runqueue
-	// by constantly respawning each other.
-	if _g_.m.p.schedtick%61 == 0 && sched.runqsize > 0 {
-		lock(&sched.lock)
-		gp = globrunqget(_g_.m.p, 1)
-		unlock(&sched.lock)
+	if trace.enabled || trace.shutdown {
+		gp = traceReader()
 		if gp != nil {
+			casgstatus(gp, _Gwaiting, _Grunnable)
+			traceGoUnpark(gp)
 			resetspinning()
 		}
 	}
 	if gp == nil {
+		// Check the global runnable queue once in a while to ensure fairness.
+		// Otherwise two goroutines can completely occupy the local runqueue
+		// by constantly respawning each other.
+		if _g_.m.p.schedtick%61 == 0 && sched.runqsize > 0 {
+			lock(&sched.lock)
+			gp = globrunqget(_g_.m.p, 1)
+			unlock(&sched.lock)
+			if gp != nil {
+				resetspinning()
+			}
+		}
+	}
+	if gp == nil {
 		gp = runqget(_g_.m.p)
 		if gp != nil && _g_.m.spinning {
 			throw("schedule: spinning with local work")
@@ -1476,11 +1512,12 @@
 
 // Puts the current goroutine into a waiting state and calls unlockf.
 // If unlockf returns false, the goroutine is resumed.
-func park(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason string) {
+func park(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason string, traceev byte) {
 	_g_ := getg()
 
 	_g_.m.waitlock = lock
 	_g_.m.waitunlockf = *(*unsafe.Pointer)(unsafe.Pointer(&unlockf))
+	_g_.m.waittraceev = traceev
 	_g_.waitreason = reason
 	mcall(park_m)
 }
@@ -1492,14 +1529,18 @@
 
 // Puts the current goroutine into a waiting state and unlocks the lock.
 // The goroutine can be made runnable again by calling ready(gp).
-func parkunlock(lock *mutex, reason string) {
-	park(parkunlock_c, unsafe.Pointer(lock), reason)
+func parkunlock(lock *mutex, reason string, traceev byte) {
+	park(parkunlock_c, unsafe.Pointer(lock), reason, traceev)
 }
 
 // park continuation on g0.
 func park_m(gp *g) {
 	_g_ := getg()
 
+	if trace.enabled {
+		traceGoPark(_g_.m.waittraceev, gp)
+	}
+
 	casgstatus(gp, _Grunning, _Gwaiting)
 	dropg()
 
@@ -1509,6 +1550,9 @@
 		_g_.m.waitunlockf = nil
 		_g_.m.waitlock = nil
 		if !ok {
+			if trace.enabled {
+				traceGoUnpark(gp)
+			}
 			casgstatus(gp, _Gwaiting, _Grunnable)
 			execute(gp) // Schedule it back, never returns.
 		}
@@ -1516,8 +1560,7 @@
 	schedule()
 }
 
-// Gosched continuation on g0.
-func gosched_m(gp *g) {
+func goschedImpl(gp *g) {
 	status := readgstatus(gp)
 	if status&^_Gscan != _Grunning {
 		dumpgstatus(gp)
@@ -1532,6 +1575,21 @@
 	schedule()
 }
 
+// Gosched continuation on g0.
+func gosched_m(gp *g) {
+	if trace.enabled {
+		traceGoSched()
+	}
+	goschedImpl(gp)
+}
+
+func gopreempt_m(gp *g) {
+	if trace.enabled {
+		traceGoPreempt()
+	}
+	goschedImpl(gp)
+}
+
 // Finishes execution of the current goroutine.
 // Must be NOSPLIT because it is called from Go. (TODO - probably not anymore)
 //go:nosplit
@@ -1539,6 +1597,9 @@
 	if raceenabled {
 		racegoend()
 	}
+	if trace.enabled {
+		traceGoEnd()
+	}
 	mcall(goexit0)
 }
 
@@ -1603,6 +1664,20 @@
 // from a function further up in the call stack than the parent, as g->syscallsp
 // must always point to a valid stack frame. entersyscall below is the normal
 // entry point for syscalls, which obtains the SP and PC from the caller.
+//
+// Syscall tracing:
+// At the start of a syscall we emit traceGoSysCall to capture the stack trace.
+// If the syscall does not block, that is it; we do not emit any other events.
+// If the syscall blocks (that is, the P is retaken), the retaker emits traceGoSysBlock;
+// when the syscall returns we emit traceGoSysExit, and when the goroutine starts running
+// (potentially instantly, if exitsyscallfast returns true) we emit traceGoStart.
+// To ensure that traceGoSysExit is emitted strictly after traceGoSysBlock,
+// we remember the current value of syscalltick in m (_g_.m.syscalltick = _g_.m.p.syscalltick);
+// whoever emits traceGoSysBlock increments p.syscalltick afterwards,
+// and we wait for that increment before emitting traceGoSysExit.
+// Note that the increment is done even if tracing is not enabled,
+// because tracing can be enabled in the middle of a syscall. We don't want the wait to hang.
+//
 //go:nosplit
 func reentersyscall(pc, sp uintptr) {
 	_g_ := getg()
@@ -1611,6 +1686,10 @@
 	// but can have inconsistent g->sched, do not let GC observe it.
 	_g_.m.locks++
 
+	if trace.enabled {
+		systemstack(traceGoSysCall)
+	}
+
 	// Entersyscall must not call any function that might split/grow the stack.
 	// (See details in comment above.)
 	// Catch calls that might, by replacing the stack guard with something that
@@ -1635,6 +1714,7 @@
 		save(pc, sp)
 	}
 
+	_g_.m.syscalltick = _g_.m.p.syscalltick
 	_g_.m.mcache = nil
 	_g_.m.p.m = nil
 	atomicstore(&_g_.m.p.status, _Psyscall)
@@ -1667,9 +1747,15 @@
 
 func entersyscall_gcwait() {
 	_g_ := getg()
+	_p_ := _g_.m.p
 
 	lock(&sched.lock)
-	if sched.stopwait > 0 && cas(&_g_.m.p.status, _Psyscall, _Pgcstop) {
+	if sched.stopwait > 0 && cas(&_p_.status, _Psyscall, _Pgcstop) {
+		if trace.enabled {
+			traceGoSysBlock(_p_)
+			traceProcStop(_p_)
+		}
+		_p_.syscalltick++
 		if sched.stopwait--; sched.stopwait == 0 {
 			notewakeup(&sched.stopnote)
 		}
@@ -1685,6 +1771,8 @@
 	_g_.m.locks++ // see comment in entersyscall
 	_g_.throwsplit = true
 	_g_.stackguard0 = stackPreempt // see comment in entersyscall
+	_g_.m.syscalltick = _g_.m.p.syscalltick
+	_g_.m.p.syscalltick++
 
 	// Leave SP around for GC and traceback.
 	pc := getcallerpc(unsafe.Pointer(&dummy))
@@ -1718,6 +1806,10 @@
 }
 
 func entersyscallblock_handoff() {
+	if trace.enabled {
+		traceGoSysCall()
+		traceGoSysBlock(getg().m.p)
+	}
 	handoffp(releasep())
 }
 
@@ -1735,10 +1827,16 @@
 	}
 
 	_g_.waitsince = 0
+	oldp := _g_.m.p
 	if exitsyscallfast() {
 		if _g_.m.mcache == nil {
 			throw("lost mcache")
 		}
+		if trace.enabled {
+			if oldp != _g_.m.p || _g_.m.syscalltick != _g_.m.p.syscalltick {
+				systemstack(traceGoStart)
+			}
+		}
 		// There's a cpu for us, so we can run.
 		_g_.m.p.syscalltick++
 		// We need to cas the status and scan before resuming...
@@ -1759,6 +1857,15 @@
 		return
 	}
 
+	if trace.enabled {
+		// Wait till the traceGoSysBlock event is emitted.
+		// This ensures consistency of the trace (the goroutine is started after it is blocked).
+		for oldp != nil && oldp.syscalltick == _g_.m.syscalltick {
+			osyield()
+		}
+		systemstack(traceGoSysExit)
+	}
+
 	_g_.m.locks--
 
 	// Call the scheduler.
@@ -1795,16 +1902,41 @@
 		// There's a cpu for us, so we can run.
 		_g_.m.mcache = _g_.m.p.mcache
 		_g_.m.p.m = _g_.m
+		if _g_.m.syscalltick != _g_.m.p.syscalltick {
+			if trace.enabled {
+				// The p was retaken and then entered a syscall again (since _g_.m.syscalltick has changed).
+				// traceGoSysBlock for this syscall was already emitted,
+				// but here we effectively retake the p from the new syscall running on the same p.
+				systemstack(func() {
+					// Denote blocking of the new syscall.
+					traceGoSysBlock(_g_.m.p)
+					// Denote completion of the current syscall.
+					traceGoSysExit()
+				})
+			}
+			_g_.m.p.syscalltick++
+		}
 		return true
 	}
 
 	// Try to get any other idle P.
+	oldp := _g_.m.p
 	_g_.m.mcache = nil
 	_g_.m.p = nil
 	if sched.pidle != nil {
 		var ok bool
 		systemstack(func() {
 			ok = exitsyscallfast_pidle()
+			if ok && trace.enabled {
+				if oldp != nil {
+					// Wait till the traceGoSysBlock event is emitted.
+					// This ensures consistency of the trace (the goroutine is started after it is blocked).
+					for oldp.syscalltick == _g_.m.syscalltick {
+						osyield()
+					}
+				}
+				traceGoSysExit()
+			}
 		})
 		if ok {
 			return true
@@ -1984,6 +2116,7 @@
 	newg.sched.g = guintptr(unsafe.Pointer(newg))
 	gostartcallfn(&newg.sched, fn)
 	newg.gopc = callerpc
+	newg.startpc = fn.fn
 	casgstatus(newg, _Gdead, _Grunnable)
 
 	if _p_.goidcache == _p_.goidcacheend {
@@ -1999,6 +2132,9 @@
 	if raceenabled {
 		newg.racectx = racegostart(callerpc)
 	}
+	if trace.enabled {
+		traceGoCreate(newg, newg.startpc)
+	}
 	runqput(_p_, newg)
 
 	if atomicload(&sched.npidle) != 0 && atomicload(&sched.nmspinning) == 0 && unsafe.Pointer(fn.fn) != unsafe.Pointer(funcPC(main)) { // TODO: fast atomic
@@ -2380,6 +2516,9 @@
 	if old < 0 || old > _MaxGomaxprocs || new <= 0 || new > _MaxGomaxprocs {
 		throw("procresize: invalid arg")
 	}
+	if trace.enabled {
+		traceGomaxprocs(new)
+	}
 
 	// initialize new P's
 	for i := int32(0); i < new; i++ {
@@ -2405,6 +2544,14 @@
 	// free unused P's
 	for i := new; i < old; i++ {
 		p := allp[i]
+		if trace.enabled {
+			if p == getg().m.p {
+				// moving to p[0], pretend that we were descheduled
+				// and then scheduled again to keep the trace sane.
+				traceGoSched()
+				traceProcStop(p)
+			}
+		}
 		// move all runable goroutines to the global queue
 		for p.runqhead != p.runqtail {
 			// pop from tail of local queue
@@ -2421,6 +2568,7 @@
 		freemcache(p.mcache)
 		p.mcache = nil
 		gfpurge(p)
+		traceProcFree(p)
 		p.status = _Pdead
 		// can't free P itself because it can be referenced by an M in syscall
 	}
 	}
@@ -2440,6 +2588,9 @@
 		p.m = nil
 		p.status = _Pidle
 		acquirep(p)
+		if trace.enabled {
+			traceGoStart()
+		}
 	}
 	var runnablePs *p
 	for i := new - 1; i >= 0; i-- {
@@ -2480,6 +2631,10 @@
 	_g_.m.p = _p_
 	_p_.m = _g_.m
 	_p_.status = _Prunning
+
+	if trace.enabled {
+		traceProcStart()
+	}
 }
 
 // Disassociate p and the current m.
@@ -2494,6 +2649,9 @@
 		print("releasep: m=", _g_.m, " m->p=", _g_.m.p, " p->m=", _p_.m, " m->mcache=", _g_.m.mcache, " p->mcache=", _p_.mcache, " p->status=", _p_.status, "\n")
 		throw("releasep: invalid p state")
 	}
+	if trace.enabled {
+		traceProcStop(_g_.m.p)
+	}
 	_g_.m.p = nil
 	_g_.m.mcache = nil
 	_p_.m = nil
@@ -2713,7 +2871,12 @@
 			// increment nmidle and report deadlock.
 			incidlelocked(-1)
 			if cas(&_p_.status, s, _Pidle) {
+				if trace.enabled {
+					traceGoSysBlock(_p_)
+					traceProcStop(_p_)
+				}
 				n++
+				_p_.syscalltick++
 				handoffp(_p_)
 			}
 			incidlelocked(1)
 			incidlelocked(1)
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index f0f8c1a..c71a3c3 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -277,6 +277,8 @@
 	traceback     uint8
 	waitunlockf   unsafe.Pointer // todo go func(*g, unsafe.pointer) bool
 	waitlock      unsafe.Pointer
+	waittraceev   byte
+	syscalltick   uint32
 	//#ifdef GOOS_windows
 	thread uintptr // thread handle
 	// these are here because they are too large to be on the stack
diff --git a/src/runtime/select.go b/src/runtime/select.go
index 20dd299..34fda16 100644
--- a/src/runtime/select.go
+++ b/src/runtime/select.go
@@ -173,7 +173,7 @@
 }
 
 func block() {
-	gopark(nil, nil, "select (no cases)") // forever
+	gopark(nil, nil, "select (no cases)", traceEvGoStop) // forever
 }
 
 // overwrites return pc on stack to signal which case of the select
@@ -363,7 +363,7 @@
 
 	// wait for someone to wake us up
 	gp.param = nil
-	gopark(selparkcommit, unsafe.Pointer(sel), "select")
+	gopark(selparkcommit, unsafe.Pointer(sel), "select", traceEvGoBlockSelect)
 
 	// someone woke us up
 	sellock(sel)
diff --git a/src/runtime/sema.go b/src/runtime/sema.go
index 35a1264..34852ea 100644
--- a/src/runtime/sema.go
+++ b/src/runtime/sema.go
@@ -97,7 +97,7 @@
 		// Any semrelease after the cansemacquire knows we're waiting
 		// (we set nwait above), so go to sleep.
 		root.queue(addr, s)
-		goparkunlock(&root.lock, "semacquire")
+		goparkunlock(&root.lock, "semacquire", traceEvGoBlockSync)
 		if cansemacquire(addr) {
 			break
 		}
@@ -234,7 +234,7 @@
 			s.tail.next = w
 		}
 		s.tail = w
-		goparkunlock(&s.lock, "semacquire")
+		goparkunlock(&s.lock, "semacquire", traceEvGoBlockCond)
 		if t0 != 0 {
 			blockevent(int64(w.releasetime)-t0, 2)
 		}
@@ -273,7 +273,7 @@
 			s.tail.next = w
 		}
 		s.tail = w
-		goparkunlock(&s.lock, "semarelease")
+		goparkunlock(&s.lock, "semarelease", traceEvGoBlockCond)
 		releaseSudog(w)
 	} else {
 		unlock(&s.lock)
diff --git a/src/runtime/stack1.go b/src/runtime/stack1.go
index 513f27d..dda39ce 100644
--- a/src/runtime/stack1.go
+++ b/src/runtime/stack1.go
@@ -721,7 +721,7 @@
 
 		// Act like goroutine called runtime.Gosched.
 		casgstatus(gp, _Gwaiting, _Grunning)
-		gosched_m(gp) // never return
+		gopreempt_m(gp) // never return
 	}
 
 	// Allocate a bigger segment and move the stack.
diff --git a/src/runtime/time.go b/src/runtime/time.go
index 5917d7c..50895ca 100644
--- a/src/runtime/time.go
+++ b/src/runtime/time.go
@@ -56,7 +56,7 @@
 	t.arg = getg()
 	lock(&timers.lock)
 	addtimerLocked(t)
-	goparkunlock(&timers.lock, "sleep")
+	goparkunlock(&timers.lock, "sleep", traceEvGoSleep)
 }
 
 // startTimer adds t to the timer heap.
@@ -200,7 +200,7 @@
 		if delta < 0 || faketime > 0 {
 			// No timers left - put goroutine to sleep.
 			timers.rescheduling = true
-			goparkunlock(&timers.lock, "timer goroutine (idle)")
+			goparkunlock(&timers.lock, "timer goroutine (idle)", traceEvGoBlock)
 			continue
 		}
 		// At least one timer pending.  Sleep until then.
diff --git a/src/runtime/trace.go b/src/runtime/trace.go
index 17d0298..032bef4 100644
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -292,7 +292,7 @@
 	// Wait for new data.
 	if trace.fullHead == nil && !trace.shutdown {
 		trace.reader = getg()
-		goparkunlock(&trace.lock, "trace reader (blocked)" /*, traceEvGoBlock*/)
+		goparkunlock(&trace.lock, "trace reader (blocked)", traceEvGoBlock)
 		lock(&trace.lock)
 	}
 	// Write a buffer.