runtime: enable profiling on g0

Since we now have stack information for code running on the
systemstack, we can traceback over it. To make cpu profiles useful,
add a case in gentraceback to jump over systemstack switches.

Fixes #10609.

Change-Id: I21f47fcc802c07c5d4a1ada56374314e388a6dc7
Reviewed-on: https://go-review.googlesource.com/9506
Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go
index 9f34e37..0f29608 100644
--- a/src/runtime/traceback.go
+++ b/src/runtime/traceback.go
@@ -46,6 +46,9 @@
 	timerprocPC          uintptr
 	gcBgMarkWorkerPC     uintptr
 	systemstack_switchPC uintptr
+	systemstackPC        uintptr
+
+	gogoPC uintptr
 
 	externalthreadhandlerp uintptr // initialized elsewhere
 )
@@ -69,6 +72,10 @@
 	timerprocPC = funcPC(timerproc)
 	gcBgMarkWorkerPC = funcPC(gcBgMarkWorker)
 	systemstack_switchPC = funcPC(systemstack_switch)
+	systemstackPC = funcPC(systemstack)
+
+	// used by sigprof handler
+	gogoPC = funcPC(gogo)
 }
 
 // Traceback over the deferred function calls.
@@ -194,7 +201,14 @@
 		// Found an actual function.
 		// Derive frame pointer and link register.
 		if frame.fp == 0 {
-			frame.fp = frame.sp + uintptr(funcspdelta(f, frame.pc))
+			// We want to jump over the systemstack switch. If we're running on the
+			// g0, this systemstack is at the top of the stack.
+			// if we're not on g0 or there's a no curg, then this is a regular call.
+			sp := frame.sp
+			if flags&_TraceJumpStack != 0 && f.entry == systemstackPC && gp == g.m.g0 && gp.m.curg != nil {
+				sp = gp.m.curg.sched.sp
+			}
+			frame.fp = sp + uintptr(funcspdelta(f, frame.pc))
 			if !usesLR {
 				// On x86, call instruction pushes return PC before entering new function.
 				frame.fp += regSize