libgo: prune sighandler frames in runtime.sigprof

When writing stack frames to the pprof CPU profile machinery, it is
very important to insure that the frames emitted do not contain any
frames corresponding to artifacts of the profiling process itself
(signal handlers, sigprof, etc). This patch changes runtime.sigprof to
strip out those frames from the raw stack generated by
"runtime.callers".

Fixes golang/go#26595.

Change-Id: I6045e3e6a01421b0d38358ee885336735b49a780
Reviewed-on: https://go-review.googlesource.com/126175
Reviewed-by: Ian Lance Taylor <iant@golang.org>
diff --git a/libgo/go/runtime/pprof/pprof_test.go b/libgo/go/runtime/pprof/pprof_test.go
index 07a7946..02d99f5 100644
--- a/libgo/go/runtime/pprof/pprof_test.go
+++ b/libgo/go/runtime/pprof/pprof_test.go
@@ -72,15 +72,24 @@
 	return foo
 }
 
+// Return a list of functions that we don't want to ever appear in CPU
+// profiles. For gccgo, that list includes the sigprof handler itself.
+func avoidFunctions() []string {
+	if runtime.Compiler == "gccgo" {
+		return []string{"runtime.sigprof"}
+	}
+	return nil
+}
+
 func TestCPUProfile(t *testing.T) {
-	testCPUProfile(t, []string{"pprof.cpuHog1"}, func(dur time.Duration) {
+	testCPUProfile(t, []string{"pprof.cpuHog1"}, avoidFunctions(), func(dur time.Duration) {
 		cpuHogger(cpuHog1, &salt1, dur)
 	})
 }
 
 func TestCPUProfileMultithreaded(t *testing.T) {
 	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2))
-	testCPUProfile(t, []string{"pprof.cpuHog1", "pprof.cpuHog2"}, func(dur time.Duration) {
+	testCPUProfile(t, []string{"pprof.cpuHog1", "pprof.cpuHog2"}, avoidFunctions(), func(dur time.Duration) {
 		c := make(chan int)
 		go func() {
 			cpuHogger(cpuHog1, &salt1, dur)
@@ -92,7 +101,7 @@
 }
 
 func TestCPUProfileInlining(t *testing.T) {
-	testCPUProfile(t, []string{"pprof.inlinedCallee", "pprof.inlinedCaller"}, func(dur time.Duration) {
+	testCPUProfile(t, []string{"pprof.inlinedCallee", "pprof.inlinedCaller"}, avoidFunctions(), func(dur time.Duration) {
 		cpuHogger(inlinedCaller, &salt1, dur)
 	})
 }
@@ -130,7 +139,7 @@
 	}
 }
 
-func testCPUProfile(t *testing.T, need []string, f func(dur time.Duration)) {
+func testCPUProfile(t *testing.T, need []string, avoid []string, f func(dur time.Duration)) {
 	switch runtime.GOOS {
 	case "darwin":
 		switch runtime.GOARCH {
@@ -169,7 +178,7 @@
 		f(duration)
 		StopCPUProfile()
 
-		if profileOk(t, need, prof, duration) {
+		if profileOk(t, need, avoid, prof, duration) {
 			return
 		}
 
@@ -202,11 +211,13 @@
 	return false
 }
 
-func profileOk(t *testing.T, need []string, prof bytes.Buffer, duration time.Duration) (ok bool) {
+func profileOk(t *testing.T, need []string, avoid []string, prof bytes.Buffer, duration time.Duration) (ok bool) {
 	ok = true
 
-	// Check that profile is well formed and contains need.
+	// Check that profile is well formed, contains 'need', and does not contain
+	// anything from 'avoid'.
 	have := make([]uintptr, len(need))
+	avoidSamples := make([]uintptr, len(avoid))
 	var samples uintptr
 	var buf bytes.Buffer
 	parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, labels map[string][]string) {
@@ -229,6 +240,15 @@
 				}
 			}
 		}
+		for i, name := range avoid {
+			for _, loc := range stk {
+				for _, line := range loc.Line {
+					if strings.Contains(line.Function.Name, name) {
+						avoidSamples[i] += count
+					}
+				}
+			}
+		}
 		fmt.Fprintf(&buf, "\n")
 	})
 	t.Logf("total %d CPU profile samples collected:\n%s", samples, buf.String())
@@ -251,6 +271,14 @@
 		ok = false
 	}
 
+	for i, name := range avoid {
+		bad := avoidSamples[i]
+		if bad != 0 {
+			t.Logf("found %d samples in avoid-function %s\n", bad, name)
+			ok = false
+		}
+	}
+
 	if len(need) == 0 {
 		return ok
 	}
@@ -318,6 +346,9 @@
 // If it did, it would see inconsistent state and would either record an incorrect stack
 // or crash because the stack was malformed.
 func TestGoroutineSwitch(t *testing.T) {
+	if runtime.Compiler == "gccgo" {
+		t.Skip("not applicable for gccgo")
+	}
 	// How much to try. These defaults take about 1 seconds
 	// on a 2012 MacBook Pro. The ones in short mode take
 	// about 0.1 seconds.
@@ -377,7 +408,7 @@
 
 // Test that profiling of division operations is okay, especially on ARM. See issue 6681.
 func TestMathBigDivide(t *testing.T) {
-	testCPUProfile(t, nil, func(duration time.Duration) {
+	testCPUProfile(t, nil, nil, func(duration time.Duration) {
 		t := time.After(duration)
 		pi := new(big.Int)
 		for {
@@ -851,7 +882,7 @@
 }
 
 func TestCPUProfileLabel(t *testing.T) {
-	testCPUProfile(t, []string{"pprof.cpuHogger;key=value"}, func(dur time.Duration) {
+	testCPUProfile(t, []string{"pprof.cpuHogger;key=value"}, avoidFunctions(), func(dur time.Duration) {
 		Do(context.Background(), Labels("key", "value"), func(context.Context) {
 			cpuHogger(cpuHog1, &salt1, dur)
 		})
@@ -862,7 +893,7 @@
 	// Test the race detector annotations for synchronization
 	// between settings labels and consuming them from the
 	// profile.
-	testCPUProfile(t, []string{"pprof.cpuHogger;key=value"}, func(dur time.Duration) {
+	testCPUProfile(t, []string{"pprof.cpuHogger;key=value"}, avoidFunctions(), func(dur time.Duration) {
 		start := time.Now()
 		var wg sync.WaitGroup
 		for time.Since(start) < dur {
diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go
index a6746c9..5826958 100644
--- a/libgo/go/runtime/proc.go
+++ b/libgo/go/runtime/proc.go
@@ -3418,8 +3418,36 @@
 		var stklocs [maxCPUProfStack]location
 		n = callers(0, stklocs[:])
 
+		// Issue 26595: the stack trace we've just collected is going
+		// to include frames that we don't want to report in the CPU
+		// profile, including signal handler frames. Here is what we
+		// might typically see at the point of "callers" above for a
+		// signal delivered to the application routine "interesting"
+		// called by "main".
+		//
+		//  0: runtime.sigprof
+		//  1: runtime.sighandler
+		//  2: runtime.sigtrampgo
+		//  3: runtime.sigtramp
+		//  4: <signal handler called>
+		//  5: main.interesting_routine
+		//  6: main.main
+		//
+		// To ensure a sane profile, walk through the frames in
+		// "stklocs" until we find the "runtime.sigtramp" frame, then
+		// report only those frames below the frame one down from
+		// that. If for some reason "runtime.sigtramp" is not present,
+		// don't make any changes.
+		framesToDiscard := 0
 		for i := 0; i < n; i++ {
-			stk[i] = stklocs[i].pc
+			if stklocs[i].function == "runtime.sigtramp" && i+2 < n {
+				framesToDiscard = i + 2
+				n -= framesToDiscard
+				break
+			}
+		}
+		for i := 0; i < n; i++ {
+			stk[i] = stklocs[i+framesToDiscard].pc
 		}
 	}