runtime: print hexdump on traceback failure

Currently, if anything goes wrong when printing a traceback, we simply
cut off the traceback without any further diagnostics. Unfortunately,
right now, we have a few issues that are difficult to debug because
the traceback simply cuts off (#21431, #23484).

This is an attempt to improve the debuggability of traceback failure
by printing a diagnostic message plus a hex dump around the failed
traceback frame when something goes wrong.

The failures look like:

goroutine 5 [running]:
runtime: unexpected return pc for main.badLR2 called from 0xbad
stack: frame={sp:0xc42004dfa8, fp:0xc42004dfc8} stack=[0xc42004d800,0xc42004e000)
000000c42004dea8:  0000000000000001  0000000000000001
000000c42004deb8:  000000c42004ded8  000000c42004ded8
000000c42004dec8:  0000000000427eea <runtime.dopanic+74>  000000c42004ded8
000000c42004ded8:  000000000044df70 <runtime.dopanic.func1+0>  000000c420001080
000000c42004dee8:  0000000000427b21 <runtime.gopanic+961>  000000c42004df08
000000c42004def8:  000000c42004df98  0000000000427b21 <runtime.gopanic+961>
000000c42004df08:  0000000000000000  0000000000000000
000000c42004df18:  0000000000000000  0000000000000000
000000c42004df28:  0000000000000000  0000000000000000
000000c42004df38:  0000000000000000  000000c420001080
000000c42004df48:  0000000000000000  0000000000000000
000000c42004df58:  0000000000000000  0000000000000000
000000c42004df68:  000000c4200010a0  0000000000000000
000000c42004df78:  00000000004c6400  00000000005031d0
000000c42004df88:  0000000000000000  0000000000000000
000000c42004df98:  000000c42004dfb8  00000000004ae7d9 <main.badLR2+73>
000000c42004dfa8: <00000000004c6400  00000000005031d0
000000c42004dfb8:  000000c42004dfd0 !0000000000000bad
000000c42004dfc8: >0000000000000000  0000000000000000
000000c42004dfd8:  0000000000451821 <runtime.goexit+1>  0000000000000000
000000c42004dfe8:  0000000000000000  0000000000000000
000000c42004dff8:  0000000000000000
main.badLR2(0x0)
	/go/src/runtime/testdata/testprog/badtraceback.go:42 +0x49

For #21431, #23484.

Change-Id: I8718fc76ced81adb0b4b0b4f2293f3219ca80786
Reviewed-on: https://go-review.googlesource.com/89016
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
diff --git a/src/runtime/crash_test.go b/src/runtime/crash_test.go
index 9588ddd..0254ebd 100644
--- a/src/runtime/crash_test.go
+++ b/src/runtime/crash_test.go
@@ -607,3 +607,17 @@
 	}
 	t.Errorf("test ran %d times without producing expected output", tries)
 }
+
+func TestBadTraceback(t *testing.T) {
+	output := runTestProg(t, "testprog", "BadTraceback")
+	for _, want := range []string{
+		"runtime: unexpected return pc",
+		"called from 0xbad",
+		"00000bad",    // Smashed LR in hex dump
+		"<main.badLR", // Symbolization in hex dump (badLR1 or badLR2)
+	} {
+		if !strings.Contains(output, want) {
+			t.Errorf("output does not contain %q:\n%s", want, output)
+		}
+	}
+}
diff --git a/src/runtime/print.go b/src/runtime/print.go
index a698fcb..7b2e4f4 100644
--- a/src/runtime/print.go
+++ b/src/runtime/print.go
@@ -6,6 +6,7 @@
 
 import (
 	"runtime/internal/atomic"
+	"runtime/internal/sys"
 	"unsafe"
 )
 
@@ -249,3 +250,55 @@
 func printiface(i iface) {
 	print("(", i.tab, ",", i.data, ")")
 }
+
+// hexdumpWords prints a word-oriented hex dump of [p, end).
+//
+// If mark != nil, it will be called with each printed word's address
+// and should return a character mark to appear just before that
+// word's value. It can return 0 to indicate no mark.
+func hexdumpWords(p, end uintptr, mark func(uintptr) byte) {
+	p1 := func(x uintptr) {
+		var buf [2 * sys.PtrSize]byte
+		for i := len(buf) - 1; i >= 0; i-- {
+			if x&0xF < 10 {
+				buf[i] = byte(x&0xF) + '0'
+			} else {
+				buf[i] = byte(x&0xF) - 10 + 'a'
+			}
+			x >>= 4
+		}
+		gwrite(buf[:])
+	}
+
+	printlock()
+	var markbuf [1]byte
+	markbuf[0] = ' '
+	for i := uintptr(0); p+i < end; i += sys.PtrSize {
+		if i%16 == 0 {
+			if i != 0 {
+				println()
+			}
+			p1(p + i)
+			print(": ")
+		}
+
+		if mark != nil {
+			markbuf[0] = mark(p + i)
+			if markbuf[0] == 0 {
+				markbuf[0] = ' '
+			}
+		}
+		gwrite(markbuf[:])
+		val := *(*uintptr)(unsafe.Pointer(p + i))
+		p1(val)
+		print(" ")
+
+		// Can we symbolize val?
+		fn := findfunc(val)
+		if fn.valid() {
+			print("<", funcname(fn), "+", val-fn.entry, "> ")
+		}
+	}
+	println()
+	printunlock()
+}
diff --git a/src/runtime/testdata/testprog/badtraceback.go b/src/runtime/testdata/testprog/badtraceback.go
new file mode 100644
index 0000000..d558adc
--- /dev/null
+++ b/src/runtime/testdata/testprog/badtraceback.go
@@ -0,0 +1,47 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+	"runtime"
+	"runtime/debug"
+	"unsafe"
+)
+
+func init() {
+	register("BadTraceback", BadTraceback)
+}
+
+func BadTraceback() {
+	// Disable GC to prevent traceback at unexpected time.
+	debug.SetGCPercent(-1)
+
+	// Run badLR1 on its own stack to minimize the stack size and
+	// exercise the stack bounds logic in the hex dump.
+	go badLR1()
+	select {}
+}
+
+//go:noinline
+func badLR1() {
+	// We need two frames on LR machines because we'll smash this
+	// frame's saved LR.
+	badLR2(0)
+}
+
+//go:noinline
+func badLR2(arg int) {
+	// Smash the return PC or saved LR.
+	lrOff := unsafe.Sizeof(uintptr(0))
+	if runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" {
+		lrOff = 32 // FIXED_FRAME or sys.MinFrameSize
+	}
+	lrPtr := (*uintptr)(unsafe.Pointer(uintptr(unsafe.Pointer(&arg)) - lrOff))
+	*lrPtr = 0xbad
+
+	// Print a backtrace. This should include diagnostics for the
+	// bad return PC and a hex dump.
+	panic("backtrace")
+}
diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go
index 501ecb0..62622df 100644
--- a/src/runtime/traceback.go
+++ b/src/runtime/traceback.go
@@ -204,8 +204,11 @@
 
 	f := findfunc(frame.pc)
 	if !f.valid() {
-		if callback != nil {
+		if callback != nil || printing {
 			print("runtime: unknown pc ", hex(frame.pc), "\n")
+			tracebackHexdump(gp.stack, &frame, 0)
+		}
+		if callback != nil {
 			throw("unknown pc")
 		}
 		return 0
@@ -281,8 +284,11 @@
 				// In that context it is okay to stop early.
 				// But if callback is set, we're doing a garbage collection and must
 				// get everything, so crash loudly.
-				if callback != nil {
+				if callback != nil || printing {
 					print("runtime: unexpected return pc for ", funcname(f), " called from ", hex(frame.lr), "\n")
+					tracebackHexdump(gp.stack, &frame, lrPtr)
+				}
+				if callback != nil {
 					throw("unknown caller pc")
 				}
 			}
@@ -866,6 +872,53 @@
 	unlock(&allglock)
 }
 
+// tracebackHexdump hexdumps part of stk around frame.sp and frame.fp
+// for debugging purposes. If the address bad is included in the
+// hexdumped range, it will mark it as well.
+func tracebackHexdump(stk stack, frame *stkframe, bad uintptr) {
+	const expand = 32 * sys.PtrSize
+	const maxExpand = 256 * sys.PtrSize
+	// Start around frame.sp.
+	lo, hi := frame.sp, frame.sp
+	// Expand to include frame.fp.
+	if frame.fp != 0 && frame.fp < lo {
+		lo = frame.fp
+	}
+	if frame.fp != 0 && frame.fp > hi {
+		hi = frame.fp
+	}
+	// Expand a bit more.
+	lo, hi = lo-expand, hi+expand
+	// But don't go too far from frame.sp.
+	if lo < frame.sp-maxExpand {
+		lo = frame.sp - maxExpand
+	}
+	if hi > frame.sp+maxExpand {
+		hi = frame.sp + maxExpand
+	}
+	// And don't go outside the stack bounds.
+	if lo < stk.lo {
+		lo = stk.lo
+	}
+	if hi > stk.hi {
+		hi = stk.hi
+	}
+
+	// Print the hex dump.
+	print("stack: frame={sp:", hex(frame.sp), ", fp:", hex(frame.fp), "} stack=[", hex(stk.lo), ",", hex(stk.hi), ")\n")
+	hexdumpWords(lo, hi, func(p uintptr) byte {
+		switch p {
+		case frame.fp:
+			return '>'
+		case frame.sp:
+			return '<'
+		case bad:
+			return '!'
+		}
+		return 0
+	})
+}
+
 // Does f mark the top of a goroutine stack?
 func topofstack(f funcInfo) bool {
 	pc := f.entry