runtime: add precise stack scan support

This CL adds support for precise stack scanning using stack maps to
the runtime. The stack maps are generated by the compiler (if
supported). Each safepoint is associated with a (real or dummy)
landing pad, and its "type info" in the exception table is a
pointer to the stack map. When a stack is scanned, the stack map
is found by the stack unwinding code by inspecting the exception
table (LSDA).

For precise stack scan we need to unwind the stack. There are
three cases:

- If a goroutine is scanning its own stack, it can unwind the
  stack and scan the frames.

- If a goroutine is scanning another, stopped, goroutine, it
  cannot directly unwind the target stack. We handle this by
  switching (runtime.gogo) to the target g, letting it unwind
  and scan the stack, and switch back.

- If we are scanning a goroutine that is blocked in a syscall,
  we send a signal to the target goroutine's thread, and let the
  signal handler unwind and scan the stack. Extra care is needed
  as this races with enter/exit syscall.

Currently this is only implemented on linux.

Change-Id: I065894277741c2cacce703ce7489e8979b3d0aac
Reviewed-on: https://go-review.googlesource.com/c/140518
Reviewed-by: Ian Lance Taylor <iant@golang.org>
diff --git a/libgo/go/runtime/mgcmark.go b/libgo/go/runtime/mgcmark.go
index 88cae41..631c4d7 100644
--- a/libgo/go/runtime/mgcmark.go
+++ b/libgo/go/runtime/mgcmark.go
@@ -664,7 +664,10 @@
 }
 
 // We use a C function to find the stack.
-func doscanstack(*g, *gcWork)
+// Returns whether we successfully scanned the stack.
+func doscanstack(*g, *gcWork) bool
+
+func doscanstackswitch(*g, *g)
 
 // scanstack scans gp's stack, greying all pointers found on the stack.
 //
@@ -691,7 +694,16 @@
 		return
 	case _Grunning:
 		// ok for gccgo, though not for gc.
-	case _Grunnable, _Gsyscall, _Gwaiting:
+		if usestackmaps {
+			print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
+			throw("scanstack: goroutine not stopped")
+		}
+	case _Gsyscall:
+		if usestackmaps {
+			print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
+			throw("scanstack: goroutine in syscall")
+		}
+	case _Grunnable, _Gwaiting:
 		// ok
 	}
 
@@ -701,15 +713,64 @@
 	}
 
 	// Scan the stack.
-	doscanstack(gp, gcw)
+	if usestackmaps {
+		g := getg()
+		if g == gp {
+			// Scan its own stack.
+			doscanstack(gp, gcw)
+		} else if gp.entry != nil {
+			// This is a newly created g that hasn't run. No stack to scan.
+		} else {
+			// Scanning another g's stack. We need to switch to that g
+			// to unwind its stack. And switch back after scan.
+			scanstackswitch(gp, gcw)
+		}
+	} else {
+		doscanstack(gp, gcw)
 
-	// Conservatively scan the saved register values.
-	scanstackblock(uintptr(unsafe.Pointer(&gp.gcregs)), unsafe.Sizeof(gp.gcregs), gcw)
-	scanstackblock(uintptr(unsafe.Pointer(&gp.context)), unsafe.Sizeof(gp.context), gcw)
+		// Conservatively scan the saved register values.
+		scanstackblock(uintptr(unsafe.Pointer(&gp.gcregs)), unsafe.Sizeof(gp.gcregs), gcw)
+		scanstackblock(uintptr(unsafe.Pointer(&gp.context)), unsafe.Sizeof(gp.context), gcw)
+	}
 
 	gp.gcscanvalid = true
 }
 
+// scanstackswitch scans gp's stack by switching (gogo) to gp and
+// letting it scan its own stack, and switching back upon finish.
+//
+//go:nowritebarrier
+func scanstackswitch(gp *g, gcw *gcWork) {
+	g := getg()
+
+	// We are on the system stack which prevents preemption. But
+	// we are going to switch to g stack. Lock m to block preemption.
+	mp := acquirem()
+
+	// The doscanstackswitch function will modify the current g's
+	// context. Preserve it.
+	// The stack scan code may call systemstack, which will modify
+	// gp's context. Preserve it as well so we can resume gp.
+	context := g.context
+	stackcontext := g.stackcontext
+	context2 := gp.context
+	stackcontext2 := gp.stackcontext
+
+	gp.scangcw = uintptr(unsafe.Pointer(gcw))
+	gp.scang = uintptr(unsafe.Pointer(g))
+	doscanstackswitch(g, gp)
+
+	// Restore the contexts.
+	g.context = context
+	g.stackcontext = stackcontext
+	gp.context = context2
+	gp.stackcontext = stackcontext2
+	gp.scangcw = 0
+	// gp.scang is already cleared in C code.
+
+	releasem(mp)
+}
+
 type gcDrainFlags int
 
 const (
@@ -1064,6 +1125,10 @@
 // scanblock, but we scan the stack conservatively, so there is no
 // bitmask of pointers.
 func scanstackblock(b, n uintptr, gcw *gcWork) {
+	if usestackmaps {
+		throw("scanstackblock: conservative scan but stack map is used")
+	}
+
 	for i := uintptr(0); i < n; i += sys.PtrSize {
 		// Same work as in scanobject; see comments there.
 		obj := *(*uintptr)(unsafe.Pointer(b + i))
@@ -1073,6 +1138,50 @@
 	}
 }
 
+// scanstackblockwithmap is like scanstackblock, but with an explicit
+// pointer bitmap. This is used only when precise stack scan is enabled.
+//go:linkname scanstackblockwithmap runtime.scanstackblockwithmap
+//go:nowritebarrier
+func scanstackblockwithmap(pc, b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
+	// Use local copies of original parameters, so that a stack trace
+	// due to one of the throws below shows the original block
+	// base and extent.
+	b := b0
+	n := n0
+
+	for i := uintptr(0); i < n; {
+		// Find bits for the next word.
+		bits := uint32(*addb(ptrmask, i/(sys.PtrSize*8)))
+		if bits == 0 {
+			i += sys.PtrSize * 8
+			continue
+		}
+		for j := 0; j < 8 && i < n; j++ {
+			if bits&1 != 0 {
+				// Same work as in scanobject; see comments there.
+				obj := *(*uintptr)(unsafe.Pointer(b + i))
+				if obj != 0 {
+					o, span, objIndex := findObject(obj, b, i, false)
+					if obj < minPhysPageSize ||
+						span != nil && span.state != _MSpanManual &&
+							(obj < span.base() || obj >= span.limit || span.state != mSpanInUse) {
+						print("runtime: found in object at *(", hex(b), "+", hex(i), ") = ", hex(obj), ", pc=", hex(pc), "\n")
+						name, file, line := funcfileline(pc, -1)
+						print(name, "\n", file, ":", line, "\n")
+						//gcDumpObject("object", b, i)
+						throw("found bad pointer in Go stack (incorrect use of unsafe or cgo?)")
+					}
+					if o != 0 {
+						greyobject(o, b, i, span, gcw, objIndex, false)
+					}
+				}
+			}
+			bits >>= 1
+			i += sys.PtrSize
+		}
+	}
+}
+
 // Shade the object if it isn't already.
 // The object is not nil and known to be in the heap.
 // Preemption must be disabled.
diff --git a/libgo/go/runtime/os_gccgo.go b/libgo/go/runtime/os_gccgo.go
index 5709555..08511fd 100644
--- a/libgo/go/runtime/os_gccgo.go
+++ b/libgo/go/runtime/os_gccgo.go
@@ -27,7 +27,8 @@
 func minit() {
 	minitSignals()
 
-	// FIXME: We should set _g_.m.procid here.
+	// FIXME: only works on linux for now.
+	getg().m.procid = uint64(gettid())
 }
 
 // Called from dropm to undo the effect of an minit.
diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go
index bb16924..ef166cb 100644
--- a/libgo/go/runtime/proc.go
+++ b/libgo/go/runtime/proc.go
@@ -528,6 +528,8 @@
 
 	sched.maxmcount = 10000
 
+	usestackmaps = probestackmaps()
+
 	mallocinit()
 	mcommoninit(_g_.m)
 	cpuinit() // must run before alginit
@@ -891,7 +893,49 @@
 		case _Gcopystack:
 		// Stack being switched. Go around again.
 
-		case _Grunnable, _Gsyscall, _Gwaiting:
+		case _Gsyscall:
+			if usestackmaps {
+				// Claim goroutine by setting scan bit.
+				// Racing with execution or readying of gp.
+				// The scan bit keeps them from running
+				// the goroutine until we're done.
+				if castogscanstatus(gp, s, s|_Gscan) {
+					if gp.scanningself {
+						// Don't try to scan the stack
+						// if the goroutine is going to do
+						// it itself.
+						// FIXME: can this happen?
+						restartg(gp)
+						break
+					}
+					if !gp.gcscandone {
+						// Send a signal to let the goroutine scan
+						// itself. This races with enter/exitsyscall.
+						// If the goroutine is not stopped at a safepoint,
+						// it will not scan the stack and we'll try again.
+						mp := gp.m
+						noteclear(&mp.scannote)
+						gp.scangcw = uintptr(unsafe.Pointer(gcw))
+						tgkill(getpid(), _pid_t(mp.procid), _SIGURG)
+
+						// Wait for gp to scan its own stack.
+						notesleep(&mp.scannote)
+
+						if !gp.gcscandone {
+							// The signal was delivered at a bad time.
+							// Try again.
+							restartg(gp)
+							break
+						}
+					}
+					restartg(gp)
+					break loop
+				}
+				break
+			}
+			fallthrough
+
+		case _Grunnable, _Gwaiting:
 			// Claim goroutine by setting scan bit.
 			// Racing with execution or readying of gp.
 			// The scan bit keeps them from running
@@ -954,6 +998,11 @@
 
 // The GC requests that this routine be moved from a scanmumble state to a mumble state.
 func restartg(gp *g) {
+	if gp.scang != 0 || gp.scangcw != 0 {
+		print("g ", gp.goid, "is being scanned scang=", gp.scang, " scangcw=", gp.scangcw, "\n")
+		throw("restartg: being scanned")
+	}
+
 	s := readgstatus(gp)
 	switch s {
 	default:
diff --git a/libgo/go/runtime/runtime2.go b/libgo/go/runtime/runtime2.go
index e12e832..6eb9491 100644
--- a/libgo/go/runtime/runtime2.go
+++ b/libgo/go/runtime/runtime2.go
@@ -430,6 +430,9 @@
 
 	scanningself bool // whether goroutine is scanning its own stack
 
+	scang   uintptr // the g that wants to scan this g's stack (uintptr to avoid write barrier)
+	scangcw uintptr // gc worker for scanning stack (uintptr to avoid write barrier)
+
 	isSystemGoroutine bool // whether goroutine is a "system" goroutine
 
 	traceback uintptr // stack traceback buffer
@@ -514,6 +517,8 @@
 	exiting    bool // thread is exiting
 
 	gcing int32
+
+	scannote note // synchronization for signal-based stack scanning
 }
 
 type p struct {
diff --git a/libgo/go/runtime/signal_sighandler.go b/libgo/go/runtime/signal_sighandler.go
index e4bf7bc..b41eaf4 100644
--- a/libgo/go/runtime/signal_sighandler.go
+++ b/libgo/go/runtime/signal_sighandler.go
@@ -36,6 +36,28 @@
 
 	sigfault, sigpc := getSiginfo(info, ctxt)
 
+	if sig == _SIGURG && usestackmaps {
+		// We may be signaled to do a stack scan.
+		// The signal delivery races with enter/exitsyscall.
+		// We may be on g0 stack now. gp.m.curg is the g we
+		// want to scan.
+		// If we're not on g stack, give up. The sender will
+		// try again later.
+		// If we're not stopped at a safepoint (doscanstack will
+		// return false), also give up.
+		if s := readgstatus(gp.m.curg); s == _Gscansyscall {
+			if gp == gp.m.curg {
+				if doscanstack(gp, (*gcWork)(unsafe.Pointer(gp.scangcw))) {
+					gp.gcscanvalid = true
+					gp.gcscandone = true
+				}
+			}
+			gp.m.curg.scangcw = 0
+			notewakeup(&gp.m.scannote)
+			return
+		}
+	}
+
 	if sig == _SIGPROF {
 		sigprof(sigpc, gp, _g_.m)
 		return
diff --git a/libgo/go/runtime/stubs.go b/libgo/go/runtime/stubs.go
index 1aae4f3..4caa39d 100644
--- a/libgo/go/runtime/stubs.go
+++ b/libgo/go/runtime/stubs.go
@@ -447,3 +447,10 @@
 // signal handler, which will attempt to tear down the runtime
 // immediately.
 func abort()
+
+// usestackmaps is true if stack map (precise stack scan) is enabled.
+var usestackmaps bool
+
+// probestackmaps detects whether there are stack maps.
+//go:linkname probestackmaps runtime.probestackmaps
+func probestackmaps() bool
diff --git a/libgo/go/runtime/stubs_linux.go b/libgo/go/runtime/stubs_linux.go
index d10f657..3c733e3 100644
--- a/libgo/go/runtime/stubs_linux.go
+++ b/libgo/go/runtime/stubs_linux.go
@@ -7,3 +7,11 @@
 package runtime
 
 func sbrk0() uintptr
+
+func gettid() _pid_t {
+	return _pid_t(syscall(_SYS_gettid, 0, 0, 0, 0, 0, 0))
+}
+
+func tgkill(pid _pid_t, tid _pid_t, sig uint32) uint32 {
+	return uint32(syscall(_SYS_tgkill, uintptr(pid), uintptr(tid), uintptr(sig), 0, 0, 0))
+}
diff --git a/libgo/go/runtime/stubs_nonlinux.go b/libgo/go/runtime/stubs_nonlinux.go
index e1ea05c..4cdab0c 100644
--- a/libgo/go/runtime/stubs_nonlinux.go
+++ b/libgo/go/runtime/stubs_nonlinux.go
@@ -10,3 +10,16 @@
 func sbrk0() uintptr {
 	return 0
 }
+
+// gettid is the non-Linux stub for the thread-ID query. It always
+// returns 0.
+func gettid() _pid_t {
+	return 0
+}
+
+// tgkill is the non-Linux stub for sending a signal to a specific thread.
+// Signal-based stack scanning is only implemented on Linux, so it throws.
+func tgkill(pid _pid_t, tid _pid_t, sig uint32) uint32 {
+	throw("tgkill not implemented")
+	return 0 // not reached: throw does not return, but Go requires a terminating statement
+}
diff --git a/libgo/runtime/go-unwind.c b/libgo/runtime/go-unwind.c
index a059acb..a1a9558 100644
--- a/libgo/runtime/go-unwind.c
+++ b/libgo/runtime/go-unwind.c
@@ -304,6 +304,26 @@
   return p;
 }
 
+static inline int
+value_size (uint8_t encoding)
+{
+  switch (encoding & 0x0f)
+    {
+      case DW_EH_PE_sdata2:
+      case DW_EH_PE_udata2:
+        return 2;
+      case DW_EH_PE_sdata4:
+      case DW_EH_PE_udata4:
+        return 4;
+      case DW_EH_PE_sdata8:
+      case DW_EH_PE_udata8:
+        return 8;
+      default:
+        break;
+    }
+  abort ();
+}
+
 /* The rest of this code is really similar to gcc/unwind-c.c and
    libjava/exception.cc.  */
 
@@ -563,3 +583,231 @@
   _Unwind_SetIP (context, landing_pad);
   return _URC_INSTALL_CONTEXT;
 }
+
+// A dummy personality function, which doesn't capture any exception
+// and simply passes by. This is used for functions that don't
+// capture exceptions but need LSDA for stack maps.
+_Unwind_Reason_Code
+__gccgo_personality_dummy (int, _Unwind_Action, _Unwind_Exception_Class,
+		      struct _Unwind_Exception *, struct _Unwind_Context *)
+  __attribute__ ((no_split_stack));
+
+_Unwind_Reason_Code
+__gccgo_personality_dummy (int version __attribute__ ((unused)),
+		      _Unwind_Action actions __attribute__ ((unused)),
+		      _Unwind_Exception_Class exception_class __attribute__ ((unused)),
+		      struct _Unwind_Exception *ue_header __attribute__ ((unused)),
+		      struct _Unwind_Context *context __attribute__ ((unused)))
+{
+  CONTINUE_UNWINDING;
+}
+
+// A sentinel value for Go functions.
+// A function is a Go function if it has LSDA, which has type info,
+// and the first (dummy) landing pad's type info is a pointer to
+// this value.
+#define GO_FUNC_SENTINEL ((uint64)'G' | ((uint64)'O'<<8) | \
+                          ((uint64)'.'<<16) | ((uint64)'.'<<24) | \
+                          ((uint64)'F'<<32) | ((uint64)'U'<<40) | \
+                          ((uint64)'N'<<48) | ((uint64)'C'<<56))
+
+struct _stackmap {
+  uint32 len;
+  uint8 data[1]; // variable length
+};
+
+extern void
+  runtime_scanstackblockwithmap (uintptr ip, uintptr sp, uintptr size, uint8 *ptrmask, void* gcw)
+  __asm__ (GOSYM_PREFIX "runtime.scanstackblockwithmap");
+
+#define FOUND        0
+#define NOTFOUND_OK  1
+#define NOTFOUND_BAD 2
+
+// Helper function to search for stack maps in the unwinding records of a frame.
+// If found, populate ip, sp, and stackmap. Returns the #define'd values above.
+static int
+findstackmaps (struct _Unwind_Context *context, _Unwind_Ptr *ip, _Unwind_Ptr *sp, struct _stackmap **stackmap)
+{
+  lsda_header_info info;
+  const unsigned char *language_specific_data, *p, *action_record;
+  bool first;
+  struct _stackmap *stackmap1;
+  _Unwind_Ptr ip1;
+  int ip_before_insn = 0;
+  _sleb128_t index;
+  int size;
+
+  language_specific_data = (const unsigned char *)
+    _Unwind_GetLanguageSpecificData (context);
+
+  /* If no LSDA, then there are no stack maps.  */
+  if (! language_specific_data)
+    return NOTFOUND_OK;
+
+  p = parse_lsda_header (context, language_specific_data, &info);
+
+  if (info.TType == NULL)
+    return NOTFOUND_OK;
+
+#ifdef HAVE_GETIPINFO
+  ip1 = _Unwind_GetIPInfo (context, &ip_before_insn);
+#else
+  ip1 = _Unwind_GetIP (context);
+#endif
+  if (! ip_before_insn)
+    --ip1;
+
+  size = value_size (info.ttype_encoding);
+
+  action_record = NULL;
+  first = true;
+
+  /* Search the call-site table for the action associated with this IP.  */
+  while (p < info.action_table)
+    {
+      _Unwind_Ptr cs_start, cs_len, cs_lp;
+      _uleb128_t cs_action;
+
+      /* Note that all call-site encodings are "absolute" displacements.  */
+      p = read_encoded_value (0, info.call_site_encoding, p, &cs_start);
+      p = read_encoded_value (0, info.call_site_encoding, p, &cs_len);
+      p = read_encoded_value (0, info.call_site_encoding, p, &cs_lp);
+      p = read_uleb128 (p, &cs_action);
+
+      if (first)
+        {
+          // For a Go function, the first entry points to the sentinel value.
+          // Check this here.
+          const unsigned char *p1, *action1;
+          uint64 *x;
+
+          if (!cs_action)
+            return NOTFOUND_OK;
+
+          action1 = info.action_table + cs_action - 1;
+          read_sleb128 (action1, &index);
+          p1 = info.TType - index*size;
+          read_encoded_value (context, info.ttype_encoding, p1, (_Unwind_Ptr*)&x);
+          if (x == NULL || *x != GO_FUNC_SENTINEL)
+            return NOTFOUND_OK;
+
+          first = false;
+          continue;
+        }
+
+      /* The table is sorted, so if we've passed the ip, stop.  */
+      if (ip1 < info.Start + cs_start)
+        return NOTFOUND_BAD;
+      else if (ip1 < info.Start + cs_start + cs_len)
+        {
+          if (cs_action)
+            action_record = info.action_table + cs_action - 1;
+          break;
+        }
+    }
+
+  if (action_record == NULL)
+    return NOTFOUND_BAD;
+
+  read_sleb128 (action_record, &index);
+  p = info.TType - index*size;
+  read_encoded_value (context, info.ttype_encoding, p, (_Unwind_Ptr*)&stackmap1);
+  if (stackmap1 == NULL)
+    return NOTFOUND_BAD;
+
+  if (ip != NULL)
+    *ip = ip1;
+  if (sp != NULL)
+    *sp = _Unwind_GetCFA (context);
+  if (stackmap != NULL)
+    *stackmap = stackmap1;
+  return FOUND;
+}
+
+// Callback function to scan a stack frame with stack maps.
+// It skips non-Go functions.
+static _Unwind_Reason_Code
+scanstackwithmap_callback (struct _Unwind_Context *context, void *arg)
+{
+  struct _stackmap *stackmap;
+  _Unwind_Ptr ip, sp;
+  G* gp;
+  void *gcw = arg;
+
+  switch (findstackmaps (context, &ip, &sp, &stackmap))
+    {
+      case NOTFOUND_OK:
+        // Not a Go function. Skip this frame.
+        return _URC_NO_REASON;
+      case NOTFOUND_BAD:
+        {
+          // No stack map found.
+          // If we're scanning from the signal stack, the goroutine
+          // may not be stopped at a safepoint. Allow this case.
+          gp = runtime_g ();
+          if (gp != gp->m->gsignal)
+            {
+              // TODO: print gp, pc, sp
+              runtime_throw ("no stack map");
+            }
+          return _URC_NORMAL_STOP;
+        }
+      case FOUND:
+        break;
+      default:
+        abort ();
+    }
+
+  runtime_scanstackblockwithmap (ip, sp, (uintptr)(stackmap->len) * sizeof(uintptr), stackmap->data, gcw);
+
+  return _URC_NO_REASON;
+}
+
+// Scan the stack with stack maps. Return whether the scan
+// succeeded.
+bool
+scanstackwithmap (void *gcw)
+{
+  _Unwind_Reason_Code code;
+  code = _Unwind_Backtrace (scanstackwithmap_callback, gcw);
+  return code == _URC_END_OF_STACK;
+}
+
+// Returns whether stack map is enabled.
+bool
+usestackmaps ()
+{
+  return runtime_usestackmaps;
+}
+
+// Callback function to probe if a stack frame has stack maps.
+static _Unwind_Reason_Code
+probestackmaps_callback (struct _Unwind_Context *context,
+                         void *arg __attribute__ ((unused)))
+{
+  switch (findstackmaps (context, NULL, NULL, NULL))
+    {
+      case NOTFOUND_OK:
+      case NOTFOUND_BAD:
+        return _URC_NO_REASON;
+      case FOUND:
+        break;
+      default:
+        abort ();
+    }
+
+  // Found a stack map. No need to keep unwinding.
+  runtime_usestackmaps = true;
+  return _URC_NORMAL_STOP;
+}
+
+// Try to find a stack map, store the result in global variable runtime_usestackmaps.
+// Called at start-up time from Go code, so there is a Go frame on the stack.
+bool
+probestackmaps ()
+{
+  runtime_usestackmaps = false;
+  _Unwind_Backtrace (probestackmaps_callback, NULL);
+  return runtime_usestackmaps;
+}
diff --git a/libgo/runtime/proc.c b/libgo/runtime/proc.c
index 7bd95a0..99b2cb1 100644
--- a/libgo/runtime/proc.c
+++ b/libgo/runtime/proc.c
@@ -59,6 +59,8 @@
 void gtraceback(G*)
   __asm__(GOSYM_PREFIX "runtime.gtraceback");
 
+static void gscanstack(G*);
+
 #ifdef __rtems__
 #define __thread
 #endif
@@ -340,6 +342,8 @@
 
 		if(gp->traceback != 0)
 			gtraceback(gp);
+		if(gp->scang != 0)
+			gscanstack(gp);
 	}
 	if (gp == nil || !gp->fromgogo) {
 #ifdef USING_SPLIT_STACK
@@ -469,6 +473,66 @@
 	runtime_gogo(traceback->gp);
 }
 
+void doscanstackswitch(G*, G*) __asm__(GOSYM_PREFIX "runtime.doscanstackswitch");
+
+// Switch to gp and let it scan its stack.
+// The first time through, gp->scang is set (to me), so we switch to gp.
+// The second time, gp is done scanning and has cleared gp->scang, so we just return.
+void
+doscanstackswitch(G* me, G* gp)
+{
+	__go_assert(me->entry == nil);
+	me->fromgogo = false;
+
+#ifdef USING_SPLIT_STACK
+	__splitstack_getcontext((void*)(&me->stackcontext[0]));
+#endif
+	getcontext(ucontext_arg(&me->context[0]));
+
+	if(me->entry != nil) {
+		// Got here from mcall.
+		// The stack scanning code may call systemstack, which calls
+		// mcall, which calls setcontext.
+		// Run the function, which at the end will switch back to gp.
+		FuncVal *fv = me->entry;
+		void (*pfn)(G*) = (void (*)(G*))fv->fn;
+		G* gp1 = (G*)me->param;
+		__go_assert(gp1 == gp);
+		me->entry = nil;
+		me->param = nil;
+		__builtin_call_with_static_chain(pfn(gp1), fv);
+		abort();
+	}
+
+	if (gp->scang != 0)
+		runtime_gogo(gp);
+}
+
+// Do a stack scan, then switch back to the g that triggers this scan.
+// We come here from doscanstackswitch.
+static void
+gscanstack(G *gp)
+{
+	G *oldg, *oldcurg;
+	M* holdm;
+
+	oldg = (G*)gp->scang;
+	oldcurg = oldg->m->curg;
+	holdm = gp->m;
+	if(holdm != nil && holdm != g->m)
+		runtime_throw("gscanstack: m is not nil");
+	oldg->m->curg = gp;
+	gp->m = oldg->m;
+	gp->scang = 0;
+
+	doscanstack(gp, (void*)gp->scangcw);
+
+	gp->scangcw = 0;
+	gp->m = holdm;
+	oldg->m->curg = oldcurg;
+	runtime_gogo(oldg);
+}
+
 // Called by pthread_create to start an M.
 void*
 runtime_mstart(void *arg)
@@ -516,6 +580,9 @@
 		// may always go to the getcontext call in mcall.
 		gtraceback(gp);
 	}
+	if(gp->scang != 0)
+		// Got here from doscanstackswitch. Should not happen.
+		runtime_throw("mstart with scang");
 
 	if(gp->entry != nil) {
 		// Got here from mcall.
@@ -630,7 +697,8 @@
 {
 	// Save the registers in the g structure so that any pointers
 	// held in registers will be seen by the garbage collector.
-	getcontext(ucontext_arg(&g->gcregs[0]));
+	if (!runtime_usestackmaps)
+		getcontext(ucontext_arg(&g->gcregs[0]));
 
 	// Note that if this function does save any registers itself,
 	// we might store the wrong value in the call to getcontext.
@@ -676,7 +744,8 @@
 {
 	// Save the registers in the g structure so that any pointers
 	// held in registers will be seen by the garbage collector.
-	getcontext(ucontext_arg(&g->gcregs[0]));
+	if (!runtime_usestackmaps)
+		getcontext(ucontext_arg(&g->gcregs[0]));
 
 	// See comment in runtime_entersyscall.
 	doentersyscallblock((uintptr)runtime_getcallerpc(),
diff --git a/libgo/runtime/runtime.h b/libgo/runtime/runtime.h
index 3c94532..21921ec 100644
--- a/libgo/runtime/runtime.h
+++ b/libgo/runtime/runtime.h
@@ -502,3 +502,16 @@
 struct funcfileline_return
 runtime_funcfileline (uintptr targetpc, int32 index)
   __asm__ (GOSYM_PREFIX "runtime.funcfileline");
+
+/*
+ * helpers for stack scan.
+ */
+bool scanstackwithmap(void*)
+  __asm__(GOSYM_PREFIX "runtime.scanstackwithmap");
+bool doscanstack(G*, void*)
+  __asm__("runtime.doscanstack");
+
+bool runtime_usestackmaps;
+
+bool probestackmaps(void)
+  __asm__("runtime.probestackmaps");
diff --git a/libgo/runtime/stack.c b/libgo/runtime/stack.c
index a971e8f..2d5d1e0 100644
--- a/libgo/runtime/stack.c
+++ b/libgo/runtime/stack.c
@@ -23,33 +23,43 @@
 extern void scanstackblock(void *addr, uintptr size, void *gcw)
   __asm__("runtime.scanstackblock");
 
-void doscanstack(G*, void*)
-  __asm__("runtime.doscanstack");
-
-static void doscanstack1(G*, void*)
+static bool doscanstack1(G*, void*)
   __attribute__ ((noinline));
 
 // Scan gp's stack, passing stack chunks to scanstackblock.
-void doscanstack(G *gp, void* gcw) {
+bool doscanstack(G *gp, void* gcw) {
 	// Save registers on the stack, so that if we are scanning our
 	// own stack we will see them.
-	__builtin_unwind_init();
-	flush_registers_to_secondary_stack();
+	if (!runtime_usestackmaps) {
+		__builtin_unwind_init();
+		flush_registers_to_secondary_stack();
+	}
 
-	doscanstack1(gp, gcw);
+	return doscanstack1(gp, gcw);
 }
 
 // Scan gp's stack after saving registers.
-static void doscanstack1(G *gp, void *gcw) {
+static bool doscanstack1(G *gp, void *gcw) {
 #ifdef USING_SPLIT_STACK
 	void* sp;
 	size_t spsize;
 	void* next_segment;
 	void* next_sp;
 	void* initial_sp;
+	G* _g_;
 
-	if (gp == runtime_g()) {
+	_g_ = runtime_g();
+	if (runtime_usestackmaps) {
+		// If stack map is enabled, we get here only when we can unwind
+		// the stack being scanned. That is, either we are scanning our
+		// own stack, or we are scanning through a signal handler.
+		__go_assert((_g_ == gp) || ((_g_ == gp->m->gsignal) && (gp == gp->m->curg)));
+		return scanstackwithmap(gcw);
+	}
+	if (_g_ == gp) {
 		// Scanning our own stack.
+		// If we are on a signal stack, it can unwind through the signal
+		// handler and see the g stack, so just scan our own stack.
 		sp = __splitstack_find(nil, nil, &spsize, &next_segment,
 				       &next_sp, &initial_sp);
 	} else {
@@ -95,7 +105,7 @@
 		// The goroutine is usually asleep (the world is stopped).
 		bottom = (void*)gp->gcnextsp;
 		if(bottom == nil)
-			return;
+			return true;
 		nextsp2 = (void*)gp->gcnextsp2;
 	}
 	top = (byte*)(void*)(gp->gcinitialsp) + gp->gcstacksize;
@@ -111,4 +121,5 @@
 			scanstackblock(initialsp2, (uintptr)(nextsp2 - initialsp2), gcw);
 	}
 #endif
+	return true;
 }