runtime: dropg before CAS g status to _Grunnable/_Gwaiting

Currently, we dropg (which clears gp.m) after we CAS the g status
to _Grunnable or _Gwaiting. Immediately after CASing the g status,
another thread may CAS it to _Gscan status and scan its stack.
With precise stack scan, it accesses gp.m in order to switch to g
and back (in doscanstackswitch). This races with dropg. If
doscanstackswitch reads gp.m, then dropg runs, when we restore
the m at the end of the scan it will set to a stale value. Worse,
if dropg runs after doscanstackswitch sets the new m, gp will be
running with a nil m.

To fix this, we do dropg before CAS g status to _Grunnable or
_Gwaiting. We can do this safely if we are CASing from _Grunning,
as we own the g when it is in _Grunning. There is one case where
we CAS from _Gsyscall to _Grunnable. It is not safe to dropg when
it is in _Gsyscall, as precise stack scan needs to read gp.m in
order to signal the m. So we need to introduce a transient state,
_Gexitingsyscall, between _Gsyscall and _Grunnable, where the GC
should not scan its stack.

In is a little unfortunate that we have to add another g status.
We could reuse an existing one (e.g. _Gcopystack), but it is
clearer and safer to just use a new one, as Austin suggested.

Change-Id: I02d49b0b99416e5dce26682329a0fbf5e1578b26
Reviewed-on: https://go-review.googlesource.com/c/158157
Reviewed-by: Ian Lance Taylor <iant@golang.org>
diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go
index ef166cb..655d0a9 100644
--- a/libgo/go/runtime/proc.go
+++ b/libgo/go/runtime/proc.go
@@ -956,6 +956,10 @@
 				break loop
 			}
 
+		case _Gexitingsyscall:
+			// This is a transient state during which we should not scan its stack.
+			// Try again.
+
 		case _Gscanwaiting:
 			// newstack is doing a scan for us right now. Wait.
 
@@ -2635,8 +2639,8 @@
 		traceGoPark(_g_.m.waittraceev, _g_.m.waittraceskip)
 	}
 
-	casgstatus(gp, _Grunning, _Gwaiting)
 	dropg()
+	casgstatus(gp, _Grunning, _Gwaiting)
 
 	if _g_.m.waitunlockf != nil {
 		fn := *(*func(*g, unsafe.Pointer) bool)(unsafe.Pointer(&_g_.m.waitunlockf))
@@ -2660,8 +2664,8 @@
 		dumpgstatus(gp)
 		throw("bad g status")
 	}
-	casgstatus(gp, _Grunning, _Grunnable)
 	dropg()
+	casgstatus(gp, _Grunning, _Grunnable)
 	lock(&sched.lock)
 	globrunqput(gp)
 	unlock(&sched.lock)
@@ -3054,8 +3058,9 @@
 func exitsyscall0(gp *g) {
 	_g_ := getg()
 
-	casgstatus(gp, _Gsyscall, _Grunnable)
+	casgstatus(gp, _Gsyscall, _Gexitingsyscall)
 	dropg()
+	casgstatus(gp, _Gexitingsyscall, _Grunnable)
 	lock(&sched.lock)
 	_p_ := pidleget()
 	if _p_ == nil {
diff --git a/libgo/go/runtime/runtime2.go b/libgo/go/runtime/runtime2.go
index 6eb9491..0e9cf63 100644
--- a/libgo/go/runtime/runtime2.go
+++ b/libgo/go/runtime/runtime2.go
@@ -70,6 +70,12 @@
 	// stack is owned by the goroutine that put it in _Gcopystack.
 	_Gcopystack // 8
 
+	// _Gexitingsyscall means this goroutine is exiting from a
+	// system call. This is like _Gsyscall, but the GC should not
+	// scan its stack. Currently this is only used in exitsyscall0
+	// as a transient state when it drops the G.
+	_Gexitingsyscall // 9
+
 	// _Gscan combined with one of the above states other than
 	// _Grunning indicates that GC is scanning the stack. The
 	// goroutine is not executing user code and the stack is owned
diff --git a/libgo/go/runtime/traceback_gccgo.go b/libgo/go/runtime/traceback_gccgo.go
index 7347cea..72a83a5 100644
--- a/libgo/go/runtime/traceback_gccgo.go
+++ b/libgo/go/runtime/traceback_gccgo.go
@@ -122,13 +122,14 @@
 }
 
 var gStatusStrings = [...]string{
-	_Gidle:      "idle",
-	_Grunnable:  "runnable",
-	_Grunning:   "running",
-	_Gsyscall:   "syscall",
-	_Gwaiting:   "waiting",
-	_Gdead:      "dead",
-	_Gcopystack: "copystack",
+	_Gidle:           "idle",
+	_Grunnable:       "runnable",
+	_Grunning:        "running",
+	_Gsyscall:        "syscall",
+	_Gwaiting:        "waiting",
+	_Gdead:           "dead",
+	_Gcopystack:      "copystack",
+	_Gexitingsyscall: "exiting syscall",
 }
 
 func goroutineheader(gp *g) {