runtime: dropg before CAS g status to _Grunnable/_Gwaiting
Currently, we dropg (which clears gp.m) after we CAS the g status
to _Grunnable or _Gwaiting. Immediately after CASing the g status,
another thread may CAS it to _Gscan status and scan its stack.
With precise stack scan, it accesses gp.m in order to switch to g
and back (in doscanstackswitch). This races with dropg. If
doscanstackswitch reads gp.m, then dropg runs, when we restore
the m at the end of the scan it will be set to a stale value. Worse,
if dropg runs after doscanstackswitch sets the new m, gp will be
running with a nil m.
To fix this, we do dropg before CAS g status to _Grunnable or
_Gwaiting. We can do this safely if we are CASing from _Grunning,
as we own the g when it is in _Grunning. There is one case where
we CAS from _Gsyscall to _Grunnable. It is not safe to dropg when
it is in _Gsyscall, as precise stack scan needs to read gp.m in
order to signal the m. So we need to introduce a transient state,
_Gexitingsyscall, between _Gsyscall and _Grunnable, where the GC
should not scan its stack.
It is a little unfortunate that we have to add another g status.
We could reuse an existing one (e.g. _Gcopystack), but it is
clearer and safer to just use a new one, as Austin suggested.
Change-Id: I02d49b0b99416e5dce26682329a0fbf5e1578b26
Reviewed-on: https://go-review.googlesource.com/c/158157
Reviewed-by: Ian Lance Taylor <iant@golang.org>
diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go
index ef166cb..655d0a9 100644
--- a/libgo/go/runtime/proc.go
+++ b/libgo/go/runtime/proc.go
@@ -956,6 +956,10 @@
break loop
}
+ case _Gexitingsyscall:
+ // This is a transient state during which we should not scan its stack.
+ // Try again.
+
case _Gscanwaiting:
// newstack is doing a scan for us right now. Wait.
@@ -2635,8 +2639,8 @@
traceGoPark(_g_.m.waittraceev, _g_.m.waittraceskip)
}
- casgstatus(gp, _Grunning, _Gwaiting)
dropg()
+ casgstatus(gp, _Grunning, _Gwaiting)
if _g_.m.waitunlockf != nil {
fn := *(*func(*g, unsafe.Pointer) bool)(unsafe.Pointer(&_g_.m.waitunlockf))
@@ -2660,8 +2664,8 @@
dumpgstatus(gp)
throw("bad g status")
}
- casgstatus(gp, _Grunning, _Grunnable)
dropg()
+ casgstatus(gp, _Grunning, _Grunnable)
lock(&sched.lock)
globrunqput(gp)
unlock(&sched.lock)
@@ -3054,8 +3058,9 @@
func exitsyscall0(gp *g) {
_g_ := getg()
- casgstatus(gp, _Gsyscall, _Grunnable)
+ casgstatus(gp, _Gsyscall, _Gexitingsyscall)
dropg()
+ casgstatus(gp, _Gexitingsyscall, _Grunnable)
lock(&sched.lock)
_p_ := pidleget()
if _p_ == nil {
diff --git a/libgo/go/runtime/runtime2.go b/libgo/go/runtime/runtime2.go
index 6eb9491..0e9cf63 100644
--- a/libgo/go/runtime/runtime2.go
+++ b/libgo/go/runtime/runtime2.go
@@ -70,6 +70,12 @@
// stack is owned by the goroutine that put it in _Gcopystack.
_Gcopystack // 8
+ // _Gexitingsyscall means this goroutine is exiting from a
+ // system call. This is like _Gsyscall, but the GC should not
+ // scan its stack. Currently this is only used in exitsyscall0
+ // as a transient state when it drops the G.
+ _Gexitingsyscall // 9
+
// _Gscan combined with one of the above states other than
// _Grunning indicates that GC is scanning the stack. The
// goroutine is not executing user code and the stack is owned
diff --git a/libgo/go/runtime/traceback_gccgo.go b/libgo/go/runtime/traceback_gccgo.go
index 7347cea..72a83a5 100644
--- a/libgo/go/runtime/traceback_gccgo.go
+++ b/libgo/go/runtime/traceback_gccgo.go
@@ -122,13 +122,14 @@
}
var gStatusStrings = [...]string{
- _Gidle: "idle",
- _Grunnable: "runnable",
- _Grunning: "running",
- _Gsyscall: "syscall",
- _Gwaiting: "waiting",
- _Gdead: "dead",
- _Gcopystack: "copystack",
+ _Gidle: "idle",
+ _Grunnable: "runnable",
+ _Grunning: "running",
+ _Gsyscall: "syscall",
+ _Gwaiting: "waiting",
+ _Gdead: "dead",
+ _Gcopystack: "copystack",
+ _Gexitingsyscall: "exiting syscall",
}
func goroutineheader(gp *g) {