sync: yield to the waiter when unlocking a starving mutex
When we have already assigned the semaphore ticket to a specific
waiter, we want to get the waiter running as fast as possible since
no other G waiting on the semaphore can acquire it optimistically.
The net effect is that, when a sync.Mutex is contended, the code in
the critical section guarded by the Mutex gets a priority boost.
Fixes #33747
The original work was done in CL 200577 by Carlo Alberto Ferraris. The
change was reverted in CL 205817 because it broke the linux-arm64-packet
and solaris-amd64-oraclerel builders.
Change-Id: I76d79b1d63fd206ed1c57fe6900cb7ae9e4d46cb
Reviewed-on: https://go-review.googlesource.com/c/go/+/206180
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index d3ebd89..ef977b3 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -900,3 +900,11 @@
startTheWorld()
return
}
+
+var Semacquire = semacquire
+var Semrelease1 = semrelease1
+
+func SemNwait(addr *uint32) uint32 {
+ root := semroot(addr)
+ return atomic.Load(&root.nwait)
+}
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 67ff556..56e9530 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -2753,7 +2753,22 @@
casGToPreemptScan(gp, _Grunning, _Gscan|_Gpreempted)
dropg()
casfrom_Gscanstatus(gp, _Gscan|_Gpreempted, _Gpreempted)
+ schedule()
+}
+// goyield is like Gosched, but it:
+// - does not emit a GoSched trace event
+// - puts the current G on the runq of the current P instead of the globrunq
+func goyield() {
+ checkTimeouts()
+ mcall(goyield_m)
+}
+
+func goyield_m(gp *g) {
+ pp := gp.m.p.ptr()
+ casgstatus(gp, _Grunning, _Grunnable)
+ dropg()
+ runqput(pp, gp, false)
schedule()
}
diff --git a/src/runtime/sema.go b/src/runtime/sema.go
index 530af5b..7bbf871 100644
--- a/src/runtime/sema.go
+++ b/src/runtime/sema.go
@@ -180,7 +180,7 @@
atomic.Xadd(&root.nwait, -1)
}
unlock(&root.lock)
- if s != nil { // May be slow, so unlock first
+ if s != nil { // May be slow or even yield, so unlock first
acquiretime := s.acquiretime
if acquiretime != 0 {
mutexevent(t0-acquiretime, 3+skipframes)
@@ -192,6 +192,25 @@
s.ticket = 1
}
readyWithTime(s, 5+skipframes)
+ if s.ticket == 1 && getg().m.locks == 0 {
+ // Direct G handoff
+ // readyWithTime has added the waiter G as runnext in the
+ // current P; we now call the scheduler so that we start running
+ // the waiter G immediately.
+ // Note that waiter inherits our time slice: this is desirable
+ // to avoid having a highly contended semaphore hog the P
+ // indefinitely. goyield is like Gosched, but it does not emit a
+ // GoSched trace event and, more importantly, puts the current G
+ // on the local runq instead of the global one.
+ // We only do this in the starving regime (handoff=true), as in
+ // the non-starving case it is possible for a different waiter
+ // to acquire the semaphore while we are yielding/scheduling,
+ // and this would be wasteful. We wait instead to enter starving
+ // regime, and then we start to do direct handoffs of ticket and
+ // P.
+ // See issue 33747 for discussion.
+ goyield()
+ }
}
}
diff --git a/src/runtime/sema_test.go b/src/runtime/sema_test.go
new file mode 100644
index 0000000..8bd5d4c
--- /dev/null
+++ b/src/runtime/sema_test.go
@@ -0,0 +1,97 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ . "runtime"
+ "sync/atomic"
+ "testing"
+)
+
+// TestSemaHandoff checks that when semrelease+handoff is
+// requested, the G that releases the semaphore yields its
+// P directly to the first waiter in line.
+// See issue 33747 for discussion.
+func TestSemaHandoff(t *testing.T) {
+ const iter = 10000
+ ok := 0
+ for i := 0; i < iter; i++ {
+ if testSemaHandoff() {
+ ok++
+ }
+ }
+ // As long as two thirds of handoffs are direct, we
+ // consider the test successful. The scheduler is
+ // nondeterministic, so this test checks that we get the
+ // desired outcome in a significant majority of cases.
+ // The actual ratio of direct handoffs is much higher
+ // (>90%) but we use a lower threshold to minimize the
+ // chances that unrelated changes in the runtime will
+ // cause the test to fail or become flaky.
+ if ok < iter*2/3 {
+ t.Fatal("direct handoff < 2/3:", ok, iter)
+ }
+}
+
+func TestSemaHandoff1(t *testing.T) {
+ if GOMAXPROCS(-1) <= 1 {
+ t.Skip("GOMAXPROCS <= 1")
+ }
+ defer GOMAXPROCS(GOMAXPROCS(-1))
+ GOMAXPROCS(1)
+ TestSemaHandoff(t)
+}
+
+func TestSemaHandoff2(t *testing.T) {
+ if GOMAXPROCS(-1) <= 2 {
+ t.Skip("GOMAXPROCS <= 2")
+ }
+ defer GOMAXPROCS(GOMAXPROCS(-1))
+ GOMAXPROCS(2)
+ TestSemaHandoff(t)
+}
+
+func testSemaHandoff() bool {
+ var sema, res uint32
+ done := make(chan struct{})
+
+ // We're testing that the current goroutine is able to yield its time slice
+ // to another goroutine. Stop the current goroutine from migrating to
+ // another CPU where it can win the race (and appear to have not yielded) by
+ // keeping the CPUs slightly busy.
+ for i := 0; i < GOMAXPROCS(-1); i++ {
+ go func() {
+ for {
+ select {
+ case <-done:
+ return
+ default:
+ }
+ Gosched()
+ }
+ }()
+ }
+
+ go func() {
+ Semacquire(&sema)
+ atomic.CompareAndSwapUint32(&res, 0, 1)
+
+ Semrelease1(&sema, true, 0)
+ close(done)
+ }()
+ for SemNwait(&sema) == 0 {
+ Gosched() // wait for goroutine to block in Semacquire
+ }
+
+ // The crux of the test: we release the semaphore with handoff
+ // and immediately perform a CAS both here and in the waiter; we
+ // want the CAS in the waiter to execute first.
+ Semrelease1(&sema, true, 0)
+ atomic.CompareAndSwapUint32(&res, 0, 2)
+
+ <-done // wait for goroutines to finish to avoid data races
+
+ return res == 1 // did the waiter run first?
+}
diff --git a/src/sync/mutex.go b/src/sync/mutex.go
index 11ad20c..3028552 100644
--- a/src/sync/mutex.go
+++ b/src/sync/mutex.go
@@ -216,7 +216,8 @@
old = m.state
}
} else {
- // Starving mode: handoff mutex ownership to the next waiter.
+ // Starving mode: handoff mutex ownership to the next waiter, and yield
+ // our time slice so that the next waiter can start to run immediately.
// Note: mutexLocked is not set, the waiter will set it after wakeup.
// But mutex is still considered locked if mutexStarving is set,
// so new coming goroutines won't acquire it.