runtime: improve scheduler fairness
Currently the global runqueue is starved if a group of goroutines
constantly respawn each other (the local runqueue never becomes empty).
Fixes #5639.
R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/10042044
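
For illustration only (not part of the CL), here is a minimal sketch of the failure mode, assuming GOMAXPROCS=1. One goroutine sends and one receives in a tight loop; each channel operation readies the other goroutine, so the P's local runqueue never drains, and before this change a goroutine parked on the global runqueue could starve:

package main

import (
	"fmt"
	"runtime"
	"time"
)

func main() {
	runtime.GOMAXPROCS(1)
	c := make(chan bool)
	// Two goroutines constantly respawn each other: every channel
	// operation readies the peer onto the P's local runqueue, so
	// the local runqueue never becomes empty.
	go func() {
		for {
			c <- true
		}
	}()
	go func() {
		for {
			<-c
		}
	}()
	// Before this CL, a goroutine waiting on the global runqueue
	// (such as the one backing this timer) could be delayed
	// indefinitely; the periodic check added to schedule()
	// guarantees it eventually runs.
	<-time.After(20 * time.Millisecond)
	fmt.Println("timer fired: the global runqueue was not starved")
}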
diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c
index 9d2f765..c121466 100644
--- a/src/pkg/runtime/proc.c
+++ b/src/pkg/runtime/proc.c
@@ -106,7 +106,7 @@
static G* gfget(P*);
static void gfpurge(P*);
static void globrunqput(G*);
-static G* globrunqget(P*);
+static G* globrunqget(P*, int32);
static P* pidleget(void);
static void pidleput(P*);
static void injectglist(G*);
@@ -1024,7 +1024,7 @@
// global runq
if(runtime·sched.runqsize) {
runtime·lock(&runtime·sched);
- gp = globrunqget(m->p);
+ gp = globrunqget(m->p, 0);
runtime·unlock(&runtime·sched);
if(gp)
return gp;
@@ -1065,7 +1065,7 @@
goto top;
}
if(runtime·sched.runqsize) {
- gp = globrunqget(m->p);
+ gp = globrunqget(m->p, 0);
runtime·unlock(&runtime·sched);
return gp;
}
@@ -1144,6 +1144,7 @@
schedule(void)
{
G *gp;
+ uint32 tick;
if(m->locks)
runtime·throw("schedule: holding locks");
@@ -1154,9 +1155,23 @@
goto top;
}
- gp = runqget(m->p);
- if(gp && m->spinning)
- runtime·throw("schedule: spinning with local work");
+ gp = nil;
+ // Check the global runnable queue once in a while to ensure fairness.
+ // Otherwise two goroutines can completely occupy the local runqueue
+ // by constantly respawning each other.
+ tick = m->p->tick;
+ // This is a fancy way to say tick%61==0;
+ // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
+ if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime·sched.runqsize > 0) {
+ runtime·lock(&runtime·sched);
+ gp = globrunqget(m->p, 1);
+ runtime·unlock(&runtime·sched);
+ }
+ if(gp == nil) {
+ gp = runqget(m->p);
+ if(gp && m->spinning)
+ runtime·throw("schedule: spinning with local work");
+ }
if(gp == nil)
gp = findrunnable();
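
An aside on the tick check above: 0x4325c53f is ceil(2^36/61), so the multiply-and-shift computes tick/61 exactly for every uint32 tick, and subtracting (tick/61)*61 leaves tick%61. A small Go sketch (hypothetical names, not part of the CL) that demonstrates the identity:

package main

import "fmt"

// fastMod61 mirrors the C expression in schedule():
// tick - (((uint64)tick*0x4325c53fu)>>36)*61.
// 0x4325c53f == ceil(2^36/61), so the multiply-and-shift is an
// integer division by 61 that is exact for all uint32 inputs.
func fastMod61(tick uint32) uint32 {
	q := (uint64(tick) * 0x4325c53f) >> 36 // q == tick/61
	return tick - uint32(q)*61             // the remainder, tick%61
}

func main() {
	for _, tick := range []uint32{0, 1, 60, 61, 122, 1 << 20, 1<<32 - 1} {
		fmt.Printf("tick=%d fast=%d want=%d\n", tick, fastMod61(tick), tick%61)
	}
}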
@@ -2167,7 +2182,7 @@
// Try to get a batch of G's from the global runnable queue.
// Sched must be locked.
static G*
-globrunqget(P *p)
+globrunqget(P *p, int32 max)
{
G *gp, *gp1;
int32 n;
@@ -2177,6 +2192,8 @@
n = runtime·sched.runqsize/runtime·gomaxprocs+1;
if(n > runtime·sched.runqsize)
n = runtime·sched.runqsize;
+ if(max > 0 && n > max)
+ n = max;
runtime·sched.runqsize -= n;
if(runtime·sched.runqsize == 0)
runtime·sched.runqtail = nil;
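
To make the hunk above easier to follow, here is a hedged Go transliteration of just the batch-size computation in globrunqget (names assumed from the C): take a proportional share of the global runqueue, never more than the queue holds, and never more than max when a positive cap is given. schedule() passes max=1, so the fairness check steals exactly one G.

package main

import "fmt"

// batchSize mirrors the sizing logic in globrunqget.
func batchSize(runqsize, gomaxprocs, max int32) int32 {
	n := runqsize/gomaxprocs + 1 // proportional share per P
	if n > runqsize {
		n = runqsize // cannot take more than the queue holds
	}
	if max > 0 && n > max {
		n = max // honor the caller-supplied cap
	}
	return n
}

func main() {
	fmt.Println(batchSize(100, 4, 0)) // 26: proportional share
	fmt.Println(batchSize(100, 4, 1)) // 1: capped, as in schedule()
	fmt.Println(batchSize(0, 4, 0))   // 0: empty queue
}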
diff --git a/src/pkg/runtime/proc_test.go b/src/pkg/runtime/proc_test.go
index 21fb9c2..83368e0 100644
--- a/src/pkg/runtime/proc_test.go
+++ b/src/pkg/runtime/proc_test.go
@@ -8,6 +8,7 @@
"math"
"runtime"
"sync/atomic"
+ "syscall"
"testing"
"time"
)
@@ -107,6 +108,55 @@
}
}
+func TestTimerFairness(t *testing.T) {
+ done := make(chan bool)
+ c := make(chan bool)
+ for i := 0; i < 2; i++ {
+ go func() {
+ for {
+ select {
+ case c <- true:
+ case <-done:
+ return
+ }
+ }
+ }()
+ }
+
+ timer := time.After(20 * time.Millisecond)
+ for {
+ select {
+ case <-c:
+ case <-timer:
+ close(done)
+ return
+ }
+ }
+}
+
+func TestTimerFairness2(t *testing.T) {
+ done := make(chan bool)
+ c := make(chan bool)
+ for i := 0; i < 2; i++ {
+ go func() {
+ timer := time.After(20 * time.Millisecond)
+ var buf [1]byte
+ for {
+ syscall.Read(0, buf[0:0])
+ select {
+ case c <- true:
+ case <-c:
+ case <-timer:
+ done <- true
+ return
+ }
+ }
+ }()
+ }
+ <-done
+ <-done
+}
+
func stackGrowthRecursive(i int) {
var pad [128]uint64
if i != 0 && pad[0] == 0 {