runtime: make NumGoroutine wait for system goroutines to register

In libgo, system goroutines register themselves after they start.
That means that there is a small race between the goroutine being
seen by the scheduler and the scheduler knowing that the goroutine
is a system goroutine. That in turn means that runtime.NumGoroutine
can overestimate the number of goroutines at times.

This patch fixes the overestimate by counting the number of system
goroutines waiting to start, and pausing NumGoroutine until those
goroutines have all registered.

This is kind of a lot of mechanism for this not very important
problem, but I couldn't think of a better approach.

The test for this is TestNumGoroutine in runtime/proc_test.go.
The test is not currently run, but it will be soon.

Change-Id: I1f162f0d5ce0c72b55aadec735ff6d8a4e8f5a25
Reviewed-on: https://go-review.googlesource.com/46457
Reviewed-by: Than McIntosh <thanm@google.com>
diff --git a/libgo/go/runtime/debug.go b/libgo/go/runtime/debug.go
index 6a9efcd..fdd7346 100644
--- a/libgo/go/runtime/debug.go
+++ b/libgo/go/runtime/debug.go
@@ -54,6 +54,7 @@
 
 // NumGoroutine returns the number of goroutines that currently exist.
 func NumGoroutine() int {
+	waitForSystemGoroutines()
 	return int(gcount())
 }
 
diff --git a/libgo/go/runtime/mfinal.go b/libgo/go/runtime/mfinal.go
index 615a2b2..229ccb5 100644
--- a/libgo/go/runtime/mfinal.go
+++ b/libgo/go/runtime/mfinal.go
@@ -106,6 +106,7 @@
 func createfing() {
 	// start the finalizer goroutine exactly once
 	if fingCreate == 0 && atomic.Cas(&fingCreate, 0, 1) {
+		expectSystemGoroutine()
 		go runfinq()
 	}
 }
diff --git a/libgo/go/runtime/mgc.go b/libgo/go/runtime/mgc.go
index 5cee12d..ebb00e1 100644
--- a/libgo/go/runtime/mgc.go
+++ b/libgo/go/runtime/mgc.go
@@ -209,6 +209,7 @@
 // It kicks off the background sweeper goroutine and enables GC.
 func gcenable() {
 	c := make(chan int, 1)
+	expectSystemGoroutine()
 	go bgsweep(c)
 	<-c
 	memstats.enablegc = true // now that runtime is initialized, GC is okay
@@ -1399,6 +1400,7 @@
 			break
 		}
 		if p.gcBgMarkWorker == 0 {
+			expectSystemGoroutine()
 			go gcBgMarkWorker(p)
 			notetsleepg(&work.bgMarkReady, -1)
 			noteclear(&work.bgMarkReady)
diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go
index 066d0e5..cb1e974 100644
--- a/libgo/go/runtime/proc.go
+++ b/libgo/go/runtime/proc.go
@@ -233,6 +233,7 @@
 
 // start forcegc helper goroutine
 func init() {
+	expectSystemGoroutine()
 	go forcegchelper()
 }
 
@@ -2728,6 +2729,28 @@
 	return newg
 }
 
+// expectedSystemGoroutines counts the number of goroutines expected
+// to mark themselves as system goroutines. After they mark themselves
+// by calling setSystemGoroutine, this is decremented. NumGoroutine
+// uses this to wait for all system goroutines to mark themselves
+// before it counts them.
+var expectedSystemGoroutines uint32
+
+// expectSystemGoroutine is called when starting a goroutine that will
+// call setSystemGoroutine. It increments expectedSystemGoroutines.
+func expectSystemGoroutine() {
+	atomic.Xadd(&expectedSystemGoroutines, +1)
+}
+
+// waitForSystemGoroutines waits for all currently expected system
+// goroutines to register themselves.
+func waitForSystemGoroutines() {
+	for atomic.Load(&expectedSystemGoroutines) > 0 {
+		Gosched()
+		osyield()
+	}
+}
+
 // setSystemGoroutine marks this goroutine as a "system goroutine".
 // In the gc toolchain this is done by comparing startpc to a list of
 // saved special PCs. In gccgo that approach does not work as startpc
@@ -2738,6 +2761,7 @@
 func setSystemGoroutine() {
 	getg().isSystemGoroutine = true
 	atomic.Xadd(&sched.ngsys, +1)
+	atomic.Xadd(&expectedSystemGoroutines, -1)
 }
 
 // Put on gfree list.
diff --git a/libgo/go/runtime/time.go b/libgo/go/runtime/time.go
index e85fc7a..cc167a8 100644
--- a/libgo/go/runtime/time.go
+++ b/libgo/go/runtime/time.go
@@ -113,6 +113,7 @@
 	}
 	if !timers.created {
 		timers.created = true
+		expectSystemGoroutine()
 		go timerproc()
 	}
 }