runtime: simplify process for starting GC goroutine

Currently, when allocation reaches the GC trigger, the runtime uses
readyExecute to start the GC goroutine immediately rather than wait
for the scheduler to get around to the GC goroutine while the mutator
continues to grow the heap.

Now that the scheduler runs the most recently readied goroutine when a
goroutine yields its time slice, this rigmarole is no longer
necessary. The runtime can simply ready the GC goroutine and yield
from the readying goroutine.

Change-Id: I3b4ebadd2a72a923b1389f7598f82973dd5c8710
Reviewed-on: https://go-review.googlesource.com/9292
Reviewed-by: Rick Hudson <rlh@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 9bb1aca..3b42481 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -655,28 +655,24 @@
 	}
 
 	// trigger concurrent GC
+	readied := false
 	lock(&bggc.lock)
 	if !bggc.started {
 		bggc.working = 1
 		bggc.started = true
-		// This puts the G on the end of the current run
-		// queue, so it may take a while to actually start.
-		// This is only a problem for the first GC cycle.
+		readied = true
 		go backgroundgc()
 	} else if bggc.working == 0 {
 		bggc.working = 1
-		if getg().m.lockedg != nil {
-			// We can't directly switch to GC on a locked
-			// M, so put it on the run queue and someone
-			// will get to it.
-			ready(bggc.g, 0)
-		} else {
-			unlock(&bggc.lock)
-			readyExecute(bggc.g, 0)
-			return
-		}
+		readied = true
+		ready(bggc.g, 0)
 	}
 	unlock(&bggc.lock)
+	if readied {
+		// This G just started or ready()d the GC goroutine.
+		// Switch directly to it by yielding.
+		Gosched()
+	}
 }
 
 // State of the background concurrent GC goroutine.
diff --git a/src/runtime/proc1.go b/src/runtime/proc1.go
index 166d7c8..a2956fe 100644
--- a/src/runtime/proc1.go
+++ b/src/runtime/proc1.go
@@ -155,45 +155,6 @@
 	}
 }
 
-// readyExecute marks gp ready to run, preempt the current g, and execute gp.
-// This is used to start concurrent GC promptly when we reach its trigger.
-func readyExecute(gp *g, traceskip int) {
-	// Squirrel away gp so we don't allocate a closure for the
-	// mcall'd func below. If we allocate a closure, it could go
-	// away as soon as we put _g_ on the runqueue.
-	getg().readyg = gp
-
-	mcall(func(_g_ *g) {
-		gp := _g_.readyg
-		_g_.readyg = nil
-
-		if trace.enabled {
-			traceGoUnpark(gp, traceskip)
-			traceGoSched()
-		}
-
-		if _g_.m.locks != 0 {
-			throw("readyExecute: holding locks")
-		}
-		if _g_.m.lockedg != nil {
-			throw("cannot readyExecute from a locked g")
-		}
-		if readgstatus(gp)&^_Gscan != _Gwaiting {
-			dumpgstatus(gp)
-			throw("bad gp.status in readyExecute")
-		}
-
-		// Preempt the current g
-		casgstatus(_g_, _Grunning, _Grunnable)
-		runqput(_g_.m.p.ptr(), _g_, false)
-		dropg()
-
-		// Ready gp and switch to it
-		casgstatus(gp, _Gwaiting, _Grunnable)
-		execute(gp, false)
-	})
-}
-
 func gcprocs() int32 {
 	// Figure out how many CPUs to use during GC.
 	// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.