runtime: don't spin in checkPut if non-preemptible

Currently it's possible for the runtime to deadlock if checkPut is
called in a non-preemptible context. In this case, checkPut may spin,
so it won't leave the non-preemptible context, but the thread running
gcMarkDone needs to preempt all of the goroutines before it can
release the checkPut spin loops.

Fix this by returning from checkPut if it's called under any of the
conditions that would prevent gcMarkDone from preempting it. In this
case, it leaves a note behind that this happened; if the runtime does
later detect left-over work it can at least indicate that it was
unable to catch it in the act.

For #27993.
Updates #29385 (may fix it).

Change-Id: Ic71c10701229febb4ddf8c104fb10e06d84b122e
Reviewed-on: https://go-review.googlesource.com/c/156017
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 9d21dc4..f5d6374 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -1519,6 +1519,9 @@
 					print(" wbuf2.n=", gcw.wbuf2.nobj)
 				}
 				print("\n")
+				if gcw.pauseGen == gcw.putGen {
+					println("runtime: checkPut already failed at this generation")
+				}
 				throw("throwOnGCWork")
 			}
 		}
diff --git a/src/runtime/mgcwork.go b/src/runtime/mgcwork.go
index 0ed0713..f2c16d7 100644
--- a/src/runtime/mgcwork.go
+++ b/src/runtime/mgcwork.go
@@ -98,6 +98,9 @@
 	// gcWorkPauseGen if debugCachedWork is true.
 	pauseGen uint32
 
+	// putGen is the pauseGen of the last putGen.
+	putGen uint32
+
 	// pauseStack is the stack at which this P was paused if
 	// debugCachedWork is true.
 	pauseStack [16]uintptr
@@ -121,10 +124,23 @@
 
 func (w *gcWork) checkPut(ptr uintptr, ptrs []uintptr) {
 	if debugCachedWork {
+		alreadyFailed := w.putGen == w.pauseGen
+		w.putGen = w.pauseGen
+		if m := getg().m; m.locks > 0 || m.mallocing != 0 || m.preemptoff != "" || m.p.ptr().status != _Prunning {
+			// If we were to spin, the runtime may
+			// deadlock: the condition above prevents
+			// preemption (see newstack), which could
+			// prevent gcMarkDone from finishing the
+			// ragged barrier and releasing the spin.
+			return
+		}
 		for atomic.Load(&gcWorkPauseGen) == w.pauseGen {
 		}
 		if throwOnGCWork {
 			printlock()
+			if alreadyFailed {
+				println("runtime: checkPut already failed at this generation")
+			}
 			println("runtime: late gcWork put")
 			if ptr != 0 {
 				gcDumpObject("ptr", ptr, ^uintptr(0))