runtime: consistently access pollDesc r/w Gs with atomics

Both netpollblock and netpollunblock read gpp using a non-atomic load.
When consuming a ready event, netpollblock clears gpp using a non-atomic
store, thus skipping a barrier.

Thus on systems with weak memory ordering, a sequence like so this is
possible:

             T1                                T2

1. netpollblock: read gpp -> pdReady
2. netpollblock: store gpp -> 0

                                 3. netpollunblock: read gpp -> pdReady
                                 4. netpollunblock: return

i.e., without a happens-before edge between (2) and (3), netpollunblock
may read the stale value of gpp.

Switch these access to use atomic loads and stores in order to create
these edges.

For ease of future maintainance, I've simply changed rg and wg to always
be accessed atomically, though I don't believe pollOpen or pollClose
require atomics today.

Fixes #48925

Change-Id: I903ea667eea320277610b4f969129935731520c3
Reviewed-on: https://go-review.googlesource.com/c/go/+/355952
Trust: Michael Pratt <mpratt@google.com>
Run-TryBot: Michael Pratt <mpratt@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: David Chase <drchase@google.com>
diff --git a/src/runtime/netpoll.go b/src/runtime/netpoll.go
index 2393711..1008b44 100644
--- a/src/runtime/netpoll.go
+++ b/src/runtime/netpoll.go
@@ -78,6 +78,7 @@
 	// pollReset, pollWait, pollWaitCanceled and runtime·netpollready (IO readiness notification)
 	// proceed w/o taking the lock. So closing, everr, rg, rd, wg and wd are manipulated
 	// in a lock-free way by all operations.
+	// TODO(golang.org/issue/49008): audit these lock-free fields for continued correctness.
 	// NOTE(dvyukov): the following code uses uintptr to store *g (rg/wg),
 	// that will blow up when GC starts moving objects.
 	lock    mutex // protects the following fields
@@ -86,11 +87,11 @@
 	everr   bool      // marks event scanning error happened
 	user    uint32    // user settable cookie
 	rseq    uintptr   // protects from stale read timers
-	rg      uintptr   // pdReady, pdWait, G waiting for read or nil
+	rg      uintptr   // pdReady, pdWait, G waiting for read or nil. Accessed atomically.
 	rt      timer     // read deadline timer (set if rt.f != nil)
 	rd      int64     // read deadline
 	wseq    uintptr   // protects from stale write timers
-	wg      uintptr   // pdReady, pdWait, G waiting for write or nil
+	wg      uintptr   // pdReady, pdWait, G waiting for write or nil. Accessed atomically.
 	wt      timer     // write deadline timer
 	wd      int64     // write deadline
 	self    *pollDesc // storage for indirect interface. See (*pollDesc).makeArg.
@@ -147,20 +148,22 @@
 func poll_runtime_pollOpen(fd uintptr) (*pollDesc, int) {
 	pd := pollcache.alloc()
 	lock(&pd.lock)
-	if pd.wg != 0 && pd.wg != pdReady {
+	wg := atomic.Loaduintptr(&pd.wg)
+	if wg != 0 && wg != pdReady {
 		throw("runtime: blocked write on free polldesc")
 	}
-	if pd.rg != 0 && pd.rg != pdReady {
+	rg := atomic.Loaduintptr(&pd.rg)
+	if rg != 0 && rg != pdReady {
 		throw("runtime: blocked read on free polldesc")
 	}
 	pd.fd = fd
 	pd.closing = false
 	pd.everr = false
 	pd.rseq++
-	pd.rg = 0
+	atomic.Storeuintptr(&pd.rg, 0)
 	pd.rd = 0
 	pd.wseq++
-	pd.wg = 0
+	atomic.Storeuintptr(&pd.wg, 0)
 	pd.wd = 0
 	pd.self = pd
 	unlock(&pd.lock)
@@ -178,10 +181,12 @@
 	if !pd.closing {
 		throw("runtime: close polldesc w/o unblock")
 	}
-	if pd.wg != 0 && pd.wg != pdReady {
+	wg := atomic.Loaduintptr(&pd.wg)
+	if wg != 0 && wg != pdReady {
 		throw("runtime: blocked write on closing polldesc")
 	}
-	if pd.rg != 0 && pd.rg != pdReady {
+	rg := atomic.Loaduintptr(&pd.rg)
+	if rg != 0 && rg != pdReady {
 		throw("runtime: blocked read on closing polldesc")
 	}
 	netpollclose(pd.fd)
@@ -205,9 +210,9 @@
 		return errcode
 	}
 	if mode == 'r' {
-		pd.rg = 0
+		atomic.Storeuintptr(&pd.rg, 0)
 	} else if mode == 'w' {
-		pd.wg = 0
+		atomic.Storeuintptr(&pd.wg, 0)
 	}
 	return pollNoError
 }
@@ -417,6 +422,8 @@
 
 // returns true if IO is ready, or false if timedout or closed
 // waitio - wait only for completed IO, ignore errors
+// Concurrent calls to netpollblock in the same mode are forbidden, as pollDesc
+// can hold only a single waiting goroutine for each mode.
 func netpollblock(pd *pollDesc, mode int32, waitio bool) bool {
 	gpp := &pd.rg
 	if mode == 'w' {
@@ -425,17 +432,19 @@
 
 	// set the gpp semaphore to pdWait
 	for {
-		old := *gpp
-		if old == pdReady {
-			*gpp = 0
+		// Consume notification if already ready.
+		if atomic.Casuintptr(gpp, pdReady, 0) {
 			return true
 		}
-		if old != 0 {
-			throw("runtime: double wait")
-		}
 		if atomic.Casuintptr(gpp, 0, pdWait) {
 			break
 		}
+
+		// Double check that this isn't corrupt; otherwise we'd loop
+		// forever.
+		if v := atomic.Loaduintptr(gpp); v != pdReady && v != 0 {
+			throw("runtime: double wait")
+		}
 	}
 
 	// need to recheck error states after setting gpp to pdWait
@@ -459,7 +468,7 @@
 	}
 
 	for {
-		old := *gpp
+		old := atomic.Loaduintptr(gpp)
 		if old == pdReady {
 			return nil
 		}