runtime: eliminate all writebarrierptr* calls

Calls to writebarrierptr can simply be actual pointer writes. Calls to
writebarrierptr_prewrite need to go through the write barrier buffer.

Updates #22460.

Change-Id: I92cee4da98c5baa499f1977563757c76f95bf0ca
Reviewed-on: https://go-review.googlesource.com/92704
Run-TryBot: Austin Clements <austin@google.com>
Reviewed-by: Rick Hudson <rlh@golang.org>
diff --git a/src/runtime/atomic_pointer.go b/src/runtime/atomic_pointer.go
index 292b351..09cfbda 100644
--- a/src/runtime/atomic_pointer.go
+++ b/src/runtime/atomic_pointer.go
@@ -16,11 +16,24 @@
 // Instead, these are wrappers around the actual atomics (casp1 and so on)
 // that use noescape to convey which arguments do not escape.
 
+// atomicwb performs a write barrier before an atomic pointer write.
+// The caller should guard the call with "if writeBarrier.enabled".
+//
+//go:nosplit
+func atomicwb(ptr *unsafe.Pointer, new unsafe.Pointer) {
+	slot := (*uintptr)(unsafe.Pointer(ptr))
+	if !getg().m.p.ptr().wbBuf.putFast(*slot, uintptr(new)) {
+		wbBufFlush(slot, uintptr(new))
+	}
+}
+
 // atomicstorep performs *ptr = new atomically and invokes a write barrier.
 //
 //go:nosplit
 func atomicstorep(ptr unsafe.Pointer, new unsafe.Pointer) {
-	writebarrierptr_prewrite((*uintptr)(ptr), uintptr(new))
+	if writeBarrier.enabled {
+		atomicwb((*unsafe.Pointer)(ptr), new)
+	}
 	atomic.StorepNoWB(noescape(ptr), new)
 }
 
@@ -29,7 +42,9 @@
 	// The write barrier is only necessary if the CAS succeeds,
 	// but since it needs to happen before the write becomes
 	// public, we have to do it conservatively all the time.
-	writebarrierptr_prewrite((*uintptr)(unsafe.Pointer(ptr)), uintptr(new))
+	if writeBarrier.enabled {
+		atomicwb(ptr, new)
+	}
 	return atomic.Casp1((*unsafe.Pointer)(noescape(unsafe.Pointer(ptr))), noescape(old), new)
 }
 
@@ -43,7 +58,9 @@
 //go:linkname sync_atomic_StorePointer sync/atomic.StorePointer
 //go:nosplit
 func sync_atomic_StorePointer(ptr *unsafe.Pointer, new unsafe.Pointer) {
-	writebarrierptr_prewrite((*uintptr)(unsafe.Pointer(ptr)), uintptr(new))
+	if writeBarrier.enabled {
+		atomicwb(ptr, new)
+	}
 	sync_atomic_StoreUintptr((*uintptr)(unsafe.Pointer(ptr)), uintptr(new))
 }
 
@@ -53,7 +70,9 @@
 //go:linkname sync_atomic_SwapPointer sync/atomic.SwapPointer
 //go:nosplit
 func sync_atomic_SwapPointer(ptr *unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer {
-	writebarrierptr_prewrite((*uintptr)(unsafe.Pointer(ptr)), uintptr(new))
+	if writeBarrier.enabled {
+		atomicwb(ptr, new)
+	}
 	old := unsafe.Pointer(sync_atomic_SwapUintptr((*uintptr)(noescape(unsafe.Pointer(ptr))), uintptr(new)))
 	return old
 }
@@ -64,6 +83,8 @@
 //go:linkname sync_atomic_CompareAndSwapPointer sync/atomic.CompareAndSwapPointer
 //go:nosplit
 func sync_atomic_CompareAndSwapPointer(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool {
-	writebarrierptr_prewrite((*uintptr)(unsafe.Pointer(ptr)), uintptr(new))
+	if writeBarrier.enabled {
+		atomicwb(ptr, new)
+	}
 	return sync_atomic_CompareAndSwapUintptr((*uintptr)(noescape(unsafe.Pointer(ptr))), uintptr(old), uintptr(new))
 }
diff --git a/src/runtime/chan.go b/src/runtime/chan.go
index 41ae803..678128b 100644
--- a/src/runtime/chan.go
+++ b/src/runtime/chan.go
@@ -310,6 +310,8 @@
 	// So make sure that no preemption points can happen between read & use.
 	dst := sg.elem
 	typeBitsBulkBarrier(t, uintptr(dst), uintptr(src), t.size)
+	// No need for cgo write barrier checks because dst is always
+	// Go memory.
 	memmove(dst, src, t.size)
 }
 
diff --git a/src/runtime/hashmap_fast.go b/src/runtime/hashmap_fast.go
index 2de3814..f978d1b 100644
--- a/src/runtime/hashmap_fast.go
+++ b/src/runtime/hashmap_fast.go
@@ -1002,7 +1002,8 @@
 
 				// Copy key.
 				if sys.PtrSize == 4 && t.key.kind&kindNoPointers == 0 && writeBarrier.enabled {
-					writebarrierptr((*uintptr)(dst.k), *(*uintptr)(k))
+					// Write with a write barrier.
+					*(*unsafe.Pointer)(dst.k) = *(*unsafe.Pointer)(k)
 				} else {
 					*(*uint32)(dst.k) = *(*uint32)(k)
 				}
@@ -1103,7 +1104,8 @@
 				// Copy key.
 				if t.key.kind&kindNoPointers == 0 && writeBarrier.enabled {
 					if sys.PtrSize == 8 {
-						writebarrierptr((*uintptr)(dst.k), *(*uintptr)(k))
+						// Write with a write barrier.
+						*(*unsafe.Pointer)(dst.k) = *(*unsafe.Pointer)(k)
 					} else {
 						// There are three ways to squeeze at least one 32 bit pointer into 64 bits.
 						// Give up and call typedmemmove.
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index 3a88f17..8e03505 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -550,6 +550,8 @@
 // make sure the underlying allocation contains pointers, usually
 // by checking typ.kind&kindNoPointers.
 //
+// Callers must perform cgo checks if writeBarrier.cgo.
+//
 //go:nosplit
 func bulkBarrierPreWrite(dst, src, size uintptr) {
 	if (dst|src|size)&(sys.PtrSize-1) != 0 {
@@ -649,7 +651,7 @@
 	}
 }
 
-// typeBitsBulkBarrier executes writebarrierptr_prewrite for every
+// typeBitsBulkBarrier executes a write barrier for every
 // pointer that would be copied from [src, src+size) to [dst,
 // dst+size) by a memmove using the type bitmap to locate those
 // pointer slots.
@@ -663,6 +665,8 @@
 // Must not be preempted because it typically runs right before memmove,
 // and the GC must observe them as an atomic action.
 //
+// Callers must perform cgo checks if writeBarrier.cgo.
+//
 //go:nosplit
 func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) {
 	if typ == nil {
@@ -680,6 +684,7 @@
 		return
 	}
 	ptrmask := typ.gcdata
+	buf := &getg().m.p.ptr().wbBuf
 	var bits uint32
 	for i := uintptr(0); i < typ.ptrdata; i += sys.PtrSize {
 		if i&(sys.PtrSize*8-1) == 0 {
@@ -691,7 +696,9 @@
 		if bits&1 != 0 {
 			dstx := (*uintptr)(unsafe.Pointer(dst + i))
 			srcx := (*uintptr)(unsafe.Pointer(src + i))
-			writebarrierptr_prewrite(dstx, *srcx)
+			if !buf.putFast(*dstx, *srcx) {
+				wbBufFlush(nil, 0)
+			}
 		}
 	}
 }