runtime: drive proportional sweep directly off heap_live

Currently, proportional sweep maintains its own count of how many
bytes have been allocated since the beginning of the sweep cycle so it
can compute how many pages need to be swept for a given allocation.

However, this requires a somewhat complex reimbursement scheme since
proportional sweep must be done before a span is allocated, but we
don't know how many bytes to charge until we've allocated a span. This
means that the allocated byte count used by proportional sweep can go
up and down, which has led to underflow bugs in the past (#18043) and
is going to interfere with adjusting sweep pacing on-the-fly (for #19076).

This approach also means we're maintaining a statistic that is very
closely related to heap_live, but has a different 0 value. This is
particularly confusing because the sweep ratio is computed based on
heap_live, so you have to understand that these two statistics are
very closely related.

Replace all of this and compute the sweep debt directly from the
current value of heap_live. To make this work, we simply save the
value of heap_live when the sweep ratio is computed to use as a
"basis" for later computing the sweep debt.

This eliminates the need for reimbursement as well as the code for
maintaining the sweeper's version of the live heap size.

For #19076.

Coincidentally fixes #18043, since this eliminates sweep reimbursement
entirely.

Change-Id: I1f931ddd6e90c901a3972c7506874c899251dc2a
Reviewed-on: https://go-review.googlesource.com/39832
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
diff --git a/src/runtime/mcentral.go b/src/runtime/mcentral.go
index 8f9c529..5302dd8 100644
--- a/src/runtime/mcentral.go
+++ b/src/runtime/mcentral.go
@@ -126,9 +126,6 @@
 	// mcache. If it gets uncached, we'll adjust this.
 	atomic.Xadd64(&c.nmalloc, int64(n))
 	usedBytes := uintptr(s.allocCount) * s.elemsize
-	if usedBytes > 0 {
-		reimburseSweepCredit(usedBytes)
-	}
 	atomic.Xadd64(&memstats.heap_live, int64(spanBytes)-int64(usedBytes))
 	if trace.enabled {
 		// heap_live changed.
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 8872269..8cba9f7 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -839,7 +839,8 @@
 		// pages by the time the allocated heap reaches the GC
 		// trigger. Compute the ratio of in-use pages to sweep
 		// per byte allocated.
-		heapDistance := int64(trigger) - int64(atomic.Load64(&memstats.heap_live))
+		heapLiveBasis := atomic.Load64(&memstats.heap_live)
+		heapDistance := int64(trigger) - int64(heapLiveBasis)
 		// Add a little margin so rounding errors and
 		// concurrent sweep are less likely to leave pages
 		// unswept when GC starts.
@@ -850,7 +851,7 @@
 		}
 		mheap_.sweepPagesPerByte = float64(mheap_.pagesInUse) / float64(heapDistance)
 		mheap_.pagesSwept = 0
-		mheap_.spanBytesAlloc = 0
+		mheap_.sweepHeapLiveBasis = heapLiveBasis
 	}
 }
 
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index dd06825..8915b39 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -123,7 +123,7 @@
 	// last one print trace information.
 	if atomic.Xadd(&mheap_.sweepers, -1) == 0 && atomic.Load(&mheap_.sweepdone) != 0 {
 		if debug.gcpacertrace > 0 {
-			print("pacer: sweep done at heap size ", memstats.heap_live>>20, "MB; allocated ", mheap_.spanBytesAlloc>>20, "MB of spans; swept ", mheap_.pagesSwept, " pages at ", sweepRatio, " pages/byte\n")
+			print("pacer: sweep done at heap size ", memstats.heap_live>>20, "MB; allocated ", (memstats.heap_live-mheap_.sweepHeapLiveBasis)>>20, "MB during sweep; swept ", mheap_.pagesSwept, " pages at ", sweepRatio, " pages/byte\n")
 		}
 	}
 	_g_.m.locks--
@@ -379,8 +379,7 @@
 //
 // deductSweepCredit makes a worst-case assumption that all spanBytes
 // bytes of the ultimately allocated span will be available for object
-// allocation. The caller should call reimburseSweepCredit if that
-// turns out not to be the case once the span is allocated.
+// allocation.
 //
 // deductSweepCredit is the core of the "proportional sweep" system.
 // It uses statistics gathered by the garbage collector to perform
@@ -398,12 +397,10 @@
 		traceGCSweepStart()
 	}
 
-	// Account for this span allocation.
-	spanBytesAlloc := atomic.Xadd64(&mheap_.spanBytesAlloc, int64(spanBytes))
-
 	// Fix debt if necessary.
-	pagesOwed := int64(mheap_.sweepPagesPerByte * float64(spanBytesAlloc))
-	for pagesOwed-int64(atomic.Load64(&mheap_.pagesSwept)) > int64(callerSweepPages) {
+	newHeapLive := uintptr(atomic.Load64(&memstats.heap_live)-mheap_.sweepHeapLiveBasis) + spanBytes
+	pagesTarget := int64(mheap_.sweepPagesPerByte*float64(newHeapLive)) - int64(callerSweepPages)
+	for pagesTarget > int64(atomic.Load64(&mheap_.pagesSwept)) {
 		if gosweepone() == ^uintptr(0) {
 			mheap_.sweepPagesPerByte = 0
 			break
@@ -414,19 +411,3 @@
 		traceGCSweepDone()
 	}
 }
-
-// reimburseSweepCredit records that unusableBytes bytes of a
-// just-allocated span are not available for object allocation. This
-// offsets the worst-case charge performed by deductSweepCredit.
-func reimburseSweepCredit(unusableBytes uintptr) {
-	if mheap_.sweepPagesPerByte == 0 {
-		// Nobody cares about the credit. Avoid the atomic.
-		return
-	}
-	nval := atomic.Xadd64(&mheap_.spanBytesAlloc, -int64(unusableBytes))
-	if int64(nval) < 0 {
-		// Debugging for #18043.
-		print("runtime: bad spanBytesAlloc=", nval, " (was ", nval+uint64(unusableBytes), ") unusableBytes=", unusableBytes, " sweepPagesPerByte=", mheap_.sweepPagesPerByte, "\n")
-		throw("spanBytesAlloc underflow")
-	}
-}
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index e1b3b18..643fc7c 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -75,10 +75,10 @@
 	_ uint32 // align uint64 fields on 32-bit for atomics
 
 	// Proportional sweep
-	pagesInUse        uint64  // pages of spans in stats _MSpanInUse; R/W with mheap.lock
-	spanBytesAlloc    uint64  // bytes of spans allocated this cycle; updated atomically
-	pagesSwept        uint64  // pages swept this cycle; updated atomically
-	sweepPagesPerByte float64 // proportional sweep ratio; written with lock, read without
+	pagesInUse         uint64  // pages of spans in stats _MSpanInUse; R/W with mheap.lock
+	pagesSwept         uint64  // pages swept this cycle; updated atomically
+	sweepHeapLiveBasis uint64  // value of heap_live to use as the origin of sweep ratio; written with lock, read without
+	sweepPagesPerByte  float64 // proportional sweep ratio; written with lock, read without
 	// TODO(austin): pagesInUse should be a uintptr, but the 386
 	// compiler can't 8-byte align fields.