runtime: detect and report zombie slots during sweeping

A zombie slot is a slot that is marked, but isn't allocated. This can
indicate a bug in the GC, or a bad use of unsafe.Pointer. Currently,
the sweeper has best-effort detection for zombie slots: if there are
more marked slots than allocated slots, then there must have been a
zombie slot. However, this is imprecise since it only compares totals
and it reports almost no information that may be helpful to debug the
issue.

Add a precise check that compares the mark and allocation bitmaps and
reports detailed information if it detects a zombie slot.

No appreciable effect on performance as measured by the sweet
benchmarks:

name                                old time/op  new time/op  delta
BiogoIgor                            15.8s ± 2%   15.8s ± 2%    ~     (p=0.421 n=24+25)
BiogoKrishna                         15.6s ± 2%   15.8s ± 5%    ~     (p=0.082 n=22+23)
BleveIndexBatch100                   4.90s ± 3%   4.88s ± 2%    ~     (p=0.627 n=25+24)
CompileTemplate                      204ms ± 1%   205ms ± 0%  +0.22%  (p=0.010 n=24+23)
CompileUnicode                      77.8ms ± 2%  78.0ms ± 1%    ~     (p=0.236 n=25+24)
CompileGoTypes                       729ms ± 0%   731ms ± 0%  +0.26%  (p=0.000 n=24+24)
CompileCompiler                      3.52s ± 0%   3.52s ± 1%    ~     (p=0.152 n=25+25)
CompileSSA                           8.06s ± 1%   8.05s ± 0%    ~     (p=0.192 n=25+24)
CompileFlate                         132ms ± 1%   132ms ± 1%    ~     (p=0.373 n=24+24)
CompileGoParser                      163ms ± 1%   164ms ± 1%  +0.32%  (p=0.003 n=24+25)
CompileReflect                       453ms ± 1%   455ms ± 1%  +0.39%  (p=0.000 n=22+22)
CompileTar                           181ms ± 1%   181ms ± 1%  +0.20%  (p=0.029 n=24+21)
CompileXML                           244ms ± 1%   244ms ± 1%    ~     (p=0.065 n=24+24)
CompileStdCmd                        15.8s ± 2%   15.7s ± 2%    ~     (p=0.059 n=23+24)
FoglemanFauxGLRenderRotateBoat       13.4s ±11%   12.8s ± 0%    ~     (p=0.377 n=25+24)
FoglemanPathTraceRenderGopherIter1   18.6s ± 0%   18.6s ± 0%    ~     (p=0.696 n=23+24)
GopherLuaKNucleotide                 28.7s ± 4%   28.6s ± 5%    ~     (p=0.700 n=25+25)
MarkdownRenderXHTML                  250ms ± 1%   248ms ± 1%  -1.01%  (p=0.000 n=24+24)
[Geo mean]                           1.60s        1.60s       -0.11%

(https://perf.golang.org/search?q=upload:20200517.6)

For #38702.

Change-Id: I8af1fefd5fbf7b9cb665b98f9c4b73d1d08eea81
Reviewed-on: https://go-review.googlesource.com/c/go/+/234100
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go
index 4c281ce..c5c8a4c 100644
--- a/src/runtime/gc_test.go
+++ b/src/runtime/gc_test.go
@@ -12,6 +12,7 @@
 	"runtime"
 	"runtime/debug"
 	"sort"
+	"strings"
 	"sync"
 	"sync/atomic"
 	"testing"
@@ -192,6 +193,15 @@
 	}
 }
 
+func TestGcZombieReporting(t *testing.T) {
+	// This test is somewhat sensitive to how the allocator works.
+	got := runTestProg(t, "testprog", "GCZombie")
+	want := "found pointer to free object"
+	if !strings.Contains(got, want) {
+		t.Fatalf("expected %q in output, but got %q", want, got)
+	}
+}
+
 func BenchmarkSetTypePtr(b *testing.B) {
 	benchSetType(b, new(*byte))
 }
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index f9b03d3..3aa3afc 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -439,10 +439,31 @@
 		}
 	}
 
+	// Check for zombie objects.
+	if s.freeindex < s.nelems {
+		// Everything < freeindex is allocated and hence
+		// cannot be zombies.
+		//
+		// Check the first bitmap byte, where we have to be
+		// careful with freeindex.
+		obj := s.freeindex
+		if (*s.gcmarkBits.bytep(obj / 8)&^*s.allocBits.bytep(obj / 8))>>(obj%8) != 0 {
+			s.reportZombies()
+		}
+		// Check remaining bytes.
+		for i := obj/8 + 1; i < divRoundUp(s.nelems, 8); i++ {
+			if *s.gcmarkBits.bytep(i)&^*s.allocBits.bytep(i) != 0 {
+				s.reportZombies()
+			}
+		}
+	}
+
 	// Count the number of free objects in this span.
 	nalloc := uint16(s.countAlloc())
 	nfreed := s.allocCount - nalloc
 	if nalloc > s.allocCount {
+		// The zombie check above should have caught this in
+		// more detail.
 		print("runtime: nelems=", s.nelems, " nalloc=", nalloc, " previous allocCount=", s.allocCount, " nfreed=", nfreed, "\n")
 		throw("sweep increased allocation count")
 	}
@@ -755,6 +776,57 @@
 	return res
 }
 
+// reportZombies reports any marked but free objects in s and throws.
+//
+// This generally means one of the following:
+//
+// 1. User code converted a pointer to a uintptr and then back
+// unsafely, and a GC ran while the uintptr was the only reference to
+// an object.
+//
+// 2. User code (or a compiler bug) constructed a bad pointer that
+// points to a free slot, often a past-the-end pointer.
+//
+// 3. The GC two cycles ago missed a pointer and freed a live object,
+// but it was still live in the last cycle, so this GC cycle found a
+// pointer to that object and marked it.
+func (s *mspan) reportZombies() {
+	printlock()
+	print("runtime: marked free object in span ", s, ", elemsize=", s.elemsize, " freeindex=", s.freeindex, " (bad use of unsafe.Pointer? try -d=checkptr)\n")
+	mbits := s.markBitsForBase()
+	abits := s.allocBitsForIndex(0)
+	for i := uintptr(0); i < s.nelems; i++ {
+		addr := s.base() + i*s.elemsize
+		print(hex(addr))
+		alloc := i < s.freeindex || abits.isMarked()
+		if alloc {
+			print(" alloc")
+		} else {
+			print(" free ")
+		}
+		if mbits.isMarked() {
+			print(" marked  ")
+		} else {
+			print(" unmarked")
+		}
+		zombie := mbits.isMarked() && !alloc
+		if zombie {
+			print(" zombie")
+		}
+		print("\n")
+		if zombie {
+			length := s.elemsize
+			if length > 1024 {
+				length = 1024
+			}
+			hexdumpWords(addr, addr+length, nil)
+		}
+		mbits.advance()
+		abits.advance()
+	}
+	throw("found pointer to free object")
+}
+
 // deductSweepCredit deducts sweep credit for allocating a span of
 // size spanBytes. This must be performed *before* the span is
 // allocated to ensure the system has enough credit. If necessary, it
diff --git a/src/runtime/testdata/testprog/gc.go b/src/runtime/testdata/testprog/gc.go
index f691a1d..74732cd 100644
--- a/src/runtime/testdata/testprog/gc.go
+++ b/src/runtime/testdata/testprog/gc.go
@@ -11,6 +11,7 @@
 	"runtime/debug"
 	"sync/atomic"
 	"time"
+	"unsafe"
 )
 
 func init() {
@@ -19,6 +20,7 @@
 	register("GCSys", GCSys)
 	register("GCPhys", GCPhys)
 	register("DeferLiveness", DeferLiveness)
+	register("GCZombie", GCZombie)
 }
 
 func GCSys() {
@@ -264,3 +266,37 @@
 func escape(x interface{}) { sink2 = x; sink2 = nil }
 
 var sink2 interface{}
+
+// Test zombie object detection and reporting.
+func GCZombie() {
+	// Allocate several objects of unusual size (so free slots are
+	// unlikely to all be re-allocated by the runtime).
+	const size = 190
+	const count = 8192 / size
+	keep := make([]*byte, 0, (count+1)/2)
+	free := make([]uintptr, 0, (count+1)/2)
+	zombies := make([]*byte, 0, len(free))
+	for i := 0; i < count; i++ {
+		obj := make([]byte, size)
+		p := &obj[0]
+		if i%2 == 0 {
+			keep = append(keep, p)
+		} else {
+			free = append(free, uintptr(unsafe.Pointer(p)))
+		}
+	}
+
+	// Free the unreferenced objects.
+	runtime.GC()
+
+	// Bring the free objects back to life.
+	for _, p := range free {
+		zombies = append(zombies, (*byte)(unsafe.Pointer(p)))
+	}
+
+	// GC should detect the zombie objects.
+	runtime.GC()
+	println("failed")
+	runtime.KeepAlive(keep)
+	runtime.KeepAlive(zombies)
+}