debug/gocore: handle 1.11 sparse heaps

Go 1.11 will change the Go heap. Instead of a single
arena, there may be multiple arenas, not necessarily near each other.

Change the way we do address->object lookups to handle multiple,
not necessarily near each other, arenas.  The major change is to
use a page table instead of an array to keep heapInfo structures.

Also move ptr/nonptr bits into HeapInfo, as they are recorded
differently in 1.11.  It's a bit tricky because a heapInfo needs
different numbers of ptr/nonptr bits for 32 and 64 bit inferiors.

I'll add a 1.11 test when 1.11 ships. A tip-compiled test passes
with this CL, but I don't want to check that in.

Change-Id: I5b32fc4aaa88db137a23df2b4592eaba8a3f1fcc
Reviewed-on: https://go-review.googlesource.com/100059
Reviewed-by: Austin Clements <austin@google.com>
diff --git a/gocore/dominator.go b/gocore/dominator.go
index 375fd86..59fac42 100644
--- a/gocore/dominator.go
+++ b/gocore/dominator.go
@@ -35,6 +35,9 @@
 type ltDom struct {
 	p *Process
 
+	// mapping from object ID to object
+	objs []Object
+
 	// number -> name
 	vertices []vName
 
@@ -55,6 +58,11 @@
 }
 
 type dominators struct {
+	p *Process
+
+	// mapping from object ID to object
+	objs []Object
+
 	// name -> dominator name
 	idom []vName
 
@@ -68,7 +76,7 @@
 
 func (p *Process) calculateDominators() *dominators {
 	lt := runLT(p)
-	d := dominators{idom: lt.idom}
+	d := dominators{p: p, idom: lt.idom, objs: lt.objs}
 	lt = ltDom{}
 
 	d.reverse()
@@ -83,6 +91,7 @@
 		p:         p,
 		nRoots:    len(p.rootIdx),
 		nVertices: nVertices,
+		objs:      make([]Object, p.nObj),
 		vertices:  make([]vName, nVertices),
 		parents:   make([]vName, nVertices),
 		semis:     make([]vNumber, nVertices),
@@ -112,6 +121,14 @@
 		parentName vName
 	}
 
+	// Initialize objs for mapping from object index back to Object.
+	i := 0
+	d.p.ForEachObject(func(x Object) bool {
+		d.objs[i] = x
+		i++
+		return true
+	})
+
 	// Add roots to the work stack, essentially pretending to visit
 	// the pseudo-root, numbering it 0.
 	d.semis[pseudoRoot] = 0
@@ -145,7 +162,7 @@
 			return true
 		}
 
-		root, object := findVertexByName(d.p, item.name)
+		root, object := d.findVertexByName(item.name)
 		if root != nil {
 			d.p.ForEachRootPtr(root, visitChild)
 		} else {
@@ -156,14 +173,23 @@
 }
 
 // findVertexByName returns the root/object named by n, or nil,0 for the pseudo-root.
-func findVertexByName(p *Process, n vName) (*Root, Object) {
+func (d *ltDom) findVertexByName(n vName) (*Root, Object) {
 	if n == 0 {
 		return nil, 0
 	}
-	if int(n) < len(p.rootIdx)+1 {
-		return p.rootIdx[n-1], 0
+	if int(n) < len(d.p.rootIdx)+1 {
+		return d.p.rootIdx[n-1], 0
 	}
-	return nil, p.findObjectFromIndex(int(n) - len(p.rootIdx) - 1)
+	return nil, d.objs[int(n)-len(d.p.rootIdx)-1]
+}
+func (d *dominators) findVertexByName(n vName) (*Root, Object) {
+	if n == 0 {
+		return nil, 0
+	}
+	if int(n) < len(d.p.rootIdx)+1 {
+		return d.p.rootIdx[n-1], 0
+	}
+	return nil, d.objs[int(n)-len(d.p.rootIdx)-1]
 }
 
 // calculate runs the main part of LT.
@@ -188,7 +214,7 @@
 		}
 
 		// Step 2. Compute the semidominators of all nodes.
-		root, obj := findVertexByName(d.p, w)
+		root, obj := d.findVertexByName(w)
 		// This loop never visits the pseudo-root.
 		if root != nil {
 			u := d.eval(pseudoRoot)
@@ -337,7 +363,7 @@
 
 		work = work[:len(work)-1]
 
-		root, obj := findVertexByName(p, item.v)
+		root, obj := d.findVertexByName(item.v)
 		var size int64
 		switch {
 		case item.v == pseudoRoot:
@@ -358,7 +384,7 @@
 	fmt.Fprintf(w, "digraph %s {\nrankdir=\"LR\"\n", "dominators")
 	for number, name := range d.vertices {
 		var label string
-		root, obj := findVertexByName(d.p, name)
+		root, obj := d.findVertexByName(name)
 
 		switch {
 		case name == 0:
diff --git a/gocore/dominator_test.go b/gocore/dominator_test.go
index 2f70902..f3360b0 100644
--- a/gocore/dominator_test.go
+++ b/gocore/dominator_test.go
@@ -134,7 +134,7 @@
 	pRoot.idom = nil
 
 	getVertex := func(n vName) *sanityVertex {
-		r, o := findVertexByName(d.p, n)
+		r, o := d.findVertexByName(n)
 		switch {
 		case n == pseudoRoot:
 			return &pRoot
diff --git a/gocore/gocore_test.go b/gocore/gocore_test.go
index d59e598..8d4206d 100644
--- a/gocore/gocore_test.go
+++ b/gocore/gocore_test.go
@@ -5,6 +5,7 @@
 package gocore
 
 import (
+	"fmt"
 	"reflect"
 	"testing"
 
@@ -29,6 +30,21 @@
 	return p
 }
 
+func loadExampleVersion(t *testing.T, version string) *Process {
+	if version == "1.9" {
+		version = ""
+	}
+	c, err := core.Core(fmt.Sprintf("testdata/core%s", version), "testdata")
+	if err != nil {
+		t.Fatalf("can't load test core file: %s", err)
+	}
+	p, err := Core(c, FlagTypes|FlagReverse)
+	if err != nil {
+		t.Fatalf("can't parse Go core: %s", err)
+	}
+	return p
+}
+
 func TestObjects(t *testing.T) {
 	p := loadExample(t)
 	n := 0
@@ -41,18 +57,6 @@
 	}
 }
 
-func TestObjectIndexes(t *testing.T) {
-	p := loadExample(t)
-	p.ForEachObject(func(x Object) bool {
-		idx, _ := p.findObjectIndex(p.Addr(x))
-		got := p.findObjectFromIndex(idx)
-		if x != got {
-			t.Errorf("findObjectFromIndex(%v) = %#x, want %#x", idx, got, x)
-		}
-		return true
-	})
-}
-
 func TestRoots(t *testing.T) {
 	p := loadExample(t)
 	n := 0
@@ -187,3 +191,7 @@
 		}
 	}
 }
+
+func TestVersions(t *testing.T) {
+	loadExampleVersion(t, "1.10")
+}
diff --git a/gocore/object.go b/gocore/object.go
index 61d66de..debf0ca 100644
--- a/gocore/object.go
+++ b/gocore/object.go
@@ -6,7 +6,6 @@
 
 import (
 	"math/bits"
-	"sort"
 	"strings"
 
 	"golang.org/x/debug/core"
@@ -31,13 +30,13 @@
 	// Function to call when we find a new pointer.
 	add := func(x core.Address) {
 		h := p.findHeapInfo(x)
-		if h == nil || h.base == 0 { // not in heap or not in a valid span
+		if h == nil { // not in heap or not in a valid span
 			// Invalid spans can happen with intra-stack pointers.
 			return
 		}
 		// Round down to object start.
 		x = h.base.Add(x.Sub(h.base) / h.size * h.size)
-		// Object start may map to a different info. Reload.
+		// Object start may map to a different info. Reload heap info.
 		h = p.findHeapInfo(x)
 		// Find mark bit
 		b := uint64(x) % heapInfoSize / 8
@@ -108,10 +107,17 @@
 	p.nObj = n
 
 	// Initialize firstIdx fields in the heapInfo, for fast object index lookups.
-	for i := len(p.heapInfo) - 1; i >= 0; i-- {
-		h := &p.heapInfo[i]
-		n -= bits.OnesCount64(h.mark)
-		h.firstIdx = n
+	n = 0
+	p.ForEachObject(func(x Object) bool {
+		h := p.findHeapInfo(p.Addr(x))
+		if h.firstIdx == -1 {
+			h.firstIdx = n
+		}
+		n++
+		return true
+	})
+	if n != p.nObj {
+		panic("object count wrong")
 	}
 
 	// Update stats to include the live/garbage distinction.
@@ -125,19 +131,14 @@
 // isPtrFromHeap reports whether the inferior at address a contains a pointer.
 // a must be somewhere in the heap.
 func (p *Process) isPtrFromHeap(a core.Address) bool {
-	// Convert arena offset in words to bitmap offset in bits.
-	off := a.Sub(p.arenaStart)
-	off >>= p.proc.LogPtrSize()
-
-	// Find bit in bitmap. It goes backwards from the end.
-	// Each byte contains pointer/nonpointer bits for 4 words in its low nybble.
-	return p.proc.ReadUint8(p.bitmapEnd.Add(-(off>>2)-1))>>uint(off&3)&1 != 0
+	return p.findHeapInfo(a).IsPtr(a, p.proc.PtrSize())
 }
 
 // IsPtr reports whether the inferior at address a contains a pointer.
 func (p *Process) IsPtr(a core.Address) bool {
-	if a >= p.arenaStart && a < p.arenaUsed {
-		return p.isPtrFromHeap(a)
+	h := p.findHeapInfo(a)
+	if h != nil {
+		return h.IsPtr(a, p.proc.PtrSize())
 	}
 	for _, m := range p.modules {
 		for _, s := range [2]string{"data", "bss"} {
@@ -163,7 +164,7 @@
 func (p *Process) FindObject(a core.Address) (Object, int64) {
 	// Round down to the start of an object.
 	h := p.findHeapInfo(a)
-	if h == nil || h.size == 0 {
+	if h == nil {
 		// Not in Go heap, or in a span
 		// that doesn't hold Go objects (freed, stacks, ...)
 		return 0, 0
@@ -171,7 +172,7 @@
 	x := h.base.Add(a.Sub(h.base) / h.size * h.size)
 	// Check if object is marked.
 	h = p.findHeapInfo(x)
-	if h.mark>>(uint64(x)%heapInfoSize/8)&1 == 0 {
+	if h.mark>>(uint64(x)%heapInfoSize/8)&1 == 0 { // free or garbage
 		return 0, 0
 	}
 	return Object(x), a.Sub(x)
@@ -186,40 +187,21 @@
 	return h.firstIdx + bits.OnesCount64(h.mark&(uint64(1)<<(uint64(x)%heapInfoSize/8)-1)), off
 }
 
-func (p *Process) findObjectFromIndex(idx int) Object {
-	// Find the first heapInfo after the index we want, then go one back,
-	// so that we skip over empty chunks.
-	hIdx := sort.Search(len(p.heapInfo), func(k int) bool {
-		return p.heapInfo[k].firstIdx > idx
-	})
-	if hIdx > 0 {
-		hIdx--
-	}
-	h := p.heapInfo[hIdx]
-	gap := idx - h.firstIdx + 1
-	// TODO: use math/bits somehow?
-	for offset := core.Address(0); offset < heapInfoSize; offset += 8 {
-		if h.mark>>(offset/8)&1 == 1 {
-			gap--
-		}
-		if gap == 0 {
-			// Can't use heapInfo.base: it's the start of the *span*, not the heapInfo.
-			return Object(p.arenaStart + core.Address(hIdx*heapInfoSize) + offset)
-		}
-	}
-	panic("Overran heap info looking for live objects")
-}
-
 // ForEachObject calls fn with each object in the Go heap.
 // If fn returns false, ForEachObject returns immediately.
 func (p *Process) ForEachObject(fn func(x Object) bool) {
-	for i := 0; i < len(p.heapInfo); i++ {
-		m := p.heapInfo[i].mark
-		for m != 0 {
-			j := bits.TrailingZeros64(m)
-			m &= m - 1
-			if !fn(Object(p.arenaStart.Add(int64(i)*heapInfoSize + int64(j)*8))) {
-				return
+	for _, k := range p.pages {
+		pt := p.pageTable[k]
+		for i := range pt {
+			h := &pt[i]
+			m := h.mark
+			for m != 0 {
+				j := bits.TrailingZeros64(m)
+				m &= m - 1
+				x := Object(k)*pageTableSize*heapInfoSize + Object(i)*heapInfoSize + Object(j)*8
+				if !fn(x) {
+					return
+				}
 			}
 		}
 	}
@@ -348,15 +330,69 @@
 	base     core.Address // start of the span containing this heap region
 	size     int64        // size of objects in the span
 	mark     uint64       // 64 mark bits, one for every 8 bytes
-	firstIdx int          // the index of the first object that starts in or after this region
+	firstIdx int          // the index of the first object that starts in this region, or -1 if none
+	// For 64-bit inferiors, ptr[0] contains 64 pointer bits, one
+	// for every 8 bytes.  On 32-bit inferiors, ptr contains 128
+	// pointer bits, one for every 4 bytes.
+	ptr [2]uint64
 }
 
+func (h *heapInfo) IsPtr(a core.Address, ptrSize int64) bool {
+	if ptrSize == 8 {
+		i := uint(a%heapInfoSize) / 8
+		return h.ptr[0]>>i&1 != 0
+	}
+	i := a % heapInfoSize / 4
+	return h.ptr[i/64]>>(i%64)&1 != 0
+}
+
+// setHeapPtr records that the memory at heap address a contains a pointer.
+func (p *Process) setHeapPtr(a core.Address) {
+	h := p.allocHeapInfo(a)
+	if p.proc.PtrSize() == 8 {
+		i := uint(a%heapInfoSize) / 8
+		h.ptr[0] |= uint64(1) << i
+		return
+	}
+	i := a % heapInfoSize / 4
+	h.ptr[i/64] |= uint64(1) << (i % 64)
+}
+
+// Heap info structures cover 9 bits of address.
+// A page table entry covers 20 bits of address (1MB).
+const pageTableSize = 1 << 11
+
+type pageTableEntry [pageTableSize]heapInfo
+
 // findHeapInfo finds the heapInfo structure for a.
 // Returns nil if a is not a heap address.
 func (p *Process) findHeapInfo(a core.Address) *heapInfo {
-	if a < p.arenaStart || a >= p.arenaUsed {
+	k := a / heapInfoSize / pageTableSize
+	i := a / heapInfoSize % pageTableSize
+	t := p.pageTable[k]
+	if t == nil {
 		return nil
 	}
-	i := a.Sub(p.arenaStart) / heapInfoSize
-	return &p.heapInfo[i]
+	h := &t[i]
+	if h.base == 0 {
+		return nil
+	}
+	return h
+}
+
+// Same as findHeapInfo, but allocates the heapInfo if it
+// hasn't been allocated yet.
+func (p *Process) allocHeapInfo(a core.Address) *heapInfo {
+	k := a / heapInfoSize / pageTableSize
+	i := a / heapInfoSize % pageTableSize
+	t := p.pageTable[k]
+	if t == nil {
+		t = new(pageTableEntry)
+		for j := 0; j < pageTableSize; j++ {
+			t[j].firstIdx = -1
+		}
+		p.pageTable[k] = t
+		p.pages = append(p.pages, k)
+	}
+	return &t[i]
 }
diff --git a/gocore/process.go b/gocore/process.go
index de034c8..d618566 100644
--- a/gocore/process.go
+++ b/gocore/process.go
@@ -16,12 +16,11 @@
 type Process struct {
 	proc *core.Process
 
-	arenaStart core.Address
-	arenaUsed  core.Address
-	bitmapEnd  core.Address
-
 	// data structure for fast object finding
-	heapInfo []heapInfo
+	// The key to these maps is the object address divided by
+	// pageTableSize * heapInfoSize.
+	pageTable map[core.Address]*pageTableEntry
+	pages     []core.Address // deterministic ordering of keys of pageTable
 
 	// number of live objects
 	nObj int
@@ -168,7 +167,7 @@
 	// Read all the data that depend on runtime globals.
 	p.buildVersion = p.rtGlobals["buildVersion"].String()
 	p.readModules()
-	p.readSpans()
+	p.readHeap()
 	p.readGs()
 	p.readStackVars() // needs to be after readGs.
 	p.markObjects()   // needs to be after readGlobals, readStackVars.
@@ -182,21 +181,108 @@
 	return p, nil
 }
 
-func (p *Process) readSpans() {
+type arena struct {
+	heapMin core.Address
+	heapMax core.Address
+
+	bitmapMin core.Address
+	bitmapMax core.Address
+
+	spanTableMin core.Address
+	spanTableMax core.Address
+}
+
+func (p *Process) readHeap() {
+	ptrSize := p.proc.PtrSize()
+	logPtrSize := p.proc.LogPtrSize()
+	p.pageTable = map[core.Address]*pageTableEntry{}
 	mheap := p.rtGlobals["mheap_"]
+	var arenas []arena
 
-	spanTableStart := mheap.Field("spans").SlicePtr().Address()
-	spanTableEnd := spanTableStart.Add(mheap.Field("spans").SliceCap() * p.proc.PtrSize())
-	arenaStart := core.Address(mheap.Field("arena_start").Uintptr())
-	arenaUsed := core.Address(mheap.Field("arena_used").Uintptr())
-	arenaEnd := core.Address(mheap.Field("arena_end").Uintptr())
-	bitmapEnd := core.Address(mheap.Field("bitmap").Uintptr())
-	bitmapStart := bitmapEnd.Add(-int64(mheap.Field("bitmap_mapped").Uintptr()))
+	if mheap.HasField("spans") {
+		// go 1.9 or 1.10. There is a single arena.
+		arenaStart := core.Address(mheap.Field("arena_start").Uintptr())
+		arenaUsed := core.Address(mheap.Field("arena_used").Uintptr())
+		arenaEnd := core.Address(mheap.Field("arena_end").Uintptr())
+		bitmapEnd := core.Address(mheap.Field("bitmap").Uintptr())
+		bitmapStart := bitmapEnd.Add(-int64(mheap.Field("bitmap_mapped").Uintptr()))
+		spanTableStart := mheap.Field("spans").SlicePtr().Address()
+		spanTableEnd := spanTableStart.Add(mheap.Field("spans").SliceCap() * ptrSize)
+		arenas = append(arenas, arena{
+			heapMin:      arenaStart,
+			heapMax:      arenaEnd,
+			bitmapMin:    bitmapStart,
+			bitmapMax:    bitmapEnd,
+			spanTableMin: spanTableStart,
+			spanTableMax: spanTableEnd,
+		})
 
-	p.arenaStart = arenaStart
-	p.arenaUsed = arenaUsed
-	p.bitmapEnd = bitmapEnd
+		// Copy pointer bits to heap info.
+		// Note that the pointer bits are stored backwards.
+		for a := arenaStart; a < arenaUsed; a = a.Add(ptrSize) {
+			off := a.Sub(arenaStart) >> logPtrSize
+			if p.proc.ReadUint8(bitmapEnd.Add(-(off>>2)-1))>>uint(off&3)&1 != 0 {
+				p.setHeapPtr(a)
+			}
+		}
+	} else {
+		// go 1.11+. Has multiple arenas.
+		arenaSize := p.rtConstants["heapArenaBytes"]
+		if arenaSize%heapInfoSize != 0 {
+			panic("arenaSize not a multiple of heapInfoSize")
+		}
+		arenaBaseOffset := p.rtConstants["arenaBaseOffset"]
+		if ptrSize == 4 && arenaBaseOffset != 0 {
+			panic("arenaBaseOffset must be 0 for 32-bit inferior")
+		}
+		level1Table := mheap.Field("arenas")
+		level1size := level1Table.ArrayLen()
+		for level1 := int64(0); level1 < level1size; level1++ {
+			ptr := level1Table.ArrayIndex(level1)
+			if ptr.Address() == 0 {
+				continue
+			}
+			level2table := ptr.Deref()
+			level2size := level2table.ArrayLen()
+			for level2 := int64(0); level2 < level2size; level2++ {
+				ptr = level2table.ArrayIndex(level2)
+				if ptr.Address() == 0 {
+					continue
+				}
+				a := ptr.Deref()
 
+				min := core.Address(arenaSize*(level2+level1*level2size) - arenaBaseOffset)
+				max := min.Add(arenaSize)
+				bitmap := a.Field("bitmap")
+				spans := a.Field("spans")
+
+				arenas = append(arenas, arena{
+					heapMin:      min,
+					heapMax:      max,
+					bitmapMin:    bitmap.a,
+					bitmapMax:    bitmap.a.Add(bitmap.ArrayLen()),
+					spanTableMin: spans.a,
+					spanTableMax: spans.a.Add(spans.ArrayLen() * ptrSize),
+				})
+
+				// Copy out ptr/nonptr bits
+				n := bitmap.ArrayLen()
+				for i := int64(0); i < n; i++ {
+					m := bitmap.ArrayIndex(i).Uint8()
+					for j := int64(0); j < 8; j++ {
+						if m>>uint(j)&1 != 0 {
+							p.setHeapPtr(min.Add((i*8 + j) * ptrSize))
+						}
+					}
+				}
+			}
+		}
+	}
+
+	p.readSpans(mheap, arenas)
+}
+
+func (p *Process) readSpans(mheap region, arenas []arena) {
 	var all int64
 	var text int64
 	var readOnly int64
@@ -228,9 +314,11 @@
 					size -= b.Sub(a)
 				}
 			}
-			attribute(spanTableStart, spanTableEnd, &spanTable)
-			attribute(arenaStart, arenaEnd, &heap)
-			attribute(bitmapStart, bitmapEnd, &bitmap)
+			for _, a := range arenas {
+				attribute(a.heapMin, a.heapMax, &heap)
+				attribute(a.bitmapMin, a.bitmapMax, &bitmap)
+				attribute(a.spanTableMin, a.spanTableMax, &spanTable)
+			}
 			// Any other anonymous mapping is bss.
 			// TODO: how to distinguish original bss from anonymous mmap?
 			bss += size
@@ -250,7 +338,6 @@
 	if pageSize%heapInfoSize != 0 {
 		panic(fmt.Sprintf("page size not a multiple of %d", heapInfoSize))
 	}
-	p.heapInfo = make([]heapInfo, (p.arenaUsed-p.arenaStart)/heapInfoSize)
 	allspans := mheap.Field("allspans")
 	var allSpanSize int64
 	var freeSpanSize int64
@@ -294,8 +381,12 @@
 				}
 			}
 			spanRoundSize += spanSize - n*elemSize
+
+			// initialize heap info records for all inuse spans.
 			for a := min; a < max; a += heapInfoSize {
-				p.heapInfo[(a.Sub(p.arenaStart))/heapInfoSize] = heapInfo{base: min, size: elemSize, firstIdx: -1}
+				h := p.allocHeapInfo(a)
+				h.base = min
+				h.size = elemSize
 			}
 
 			// Process special records.
diff --git a/gocore/region.go b/gocore/region.go
index b0fdc79..cc78518 100644
--- a/gocore/region.go
+++ b/gocore/region.go
@@ -157,3 +157,25 @@
 	}
 	return region{p: r.p, a: r.a.Add(finfo.Off), typ: finfo.Type}
 }
+
+func (r region) HasField(f string) bool {
+	finfo := r.typ.field(f)
+	return finfo != nil
+}
+
+func (r region) ArrayLen() int64 {
+	if r.typ.Kind != KindArray {
+		panic("can't ArrayLen a non-array")
+	}
+	return r.typ.Count
+}
+
+func (r region) ArrayIndex(i int64) region {
+	if r.typ.Kind != KindArray {
+		panic("can't ArrayIndex a non-array")
+	}
+	if i < 0 || i >= r.typ.Count {
+		panic("array index out of bounds")
+	}
+	return region{p: r.p, a: r.a.Add(i * r.typ.Elem.Size), typ: r.typ.Elem}
+}
diff --git a/gocore/testdata/README b/gocore/testdata/README
index 11d9761..2cf240c 100644
--- a/gocore/testdata/README
+++ b/gocore/testdata/README
@@ -12,3 +12,5 @@
 path.  The executable was at /tmp/test, so that path is reproduced
 here so the core dump reader can find the executable using this
 testdata directory as the base directory.
+
+core1.10 is produced in the same way, except using go1.10.0.
diff --git a/gocore/testdata/core1.10 b/gocore/testdata/core1.10
new file mode 100644
index 0000000..903c176
--- /dev/null
+++ b/gocore/testdata/core1.10
Binary files differ
diff --git a/gocore/testdata/tmp/test1.10 b/gocore/testdata/tmp/test1.10
new file mode 100755
index 0000000..6839de6
--- /dev/null
+++ b/gocore/testdata/tmp/test1.10
Binary files differ