gocore: add dominator tree computation

Calculate the heap's dominator tree using modified L-T, reverse its
edges, and use that to calculate the per-object retained size.

One small functionality change: heapInfo.firstIdx is now set for empty
chunks, allowing binary search over Process.heapInfo.

Haven't really figured out what the public API for this should be yet;
maybe it's okay to expose the vertex names and provide an iterator
method.

This should be pretty fast, but I haven't benchmarked it yet, so there
might be some low-hanging fruit lying around.

Change-Id: I2c85c29514a0f2e60231f13f0c058dd7f12b548e
Reviewed-on: https://go-review.googlesource.com/98736
Reviewed-by: Keith Randall <khr@golang.org>
diff --git a/gocore/dominator.go b/gocore/dominator.go
new file mode 100644
index 0000000..375fd86
--- /dev/null
+++ b/gocore/dominator.go
@@ -0,0 +1,398 @@
+// Copyright 2018 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gocore
+
+import (
+	"fmt"
+	"io"
+)
+
+// Code liberally adapted from cmd/compile/internal/ssa/dom.go and
+// x/tools/go/ssa/dom.go.
+//
+// We use the algorithm described in Lengauer & Tarjan. 1979.  A fast
+// algorithm for finding dominators in a flowgraph.
+// http://doi.acm.org/10.1145/357062.357071
+//
+// We also apply the optimizations to SLT described in Georgiadis et
+// al, Finding Dominators in Practice, JGAA 2006,
+// http://jgaa.info/accepted/2006/GeorgiadisTarjanWerneck2006.10.1.pdf
+// to avoid the need for buckets of size > 1.
+
+// Vertex name, as used in the papers.
+// 0 -> the pseudo-root, a made-up object that parents all the GC roots.
+// 1...nRoots -> a root, found at p.rootIdx[#-1]
+// nRoots+1... -> an object, with object index # - nRoots - 1
+type vName int
+
+const pseudoRoot vName = 0
+
+// Vertex number, assigned in the DFS traversal in step 1.
+type vNumber int
+
+type ltDom struct {
+	p *Process
+
+	// number -> name
+	vertices []vName
+
+	// name -> parent name
+	parents []vName
+
+	// name -> vertex number before step 2 or semidominator number after.
+	semis []vNumber
+
+	// name -> ancestor name
+	ancestor []vName
+	labels   []vName
+
+	// name -> dominator name
+	idom []vName
+
+	nVertices, nRoots int
+}
+
+type dominators struct {
+	// name -> dominator name
+	idom []vName
+
+	// Reverse dominator tree edges, stored just like the ones in Process. name -> child name.
+	ridx  []int
+	redge []vName
+
+	// Retained size for each vertex. name -> retained size.
+	size []int64
+}
+
+func (p *Process) calculateDominators() *dominators {
+	lt := runLT(p)
+	d := dominators{idom: lt.idom}
+	lt = ltDom{}
+
+	d.reverse()
+	d.calcSize(p)
+
+	return &d
+}
+
+func runLT(p *Process) ltDom {
+	nVertices := 1 + len(p.rootIdx) + p.nObj
+	lt := ltDom{
+		p:         p,
+		nRoots:    len(p.rootIdx),
+		nVertices: nVertices,
+		vertices:  make([]vName, nVertices),
+		parents:   make([]vName, nVertices),
+		semis:     make([]vNumber, nVertices),
+		ancestor:  make([]vName, nVertices),
+		labels:    make([]vName, nVertices),
+		idom:      make([]vName, nVertices),
+	}
+	// TODO: increment all the names and use 0 as the uninitialized value.
+	for i := range lt.semis {
+		lt.semis[i] = -1
+	}
+	for i := range lt.ancestor {
+		lt.ancestor[i] = -1
+	}
+	for i := range lt.labels {
+		lt.labels[i] = vName(i)
+	}
+	lt.initialize()
+	lt.calculate()
+	return lt
+}
+
+// initialize implements step 1 of LT.
+func (d *ltDom) initialize() {
+	type workItem struct {
+		name       vName
+		parentName vName
+	}
+
+	// Add roots to the work stack, essentially pretending to visit
+	// the pseudo-root, numbering it 0.
+	d.semis[pseudoRoot] = 0
+	d.parents[pseudoRoot] = -1
+	d.vertices[0] = pseudoRoot
+	var work []workItem
+	for i := 1; i < 1+d.nRoots; i++ {
+		work = append(work, workItem{name: vName(i), parentName: 0})
+	}
+
+	n := vNumber(1) // 0 was the pseudo-root.
+
+	// Build the spanning tree, assigning vertex numbers to each object
+	// and initializing semi and parent.
+	for len(work) != 0 {
+		item := work[len(work)-1]
+		work = work[:len(work)-1]
+
+		if d.semis[item.name] != -1 {
+			continue
+		}
+
+		d.semis[item.name] = n
+		d.parents[item.name] = item.parentName
+		d.vertices[n] = item.name
+		n++
+
+		visitChild := func(_ int64, child Object, _ int64) bool {
+			childIdx, _ := d.p.findObjectIndex(d.p.Addr(child))
+			work = append(work, workItem{name: vName(childIdx + d.nRoots + 1), parentName: item.name})
+			return true
+		}
+
+		root, object := findVertexByName(d.p, item.name)
+		if root != nil {
+			d.p.ForEachRootPtr(root, visitChild)
+		} else {
+			d.p.ForEachPtr(object, visitChild)
+		}
+
+	}
+}
+
+// findVertexByName returns the root/object named by n, or nil,0 for the pseudo-root.
+func findVertexByName(p *Process, n vName) (*Root, Object) {
+	if n == 0 {
+		return nil, 0
+	}
+	if int(n) < len(p.rootIdx)+1 {
+		return p.rootIdx[n-1], 0
+	}
+	return nil, p.findObjectFromIndex(int(n) - len(p.rootIdx) - 1)
+}
+
+// calculate runs the main part of LT.
+func (d *ltDom) calculate() {
+	// name -> bucket (a name), per Georgiadis.
+	buckets := make([]vName, d.nVertices)
+	for i := range buckets {
+		buckets[i] = vName(i)
+	}
+
+	for i := vNumber(len(d.vertices)) - 1; i > 0; i-- {
+		w := d.vertices[i]
+
+		// Step 3. Implicitly define the immediate dominator of each node.
+		for v := buckets[w]; v != w; v = buckets[v] {
+			u := d.eval(v)
+			if d.semis[u] < d.semis[v] {
+				d.idom[v] = u
+			} else {
+				d.idom[v] = w
+			}
+		}
+
+		// Step 2. Compute the semidominators of all nodes.
+		root, obj := findVertexByName(d.p, w)
+		// This loop never visits the pseudo-root.
+		if root != nil {
+			u := d.eval(pseudoRoot)
+			if d.semis[u] < d.semis[w] {
+				d.semis[w] = d.semis[u]
+			}
+		} else {
+			d.p.ForEachReversePtr(obj, func(x Object, r *Root, _, _ int64) bool {
+				var v int
+				if r != nil {
+					v = d.p.findRootIndex(r) + 1
+				} else {
+					v, _ = d.p.findObjectIndex(d.p.Addr(x))
+					v += d.nRoots + 1
+				}
+				u := d.eval(vName(v))
+				if d.semis[u] < d.semis[w] {
+					d.semis[w] = d.semis[u]
+				}
+				return true
+			})
+		}
+
+		d.link(d.parents[w], w)
+
+		if d.parents[w] == d.vertices[d.semis[w]] {
+			d.idom[w] = d.parents[w]
+		} else {
+			buckets[w] = buckets[d.vertices[d.semis[w]]]
+			buckets[d.vertices[d.semis[w]]] = w
+		}
+	}
+
+	// The final 'Step 3' is now outside the loop.
+	for v := buckets[pseudoRoot]; v != pseudoRoot; v = buckets[v] {
+		d.idom[v] = pseudoRoot
+	}
+
+	// Step 4. Explicitly define the immediate dominator of each
+	// node, in preorder.
+	for _, w := range d.vertices[1:] {
+		if d.idom[w] != d.vertices[d.semis[w]] {
+			d.idom[w] = d.idom[d.idom[w]]
+		}
+	}
+}
+
+// eval is EVAL from the papers.
+func (d *ltDom) eval(v vName) vName {
+	if d.ancestor[v] == -1 {
+		return v
+	}
+	d.compress(v)
+	return d.labels[v]
+}
+
+// compress is COMPRESS from the papers.
+func (d *ltDom) compress(v vName) {
+	var stackBuf [20]vName
+	stack := stackBuf[:0]
+	for d.ancestor[d.ancestor[v]] != -1 {
+		stack = append(stack, v)
+		v = d.ancestor[v]
+	}
+
+	for len(stack) != 0 {
+		v := stack[len(stack)-1]
+		stack = stack[:len(stack)-1]
+
+		if d.semis[d.labels[d.ancestor[v]]] < d.semis[d.labels[v]] {
+			d.labels[v] = d.labels[d.ancestor[v]]
+		}
+		d.ancestor[v] = d.ancestor[d.ancestor[v]]
+	}
+}
+
+// link is LINK from the papers.
+func (d *ltDom) link(v, w vName) {
+	d.ancestor[w] = v
+}
+
+// reverse computes and stores reverse edges for each vertex.
+func (d *dominators) reverse() {
+	// One inbound edge per vertex. Then we need an extra so that you can
+	// always look at ridx[i+1], and another for working storage while
+	// populating redge.
+	cnt := make([]int, len(d.idom)+2)
+
+	// Fill cnt[2:] with the number of outbound edges for each vertex.
+	tmp := cnt[2:]
+	for _, idom := range d.idom {
+		tmp[idom]++
+	}
+
+	// Make tmp cumulative. After this step, cnt[1:] is what we want for
+	// ridx, but the next step messes it up.
+	var n int
+	for idx, c := range tmp {
+		n += c
+		tmp[idx] = n
+	}
+
+	// Store outbound edges in redge, using cnt[1:] as the index to store
+	// the next edge for each vertex. After we're done, everything's been
+	// shifted over one, and cnt is ridx.
+	redge := make([]vName, len(d.idom))
+	tmp = cnt[1:]
+	for i, idom := range d.idom {
+		redge[tmp[idom]] = vName(i)
+		tmp[idom]++
+	}
+	d.redge, d.ridx = redge, cnt[:len(cnt)-1]
+}
+
+type dfsMode int
+
+const (
+	down dfsMode = iota
+	up
+)
+
+// calcSize calculates the total retained size for each vertex.
+func (d *dominators) calcSize(p *Process) {
+	d.size = make([]int64, len(d.idom))
+	type workItem struct {
+		v    vName
+		mode dfsMode
+	}
+	work := []workItem{{pseudoRoot, down}}
+
+	for len(work) > 0 {
+		item := &work[len(work)-1]
+
+		kids := d.redge[d.ridx[item.v]:d.ridx[item.v+1]]
+		if item.mode == down && len(kids) != 0 {
+			item.mode = up
+			for _, w := range kids {
+				if w == 0 {
+					// bogus self-edge. Ignore.
+					continue
+				}
+				work = append(work, workItem{w, down})
+			}
+			continue
+		}
+
+		work = work[:len(work)-1]
+
+		root, obj := findVertexByName(p, item.v)
+		var size int64
+		switch {
+		case item.v == pseudoRoot:
+			break
+		case root != nil:
+			size += root.Type.Size
+		default:
+			size += p.Size(obj)
+		}
+		for _, w := range kids {
+			size += d.size[w]
+		}
+		d.size[item.v] = size
+	}
+}
+
+func (d *ltDom) dot(w io.Writer) {
+	fmt.Fprintf(w, "digraph %s {\nrankdir=\"LR\"\n", "dominators")
+	for number, name := range d.vertices {
+		var label string
+		root, obj := findVertexByName(d.p, name)
+
+		switch {
+		case name == 0:
+			label = "pseudo-root"
+		case root != nil:
+			typeName := root.Type.Name
+			if len(typeName) > 30 {
+				typeName = typeName[:30]
+			}
+			label = fmt.Sprintf("root %s %#x (type %s)", root.Name, root.Addr, typeName)
+		default:
+			typ, _ := d.p.Type(obj)
+			var typeName string
+			if typ != nil {
+				typeName = typ.Name
+				if len(typeName) > 30 {
+					typeName = typeName[:30]
+				}
+			}
+			label = fmt.Sprintf("object %#x (type %s)", obj, typeName)
+		}
+
+		fmt.Fprintf(w, "\t%v [label=\"name #%04v, number #%04v: %s\"]\n", name, name, number, label)
+	}
+
+	fmt.Fprint(w, "\n\n")
+	for v, parent := range d.parents {
+		fmt.Fprintf(w, "\t%v -> %v [style=\"solid\"]\n", parent, v)
+	}
+	for v, idom := range d.idom {
+		fmt.Fprintf(w, "\t%v -> %v [style=\"bold\"]\n", idom, v)
+	}
+	for v, sdom := range d.semis {
+		fmt.Fprintf(w, "\t%v -> %v [style=\"dotted\"]\n", v, d.vertices[sdom])
+	}
+	fmt.Fprint(w, "}\n")
+}
diff --git a/gocore/dominator_test.go b/gocore/dominator_test.go
new file mode 100644
index 0000000..2f70902
--- /dev/null
+++ b/gocore/dominator_test.go
@@ -0,0 +1,198 @@
+// Copyright 2018 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gocore
+
+import (
+	"fmt"
+	"os"
+	"testing"
+)
+
+func TestLT(t *testing.T) {
+	p := loadExample(t)
+	lt := runLT(p)
+	sanityCheck(t, lt)
+	if false {
+		lt.dot(os.Stdout)
+	}
+}
+
+func TestDominators(t *testing.T) {
+	p := loadExample(t)
+	d := p.calculateDominators()
+	if size := d.size[pseudoRoot]; size < 100<<10 {
+		t.Errorf("total size of objects is only %v bytes, should be >100KiB", size)
+	}
+}
+
+func sanityCheck(t *testing.T, d ltDom) bool {
+	t.Helper()
+	// Build pointer-y graph.
+	pRoot := sanityVertex{}
+	roots := make([]sanityVertex, d.nRoots)
+	objects := make([]sanityVertex, d.p.nObj)
+
+	for i, r := range d.p.rootIdx {
+		v := &roots[i]
+		v.root = r
+		pRoot.succ = append(pRoot.succ, v)
+		v.pred = append(v.pred, &pRoot)
+		d.p.ForEachRootPtr(r, func(_ int64, x Object, _ int64) bool {
+			idx, _ := d.p.findObjectIndex(d.p.Addr(x))
+			v.succ = append(v.succ, &objects[idx])
+			objects[idx].pred = append(objects[idx].pred, v)
+			return true
+		})
+	}
+	d.p.ForEachObject(func(x Object) bool {
+		xIdx, _ := d.p.findObjectIndex(d.p.Addr(x))
+		v := &objects[xIdx]
+		v.obj = x
+		d.p.ForEachPtr(x, func(_ int64, y Object, _ int64) bool {
+			yIdx, _ := d.p.findObjectIndex(d.p.Addr(y))
+			v.succ = append(v.succ, &objects[yIdx])
+			objects[yIdx].pred = append(objects[yIdx].pred, v)
+			return true
+		})
+		return true
+	})
+
+	// Precompute postorder traversal.
+	var postorder []*sanityVertex
+	type workItem struct {
+		v    *sanityVertex
+		mode dfsMode
+	}
+	seen := make(map[*sanityVertex]bool, d.nVertices)
+	work := []workItem{{&pRoot, down}}
+	for len(work) > 0 {
+		item := &work[len(work)-1]
+
+		if item.mode == down && len(item.v.succ) != 0 {
+			item.mode = up
+			for _, w := range item.v.succ {
+				// Only push each node once.
+				if seen[w] {
+					continue
+				}
+				seen[w] = true
+
+				work = append(work, workItem{w, down})
+			}
+			continue
+		}
+
+		work = work[:len(work)-1]
+		postorder = append(postorder, item.v)
+	}
+
+	// Make map from block id to order index (for intersect call)
+	postnum := make(map[*sanityVertex]int, d.nVertices)
+	for i, b := range postorder {
+		postnum[b] = i
+	}
+
+	// Make the pseudo-root a self-loop
+	pRoot.idom = &pRoot
+	if postnum[&pRoot] != len(postorder)-1 {
+		panic("pseudo-root not last in postorder")
+	}
+
+	// Compute relaxation of idom entries
+	for {
+		changed := false
+
+		for i := len(postorder) - 2; i >= 0; i-- {
+			v := postorder[i]
+			var d *sanityVertex
+
+			for _, pred := range v.pred {
+				if pred.idom == nil {
+					continue
+				}
+
+				if d == nil {
+					d = pred
+					continue
+				}
+
+				d = intersect(d, pred, postnum)
+			}
+			if v.idom != d {
+				v.idom = d
+				changed = true
+			}
+		}
+
+		if !changed {
+			break
+		}
+	}
+
+	pRoot.idom = nil
+
+	getVertex := func(n vName) *sanityVertex {
+		r, o := findVertexByName(d.p, n)
+		switch {
+		case n == pseudoRoot:
+			return &pRoot
+		case r != nil:
+			return &roots[d.p.findRootIndex(r)]
+		default:
+			idx, _ := d.p.findObjectIndex(d.p.Addr(o))
+			return &objects[idx]
+		}
+	}
+
+	matches := true
+	for vertName, domName := range d.idom {
+		if vName(vertName) == pseudoRoot {
+			continue
+		}
+		vert := getVertex(vName(vertName))
+		dom := getVertex(domName)
+
+		if vert.idom != dom {
+			matches = false
+			t.Errorf("Mismatch in idom for %v, name #%04v: fast reports %v, sanity reports %v\n", vert.String(d.p), vertName, dom.String(d.p), vert.idom.String(d.p))
+		}
+	}
+	return matches
+}
+
+func intersect(v, w *sanityVertex, postnum map[*sanityVertex]int) *sanityVertex {
+	for v != w {
+		if postnum[v] < postnum[w] {
+			v = v.idom
+		} else {
+			w = w.idom
+		}
+	}
+	return v
+}
+
+type sanityVertex struct {
+	root *Root
+	obj  Object
+	pred []*sanityVertex
+	succ []*sanityVertex
+	idom *sanityVertex
+}
+
+func (v *sanityVertex) String(p *Process) string {
+	switch {
+	case v.root != nil:
+		return fmt.Sprintf("root %s %#x (type %s)", v.root.Name, v.root.Addr, v.root.Type)
+	case v.obj != 0:
+		typ, _ := p.Type(v.obj)
+		var typeName string
+		if typ != nil {
+			typeName = typ.Name
+		}
+		return fmt.Sprintf("object %#x (type %s)", v.obj, typeName)
+	default:
+		return "pseudo-root"
+	}
+}
diff --git a/gocore/gocore_test.go b/gocore/gocore_test.go
index 320bac9..d59e598 100644
--- a/gocore/gocore_test.go
+++ b/gocore/gocore_test.go
@@ -41,6 +41,18 @@
 	}
 }
 
+func TestObjectIndexes(t *testing.T) {
+	p := loadExample(t)
+	p.ForEachObject(func(x Object) bool {
+		idx, _ := p.findObjectIndex(p.Addr(x))
+		got := p.findObjectFromIndex(idx)
+		if x != got {
+			t.Errorf("findObjectFromIndex(%v) = %#x, want %#x", idx, got, x)
+		}
+		return true
+	})
+}
+
 func TestRoots(t *testing.T) {
 	p := loadExample(t)
 	n := 0
diff --git a/gocore/object.go b/gocore/object.go
index f81c40c..61d66de 100644
--- a/gocore/object.go
+++ b/gocore/object.go
@@ -6,6 +6,7 @@
 
 import (
 	"math/bits"
+	"sort"
 	"strings"
 
 	"golang.org/x/debug/core"
@@ -109,9 +110,6 @@
 	// Initialize firstIdx fields in the heapInfo, for fast object index lookups.
 	for i := len(p.heapInfo) - 1; i >= 0; i-- {
 		h := &p.heapInfo[i]
-		if h.mark == 0 { // not really necessary, just leave -1 sentinel as a double check.
-			continue
-		}
 		n -= bits.OnesCount64(h.mark)
 		h.firstIdx = n
 	}
@@ -188,6 +186,30 @@
 	return h.firstIdx + bits.OnesCount64(h.mark&(uint64(1)<<(uint64(x)%heapInfoSize/8)-1)), off
 }
 
+func (p *Process) findObjectFromIndex(idx int) Object {
+	// Find the first heapInfo after the index we want, then go one back,
+	// so that we skip over empty chunks.
+	hIdx := sort.Search(len(p.heapInfo), func(k int) bool {
+		return p.heapInfo[k].firstIdx > idx
+	})
+	if hIdx > 0 {
+		hIdx--
+	}
+	h := p.heapInfo[hIdx]
+	gap := idx - h.firstIdx + 1
+	// TODO: use math/bits somehow?
+	for offset := core.Address(0); offset < heapInfoSize; offset += 8 {
+		if h.mark>>(offset/8)&1 == 1 {
+			gap--
+		}
+		if gap == 0 {
+			// Can't use heapInfo.base: it's the start of the *span*, not the heapInfo.
+			return Object(p.arenaStart + core.Address(hIdx*heapInfoSize) + offset)
+		}
+	}
+	panic("Overran heap info looking for live objects")
+}
+
 // ForEachObject calls fn with each object in the Go heap.
 // If fn returns false, ForEachObject returns immediately.
 func (p *Process) ForEachObject(fn func(x Object) bool) {
@@ -326,7 +348,7 @@
 	base     core.Address // start of the span containing this heap region
 	size     int64        // size of objects in the span
 	mark     uint64       // 64 mark bits, one for every 8 bytes
-	firstIdx int          // the index of the first object that starts in this region (-1 if none)
+	firstIdx int          // the index of the first object that starts in or after this region
 }
 
 // findHeapInfo finds the heapInfo structure for a.
diff --git a/gocore/reverse.go b/gocore/reverse.go
index 6cda39d..350374d 100644
--- a/gocore/reverse.go
+++ b/gocore/reverse.go
@@ -110,3 +110,9 @@
 		}
 	}
 }
+
+func (p *Process) findRootIndex(r *Root) int {
+	return sort.Search(len(p.rootIdx), func(k int) bool {
+		return p.rootIdx[k].Addr >= r.Addr
+	})
+}