gocore: add dominator tree computation
Calculate the heap's dominator tree using modified L-T, reverse its
edges, and use that to calculate the per-object retained size.
One small functionality change: heapInfo.firstIdx is now set for empty
chunks, allowing binary search over Process.heapInfo.
Haven't really figured out what the public API for this should be yet;
maybe it's okay to expose the vertex names and provide an iterator
method.
This should be pretty fast, but I haven't benchmarked it yet, so there
might be some low-hanging fruit lying around.
Change-Id: I2c85c29514a0f2e60231f13f0c058dd7f12b548e
Reviewed-on: https://go-review.googlesource.com/98736
Reviewed-by: Keith Randall <khr@golang.org>
diff --git a/gocore/dominator.go b/gocore/dominator.go
new file mode 100644
index 0000000..375fd86
--- /dev/null
+++ b/gocore/dominator.go
@@ -0,0 +1,398 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gocore
+
+import (
+ "fmt"
+ "io"
+)
+
+// Code liberally adapted from cmd/compile/internal/ssa/dom.go and
+// x/tools/go/ssa/dom.go.
+//
+// We use the algorithm described in Lengauer & Tarjan. 1979. A fast
+// algorithm for finding dominators in a flowgraph.
+// http://doi.acm.org/10.1145/357062.357071
+//
+// We also apply the optimizations to SLT described in Georgiadis et
+// al, Finding Dominators in Practice, JGAA 2006,
+// http://jgaa.info/accepted/2006/GeorgiadisTarjanWerneck2006.10.1.pdf
+// to avoid the need for buckets of size > 1.
+
+// Vertex name, as used in the papers.
+// 0 -> the pseudo-root, a made-up object that parents all the GC roots.
+// 1...nRoots -> a root, found at p.rootIdx[#-1]
+// nRoots+1... -> an object, with object index # - nRoots - 1
+type vName int
+
+const pseudoRoot vName = 0
+
+// Vertex number, assigned in the DFS traversal in step 1.
+type vNumber int
+
+type ltDom struct {
+ p *Process
+
+ // number -> name
+ vertices []vName
+
+ // name -> parent name
+ parents []vName
+
+ // name -> vertex number before step 2 or semidominator number after.
+ semis []vNumber
+
+ // name -> ancestor name
+ ancestor []vName
+ labels []vName
+
+ // name -> dominator name
+ idom []vName
+
+ nVertices, nRoots int
+}
+
+type dominators struct {
+ // name -> dominator name
+ idom []vName
+
+ // Reverse dominator tree edges, stored just like the ones in Process. name -> child name.
+ ridx []int
+ redge []vName
+
+ // Retained size for each vertex. name -> retained size.
+ size []int64
+}
+
+func (p *Process) calculateDominators() *dominators {
+ lt := runLT(p)
+ d := dominators{idom: lt.idom}
+ lt = ltDom{}
+
+ d.reverse()
+ d.calcSize(p)
+
+ return &d
+}
+
+func runLT(p *Process) ltDom {
+ nVertices := 1 + len(p.rootIdx) + p.nObj
+ lt := ltDom{
+ p: p,
+ nRoots: len(p.rootIdx),
+ nVertices: nVertices,
+ vertices: make([]vName, nVertices),
+ parents: make([]vName, nVertices),
+ semis: make([]vNumber, nVertices),
+ ancestor: make([]vName, nVertices),
+ labels: make([]vName, nVertices),
+ idom: make([]vName, nVertices),
+ }
+ // TODO: increment all the names and use 0 as the uninitialized value.
+ for i := range lt.semis {
+ lt.semis[i] = -1
+ }
+ for i := range lt.ancestor {
+ lt.ancestor[i] = -1
+ }
+ for i := range lt.labels {
+ lt.labels[i] = vName(i)
+ }
+ lt.initialize()
+ lt.calculate()
+ return lt
+}
+
+// initialize implements step 1 of LT.
+func (d *ltDom) initialize() {
+ type workItem struct {
+ name vName
+ parentName vName
+ }
+
+ // Add roots to the work stack, essentially pretending to visit
+ // the pseudo-root, numbering it 0.
+ d.semis[pseudoRoot] = 0
+ d.parents[pseudoRoot] = -1
+ d.vertices[0] = pseudoRoot
+ var work []workItem
+ for i := 1; i < 1+d.nRoots; i++ {
+ work = append(work, workItem{name: vName(i), parentName: 0})
+ }
+
+ n := vNumber(1) // 0 was the pseudo-root.
+
+ // Build the spanning tree, assigning vertex numbers to each object
+ // and initializing semi and parent.
+ for len(work) != 0 {
+ item := work[len(work)-1]
+ work = work[:len(work)-1]
+
+ if d.semis[item.name] != -1 {
+ continue
+ }
+
+ d.semis[item.name] = n
+ d.parents[item.name] = item.parentName
+ d.vertices[n] = item.name
+ n++
+
+ visitChild := func(_ int64, child Object, _ int64) bool {
+ childIdx, _ := d.p.findObjectIndex(d.p.Addr(child))
+ work = append(work, workItem{name: vName(childIdx + d.nRoots + 1), parentName: item.name})
+ return true
+ }
+
+ root, object := findVertexByName(d.p, item.name)
+ if root != nil {
+ d.p.ForEachRootPtr(root, visitChild)
+ } else {
+ d.p.ForEachPtr(object, visitChild)
+ }
+
+ }
+}
+
+// findVertexByName returns the root/object named by n, or nil,0 for the pseudo-root.
+func findVertexByName(p *Process, n vName) (*Root, Object) {
+ if n == 0 {
+ return nil, 0
+ }
+ if int(n) < len(p.rootIdx)+1 {
+ return p.rootIdx[n-1], 0
+ }
+ return nil, p.findObjectFromIndex(int(n) - len(p.rootIdx) - 1)
+}
+
+// calculate runs the main part of LT.
+func (d *ltDom) calculate() {
+ // name -> bucket (a name), per Georgiadis.
+ buckets := make([]vName, d.nVertices)
+ for i := range buckets {
+ buckets[i] = vName(i)
+ }
+
+ for i := vNumber(len(d.vertices)) - 1; i > 0; i-- {
+ w := d.vertices[i]
+
+ // Step 3. Implicitly define the immediate dominator of each node.
+ for v := buckets[w]; v != w; v = buckets[v] {
+ u := d.eval(v)
+ if d.semis[u] < d.semis[v] {
+ d.idom[v] = u
+ } else {
+ d.idom[v] = w
+ }
+ }
+
+ // Step 2. Compute the semidominators of all nodes.
+ root, obj := findVertexByName(d.p, w)
+ // This loop never visits the pseudo-root.
+ if root != nil {
+ u := d.eval(pseudoRoot)
+ if d.semis[u] < d.semis[w] {
+ d.semis[w] = d.semis[u]
+ }
+ } else {
+ d.p.ForEachReversePtr(obj, func(x Object, r *Root, _, _ int64) bool {
+ var v int
+ if r != nil {
+ v = d.p.findRootIndex(r) + 1
+ } else {
+ v, _ = d.p.findObjectIndex(d.p.Addr(x))
+ v += d.nRoots + 1
+ }
+ u := d.eval(vName(v))
+ if d.semis[u] < d.semis[w] {
+ d.semis[w] = d.semis[u]
+ }
+ return true
+ })
+ }
+
+ d.link(d.parents[w], w)
+
+ if d.parents[w] == d.vertices[d.semis[w]] {
+ d.idom[w] = d.parents[w]
+ } else {
+ buckets[w] = buckets[d.vertices[d.semis[w]]]
+ buckets[d.vertices[d.semis[w]]] = w
+ }
+ }
+
+ // The final 'Step 3' is now outside the loop.
+ for v := buckets[pseudoRoot]; v != pseudoRoot; v = buckets[v] {
+ d.idom[v] = pseudoRoot
+ }
+
+ // Step 4. Explicitly define the immediate dominator of each
+ // node, in preorder.
+ for _, w := range d.vertices[1:] {
+ if d.idom[w] != d.vertices[d.semis[w]] {
+ d.idom[w] = d.idom[d.idom[w]]
+ }
+ }
+}
+
+// eval is EVAL from the papers.
+func (d *ltDom) eval(v vName) vName {
+ if d.ancestor[v] == -1 {
+ return v
+ }
+ d.compress(v)
+ return d.labels[v]
+}
+
+// compress is COMPRESS from the papers.
+func (d *ltDom) compress(v vName) {
+ var stackBuf [20]vName
+ stack := stackBuf[:0]
+ for d.ancestor[d.ancestor[v]] != -1 {
+ stack = append(stack, v)
+ v = d.ancestor[v]
+ }
+
+ for len(stack) != 0 {
+ v := stack[len(stack)-1]
+ stack = stack[:len(stack)-1]
+
+ if d.semis[d.labels[d.ancestor[v]]] < d.semis[d.labels[v]] {
+ d.labels[v] = d.labels[d.ancestor[v]]
+ }
+ d.ancestor[v] = d.ancestor[d.ancestor[v]]
+ }
+}
+
+// link is LINK from the papers.
+func (d *ltDom) link(v, w vName) {
+ d.ancestor[w] = v
+}
+
+// reverse computes and stores reverse edges for each vertex.
+func (d *dominators) reverse() {
+ // One inbound edge per vertex. Then we need an extra so that you can
+ // always look at ridx[i+1], and another for working storage while
+ // populating redge.
+ cnt := make([]int, len(d.idom)+2)
+
+ // Fill cnt[2:] with the number of outbound edges for each vertex.
+ tmp := cnt[2:]
+ for _, idom := range d.idom {
+ tmp[idom]++
+ }
+
+ // Make tmp cumulative. After this step, cnt[1:] is what we want for
+ // ridx, but the next step messes it up.
+ var n int
+ for idx, c := range tmp {
+ n += c
+ tmp[idx] = n
+ }
+
+ // Store outbound edges in redge, using cnt[1:] as the index to store
+ // the next edge for each vertex. After we're done, everything's been
+ // shifted over one, and cnt is ridx.
+ redge := make([]vName, len(d.idom))
+ tmp = cnt[1:]
+ for i, idom := range d.idom {
+ redge[tmp[idom]] = vName(i)
+ tmp[idom]++
+ }
+ d.redge, d.ridx = redge, cnt[:len(cnt)-1]
+}
+
+type dfsMode int
+
+const (
+ down dfsMode = iota
+ up
+)
+
+// calcSize calculates the total retained size for each vertex.
+func (d *dominators) calcSize(p *Process) {
+ d.size = make([]int64, len(d.idom))
+ type workItem struct {
+ v vName
+ mode dfsMode
+ }
+ work := []workItem{{pseudoRoot, down}}
+
+ for len(work) > 0 {
+ item := &work[len(work)-1]
+
+ kids := d.redge[d.ridx[item.v]:d.ridx[item.v+1]]
+ if item.mode == down && len(kids) != 0 {
+ item.mode = up
+ for _, w := range kids {
+ if w == 0 {
+ // bogus self-edge. Ignore.
+ continue
+ }
+ work = append(work, workItem{w, down})
+ }
+ continue
+ }
+
+ work = work[:len(work)-1]
+
+ root, obj := findVertexByName(p, item.v)
+ var size int64
+ switch {
+ case item.v == pseudoRoot:
+ break
+ case root != nil:
+ size += root.Type.Size
+ default:
+ size += p.Size(obj)
+ }
+ for _, w := range kids {
+ size += d.size[w]
+ }
+ d.size[item.v] = size
+ }
+}
+
+func (d *ltDom) dot(w io.Writer) {
+ fmt.Fprintf(w, "digraph %s {\nrankdir=\"LR\"\n", "dominators")
+ for number, name := range d.vertices {
+ var label string
+ root, obj := findVertexByName(d.p, name)
+
+ switch {
+ case name == 0:
+ label = "pseudo-root"
+ case root != nil:
+ typeName := root.Type.Name
+ if len(typeName) > 30 {
+ typeName = typeName[:30]
+ }
+ label = fmt.Sprintf("root %s %#x (type %s)", root.Name, root.Addr, typeName)
+ default:
+ typ, _ := d.p.Type(obj)
+ var typeName string
+ if typ != nil {
+ typeName = typ.Name
+ if len(typeName) > 30 {
+ typeName = typeName[:30]
+ }
+ }
+ label = fmt.Sprintf("object %#x (type %s)", obj, typeName)
+ }
+
+ fmt.Fprintf(w, "\t%v [label=\"name #%04v, number #%04v: %s\"]\n", name, name, number, label)
+ }
+
+ fmt.Fprint(w, "\n\n")
+ for v, parent := range d.parents {
+ fmt.Fprintf(w, "\t%v -> %v [style=\"solid\"]\n", parent, v)
+ }
+ for v, idom := range d.idom {
+ fmt.Fprintf(w, "\t%v -> %v [style=\"bold\"]\n", idom, v)
+ }
+ for v, sdom := range d.semis {
+ fmt.Fprintf(w, "\t%v -> %v [style=\"dotted\"]\n", v, d.vertices[sdom])
+ }
+ fmt.Fprint(w, "}\n")
+}
diff --git a/gocore/dominator_test.go b/gocore/dominator_test.go
new file mode 100644
index 0000000..2f70902
--- /dev/null
+++ b/gocore/dominator_test.go
@@ -0,0 +1,198 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gocore
+
+import (
+ "fmt"
+ "os"
+ "testing"
+)
+
+func TestLT(t *testing.T) {
+ p := loadExample(t)
+ lt := runLT(p)
+ sanityCheck(t, lt)
+ if false {
+ lt.dot(os.Stdout)
+ }
+}
+
+func TestDominators(t *testing.T) {
+ p := loadExample(t)
+ d := p.calculateDominators()
+ if size := d.size[pseudoRoot]; size < 100<<10 {
+ t.Errorf("total size of objects is only %v bytes, should be >100KiB", size)
+ }
+}
+
+func sanityCheck(t *testing.T, d ltDom) bool {
+ t.Helper()
+ // Build pointer-y graph.
+ pRoot := sanityVertex{}
+ roots := make([]sanityVertex, d.nRoots)
+ objects := make([]sanityVertex, d.p.nObj)
+
+ for i, r := range d.p.rootIdx {
+ v := &roots[i]
+ v.root = r
+ pRoot.succ = append(pRoot.succ, v)
+ v.pred = append(v.pred, &pRoot)
+ d.p.ForEachRootPtr(r, func(_ int64, x Object, _ int64) bool {
+ idx, _ := d.p.findObjectIndex(d.p.Addr(x))
+ v.succ = append(v.succ, &objects[idx])
+ objects[idx].pred = append(objects[idx].pred, v)
+ return true
+ })
+ }
+ d.p.ForEachObject(func(x Object) bool {
+ xIdx, _ := d.p.findObjectIndex(d.p.Addr(x))
+ v := &objects[xIdx]
+ v.obj = x
+ d.p.ForEachPtr(x, func(_ int64, y Object, _ int64) bool {
+ yIdx, _ := d.p.findObjectIndex(d.p.Addr(y))
+ v.succ = append(v.succ, &objects[yIdx])
+ objects[yIdx].pred = append(objects[yIdx].pred, v)
+ return true
+ })
+ return true
+ })
+
+ // Precompute postorder traversal.
+ var postorder []*sanityVertex
+ type workItem struct {
+ v *sanityVertex
+ mode dfsMode
+ }
+ seen := make(map[*sanityVertex]bool, d.nVertices)
+ work := []workItem{{&pRoot, down}}
+ for len(work) > 0 {
+ item := &work[len(work)-1]
+
+ if item.mode == down && len(item.v.succ) != 0 {
+ item.mode = up
+ for _, w := range item.v.succ {
+ // Only push each node once.
+ if seen[w] {
+ continue
+ }
+ seen[w] = true
+
+ work = append(work, workItem{w, down})
+ }
+ continue
+ }
+
+ work = work[:len(work)-1]
+ postorder = append(postorder, item.v)
+ }
+
+ // Make map from block id to order index (for intersect call)
+ postnum := make(map[*sanityVertex]int, d.nVertices)
+ for i, b := range postorder {
+ postnum[b] = i
+ }
+
+ // Make the pseudo-root a self-loop
+ pRoot.idom = &pRoot
+ if postnum[&pRoot] != len(postorder)-1 {
+ panic("pseudo-root not last in postorder")
+ }
+
+ // Compute relaxation of idom entries
+ for {
+ changed := false
+
+ for i := len(postorder) - 2; i >= 0; i-- {
+ v := postorder[i]
+ var d *sanityVertex
+
+ for _, pred := range v.pred {
+ if pred.idom == nil {
+ continue
+ }
+
+ if d == nil {
+ d = pred
+ continue
+ }
+
+ d = intersect(d, pred, postnum)
+ }
+ if v.idom != d {
+ v.idom = d
+ changed = true
+ }
+ }
+
+ if !changed {
+ break
+ }
+ }
+
+ pRoot.idom = nil
+
+ getVertex := func(n vName) *sanityVertex {
+ r, o := findVertexByName(d.p, n)
+ switch {
+ case n == pseudoRoot:
+ return &pRoot
+ case r != nil:
+ return &roots[d.p.findRootIndex(r)]
+ default:
+ idx, _ := d.p.findObjectIndex(d.p.Addr(o))
+ return &objects[idx]
+ }
+ }
+
+ matches := true
+ for vertName, domName := range d.idom {
+ if vName(vertName) == pseudoRoot {
+ continue
+ }
+ vert := getVertex(vName(vertName))
+ dom := getVertex(domName)
+
+ if vert.idom != dom {
+ matches = false
+ t.Errorf("Mismatch in idom for %v, name #%04v: fast reports %v, sanity reports %v\n", vert.String(d.p), vertName, dom.String(d.p), vert.idom.String(d.p))
+ }
+ }
+ return matches
+}
+
+func intersect(v, w *sanityVertex, postnum map[*sanityVertex]int) *sanityVertex {
+ for v != w {
+ if postnum[v] < postnum[w] {
+ v = v.idom
+ } else {
+ w = w.idom
+ }
+ }
+ return v
+}
+
+type sanityVertex struct {
+ root *Root
+ obj Object
+ pred []*sanityVertex
+ succ []*sanityVertex
+ idom *sanityVertex
+}
+
+func (v *sanityVertex) String(p *Process) string {
+ switch {
+ case v.root != nil:
+ return fmt.Sprintf("root %s %#x (type %s)", v.root.Name, v.root.Addr, v.root.Type)
+ case v.obj != 0:
+ typ, _ := p.Type(v.obj)
+ var typeName string
+ if typ != nil {
+ typeName = typ.Name
+ }
+ return fmt.Sprintf("object %#x (type %s)", v.obj, typeName)
+ default:
+ return "pseudo-root"
+ }
+}
diff --git a/gocore/gocore_test.go b/gocore/gocore_test.go
index 320bac9..d59e598 100644
--- a/gocore/gocore_test.go
+++ b/gocore/gocore_test.go
@@ -41,6 +41,18 @@
}
}
+func TestObjectIndexes(t *testing.T) {
+ p := loadExample(t)
+ p.ForEachObject(func(x Object) bool {
+ idx, _ := p.findObjectIndex(p.Addr(x))
+ got := p.findObjectFromIndex(idx)
+ if x != got {
+ t.Errorf("findObjectFromIndex(%v) = %#x, want %#x", idx, got, x)
+ }
+ return true
+ })
+}
+
func TestRoots(t *testing.T) {
p := loadExample(t)
n := 0
diff --git a/gocore/object.go b/gocore/object.go
index f81c40c..61d66de 100644
--- a/gocore/object.go
+++ b/gocore/object.go
@@ -6,6 +6,7 @@
import (
"math/bits"
+ "sort"
"strings"
"golang.org/x/debug/core"
@@ -109,9 +110,6 @@
// Initialize firstIdx fields in the heapInfo, for fast object index lookups.
for i := len(p.heapInfo) - 1; i >= 0; i-- {
h := &p.heapInfo[i]
- if h.mark == 0 { // not really necessary, just leave -1 sentinel as a double check.
- continue
- }
n -= bits.OnesCount64(h.mark)
h.firstIdx = n
}
@@ -188,6 +186,30 @@
return h.firstIdx + bits.OnesCount64(h.mark&(uint64(1)<<(uint64(x)%heapInfoSize/8)-1)), off
}
+func (p *Process) findObjectFromIndex(idx int) Object {
+ // Find the first heapInfo after the index we want, then go one back,
+ // so that we skip over empty chunks.
+ hIdx := sort.Search(len(p.heapInfo), func(k int) bool {
+ return p.heapInfo[k].firstIdx > idx
+ })
+ if hIdx > 0 {
+ hIdx--
+ }
+ h := p.heapInfo[hIdx]
+ gap := idx - h.firstIdx + 1
+ // TODO: use math/bits somehow?
+ for offset := core.Address(0); offset < heapInfoSize; offset += 8 {
+ if h.mark>>(offset/8)&1 == 1 {
+ gap--
+ }
+ if gap == 0 {
+ // Can't use heapInfo.base: it's the start of the *span*, not the heapInfo.
+ return Object(p.arenaStart + core.Address(hIdx*heapInfoSize) + offset)
+ }
+ }
+ panic("Overran heap info looking for live objects")
+}
+
// ForEachObject calls fn with each object in the Go heap.
// If fn returns false, ForEachObject returns immediately.
func (p *Process) ForEachObject(fn func(x Object) bool) {
@@ -326,7 +348,7 @@
base core.Address // start of the span containing this heap region
size int64 // size of objects in the span
mark uint64 // 64 mark bits, one for every 8 bytes
- firstIdx int // the index of the first object that starts in this region (-1 if none)
+ firstIdx int // the index of the first object that starts in or after this region
}
// findHeapInfo finds the heapInfo structure for a.
diff --git a/gocore/reverse.go b/gocore/reverse.go
index 6cda39d..350374d 100644
--- a/gocore/reverse.go
+++ b/gocore/reverse.go
@@ -110,3 +110,9 @@
}
}
}
+
+func (p *Process) findRootIndex(r *Root) int {
+ return sort.Search(len(p.rootIdx), func(k int) bool {
+ return p.rootIdx[k].Addr >= r.Addr
+ })
+}