go/pointer: implement pointer equivalence via hash-value numbering, a pre-solver optimization. This reduces solver time by about 40%. See hvn.go for detailed description. Also in this CL: - Update package docs. - Added various global opt/debug options for maintainer convenience. - Added logging of phase timing. - Added stdlib_test, disabled by default, that runs the analysis on all tests in $GOROOT. - include types when dumping solution LGTM=crawshaw R=crawshaw, dannyb CC=golang-codereviews https://golang.org/cl/96650048

commit: 9b38eafe607ba43ac44da301e6e04a1b7c2e8d90 [log] [tgz]
author: Alan Donovan <adonovan@google.com> Mon Jun 16 15:46:07 2014 -0400
committer: Alan Donovan <adonovan@google.com> Mon Jun 16 15:46:07 2014 -0400
tree: 261efe554ab56cdbc5651cebc38f26b37daa4c48
parent: 47c0a8f0c30aa0ffdc2109b0a1b585123e2c0534 [diff] [blame]
diff --git a/go/pointer/analysis.go b/go/pointer/analysis.go
index 884962b..cb423f4 100644
--- a/go/pointer/analysis.go
+++ b/go/pointer/analysis.go

@@ -12,7 +12,9 @@
 	"io"
 	"os"
 	"reflect"
+	"runtime"
 	"runtime/debug"
+	"sort"
 
 	"code.google.com/p/go.tools/go/callgraph"
 	"code.google.com/p/go.tools/go/ssa"
@@ -20,6 +22,18 @@
 	"code.google.com/p/go.tools/go/types/typeutil"
 )
 
+const (
+	// optimization options; enable all when committing
+	optRenumber = true // enable renumbering optimization (makes logs hard to read)
+	optHVN      = true // enable pointer equivalence via Hash-Value Numbering
+
+	// debugging options; disable all when committing
+	debugHVN           = false // enable assertions in HVN
+	debugHVNVerbose    = false // enable extra HVN logging
+	debugHVNCrossCheck = false // run solver with/without HVN and compare (caveats below)
+	debugTimers        = true  // show running time of each phase
+)
+
 // object.flags bitmask values.
 const (
 	otTagged   = 1 << iota // type-tagged object
@@ -72,7 +86,7 @@
 type node struct {
 	// If non-nil, this node is the start of an object
 	// (addressable memory location).
-	// The following obj.size words implicitly belong to the object;
+	// The following obj.size nodes implicitly belong to the object;
 	// they locate their object by scanning back.
 	obj *object
 
@@ -85,21 +99,10 @@
 	// an object of aggregate type (struct, tuple, array) this is.
 	subelement *fieldInfo // e.g. ".a.b[*].c"
 
-	// Points-to sets.
-	pts     nodeset // points-to set of this node
-	prevPts nodeset // pts(n) in previous iteration (for difference propagation)
-
-	// Graph edges
-	copyTo nodeset // simple copy constraint edges
-
-	// Complex constraints attached to this node (x).
-	// - *loadConstraint       y=*x
-	// - *offsetAddrConstraint y=&x.f or y=&x[0]
-	// - *storeConstraint      *x=z
-	// - *typeFilterConstraint y=x.(I)
-	// - *untagConstraint      y=x.(C)
-	// - *invokeConstraint     y=x.f(params...)
-	complex []constraint
+	// Solver state for the canonical node of this pointer-
+	// equivalence class.  Each node is created with its own state
+	// but they become shared after HVN.
+	solve *solverState
 }
 
 // An analysis instance holds the state of a single pointer analysis problem.
@@ -119,6 +122,8 @@
 	globalobj   map[ssa.Value]nodeid        // maps v to sole member of pts(v), if singleton
 	localval    map[ssa.Value]nodeid        // node for each local ssa.Value
 	localobj    map[ssa.Value]nodeid        // maps v to sole member of pts(v), if singleton
+	atFuncs     map[*ssa.Function]bool      // address-taken functions (for presolver)
+	mapValues   []nodeid                    // values of makemap objects (indirect in HVN)
 	work        nodeset                     // solver's worklist
 	result      *Result                     // results of the analysis
 	track       track                       // pointerlike types whose aliasing we track
@@ -136,9 +141,10 @@
 	runtimeSetFinalizer *ssa.Function   // runtime.SetFinalizer
 }
 
-// enclosingObj returns the object (addressible memory object) that encloses node id.
-// Panic ensues if that node does not belong to any object.
-func (a *analysis) enclosingObj(id nodeid) *object {
+// enclosingObj returns the first node of the addressable memory
+// object that encloses node id.  Panic ensues if that node does not
+// belong to any object.
+func (a *analysis) enclosingObj(id nodeid) nodeid {
 	// Find previous node with obj != nil.
 	for i := id; i >= 0; i-- {
 		n := a.nodes[i]
@@ -146,7 +152,7 @@
 			if i+nodeid(obj.size) <= id {
 				break // out of bounds
 			}
-			return obj
+			return i
 		}
 	}
 	panic("node has no enclosing object")
@@ -156,7 +162,7 @@
 // Panic ensues if that node is not addressable.
 func (a *analysis) labelFor(id nodeid) *Label {
 	return &Label{
-		obj:        a.enclosingObj(id),
+		obj:        a.nodes[a.enclosingObj(id)].obj,
 		subelement: a.nodes[id].subelement,
 	}
 }
@@ -222,6 +228,7 @@
 		globalobj:   make(map[ssa.Value]nodeid),
 		flattenMemo: make(map[types.Type][]*fieldInfo),
 		trackTypes:  make(map[types.Type]bool),
+		atFuncs:     make(map[*ssa.Function]bool),
 		hasher:      typeutil.MakeHasher(),
 		intrinsics:  make(map[*ssa.Function]intrinsic),
 		result: &Result{
@@ -236,7 +243,7 @@
 	}
 
 	if a.log != nil {
-		fmt.Fprintln(a.log, "======== NEW ANALYSIS ========")
+		fmt.Fprintln(a.log, "==== Starting analysis")
 	}
 
 	// Pointer analysis requires a complete program for soundness.
@@ -275,24 +282,55 @@
 	a.computeTrackBits()
 
 	a.generate()
+	a.showCounts()
 
-	if a.log != nil {
-		// Show size of constraint system.
-		counts := make(map[reflect.Type]int)
-		for _, c := range a.constraints {
-			counts[reflect.TypeOf(c)]++
-		}
-		fmt.Fprintf(a.log, "# constraints:\t%d\n", len(a.constraints))
-		for t, n := range counts {
-			fmt.Fprintf(a.log, "\t%s:\t%d\n", t, n)
-		}
-		fmt.Fprintf(a.log, "# nodes:\t%d\n", len(a.nodes))
+	if optRenumber {
+		a.renumber()
 	}
 
-	a.optimize()
+	N := len(a.nodes) // excludes solver-created nodes
+
+	if optHVN {
+		if debugHVNCrossCheck {
+			// Cross-check: run the solver once without
+			// optimization, once with, and compare the
+			// solutions.
+			savedConstraints := a.constraints
+
+			a.solve()
+			a.dumpSolution("A.pts", N)
+
+			// Restore.
+			a.constraints = savedConstraints
+			for _, n := range a.nodes {
+				n.solve = new(solverState)
+			}
+			a.nodes = a.nodes[:N]
+
+			// rtypes is effectively part of the solver state.
+			a.rtypes = typeutil.Map{}
+			a.rtypes.SetHasher(a.hasher)
+		}
+
+		a.hvn()
+	}
+
+	if debugHVNCrossCheck {
+		runtime.GC()
+		runtime.GC()
+	}
 
 	a.solve()
 
+	// Compare solutions.
+	if optHVN && debugHVNCrossCheck {
+		a.dumpSolution("B.pts", N)
+
+		if !diff("A.pts", "B.pts") {
+			return nil, fmt.Errorf("internal error: optimization changed solution")
+		}
+	}
+
 	// Create callgraph.Nodes in deterministic order.
 	if cg := a.result.CallGraph; cg != nil {
 		for _, caller := range a.cgnodes {
@@ -304,7 +342,7 @@
 	var space [100]int
 	for _, caller := range a.cgnodes {
 		for _, site := range caller.sites {
-			for _, callee := range a.nodes[site.targets].pts.AppendTo(space[:0]) {
+			for _, callee := range a.nodes[site.targets].solve.pts.AppendTo(space[:0]) {
 				a.callEdge(caller, site, nodeid(callee))
 			}
 		}
@@ -342,3 +380,65 @@
 		a.warnf(fn.Pos(), " (declared here)")
 	}
 }
+
+// dumpSolution writes the PTS solution to the specified file.
+//
+// It only dumps the nodes that existed before solving.  The order in
+// which solver-created nodes are created depends on pre-solver
+// optimization, so we can't include them in the cross-check.
+//
+func (a *analysis) dumpSolution(filename string, N int) {
+	f, err := os.Create(filename)
+	if err != nil {
+		panic(err)
+	}
+	for id, n := range a.nodes[:N] {
+		if _, err := fmt.Fprintf(f, "pts(n%d) = {", id); err != nil {
+			panic(err)
+		}
+		var sep string
+		for _, l := range n.solve.pts.AppendTo(a.deltaSpace) {
+			if l >= N {
+				break
+			}
+			fmt.Fprintf(f, "%s%d", sep, l)
+			sep = " "
+		}
+		fmt.Fprintf(f, "} : %s\n", n.typ)
+	}
+	if err := f.Close(); err != nil {
+		panic(err)
+	}
+}
+
+// showCounts logs the size of the constraint system.  A typical
+// optimized distribution is 65% copy, 13% load, 11% addr, 5%
+// offsetAddr, 4% store, 2% others.
+//
+func (a *analysis) showCounts() {
+	if a.log != nil {
+		counts := make(map[reflect.Type]int)
+		for _, c := range a.constraints {
+			counts[reflect.TypeOf(c)]++
+		}
+		fmt.Fprintf(a.log, "# constraints:\t%d\n", len(a.constraints))
+		var lines []string
+		for t, n := range counts {
+			line := fmt.Sprintf("%7d  (%2d%%)\t%s", n, 100*n/len(a.constraints), t)
+			lines = append(lines, line)
+		}
+		sort.Sort(sort.Reverse(sort.StringSlice(lines)))
+		for _, line := range lines {
+			fmt.Fprintf(a.log, "\t%s\n", line)
+		}
+
+		fmt.Fprintf(a.log, "# nodes:\t%d\n", len(a.nodes))
+
+		// Show number of pointer equivalence classes.
+		m := make(map[*solverState]bool)
+		for _, n := range a.nodes {
+			m[n.solve] = true
+		}
+		fmt.Fprintf(a.log, "# ptsets:\t%d\n", len(m))
+	}
+}
commit	9b38eafe607ba43ac44da301e6e04a1b7c2e8d90	[log] [tgz]
author	Alan Donovan <adonovan@google.com>	Mon Jun 16 15:46:07 2014 -0400
committer	Alan Donovan <adonovan@google.com>	Mon Jun 16 15:46:07 2014 -0400
tree	261efe554ab56cdbc5651cebc38f26b37daa4c48
parent	47c0a8f0c30aa0ffdc2109b0a1b585123e2c0534 [diff] [blame]