vta: adds VTA graph propagation functionality

The propagation consists of two steps. First, VTA graph is converted to
a DAG by computing SCCs (resolving any graph loops) and then types and
higher-order functions are propagated through the DAG.

The results of this propagation are used in the final step to create a
call graph. That CL comes after this one.

Change-Id: Ib66a9401885d4105f3b6a68d5a0007f306882144
Reviewed-on: https://go-review.googlesource.com/c/tools/+/323050
Run-TryBot: Zvonimir Pavlinovic <zpavlinovic@google.com>
gopls-CI: kokoro <noreply+kokoro@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: Zvonimir Pavlinovic <zpavlinovic@google.com>
Reviewed-by: Roland Shoemaker <roland@golang.org>
diff --git a/go/callgraph/vta/propagation.go b/go/callgraph/vta/propagation.go
new file mode 100644
index 0000000..6c11801
--- /dev/null
+++ b/go/callgraph/vta/propagation.go
@@ -0,0 +1,181 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vta
+
+import (
+	"go/types"
+
+	"golang.org/x/tools/go/ssa"
+
+	"golang.org/x/tools/go/types/typeutil"
+)
+
+// scc computes strongly connected components (SCCs) of `g` using the
+// classical Tarjan's algorithm for SCCs. The result is a pair <m, id>
+// where m is a map from nodes to unique id of their SCC in the range
+// [0, id). The SCCs are sorted in reverse topological order: for SCCs
+// with ids X and Y s.t. X < Y, Y comes before X in the topological order.
+func scc(g vtaGraph) (map[node]int, int) {
+	// standard data structures used by Tarjan's algorithm.
+	var index uint64
+	var stack []node
+	indexMap := make(map[node]uint64)
+	lowLink := make(map[node]uint64)
+	onStack := make(map[node]bool)
+
+	nodeToSccID := make(map[node]int)
+	sccID := 0
+
+	var doSCC func(node)
+	doSCC = func(n node) {
+		indexMap[n] = index
+		lowLink[n] = index
+		index = index + 1
+		onStack[n] = true
+		stack = append(stack, n)
+
+		for s := range g[n] {
+			if _, ok := indexMap[s]; !ok {
+				// Analyze successor s that has not been visited yet.
+				doSCC(s)
+				lowLink[n] = min(lowLink[n], lowLink[s])
+			} else if onStack[s] {
+				// The successor is on the stack, meaning it has to be
+				// in the current SCC.
+				lowLink[n] = min(lowLink[n], indexMap[s])
+			}
+		}
+
+		// if n is a root node, pop the stack and generate a new SCC.
+		if lowLink[n] == indexMap[n] {
+			for {
+				w := stack[len(stack)-1]
+				stack = stack[:len(stack)-1]
+				onStack[w] = false
+				nodeToSccID[w] = sccID
+				if w == n {
+					break
+				}
+			}
+			sccID++
+		}
+	}
+
+	index = 0
+	for n := range g {
+		if _, ok := indexMap[n]; !ok {
+			doSCC(n)
+		}
+	}
+
+	return nodeToSccID, sccID
+}
+
+func min(x, y uint64) uint64 {
+	if x < y {
+		return x
+	}
+	return y
+}
+
+// propType represents type information being propagated
+// over the vta graph. f != nil only for function nodes
+// and nodes reachable from function nodes. There, we also
+// remember the actual *ssa.Function in order to more
+// precisely model higher-order flow.
+type propType struct {
+	typ types.Type
+	f   *ssa.Function
+}
+
+// propTypeMap is an auxiliary structure that serves
+// the role of a map from nodes to a set of propTypes.
+type propTypeMap struct {
+	nodeToScc  map[node]int
+	sccToTypes map[int]map[propType]bool
+}
+
+// propTypes returns a set of propTypes associated with
+// node `n`. If `n` is not in the map `ptm`, nil is returned.
+//
+// Note: for performance reasons, the returned set is a
+// reference to existing set in the map `ptm`, so any updates
+// to it will affect `ptm` as well.
+func (ptm propTypeMap) propTypes(n node) map[propType]bool {
+	id, ok := ptm.nodeToScc[n]
+	if !ok {
+		return nil
+	}
+	return ptm.sccToTypes[id]
+}
+
+// propagate reduces the `graph` based on its SCCs and
+// then propagates type information through the reduced
+// graph. The result is a map from nodes to a set of types
+// and functions, stemming from higher-order data flow,
+// reaching the node. `canon` is used for type uniqueness.
+func propagate(graph vtaGraph, canon *typeutil.Map) propTypeMap {
+	nodeToScc, sccID := scc(graph)
+	// Initialize sccToTypes to avoid repeated check
+	// for initialization later.
+	sccToTypes := make(map[int]map[propType]bool, sccID)
+	for i := 0; i <= sccID; i++ {
+		sccToTypes[i] = make(map[propType]bool)
+	}
+
+	// We also need the reverse map, from ids to SCCs.
+	sccs := make(map[int][]node, sccID)
+	for n, id := range nodeToScc {
+		sccs[id] = append(sccs[id], n)
+	}
+
+	for i := len(sccs) - 1; i >= 0; i-- {
+		nodes := sccs[i]
+		// Save the types induced by the nodes of the SCC.
+		mergeTypes(sccToTypes[i], nodeTypes(nodes, canon))
+		nextSccs := make(map[int]bool)
+		for _, node := range nodes {
+			for succ := range graph[node] {
+				nextSccs[nodeToScc[succ]] = true
+			}
+		}
+		// Propagate types to all successor SCCs.
+		for nextScc := range nextSccs {
+			mergeTypes(sccToTypes[nextScc], sccToTypes[i])
+		}
+	}
+
+	return propTypeMap{nodeToScc: nodeToScc, sccToTypes: sccToTypes}
+}
+
+// nodeTypes returns a set of propTypes for `nodes`. These are the
+// propTypes stemming from the type of each node in `nodes` plus.
+func nodeTypes(nodes []node, canon *typeutil.Map) map[propType]bool {
+	types := make(map[propType]bool)
+	for _, n := range nodes {
+		if hasInitialTypes(n) {
+			types[getPropType(n, canon)] = true
+		}
+	}
+	return types
+}
+
+// getPropType creates a propType for `node` based on its type.
+// propType.typ is always node.Type(). If node is function, then
+// propType.val is the underlying function; nil otherwise.
+func getPropType(node node, canon *typeutil.Map) propType {
+	t := canonicalize(node.Type(), canon)
+	if fn, ok := node.(function); ok {
+		return propType{f: fn.f, typ: t}
+	}
+	return propType{f: nil, typ: t}
+}
+
+// mergeTypes merges propTypes in `rhs` to `lhs`.
+func mergeTypes(lhs, rhs map[propType]bool) {
+	for typ := range rhs {
+		lhs[typ] = true
+	}
+}
diff --git a/go/callgraph/vta/propagation_test.go b/go/callgraph/vta/propagation_test.go
new file mode 100644
index 0000000..219fd70
--- /dev/null
+++ b/go/callgraph/vta/propagation_test.go
@@ -0,0 +1,336 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vta
+
+import (
+	"go/token"
+	"go/types"
+	"reflect"
+	"sort"
+	"strings"
+	"testing"
+	"unsafe"
+
+	"golang.org/x/tools/go/ssa"
+
+	"golang.org/x/tools/go/types/typeutil"
+)
+
+// val is a test data structure for creating ssa.Value
+// outside of the ssa package. Needed for manual creation
+// of vta graph nodes in testing.
+type val struct {
+	name string
+	typ  types.Type
+}
+
+func (v val) String() string {
+	return v.name
+}
+
+func (v val) Name() string {
+	return v.name
+}
+
+func (v val) Type() types.Type {
+	return v.typ
+}
+
+func (v val) Parent() *ssa.Function {
+	return nil
+}
+
+func (v val) Referrers() *[]ssa.Instruction {
+	return nil
+}
+
+func (v val) Pos() token.Pos {
+	return token.NoPos
+}
+
+// newLocal creates a new local node with ssa.Value
+// named `name` and type `t`.
+func newLocal(name string, t types.Type) local {
+	return local{val: val{name: name, typ: t}}
+}
+
+// newNamedType creates a bogus type named `name`.
+func newNamedType(name string) *types.Named {
+	return types.NewNamed(types.NewTypeName(token.NoPos, nil, name, nil), nil, nil)
+}
+
+// sccString is a utility for stringifying `nodeToScc`. Every
+// scc is represented as a string where string representation
+// of scc nodes are sorted and concatenated using `;`.
+func sccString(nodeToScc map[node]int) []string {
+	sccs := make(map[int][]node)
+	for n, id := range nodeToScc {
+		sccs[id] = append(sccs[id], n)
+	}
+
+	var sccsStr []string
+	for _, scc := range sccs {
+		var nodesStr []string
+		for _, node := range scc {
+			nodesStr = append(nodesStr, node.String())
+		}
+		sort.Strings(nodesStr)
+		sccsStr = append(sccsStr, strings.Join(nodesStr, ";"))
+	}
+	return sccsStr
+}
+
+// nodeToTypeString is testing utility for stringifying results
+// of type propagation: propTypeMap `pMap` is converted to a map
+// from node strings to a string consisting of type stringifications
+// concatenated with `;`. We stringify reachable type information
+// that also has an accompanying function by the function name.
+func nodeToTypeString(pMap propTypeMap) map[string]string {
+	// Convert propType to a string. If propType has
+	// an attached function, return the function name.
+	// Otherwise, return the type name.
+	propTypeString := func(p propType) string {
+		if p.f != nil {
+			return p.f.Name()
+		}
+		return p.typ.String()
+	}
+
+	nodeToTypeStr := make(map[string]string)
+	for node := range pMap.nodeToScc {
+		var propStrings []string
+		for prop := range pMap.propTypes(node) {
+			propStrings = append(propStrings, propTypeString(prop))
+		}
+		sort.Strings(propStrings)
+		nodeToTypeStr[node.String()] = strings.Join(propStrings, ";")
+	}
+
+	return nodeToTypeStr
+}
+
+// sccEqual compares two sets of SCC stringifications.
+func sccEqual(sccs1 []string, sccs2 []string) bool {
+	if len(sccs1) != len(sccs2) {
+		return false
+	}
+	sort.Strings(sccs1)
+	sort.Strings(sccs2)
+	return reflect.DeepEqual(sccs1, sccs2)
+}
+
+// isRevTopSorted checks if sccs of `g` are sorted in reverse
+// topological order:
+//  for every edge x -> y in g, nodeToScc[x] > nodeToScc[y]
+func isRevTopSorted(g vtaGraph, nodeToScc map[node]int) bool {
+	for n, succs := range g {
+		for s := range succs {
+			if nodeToScc[n] < nodeToScc[s] {
+				return false
+			}
+		}
+	}
+	return true
+}
+
+// setName sets name of the function `f` to `name`
+// using reflection since setting the name otherwise
+// is only possible within the ssa package.
+func setName(f *ssa.Function, name string) {
+	fi := reflect.ValueOf(f).Elem().FieldByName("name")
+	fi = reflect.NewAt(fi.Type(), unsafe.Pointer(fi.UnsafeAddr())).Elem()
+	fi.SetString(name)
+}
+
+// testSuite produces a named set of graphs as follows, where
+// parentheses contain node types and F nodes stand for function
+// nodes whose content is function named F:
+//
+//  no-cycles:
+//	t0 (A) -> t1 (B) -> t2 (C)
+//
+//  trivial-cycle:
+//      <--------    <--------
+//      |       |    |       |
+//      t0 (A) ->    t1 (B) ->
+//
+//  circle-cycle:
+//	t0 (A) -> t1 (A) -> t2 (B)
+//      |                   |
+//      <--------------------
+//
+//  fully-connected:
+//	t0 (A) <-> t1 (B)
+//           \    /
+//            t2(C)
+//
+//  subsumed-scc:
+//	t0 (A) -> t1 (B) -> t2(B) -> t3 (A)
+//      |          |         |        |
+//      |          <---------         |
+//      <-----------------------------
+//
+//  more-realistic:
+//      <--------
+//      |        |
+//      t0 (A) -->
+//                            ---------->
+//                           |           |
+//      t1 (A) -> t2 (B) -> F1 -> F2 -> F3 -> F4
+//       |        |          |           |
+//        <-------           <------------
+func testSuite() map[string]vtaGraph {
+	a := newNamedType("A")
+	b := newNamedType("B")
+	c := newNamedType("C")
+	sig := types.NewSignature(nil, types.NewTuple(), types.NewTuple(), false)
+
+	f1 := &ssa.Function{Signature: sig}
+	setName(f1, "F1")
+	f2 := &ssa.Function{Signature: sig}
+	setName(f2, "F2")
+	f3 := &ssa.Function{Signature: sig}
+	setName(f3, "F3")
+	f4 := &ssa.Function{Signature: sig}
+	setName(f4, "F4")
+
+	graphs := make(map[string]vtaGraph)
+	graphs["no-cycles"] = map[node]map[node]bool{
+		newLocal("t0", a): {newLocal("t1", b): true},
+		newLocal("t1", b): {newLocal("t2", c): true},
+	}
+
+	graphs["trivial-cycle"] = map[node]map[node]bool{
+		newLocal("t0", a): {newLocal("t0", a): true},
+		newLocal("t1", b): {newLocal("t1", b): true},
+	}
+
+	graphs["circle-cycle"] = map[node]map[node]bool{
+		newLocal("t0", a): {newLocal("t1", a): true},
+		newLocal("t1", a): {newLocal("t2", b): true},
+		newLocal("t2", b): {newLocal("t0", a): true},
+	}
+
+	graphs["fully-connected"] = map[node]map[node]bool{
+		newLocal("t0", a): {newLocal("t1", b): true, newLocal("t2", c): true},
+		newLocal("t1", b): {newLocal("t0", a): true, newLocal("t2", c): true},
+		newLocal("t2", c): {newLocal("t0", a): true, newLocal("t1", b): true},
+	}
+
+	graphs["subsumed-scc"] = map[node]map[node]bool{
+		newLocal("t0", a): {newLocal("t1", b): true},
+		newLocal("t1", b): {newLocal("t2", b): true},
+		newLocal("t2", b): {newLocal("t1", b): true, newLocal("t3", a): true},
+		newLocal("t3", a): {newLocal("t0", a): true},
+	}
+
+	graphs["more-realistic"] = map[node]map[node]bool{
+		newLocal("t0", a): {newLocal("t0", a): true},
+		newLocal("t1", a): {newLocal("t2", b): true},
+		newLocal("t2", b): {newLocal("t1", a): true, function{f1}: true},
+		function{f1}:      {function{f2}: true, function{f3}: true},
+		function{f2}:      {function{f3}: true},
+		function{f3}:      {function{f1}: true, function{f4}: true},
+	}
+
+	return graphs
+}
+
+func TestSCC(t *testing.T) {
+	suite := testSuite()
+	for _, test := range []struct {
+		name  string
+		graph vtaGraph
+		want  []string
+	}{
+		// No cycles results in three separate SCCs: {t0}	{t1}	{t2}
+		{name: "no-cycles", graph: suite["no-cycles"], want: []string{"Local(t0)", "Local(t1)", "Local(t2)"}},
+		// The two trivial self-loop cycles results in: {t0}	{t1}
+		{name: "trivial-cycle", graph: suite["trivial-cycle"], want: []string{"Local(t0)", "Local(t1)"}},
+		// The circle cycle produce a single SCC: {t0, t1, t2}
+		{name: "circle-cycle", graph: suite["circle-cycle"], want: []string{"Local(t0);Local(t1);Local(t2)"}},
+		// Similar holds for fully connected SCC: {t0, t1, t2}
+		{name: "fully-connected", graph: suite["fully-connected"], want: []string{"Local(t0);Local(t1);Local(t2)"}},
+		// Subsumed SCC also has a single SCC: {t0, t1, t2, t3}
+		{name: "subsumed-scc", graph: suite["subsumed-scc"], want: []string{"Local(t0);Local(t1);Local(t2);Local(t3)"}},
+		// The more realistic example has the following SCCs: {t0}	{t1, t2}	{F1, F2, F3}	{F4}
+		{name: "more-realistic", graph: suite["more-realistic"], want: []string{"Local(t0)", "Local(t1);Local(t2)", "Function(F1);Function(F2);Function(F3)", "Function(F4)"}},
+	} {
+		sccs, _ := scc(test.graph)
+		if got := sccString(sccs); !sccEqual(test.want, got) {
+			t.Errorf("want %v for graph %v; got %v", test.want, test.name, got)
+		}
+		if !isRevTopSorted(test.graph, sccs) {
+			t.Errorf("%v not topologically sorted", test.name)
+		}
+	}
+}
+
+func TestPropagation(t *testing.T) {
+	suite := testSuite()
+	var canon typeutil.Map
+	for _, test := range []struct {
+		name  string
+		graph vtaGraph
+		want  map[string]string
+	}{
+		// No cycles graph pushes type information forward.
+		{name: "no-cycles", graph: suite["no-cycles"],
+			want: map[string]string{
+				"Local(t0)": "A",
+				"Local(t1)": "A;B",
+				"Local(t2)": "A;B;C",
+			},
+		},
+		// No interesting type flow in trivial cycle graph.
+		{name: "trivial-cycle", graph: suite["trivial-cycle"],
+			want: map[string]string{
+				"Local(t0)": "A",
+				"Local(t1)": "B",
+			},
+		},
+		// Circle cycle makes type A and B get propagated everywhere.
+		{name: "circle-cycle", graph: suite["circle-cycle"],
+			want: map[string]string{
+				"Local(t0)": "A;B",
+				"Local(t1)": "A;B",
+				"Local(t2)": "A;B",
+			},
+		},
+		// Similarly for fully connected graph.
+		{name: "fully-connected", graph: suite["fully-connected"],
+			want: map[string]string{
+				"Local(t0)": "A;B;C",
+				"Local(t1)": "A;B;C",
+				"Local(t2)": "A;B;C",
+			},
+		},
+		// The outer loop of subsumed-scc pushes A an B through the graph.
+		{name: "subsumed-scc", graph: suite["subsumed-scc"],
+			want: map[string]string{
+				"Local(t0)": "A;B",
+				"Local(t1)": "A;B",
+				"Local(t2)": "A;B",
+				"Local(t3)": "A;B",
+			},
+		},
+		// More realistic graph has a more fine grained flow.
+		{name: "more-realistic", graph: suite["more-realistic"],
+			want: map[string]string{
+				"Local(t0)":    "A",
+				"Local(t1)":    "A;B",
+				"Local(t2)":    "A;B",
+				"Function(F1)": "A;B;F1;F2;F3",
+				"Function(F2)": "A;B;F1;F2;F3",
+				"Function(F3)": "A;B;F1;F2;F3",
+				"Function(F4)": "A;B;F1;F2;F3;F4",
+			},
+		},
+	} {
+		if got := nodeToTypeString(propagate(test.graph, &canon)); !reflect.DeepEqual(got, test.want) {
+			t.Errorf("want %v for graph %v; got %v", test.want, test.name, got)
+		}
+	}
+}