blob: 2303fcfa0a8cc37838d2bf377aac250705b6ecea [file] [log] [blame]
Zvonimir Pavlinovicacaf2182021-04-26 14:45:08 -07001// Copyright 2021 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package vta computes the call graph of a Go program using the Variable
Russ Coxad8ef152022-04-11 22:39:42 -04006// Type Analysis (VTA) algorithm originally described in “Practical Virtual
Zvonimir Pavlinovicacaf2182021-04-26 14:45:08 -07007// Method Call Resolution for Java," Vijay Sundaresan, Laurie Hendren,
8// Chrislain Razafimahefa, Raja Vallée-Rai, Patrick Lam, Etienne Gagnon, and
9// Charles Godin.
10//
11// Note: this package is in experimental phase and its interface is
12// subject to change.
13// TODO(zpavlinovic): reiterate on documentation.
14//
15// The VTA algorithm overapproximates the set of types (and function literals)
16// a variable can take during runtime by building a global type propagation
17// graph and propagating types (and function literals) through the graph.
18//
19// A type propagation is a directed, labeled graph. A node can represent
20// one of the following:
Russ Coxad8ef152022-04-11 22:39:42 -040021// - A field of a struct type.
22// - A local (SSA) variable of a method/function.
23// - All pointers to a non-interface type.
24// - The return value of a method.
25// - All elements in an array.
26// - All elements in a slice.
27// - All elements in a map.
28// - All elements in a channel.
29// - A global variable.
30//
Zvonimir Pavlinovicacaf2182021-04-26 14:45:08 -070031// In addition, the implementation used in this package introduces
32// a few Go specific kinds of nodes:
Russ Coxad8ef152022-04-11 22:39:42 -040033// - (De)references of nested pointers to interfaces are modeled
34// as a unique nestedPtrInterface node in the type propagation graph.
35// - Each function literal is represented as a function node whose
36// internal value is the (SSA) representation of the function. This
37// is done to precisely infer flow of higher-order functions.
Zvonimir Pavlinovicacaf2182021-04-26 14:45:08 -070038//
39// Edges in the graph represent flow of types (and function literals) through
40// the program. That is, the model 1) typing constraints that are induced by
41// assignment statements or function and method calls and 2) higher-order flow
42// of functions in the program.
43//
44// The labeling function maps each node to a set of types and functions that
45// can intuitively reach the program construct the node represents. Initially,
46// every node is assigned a type corresponding to the program construct it
47// represents. Function nodes are also assigned the function they represent.
48// The labeling function then propagates types and function through the graph.
49//
50// The result of VTA is a type propagation graph in which each node is labeled
51// with a conservative overapproximation of the set of types (and functions)
52// it may have. This information is then used to construct the call graph.
53// For each unresolved call site, vta uses the set of types and functions
54// reaching the node representing the call site to create a set of callees.
Zvonimir Pavlinovicacaf2182021-04-26 14:45:08 -070055package vta
56
Tim King36a5c6a2022-08-25 11:09:24 -040057// TODO(zpavlinovic): update VTA for how it handles generic function bodies and instantiation wrappers.
58
Zvonimir Pavlinovicf6327c52021-05-26 15:42:08 -070059import (
60 "go/types"
61
62 "golang.org/x/tools/go/callgraph"
63 "golang.org/x/tools/go/ssa"
64)
65
66// CallGraph uses the VTA algorithm to compute call graph for all functions
Zvonimir Pavlinovic8373dc32021-08-31 15:22:59 -070067// f:true in funcs. VTA refines the results of initial call graph and uses it
68// to establish interprocedural type flow. The resulting graph does not have
69// a root node.
70//
71// CallGraph does not make any assumptions on initial types global variables
72// and function/method inputs can have. CallGraph is then sound, modulo use of
73// reflection and unsafe, if the initial call graph is sound.
Zvonimir Pavlinovicf6327c52021-05-26 15:42:08 -070074func CallGraph(funcs map[*ssa.Function]bool, initial *callgraph.Graph) *callgraph.Graph {
75 vtaG, canon := typePropGraph(funcs, initial)
76 types := propagate(vtaG, canon)
77
78 c := &constructor{types: types, initial: initial, cache: make(methodCache)}
79 return c.construct(funcs)
80}
81
82// constructor type linearly traverses the input program
83// and constructs a callgraph based on the results of the
84// VTA type propagation phase.
85type constructor struct {
86 types propTypeMap
87 cache methodCache
88 initial *callgraph.Graph
89}
90
91func (c *constructor) construct(funcs map[*ssa.Function]bool) *callgraph.Graph {
92 cg := &callgraph.Graph{Nodes: make(map[*ssa.Function]*callgraph.Node)}
93 for f, in := range funcs {
94 if in {
95 c.constrct(cg, f)
96 }
97 }
98 return cg
99}
100
101func (c *constructor) constrct(g *callgraph.Graph, f *ssa.Function) {
102 caller := g.CreateNode(f)
103 for _, call := range calls(f) {
104 for _, c := range c.callees(call) {
105 callgraph.AddEdge(caller, call, g.CreateNode(c))
106 }
107 }
108}
109
110// callees computes the set of functions to which VTA resolves `c`. The resolved
111// functions are intersected with functions to which `initial` resolves `c`.
112func (c *constructor) callees(call ssa.CallInstruction) []*ssa.Function {
113 cc := call.Common()
114 if cc.StaticCallee() != nil {
115 return []*ssa.Function{cc.StaticCallee()}
116 }
117
118 // Skip builtins as they are not *ssa.Function.
119 if _, ok := cc.Value.(*ssa.Builtin); ok {
120 return nil
121 }
122
123 // Cover the case of dynamic higher-order and interface calls.
124 return intersect(resolve(call, c.types, c.cache), siteCallees(call, c.initial))
125}
126
127// resolve returns a set of functions `c` resolves to based on the
128// type propagation results in `types`.
129func resolve(c ssa.CallInstruction, types propTypeMap, cache methodCache) []*ssa.Function {
130 n := local{val: c.Common().Value}
131 var funcs []*ssa.Function
Zvonimir Pavlinovice9000122021-11-03 10:43:00 -0700132 for _, p := range types.propTypes(n) {
Zvonimir Pavlinovicf6327c52021-05-26 15:42:08 -0700133 funcs = append(funcs, propFunc(p, c, cache)...)
134 }
135 return funcs
136}
137
138// propFunc returns the functions modeled with the propagation type `p`
cuishuang78ff15e2022-03-19 13:29:11 +0000139// assigned to call site `c`. If no such function exists, nil is returned.
Zvonimir Pavlinovicf6327c52021-05-26 15:42:08 -0700140func propFunc(p propType, c ssa.CallInstruction, cache methodCache) []*ssa.Function {
141 if p.f != nil {
142 return []*ssa.Function{p.f}
143 }
144
145 if c.Common().Method == nil {
146 return nil
147 }
148
149 return cache.methods(p.typ, c.Common().Method.Name(), c.Parent().Prog)
150}
151
152// methodCache serves as a type -> method name -> methods
153// cache when computing methods of a type using the
154// ssa.Program.MethodSets and ssa.Program.MethodValue
155// APIs. The cache is used to speed up querying of
156// methods of a type as the mentioned APIs are expensive.
Alan Donovan51df92b2023-11-06 13:27:49 -0500157//
158// TODO(adonovan): Program.MethodValue already does this kind of
159// caching. Is this really necessary?
Zvonimir Pavlinovicf6327c52021-05-26 15:42:08 -0700160type methodCache map[types.Type]map[string][]*ssa.Function
161
162// methods returns methods of a type `t` named `name`. First consults
163// `mc` and otherwise queries `prog` for the method. If no such method
164// exists, nil is returned.
165func (mc methodCache) methods(t types.Type, name string, prog *ssa.Program) []*ssa.Function {
166 if ms, ok := mc[t]; ok {
167 return ms[name]
168 }
169
170 ms := make(map[string][]*ssa.Function)
171 mset := prog.MethodSets.MethodSet(t)
172 for i, n := 0, mset.Len(); i < n; i++ {
173 // f can be nil when t is an interface or some
174 // other type without any runtime methods.
175 if f := prog.MethodValue(mset.At(i)); f != nil {
176 ms[f.Name()] = append(ms[f.Name()], f)
177 }
178 }
179 mc[t] = ms
180 return ms[name]
181}