Zvonimir Pavlinovic | acaf218 | 2021-04-26 14:45:08 -0700 | [diff] [blame] | 1 | // Copyright 2021 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | // Package vta computes the call graph of a Go program using the Variable |
Russ Cox | ad8ef15 | 2022-04-11 22:39:42 -0400 | [diff] [blame] | 6 | // Type Analysis (VTA) algorithm originally described in “Practical Virtual |
Zvonimir Pavlinovic | acaf218 | 2021-04-26 14:45:08 -0700 | [diff] [blame] | 7 | // Method Call Resolution for Java," Vijay Sundaresan, Laurie Hendren, |
| 8 | // Chrislain Razafimahefa, Raja Vallée-Rai, Patrick Lam, Etienne Gagnon, and |
| 9 | // Charles Godin. |
| 10 | // |
| 11 | // Note: this package is in experimental phase and its interface is |
| 12 | // subject to change. |
| 13 | // TODO(zpavlinovic): reiterate on documentation. |
| 14 | // |
| 15 | // The VTA algorithm overapproximates the set of types (and function literals) |
| 16 | // a variable can take during runtime by building a global type propagation |
| 17 | // graph and propagating types (and function literals) through the graph. |
| 18 | // |
| 19 | // A type propagation is a directed, labeled graph. A node can represent |
| 20 | // one of the following: |
Russ Cox | ad8ef15 | 2022-04-11 22:39:42 -0400 | [diff] [blame] | 21 | // - A field of a struct type. |
| 22 | // - A local (SSA) variable of a method/function. |
| 23 | // - All pointers to a non-interface type. |
| 24 | // - The return value of a method. |
| 25 | // - All elements in an array. |
| 26 | // - All elements in a slice. |
| 27 | // - All elements in a map. |
| 28 | // - All elements in a channel. |
| 29 | // - A global variable. |
| 30 | // |
Zvonimir Pavlinovic | acaf218 | 2021-04-26 14:45:08 -0700 | [diff] [blame] | 31 | // In addition, the implementation used in this package introduces |
| 32 | // a few Go specific kinds of nodes: |
Russ Cox | ad8ef15 | 2022-04-11 22:39:42 -0400 | [diff] [blame] | 33 | // - (De)references of nested pointers to interfaces are modeled |
| 34 | // as a unique nestedPtrInterface node in the type propagation graph. |
| 35 | // - Each function literal is represented as a function node whose |
| 36 | // internal value is the (SSA) representation of the function. This |
| 37 | // is done to precisely infer flow of higher-order functions. |
Zvonimir Pavlinovic | acaf218 | 2021-04-26 14:45:08 -0700 | [diff] [blame] | 38 | // |
| 39 | // Edges in the graph represent flow of types (and function literals) through |
| 40 | // the program. That is, the model 1) typing constraints that are induced by |
| 41 | // assignment statements or function and method calls and 2) higher-order flow |
| 42 | // of functions in the program. |
| 43 | // |
| 44 | // The labeling function maps each node to a set of types and functions that |
| 45 | // can intuitively reach the program construct the node represents. Initially, |
| 46 | // every node is assigned a type corresponding to the program construct it |
| 47 | // represents. Function nodes are also assigned the function they represent. |
| 48 | // The labeling function then propagates types and function through the graph. |
| 49 | // |
| 50 | // The result of VTA is a type propagation graph in which each node is labeled |
| 51 | // with a conservative overapproximation of the set of types (and functions) |
| 52 | // it may have. This information is then used to construct the call graph. |
| 53 | // For each unresolved call site, vta uses the set of types and functions |
| 54 | // reaching the node representing the call site to create a set of callees. |
Zvonimir Pavlinovic | acaf218 | 2021-04-26 14:45:08 -0700 | [diff] [blame] | 55 | package vta |
| 56 | |
Tim King | 36a5c6a | 2022-08-25 11:09:24 -0400 | [diff] [blame] | 57 | // TODO(zpavlinovic): update VTA for how it handles generic function bodies and instantiation wrappers. |
| 58 | |
Zvonimir Pavlinovic | f6327c5 | 2021-05-26 15:42:08 -0700 | [diff] [blame] | 59 | import ( |
| 60 | "go/types" |
| 61 | |
| 62 | "golang.org/x/tools/go/callgraph" |
| 63 | "golang.org/x/tools/go/ssa" |
| 64 | ) |
| 65 | |
| 66 | // CallGraph uses the VTA algorithm to compute call graph for all functions |
Zvonimir Pavlinovic | 8373dc3 | 2021-08-31 15:22:59 -0700 | [diff] [blame] | 67 | // f:true in funcs. VTA refines the results of initial call graph and uses it |
| 68 | // to establish interprocedural type flow. The resulting graph does not have |
| 69 | // a root node. |
| 70 | // |
| 71 | // CallGraph does not make any assumptions on initial types global variables |
| 72 | // and function/method inputs can have. CallGraph is then sound, modulo use of |
| 73 | // reflection and unsafe, if the initial call graph is sound. |
Zvonimir Pavlinovic | f6327c5 | 2021-05-26 15:42:08 -0700 | [diff] [blame] | 74 | func CallGraph(funcs map[*ssa.Function]bool, initial *callgraph.Graph) *callgraph.Graph { |
| 75 | vtaG, canon := typePropGraph(funcs, initial) |
| 76 | types := propagate(vtaG, canon) |
| 77 | |
| 78 | c := &constructor{types: types, initial: initial, cache: make(methodCache)} |
| 79 | return c.construct(funcs) |
| 80 | } |
| 81 | |
| 82 | // constructor type linearly traverses the input program |
| 83 | // and constructs a callgraph based on the results of the |
| 84 | // VTA type propagation phase. |
| 85 | type constructor struct { |
| 86 | types propTypeMap |
| 87 | cache methodCache |
| 88 | initial *callgraph.Graph |
| 89 | } |
| 90 | |
| 91 | func (c *constructor) construct(funcs map[*ssa.Function]bool) *callgraph.Graph { |
| 92 | cg := &callgraph.Graph{Nodes: make(map[*ssa.Function]*callgraph.Node)} |
| 93 | for f, in := range funcs { |
| 94 | if in { |
| 95 | c.constrct(cg, f) |
| 96 | } |
| 97 | } |
| 98 | return cg |
| 99 | } |
| 100 | |
| 101 | func (c *constructor) constrct(g *callgraph.Graph, f *ssa.Function) { |
| 102 | caller := g.CreateNode(f) |
| 103 | for _, call := range calls(f) { |
| 104 | for _, c := range c.callees(call) { |
| 105 | callgraph.AddEdge(caller, call, g.CreateNode(c)) |
| 106 | } |
| 107 | } |
| 108 | } |
| 109 | |
| 110 | // callees computes the set of functions to which VTA resolves `c`. The resolved |
| 111 | // functions are intersected with functions to which `initial` resolves `c`. |
| 112 | func (c *constructor) callees(call ssa.CallInstruction) []*ssa.Function { |
| 113 | cc := call.Common() |
| 114 | if cc.StaticCallee() != nil { |
| 115 | return []*ssa.Function{cc.StaticCallee()} |
| 116 | } |
| 117 | |
| 118 | // Skip builtins as they are not *ssa.Function. |
| 119 | if _, ok := cc.Value.(*ssa.Builtin); ok { |
| 120 | return nil |
| 121 | } |
| 122 | |
| 123 | // Cover the case of dynamic higher-order and interface calls. |
| 124 | return intersect(resolve(call, c.types, c.cache), siteCallees(call, c.initial)) |
| 125 | } |
| 126 | |
| 127 | // resolve returns a set of functions `c` resolves to based on the |
| 128 | // type propagation results in `types`. |
| 129 | func resolve(c ssa.CallInstruction, types propTypeMap, cache methodCache) []*ssa.Function { |
| 130 | n := local{val: c.Common().Value} |
| 131 | var funcs []*ssa.Function |
Zvonimir Pavlinovic | e900012 | 2021-11-03 10:43:00 -0700 | [diff] [blame] | 132 | for _, p := range types.propTypes(n) { |
Zvonimir Pavlinovic | f6327c5 | 2021-05-26 15:42:08 -0700 | [diff] [blame] | 133 | funcs = append(funcs, propFunc(p, c, cache)...) |
| 134 | } |
| 135 | return funcs |
| 136 | } |
| 137 | |
| 138 | // propFunc returns the functions modeled with the propagation type `p` |
cuishuang | 78ff15e | 2022-03-19 13:29:11 +0000 | [diff] [blame] | 139 | // assigned to call site `c`. If no such function exists, nil is returned. |
Zvonimir Pavlinovic | f6327c5 | 2021-05-26 15:42:08 -0700 | [diff] [blame] | 140 | func propFunc(p propType, c ssa.CallInstruction, cache methodCache) []*ssa.Function { |
| 141 | if p.f != nil { |
| 142 | return []*ssa.Function{p.f} |
| 143 | } |
| 144 | |
| 145 | if c.Common().Method == nil { |
| 146 | return nil |
| 147 | } |
| 148 | |
| 149 | return cache.methods(p.typ, c.Common().Method.Name(), c.Parent().Prog) |
| 150 | } |
| 151 | |
| 152 | // methodCache serves as a type -> method name -> methods |
| 153 | // cache when computing methods of a type using the |
| 154 | // ssa.Program.MethodSets and ssa.Program.MethodValue |
| 155 | // APIs. The cache is used to speed up querying of |
| 156 | // methods of a type as the mentioned APIs are expensive. |
Alan Donovan | 51df92b | 2023-11-06 13:27:49 -0500 | [diff] [blame] | 157 | // |
| 158 | // TODO(adonovan): Program.MethodValue already does this kind of |
| 159 | // caching. Is this really necessary? |
Zvonimir Pavlinovic | f6327c5 | 2021-05-26 15:42:08 -0700 | [diff] [blame] | 160 | type methodCache map[types.Type]map[string][]*ssa.Function |
| 161 | |
| 162 | // methods returns methods of a type `t` named `name`. First consults |
| 163 | // `mc` and otherwise queries `prog` for the method. If no such method |
| 164 | // exists, nil is returned. |
| 165 | func (mc methodCache) methods(t types.Type, name string, prog *ssa.Program) []*ssa.Function { |
| 166 | if ms, ok := mc[t]; ok { |
| 167 | return ms[name] |
| 168 | } |
| 169 | |
| 170 | ms := make(map[string][]*ssa.Function) |
| 171 | mset := prog.MethodSets.MethodSet(t) |
| 172 | for i, n := 0, mset.Len(); i < n; i++ { |
| 173 | // f can be nil when t is an interface or some |
| 174 | // other type without any runtime methods. |
| 175 | if f := prog.MethodValue(mset.At(i)); f != nil { |
| 176 | ms[f.Name()] = append(ms[f.Name()], f) |
| 177 | } |
| 178 | } |
| 179 | mc[t] = ms |
| 180 | return ms[name] |
| 181 | } |