blob: 72bd4a4d8b03b4c72bda2c60a8fb6e9d6d93228c [file] [log] [blame]
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package vta computes the call graph of a Go program using the Variable
// Type Analysis (VTA) algorithm originally described in “Practical Virtual
// Method Call Resolution for Java," Vijay Sundaresan, Laurie Hendren,
// Chrislain Razafimahefa, Raja Vallée-Rai, Patrick Lam, Etienne Gagnon, and
// Charles Godin.
//
// Note: this package is in experimental phase and its interface is
// subject to change.
// TODO(zpavlinovic): reiterate on documentation.
//
// The VTA algorithm overapproximates the set of types (and function literals)
// a variable can take during runtime by building a global type propagation
// graph and propagating types (and function literals) through the graph.
//
// A type propagation is a directed, labeled graph. A node can represent
// one of the following:
// - A field of a struct type.
// - A local (SSA) variable of a method/function.
// - All pointers to a non-interface type.
// - The return value of a method.
// - All elements in an array.
// - All elements in a slice.
// - All elements in a map.
// - All elements in a channel.
// - A global variable.
//
// In addition, the implementation used in this package introduces
// a few Go specific kinds of nodes:
// - (De)references of nested pointers to interfaces are modeled
// as a unique nestedPtrInterface node in the type propagation graph.
// - Each function literal is represented as a function node whose
// internal value is the (SSA) representation of the function. This
// is done to precisely infer flow of higher-order functions.
//
// Edges in the graph represent flow of types (and function literals) through
// the program. That is, the model 1) typing constraints that are induced by
// assignment statements or function and method calls and 2) higher-order flow
// of functions in the program.
//
// The labeling function maps each node to a set of types and functions that
// can intuitively reach the program construct the node represents. Initially,
// every node is assigned a type corresponding to the program construct it
// represents. Function nodes are also assigned the function they represent.
// The labeling function then propagates types and function through the graph.
//
// The result of VTA is a type propagation graph in which each node is labeled
// with a conservative overapproximation of the set of types (and functions)
// it may have. This information is then used to construct the call graph.
// For each unresolved call site, vta uses the set of types and functions
// reaching the node representing the call site to create a set of callees.
package vta
// TODO(zpavlinovic): update VTA for how it handles generic function bodies and instantiation wrappers.
import (
"go/types"
"golang.org/x/tools/go/callgraph"
"golang.org/x/tools/go/ssa"
)
// CallGraph uses the VTA algorithm to compute call graph for all functions
// f:true in funcs. VTA refines the results of initial call graph and uses it
// to establish interprocedural type flow. If initial is nil, VTA uses a more
// efficient approach to construct a CHA call graph.
//
// The resulting graph does not have a root node.
//
// CallGraph does not make any assumptions on initial types global variables
// and function/method inputs can have. CallGraph is then sound, modulo use of
// reflection and unsafe, if the initial call graph is sound.
func CallGraph(funcs map[*ssa.Function]bool, initial *callgraph.Graph) *callgraph.Graph {
callees := makeCalleesFunc(funcs, initial)
vtaG, canon := typePropGraph(funcs, callees)
types := propagate(vtaG, canon)
c := &constructor{types: types, callees: callees, cache: make(methodCache)}
return c.construct(funcs)
}
// constructor type linearly traverses the input program
// and constructs a callgraph based on the results of the
// VTA type propagation phase.
type constructor struct {
types propTypeMap
cache methodCache
callees calleesFunc
}
func (c *constructor) construct(funcs map[*ssa.Function]bool) *callgraph.Graph {
cg := &callgraph.Graph{Nodes: make(map[*ssa.Function]*callgraph.Node)}
for f, in := range funcs {
if in {
c.constrct(cg, f)
}
}
return cg
}
func (c *constructor) constrct(g *callgraph.Graph, f *ssa.Function) {
caller := g.CreateNode(f)
for _, call := range calls(f) {
for _, c := range c.resolves(call) {
callgraph.AddEdge(caller, call, g.CreateNode(c))
}
}
}
// resolves computes the set of functions to which VTA resolves `c`. The resolved
// functions are intersected with functions to which `c.initial` resolves `c`.
func (c *constructor) resolves(call ssa.CallInstruction) []*ssa.Function {
cc := call.Common()
if cc.StaticCallee() != nil {
return []*ssa.Function{cc.StaticCallee()}
}
// Skip builtins as they are not *ssa.Function.
if _, ok := cc.Value.(*ssa.Builtin); ok {
return nil
}
// Cover the case of dynamic higher-order and interface calls.
var res []*ssa.Function
resolved := resolve(call, c.types, c.cache)
siteCallees(call, c.callees)(func(f *ssa.Function) bool {
if _, ok := resolved[f]; ok {
res = append(res, f)
}
return true
})
return res
}
// resolve returns a set of functions `c` resolves to based on the
// type propagation results in `types`.
func resolve(c ssa.CallInstruction, types propTypeMap, cache methodCache) (fns map[*ssa.Function]empty) {
n := local{val: c.Common().Value}
types.propTypes(n)(func(p propType) bool {
pfs := propFunc(p, c, cache)
if len(pfs) == 0 {
return true
}
if fns == nil {
fns = make(map[*ssa.Function]empty)
}
for _, f := range pfs {
fns[f] = empty{}
}
return true
})
return fns
}
// propFunc returns the functions modeled with the propagation type `p`
// assigned to call site `c`. If no such function exists, nil is returned.
func propFunc(p propType, c ssa.CallInstruction, cache methodCache) []*ssa.Function {
if p.f != nil {
return []*ssa.Function{p.f}
}
if c.Common().Method == nil {
return nil
}
return cache.methods(p.typ, c.Common().Method.Name(), c.Parent().Prog)
}
// methodCache serves as a type -> method name -> methods
// cache when computing methods of a type using the
// ssa.Program.MethodSets and ssa.Program.MethodValue
// APIs. The cache is used to speed up querying of
// methods of a type as the mentioned APIs are expensive.
//
// TODO(adonovan): Program.MethodValue already does this kind of
// caching. Is this really necessary?
type methodCache map[types.Type]map[string][]*ssa.Function
// methods returns methods of a type `t` named `name`. First consults
// `mc` and otherwise queries `prog` for the method. If no such method
// exists, nil is returned.
func (mc methodCache) methods(t types.Type, name string, prog *ssa.Program) []*ssa.Function {
if ms, ok := mc[t]; ok {
return ms[name]
}
ms := make(map[string][]*ssa.Function)
mset := prog.MethodSets.MethodSet(t)
for i, n := 0, mset.Len(); i < n; i++ {
// f can be nil when t is an interface or some
// other type without any runtime methods.
if f := prog.MethodValue(mset.At(i)); f != nil {
ms[f.Name()] = append(ms[f.Name()], f)
}
}
mc[t] = ms
return ms[name]
}