| // Copyright 2013 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
// Package rta provides Rapid Type Analysis (RTA) for Go, a fast
| // algorithm for call graph construction and discovery of reachable code |
| // (and hence dead code) and runtime types. The algorithm was first |
| // described in: |
| // |
| // David F. Bacon and Peter F. Sweeney. 1996. |
| // Fast static analysis of C++ virtual function calls. (OOPSLA '96) |
| // http://doi.acm.org/10.1145/236337.236371 |
| // |
| // The algorithm uses dynamic programming to tabulate the cross-product |
| // of the set of known "address-taken" functions with the set of known |
| // dynamic calls of the same type. As each new address-taken function |
| // is discovered, call graph edges are added from each known callsite, |
| // and as each new call site is discovered, call graph edges are added |
| // from it to each known address-taken function. |
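//
// For example, given a (purely illustrative) program fragment such as
//
//	handlers["quit"] = onQuit // onQuit becomes address-taken
//	handlers[cmd]()           // a dynamic "call"-mode call site
//
// the algorithm adds a call graph edge from the handlers[cmd]() site
// to onQuit, and to every other known address-taken function of the
// same signature.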
| // |
| // A similar approach is used for dynamic calls via interfaces: it |
| // tabulates the cross-product of the set of known "runtime types", |
| // i.e. types that may appear in an interface value, or may be derived from |
| // one via reflection, with the set of known "invoke"-mode dynamic |
| // calls. As each new runtime type is discovered, call edges are |
| // added from the known call sites, and as each new call site is |
| // discovered, call graph edges are added to each compatible |
| // method. |
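//
// For instance (again purely illustrative):
//
//	var w io.Writer = new(bytes.Buffer) // *bytes.Buffer becomes a runtime type
//	w.Write(p)                          // an "invoke"-mode call via io.Writer
//
// adds an edge from the w.Write call site to (*bytes.Buffer).Write, and
// likewise to the Write method of every other runtime type whose method
// set satisfies io.Writer.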
| // |
| // In addition, we must consider as reachable all address-taken |
| // functions and all exported methods of any runtime type, since they |
| // may be called via reflection. |
| // |
| // Each time a newly added call edge causes a new function to become |
| // reachable, the code of that function is analyzed for more call sites, |
| // address-taken functions, and runtime types. The process continues |
| // until a fixed point is reached. |
| package rta // import "golang.org/x/tools/go/callgraph/rta" |
| |
| import ( |
| "fmt" |
| "go/types" |
| "hash/crc32" |
| |
| "golang.org/x/tools/go/callgraph" |
| "golang.org/x/tools/go/ssa" |
| "golang.org/x/tools/go/types/typeutil" |
| "golang.org/x/tools/internal/aliases" |
| "golang.org/x/tools/internal/compat" |
| ) |
| |
// A Result holds the results of Rapid Type Analysis: the set of
// reachable functions and methods, the set of runtime types, and the
// call graph.
| type Result struct { |
| // CallGraph is the discovered callgraph. |
| // It does not include edges for calls made via reflection. |
| CallGraph *callgraph.Graph |
| |
| // Reachable contains the set of reachable functions and methods. |
| // This includes exported methods of runtime types, since |
| // they may be accessed via reflection. |
| // The value indicates whether the function is address-taken. |
| // |
| // (We wrap the bool in a struct to avoid inadvertent use of |
| // "if Reachable[f] {" to test for set membership.) |
| Reachable map[*ssa.Function]struct{ AddrTaken bool } |
| |
| // RuntimeTypes contains the set of types that are needed at |
| // runtime, for interfaces or reflection. |
| // |
| // The value indicates whether the type is inaccessible to reflection. |
| // Consider: |
| // type A struct{B} |
| // fmt.Println(new(A)) |
| // Types *A, A and B are accessible to reflection, but the unnamed |
| // type struct{B} is not. |
| RuntimeTypes typeutil.Map |
| } |
| |
| // Working state of the RTA algorithm. |
| type rta struct { |
| result *Result |
| |
| prog *ssa.Program |
| |
| reflectValueCall *ssa.Function // (*reflect.Value).Call, iff part of prog |
| |
| worklist []*ssa.Function // list of functions to visit |
| |
| // addrTakenFuncsBySig contains all address-taken *Functions, grouped by signature. |
| // Keys are *types.Signature, values are map[*ssa.Function]bool sets. |
| addrTakenFuncsBySig typeutil.Map |
| |
| // dynCallSites contains all dynamic "call"-mode call sites, grouped by signature. |
| // Keys are *types.Signature, values are unordered []ssa.CallInstruction. |
| dynCallSites typeutil.Map |
| |
| // invokeSites contains all "invoke"-mode call sites, grouped by interface. |
	// Keys are *types.Interface (never *types.Named);
	// values are unordered []ssa.CallInstruction.
| invokeSites typeutil.Map |
| |
| // The following two maps together define the subset of the |
| // m:n "implements" relation needed by the algorithm. |
| |
| // concreteTypes maps each concrete type to information about it. |
| // Keys are types.Type, values are *concreteTypeInfo. |
| // Only concrete types used as MakeInterface operands are included. |
| concreteTypes typeutil.Map |
| |
| // interfaceTypes maps each interface type to information about it. |
| // Keys are *types.Interface, values are *interfaceTypeInfo. |
| // Only interfaces used in "invoke"-mode CallInstructions are included. |
| interfaceTypes typeutil.Map |
| } |
| |
| type concreteTypeInfo struct { |
| C types.Type |
| mset *types.MethodSet |
| fprint uint64 // fingerprint of method set |
| implements []*types.Interface // unordered set of implemented interfaces |
| } |
| |
| type interfaceTypeInfo struct { |
| I *types.Interface |
| mset *types.MethodSet |
| fprint uint64 |
| implementations []types.Type // unordered set of concrete implementations |
| } |
| |
| // addReachable marks a function as potentially callable at run-time, |
| // and ensures that it gets processed. |
| func (r *rta) addReachable(f *ssa.Function, addrTaken bool) { |
| reachable := r.result.Reachable |
| n := len(reachable) |
| v := reachable[f] |
| if addrTaken { |
| v.AddrTaken = true |
| } |
| reachable[f] = v |
| if len(reachable) > n { |
| // First time seeing f. Add it to the worklist. |
| r.worklist = append(r.worklist, f) |
| } |
| } |
| |
| // addEdge adds the specified call graph edge, and marks it reachable. |
| // addrTaken indicates whether to mark the callee as "address-taken". |
| // site is nil for calls made via reflection. |
| func (r *rta) addEdge(caller *ssa.Function, site ssa.CallInstruction, callee *ssa.Function, addrTaken bool) { |
| r.addReachable(callee, addrTaken) |
| |
| if g := r.result.CallGraph; g != nil { |
| if caller == nil { |
| panic(site) |
| } |
| from := g.CreateNode(caller) |
| to := g.CreateNode(callee) |
| callgraph.AddEdge(from, site, to) |
| } |
| } |
| |
| // ---------- addrTakenFuncs × dynCallSites ---------- |
| |
| // visitAddrTakenFunc is called each time we encounter an address-taken function f. |
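// A function is address-taken when it appears as an operand other than
// the callee of a direct call, i.e. when it escapes as a value.
// For example (the identifiers are purely illustrative):
//
//	go worker(jobs)      // static call: worker is not address-taken here
//	pool.submit(worker)  // worker escapes as a value: address-taken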
| func (r *rta) visitAddrTakenFunc(f *ssa.Function) { |
| // Create two-level map (Signature -> Function -> bool). |
| S := f.Signature |
| funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ssa.Function]bool) |
| if funcs == nil { |
| funcs = make(map[*ssa.Function]bool) |
| r.addrTakenFuncsBySig.Set(S, funcs) |
| } |
| if !funcs[f] { |
| // First time seeing f. |
| funcs[f] = true |
| |
		// If we've seen any dynamic calls of this signature, add
		// call graph edges from each such site to f (marking f reachable).
| sites, _ := r.dynCallSites.At(S).([]ssa.CallInstruction) |
| for _, site := range sites { |
| r.addEdge(site.Parent(), site, f, true) |
| } |
| |
| // If the program includes (*reflect.Value).Call, |
| // add a dynamic call edge from it to any address-taken |
| // function, regardless of signature. |
| // |
| // This isn't perfect. |
| // - The actual call comes from an internal function |
| // called reflect.call, but we can't rely on that here. |
		// - reflect.Value.CallSlice behaves similarly,
		//   but we don't bother to create call graph edges
		//   from it as well, since doing so wouldn't fundamentally
		//   change the reachable set; it would only add many more edges.
| // - We assume that if reflect.Value.Call is among |
| // the dependencies of the application, it is itself |
| // reachable. (It would be more accurate to defer |
| // all the addEdges below until r.V.Call itself |
| // becomes reachable.) |
		// - Fake call graph edges are added from r.V.Call to
		//   each address-taken function, but not to every
		//   method reachable through a materialized rtype,
		//   which is a little inconsistent. Still, the
		//   reachable set includes both kinds, which is what
		//   matters for e.g. dead code detection.
| if r.reflectValueCall != nil { |
| var site ssa.CallInstruction = nil // can't find actual call site |
| r.addEdge(r.reflectValueCall, site, f, true) |
| } |
| } |
| } |
| |
| // visitDynCall is called each time we encounter a dynamic "call"-mode call. |
| func (r *rta) visitDynCall(site ssa.CallInstruction) { |
| S := site.Common().Signature() |
| |
| // Record the call site. |
| sites, _ := r.dynCallSites.At(S).([]ssa.CallInstruction) |
| r.dynCallSites.Set(S, append(sites, site)) |
| |
| // For each function of signature S that we know is address-taken, |
| // add an edge and mark it reachable. |
| funcs, _ := r.addrTakenFuncsBySig.At(S).(map[*ssa.Function]bool) |
| for g := range funcs { |
| r.addEdge(site.Parent(), site, g, true) |
| } |
| } |
| |
| // ---------- concrete types × invoke sites ---------- |
| |
| // addInvokeEdge is called for each new pair (site, C) in the matrix. |
| func (r *rta) addInvokeEdge(site ssa.CallInstruction, C types.Type) { |
| // Ascertain the concrete method of C to be called. |
| imethod := site.Common().Method |
| cmethod := r.prog.LookupMethod(C, imethod.Pkg(), imethod.Name()) |
| r.addEdge(site.Parent(), site, cmethod, true) |
| } |
| |
| // visitInvoke is called each time the algorithm encounters an "invoke"-mode call. |
| func (r *rta) visitInvoke(site ssa.CallInstruction) { |
| I := site.Common().Value.Type().Underlying().(*types.Interface) |
| |
| // Record the invoke site. |
| sites, _ := r.invokeSites.At(I).([]ssa.CallInstruction) |
| r.invokeSites.Set(I, append(sites, site)) |
| |
	// Add a call graph edge for each currently known
	// concrete type that implements I.
| for _, C := range r.implementations(I) { |
| r.addInvokeEdge(site, C) |
| } |
| } |
| |
| // ---------- main algorithm ---------- |
| |
| // visitFunc processes function f. |
| func (r *rta) visitFunc(f *ssa.Function) { |
| var space [32]*ssa.Value // preallocate space for common case |
| |
| for _, b := range f.Blocks { |
| for _, instr := range b.Instrs { |
| rands := instr.Operands(space[:0]) |
| |
| switch instr := instr.(type) { |
| case ssa.CallInstruction: |
| call := instr.Common() |
| if call.IsInvoke() { |
| r.visitInvoke(instr) |
| } else if g := call.StaticCallee(); g != nil { |
| r.addEdge(f, instr, g, false) |
| } else if _, ok := call.Value.(*ssa.Builtin); !ok { |
| r.visitDynCall(instr) |
| } |
| |
| // Ignore the call-position operand when |
| // looking for address-taken Functions. |
| // Hack: assume this is rands[0]. |
| rands = rands[1:] |
| |
| case *ssa.MakeInterface: |
| // Converting a value of type T to an |
| // interface materializes its runtime |
| // type, allowing any of its exported |
				// methods to be called through reflection.
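				//
				// For example (an illustrative source-level pattern):
				//
				//	var s fmt.Stringer = time.Second
				//
				// materializes time.Duration as a runtime type, so its
				// exported methods (String, Hours, and so on) become
				// reachable via reflection.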
| r.addRuntimeType(instr.X.Type(), false) |
| } |
| |
| // Process all address-taken functions. |
| for _, op := range rands { |
| if g, ok := (*op).(*ssa.Function); ok { |
| r.visitAddrTakenFunc(g) |
| } |
| } |
| } |
| } |
| } |
| |
| // Analyze performs Rapid Type Analysis, starting at the specified root |
| // functions. It returns nil if no roots were specified. |
| // |
| // The root functions must be one or more entrypoints (main and init |
| // functions) of a complete SSA program, with function bodies for all |
| // dependencies, constructed with the [ssa.InstantiateGenerics] mode |
| // flag. |
| // |
| // If buildCallGraph is true, Result.CallGraph will contain a call |
| // graph; otherwise, only the other fields (reachable functions) are |
| // populated. |
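//
// A typical use looks roughly like this (a sketch: it assumes the
// initial packages were loaded via golang.org/x/tools/go/packages with
// syntax and type information for all dependencies, and built with
// ssautil.AllPackages from golang.org/x/tools/go/ssa/ssautil):
//
//	prog, pkgs := ssautil.AllPackages(initial, ssa.InstantiateGenerics)
//	prog.Build()
//	var roots []*ssa.Function
//	for _, p := range pkgs {
//		if p != nil && p.Pkg.Name() == "main" {
//			roots = append(roots, p.Func("main"), p.Func("init"))
//		}
//	}
//	res := rta.Analyze(roots, true)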
| func Analyze(roots []*ssa.Function, buildCallGraph bool) *Result { |
| if len(roots) == 0 { |
| return nil |
| } |
| |
| r := &rta{ |
| result: &Result{Reachable: make(map[*ssa.Function]struct{ AddrTaken bool })}, |
| prog: roots[0].Prog, |
| } |
| |
| if buildCallGraph { |
| // TODO(adonovan): change callgraph API to eliminate the |
| // notion of a distinguished root node. Some callgraphs |
| // have many roots, or none. |
| r.result.CallGraph = callgraph.New(roots[0]) |
| } |
| |
| // Grab ssa.Function for (*reflect.Value).Call, |
| // if "reflect" is among the dependencies. |
| if reflectPkg := r.prog.ImportedPackage("reflect"); reflectPkg != nil { |
| reflectValue := reflectPkg.Members["Value"].(*ssa.Type) |
| r.reflectValueCall = r.prog.LookupMethod(reflectValue.Object().Type(), reflectPkg.Pkg, "Call") |
| } |
| |
| hasher := typeutil.MakeHasher() |
| r.result.RuntimeTypes.SetHasher(hasher) |
| r.addrTakenFuncsBySig.SetHasher(hasher) |
| r.dynCallSites.SetHasher(hasher) |
| r.invokeSites.SetHasher(hasher) |
| r.concreteTypes.SetHasher(hasher) |
| r.interfaceTypes.SetHasher(hasher) |
| |
| for _, root := range roots { |
| r.addReachable(root, false) |
| } |
| |
| // Visit functions, processing their instructions, and adding |
| // new functions to the worklist, until a fixed point is |
| // reached. |
| var shadow []*ssa.Function // for efficiency, we double-buffer the worklist |
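	// (visitFunc may append to r.worklist via addReachable while we
	// iterate, so each pass walks a stable snapshot, shadow; the swap
	// also reuses the two slices' backing arrays between passes.)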
| for len(r.worklist) > 0 { |
| shadow, r.worklist = r.worklist, shadow[:0] |
| for _, f := range shadow { |
| r.visitFunc(f) |
| } |
| } |
| return r.result |
| } |
| |
| // interfaces(C) returns all currently known interfaces implemented by C. |
| func (r *rta) interfaces(C types.Type) []*types.Interface { |
| // Create an info for C the first time we see it. |
| var cinfo *concreteTypeInfo |
| if v := r.concreteTypes.At(C); v != nil { |
| cinfo = v.(*concreteTypeInfo) |
| } else { |
| mset := r.prog.MethodSets.MethodSet(C) |
| cinfo = &concreteTypeInfo{ |
| C: C, |
| mset: mset, |
| fprint: fingerprint(mset), |
| } |
| r.concreteTypes.Set(C, cinfo) |
| |
| // Ascertain set of interfaces C implements |
| // and update the 'implements' relation. |
| r.interfaceTypes.Iterate(func(I types.Type, v interface{}) { |
| iinfo := v.(*interfaceTypeInfo) |
| if I := I.(*types.Interface); implements(cinfo, iinfo) { |
| iinfo.implementations = append(iinfo.implementations, C) |
| cinfo.implements = append(cinfo.implements, I) |
| } |
| }) |
| } |
| |
| return cinfo.implements |
| } |
| |
| // implementations(I) returns all currently known concrete types that implement I. |
| func (r *rta) implementations(I *types.Interface) []types.Type { |
| // Create an info for I the first time we see it. |
| var iinfo *interfaceTypeInfo |
| if v := r.interfaceTypes.At(I); v != nil { |
| iinfo = v.(*interfaceTypeInfo) |
| } else { |
| mset := r.prog.MethodSets.MethodSet(I) |
| iinfo = &interfaceTypeInfo{ |
| I: I, |
| mset: mset, |
| fprint: fingerprint(mset), |
| } |
| r.interfaceTypes.Set(I, iinfo) |
| |
| // Ascertain set of concrete types that implement I |
| // and update the 'implements' relation. |
| r.concreteTypes.Iterate(func(C types.Type, v interface{}) { |
| cinfo := v.(*concreteTypeInfo) |
| if implements(cinfo, iinfo) { |
| cinfo.implements = append(cinfo.implements, I) |
| iinfo.implementations = append(iinfo.implementations, C) |
| } |
| }) |
| } |
| return iinfo.implementations |
| } |
| |
| // addRuntimeType is called for each concrete type that can be the |
| // dynamic type of some interface or reflect.Value. |
// Adapted from needMethods in go/ssa/builder.go.
| func (r *rta) addRuntimeType(T types.Type, skip bool) { |
| // Never record aliases. |
| T = aliases.Unalias(T) |
| |
| if prev, ok := r.result.RuntimeTypes.At(T).(bool); ok { |
| if skip && !prev { |
| r.result.RuntimeTypes.Set(T, skip) |
| } |
| return |
| } |
| r.result.RuntimeTypes.Set(T, skip) |
| |
| mset := r.prog.MethodSets.MethodSet(T) |
| |
| if _, ok := T.Underlying().(*types.Interface); !ok { |
| // T is a new concrete type. |
| for i, n := 0, mset.Len(); i < n; i++ { |
| sel := mset.At(i) |
| m := sel.Obj() |
| |
| if m.Exported() { |
| // Exported methods are always potentially callable via reflection. |
| r.addReachable(r.prog.MethodValue(sel), true) |
| } |
| } |
| |
| // Add callgraph edge for each existing dynamic |
| // "invoke"-mode call via that interface. |
| for _, I := range r.interfaces(T) { |
| sites, _ := r.invokeSites.At(I).([]ssa.CallInstruction) |
| for _, site := range sites { |
| r.addInvokeEdge(site, T) |
| } |
| } |
| } |
| |
| // Precondition: T is not a method signature (*Signature with Recv()!=nil). |
| // Recursive case: skip => don't call makeMethods(T). |
| // Each package maintains its own set of types it has visited. |
| |
| var n *types.Named |
| switch T := T.(type) { |
| case *types.Named: |
| n = T |
| case *types.Pointer: |
| n, _ = aliases.Unalias(T.Elem()).(*types.Named) |
| } |
| if n != nil { |
| owner := n.Obj().Pkg() |
| if owner == nil { |
| return // built-in error type |
| } |
| } |
| |
| // Recursion over signatures of each exported method. |
| for i := 0; i < mset.Len(); i++ { |
| if mset.At(i).Obj().Exported() { |
| sig := mset.At(i).Type().(*types.Signature) |
| r.addRuntimeType(sig.Params(), true) // skip the Tuple itself |
| r.addRuntimeType(sig.Results(), true) // skip the Tuple itself |
| } |
| } |
| |
| switch t := T.(type) { |
| case *aliases.Alias: |
| panic("unreachable") |
| |
| case *types.Basic: |
| // nop |
| |
| case *types.Interface: |
| // nop---handled by recursion over method set. |
| |
| case *types.Pointer: |
| r.addRuntimeType(t.Elem(), false) |
| |
| case *types.Slice: |
| r.addRuntimeType(t.Elem(), false) |
| |
| case *types.Chan: |
| r.addRuntimeType(t.Elem(), false) |
| |
| case *types.Map: |
| r.addRuntimeType(t.Key(), false) |
| r.addRuntimeType(t.Elem(), false) |
| |
| case *types.Signature: |
| if t.Recv() != nil { |
| panic(fmt.Sprintf("Signature %s has Recv %s", t, t.Recv())) |
| } |
| r.addRuntimeType(t.Params(), true) // skip the Tuple itself |
| r.addRuntimeType(t.Results(), true) // skip the Tuple itself |
| |
| case *types.Named: |
| // A pointer-to-named type can be derived from a named |
| // type via reflection. It may have methods too. |
| r.addRuntimeType(types.NewPointer(T), false) |
| |
| // Consider 'type T struct{S}' where S has methods. |
| // Reflection provides no way to get from T to struct{S}, |
		// only to S, so the method set of struct{S} is unwanted;
		// hence we set the 'skip' flag during the recursion.
| r.addRuntimeType(t.Underlying(), true) |
| |
| case *types.Array: |
| r.addRuntimeType(t.Elem(), false) |
| |
| case *types.Struct: |
| for i, n := 0, t.NumFields(); i < n; i++ { |
| r.addRuntimeType(t.Field(i).Type(), false) |
| } |
| |
| case *types.Tuple: |
| for i, n := 0, t.Len(); i < n; i++ { |
| r.addRuntimeType(t.At(i).Type(), false) |
| } |
| |
| default: |
| panic(T) |
| } |
| } |
| |
| // fingerprint returns a bitmask with one bit set per method id, |
| // enabling 'implements' to quickly reject most candidates. |
| func fingerprint(mset *types.MethodSet) uint64 { |
| var space [64]byte |
| var mask uint64 |
| for i := 0; i < mset.Len(); i++ { |
| method := mset.At(i).Obj() |
| sig := method.Type().(*types.Signature) |
		sum := crc32.ChecksumIEEE(compat.Appendf(space[:0], "%s/%d/%d",
| method.Id(), |
| sig.Params().Len(), |
| sig.Results().Len())) |
| mask |= 1 << (sum % 64) |
| } |
| return mask |
| } |
| |
| // implements reports whether types.Implements(cinfo.C, iinfo.I), |
| // but more efficiently. |
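// For example (a hypothetical worked case): if the interface's methods
// hash to fingerprint bits {3, 17} and the concrete type's methods hash
// to bits {3, 9, 17, 40}, then
//
//	iinfo.fprint & ^cinfo.fprint == 0
//
// holds and the expensive types.Implements check runs; were any
// interface bit missing from the concrete mask, the pair would be
// rejected immediately. Fingerprint collisions can cause only false
// positives, never false negatives, so the fast path is sound.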
func implements(cinfo *concreteTypeInfo, iinfo *interfaceTypeInfo) bool {
| // The concrete type must have at least the methods |
| // (bits) of the interface type. Use a bitwise subset |
| // test to reject most candidates quickly. |
| return iinfo.fprint & ^cinfo.fprint == 0 && types.Implements(cinfo.C, iinfo.I) |
| } |