go/analysis/internal/facts: fact serialization support
Package facts provides an implementation of the Import/Export methods
of the analysis.Pass interface and functions to encode and decode
facts, using Gob encoding, to a file. It will be part of the vet-lite
driver (invoked by go vet) but the same logic has been validated in
other build systems such as Blaze.
Change-Id: I60ef561e84e833b9a3b17c269ab358e7d0800ff3
Reviewed-on: https://go-review.googlesource.com/c/144737
Reviewed-by: Jay Conrod <jayconrod@google.com>
Reviewed-by: Ian Cottrell <iancottrell@google.com>
diff --git a/go/analysis/internal/facts/facts.go b/go/analysis/internal/facts/facts.go
new file mode 100644
index 0000000..c1edc90
--- /dev/null
+++ b/go/analysis/internal/facts/facts.go
@@ -0,0 +1,308 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package facts defines a serializable set of analysis.Fact.
+//
+// It provides a partial implementation of the Fact-related parts of the
+// analysis.Pass interface for use in analysis drivers such as "go vet"
+// and other build systems.
+//
+// The serial format is unspecified and may change, so the same version
+// of this package must be used for reading and writing serialized facts.
+//
+// The handling of facts in the analysis system parallels the handling
+// of type information in the compiler: during compilation of package P,
+// the compiler emits an export data file that describes the type of
+// every object (named thing) defined in package P, plus every object
+// indirectly reachable from one of those objects. Thus the downstream
+// compiler of package Q need only load one export data file per direct
+// import of Q, and it will learn everything about the API of package P
+// and everything it needs to know about the API of P's dependencies.
+//
+// Similarly, analysis of package P emits a fact set containing facts
+// about all objects exported from P, plus additional facts about only
+// those objects of P's dependencies that are reachable from the API of
+// package P; the downstream analysis of Q need only load one fact set
+// per direct import of Q.
+//
+// The notion of "exportedness" that matters here is that of the
+// compiler. According to the language spec, a method pkg.T.f is
+// unexported simply because its name starts with lowercase. But the
+// compiler must nonethless export f so that downstream compilations can
+// accurately ascertain whether pkg.T implements an interface pkg.I
+// defined as interface{f()}. Exported thus means "described in export
+// data".
+//
+package facts
+
+import (
+ "bytes"
+ "encoding/gob"
+ "fmt"
+ "go/types"
+ "io/ioutil"
+ "log"
+ "reflect"
+ "sort"
+ "sync"
+
+ "golang.org/x/tools/go/analysis"
+ "golang.org/x/tools/go/types/objectpath"
+)
+
+const debug = false
+
+// A Set is a set of analysis.Facts.
+//
+// Decode creates a Set of facts by reading from the imports of a given
+// package, and Encode writes out the set. Between these operation,
+// the Import and Export methods will query and update the set.
+//
+// All of Set's methods except String are safe to call concurrently.
+type Set struct {
+ pkg *types.Package
+ mu sync.Mutex
+ m map[key]analysis.Fact
+}
+
+type key struct {
+ pkg *types.Package
+ obj types.Object // (object facts only)
+ t reflect.Type
+}
+
+// ImportObjectFact implements analysis.Pass.ImportObjectFact.
+func (s *Set) ImportObjectFact(obj types.Object, ptr analysis.Fact) bool {
+ if obj == nil {
+ panic("nil object")
+ }
+ key := key{pkg: obj.Pkg(), obj: obj, t: reflect.TypeOf(ptr)}
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if v, ok := s.m[key]; ok {
+ reflect.ValueOf(ptr).Elem().Set(reflect.ValueOf(v).Elem())
+ return true
+ }
+ return false
+}
+
+// ExportObjectFact implements analysis.Pass.ExportObjectFact.
+func (s *Set) ExportObjectFact(obj types.Object, fact analysis.Fact) {
+ if obj.Pkg() != s.pkg {
+ log.Panicf("in package %s: ExportObjectFact(%s, %T): can't set fact on object belonging another package",
+ s.pkg, obj, fact)
+ }
+ key := key{pkg: obj.Pkg(), obj: obj, t: reflect.TypeOf(fact)}
+ s.mu.Lock()
+ s.m[key] = fact // clobber any existing entry
+ s.mu.Unlock()
+}
+
+// ImportPackageFact implements analysis.Pass.ImportPackageFact.
+func (s *Set) ImportPackageFact(pkg *types.Package, ptr analysis.Fact) bool {
+ if pkg == nil {
+ panic("nil package")
+ }
+ key := key{pkg: pkg, t: reflect.TypeOf(ptr)}
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if v, ok := s.m[key]; ok {
+ reflect.ValueOf(ptr).Elem().Set(reflect.ValueOf(v).Elem())
+ return true
+ }
+ return false
+}
+
+// ExportPackageFact implements analysis.Pass.ExportPackageFact.
+func (s *Set) ExportPackageFact(fact analysis.Fact) {
+ key := key{pkg: s.pkg, t: reflect.TypeOf(fact)}
+ s.mu.Lock()
+ s.m[key] = fact // clobber any existing entry
+ s.mu.Unlock()
+}
+
+// gobFact is the Gob declaration of a serialized fact.
+type gobFact struct {
+ PkgPath string // path of package
+ Object objectpath.Path // optional path of object relative to package itself
+ Fact analysis.Fact // type and value of user-defined Fact
+}
+
+// Decode decodes all the facts relevant to the analysis of package pkg.
+// The read function reads serialized fact data from an external source
+// for one of of pkg's direct imports. The empty file is a valid
+// encoding of an empty fact set.
+//
+// It is the caller's responsibility to call gob.Register on all
+// necessary fact types.
+func Decode(pkg *types.Package, read func(packagePath string) ([]byte, error)) (*Set, error) {
+ // Compute the import map for this package.
+ // See the package doc comment.
+ packages := importMap(pkg.Imports())
+
+ // Read facts from imported packages.
+ // Facts may describe indirectly imported packages, or their objects.
+ m := make(map[key]analysis.Fact) // one big bucket
+ for _, imp := range pkg.Imports() {
+ logf := func(format string, args ...interface{}) {
+ if debug {
+ prefix := fmt.Sprintf("in %s, importing %s: ",
+ pkg.Path(), imp.Path())
+ log.Print(prefix, fmt.Sprintf(format, args...))
+ }
+ }
+
+ // Read the gob-encoded facts.
+ data, err := read(imp.Path())
+ if err != nil {
+ return nil, fmt.Errorf("in %s, can't import facts for package %q: %v",
+ pkg.Path(), imp.Path(), err)
+ }
+ if len(data) == 0 {
+ continue // no facts
+ }
+ var gobFacts []gobFact
+ if err := gob.NewDecoder(bytes.NewReader(data)).Decode(&gobFacts); err != nil {
+ return nil, fmt.Errorf("decoding facts for %q: %v", imp.Path(), err)
+ }
+ if debug {
+ logf("decoded %d facts: %v", len(gobFacts), gobFacts)
+ }
+
+ // Parse each one into a key and a Fact.
+ for _, f := range gobFacts {
+ factPkg := packages[f.PkgPath]
+ if factPkg == nil {
+ // Fact relates to a dependency that was
+ // unused in this translation unit. Skip.
+ logf("no package %q; discarding %v", f.PkgPath, f.Fact)
+ continue
+ }
+ key := key{pkg: factPkg, t: reflect.TypeOf(f.Fact)}
+ if f.Object != "" {
+ // object fact
+ obj, err := objectpath.Object(factPkg, f.Object)
+ if err != nil {
+ // (most likely due to unexported object)
+ // TODO(adonovan): audit for other possibilities.
+ logf("no object for path: %v; discarding %s", err, f.Fact)
+ continue
+ }
+ key.obj = obj
+ logf("read %T fact %s for %v", f.Fact, f.Fact, key.obj)
+ } else {
+ // package fact
+ logf("read %T fact %s for %v", f.Fact, f.Fact, factPkg)
+ }
+ m[key] = f.Fact
+ }
+ }
+
+ return &Set{pkg: pkg, m: m}, nil
+}
+
+// Encode encodes a set of facts to a memory buffer.
+//
+// It may fail if one of the Facts could not be gob-encoded, but this is
+// a sign of a bug in an Analyzer.
+func (s *Set) Encode() []byte {
+
+ // TODO(adonovan): opt: use a more efficient encoding
+ // that avoids repeating PkgPath for each fact.
+
+ // Gather all facts, including those from imported packages.
+ var gobFacts []gobFact
+
+ s.mu.Lock()
+ for k, fact := range s.m {
+ if debug {
+ log.Printf("%#v => %s\n", k, fact)
+ }
+ var object objectpath.Path
+ if k.obj != nil {
+ path, err := objectpath.For(k.obj)
+ if err != nil {
+ if debug {
+ log.Printf("discarding fact %s about %s\n", fact, k.obj)
+ }
+ continue // object not accessible from package API; discard fact
+ }
+ object = path
+ }
+ gobFacts = append(gobFacts, gobFact{
+ PkgPath: k.pkg.Path(),
+ Object: object,
+ Fact: fact,
+ })
+ }
+ s.mu.Unlock()
+
+ // Sort facts by (package, object, type) for determinism.
+ sort.Slice(gobFacts, func(i, j int) bool {
+ x, y := gobFacts[i], gobFacts[j]
+ if x.PkgPath != y.PkgPath {
+ return x.PkgPath < y.PkgPath
+ }
+ if x.Object != y.Object {
+ return x.Object < y.Object
+ }
+ tx := reflect.TypeOf(x.Fact)
+ ty := reflect.TypeOf(y.Fact)
+ if tx != ty {
+ return tx.String() < ty.String()
+ }
+ return false // equal
+ })
+
+ var buf bytes.Buffer
+ if len(gobFacts) > 0 {
+ if err := gob.NewEncoder(&buf).Encode(gobFacts); err != nil {
+ // Fact encoding should never fail. Identify the culprit.
+ //
+ // TODO(adonovan): what's the right thing to do here?
+ // The error is clearly a bug, so log.Fatal leads to early
+ // detection, but it could potentially bring down a big
+ // job because of an obscure dynamic bug in a fact.
+ // But perhaps that's fine: other bugs in Analyzers
+ // have the same potential to cause failures.
+ // Alternatively we could discard the bad facts with a
+ // log message, but who reads logs?
+ for _, gf := range gobFacts {
+ if err := gob.NewEncoder(ioutil.Discard).Encode(gf); err != nil {
+ fact := gf.Fact
+ pkgpath := reflect.TypeOf(fact).Elem().PkgPath()
+ log.Panicf("internal error: gob encoding of analysis fact %s failed: %v; please report a bug against fact %T in package %q",
+ fact, err, fact, pkgpath)
+ }
+ }
+ }
+ }
+
+ if debug {
+ log.Printf("package %q: encode %d facts, %d bytes\n",
+ s.pkg.Path(), len(gobFacts), buf.Len())
+ }
+
+ return buf.Bytes()
+}
+
+// String is provided only for debugging, and must not be called
+// concurrent with any Import/Export method.
+func (s *Set) String() string {
+ var buf bytes.Buffer
+ buf.WriteString("{")
+ for k, f := range s.m {
+ if buf.Len() > 1 {
+ buf.WriteString(", ")
+ }
+ if k.obj != nil {
+ buf.WriteString(k.obj.String())
+ } else {
+ buf.WriteString(k.pkg.Path())
+ }
+ fmt.Fprintf(&buf, ": %v", f)
+ }
+ buf.WriteString("}")
+ return buf.String()
+}
diff --git a/go/analysis/internal/facts/facts_test.go b/go/analysis/internal/facts/facts_test.go
new file mode 100644
index 0000000..e21a498
--- /dev/null
+++ b/go/analysis/internal/facts/facts_test.go
@@ -0,0 +1,174 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package facts_test
+
+import (
+ "encoding/gob"
+ "fmt"
+ "go/token"
+ "go/types"
+ "os"
+ "testing"
+
+ "golang.org/x/tools/go/analysis/analysistest"
+ "golang.org/x/tools/go/analysis/internal/facts"
+ "golang.org/x/tools/go/packages"
+)
+
+type myFact struct {
+ S string
+}
+
+func (f *myFact) String() string { return fmt.Sprintf("myFact(%s)", f.S) }
+func (f *myFact) AFact() {}
+
+func TestEncodeDecode(t *testing.T) {
+ gob.Register(new(myFact))
+
+ // c -> b -> a, a2
+ // c does not directly depend on a, but it indirectly uses a.T.
+ //
+ // Package a2 is never loaded directly so it is incomplete.
+ //
+ // We use only types in this example because we rely on
+ // types.Eval to resolve the lookup expressions, and it only
+ // works for types. This is a definite gap in the typechecker API.
+ files := map[string]string{
+ "a/a.go": `package a; type A int; type T int`,
+ "a2/a.go": `package a2; type A2 int; type Unneeded int`,
+ "b/b.go": `package b; import ("a"; "a2"); type B chan a2.A2; type F func() a.T`,
+ "c/c.go": `package c; import "b"; type C []b.B`,
+ }
+ dir, cleanup, err := analysistest.WriteFiles(files)
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer cleanup()
+
+ // factmap represents the passing of encoded facts from one
+ // package to another. In practice one would use the file system.
+ factmap := make(map[string][]byte)
+ read := func(path string) ([]byte, error) { return factmap[path], nil }
+
+ // In the following table, we analyze packages (a, b, c) in order,
+ // look up various objects accessible within each package,
+ // and see if they have a fact. The "analysis" exports a fact
+ // for every object at package level.
+ //
+ // Note: Loop iterations are not independent test cases;
+ // order matters, as we populate factmap.
+ type lookups []struct {
+ objexpr string
+ want string
+ }
+ for _, test := range []struct {
+ path string
+ lookups lookups
+ }{
+ {"a", lookups{
+ {"A", "myFact(a.A)"},
+ }},
+ {"b", lookups{
+ {"a.A", "myFact(a.A)"},
+ {"a.T", "myFact(a.T)"},
+ {"B", "myFact(b.B)"},
+ {"F", "myFact(b.F)"},
+ {"F(nil)()", "myFact(a.T)"}, // (result type of b.F)
+ }},
+ {"c", lookups{
+ {"b.B", "myFact(b.B)"},
+ {"b.F", "myFact(b.F)"},
+ //{"b.F(nil)()", "myFact(a.T)"}, // no fact; TODO(adonovan): investigate
+ {"C", "myFact(c.C)"},
+ {"C{}[0]", "myFact(b.B)"},
+ {"<-(C{}[0])", "no fact"}, // object but no fact (we never "analyze" a2)
+ }},
+ } {
+ // load package
+ pkg, err := load(dir, test.path)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // decode
+ facts, err := facts.Decode(pkg, read)
+ if err != nil {
+ t.Fatalf("Decode failed: %v", err)
+ }
+ if true {
+ t.Logf("decode %s facts = %v", pkg.Path(), facts) // show all facts
+ }
+
+ // export
+ // (one fact for each package-level object)
+ scope := pkg.Scope()
+ for _, name := range scope.Names() {
+ obj := scope.Lookup(name)
+ fact := &myFact{obj.Pkg().Name() + "." + obj.Name()}
+ facts.ExportObjectFact(obj, fact)
+ }
+
+ // import
+ // (after export, because an analyzer may import its own facts)
+ for _, lookup := range test.lookups {
+ fact := new(myFact)
+ var got string
+ if obj := find(pkg, lookup.objexpr); obj == nil {
+ got = "no object"
+ } else if facts.ImportObjectFact(obj, fact) {
+ got = fact.String()
+ } else {
+ got = "no fact"
+ }
+ if got != lookup.want {
+ t.Errorf("in %s, ImportObjectFact(%s, %T) = %s, want %s",
+ pkg.Path(), lookup.objexpr, fact, got, lookup.want)
+ }
+ }
+
+ // encode
+ factmap[pkg.Path()] = facts.Encode()
+ }
+}
+
+func find(p *types.Package, expr string) types.Object {
+ // types.Eval only allows us to compute a TypeName object for an expression.
+ // TODO(adonovan): support other expressions that denote an object:
+ // - an identifier (or qualified ident) for a func, const, or var
+ // - new(T).f for a field or method
+ // I've added CheckExpr in https://go-review.googlesource.com/c/go/+/144677.
+ // If that becomes available, use it.
+
+ // Choose an arbitrary position within the (single-file) package
+ // so that we are within the scope of its import declarations.
+ somepos := p.Scope().Lookup(p.Scope().Names()[0]).Pos()
+ tv, err := types.Eval(token.NewFileSet(), p, somepos, expr)
+ if err != nil {
+ return nil
+ }
+ if n, ok := tv.Type.(*types.Named); ok {
+ return n.Obj()
+ }
+ return nil
+}
+
+func load(dir string, path string) (*types.Package, error) {
+ cfg := &packages.Config{
+ Mode: packages.LoadSyntax,
+ Dir: dir,
+ Env: append(os.Environ(), "GOPATH="+dir, "GO111MODULE=off", "GOPROXY=off"),
+ }
+ pkgs, err := packages.Load(cfg, path)
+ if err != nil {
+ return nil, err
+ }
+ if packages.PrintErrors(pkgs) > 0 {
+ return nil, fmt.Errorf("packages had errors")
+ }
+ if len(pkgs) == 0 {
+ return nil, fmt.Errorf("no package matched %s", path)
+ }
+ return pkgs[0].Types, nil
+}
diff --git a/go/analysis/internal/facts/imports.go b/go/analysis/internal/facts/imports.go
new file mode 100644
index 0000000..34740f4
--- /dev/null
+++ b/go/analysis/internal/facts/imports.go
@@ -0,0 +1,88 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package facts
+
+import "go/types"
+
+// importMap computes the import map for a package by traversing the
+// entire exported API each of its imports.
+//
+// This is a workaround for the fact that we cannot access the map used
+// internally by the types.Importer returned by go/importer. The entries
+// in this map are the packages and objects that may be relevant to the
+// current analysis unit.
+//
+// Packages in the map that are only indirectly imported may be
+// incomplete (!pkg.Complete()).
+//
+func importMap(imports []*types.Package) map[string]*types.Package {
+ objects := make(map[types.Object]bool)
+ packages := make(map[string]*types.Package)
+
+ var addObj func(obj types.Object) bool
+ var addType func(T types.Type)
+
+ addObj = func(obj types.Object) bool {
+ if !objects[obj] {
+ objects[obj] = true
+ addType(obj.Type())
+ if pkg := obj.Pkg(); pkg != nil {
+ packages[pkg.Path()] = pkg
+ }
+ return true
+ }
+ return false
+ }
+
+ addType = func(T types.Type) {
+ switch T := T.(type) {
+ case *types.Basic:
+ // nop
+ case *types.Named:
+ if addObj(T.Obj()) {
+ for i := 0; i < T.NumMethods(); i++ {
+ addObj(T.Method(i))
+ }
+ }
+ case *types.Pointer:
+ addType(T.Elem())
+ case *types.Slice:
+ addType(T.Elem())
+ case *types.Array:
+ addType(T.Elem())
+ case *types.Chan:
+ addType(T.Elem())
+ case *types.Map:
+ addType(T.Key())
+ addType(T.Elem())
+ case *types.Signature:
+ addType(T.Params())
+ addType(T.Results())
+ case *types.Struct:
+ for i := 0; i < T.NumFields(); i++ {
+ addObj(T.Field(i))
+ }
+ case *types.Tuple:
+ for i := 0; i < T.Len(); i++ {
+ addObj(T.At(i))
+ }
+ case *types.Interface:
+ for i := 0; i < T.NumMethods(); i++ {
+ addObj(T.Method(i))
+ }
+ }
+ }
+
+ for _, imp := range imports {
+ packages[imp.Path()] = imp
+
+ scope := imp.Scope()
+ for _, name := range scope.Names() {
+ addObj(scope.Lookup(name))
+ }
+ }
+
+ return packages
+}