go/analysis/internal/facts: fact serialization support

Package facts provides an implementation of the Import/Export methods
of the analysis.Pass interface and functions to encode and decode
facts, using Gob encoding, to a file. It will be part of the vet-lite
driver (invoked by go vet) but the same logic has been validated in
other build systems such as Blaze.

Change-Id: I60ef561e84e833b9a3b17c269ab358e7d0800ff3
Reviewed-on: https://go-review.googlesource.com/c/144737
Reviewed-by: Jay Conrod <jayconrod@google.com>
Reviewed-by: Ian Cottrell <iancottrell@google.com>
diff --git a/go/analysis/internal/facts/facts.go b/go/analysis/internal/facts/facts.go
new file mode 100644
index 0000000..c1edc90
--- /dev/null
+++ b/go/analysis/internal/facts/facts.go
@@ -0,0 +1,308 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package facts defines a serializable set of analysis.Fact.
+//
+// It provides a partial implementation of the Fact-related parts of the
+// analysis.Pass interface for use in analysis drivers such as "go vet"
+// and other build systems.
+//
+// The serial format is unspecified and may change, so the same version
+// of this package must be used for reading and writing serialized facts.
+//
+// The handling of facts in the analysis system parallels the handling
+// of type information in the compiler: during compilation of package P,
+// the compiler emits an export data file that describes the type of
+// every object (named thing) defined in package P, plus every object
+// indirectly reachable from one of those objects. Thus the downstream
+// compiler of package Q need only load one export data file per direct
+// import of Q, and it will learn everything about the API of package P
+// and everything it needs to know about the API of P's dependencies.
+//
+// Similarly, analysis of package P emits a fact set containing facts
+// about all objects exported from P, plus additional facts about only
+// those objects of P's dependencies that are reachable from the API of
+// package P; the downstream analysis of Q need only load one fact set
+// per direct import of Q.
+//
+// The notion of "exportedness" that matters here is that of the
+// compiler. According to the language spec, a method pkg.T.f is
+// unexported simply because its name starts with lowercase. But the
+// compiler must nonethless export f so that downstream compilations can
+// accurately ascertain whether pkg.T implements an interface pkg.I
+// defined as interface{f()}. Exported thus means "described in export
+// data".
+//
+package facts
+
+import (
+	"bytes"
+	"encoding/gob"
+	"fmt"
+	"go/types"
+	"io/ioutil"
+	"log"
+	"reflect"
+	"sort"
+	"sync"
+
+	"golang.org/x/tools/go/analysis"
+	"golang.org/x/tools/go/types/objectpath"
+)
+
+const debug = false
+
+// A Set is a set of analysis.Facts.
+//
+// Decode creates a Set of facts by reading from the imports of a given
+// package, and Encode writes out the set. Between these operation,
+// the Import and Export methods will query and update the set.
+//
+// All of Set's methods except String are safe to call concurrently.
+type Set struct {
+	pkg *types.Package
+	mu  sync.Mutex
+	m   map[key]analysis.Fact
+}
+
+type key struct {
+	pkg *types.Package
+	obj types.Object // (object facts only)
+	t   reflect.Type
+}
+
+// ImportObjectFact implements analysis.Pass.ImportObjectFact.
+func (s *Set) ImportObjectFact(obj types.Object, ptr analysis.Fact) bool {
+	if obj == nil {
+		panic("nil object")
+	}
+	key := key{pkg: obj.Pkg(), obj: obj, t: reflect.TypeOf(ptr)}
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if v, ok := s.m[key]; ok {
+		reflect.ValueOf(ptr).Elem().Set(reflect.ValueOf(v).Elem())
+		return true
+	}
+	return false
+}
+
+// ExportObjectFact implements analysis.Pass.ExportObjectFact.
+func (s *Set) ExportObjectFact(obj types.Object, fact analysis.Fact) {
+	if obj.Pkg() != s.pkg {
+		log.Panicf("in package %s: ExportObjectFact(%s, %T): can't set fact on object belonging another package",
+			s.pkg, obj, fact)
+	}
+	key := key{pkg: obj.Pkg(), obj: obj, t: reflect.TypeOf(fact)}
+	s.mu.Lock()
+	s.m[key] = fact // clobber any existing entry
+	s.mu.Unlock()
+}
+
+// ImportPackageFact implements analysis.Pass.ImportPackageFact.
+func (s *Set) ImportPackageFact(pkg *types.Package, ptr analysis.Fact) bool {
+	if pkg == nil {
+		panic("nil package")
+	}
+	key := key{pkg: pkg, t: reflect.TypeOf(ptr)}
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if v, ok := s.m[key]; ok {
+		reflect.ValueOf(ptr).Elem().Set(reflect.ValueOf(v).Elem())
+		return true
+	}
+	return false
+}
+
+// ExportPackageFact implements analysis.Pass.ExportPackageFact.
+func (s *Set) ExportPackageFact(fact analysis.Fact) {
+	key := key{pkg: s.pkg, t: reflect.TypeOf(fact)}
+	s.mu.Lock()
+	s.m[key] = fact // clobber any existing entry
+	s.mu.Unlock()
+}
+
+// gobFact is the Gob declaration of a serialized fact.
+type gobFact struct {
+	PkgPath string          // path of package
+	Object  objectpath.Path // optional path of object relative to package itself
+	Fact    analysis.Fact   // type and value of user-defined Fact
+}
+
+// Decode decodes all the facts relevant to the analysis of package pkg.
+// The read function reads serialized fact data from an external source
+// for one of of pkg's direct imports. The empty file is a valid
+// encoding of an empty fact set.
+//
+// It is the caller's responsibility to call gob.Register on all
+// necessary fact types.
+func Decode(pkg *types.Package, read func(packagePath string) ([]byte, error)) (*Set, error) {
+	// Compute the import map for this package.
+	// See the package doc comment.
+	packages := importMap(pkg.Imports())
+
+	// Read facts from imported packages.
+	// Facts may describe indirectly imported packages, or their objects.
+	m := make(map[key]analysis.Fact) // one big bucket
+	for _, imp := range pkg.Imports() {
+		logf := func(format string, args ...interface{}) {
+			if debug {
+				prefix := fmt.Sprintf("in %s, importing %s: ",
+					pkg.Path(), imp.Path())
+				log.Print(prefix, fmt.Sprintf(format, args...))
+			}
+		}
+
+		// Read the gob-encoded facts.
+		data, err := read(imp.Path())
+		if err != nil {
+			return nil, fmt.Errorf("in %s, can't import facts for package %q: %v",
+				pkg.Path(), imp.Path(), err)
+		}
+		if len(data) == 0 {
+			continue // no facts
+		}
+		var gobFacts []gobFact
+		if err := gob.NewDecoder(bytes.NewReader(data)).Decode(&gobFacts); err != nil {
+			return nil, fmt.Errorf("decoding facts for %q: %v", imp.Path(), err)
+		}
+		if debug {
+			logf("decoded %d facts: %v", len(gobFacts), gobFacts)
+		}
+
+		// Parse each one into a key and a Fact.
+		for _, f := range gobFacts {
+			factPkg := packages[f.PkgPath]
+			if factPkg == nil {
+				// Fact relates to a dependency that was
+				// unused in this translation unit. Skip.
+				logf("no package %q; discarding %v", f.PkgPath, f.Fact)
+				continue
+			}
+			key := key{pkg: factPkg, t: reflect.TypeOf(f.Fact)}
+			if f.Object != "" {
+				// object fact
+				obj, err := objectpath.Object(factPkg, f.Object)
+				if err != nil {
+					// (most likely due to unexported object)
+					// TODO(adonovan): audit for other possibilities.
+					logf("no object for path: %v; discarding %s", err, f.Fact)
+					continue
+				}
+				key.obj = obj
+				logf("read %T fact %s for %v", f.Fact, f.Fact, key.obj)
+			} else {
+				// package fact
+				logf("read %T fact %s for %v", f.Fact, f.Fact, factPkg)
+			}
+			m[key] = f.Fact
+		}
+	}
+
+	return &Set{pkg: pkg, m: m}, nil
+}
+
+// Encode encodes a set of facts to a memory buffer.
+//
+// It may fail if one of the Facts could not be gob-encoded, but this is
+// a sign of a bug in an Analyzer.
+func (s *Set) Encode() []byte {
+
+	// TODO(adonovan): opt: use a more efficient encoding
+	// that avoids repeating PkgPath for each fact.
+
+	// Gather all facts, including those from imported packages.
+	var gobFacts []gobFact
+
+	s.mu.Lock()
+	for k, fact := range s.m {
+		if debug {
+			log.Printf("%#v => %s\n", k, fact)
+		}
+		var object objectpath.Path
+		if k.obj != nil {
+			path, err := objectpath.For(k.obj)
+			if err != nil {
+				if debug {
+					log.Printf("discarding fact %s about %s\n", fact, k.obj)
+				}
+				continue // object not accessible from package API; discard fact
+			}
+			object = path
+		}
+		gobFacts = append(gobFacts, gobFact{
+			PkgPath: k.pkg.Path(),
+			Object:  object,
+			Fact:    fact,
+		})
+	}
+	s.mu.Unlock()
+
+	// Sort facts by (package, object, type) for determinism.
+	sort.Slice(gobFacts, func(i, j int) bool {
+		x, y := gobFacts[i], gobFacts[j]
+		if x.PkgPath != y.PkgPath {
+			return x.PkgPath < y.PkgPath
+		}
+		if x.Object != y.Object {
+			return x.Object < y.Object
+		}
+		tx := reflect.TypeOf(x.Fact)
+		ty := reflect.TypeOf(y.Fact)
+		if tx != ty {
+			return tx.String() < ty.String()
+		}
+		return false // equal
+	})
+
+	var buf bytes.Buffer
+	if len(gobFacts) > 0 {
+		if err := gob.NewEncoder(&buf).Encode(gobFacts); err != nil {
+			// Fact encoding should never fail. Identify the culprit.
+			//
+			// TODO(adonovan): what's the right thing to do here?
+			// The error is clearly a bug, so log.Fatal leads to early
+			// detection, but it could potentially bring down a big
+			// job because of an obscure dynamic bug in a fact.
+			// But perhaps that's fine: other bugs in Analyzers
+			// have the same potential to cause failures.
+			// Alternatively we could discard the bad facts with a
+			// log message, but who reads logs?
+			for _, gf := range gobFacts {
+				if err := gob.NewEncoder(ioutil.Discard).Encode(gf); err != nil {
+					fact := gf.Fact
+					pkgpath := reflect.TypeOf(fact).Elem().PkgPath()
+					log.Panicf("internal error: gob encoding of analysis fact %s failed: %v; please report a bug against fact %T in package %q",
+						fact, err, fact, pkgpath)
+				}
+			}
+		}
+	}
+
+	if debug {
+		log.Printf("package %q: encode %d facts, %d bytes\n",
+			s.pkg.Path(), len(gobFacts), buf.Len())
+	}
+
+	return buf.Bytes()
+}
+
+// String is provided only for debugging, and must not be called
+// concurrent with any Import/Export method.
+func (s *Set) String() string {
+	var buf bytes.Buffer
+	buf.WriteString("{")
+	for k, f := range s.m {
+		if buf.Len() > 1 {
+			buf.WriteString(", ")
+		}
+		if k.obj != nil {
+			buf.WriteString(k.obj.String())
+		} else {
+			buf.WriteString(k.pkg.Path())
+		}
+		fmt.Fprintf(&buf, ": %v", f)
+	}
+	buf.WriteString("}")
+	return buf.String()
+}
diff --git a/go/analysis/internal/facts/facts_test.go b/go/analysis/internal/facts/facts_test.go
new file mode 100644
index 0000000..e21a498
--- /dev/null
+++ b/go/analysis/internal/facts/facts_test.go
@@ -0,0 +1,174 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package facts_test
+
+import (
+	"encoding/gob"
+	"fmt"
+	"go/token"
+	"go/types"
+	"os"
+	"testing"
+
+	"golang.org/x/tools/go/analysis/analysistest"
+	"golang.org/x/tools/go/analysis/internal/facts"
+	"golang.org/x/tools/go/packages"
+)
+
+type myFact struct {
+	S string
+}
+
+func (f *myFact) String() string { return fmt.Sprintf("myFact(%s)", f.S) }
+func (f *myFact) AFact()         {}
+
+func TestEncodeDecode(t *testing.T) {
+	gob.Register(new(myFact))
+
+	// c -> b -> a, a2
+	// c does not directly depend on a, but it indirectly uses a.T.
+	//
+	// Package a2 is never loaded directly so it is incomplete.
+	//
+	// We use only types in this example because we rely on
+	// types.Eval to resolve the lookup expressions, and it only
+	// works for types. This is a definite gap in the typechecker API.
+	files := map[string]string{
+		"a/a.go":  `package a; type A int; type T int`,
+		"a2/a.go": `package a2; type A2 int; type Unneeded int`,
+		"b/b.go":  `package b; import ("a"; "a2"); type B chan a2.A2; type F func() a.T`,
+		"c/c.go":  `package c; import "b"; type C []b.B`,
+	}
+	dir, cleanup, err := analysistest.WriteFiles(files)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer cleanup()
+
+	// factmap represents the passing of encoded facts from one
+	// package to another. In practice one would use the file system.
+	factmap := make(map[string][]byte)
+	read := func(path string) ([]byte, error) { return factmap[path], nil }
+
+	// In the following table, we analyze packages (a, b, c) in order,
+	// look up various objects accessible within each package,
+	// and see if they have a fact.  The "analysis" exports a fact
+	// for every object at package level.
+	//
+	// Note: Loop iterations are not independent test cases;
+	// order matters, as we populate factmap.
+	type lookups []struct {
+		objexpr string
+		want    string
+	}
+	for _, test := range []struct {
+		path    string
+		lookups lookups
+	}{
+		{"a", lookups{
+			{"A", "myFact(a.A)"},
+		}},
+		{"b", lookups{
+			{"a.A", "myFact(a.A)"},
+			{"a.T", "myFact(a.T)"},
+			{"B", "myFact(b.B)"},
+			{"F", "myFact(b.F)"},
+			{"F(nil)()", "myFact(a.T)"}, // (result type of b.F)
+		}},
+		{"c", lookups{
+			{"b.B", "myFact(b.B)"},
+			{"b.F", "myFact(b.F)"},
+			//{"b.F(nil)()", "myFact(a.T)"}, // no fact; TODO(adonovan): investigate
+			{"C", "myFact(c.C)"},
+			{"C{}[0]", "myFact(b.B)"},
+			{"<-(C{}[0])", "no fact"}, // object but no fact (we never "analyze" a2)
+		}},
+	} {
+		// load package
+		pkg, err := load(dir, test.path)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		// decode
+		facts, err := facts.Decode(pkg, read)
+		if err != nil {
+			t.Fatalf("Decode failed: %v", err)
+		}
+		if true {
+			t.Logf("decode %s facts = %v", pkg.Path(), facts) // show all facts
+		}
+
+		// export
+		// (one fact for each package-level object)
+		scope := pkg.Scope()
+		for _, name := range scope.Names() {
+			obj := scope.Lookup(name)
+			fact := &myFact{obj.Pkg().Name() + "." + obj.Name()}
+			facts.ExportObjectFact(obj, fact)
+		}
+
+		// import
+		// (after export, because an analyzer may import its own facts)
+		for _, lookup := range test.lookups {
+			fact := new(myFact)
+			var got string
+			if obj := find(pkg, lookup.objexpr); obj == nil {
+				got = "no object"
+			} else if facts.ImportObjectFact(obj, fact) {
+				got = fact.String()
+			} else {
+				got = "no fact"
+			}
+			if got != lookup.want {
+				t.Errorf("in %s, ImportObjectFact(%s, %T) = %s, want %s",
+					pkg.Path(), lookup.objexpr, fact, got, lookup.want)
+			}
+		}
+
+		// encode
+		factmap[pkg.Path()] = facts.Encode()
+	}
+}
+
+func find(p *types.Package, expr string) types.Object {
+	// types.Eval only allows us to compute a TypeName object for an expression.
+	// TODO(adonovan): support other expressions that denote an object:
+	// - an identifier (or qualified ident) for a func, const, or var
+	// - new(T).f for a field or method
+	// I've added CheckExpr in https://go-review.googlesource.com/c/go/+/144677.
+	// If that becomes available, use it.
+
+	// Choose an arbitrary position within the (single-file) package
+	// so that we are within the scope of its import declarations.
+	somepos := p.Scope().Lookup(p.Scope().Names()[0]).Pos()
+	tv, err := types.Eval(token.NewFileSet(), p, somepos, expr)
+	if err != nil {
+		return nil
+	}
+	if n, ok := tv.Type.(*types.Named); ok {
+		return n.Obj()
+	}
+	return nil
+}
+
+func load(dir string, path string) (*types.Package, error) {
+	cfg := &packages.Config{
+		Mode: packages.LoadSyntax,
+		Dir:  dir,
+		Env:  append(os.Environ(), "GOPATH="+dir, "GO111MODULE=off", "GOPROXY=off"),
+	}
+	pkgs, err := packages.Load(cfg, path)
+	if err != nil {
+		return nil, err
+	}
+	if packages.PrintErrors(pkgs) > 0 {
+		return nil, fmt.Errorf("packages had errors")
+	}
+	if len(pkgs) == 0 {
+		return nil, fmt.Errorf("no package matched %s", path)
+	}
+	return pkgs[0].Types, nil
+}
diff --git a/go/analysis/internal/facts/imports.go b/go/analysis/internal/facts/imports.go
new file mode 100644
index 0000000..34740f4
--- /dev/null
+++ b/go/analysis/internal/facts/imports.go
@@ -0,0 +1,88 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package facts
+
+import "go/types"
+
+// importMap computes the import map for a package by traversing the
+// entire exported API each of its imports.
+//
+// This is a workaround for the fact that we cannot access the map used
+// internally by the types.Importer returned by go/importer. The entries
+// in this map are the packages and objects that may be relevant to the
+// current analysis unit.
+//
+// Packages in the map that are only indirectly imported may be
+// incomplete (!pkg.Complete()).
+//
+func importMap(imports []*types.Package) map[string]*types.Package {
+	objects := make(map[types.Object]bool)
+	packages := make(map[string]*types.Package)
+
+	var addObj func(obj types.Object) bool
+	var addType func(T types.Type)
+
+	addObj = func(obj types.Object) bool {
+		if !objects[obj] {
+			objects[obj] = true
+			addType(obj.Type())
+			if pkg := obj.Pkg(); pkg != nil {
+				packages[pkg.Path()] = pkg
+			}
+			return true
+		}
+		return false
+	}
+
+	addType = func(T types.Type) {
+		switch T := T.(type) {
+		case *types.Basic:
+			// nop
+		case *types.Named:
+			if addObj(T.Obj()) {
+				for i := 0; i < T.NumMethods(); i++ {
+					addObj(T.Method(i))
+				}
+			}
+		case *types.Pointer:
+			addType(T.Elem())
+		case *types.Slice:
+			addType(T.Elem())
+		case *types.Array:
+			addType(T.Elem())
+		case *types.Chan:
+			addType(T.Elem())
+		case *types.Map:
+			addType(T.Key())
+			addType(T.Elem())
+		case *types.Signature:
+			addType(T.Params())
+			addType(T.Results())
+		case *types.Struct:
+			for i := 0; i < T.NumFields(); i++ {
+				addObj(T.Field(i))
+			}
+		case *types.Tuple:
+			for i := 0; i < T.Len(); i++ {
+				addObj(T.At(i))
+			}
+		case *types.Interface:
+			for i := 0; i < T.NumMethods(); i++ {
+				addObj(T.Method(i))
+			}
+		}
+	}
+
+	for _, imp := range imports {
+		packages[imp.Path()] = imp
+
+		scope := imp.Scope()
+		for _, name := range scope.Names() {
+			addObj(scope.Lookup(name))
+		}
+	}
+
+	return packages
+}