go/analysis: a new API for analysis tools

This CL contains just the API, the validate function,
and two example analyses, findcall and pkglemma.

Change-Id: Ia1f2652647050b1e0e15dad8b9ae10cf1a5fbdbc
Synopsis: go-review.googlesource.com/c/tools/+/134935
Design:   docs.google.com/document/d/1-azPLXaLgTCKeKDNg0HVMq2ovMlD-e7n1ZHzZVzOlJk
Reviewed-on: https://go-review.googlesource.com/135635
Reviewed-by: Ian Cottrell <iancottrell@google.com>
diff --git a/go/analysis/analysis.go b/go/analysis/analysis.go
new file mode 100644
index 0000000..e103fde
--- /dev/null
+++ b/go/analysis/analysis.go
@@ -0,0 +1,212 @@
+// The analysis package defines a uniform interface for static checkers
+// of Go source code. By implementing a common interface, checkers from
+// a variety of sources can be easily selected, incorporated, and reused
+// in a wide range of programs including command-line tools, text
+// editors and IDEs, build systems, test frameworks, code review tools,
+// and batch pipelines for large code bases. For the design, see
+// https://docs.google.com/document/d/1-azPLXaLgTCKeKDNg0HVMq2ovMlD-e7n1ZHzZVzOlJk
+//
+// Each analysis is invoked once per Go package, and is provided the
+// abstract syntax trees (ASTs) and type information for that package.
+//
+// The principal data types of this package are structs, not interfaces,
+// to permit later addition of optional fields as the API evolves.
+package analysis
+
+import (
+	"flag"
+	"fmt"
+	"go/ast"
+	"go/token"
+	"go/types"
+	"reflect"
+)
+
+// An Analysis describes an analysis function and its options.
+type Analysis struct {
+	// The Name of the analysis must be a valid Go identifier
+	// as it may appear in command-line flags, URLs, and so on.
+	Name string
+
+	// Doc is the documentation for the analysis.
+	Doc string
+
+	// Flags defines any flags accepted by the analysis.
+	// The manner in which these flags are exposed to the user
+	// depends on the driver which runs the analysis.
+	Flags flag.FlagSet
+
+	// Run applies the analysis to a package.
+	// It returns an error if the analysis failed.
+	Run func(*Unit) error
+
+	// RunDespiteErrors allows the driver to invoke
+	// the Run method of this analysis even on a
+	// package that contains parse or type errors.
+	RunDespiteErrors bool
+
+	// Requires is a set of analyses that must run successfully
+	// before this one on a given package. This analysis may inspect
+	// the outputs produced by each analysis in Requires.
+	// The graph over analyses implied by Requires edges must be acyclic.
+	//
+	// Requires establishes a "horizontal" dependency between
+	// analysis units (different analyses, same package).
+	Requires []*Analysis
+
+	// OutputType is the type of the optional Output value
+	// computed by this analysis and stored in Unit.Output.
+	// (The Output is provided as an Input to
+	// each analysis that Requires this one.)
+	OutputType reflect.Type
+
+	// LemmaTypes is the set of types of lemmas produced and
+	// consumed by this analysis. An analysis that uses lemmas
+	// may assume that its import dependencies have been
+	// similarly analyzed before it runs. Lemmas are pointers.
+	//
+	// LemmaTypes establishes a "vertical" dependency between
+	// analysis units (same analysis, different packages).
+	LemmaTypes []reflect.Type
+}
+
+func (a *Analysis) String() string { return a.Name }
+
+// A Unit provides information to the Run function that
+// applies a specific analysis to a single Go package.
+//
+// It forms the interface between the analysis logic and the driver
+// program, and has both input and an output components.
+type Unit struct {
+	// -- inputs --
+
+	Analysis *Analysis // the identity of the current analysis
+
+	// syntax and type information
+	Fset   *token.FileSet // file position information
+	Syntax []*ast.File    // the abstract syntax tree of each file
+	Pkg    *types.Package // type information about the package
+	Info   *types.Info    // type information about the syntax trees
+
+	// Inputs provides the inputs to this analysis unit, which are
+	// the corresponding outputs of its prerequisite analysis.
+	// The map keys are the elements of Analysis.Required,
+	// and the type of each corresponding value is the required
+	// analysis's OutputType.
+	Inputs map[*Analysis]interface{}
+
+	// ObjectLemma retrieves a lemma associated with obj.
+	// Given a value ptr of type *T, where *T satisfies Lemma,
+	// ObjectLemma copies the value to *ptr.
+	//
+	// ObjectLemma may panic if applied to a lemma type that
+	// the analysis did not declare among its LemmaTypes,
+	// or if called after analysis of the unit is complete.
+	//
+	// ObjectLemma is not concurrency-safe.
+	ObjectLemma func(obj types.Object, lemma Lemma) bool
+
+	// PackageLemma retrives a lemma associated with package pkg,
+	// which must be this package or one if its dependencies.
+	// See comments for ObjectLemma.
+	PackageLemma func(pkg *types.Package, lemma Lemma) bool
+
+	// -- outputs --
+
+	// Findings is a list of findings about specific locations
+	// in the analyzed source code, such as potential mistakes.
+	// It is populated by the Run function.
+	Findings []*Finding
+
+	// SetObjectLemma associates a lemma of type *T with the obj,
+	// replacing any previous lemma of that type.
+	//
+	// SetObjectLemma panics if the lemma's type is not among
+	// Analysis.LemmaTypes, or if obj does not belong to the package
+	// being analyzed, or if it is called after analysis of the unit
+	// is complete.
+	//
+	// SetObjectLemma is not concurrency-safe.
+	SetObjectLemma func(obj types.Object, lemma Lemma)
+
+	// SetPackageLemma associates a lemma with the current package.
+	// See comments for SetObjectLemma.
+	SetPackageLemma func(lemma Lemma)
+
+	// Output is an immutable result computed by this analysis unit
+	// and set by the Run function.
+	// It will be made available as an input to any analysis that
+	// depends directly on this one; see Analysis.Requires.
+	// Its type must match Analysis.OutputType.
+	//
+	// Outputs are available as Inputs to later analyses of the
+	// same package. To pass analysis results between packages (and
+	// thus potentially between address spaces), use Lemmas, which
+	// are serializable.
+	Output interface{}
+
+	/* Further fields may be added in future. */
+	// For example, suggested or applied refactorings.
+}
+
+// Findingf is a helper function that creates a new Finding using the
+// specified position and formatted error message, appends it to
+// unit.Findings, and returns it.
+func (unit *Unit) Findingf(pos token.Pos, format string, args ...interface{}) *Finding {
+	msg := fmt.Sprintf(format, args...)
+	f := &Finding{Pos: pos, Message: msg}
+	unit.Findings = append(unit.Findings, f)
+	return f
+}
+
+func (unit *Unit) String() string {
+	return fmt.Sprintf("%s@%s", unit.Analysis.Name, unit.Pkg.Path())
+}
+
+// A Lemma is an intermediate fact produced during analysis.
+//
+// Each lemma is associated with a named declaration (a types.Object).
+// A single object may have multiple associated lemmas, but only one of
+// any particular lemma type.
+//
+// A Lemma represents a predicate such as "never returns", but does not
+// represent the subject of the predicate such as "function F".
+//
+// Lemmas may be produced in one analysis unit and consumed by another
+// analysis unit even if these are in different address spaces.
+// If package P imports Q, all lemmas about objects of Q produced during
+// analysis of that package will be available during later analysis of P.
+// Lemmas are analogous to type export data in a build system:
+// just as export data enables separate compilation of several units,
+// lemmas enable "separate analysis".
+//
+// Each unit of analysis starts with the set of lemmas produced by the
+// same analysis applied to the packages directly imported by the
+// current one. The analysis may add additional lemmas to the set, and
+// they may be exported in turn. An analysis's Run function may retrieve
+// lemmas by calling Unit.Lemma and set them using Unit.SetLemma.
+//
+// Each type of Lemma may be produced by at most one Analysis.
+// Lemmas are logically private to their Analysis; to pass values
+// between different analysis, use the Input/Output mechanism.
+//
+// A Lemma type must be a pointer. (Unit.GetLemma relies on it.)
+// Lemmas are encoded and decoded using encoding/gob.
+// A Lemma may implement the GobEncoder/GobDecoder interfaces
+// to customize its encoding; Lemma encoding should not fail.
+//
+// A Lemma should not be modified once passed to SetLemma.
+type Lemma interface {
+	IsLemma() // dummy method to avoid type errors
+}
+
+// A Finding is a message associated with a source location.
+//
+// An Analysis may return a variety of findings; the optional Category,
+// which should be a constant, may be used to classify them.
+// It is primarily intended to make it easy to look up documentation.
+type Finding struct {
+	Pos      token.Pos
+	Category string // optional
+	Message  string
+}
diff --git a/go/analysis/plugin/README b/go/analysis/plugin/README
new file mode 100644
index 0000000..add86bd
--- /dev/null
+++ b/go/analysis/plugin/README
@@ -0,0 +1,8 @@
+
+This directory does not contain a Go package,
+but acts as a container for various analyses
+that implement the golang.org/x/tools/go/analysis
+API and may be imported into an analysis tool.
+
+By convention, each package foo provides the analysis,
+and each command foo/cmd/foo provides a standalone driver.
diff --git a/go/analysis/plugin/findcall/findcall.go b/go/analysis/plugin/findcall/findcall.go
new file mode 100644
index 0000000..c794255
--- /dev/null
+++ b/go/analysis/plugin/findcall/findcall.go
@@ -0,0 +1,45 @@
+// The findcall package is a trivial example and test of an analyzer of
+// Go source code. It reports a finding for every call to a function or
+// method of the name specified by its --name flag.
+package findcall
+
+import (
+	"go/ast"
+
+	"golang.org/x/tools/go/analysis"
+)
+
+var Analysis = &analysis.Analysis{
+	Name:             "findcall",
+	Doc:              "find calls to a particular function",
+	Run:              findcall,
+	RunDespiteErrors: true,
+}
+
+var name = "println" // --name flag
+
+func init() {
+	Analysis.Flags.StringVar(&name, "name", name, "name of the function to find")
+}
+
+func findcall(unit *analysis.Unit) error {
+	for _, f := range unit.Syntax {
+		ast.Inspect(f, func(n ast.Node) bool {
+			if call, ok := n.(*ast.CallExpr); ok {
+				var id *ast.Ident
+				switch fun := call.Fun.(type) {
+				case *ast.Ident:
+					id = fun
+				case *ast.SelectorExpr:
+					id = fun.Sel
+				}
+				if id != nil && !unit.Info.Types[id].IsType() && id.Name == name {
+					unit.Findingf(call.Lparen, "call of %s(...)", id.Name)
+				}
+			}
+			return true
+		})
+	}
+
+	return nil
+}
diff --git a/go/analysis/plugin/pkglemma/pkglemma.go b/go/analysis/plugin/pkglemma/pkglemma.go
new file mode 100644
index 0000000..94b4e17
--- /dev/null
+++ b/go/analysis/plugin/pkglemma/pkglemma.go
@@ -0,0 +1,102 @@
+// The pkglemma package is a demonstration and test of the package lemma
+// mechanism.
+//
+// The output of the pkglemma analysis is a set of key/values pairs
+// gathered from the analyzed package and its imported dependencies.
+// Each key/value pair comes from a top-level constant declaration
+// whose name starts with "_".  For example:
+//
+//      package p
+//
+// 	const _greeting  = "hello"
+// 	const _audience  = "world"
+//
+// the pkglemma analysis output for package p would be:
+//
+//   {"greeting": "hello", "audience": "world"}.
+//
+// In addition, the analysis reports a finding at each import
+// showing which key/value pairs it contributes.
+package pkglemma
+
+import (
+	"fmt"
+	"go/ast"
+	"go/token"
+	"go/types"
+	"reflect"
+	"sort"
+	"strings"
+
+	"golang.org/x/tools/go/analysis"
+)
+
+var Analysis = &analysis.Analysis{
+	Name:       "pkglemma",
+	Doc:        "gather name/value pairs from constant declarations",
+	Run:        run,
+	LemmaTypes: []reflect.Type{reflect.TypeOf(new(note))},
+	OutputType: reflect.TypeOf(map[string]string{}),
+}
+
+// A note is a package-level lemma that records
+// key/value pairs accumulated from constant
+// declarations in this package and its dependencies.
+type note struct {
+	M map[string]string
+}
+
+func (*note) IsLemma() {}
+
+func run(unit *analysis.Unit) error {
+	m := make(map[string]string)
+
+	// At each import, print the lemma from the imported
+	// package and accumulate its information into m.
+	doImport := func(spec *ast.ImportSpec) {
+		pkg := unit.Info.Defs[spec.Name].(*types.PkgName).Imported()
+		var lemma note
+		if unit.PackageLemma(pkg, &lemma) {
+			var lines []string
+			for k, v := range lemma.M {
+				m[k] = v
+				lines = append(lines, fmt.Sprintf("%s=%s", k, v))
+			}
+			sort.Strings(lines)
+			unit.Findingf(spec.Pos(), "%s", strings.Join(lines, " "))
+		}
+	}
+
+	// At each "const _name = value", add a fact into m.
+	doConst := func(spec *ast.ValueSpec) {
+		if len(spec.Names) == len(spec.Values) {
+			for i := range spec.Names {
+				name := spec.Names[i].Name
+				if strings.HasPrefix(name, "_") {
+					m[name[1:]] = unit.Info.Types[spec.Values[i]].Value.String()
+				}
+			}
+		}
+	}
+
+	for _, f := range unit.Syntax {
+		for _, decl := range f.Decls {
+			if decl, ok := decl.(*ast.GenDecl); ok {
+				for _, spec := range decl.Specs {
+					switch decl.Tok {
+					case token.IMPORT:
+						doImport(spec.(*ast.ImportSpec))
+					case token.CONST:
+						doConst(spec.(*ast.ValueSpec))
+					}
+				}
+			}
+		}
+	}
+
+	unit.Output = m
+
+	unit.SetPackageLemma(&note{m})
+
+	return nil
+}
diff --git a/go/analysis/validate.go b/go/analysis/validate.go
new file mode 100644
index 0000000..7fbb3b5
--- /dev/null
+++ b/go/analysis/validate.go
@@ -0,0 +1,97 @@
+package analysis
+
+import (
+	"fmt"
+	"reflect"
+	"unicode"
+)
+
+// Validate reports an error if any of the analyses are misconfigured.
+// Checks include:
+// - that the name is a valid identifier;
+// - that analysis names are unique;
+// - that the Requires graph is acylic;
+// - that analyses' lemma and output types are unique.
+// - that each lemma type is a pointer.
+func Validate(analyses []*Analysis) error {
+	names := make(map[string]bool)
+
+	// Map each lemma/output type to its sole generating analysis.
+	lemmaTypes := make(map[reflect.Type]*Analysis)
+	outputTypes := make(map[reflect.Type]*Analysis)
+
+	// Traverse the Requires graph, depth first.
+	color := make(map[*Analysis]uint8) // 0=white 1=grey 2=black
+	var visit func(a *Analysis) error
+	visit = func(a *Analysis) error {
+		if a == nil {
+			return fmt.Errorf("nil *Analysis")
+		}
+		if color[a] == 0 { // white
+			color[a] = 1 // grey
+
+			// names
+			if !validIdent(a.Name) {
+				return fmt.Errorf("invalid analysis name %q", a)
+			}
+			if names[a.Name] {
+				return fmt.Errorf("duplicate analysis name %q", a)
+			}
+			names[a.Name] = true
+
+			if a.Doc == "" {
+				return fmt.Errorf("analysis %q is undocumented", a)
+			}
+
+			// lemma types
+			for _, t := range a.LemmaTypes {
+				if t == nil {
+					return fmt.Errorf("analysis %s has nil LemmaType", a)
+				}
+				if prev := lemmaTypes[t]; prev != nil {
+					return fmt.Errorf("lemma type %s registered by two analyses: %v, %v",
+						t, a, prev)
+				}
+				if t.Kind() != reflect.Ptr {
+					return fmt.Errorf("%s: lemma type %s is not a pointer", a, t)
+				}
+				lemmaTypes[t] = a
+			}
+
+			// output types
+			if a.OutputType != nil {
+				if prev := outputTypes[a.OutputType]; prev != nil {
+					return fmt.Errorf("output type %s registered by two analyses: %v, %v",
+						a.OutputType, a, prev)
+				}
+				outputTypes[a.OutputType] = a
+			}
+
+			// recursion
+			for i, req := range a.Requires {
+				if err := visit(req); err != nil {
+					return fmt.Errorf("%s.Requires[%d]: %v", a.Name, i, err)
+				}
+			}
+			color[a] = 2 // black
+		}
+
+		return nil
+	}
+	for _, a := range analyses {
+		if err := visit(a); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+func validIdent(name string) bool {
+	for i, r := range name {
+		if !(r == '_' || unicode.IsLetter(r) || i > 0 && unicode.IsDigit(r)) {
+			return false
+		}
+	}
+	return name != ""
+}