go/analysis: a new API for analysis tools
This CL contains just the API, the validate function,
and two example analyses, findcall and pkglemma.
Change-Id: Ia1f2652647050b1e0e15dad8b9ae10cf1a5fbdbc
Synopsis: go-review.googlesource.com/c/tools/+/134935
Design: docs.google.com/document/d/1-azPLXaLgTCKeKDNg0HVMq2ovMlD-e7n1ZHzZVzOlJk
Reviewed-on: https://go-review.googlesource.com/135635
Reviewed-by: Ian Cottrell <iancottrell@google.com>
diff --git a/go/analysis/analysis.go b/go/analysis/analysis.go
new file mode 100644
index 0000000..e103fde
--- /dev/null
+++ b/go/analysis/analysis.go
@@ -0,0 +1,212 @@
+// The analysis package defines a uniform interface for static checkers
+// of Go source code. By implementing a common interface, checkers from
+// a variety of sources can be easily selected, incorporated, and reused
+// in a wide range of programs including command-line tools, text
+// editors and IDEs, build systems, test frameworks, code review tools,
+// and batch pipelines for large code bases. For the design, see
+// https://docs.google.com/document/d/1-azPLXaLgTCKeKDNg0HVMq2ovMlD-e7n1ZHzZVzOlJk
+//
+// Each analysis is invoked once per Go package, and is provided the
+// abstract syntax trees (ASTs) and type information for that package.
+//
+// The principal data types of this package are structs, not interfaces,
+// to permit later addition of optional fields as the API evolves.
+package analysis
+
+import (
+ "flag"
+ "fmt"
+ "go/ast"
+ "go/token"
+ "go/types"
+ "reflect"
+)
+
+// An Analysis describes an analysis function and its options.
+type Analysis struct {
+ // The Name of the analysis must be a valid Go identifier
+ // as it may appear in command-line flags, URLs, and so on.
+ Name string
+
+ // Doc is the documentation for the analysis.
+ Doc string
+
+ // Flags defines any flags accepted by the analysis.
+ // The manner in which these flags are exposed to the user
+ // depends on the driver which runs the analysis.
+ Flags flag.FlagSet
+
+ // Run applies the analysis to a package.
+ // It returns an error if the analysis failed.
+ Run func(*Unit) error
+
+ // RunDespiteErrors allows the driver to invoke
+ // the Run method of this analysis even on a
+ // package that contains parse or type errors.
+ RunDespiteErrors bool
+
+ // Requires is a set of analyses that must run successfully
+ // before this one on a given package. This analysis may inspect
+ // the outputs produced by each analysis in Requires.
+ // The graph over analyses implied by Requires edges must be acyclic.
+ //
+ // Requires establishes a "horizontal" dependency between
+ // analysis units (different analyses, same package).
+ Requires []*Analysis
+
+ // OutputType is the type of the optional Output value
+ // computed by this analysis and stored in Unit.Output.
+ // (The Output is provided as an Input to
+ // each analysis that Requires this one.)
+ OutputType reflect.Type
+
+ // LemmaTypes is the set of types of lemmas produced and
+ // consumed by this analysis. An analysis that uses lemmas
+ // may assume that its import dependencies have been
+ // similarly analyzed before it runs. Lemmas are pointers.
+ //
+ // LemmaTypes establishes a "vertical" dependency between
+ // analysis units (same analysis, different packages).
+ LemmaTypes []reflect.Type
+}
+
+func (a *Analysis) String() string { return a.Name }
+
+// A Unit provides information to the Run function that
+// applies a specific analysis to a single Go package.
+//
+// It forms the interface between the analysis logic and the driver
+// program, and has both input and an output components.
+type Unit struct {
+ // -- inputs --
+
+ Analysis *Analysis // the identity of the current analysis
+
+ // syntax and type information
+ Fset *token.FileSet // file position information
+ Syntax []*ast.File // the abstract syntax tree of each file
+ Pkg *types.Package // type information about the package
+ Info *types.Info // type information about the syntax trees
+
+ // Inputs provides the inputs to this analysis unit, which are
+ // the corresponding outputs of its prerequisite analysis.
+ // The map keys are the elements of Analysis.Required,
+ // and the type of each corresponding value is the required
+ // analysis's OutputType.
+ Inputs map[*Analysis]interface{}
+
+ // ObjectLemma retrieves a lemma associated with obj.
+ // Given a value ptr of type *T, where *T satisfies Lemma,
+ // ObjectLemma copies the value to *ptr.
+ //
+ // ObjectLemma may panic if applied to a lemma type that
+ // the analysis did not declare among its LemmaTypes,
+ // or if called after analysis of the unit is complete.
+ //
+ // ObjectLemma is not concurrency-safe.
+ ObjectLemma func(obj types.Object, lemma Lemma) bool
+
+ // PackageLemma retrives a lemma associated with package pkg,
+ // which must be this package or one if its dependencies.
+ // See comments for ObjectLemma.
+ PackageLemma func(pkg *types.Package, lemma Lemma) bool
+
+ // -- outputs --
+
+ // Findings is a list of findings about specific locations
+ // in the analyzed source code, such as potential mistakes.
+ // It is populated by the Run function.
+ Findings []*Finding
+
+ // SetObjectLemma associates a lemma of type *T with the obj,
+ // replacing any previous lemma of that type.
+ //
+ // SetObjectLemma panics if the lemma's type is not among
+ // Analysis.LemmaTypes, or if obj does not belong to the package
+ // being analyzed, or if it is called after analysis of the unit
+ // is complete.
+ //
+ // SetObjectLemma is not concurrency-safe.
+ SetObjectLemma func(obj types.Object, lemma Lemma)
+
+ // SetPackageLemma associates a lemma with the current package.
+ // See comments for SetObjectLemma.
+ SetPackageLemma func(lemma Lemma)
+
+ // Output is an immutable result computed by this analysis unit
+ // and set by the Run function.
+ // It will be made available as an input to any analysis that
+ // depends directly on this one; see Analysis.Requires.
+ // Its type must match Analysis.OutputType.
+ //
+ // Outputs are available as Inputs to later analyses of the
+ // same package. To pass analysis results between packages (and
+ // thus potentially between address spaces), use Lemmas, which
+ // are serializable.
+ Output interface{}
+
+ /* Further fields may be added in future. */
+ // For example, suggested or applied refactorings.
+}
+
+// Findingf is a helper function that creates a new Finding using the
+// specified position and formatted error message, appends it to
+// unit.Findings, and returns it.
+func (unit *Unit) Findingf(pos token.Pos, format string, args ...interface{}) *Finding {
+ msg := fmt.Sprintf(format, args...)
+ f := &Finding{Pos: pos, Message: msg}
+ unit.Findings = append(unit.Findings, f)
+ return f
+}
+
+func (unit *Unit) String() string {
+ return fmt.Sprintf("%s@%s", unit.Analysis.Name, unit.Pkg.Path())
+}
+
+// A Lemma is an intermediate fact produced during analysis.
+//
+// Each lemma is associated with a named declaration (a types.Object).
+// A single object may have multiple associated lemmas, but only one of
+// any particular lemma type.
+//
+// A Lemma represents a predicate such as "never returns", but does not
+// represent the subject of the predicate such as "function F".
+//
+// Lemmas may be produced in one analysis unit and consumed by another
+// analysis unit even if these are in different address spaces.
+// If package P imports Q, all lemmas about objects of Q produced during
+// analysis of that package will be available during later analysis of P.
+// Lemmas are analogous to type export data in a build system:
+// just as export data enables separate compilation of several units,
+// lemmas enable "separate analysis".
+//
+// Each unit of analysis starts with the set of lemmas produced by the
+// same analysis applied to the packages directly imported by the
+// current one. The analysis may add additional lemmas to the set, and
+// they may be exported in turn. An analysis's Run function may retrieve
+// lemmas by calling Unit.Lemma and set them using Unit.SetLemma.
+//
+// Each type of Lemma may be produced by at most one Analysis.
+// Lemmas are logically private to their Analysis; to pass values
+// between different analysis, use the Input/Output mechanism.
+//
+// A Lemma type must be a pointer. (Unit.GetLemma relies on it.)
+// Lemmas are encoded and decoded using encoding/gob.
+// A Lemma may implement the GobEncoder/GobDecoder interfaces
+// to customize its encoding; Lemma encoding should not fail.
+//
+// A Lemma should not be modified once passed to SetLemma.
+type Lemma interface {
+ IsLemma() // dummy method to avoid type errors
+}
+
+// A Finding is a message associated with a source location.
+//
+// An Analysis may return a variety of findings; the optional Category,
+// which should be a constant, may be used to classify them.
+// It is primarily intended to make it easy to look up documentation.
+type Finding struct {
+ Pos token.Pos
+ Category string // optional
+ Message string
+}
diff --git a/go/analysis/plugin/README b/go/analysis/plugin/README
new file mode 100644
index 0000000..add86bd
--- /dev/null
+++ b/go/analysis/plugin/README
@@ -0,0 +1,8 @@
+
+This directory does not contain a Go package,
+but acts as a container for various analyses
+that implement the golang.org/x/tools/go/analysis
+API and may be imported into an analysis tool.
+
+By convention, each package foo provides the analysis,
+and each command foo/cmd/foo provides a standalone driver.
diff --git a/go/analysis/plugin/findcall/findcall.go b/go/analysis/plugin/findcall/findcall.go
new file mode 100644
index 0000000..c794255
--- /dev/null
+++ b/go/analysis/plugin/findcall/findcall.go
@@ -0,0 +1,45 @@
+// The findcall package is a trivial example and test of an analyzer of
+// Go source code. It reports a finding for every call to a function or
+// method of the name specified by its --name flag.
+package findcall
+
+import (
+ "go/ast"
+
+ "golang.org/x/tools/go/analysis"
+)
+
+var Analysis = &analysis.Analysis{
+ Name: "findcall",
+ Doc: "find calls to a particular function",
+ Run: findcall,
+ RunDespiteErrors: true,
+}
+
+var name = "println" // --name flag
+
+func init() {
+ Analysis.Flags.StringVar(&name, "name", name, "name of the function to find")
+}
+
+func findcall(unit *analysis.Unit) error {
+ for _, f := range unit.Syntax {
+ ast.Inspect(f, func(n ast.Node) bool {
+ if call, ok := n.(*ast.CallExpr); ok {
+ var id *ast.Ident
+ switch fun := call.Fun.(type) {
+ case *ast.Ident:
+ id = fun
+ case *ast.SelectorExpr:
+ id = fun.Sel
+ }
+ if id != nil && !unit.Info.Types[id].IsType() && id.Name == name {
+ unit.Findingf(call.Lparen, "call of %s(...)", id.Name)
+ }
+ }
+ return true
+ })
+ }
+
+ return nil
+}
diff --git a/go/analysis/plugin/pkglemma/pkglemma.go b/go/analysis/plugin/pkglemma/pkglemma.go
new file mode 100644
index 0000000..94b4e17
--- /dev/null
+++ b/go/analysis/plugin/pkglemma/pkglemma.go
@@ -0,0 +1,102 @@
+// The pkglemma package is a demonstration and test of the package lemma
+// mechanism.
+//
+// The output of the pkglemma analysis is a set of key/values pairs
+// gathered from the analyzed package and its imported dependencies.
+// Each key/value pair comes from a top-level constant declaration
+// whose name starts with "_". For example:
+//
+// package p
+//
+// const _greeting = "hello"
+// const _audience = "world"
+//
+// the pkglemma analysis output for package p would be:
+//
+// {"greeting": "hello", "audience": "world"}.
+//
+// In addition, the analysis reports a finding at each import
+// showing which key/value pairs it contributes.
+package pkglemma
+
+import (
+ "fmt"
+ "go/ast"
+ "go/token"
+ "go/types"
+ "reflect"
+ "sort"
+ "strings"
+
+ "golang.org/x/tools/go/analysis"
+)
+
+var Analysis = &analysis.Analysis{
+ Name: "pkglemma",
+ Doc: "gather name/value pairs from constant declarations",
+ Run: run,
+ LemmaTypes: []reflect.Type{reflect.TypeOf(new(note))},
+ OutputType: reflect.TypeOf(map[string]string{}),
+}
+
+// A note is a package-level lemma that records
+// key/value pairs accumulated from constant
+// declarations in this package and its dependencies.
+type note struct {
+ M map[string]string
+}
+
+func (*note) IsLemma() {}
+
+func run(unit *analysis.Unit) error {
+ m := make(map[string]string)
+
+ // At each import, print the lemma from the imported
+ // package and accumulate its information into m.
+ doImport := func(spec *ast.ImportSpec) {
+ pkg := unit.Info.Defs[spec.Name].(*types.PkgName).Imported()
+ var lemma note
+ if unit.PackageLemma(pkg, &lemma) {
+ var lines []string
+ for k, v := range lemma.M {
+ m[k] = v
+ lines = append(lines, fmt.Sprintf("%s=%s", k, v))
+ }
+ sort.Strings(lines)
+ unit.Findingf(spec.Pos(), "%s", strings.Join(lines, " "))
+ }
+ }
+
+ // At each "const _name = value", add a fact into m.
+ doConst := func(spec *ast.ValueSpec) {
+ if len(spec.Names) == len(spec.Values) {
+ for i := range spec.Names {
+ name := spec.Names[i].Name
+ if strings.HasPrefix(name, "_") {
+ m[name[1:]] = unit.Info.Types[spec.Values[i]].Value.String()
+ }
+ }
+ }
+ }
+
+ for _, f := range unit.Syntax {
+ for _, decl := range f.Decls {
+ if decl, ok := decl.(*ast.GenDecl); ok {
+ for _, spec := range decl.Specs {
+ switch decl.Tok {
+ case token.IMPORT:
+ doImport(spec.(*ast.ImportSpec))
+ case token.CONST:
+ doConst(spec.(*ast.ValueSpec))
+ }
+ }
+ }
+ }
+ }
+
+ unit.Output = m
+
+ unit.SetPackageLemma(¬e{m})
+
+ return nil
+}
diff --git a/go/analysis/validate.go b/go/analysis/validate.go
new file mode 100644
index 0000000..7fbb3b5
--- /dev/null
+++ b/go/analysis/validate.go
@@ -0,0 +1,97 @@
+package analysis
+
+import (
+ "fmt"
+ "reflect"
+ "unicode"
+)
+
+// Validate reports an error if any of the analyses are misconfigured.
+// Checks include:
+// - that the name is a valid identifier;
+// - that analysis names are unique;
+// - that the Requires graph is acylic;
+// - that analyses' lemma and output types are unique.
+// - that each lemma type is a pointer.
+func Validate(analyses []*Analysis) error {
+ names := make(map[string]bool)
+
+ // Map each lemma/output type to its sole generating analysis.
+ lemmaTypes := make(map[reflect.Type]*Analysis)
+ outputTypes := make(map[reflect.Type]*Analysis)
+
+ // Traverse the Requires graph, depth first.
+ color := make(map[*Analysis]uint8) // 0=white 1=grey 2=black
+ var visit func(a *Analysis) error
+ visit = func(a *Analysis) error {
+ if a == nil {
+ return fmt.Errorf("nil *Analysis")
+ }
+ if color[a] == 0 { // white
+ color[a] = 1 // grey
+
+ // names
+ if !validIdent(a.Name) {
+ return fmt.Errorf("invalid analysis name %q", a)
+ }
+ if names[a.Name] {
+ return fmt.Errorf("duplicate analysis name %q", a)
+ }
+ names[a.Name] = true
+
+ if a.Doc == "" {
+ return fmt.Errorf("analysis %q is undocumented", a)
+ }
+
+ // lemma types
+ for _, t := range a.LemmaTypes {
+ if t == nil {
+ return fmt.Errorf("analysis %s has nil LemmaType", a)
+ }
+ if prev := lemmaTypes[t]; prev != nil {
+ return fmt.Errorf("lemma type %s registered by two analyses: %v, %v",
+ t, a, prev)
+ }
+ if t.Kind() != reflect.Ptr {
+ return fmt.Errorf("%s: lemma type %s is not a pointer", a, t)
+ }
+ lemmaTypes[t] = a
+ }
+
+ // output types
+ if a.OutputType != nil {
+ if prev := outputTypes[a.OutputType]; prev != nil {
+ return fmt.Errorf("output type %s registered by two analyses: %v, %v",
+ a.OutputType, a, prev)
+ }
+ outputTypes[a.OutputType] = a
+ }
+
+ // recursion
+ for i, req := range a.Requires {
+ if err := visit(req); err != nil {
+ return fmt.Errorf("%s.Requires[%d]: %v", a.Name, i, err)
+ }
+ }
+ color[a] = 2 // black
+ }
+
+ return nil
+ }
+ for _, a := range analyses {
+ if err := visit(a); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+func validIdent(name string) bool {
+ for i, r := range name {
+ if !(r == '_' || unicode.IsLetter(r) || i > 0 && unicode.IsDigit(r)) {
+ return false
+ }
+ }
+ return name != ""
+}