go/loader: use concurrency to reduce I/O latency and perform typechecking in parallel.
See loader.go comments for orientation.
This is what happens when you use start using Dmitry's new trace tool. :)
Tests:
- added test of cycles
- load/parse/typecheck of 'godoc' is ~2.5x faster
- race detector finds no errors.
Change-Id: Icb5712c7825002342baf471b216252cecd9149d6
Reviewed-on: https://go-review.googlesource.com/1582
Reviewed-by: Robert Griesemer <gri@golang.org>
diff --git a/go/loader/loader.go b/go/loader/loader.go
index 256cb04..26a6ae2 100644
--- a/go/loader/loader.go
+++ b/go/loader/loader.go
@@ -73,7 +73,7 @@
// DEPENDENCY is a package loaded to satisfy an import in an initial
// package or another dependency.
//
-package loader // import "golang.org/x/tools/go/loader"
+package loader
// 'go test', in-package test files, and import cycles
// ---------------------------------------------------
@@ -112,12 +112,78 @@
// compress/bzip2/bzip2_test.go (package bzip2) imports "io/ioutil"
// io/ioutil/tempfile_test.go (package ioutil) imports "regexp"
// regexp/exec_test.go (package regexp) imports "compress/bzip2"
+//
+//
+// Concurrency
+// -----------
+//
+// Let us define the import dependency graph as follows. Each node is a
+// list of files passed to (Checker).Files at once. Many of these lists
+// are the production code of an importable Go package, so those nodes
+// are labelled by the package's import path. The remaining nodes are
+// ad-hoc packages and lists of in-package *_test.go files that augment
+// an importable package; those nodes have no label.
+//
+// The edges of the graph represent import statements appearing within a
+// file. An edge connects a node (a list of files) to the node it
+// imports, which is importable and thus always labelled.
+//
+// Loading is controlled by this dependency graph.
+//
+// To reduce I/O latency, we start loading a package's dependencies
+// asynchronously as soon as we've parsed its files and enumerated its
+// imports (scanImports). This performs a preorder traversal of the
+// import dependency graph.
+//
+// To exploit hardware parallelism, we type-check unrelated packages in
+// parallel, where "unrelated" means not ordered by the partial order of
+// the import dependency graph.
+//
+// We use a concurrency-safe blocking cache (importer.imported) to
+// record the results of type-checking, whether success or failure. An
+// entry is created in this cache by startLoad the first time the
+// package is imported. The first goroutine to request an entry becomes
+// responsible for completing the task and broadcasting completion to
+// subsequent requestors, which block until then.
+//
+// Type checking occurs in (parallel) postorder: we cannot type-check a
+// set of files until we have loaded and type-checked all of their
+// immediate dependencies (and thus all of their transitive
+// dependencies). If the input were guaranteed free of import cycles,
+// this would be trivial: we could simply wait for completion of the
+// dependencies and then invoke the typechecker.
+//
+// But as we saw in the 'go test' section above, some cycles in the
+// import graph over packages are actually legal, so long as the
+// cycle-forming edge originates in the in-package test files that
+// augment the package. This explains why the nodes of the import
+// dependency graph are not packages, but lists of files: the unlabelled
+// nodes avoid the cycles. Consider packages A and B where B imports A
+// and A's in-package tests AT import B. The naively constructed import
+// graph over packages would contain a cycle (A+AT) --> B --> (A+AT) but
+// the graph over lists of files is AT --> B --> A, where AT is an
+// unlabelled node.
+//
+// Awaiting completion of the dependencies in a cyclic graph would
+// deadlock, so we must materialize the import dependency graph (as
+// importer.graph) and check whether each import edge forms a cycle. If
+// x imports y, and the graph already contains a path from y to x, then
+// there is an import cycle, in which case the processing of x must not
+// wait for the completion of processing of y.
+//
+// When the type-checker makes a callback (doImport) to the loader for a
+// given import edge, there are two possible cases. In the normal case,
+// the dependency has already been completely type-checked; doImport
+// does a cache lookup and returns it. In the cyclic case, the entry in
+// the cache is still necessarily incomplete, indicating a cycle. We
+// perform the cycle check again to obtain the error message, and return
+// the error.
+//
+// The result of using concurrency is about a 2.5x speedup for stdlib_test.
// TODO(adonovan):
// - (*Config).ParseFile is very handy, but feels like feature creep.
// (*Config).CreateFromFiles has a nasty precondition.
-// - s/path/importPath/g to avoid ambiguity with other meanings of
-// "path": a file name, a colon-separated directory list.
// - cache the calls to build.Import so we don't do it three times per
// test package.
// - Thorough overhaul of package documentation.
@@ -135,12 +201,16 @@
"go/token"
"os"
"strings"
+ "sync"
+ "time"
"golang.org/x/tools/go/ast/astutil"
"golang.org/x/tools/go/gcimporter"
"golang.org/x/tools/go/types"
)
+const trace = false // show timing info for type-checking
+
// Config specifies the configuration for a program to load.
// The zero value for Config is a ready-to-use default configuration.
type Config struct {
@@ -242,7 +312,7 @@
}
type CreatePkg struct {
- Path string
+ Path string // the import path of the resulting (non-importable) types.Package
Files []*ast.File
}
@@ -395,6 +465,8 @@
// conf.CreatePkgs.
//
// It fails if any file could not be loaded or parsed.
+// TODO(adonovan): make it not return an error, by making CreatePkg
+// store filenames and defer parsing until Load.
//
func (conf *Config) CreateFromFilenames(path string, filenames ...string) error {
files, errs := parseFiles(conf.fset(), conf.build(), nil, ".", filenames, conf.ParserMode)
@@ -506,15 +578,63 @@
// importer holds the working state of the algorithm.
type importer struct {
- conf *Config // the client configuration
- prog *Program // resulting program
- imported map[string]*importInfo // all imported packages (incl. failures) by import path
+ conf *Config // the client configuration
+ prog *Program // resulting program
+ start time.Time // for logging
+
+ // This mutex serializes access to prog.ImportMap (aka
+ // TypeChecker.Packages); we also use it for AllPackages.
+ //
+ // The TypeChecker.Packages map is not really used by this
+ // package, but may be used by the client's Import function,
+ // and by clients of the returned Program.
+ typecheckerMu sync.Mutex
+
+ importedMu sync.Mutex
+ imported map[string]*importInfo // all imported packages (incl. failures) by import path
+
+ // import dependency graph: graph[x][y] => x imports y
+ //
+ // Since non-importable packages cannot be cyclic, we ignore
+ // their imports, thus we only need the subgraph over importable
+ // packages. Nodes are identified by their import paths.
+ graphMu sync.Mutex
+ graph map[string]map[string]bool
}
// importInfo tracks the success or failure of a single import.
+//
+// Upon completion, exactly one of info and err is non-nil:
+// info on successful creation of a package, err otherwise.
+// A successful package may still contain type errors.
+//
type importInfo struct {
- info *PackageInfo // results of typechecking (including errors)
- err error // reason for failure to make a package
+ path string // import path
+ mu sync.Mutex // guards the following fields prior to completion
+ info *PackageInfo // results of typechecking (including errors)
+ err error // reason for failure to create a package
+ complete sync.Cond // complete condition is that one of info, err is non-nil.
+}
+
+// awaitCompletion blocks until ii is complete,
+// i.e. the info and err fields are safe to inspect without a lock.
+// It is concurrency-safe and idempotent.
+func (ii *importInfo) awaitCompletion() {
+ ii.mu.Lock()
+ for ii.info == nil && ii.err == nil {
+ ii.complete.Wait()
+ }
+ ii.mu.Unlock()
+}
+
+// Complete marks ii as complete.
+// Its info and err fields will not be subsequently updated.
+func (ii *importInfo) Complete(info *PackageInfo, err error) {
+ ii.mu.Lock()
+ ii.info = info
+ ii.err = err
+ ii.complete.Broadcast()
+ ii.mu.Unlock()
}
// Load creates the initial packages specified by conf.{Create,Import}Pkgs,
@@ -553,17 +673,23 @@
conf: conf,
prog: prog,
imported: make(map[string]*importInfo),
+ start: time.Now(),
+ graph: make(map[string]map[string]bool),
}
- for path := range conf.ImportPkgs {
- info, err := imp.importPackage(path)
- if err != nil {
- return nil, err // failed to create package
+ // -- loading proper (concurrent phase) --------------------------------
+
+ // Load the initially imported packages and their dependencies,
+ // in parallel.
+ for _, ii := range imp.loadAll("", conf.ImportPkgs) {
+ if ii.err != nil {
+ return nil, ii.err // failed to create package
}
- prog.Imported[path] = info
+ prog.Imported[ii.info.Pkg.Path()] = ii.info
}
- // Now augment those packages that need it.
+ // Augment the initial packages that need it.
+ // Dependencies are loaded in parallel.
for path, augment := range conf.ImportPkgs {
if augment {
// Find and create the actual package.
@@ -573,25 +699,37 @@
return nil, err // package not found
}
+ imp.importedMu.Lock() // (unnecessary, we're sequential here)
info := imp.imported[path].info // must be non-nil, see above
+ imp.importedMu.Unlock()
+
files, errs := imp.conf.parsePackageFiles(bp, 't')
for _, err := range errs {
info.appendError(err)
}
- typeCheckFiles(info, files...)
+ // The test files augmenting package P cannot be imported,
+ // but may import packages that import P,
+ // so we must disable the cycle check.
+ imp.addFiles(info, files, false)
}
}
+ // CreatePkgs includes all external test packages,
+ // so they must be processed after augmentation.
+ // Dependencies are loaded in parallel.
for _, create := range conf.CreatePkgs {
path := create.Path
if create.Path == "" && len(create.Files) > 0 {
path = create.Files[0].Name.Name
}
info := imp.newPackageInfo(path)
- typeCheckFiles(info, create.Files...)
+ // Ad-hoc packages are non-importable; no cycle check is needed.
+ imp.addFiles(info, create.Files, false)
prog.Created = append(prog.Created, info)
}
+ // -- finishing up (sequential) ----------------------------------------
+
if len(prog.Imported)+len(prog.Created) == 0 {
return nil, errors.New("no initial packages were specified")
}
@@ -743,52 +881,150 @@
//
// Idempotent.
//
-func (imp *importer) doImport(imports map[string]*types.Package, path string) (*types.Package, error) {
+func (imp *importer) doImport(from *PackageInfo, to string) (*types.Package, error) {
// Package unsafe is handled specially, and has no PackageInfo.
- if path == "unsafe" {
+ // TODO(adonovan): move this check into go/types?
+ if to == "unsafe" {
return types.Unsafe, nil
}
- info, err := imp.importPackage(path)
- if err != nil {
- return nil, err
+ imp.importedMu.Lock()
+ ii := imp.imported[to]
+ imp.importedMu.Unlock()
+ if ii == nil {
+ panic("internal error: unexpected import: " + to)
+ }
+ if ii.err != nil {
+ return nil, ii.err
+ }
+ if ii.info != nil {
+ return ii.info.Pkg, nil
}
- // Update the type checker's package map on success.
- imports[path] = info.Pkg
+ // Import of incomplete package: this indicates a cycle.
+ fromPath := from.Pkg.Path()
+ if cycle := imp.findPath(to, fromPath); cycle != nil {
+ cycle = append([]string{fromPath}, cycle...)
+ return nil, fmt.Errorf("import cycle: %s", strings.Join(cycle, " -> "))
+ }
- return info.Pkg, nil
+ panic("internal error: import of incomplete (yet acyclic) package: " + fromPath)
}
-// importPackage imports the package with the given import path, plus
-// its dependencies.
+// loadAll loads, parses, and type-checks the specified packages in
+// parallel and returns their completed importInfos in unspecified order.
//
-// On success, it returns a PackageInfo, possibly containing errors.
-// importPackage returns an error if it couldn't even create the package.
+// fromPath is the import path of the importing package, if it is
+// importable, "" otherwise. It is used for cycle detection.
+//
+func (imp *importer) loadAll(fromPath string, paths map[string]bool) []*importInfo {
+ result := make([]*importInfo, 0, len(paths))
+ for path := range paths {
+ result = append(result, imp.startLoad(path))
+ }
+
+ if fromPath != "" {
+ // We're loading a set of imports.
+ //
+ // We must record graph edges from the importing package
+ // to its dependencies, and check for cycles.
+ imp.graphMu.Lock()
+ deps, ok := imp.graph[fromPath]
+ if !ok {
+ deps = make(map[string]bool)
+ imp.graph[fromPath] = deps
+ }
+ for path := range paths {
+ deps[path] = true
+ }
+ imp.graphMu.Unlock()
+ }
+
+ for _, ii := range result {
+ if fromPath != "" {
+ if cycle := imp.findPath(ii.path, fromPath); cycle != nil {
+ // Cycle-forming import: we must not await its
+ // completion since it would deadlock.
+ //
+ // We don't record the error in ii since
+ // the error is really associated with the
+ // cycle-forming edge, not the package itself.
+ // (Also it would complicate the
+ // invariants of importPath completion.)
+ if trace {
+ fmt.Fprintln(os.Stderr, "import cycle: %q", cycle)
+ }
+ continue
+ }
+ }
+ ii.awaitCompletion()
+
+ }
+ return result
+}
+
+// findPath returns an arbitrary path from 'from' to 'to' in the import
+// graph, or nil if there was none.
+func (imp *importer) findPath(from, to string) []string {
+ imp.graphMu.Lock()
+ defer imp.graphMu.Unlock()
+
+ seen := make(map[string]bool)
+ var search func(stack []string, importPath string) []string
+ search = func(stack []string, importPath string) []string {
+ if !seen[importPath] {
+ seen[importPath] = true
+ stack = append(stack, importPath)
+ if importPath == to {
+ return stack
+ }
+ for x := range imp.graph[importPath] {
+ if p := search(stack, x); p != nil {
+ return p
+ }
+ }
+ }
+ return nil
+ }
+ return search(make([]string, 0, 20), from)
+}
+
+// startLoad initiates the loading, parsing and type-checking of the
+// specified package and its dependencies, if it has not already begun.
+//
+// It returns an importInfo, not necessarily in a completed state. The
+// caller must call awaitCompletion() before accessing its info and err
+// fields.
+//
+// startLoad is concurrency-safe and idempotent.
//
// Precondition: path != "unsafe".
//
-func (imp *importer) importPackage(path string) (*PackageInfo, error) {
+func (imp *importer) startLoad(path string) *importInfo {
+ imp.importedMu.Lock()
ii, ok := imp.imported[path]
if !ok {
- // In preorder, initialize the map entry to a cycle
- // error in case importPackage(path) is called again
- // before the import is completed.
- ii = &importInfo{err: fmt.Errorf("import cycle in package %s", path)}
+ ii = &importInfo{path: path}
+ ii.complete.L = &ii.mu
imp.imported[path] = ii
- // Find and create the actual package.
- if _, ok := imp.conf.ImportPkgs[path]; ok || imp.conf.SourceImports {
- ii.info, ii.err = imp.importFromSource(path)
- } else {
- ii.info, ii.err = imp.importFromBinary(path)
- }
- if ii.info != nil {
- ii.info.Importable = true
- }
+ go imp.load(path, ii)
}
+ imp.importedMu.Unlock()
- return ii.info, ii.err
+ return ii
+}
+
+func (imp *importer) load(path string, ii *importInfo) {
+ var info *PackageInfo
+ var err error
+ // Find and create the actual package.
+ if _, ok := imp.conf.ImportPkgs[path]; ok || imp.conf.SourceImports {
+ info, err = imp.loadFromSource(path)
+ } else {
+ info, err = imp.importFromBinary(path)
+ }
+ ii.Complete(info, err)
}
// importFromBinary implements package loading from the client-supplied
@@ -800,43 +1036,79 @@
if importfn == nil {
importfn = gcimporter.Import
}
+ imp.typecheckerMu.Lock()
pkg, err := importfn(imp.conf.TypeChecker.Packages, path)
+ if pkg != nil {
+ imp.conf.TypeChecker.Packages[path] = pkg
+ }
+ imp.typecheckerMu.Unlock()
if err != nil {
return nil, err
}
info := &PackageInfo{Pkg: pkg}
+ info.Importable = true
+ imp.typecheckerMu.Lock()
imp.prog.AllPackages[pkg] = info
+ imp.typecheckerMu.Unlock()
return info, nil
}
-// importFromSource implements package loading by parsing Go source files
+// loadFromSource implements package loading by parsing Go source files
// located by go/build.
+// The returned PackageInfo's typeCheck function must be called.
//
-func (imp *importer) importFromSource(path string) (*PackageInfo, error) {
+func (imp *importer) loadFromSource(path string) (*PackageInfo, error) {
bp, err := imp.conf.findSourcePackage(path)
if err != nil {
return nil, err // package not found
}
- // Type-check the package.
info := imp.newPackageInfo(path)
+ info.Importable = true
files, errs := imp.conf.parsePackageFiles(bp, 'g')
for _, err := range errs {
info.appendError(err)
}
- typeCheckFiles(info, files...)
+
+ imp.addFiles(info, files, true)
+
+ imp.typecheckerMu.Lock()
+ imp.conf.TypeChecker.Packages[path] = info.Pkg
+ imp.typecheckerMu.Unlock()
+
return info, nil
}
-// typeCheckFiles adds the specified files to info and type-checks them.
-// The order of files determines the package initialization order.
-// It may be called multiple times.
+// addFiles adds and type-checks the specified files to info, loading
+// their dependencies if needed. The order of files determines the
+// package initialization order. It may be called multiple times on the
+// same package. Errors are appended to the info.Errors field.
//
-// Errors are stored in the info.Errors field.
-func typeCheckFiles(info *PackageInfo, files ...*ast.File) {
+// cycleCheck determines whether the imports within files create
+// dependency edges that should be checked for potential cycles.
+//
+func (imp *importer) addFiles(info *PackageInfo, files []*ast.File, cycleCheck bool) {
info.Files = append(info.Files, files...)
- // Ignore the returned (first) error since we already collect them all.
- _ = info.checker.Files(files)
+ // Ensure the dependencies are loaded, in parallel.
+ var fromPath string
+ if cycleCheck {
+ fromPath = info.Pkg.Path()
+ }
+ imp.loadAll(fromPath, scanImports(files))
+
+ if trace {
+ fmt.Fprintf(os.Stderr, "%s: start %q (%d)\n",
+ time.Since(imp.start), info.Pkg.Path(), len(files))
+ }
+
+ // Ignore the returned (first) error since we
+ // already collect them all in the PackageInfo.
+ info.checker.Files(files)
+
+ if trace {
+ fmt.Fprintf(os.Stderr, "%s: stop %q\n",
+ time.Since(imp.start), info.Pkg.Path())
+ }
}
func (imp *importer) newPackageInfo(path string) *PackageInfo {
@@ -863,10 +1135,14 @@
if f := imp.conf.TypeCheckFuncBodies; f != nil {
tc.IgnoreFuncBodies = !f(path)
}
- tc.Import = imp.doImport // doImport wraps the user's importfn, effectively
+ tc.Import = func(_ map[string]*types.Package, to string) (*types.Package, error) {
+ return imp.doImport(info, to)
+ }
tc.Error = info.appendError // appendError wraps the user's Error function
info.checker = types.NewChecker(&tc, imp.conf.fset(), pkg, &info.Info)
+ imp.typecheckerMu.Lock()
imp.prog.AllPackages[pkg] = info
+ imp.typecheckerMu.Unlock()
return info
}