internal/imports: cache things outside the mod cache

Since a user's module cache is generally going to be much bigger than
their main module, one would expect that caching just information about
the module cache would be sufficient. It turns out that's not correct.
When we discover something in the module cache, we have to make sure
that a different version of it isn't already in scope. Doing that can
require information about the main module or replace targets, so that
needs to be cached too.

Concretely, when I'm working in x/tools, if a scan discovers a version
of x/tools in the module cache, it should usually ignore that version.
But that might not be true in more complicated cases, especially those
involving nested modules whose boundaries change.

So, cache everything except GOROOT. Since the new data is mutable,
we store it separately from the module cache data so that it can be
discarded easily between runs.

Change-Id: I47364f6c0270fee03af8898fec6c85d1b9c8d780
Reviewed-on: https://go-review.googlesource.com/c/tools/+/202045
Reviewed-by: Rebecca Stambler <rstambler@golang.org>
diff --git a/internal/gopathwalk/walk.go b/internal/gopathwalk/walk.go
index 60eb67b..9a61bdb 100644
--- a/internal/gopathwalk/walk.go
+++ b/internal/gopathwalk/walk.go
@@ -16,6 +16,7 @@
 	"os"
 	"path/filepath"
 	"strings"
+	"time"
 
 	"golang.org/x/tools/internal/fastwalk"
 )
@@ -83,8 +84,9 @@
 		}
 		return
 	}
+	start := time.Now()
 	if opts.Debug {
-		log.Printf("scanning %s", root.Path)
+		log.Printf("gopathwalk: scanning %s", root.Path)
 	}
 	w := &walker{
 		root: root,
@@ -98,7 +100,7 @@
 	}
 
 	if opts.Debug {
-		log.Printf("scanned %s", root.Path)
+		log.Printf("gopathwalk: scanned %s in %v", root.Path, time.Since(start))
 	}
 }
 
diff --git a/internal/imports/fix_test.go b/internal/imports/fix_test.go
index af271ed..6dafd40 100644
--- a/internal/imports/fix_test.go
+++ b/internal/imports/fix_test.go
@@ -8,6 +8,7 @@
 	"flag"
 	"fmt"
 	"go/build"
+	"log"
 	"path/filepath"
 	"reflect"
 	"runtime"
@@ -1642,6 +1643,7 @@
 					WorkingDir:      exported.Config.Dir,
 					ForceGoPackages: forceGoPackages,
 					Debug:           *testDebug,
+					Logf:            log.Printf,
 				},
 				exported: exported,
 			}
diff --git a/internal/imports/mod.go b/internal/imports/mod.go
index 2bb42c2..fd746c10 100644
--- a/internal/imports/mod.go
+++ b/internal/imports/mod.go
@@ -31,8 +31,9 @@
 	ModsByModPath []*ModuleJSON // All modules, ordered by # of path components in module Path...
 	ModsByDir     []*ModuleJSON // ...or Dir.
 
-	// moduleCacheInfo stores information about the module cache.
-	moduleCacheInfo *moduleCacheInfo
+	// moduleCacheCache stores information about the module cache.
+	moduleCacheCache *dirInfoCache
+	otherCache       *dirInfoCache
 }
 
 type ModuleJSON struct {
@@ -93,9 +94,14 @@
 		return count(j) < count(i) // descending order
 	})
 
-	if r.moduleCacheInfo == nil {
-		r.moduleCacheInfo = &moduleCacheInfo{
-			modCacheDirInfo: make(map[string]*directoryPackageInfo),
+	if r.moduleCacheCache == nil {
+		r.moduleCacheCache = &dirInfoCache{
+			dirs: map[string]*directoryPackageInfo{},
+		}
+	}
+	if r.otherCache == nil {
+		r.otherCache = &dirInfoCache{
+			dirs: map[string]*directoryPackageInfo{},
 		}
 	}
 
@@ -103,6 +109,20 @@
 	return nil
 }
 
+func (r *ModuleResolver) ClearForNewScan() {
+	r.otherCache = &dirInfoCache{
+		dirs: map[string]*directoryPackageInfo{},
+	}
+}
+
+func (r *ModuleResolver) ClearForNewMod() {
+	env := r.env
+	*r = ModuleResolver{
+		env: env,
+	}
+	r.init()
+}
+
 // findPackage returns the module and directory that contains the package at
 // the given import path, or returns nil, "" if no module is in scope.
 func (r *ModuleResolver) findPackage(importPath string) (*ModuleJSON, string) {
@@ -118,11 +138,7 @@
 			continue
 		}
 
-		if info, ok := r.moduleCacheInfo.Load(pkgDir); ok {
-			// This is slightly wrong: a directory doesn't have to have an
-			// importable package to count as a package for package-to-module
-			// resolution. package main or _test files should count but
-			// don't.
+		if info, ok := r.cacheLoad(pkgDir); ok {
 			if loaded, err := info.reachedStatus(nameLoaded); loaded {
 				if err != nil {
 					continue // No package in this dir.
@@ -132,13 +148,21 @@
 			if scanned, err := info.reachedStatus(directoryScanned); scanned && err != nil {
 				continue // Dir is unreadable, etc.
 			}
+			// This is slightly wrong: a directory doesn't have to have an
+			// importable package to count as a package for package-to-module
+			// resolution. package main or _test files should count but
+			// don't.
+			// TODO(heschi): fix this.
+			if _, err := r.cachePackageName(pkgDir); err == nil {
+				return m, pkgDir
+			}
 		}
 
+		// Not cached. Read the filesystem.
 		pkgFiles, err := ioutil.ReadDir(pkgDir)
 		if err != nil {
 			continue
 		}
-
 		// A module only contains a package if it has buildable go
 		// files in that directory. If not, it could be provided by an
 		// outer module. See #29736.
@@ -151,6 +175,42 @@
 	return nil, ""
 }
 
+func (r *ModuleResolver) cacheLoad(dir string) (directoryPackageInfo, bool) {
+	if info, ok := r.moduleCacheCache.Load(dir); ok {
+		return info, ok
+	}
+	return r.otherCache.Load(dir)
+}
+
+func (r *ModuleResolver) cacheStore(info directoryPackageInfo) {
+	if info.rootType == gopathwalk.RootModuleCache {
+		r.moduleCacheCache.Store(info.dir, info)
+	} else {
+		r.otherCache.Store(info.dir, info)
+	}
+}
+
+func (r *ModuleResolver) cacheKeys() []string {
+	return append(r.moduleCacheCache.Keys(), r.otherCache.Keys()...)
+}
+
+// cachePackageName caches the package name for a dir already in the cache.
+func (r *ModuleResolver) cachePackageName(dir string) (directoryPackageInfo, error) {
+	info, ok := r.cacheLoad(dir)
+	if !ok {
+		panic("cachePackageName on uncached dir " + dir)
+	}
+
+	loaded, err := info.reachedStatus(nameLoaded)
+	if loaded {
+		return info, err
+	}
+	info.packageName, info.err = packageDirToName(info.dir)
+	info.status = nameLoaded
+	r.cacheStore(info)
+	return info, info.err
+}
+
 // findModuleByDir returns the module that contains dir, or nil if no such
 // module is in scope.
 func (r *ModuleResolver) findModuleByDir(dir string) *ModuleJSON {
@@ -269,22 +329,15 @@
 	roots = filterRoots(roots, exclude)
 
 	var result []*pkg
-	dupCheck := make(map[string]bool)
 	var mu sync.Mutex
 
-	// Packages in the module cache are immutable. If we have
-	// already seen this package on a previous scan of the module
-	// cache, return that result.
+	// We assume cached directories have not changed. We can skip them and their
+	// children.
 	skip := func(root gopathwalk.Root, dir string) bool {
 		mu.Lock()
 		defer mu.Unlock()
-		// If we have already processed this directory on this walk, skip it.
-		if _, dup := dupCheck[dir]; dup {
-			return true
-		}
 
-		// If we have saved this directory information, skip it.
-		info, ok := r.moduleCacheInfo.Load(dir)
+		info, ok := r.cacheLoad(dir)
 		if !ok {
 			return false
 		}
@@ -295,51 +348,19 @@
 		return packageScanned
 	}
 
+	// Add anything new to the cache. We'll process everything in it below.
 	add := func(root gopathwalk.Root, dir string) {
 		mu.Lock()
 		defer mu.Unlock()
-		if _, dup := dupCheck[dir]; dup {
-			return
-		}
 
-		info, err := r.scanDirForPackage(root, dir)
-		if err != nil {
-			return
-		}
-		if root.Type == gopathwalk.RootModuleCache {
-			// Save this package information in the cache and return.
-			// Packages from the module cache are added after Walk.
-			r.moduleCacheInfo.Store(dir, info)
-			return
-		}
-
-		// Skip this package if there was an error loading package info.
-		if info.err != nil {
-			return
-		}
-
-		if loadNames {
-			info.packageName, info.err = packageDirToName(dir)
-			if info.err != nil {
-				return
-			}
-		}
-
-		// The rest of this function canonicalizes the packages using the results
-		// of initializing the resolver from 'go list -m'.
-		res, err := r.canonicalize(root.Type, info)
-		if err != nil {
-			return
-		}
-
-		result = append(result, res)
+		r.cacheStore(r.scanDirForPackage(root, dir))
 	}
 
 	gopathwalk.WalkSkip(roots, add, skip, gopathwalk.Options{Debug: r.env.Debug, ModulesEnabled: true})
 
-	// Add the packages from the modules in the mod cache that were skipped.
-	for _, dir := range r.moduleCacheInfo.Keys() {
-		info, ok := r.moduleCacheInfo.Load(dir)
+	// Everything we already had, and everything new, is now in the cache.
+	for _, dir := range r.cacheKeys() {
+		info, ok := r.cacheLoad(dir)
 		if !ok {
 			continue
 		}
@@ -351,21 +372,13 @@
 
 		// If we want package names, make sure the cache has them.
 		if loadNames {
-			loaded, err := info.reachedStatus(nameLoaded)
-			if loaded && err != nil {
+			var err error
+			if info, err = r.cachePackageName(info.dir); err != nil {
 				continue
 			}
-			if !loaded {
-				info.packageName, info.err = packageDirToName(info.dir)
-				info.status = nameLoaded
-				r.moduleCacheInfo.Store(info.dir, info)
-				if info.err != nil {
-					continue
-				}
-			}
 		}
 
-		res, err := r.canonicalize(gopathwalk.RootModuleCache, info)
+		res, err := r.canonicalize(info)
 		if err != nil {
 			continue
 		}
@@ -377,9 +390,9 @@
 
 // canonicalize gets the result of canonicalizing the packages using the results
 // of initializing the resolver from 'go list -m'.
-func (r *ModuleResolver) canonicalize(rootType gopathwalk.RootType, info directoryPackageInfo) (*pkg, error) {
+func (r *ModuleResolver) canonicalize(info directoryPackageInfo) (*pkg, error) {
 	// Packages in GOROOT are already canonical, regardless of the std/cmd modules.
-	if rootType == gopathwalk.RootGOROOT {
+	if info.rootType == gopathwalk.RootGOROOT {
 		return &pkg{
 			importPathShort: info.nonCanonicalImportPath,
 			dir:             info.dir,
@@ -422,7 +435,7 @@
 	return loadExportsFromFiles(ctx, r.env, expectPackage, pkg.dir)
 }
 
-func (r *ModuleResolver) scanDirForPackage(root gopathwalk.Root, dir string) (directoryPackageInfo, error) {
+func (r *ModuleResolver) scanDirForPackage(root gopathwalk.Root, dir string) directoryPackageInfo {
 	subdir := ""
 	if dir != root.Path {
 		subdir = dir[len(root.Path)+len("/"):]
@@ -434,7 +447,10 @@
 		// is a mess. There's no easy way to tell if it's on.
 		// We can still find things in the mod cache and
 		// map them into /vendor when -mod=vendor is on.
-		return directoryPackageInfo{}, fmt.Errorf("vendor directory")
+		return directoryPackageInfo{
+			status: directoryScanned,
+			err:    fmt.Errorf("vendor directory"),
+		}
 	}
 	switch root.Type {
 	case gopathwalk.RootCurrentModule:
@@ -445,7 +461,7 @@
 			return directoryPackageInfo{
 				status: directoryScanned,
 				err:    fmt.Errorf("invalid module cache path: %v", subdir),
-			}, nil
+			}
 		}
 		modPath, err := module.DecodePath(filepath.ToSlash(matches[1]))
 		if err != nil {
@@ -455,7 +471,7 @@
 			return directoryPackageInfo{
 				status: directoryScanned,
 				err:    fmt.Errorf("decoding module cache path %q: %v", subdir, err),
-			}, nil
+			}
 		}
 		importPath = path.Join(modPath, filepath.ToSlash(matches[3]))
 	case gopathwalk.RootGOROOT:
@@ -465,12 +481,13 @@
 	result := directoryPackageInfo{
 		status:                 directoryScanned,
 		dir:                    dir,
+		rootType:               root.Type,
 		nonCanonicalImportPath: importPath,
 		needsReplace:           false,
 	}
 	if root.Type == gopathwalk.RootGOROOT {
 		// stdlib packages are always in scope, despite the confusing go.mod
-		return result, nil
+		return result
 	}
 	// Check that this package is not obviously impossible to import.
 	modFile := r.findModFile(dir)
@@ -483,7 +500,7 @@
 		result.needsReplace = true
 	}
 
-	return result, nil
+	return result
 }
 
 // modCacheRegexp splits a path in a module cache into module, module version, and package.
diff --git a/internal/imports/mod_cache.go b/internal/imports/mod_cache.go
index 82a6dda..9147391 100644
--- a/internal/imports/mod_cache.go
+++ b/internal/imports/mod_cache.go
@@ -2,11 +2,10 @@
 
 import (
 	"sync"
+
+	"golang.org/x/tools/internal/gopathwalk"
 )
 
-// ModuleResolver implements Resolver for modules using the go command as little
-// as feasible.
-//
 // To find packages to import, the resolver needs to know about all of the
 // the packages that could be imported. This includes packages that are
 // already in modules that are in (1) the current module, (2) replace targets,
@@ -42,12 +41,13 @@
 	// Set when status >= directoryScanned.
 
 	// dir is the absolute directory of this package.
-	dir string
-	// nonCanonicalImportPath is the expected import path for this package.
-	// This may not be an import path that can be used to import this package.
+	dir      string
+	rootType gopathwalk.RootType
+	// nonCanonicalImportPath is the package's expected import path. It may
+	// not actually be importable at that path.
 	nonCanonicalImportPath string
 	// needsReplace is true if the nonCanonicalImportPath does not match the
-	// the modules declared path, making it impossible to import without a
+	// module's declared path, making it impossible to import without a
 	// replace directive.
 	needsReplace bool
 
@@ -68,8 +68,8 @@
 	return true, nil
 }
 
-// moduleCacheInfo is a concurrency safe map for storing information about
-// the directories in the module cache.
+// dirInfoCache is a concurrency safe map for storing information about
+// directories that may contain packages.
 //
 // The information in this cache is built incrementally. Entries are initialized in scan.
 // No new keys should be added in any other functions, as all directories containing
@@ -78,32 +78,30 @@
 // Other functions, including loadExports and findPackage, may update entries in this cache
 // as they discover new things about the directory.
 //
-// We do not need to protect the data in the cache for multiple writes, because it only stores
-// module cache directories, which do not change. If two competing stores take place, there will be
-// one store that wins. Although this could result in a loss of information it will not be incorrect
-// and may just result in recomputing the same result later.
+// The information in the cache is not expected to change for the cache's
+// lifetime, so there is no protection against competing writes. Users should
+// take care not to hold the cache across changes to the underlying files.
 //
 // TODO(suzmue): consider other concurrency strategies and data structures (RWLocks, sync.Map, etc)
-type moduleCacheInfo struct {
+type dirInfoCache struct {
 	mu sync.Mutex
-	// modCacheDirInfo stores information about packages in
-	// module cache directories. Keyed by absolute directory.
-	modCacheDirInfo map[string]*directoryPackageInfo
+	// dirs stores information about packages in directories, keyed by absolute path.
+	dirs map[string]*directoryPackageInfo
 }
 
 // Store stores the package info for dir.
-func (d *moduleCacheInfo) Store(dir string, info directoryPackageInfo) {
+func (d *dirInfoCache) Store(dir string, info directoryPackageInfo) {
 	d.mu.Lock()
 	defer d.mu.Unlock()
 	stored := info // defensive copy
-	d.modCacheDirInfo[dir] = &stored
+	d.dirs[dir] = &stored
 }
 
 // Load returns a copy of the directoryPackageInfo for absolute directory dir.
-func (d *moduleCacheInfo) Load(dir string) (directoryPackageInfo, bool) {
+func (d *dirInfoCache) Load(dir string) (directoryPackageInfo, bool) {
 	d.mu.Lock()
 	defer d.mu.Unlock()
-	info, ok := d.modCacheDirInfo[dir]
+	info, ok := d.dirs[dir]
 	if !ok {
 		return directoryPackageInfo{}, false
 	}
@@ -111,10 +109,10 @@
 }
 
 // Keys returns the keys currently present in d.
-func (d *moduleCacheInfo) Keys() (keys []string) {
+func (d *dirInfoCache) Keys() (keys []string) {
 	d.mu.Lock()
 	defer d.mu.Unlock()
-	for key := range d.modCacheDirInfo {
+	for key := range d.dirs {
 		keys = append(keys, key)
 	}
 	return keys
diff --git a/internal/imports/mod_cache_test.go b/internal/imports/mod_cache_test.go
index 0eeaf17..14d29c4 100644
--- a/internal/imports/mod_cache_test.go
+++ b/internal/imports/mod_cache_test.go
@@ -53,8 +53,8 @@
 }
 
 func TestModCacheInfo(t *testing.T) {
-	m := &moduleCacheInfo{
-		modCacheDirInfo: make(map[string]*directoryPackageInfo),
+	m := &dirInfoCache{
+		dirs: make(map[string]*directoryPackageInfo),
 	}
 
 	dirInfo := []struct {
diff --git a/internal/imports/mod_test.go b/internal/imports/mod_test.go
index c332136..e625606 100644
--- a/internal/imports/mod_test.go
+++ b/internal/imports/mod_test.go
@@ -807,10 +807,11 @@
 		GOROOT: build.Default.GOROOT,
 		Logf:   log.Printf,
 	}
-	exclude := []gopathwalk.RootType{gopathwalk.RootCurrentModule, gopathwalk.RootGOROOT, gopathwalk.RootOther}
+	exclude := []gopathwalk.RootType{gopathwalk.RootGOROOT}
 	env.GetResolver().scan(nil, true, exclude)
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		env.GetResolver().scan(nil, true, exclude)
+		env.GetResolver().(*ModuleResolver).ClearForNewScan()
 	}
 }
diff --git a/internal/lsp/cache/view.go b/internal/lsp/cache/view.go
index e01094e..9027505 100644
--- a/internal/lsp/cache/view.go
+++ b/internal/lsp/cache/view.go
@@ -141,15 +141,11 @@
 	}
 
 	// Before running the user provided function, clear caches in the resolver.
-	if v.modFilesChanged() {
-		if r, ok := v.processEnv.GetResolver().(*imports.ModuleResolver); ok {
-			// Clear the resolver cache and set Initialized to false.
-			r.Initialized = false
-			r.Main = nil
-			r.ModsByModPath = nil
-			r.ModsByDir = nil
-			// Reset the modFileVersions.
-			v.modFileVersions = nil
+	if r, ok := v.processEnv.GetResolver().(*imports.ModuleResolver); ok {
+		if v.modFilesChanged() {
+			r.ClearForNewMod()
+		} else {
+			r.ClearForNewScan()
 		}
 	}