gopls/internal/cache: share goimports state for GOMODCACHE

When using the gopls daemon, or with the multi-View workspaces that will
be increasingly common following golang/go#57979, there is a lot of
redundant work performed scanning the module cache. This CL eliminates
that redundancy, by moving module cache information into the cache.Cache
shared by all Sessions and Views.

There should be effectively no change in behavior for gopls resulting
from this CL. In ModuleResolver.scan, we still require that module cache
roots are scanned. However, we no longer invalidate this scan in
ModuleResolver.ClearForNewScan: re-scanning the module cache is the
responsibility of a new ScanModuleCache function, which is independently
scheduled. To enable this separation of refresh logic, a new
refreshTimer type is extracted to encapsulate the refresh logic.

For golang/go#44863

Change-Id: I333d55fca009be7984a514ed4abdc9a9fcafc08a
Reviewed-on: https://go-review.googlesource.com/c/tools/+/559636
Reviewed-by: Alan Donovan <adonovan@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
diff --git a/gopls/internal/cache/cache.go b/gopls/internal/cache/cache.go
index 310cf02..a6a166a 100644
--- a/gopls/internal/cache/cache.go
+++ b/gopls/internal/cache/cache.go
@@ -10,6 +10,7 @@
 	"sync/atomic"
 
 	"golang.org/x/tools/gopls/internal/protocol/command"
+	"golang.org/x/tools/internal/imports"
 	"golang.org/x/tools/internal/memoize"
 )
 
@@ -30,20 +31,36 @@
 		id:         strconv.FormatInt(index, 10),
 		store:      store,
 		memoizedFS: newMemoizedFS(),
+		modCache: &sharedModCache{
+			caches: make(map[string]*imports.DirInfoCache),
+			timers: make(map[string]*refreshTimer),
+		},
 	}
 	return c
 }
 
-// A Cache holds caching stores that are bundled together for consistency.
-//
-// TODO(rfindley): once fset and store need not be bundled together, the Cache
-// type can be eliminated.
+// A Cache holds content that is shared across multiple gopls sessions.
 type Cache struct {
 	id string
 
+	// store holds cached calculations.
+	//
+	// TODO(rfindley): at this point, these are not important, as we've moved our
+	// content-addressable cache to the file system (the filecache package). It
+	// is unlikely that this shared cache provides any shared value. We should
+	// consider removing it, replacing current uses with a simpler futures cache,
+	// as we've done for e.g. type-checked packages.
 	store *memoize.Store
 
-	*memoizedFS // implements file.Source
+	// memoizedFS holds a shared file.Source that caches reads.
+	//
+	// Reads are invalidated when *any* session gets a didChangeWatchedFile
+	// notification. This is fine: it is the responsibility of memoizedFS to hold
+	// our best knowledge of the current file system state.
+	*memoizedFS
+
+	// modCache holds the shared goimports state for GOMODCACHE directories.
+	modCache *sharedModCache
 }
 
 var cacheIndex, sessionIndex, viewIndex int64
diff --git a/gopls/internal/cache/imports.go b/gopls/internal/cache/imports.go
index 9d6154d..cfba562 100644
--- a/gopls/internal/cache/imports.go
+++ b/gopls/internal/cache/imports.go
@@ -13,19 +13,129 @@
 	"golang.org/x/tools/gopls/internal/file"
 	"golang.org/x/tools/internal/event"
 	"golang.org/x/tools/internal/event/keys"
+	"golang.org/x/tools/internal/event/tag"
 	"golang.org/x/tools/internal/imports"
 )
 
-type importsState struct {
-	ctx context.Context
-
-	mu                   sync.Mutex
-	processEnv           *imports.ProcessEnv
-	cacheRefreshDuration time.Duration
-	cacheRefreshTimer    *time.Timer
-	cachedModFileHash    file.Hash
+// refreshTimer implements delayed asynchronous refreshing of state.
+//
+// See the [refreshTimer.schedule] documentation for more details.
+type refreshTimer struct {
+	mu        sync.Mutex
+	duration  time.Duration
+	timer     *time.Timer
+	refreshFn func()
 }
 
+// newRefreshTimer constructs a new refresh timer which schedules refreshes
+// using the given function.
+func newRefreshTimer(refresh func()) *refreshTimer {
+	return &refreshTimer{
+		refreshFn: refresh,
+	}
+}
+
+// schedule schedules the refresh function to run at some point in the future,
+// if no existing refresh is already scheduled.
+//
+// At a minimum, scheduled refreshes are delayed by 30s, but they may be
+// delayed longer to keep their expected execution time under 2% of wall clock
+// time.
+func (t *refreshTimer) schedule() {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	if t.timer == nil {
+		// Don't refresh more than twice per minute.
+		delay := 30 * time.Second
+		// Don't spend more than ~2% of the time refreshing.
+		if adaptive := 50 * t.duration; adaptive > delay {
+			delay = adaptive
+		}
+		t.timer = time.AfterFunc(delay, func() {
+			start := time.Now()
+			t.refreshFn()
+			t.mu.Lock()
+			t.duration = time.Since(start)
+			t.timer = nil
+			t.mu.Unlock()
+		})
+	}
+}
+
+// A sharedModCache tracks goimports state for GOMODCACHE directories
+// (each session may have its own GOMODCACHE).
+//
+// This state is refreshed independently of view-specific imports state.
+type sharedModCache struct {
+	mu     sync.Mutex
+	caches map[string]*imports.DirInfoCache // GOMODCACHE -> cache content; never invalidated
+	timers map[string]*refreshTimer         // GOMODCACHE -> timer
+}
+
+func (c *sharedModCache) dirCache(dir string) *imports.DirInfoCache {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	cache, ok := c.caches[dir]
+	if !ok {
+		cache = imports.NewDirInfoCache()
+		c.caches[dir] = cache
+	}
+	return cache
+}
+
+// refreshDir schedules a refresh of the given directory, which must be a
+// module cache.
+func (c *sharedModCache) refreshDir(ctx context.Context, dir string, logf func(string, ...any)) {
+	cache := c.dirCache(dir)
+
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	timer, ok := c.timers[dir]
+	if !ok {
+		timer = newRefreshTimer(func() {
+			_, done := event.Start(ctx, "cache.sharedModCache.refreshDir", tag.Directory.Of(dir))
+			defer done()
+			imports.ScanModuleCache(dir, cache, logf)
+		})
+		c.timers[dir] = timer
+	}
+
+	timer.schedule()
+}
+
+// importsState tracks view-specific imports state.
+type importsState struct {
+	ctx          context.Context
+	modCache     *sharedModCache
+	refreshTimer *refreshTimer
+
+	mu                sync.Mutex
+	processEnv        *imports.ProcessEnv
+	cachedModFileHash file.Hash
+}
+
+// newImportsState constructs a new imports state for running goimports
+// functions via [runProcessEnvFunc].
+//
+// The returned state will automatically refresh itself following a call to
+// runProcessEnvFunc.
+func newImportsState(backgroundCtx context.Context, modCache *sharedModCache, env *imports.ProcessEnv) *importsState {
+	s := &importsState{
+		ctx:        backgroundCtx,
+		modCache:   modCache,
+		processEnv: env,
+	}
+	s.refreshTimer = newRefreshTimer(s.refreshProcessEnv)
+	return s
+}
+
+// runProcessEnvFunc runs goimports.
+//
+// Any call to runProcessEnvFunc will schedule a refresh of the imports state
+// at some point in the future, if such a refresh is not already scheduled. See
+// [refreshTimer] for more details.
 func (s *importsState) runProcessEnvFunc(ctx context.Context, snapshot *Snapshot, fn func(context.Context, *imports.Options) error) error {
 	ctx, done := event.Start(ctx, "cache.importsState.runProcessEnvFunc")
 	defer done()
@@ -72,15 +182,20 @@
 		return err
 	}
 
-	if s.cacheRefreshTimer == nil {
-		// Don't refresh more than twice per minute.
-		delay := 30 * time.Second
-		// Don't spend more than a couple percent of the time refreshing.
-		if adaptive := 50 * s.cacheRefreshDuration; adaptive > delay {
-			delay = adaptive
-		}
-		s.cacheRefreshTimer = time.AfterFunc(delay, s.refreshProcessEnv)
-	}
+	// Refresh the imports resolver after usage. This may seem counterintuitive,
+	// since it means the first ProcessEnvFunc after a long period of inactivity
+	// may be stale, but in practice we run ProcessEnvFuncs frequently during
+	// active development (e.g. during completion), and so this mechanism will be
+	// active while gopls is in use, and inactive when gopls is idle.
+	s.refreshTimer.schedule()
+
+	// TODO(rfindley): the GOMODCACHE value used here isn't directly tied to the
+	// ProcessEnv.Env["GOMODCACHE"], though they should theoretically always
+	// agree. It would be better if we guaranteed this, possibly by setting all
+	// required environment variables in ProcessEnv.Env, to avoid the redundant
+	// Go command invocation.
+	gomodcache := snapshot.view.folder.Env.GOMODCACHE
+	s.modCache.refreshDir(s.ctx, gomodcache, s.processEnv.Logf)
 
 	return nil
 }
@@ -96,16 +211,17 @@
 	if resolver, err := s.processEnv.GetResolver(); err == nil {
 		resolver.ClearForNewScan()
 	}
+	// TODO(rfindley): it's not clear why we're unlocking here. Shouldn't we
+	// guard the use of env below? In any case, we can prime a separate resolver.
 	s.mu.Unlock()
 
 	event.Log(s.ctx, "background imports cache refresh starting")
+
+	// TODO(rfindley, golang/go#59216): do this priming with a separate resolver,
+	// and then replace, so that we never have to wait on an unprimed cache.
 	if err := imports.PrimeCache(context.Background(), env); err == nil {
 		event.Log(ctx, fmt.Sprintf("background refresh finished after %v", time.Since(start)))
 	} else {
 		event.Log(ctx, fmt.Sprintf("background refresh finished after %v", time.Since(start)), keys.Err.Of(err))
 	}
-	s.mu.Lock()
-	s.cacheRefreshDuration = time.Since(start)
-	s.cacheRefreshTimer = nil
-	s.mu.Unlock()
 }
diff --git a/gopls/internal/cache/session.go b/gopls/internal/cache/session.go
index 27380f1..102a226 100644
--- a/gopls/internal/cache/session.go
+++ b/gopls/internal/cache/session.go
@@ -209,6 +209,7 @@
 			SkipPathInScan: skipPath,
 			Env:            env,
 			WorkingDir:     def.root.Path(),
+			ModCache:       s.cache.modCache.dirCache(def.folder.Env.GOMODCACHE),
 		}
 		if def.folder.Options.VerboseOutput {
 			pe.Logf = func(format string, args ...interface{}) {
@@ -227,10 +228,7 @@
 		ignoreFilter:         ignoreFilter,
 		fs:                   s.overlayFS,
 		viewDefinition:       def,
-		importsState: &importsState{
-			ctx:        backgroundCtx,
-			processEnv: pe,
-		},
+		importsState:         newImportsState(backgroundCtx, s.cache.modCache, pe),
 	}
 
 	s.snapshotWG.Add(1)
diff --git a/internal/imports/fix.go b/internal/imports/fix.go
index 21bdc06..606d6a5 100644
--- a/internal/imports/fix.go
+++ b/internal/imports/fix.go
@@ -884,6 +884,10 @@
 	// If Logf is non-nil, debug logging is enabled through this function.
 	Logf func(format string, args ...interface{})
 
+	// If set, ModCache holds a shared cache of directory info to use across
+	// multiple ProcessEnvs.
+	ModCache *DirInfoCache
+
 	initialized bool // see TODO above
 
 	// resolver and resolverErr are lazily evaluated (see GetResolver).
@@ -984,7 +988,7 @@
 		if len(e.Env["GOMOD"]) == 0 && len(e.Env["GOWORK"]) == 0 {
 			e.resolver = newGopathResolver(e)
 		} else {
-			e.resolver, e.resolverErr = newModuleResolver(e)
+			e.resolver, e.resolverErr = newModuleResolver(e, e.ModCache)
 		}
 	}
 
@@ -1252,17 +1256,14 @@
 type gopathResolver struct {
 	env      *ProcessEnv
 	walked   bool
-	cache    *dirInfoCache
+	cache    *DirInfoCache
 	scanSema chan struct{} // scanSema prevents concurrent scans.
 }
 
 func newGopathResolver(env *ProcessEnv) *gopathResolver {
 	r := &gopathResolver{
-		env: env,
-		cache: &dirInfoCache{
-			dirs:      map[string]*directoryPackageInfo{},
-			listeners: map[*int]cacheListener{},
-		},
+		env:      env,
+		cache:    NewDirInfoCache(),
 		scanSema: make(chan struct{}, 1),
 	}
 	r.scanSema <- struct{}{}
@@ -1271,10 +1272,7 @@
 
 func (r *gopathResolver) ClearForNewScan() {
 	<-r.scanSema
-	r.cache = &dirInfoCache{
-		dirs:      map[string]*directoryPackageInfo{},
-		listeners: map[*int]cacheListener{},
-	}
+	r.cache = NewDirInfoCache()
 	r.walked = false
 	r.scanSema <- struct{}{}
 }
diff --git a/internal/imports/mod.go b/internal/imports/mod.go
index c8a040c..ab25b6a 100644
--- a/internal/imports/mod.go
+++ b/internal/imports/mod.go
@@ -82,11 +82,11 @@
 	//
 	// otherCache stores information about all other roots (even GOROOT), which
 	// may change.
-	moduleCacheCache *dirInfoCache
-	otherCache       *dirInfoCache
+	moduleCacheCache *DirInfoCache
+	otherCache       *DirInfoCache
 }
 
-func newModuleResolver(e *ProcessEnv) (*ModuleResolver, error) {
+func newModuleResolver(e *ProcessEnv, moduleCacheCache *DirInfoCache) (*ModuleResolver, error) {
 	r := &ModuleResolver{
 		env:      e,
 		scanSema: make(chan struct{}, 1),
@@ -196,18 +196,22 @@
 				addDep(mod)
 			}
 		}
+		// If provided, share the moduleCacheCache.
+		//
+		// TODO(rfindley): The module cache is immutable. However, the loaded
+		// exports do depend on GOOS and GOARCH. Fortunately, the
+		// ProcessEnv.buildContext does not adjust these from build.DefaultContext
+		// (even though it should). So for now, this is OK to share, but we need to
+		// add logic for handling GOOS/GOARCH.
+		r.moduleCacheCache = moduleCacheCache
 		r.roots = append(r.roots, gopathwalk.Root{Path: r.moduleCacheDir, Type: gopathwalk.RootModuleCache})
 	}
 
 	r.scannedRoots = map[gopathwalk.Root]bool{}
-	r.moduleCacheCache = &dirInfoCache{
-		dirs:      map[string]*directoryPackageInfo{},
-		listeners: map[*int]cacheListener{},
+	if r.moduleCacheCache == nil {
+		r.moduleCacheCache = NewDirInfoCache()
 	}
-	r.otherCache = &dirInfoCache{
-		dirs:      map[string]*directoryPackageInfo{},
-		listeners: map[*int]cacheListener{},
-	}
+	r.otherCache = NewDirInfoCache()
 	return r, nil
 }
 
@@ -263,11 +267,22 @@
 // contents, since they are assumed to be immutable.
 func (r *ModuleResolver) ClearForNewScan() {
 	<-r.scanSema
+	prevRoots := r.scannedRoots
 	r.scannedRoots = map[gopathwalk.Root]bool{}
-	r.otherCache = &dirInfoCache{
-		dirs:      map[string]*directoryPackageInfo{},
-		listeners: map[*int]cacheListener{},
+	// Invalidate root scans. We don't need to invalidate module cache roots,
+	// because they are immutable.
+	// (We don't support a use case where GOMODCACHE is cleaned in the middle of
+	// e.g. a gopls session: the user must restart gopls to get accurate
+	// imports.)
+	//
+	// Scanning for new directories in GOMODCACHE should be handled elsewhere,
+	// via a call to ScanModuleCache.
+	for _, root := range r.roots {
+		if root.Type == gopathwalk.RootModuleCache && prevRoots[root] {
+			r.scannedRoots[root] = true
+		}
 	}
+	r.otherCache = NewDirInfoCache()
 	r.scanSema <- struct{}{}
 }
 
@@ -282,7 +297,7 @@
 // TODO(rfindley): move this to a new env.go, consolidating ProcessEnv methods.
 func (e *ProcessEnv) ClearModuleInfo() {
 	if r, ok := e.resolver.(*ModuleResolver); ok {
-		resolver, resolverErr := newModuleResolver(e)
+		resolver, resolverErr := newModuleResolver(e, e.ModCache)
 		if resolverErr == nil {
 			<-r.scanSema // guards caches
 			resolver.moduleCacheCache = r.moduleCacheCache
@@ -294,8 +309,9 @@
 	}
 }
 
-// findPackage returns the module and directory that contains the package at
-// the given import path, or returns nil, "" if no module is in scope.
+// findPackage returns the module and directory from within the main modules
+// and their dependencies that contains the package at the given import path,
+// or returns nil, "" if no module is in scope.
 func (r *ModuleResolver) findPackage(importPath string) (*gocommand.ModuleJSON, string) {
 	// This can't find packages in the stdlib, but that's harmless for all
 	// the existing code paths.
@@ -429,15 +445,15 @@
 	return modDir != mod.Dir
 }
 
-func (r *ModuleResolver) modInfo(dir string) (modDir string, modName string) {
-	readModName := func(modFile string) string {
-		modBytes, err := os.ReadFile(modFile)
-		if err != nil {
-			return ""
-		}
-		return modulePath(modBytes)
+func readModName(modFile string) string {
+	modBytes, err := os.ReadFile(modFile)
+	if err != nil {
+		return ""
 	}
+	return modulePath(modBytes)
+}
 
+func (r *ModuleResolver) modInfo(dir string) (modDir, modName string) {
 	if r.dirInModuleCache(dir) {
 		if matches := modCacheRegexp.FindStringSubmatch(dir); len(matches) == 3 {
 			index := strings.Index(dir, matches[1]+"@"+matches[2])
@@ -473,6 +489,7 @@
 func (r *ModuleResolver) loadPackageNames(importPaths []string, srcDir string) (map[string]string, error) {
 	names := map[string]string{}
 	for _, path := range importPaths {
+		// TODO(rfindley): shouldn't this use the dirInfoCache?
 		_, packageDir := r.findPackage(path)
 		if packageDir == "" {
 			continue
diff --git a/internal/imports/mod_cache.go b/internal/imports/mod_cache.go
index 03b1926..cfc5465 100644
--- a/internal/imports/mod_cache.go
+++ b/internal/imports/mod_cache.go
@@ -7,8 +7,12 @@
 import (
 	"context"
 	"fmt"
+	"path"
+	"path/filepath"
+	"strings"
 	"sync"
 
+	"golang.org/x/mod/module"
 	"golang.org/x/tools/internal/gopathwalk"
 )
 
@@ -65,6 +69,10 @@
 	packageName string // the package name, as declared in the source.
 
 	// Set when status >= exportsLoaded.
+	// TODO(rfindley): it's hard to see this, but exports depend implicitly on
+	// the default build context GOOS and GOARCH.
+	//
+	// We can make this explicit, and key exports by GOOS, GOARCH.
 	exports []string
 }
 
@@ -80,7 +88,7 @@
 	return true, nil
 }
 
-// dirInfoCache is a concurrency safe map for storing information about
+// DirInfoCache is a concurrency-safe map for storing information about
 // directories that may contain packages.
 //
 // The information in this cache is built incrementally. Entries are initialized in scan.
@@ -93,21 +101,26 @@
 // The information in the cache is not expected to change for the cache's
 // lifetime, so there is no protection against competing writes. Users should
 // take care not to hold the cache across changes to the underlying files.
-//
-// TODO(suzmue): consider other concurrency strategies and data structures (RWLocks, sync.Map, etc)
-type dirInfoCache struct {
+type DirInfoCache struct {
 	mu sync.Mutex
 	// dirs stores information about packages in directories, keyed by absolute path.
 	dirs      map[string]*directoryPackageInfo
 	listeners map[*int]cacheListener
 }
 
+func NewDirInfoCache() *DirInfoCache {
+	return &DirInfoCache{
+		dirs:      make(map[string]*directoryPackageInfo),
+		listeners: make(map[*int]cacheListener),
+	}
+}
+
 type cacheListener func(directoryPackageInfo)
 
 // ScanAndListen calls listener on all the items in the cache, and on anything
 // newly added. The returned stop function waits for all in-flight callbacks to
 // finish and blocks new ones.
-func (d *dirInfoCache) ScanAndListen(ctx context.Context, listener cacheListener) func() {
+func (d *DirInfoCache) ScanAndListen(ctx context.Context, listener cacheListener) func() {
 	ctx, cancel := context.WithCancel(ctx)
 
 	// Flushing out all the callbacks is tricky without knowing how many there
@@ -163,8 +176,10 @@
 }
 
 // Store stores the package info for dir.
-func (d *dirInfoCache) Store(dir string, info directoryPackageInfo) {
+func (d *DirInfoCache) Store(dir string, info directoryPackageInfo) {
 	d.mu.Lock()
+	// TODO(rfindley, golang/go#59216): should we overwrite an existing entry?
+	// That seems incorrect as the cache should be idempotent.
 	_, old := d.dirs[dir]
 	d.dirs[dir] = &info
 	var listeners []cacheListener
@@ -181,7 +196,7 @@
 }
 
 // Load returns a copy of the directoryPackageInfo for absolute directory dir.
-func (d *dirInfoCache) Load(dir string) (directoryPackageInfo, bool) {
+func (d *DirInfoCache) Load(dir string) (directoryPackageInfo, bool) {
 	d.mu.Lock()
 	defer d.mu.Unlock()
 	info, ok := d.dirs[dir]
@@ -192,7 +207,7 @@
 }
 
 // Keys returns the keys currently present in d.
-func (d *dirInfoCache) Keys() (keys []string) {
+func (d *DirInfoCache) Keys() (keys []string) {
 	d.mu.Lock()
 	defer d.mu.Unlock()
 	for key := range d.dirs {
@@ -201,7 +216,7 @@
 	return keys
 }
 
-func (d *dirInfoCache) CachePackageName(info directoryPackageInfo) (string, error) {
+func (d *DirInfoCache) CachePackageName(info directoryPackageInfo) (string, error) {
 	if loaded, err := info.reachedStatus(nameLoaded); loaded {
 		return info.packageName, err
 	}
@@ -214,7 +229,7 @@
 	return info.packageName, info.err
 }
 
-func (d *dirInfoCache) CacheExports(ctx context.Context, env *ProcessEnv, info directoryPackageInfo) (string, []string, error) {
+func (d *DirInfoCache) CacheExports(ctx context.Context, env *ProcessEnv, info directoryPackageInfo) (string, []string, error) {
 	if reached, _ := info.reachedStatus(exportsLoaded); reached {
 		return info.packageName, info.exports, info.err
 	}
@@ -235,3 +250,81 @@
 	d.Store(info.dir, info)
 	return info.packageName, info.exports, info.err
 }
+
+// ScanModuleCache walks the given directory, which must be a GOMODCACHE value,
+// for directory package information, storing the results in cache.
+func ScanModuleCache(dir string, cache *DirInfoCache, logf func(string, ...any)) {
+	// Note(rfindley): it's hard to see, but this function attempts to implement
+	// just the side effects on cache of calling PrimeCache with a ProcessEnv
+	// that has the given dir as its GOMODCACHE.
+	//
+	// Teasing out the control flow, we see that we can avoid any handling of
+	// vendor/ and can infer module info entirely from the path, simplifying the
+	// logic here.
+
+	root := gopathwalk.Root{
+		Path: filepath.Clean(dir),
+		Type: gopathwalk.RootModuleCache,
+	}
+
+	directoryInfo := func(root gopathwalk.Root, dir string) directoryPackageInfo {
+		// This is a copy of ModuleResolver.scanDirForPackage, trimmed down to
+		// logic that applies to a module cache directory.
+
+		subdir := ""
+		if dir != root.Path {
+			subdir = dir[len(root.Path)+len("/"):]
+		}
+
+		matches := modCacheRegexp.FindStringSubmatch(subdir)
+		if len(matches) == 0 {
+			return directoryPackageInfo{
+				status: directoryScanned,
+				err:    fmt.Errorf("invalid module cache path: %v", subdir),
+			}
+		}
+		modPath, err := module.UnescapePath(filepath.ToSlash(matches[1]))
+		if err != nil {
+			if logf != nil {
+				logf("decoding module cache path %q: %v", subdir, err)
+			}
+			return directoryPackageInfo{
+				status: directoryScanned,
+				err:    fmt.Errorf("decoding module cache path %q: %v", subdir, err),
+			}
+		}
+		importPath := path.Join(modPath, filepath.ToSlash(matches[3]))
+		index := strings.Index(dir, matches[1]+"@"+matches[2])
+		modDir := filepath.Join(dir[:index], matches[1]+"@"+matches[2])
+		modName := readModName(filepath.Join(modDir, "go.mod"))
+		return directoryPackageInfo{
+			status:                 directoryScanned,
+			dir:                    dir,
+			rootType:               root.Type,
+			nonCanonicalImportPath: importPath,
+			moduleDir:              modDir,
+			moduleName:             modName,
+		}
+	}
+
+	add := func(root gopathwalk.Root, dir string) {
+		info := directoryInfo(root, dir)
+		cache.Store(info.dir, info)
+	}
+
+	skip := func(_ gopathwalk.Root, dir string) bool {
+		// Skip directories that have already been scanned.
+		//
+		// Note that gopathwalk only adds "package" directories, which must contain
+		// a .go file, and all such package directories in the module cache are
+		// immutable. So if we can load a dir, it can be skipped.
+		info, ok := cache.Load(dir)
+		if !ok {
+			return false
+		}
+		packageScanned, _ := info.reachedStatus(directoryScanned)
+		return packageScanned
+	}
+
+	gopathwalk.WalkSkip([]gopathwalk.Root{root}, add, skip, gopathwalk.Options{Logf: logf, ModulesEnabled: true})
+}
diff --git a/internal/imports/mod_cache_test.go b/internal/imports/mod_cache_test.go
index 39c691e..3af85fb 100644
--- a/internal/imports/mod_cache_test.go
+++ b/internal/imports/mod_cache_test.go
@@ -6,9 +6,12 @@
 
 import (
 	"fmt"
+	"os/exec"
 	"reflect"
 	"sort"
+	"strings"
 	"testing"
+	"time"
 )
 
 func TestDirectoryPackageInfoReachedStatus(t *testing.T) {
@@ -58,9 +61,7 @@
 }
 
 func TestModCacheInfo(t *testing.T) {
-	m := &dirInfoCache{
-		dirs: make(map[string]*directoryPackageInfo),
-	}
+	m := NewDirInfoCache()
 
 	dirInfo := []struct {
 		dir  string
@@ -124,3 +125,20 @@
 		}
 	}
 }
+
+func BenchmarkScanModuleCache(b *testing.B) {
+	output, err := exec.Command("go", "env", "GOMODCACHE").Output()
+	if err != nil {
+		b.Fatal(err)
+	}
+	gomodcache := strings.TrimSpace(string(output))
+	cache := NewDirInfoCache()
+	start := time.Now()
+	ScanModuleCache(gomodcache, cache, nil)
+	b.Logf("initial scan took %v", time.Since(start))
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		ScanModuleCache(gomodcache, cache, nil)
+	}
+}