internal/lsp/cache: factor out guessPackagesForURI from snapshot.clone

To simplify snapshot.clone a bit, factor out the logic to figure out
which packages to invalidate when a new file arrives. This also made it
easier to improve the algorithm by caching os.Stat results when walking
known packages.

Change-Id: I012b60e8ad73eb4df80950aac48791204877e9e0
Reviewed-on: https://go-review.googlesource.com/c/tools/+/263204
Run-TryBot: Robert Findley <rfindley@google.com>
gopls-CI: kokoro <noreply+kokoro@google.com>
Trust: Robert Findley <rfindley@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Heschi Kreinick <heschi@google.com>
diff --git a/internal/lsp/cache/snapshot.go b/internal/lsp/cache/snapshot.go
index ae45f7d..7659432 100644
--- a/internal/lsp/cache/snapshot.go
+++ b/internal/lsp/cache/snapshot.go
@@ -1072,11 +1072,13 @@
 	for withoutURI, currentFH := range withoutURIs {
 		directIDs := map[packageID]struct{}{}
 
+		filePackages := guessPackagesForURI(withoutURI, s.ids)
 		// Collect all of the package IDs that correspond to the given file.
 		// TODO: if the file has moved into a new package, we should invalidate that too.
-		for _, id := range s.ids[withoutURI] {
+		for _, id := range filePackages {
 			directIDs[id] = struct{}{}
 		}
+
 		// The original FileHandle for this URI is cached on the snapshot.
 		originalFH := s.files[withoutURI]
 
@@ -1160,25 +1162,6 @@
 			}
 		}
 
-		// If this is a file we don't yet know about,
-		// then we do not yet know what packages it should belong to.
-		// Make a rough estimate of what metadata to invalidate by finding the package IDs
-		// of all of the files in the same directory as this one.
-		// TODO(rstambler): Speed this up by mapping directories to filenames.
-		if len(directIDs) == 0 {
-			if dirStat, err := os.Stat(filepath.Dir(withoutURI.Filename())); err == nil {
-				for uri := range s.files {
-					if fdirStat, err := os.Stat(filepath.Dir(uri.Filename())); err == nil {
-						if os.SameFile(dirStat, fdirStat) {
-							for _, id := range s.ids[uri] {
-								directIDs[id] = struct{}{}
-							}
-						}
-					}
-				}
-			}
-		}
-
 		// Invalidate reverse dependencies too.
 		// TODO(heschi): figure out the locking model and use transitiveReverseDeps?
 		var addRevDeps func(packageID)
@@ -1316,6 +1299,56 @@
 	return result, reinitialize
 }
 
+// guessPackagesForURI returns all packages related to uri. If we haven't seen this
+// URI before, we guess based on files in the same directory. This is of course
+// incorrect in build systems where packages are not organized by directory.
+func guessPackagesForURI(uri span.URI, known map[span.URI][]packageID) []packageID {
+	packages := known[uri]
+	if len(packages) > 0 {
+		// We've seen this file before.
+		return packages
+	}
+	// This is a file we don't yet know about. Guess relevant packages by
+	// considering files in the same directory.
+
+	// Cache of FileInfo to avoid unnecessary stats for multiple files in the
+	// same directory.
+	stats := make(map[string]struct {
+		os.FileInfo
+		error
+	})
+	getInfo := func(dir string) (os.FileInfo, error) {
+		if res, ok := stats[dir]; ok {
+			return res.FileInfo, res.error
+		}
+		fi, err := os.Stat(dir)
+		stats[dir] = struct {
+			os.FileInfo
+			error
+		}{fi, err}
+		return fi, err
+	}
+	dir := filepath.Dir(uri.Filename())
+	fi, err := getInfo(dir)
+	if err != nil {
+		return nil
+	}
+
+	// Aggregate all possibly relevant package IDs.
+	var found []packageID
+	for knownURI, ids := range known {
+		knownDir := filepath.Dir(knownURI.Filename())
+		knownFI, err := getInfo(knownDir)
+		if err != nil {
+			continue
+		}
+		if os.SameFile(fi, knownFI) {
+			found = append(found, ids...)
+		}
+	}
+	return found
+}
+
 type reinitializeView int
 
 const (