internal/lsp/cache: fail addPackageHandle if metadata is stale

If metadata is refreshed during the execution of buildPackageHandle, we
should not store the resulting package handle in the snapshot, as it
breaks the invariant that computed packages match the currently loaded
metadata.

This strictness revealed another bug: because of our fine-grained
locking in snapshot.load, it is possible that we set valid metadata
multiple times, leading to unnecessary invalidation and potential
further races to package handles. Fix this by building all metadata when
processing the go/packages result, and only filtering updates after
grabbing the lock.

For golang/go#53733

Change-Id: Ib63ae4fbd97d0d25d45fe04f9bcd835996db41da
Reviewed-on: https://go-review.googlesource.com/c/tools/+/416224
Run-TryBot: Robert Findley <rfindley@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Alan Donovan <adonovan@google.com>
gopls-CI: kokoro <noreply+kokoro@google.com>
diff --git a/internal/lsp/cache/check.go b/internal/lsp/cache/check.go
index aae6de0..c8b314a 100644
--- a/internal/lsp/cache/check.go
+++ b/internal/lsp/cache/check.go
@@ -209,9 +209,7 @@
 	// been cached, addPackage will return the cached value. This is fine,
 	// since the original package handle above will have no references and be
 	// garbage collected.
-	ph = s.addPackageHandle(ph, release)
-
-	return ph, nil
+	return s.addPackageHandle(ph, release)
 }
 
 func (s *snapshot) workspaceParseMode(id PackageID) source.ParseMode {
diff --git a/internal/lsp/cache/load.go b/internal/lsp/cache/load.go
index 08c88ab..8937f93 100644
--- a/internal/lsp/cache/load.go
+++ b/internal/lsp/cache/load.go
@@ -47,6 +47,8 @@
 		if errors.Is(err, context.Canceled) {
 			return
 		}
+		// TODO(rfindley): merge these metadata updates with the updates below, to
+		// avoid updating the graph twice.
 		s.clearShouldLoad(scopes...)
 	}()
 
@@ -154,7 +156,7 @@
 	}
 
 	moduleErrs := make(map[string][]packages.Error) // module path -> errors
-	updates := make(map[PackageID]*KnownMetadata)
+	newMetadata := make(map[PackageID]*KnownMetadata)
 	for _, pkg := range pkgs {
 		// The Go command returns synthetic list results for module queries that
 		// encountered module errors.
@@ -196,31 +198,48 @@
 		}
 		// Skip filtered packages. They may be added anyway if they're
 		// dependencies of non-filtered packages.
+		//
+		// TODO(rfindley): why exclude metadata arbitrarily here? It should be safe
+		// to capture all metadata.
 		if s.view.allFilesExcluded(pkg) {
 			continue
 		}
-		// TODO: once metadata is immutable, we shouldn't have to lock here.
-		s.mu.Lock()
-		err := computeMetadataUpdates(ctx, s.meta, PackagePath(pkg.PkgPath), pkg, cfg, query, updates, nil)
-		s.mu.Unlock()
-		if err != nil {
+		if err := buildMetadata(ctx, PackagePath(pkg.PkgPath), pkg, cfg, query, newMetadata, nil); err != nil {
 			return err
 		}
 	}
 
-	var loadedIDs []PackageID
-	for id := range updates {
-		loadedIDs = append(loadedIDs, id)
+	s.mu.Lock()
+
+	// Only update metadata where we don't already have valid metadata.
+	//
+	// We want to preserve an invariant that s.packages.Get(id).m.Metadata
+	// matches s.meta.metadata[id].Metadata. By avoiding overwriting valid
+	// metadata, we minimize the amount of invalidation required to preserve this
+	// invariant.
+	//
+	// TODO(rfindley): perform a sanity check that metadata matches here. If not,
+	// we have an invalidation bug elsewhere.
+	updates := make(map[PackageID]*KnownMetadata)
+	var updatedIDs []PackageID
+	for _, m := range newMetadata {
+		if existing := s.meta.metadata[m.ID]; existing == nil || !existing.Valid {
+			updates[m.ID] = m
+			updatedIDs = append(updatedIDs, m.ID)
+		}
 	}
 
 	event.Log(ctx, fmt.Sprintf("%s: updating metadata for %d packages", eventName, len(updates)))
 
-	s.mu.Lock()
+	// Invalidate the reverse transitive closure of packages that have changed.
+	//
+	// Note that the original metadata is being invalidated here, so we use the
+	// original metadata graph to compute the reverse closure.
+	invalidatedPackages := s.meta.reverseTransitiveClosure(true, updatedIDs...)
 
-	// invalidate the reverse transitive closure of packages that have changed.
-	invalidatedPackages := s.meta.reverseTransitiveClosure(true, loadedIDs...)
 	s.meta = s.meta.Clone(updates)
 	s.resetIsActivePackageLocked()
+
 	// Invalidate any packages we may have associated with this metadata.
 	//
 	// TODO(rfindley): this should not be necessary, as we should have already
@@ -431,10 +450,10 @@
 	return tmpdir, nil
 }
 
-// computeMetadataUpdates populates the updates map with metadata updates to
+// buildMetadata populates the updates map with metadata updates to
 // apply, based on the given pkg. It recurs through pkg.Imports to ensure that
 // metadata exists for all dependencies.
-func computeMetadataUpdates(ctx context.Context, g *metadataGraph, pkgPath PackagePath, pkg *packages.Package, cfg *packages.Config, query []string, updates map[PackageID]*KnownMetadata, path []PackageID) error {
+func buildMetadata(ctx context.Context, pkgPath PackagePath, pkg *packages.Package, cfg *packages.Config, query []string, updates map[PackageID]*KnownMetadata, path []PackageID) error {
 	id := PackageID(pkg.ID)
 	if source.IsCommandLineArguments(pkg.ID) {
 		suffix := ":" + strings.Join(query, ",")
@@ -442,21 +461,12 @@
 		pkgPath = PackagePath(string(pkgPath) + suffix)
 	}
 
-	// If we have valid metadata for this package, don't update. This minimizes
-	// the amount of subsequent invalidation.
-	//
-	// TODO(rfindley): perform a sanity check that metadata matches here. If not,
-	// we have an invalidation bug elsewhere.
-	if existing := g.metadata[id]; existing != nil && existing.Valid {
-		return nil
-	}
-
 	if _, ok := updates[id]; ok {
 		// If we've already seen this dependency, there may be an import cycle, or
 		// we may have reached the same package transitively via distinct paths.
 		// Check the path to confirm.
 
-		// TODO(rfindley): this doesn't look right. Any single piece of new
+		// TODO(rfindley): this doesn't look sufficient. Any single piece of new
 		// metadata could theoretically introduce import cycles in the metadata
 		// graph. What's the point of this limited check here (and is it even
 		// possible to get an import cycle in data from go/packages)? Consider
@@ -535,7 +545,7 @@
 			m.MissingDeps[importPkgPath] = struct{}{}
 			continue
 		}
-		if err := computeMetadataUpdates(ctx, g, importPkgPath, importPkg, cfg, query, updates, append(path, id)); err != nil {
+		if err := buildMetadata(ctx, importPkgPath, importPkg, cfg, query, updates, append(path, id)); err != nil {
 			event.Error(ctx, "error in dependency", err)
 		}
 	}
diff --git a/internal/lsp/cache/snapshot.go b/internal/lsp/cache/snapshot.go
index b962435..5623f24 100644
--- a/internal/lsp/cache/snapshot.go
+++ b/internal/lsp/cache/snapshot.go
@@ -91,6 +91,11 @@
 
 	// packages maps a packageKey to a *packageHandle.
 	// It may be invalidated when a file's content changes.
+	//
+	// Invariants to preserve:
+	//  - packages.Get(id).m.Metadata == meta.metadata[id].Metadata for all ids
+	//  - if a package is in packages, then all of its dependencies should also
+	//    be in packages, unless there is a missing import
 	packages packagesMap
 
 	// isActivePackageCache maps package ID to the cached value if it is active or not.
@@ -712,18 +717,29 @@
 	return s.meta.importedBy[id]
 }
 
-func (s *snapshot) addPackageHandle(ph *packageHandle, release func()) *packageHandle {
+// addPackageHandle stores ph in the snapshot, or returns a pre-existing handle
+// for the given package key, if it exists.
+//
+// An error is returned if the metadata used to build ph is no longer relevant.
+func (s *snapshot) addPackageHandle(ph *packageHandle, release func()) (*packageHandle, error) {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 
+	if s.meta.metadata[ph.m.ID].Metadata != ph.m.Metadata {
+		return nil, fmt.Errorf("stale metadata for %s", ph.m.ID)
+	}
+
 	// If the package handle has already been cached,
 	// return the cached handle instead of overriding it.
 	if result, ok := s.packages.Get(ph.packageKey()); ok {
 		release()
-		return result
+		if result.m.Metadata != ph.m.Metadata {
+			return nil, bug.Errorf("existing package handle does not match for %s", ph.m.ID)
+		}
+		return result, nil
 	}
 	s.packages.Set(ph.packageKey(), ph, release)
-	return ph
+	return ph, nil
 }
 
 func (s *snapshot) workspacePackageIDs() (ids []PackageID) {