gopls/internal/lsp/cache: (re-)ensure clean shutdown

CL 549415 (rightly) changed the logic for View shutdown to not await all
work on the Snapshot, as this leads to potential deadlocks: we should
never await work while holding a mutex. However, we still need to await
all work when shutting down the Session, otherwise we end up with
failures like golang/go#64971 ("directory not empty"). Therefore, we
need a new synchronization mechanism.

Introduce a sync.WaitGroup on the Session to allow awaiting the
destruction of all Snapshots created on behalf of the Session. In order
to support this, View invalidation becomes a method on the Session,
rather than the View, and requires the Session.viewMu.

Also make a few unrelated cosmetic improvements.

Fixes golang/go#64971

Change-Id: I43fc0b5ff8a7762887fbfd64df7596e524383279
Reviewed-on: https://go-review.googlesource.com/c/tools/+/554996
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Alan Donovan <adonovan@google.com>
diff --git a/gopls/internal/lsp/cache/session.go b/gopls/internal/lsp/cache/session.go
index 7436bcb..bf8cc54 100644
--- a/gopls/internal/lsp/cache/session.go
+++ b/gopls/internal/lsp/cache/session.go
@@ -42,6 +42,11 @@
 	views   []*View
 	viewMap map[protocol.DocumentURI]*View // file->best view; nil after shutdown
 
+	// snapshots is a counting semaphore that records the number
+	// of unreleased snapshots associated with this session.
+	// Shutdown waits for it to fall to zero.
+	snapshotWG sync.WaitGroup
+
 	parseCache *parseCache
 
 	*overlayFS
@@ -68,6 +73,7 @@
 		view.shutdown()
 	}
 	s.parseCache.stop()
+	s.snapshotWG.Wait() // wait for all work on associated snapshots to finish
 	event.Log(ctx, "Shutdown session", KeyShutdownSession.Of(s))
 }
 
@@ -183,12 +189,14 @@
 		},
 	}
 
+	s.snapshotWG.Add(1)
 	v.snapshot = &Snapshot{
 		view:             v,
 		backgroundCtx:    backgroundCtx,
 		cancel:           cancel,
 		store:            s.cache.store,
 		refcount:         1, // Snapshots are born referenced.
+		done:             s.snapshotWG.Done,
 		packages:         new(persistent.Map[PackageID, *packageHandle]),
 		meta:             new(metadata.Graph),
 		files:            newFileMap(),
@@ -217,7 +225,7 @@
 
 	// Initialize the view without blocking.
 	initCtx, initCancel := context.WithCancel(xcontext.Detach(ctx))
-	v.initCancelFirstAttempt = initCancel
+	v.cancelInitialWorkspaceLoad = initCancel
 	snapshot := v.snapshot
 
 	// Pass a second reference to the background goroutine.
@@ -822,7 +830,7 @@
 	// ...but changes may be relevant to other views, for example if they are
 	// changes to a shared package.
 	for _, v := range s.views {
-		_, release, needsDiagnosis := v.Invalidate(ctx, StateChange{Files: changed})
+		_, release, needsDiagnosis := s.invalidateViewLocked(ctx, v, StateChange{Files: changed})
 		release()
 
 		if needsDiagnosis || checkViews {
diff --git a/gopls/internal/lsp/cache/snapshot.go b/gopls/internal/lsp/cache/snapshot.go
index 1b1ed12..83e02d2 100644
--- a/gopls/internal/lsp/cache/snapshot.go
+++ b/gopls/internal/lsp/cache/snapshot.go
@@ -85,12 +85,14 @@
 	store *memoize.Store // cache of handles shared by all snapshots
 
 	refMu sync.Mutex
+
 	// refcount holds the number of outstanding references to the current
-	// Snapshot. When refcount is decremented to 0, the Snapshot maps can be
-	// safely destroyed.
+	// Snapshot. When refcount is decremented to 0, the Snapshot maps are
+	// destroyed and the done function is called.
 	//
 	// TODO(rfindley): use atomic.Int32 on Go 1.19+.
 	refcount int
+	done     func() // for implementing Session.Shutdown
 
 	// mu guards all of the maps in the snapshot, as well as the builtin URI and
 	// initialized.
@@ -248,6 +250,7 @@
 		s.unloadableFiles.Destroy()
 		s.moduleUpgrades.Destroy()
 		s.vulns.Destroy()
+		s.done()
 	}
 }
 
@@ -1663,10 +1666,10 @@
 // also require more strictness about diagnostic dependencies. For example,
 // template.Diagnostics currently re-parses every time: there is no Snapshot
 // data responsible for providing these diagnostics.
-func (s *Snapshot) clone(ctx, bgCtx context.Context, changed StateChange) (*Snapshot, bool) {
+func (s *Snapshot) clone(ctx, bgCtx context.Context, changed StateChange, done func()) (*Snapshot, bool) {
 	changedFiles := changed.Files
-	ctx, done := event.Start(ctx, "cache.snapshot.clone")
-	defer done()
+	ctx, stop := event.Start(ctx, "cache.snapshot.clone")
+	defer stop()
 
 	s.mu.Lock()
 	defer s.mu.Unlock()
@@ -1680,6 +1683,7 @@
 		sequenceID:        s.sequenceID + 1,
 		store:             s.store,
 		refcount:          1, // Snapshots are born referenced.
+		done:              done,
 		view:              s.view,
 		backgroundCtx:     bgCtx,
 		cancel:            cancel,
diff --git a/gopls/internal/lsp/cache/view.go b/gopls/internal/lsp/cache/view.go
index 0b1e1db..bf2a8f0 100644
--- a/gopls/internal/lsp/cache/view.go
+++ b/gopls/internal/lsp/cache/view.go
@@ -29,6 +29,7 @@
 	"golang.org/x/tools/gopls/internal/settings"
 	"golang.org/x/tools/gopls/internal/util/maps"
 	"golang.org/x/tools/gopls/internal/util/pathutil"
+	"golang.org/x/tools/gopls/internal/util/slices"
 	"golang.org/x/tools/gopls/internal/vulncheck"
 	"golang.org/x/tools/internal/event"
 	"golang.org/x/tools/internal/gocommand"
@@ -100,21 +101,12 @@
 	// ignoreFilter is used for fast checking of ignored files.
 	ignoreFilter *ignoreFilter
 
-	// initCancelFirstAttempt can be used to terminate the view's first
+	// cancelInitialWorkspaceLoad can be used to terminate the view's first
 	// attempt at initialization.
-	initCancelFirstAttempt context.CancelFunc
+	cancelInitialWorkspaceLoad context.CancelFunc
 
-	// Track the latest snapshot via the snapshot field, guarded by snapshotMu.
-	//
-	// Invariant: whenever the snapshot field is overwritten, destroy(snapshot)
-	// is called on the previous (overwritten) snapshot while snapshotMu is held,
-	// incrementing snapshotWG. During shutdown the final snapshot is
-	// overwritten with nil and destroyed, guaranteeing that all observed
-	// snapshots have been destroyed via the destroy method, and snapshotWG may
-	// be waited upon to let these destroy operations complete.
 	snapshotMu sync.Mutex
-	snapshot   *Snapshot      // latest snapshot; nil after shutdown has been called
-	snapshotWG sync.WaitGroup // refcount for pending destroy operations
+	snapshot   *Snapshot // latest snapshot; nil after shutdown has been called
 
 	// initialWorkspaceLoad is closed when the first workspace initialization has
 	// completed. If we failed to load, we only retry if the go.mod file changes,
@@ -513,11 +505,10 @@
 	}
 }
 
-// shutdown releases resources associated with the view, and waits for ongoing
-// work to complete.
+// shutdown releases resources associated with the view.
 func (v *View) shutdown() {
 	// Cancel the initial workspace load if it is still running.
-	v.initCancelFirstAttempt()
+	v.cancelInitialWorkspaceLoad()
 
 	v.snapshotMu.Lock()
 	if v.snapshot != nil {
@@ -526,8 +517,6 @@
 		v.snapshot = nil
 	}
 	v.snapshotMu.Unlock()
-
-	v.snapshotWG.Wait()
 }
 
 // IgnoredFile reports if a file would be ignored by a `go list` of the whole
@@ -767,16 +756,33 @@
 	GCDetails      map[metadata.PackageID]bool // package -> whether or not we want details
 }
 
-// Invalidate processes the provided state change, invalidating any derived
+// InvalidateView processes the provided state change, invalidating any derived
 // results that depend on the changed state.
 //
 // The resulting snapshot is non-nil, representing the outcome of the state
 // change. The second result is a function that must be called to release the
 // snapshot when the snapshot is no longer needed.
 //
-// The resulting bool reports whether the new View needs to be re-diagnosed.
-// See Snapshot.clone for more details.
-func (v *View) Invalidate(ctx context.Context, changed StateChange) (*Snapshot, func(), bool) {
+// An error is returned if the given view is no longer active in the session.
+func (s *Session) InvalidateView(ctx context.Context, view *View, changed StateChange) (*Snapshot, func(), error) {
+	s.viewMu.Lock()
+	defer s.viewMu.Unlock()
+
+	if !slices.Contains(s.views, view) {
+		return nil, nil, fmt.Errorf("view is no longer active")
+	}
+	snapshot, release, _ := s.invalidateViewLocked(ctx, view, changed)
+	return snapshot, release, nil
+}
+
+// invalidateViewLocked invalidates the content of the given view.
+// (See [Session.InvalidateView]).
+//
+// The resulting bool reports whether the View needs to be re-diagnosed.
+// (See [Snapshot.clone]).
+//
+// s.viewMu must be held while calling this method.
+func (s *Session) invalidateViewLocked(ctx context.Context, v *View, changed StateChange) (*Snapshot, func(), bool) {
 	// Detach the context so that content invalidation cannot be canceled.
 	ctx = xcontext.Detach(ctx)
 
@@ -799,9 +805,9 @@
 	// TODO(rfindley): shouldn't we do this before canceling?
 	prevSnapshot.AwaitInitialized(ctx)
 
-	// Save one lease of the cloned snapshot in the view.
 	var needsDiagnosis bool
-	v.snapshot, needsDiagnosis = prevSnapshot.clone(ctx, v.baseCtx, changed)
+	s.snapshotWG.Add(1)
+	v.snapshot, needsDiagnosis = prevSnapshot.clone(ctx, v.baseCtx, changed, s.snapshotWG.Done)
 
 	// Remove the initial reference created when prevSnapshot was created.
 	prevSnapshot.decref()
diff --git a/gopls/internal/server/command.go b/gopls/internal/server/command.go
index 330f0a8..60c7184 100644
--- a/gopls/internal/server/command.go
+++ b/gopls/internal/server/command.go
@@ -285,10 +285,9 @@
 			if err != nil {
 				return nil, nil, err
 			}
-			snapshot, release, _ := deps.snapshot.View().Invalidate(ctx, cache.StateChange{
+			return c.s.session.InvalidateView(ctx, deps.snapshot.View(), cache.StateChange{
 				ModuleUpgrades: map[protocol.DocumentURI]map[string]string{args.URI: upgrades},
 			})
-			return snapshot, release, nil
 		})
 	})
 }
@@ -306,7 +305,7 @@
 		forURI: args.URI,
 	}, func(ctx context.Context, deps commandDeps) error {
 		return c.modifyState(ctx, FromResetGoModDiagnostics, func() (*cache.Snapshot, func(), error) {
-			snapshot, release, _ := deps.snapshot.View().Invalidate(ctx, cache.StateChange{
+			return c.s.session.InvalidateView(ctx, deps.snapshot.View(), cache.StateChange{
 				ModuleUpgrades: map[protocol.DocumentURI]map[string]string{
 					deps.fh.URI(): nil,
 				},
@@ -314,7 +313,6 @@
 					deps.fh.URI(): nil,
 				},
 			})
-			return snapshot, release, nil
 		})
 	})
 }
@@ -443,7 +441,7 @@
 		if err != nil {
 			return err
 		}
-		edits, err := dropDependency(deps.snapshot, pm, args.ModulePath)
+		edits, err := dropDependency(pm, args.ModulePath)
 		if err != nil {
 			return err
 		}
@@ -476,7 +474,7 @@
 
 // dropDependency returns the edits to remove the given require from the go.mod
 // file.
-func dropDependency(snapshot *cache.Snapshot, pm *cache.ParsedModule, modulePath string) ([]protocol.TextEdit, error) {
+func dropDependency(pm *cache.ParsedModule, modulePath string) ([]protocol.TextEdit, error) {
 	// We need a private copy of the parsed go.mod file, since we're going to
 	// modify it.
 	copied, err := modfile.Parse("", pm.Mapper.Content, nil)
@@ -796,12 +794,11 @@
 				return nil, nil, err
 			}
 			wantDetails := !deps.snapshot.WantGCDetails(meta.ID) // toggle the gc details state
-			snapshot, release, _ := deps.snapshot.View().Invalidate(ctx, cache.StateChange{
+			return c.s.session.InvalidateView(ctx, deps.snapshot.View(), cache.StateChange{
 				GCDetails: map[metadata.PackageID]bool{
 					meta.ID: wantDetails,
 				},
 			})
-			return snapshot, release, nil
 		})
 	})
 }
@@ -995,9 +992,12 @@
 			return err
 		}
 
-		snapshot, release, _ := deps.snapshot.View().Invalidate(ctx, cache.StateChange{
+		snapshot, release, err := c.s.session.InvalidateView(ctx, deps.snapshot.View(), cache.StateChange{
 			Vulns: map[protocol.DocumentURI]*vulncheck.Result{args.URI: result},
 		})
+		if err != nil {
+			return err
+		}
 		defer release()
 		c.s.diagnoseSnapshot(snapshot, nil, 0)
 
@@ -1292,7 +1292,7 @@
 func (c *commandHandler) DiagnoseFiles(ctx context.Context, args command.DiagnoseFilesArgs) error {
 	return c.run(ctx, commandConfig{
 		progress: "Diagnose files",
-	}, func(ctx context.Context, deps commandDeps) error {
+	}, func(ctx context.Context, _ commandDeps) error {
 
 		// TODO(rfindley): even better would be textDocument/diagnostics (golang/go#60122).
 		// Though note that implementing pull diagnostics may cause some servers to