gopls/internal/lsp/cache: use time-based eviction in parseCache
Our arbitrary choice of caching 200 recent files has proven to be
problematic when editing very large packages (golang/go#61207). Fix this
by switching to time-based eviction, but with a minimum cache size to
optimize the case where editing is resumed after a while.
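
To make the policy concrete, here is a minimal, self-contained sketch
of the eviction rule (the entry type, field names, and the sort-based
loop are illustrative only; the actual implementation in parse_cache.go
below uses a min-atime heap guarded by a mutex):

    package main

    import (
        "fmt"
        "sort"
        "time"
    )

    // entry is a hypothetical cache record; the real cache stores
    // parsed files keyed by URI and parse mode.
    type entry struct {
        name       string
        lastAccess time.Time
    }

    // evict drops entries older than maxAge, but always keeps the
    // minFiles most recently used entries regardless of age, so that
    // editing can resume cheaply after a pause.
    func evict(entries []entry, minFiles int, maxAge time.Duration, now time.Time) []entry {
        sort.Slice(entries, func(i, j int) bool {
            return entries[i].lastAccess.After(entries[j].lastAccess) // newest first
        })
        kept := entries[:0]
        for i, e := range entries {
            if i < minFiles || now.Sub(e.lastAccess) <= maxAge {
                kept = append(kept, e)
            }
        }
        return kept
    }

    func main() {
        now := time.Now()
        cache := []entry{
            {"stale.go", now.Add(-5 * time.Minute)},
            {"recent.go", now.Add(-10 * time.Second)},
        }
        cache = evict(cache, 1, time.Minute, now)
        fmt.Println(len(cache), cache[0].name) // 1 recent.go
    }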
This caused an increase in the memory high-water mark during initial
workspace load, but that is mitigated by avoiding the parse cache when type
checking for import (i.e. non-workspace files): such parsed files are
almost never cache hits as they are only ever parsed once, and would
instead benefit more from avoiding ParseComments.
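
For reference, the saving comes down to the parser.Mode bits passed to
go/parser: workspace files are parsed with parser.ParseComments (needed
for hover and documentation), while files parsed only for import can
omit it. A small sketch using only the standard library (the file name
and source are illustrative):

    package main

    import (
        "fmt"
        "go/parser"
        "go/token"
    )

    func main() {
        src := "package p\n\n// C is documented.\nconst C = 1\n"
        fset := token.NewFileSet()

        // Workspace files: parse with comments retained.
        full, err := parser.ParseFile(fset, "p.go", src, parser.ParseComments|parser.SkipObjectResolution)
        if err != nil {
            panic(err)
        }

        // Import-only parsing: comments are dropped, saving work and memory.
        imp, err := parser.ParseFile(fset, "p.go", src, parser.SkipObjectResolution)
        if err != nil {
            panic(err)
        }

        fmt.Println(len(full.Comments), len(imp.Comments)) // 1 0
    }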
Move the ownership of the parseCache to the cache.Session (and pass it
to each View) to make its lifecycle clearer and avoid passing it around
to each snapshot.
For golang/go#61207
Change-Id: I357d8b1fa36eabb516dbb7147266df0e5153ac11
Reviewed-on: https://go-review.googlesource.com/c/tools/+/511337
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Alan Donovan <adonovan@google.com>
Run-TryBot: Robert Findley <rfindley@google.com>
gopls-CI: kokoro <noreply+kokoro@google.com>
diff --git a/gopls/internal/lsp/cache/cache.go b/gopls/internal/lsp/cache/cache.go
index 43f9ef5..473d251 100644
--- a/gopls/internal/lsp/cache/cache.go
+++ b/gopls/internal/lsp/cache/cache.go
@@ -9,6 +9,7 @@
"reflect"
"strconv"
"sync/atomic"
+ "time"
"golang.org/x/tools/gopls/internal/lsp/source"
"golang.org/x/tools/internal/event"
@@ -67,6 +68,7 @@
gocmdRunner: &gocommand.Runner{},
options: options,
overlayFS: newOverlayFS(c),
+ parseCache: newParseCache(1 * time.Minute), // keep recently parsed files for a minute, to optimize typing CPU
}
event.Log(ctx, "New session", KeyCreateSession.Of(s))
return s
diff --git a/gopls/internal/lsp/cache/check.go b/gopls/internal/lsp/cache/check.go
index 90fc202..fa1b2c4 100644
--- a/gopls/internal/lsp/cache/check.go
+++ b/gopls/internal/lsp/cache/check.go
@@ -356,7 +356,7 @@
// If a non-nil importGraph is provided, imports in this graph will be reused.
func (s *snapshot) forEachPackageInternal(ctx context.Context, importGraph *importGraph, importIDs, syntaxIDs []PackageID, pre preTypeCheck, post postTypeCheck, handles map[PackageID]*packageHandle) (*typeCheckBatch, error) {
b := &typeCheckBatch{
- parseCache: s.parseCache,
+ parseCache: s.view.parseCache,
pre: pre,
post: post,
handles: handles,
@@ -593,9 +593,32 @@
}
cfg := b.typesConfig(ctx, ph.localInputs, onError)
cfg.IgnoreFuncBodies = true
- pgfs, err := b.parseCache.parseFiles(ctx, b.fset, source.ParseFull, ph.localInputs.compiledGoFiles...)
- if err != nil {
- return nil, err
+
+ // Parse the compiled go files, bypassing the parse cache as packages checked
+ // for import are unlikely to get cache hits. Additionally, we can optimize
+ // parsing slightly by not passing parser.ParseComments.
+ pgfs := make([]*source.ParsedGoFile, len(ph.localInputs.compiledGoFiles))
+ {
+ var group errgroup.Group
+ // Set an arbitrary concurrency limit; we want some parallelism but don't
+ // need GOMAXPROCS, as there is already a lot of concurrency among calls to
+ // checkPackageForImport.
+ //
+ // TODO(rfindley): is there a better way to limit parallelism here? We could
+ // have a global limit on the type-check batch, but would have to be very
+ // careful to avoid starvation.
+ group.SetLimit(4)
+ for i, fh := range ph.localInputs.compiledGoFiles {
+ i, fh := i, fh
+ group.Go(func() error {
+ pgf, err := parseGoImpl(ctx, b.fset, fh, parser.SkipObjectResolution)
+ pgfs[i] = pgf
+ return err
+ })
+ }
+ if err := group.Wait(); err != nil {
+ return nil, err // cancelled, or catastrophic error (e.g. missing file)
+ }
}
pkg := types.NewPackage(string(ph.localInputs.pkgPath), string(ph.localInputs.name))
check := types.NewChecker(cfg, b.fset, pkg, nil)
diff --git a/gopls/internal/lsp/cache/mod_tidy.go b/gopls/internal/lsp/cache/mod_tidy.go
index 8dd555d..3f89db9 100644
--- a/gopls/internal/lsp/cache/mod_tidy.go
+++ b/gopls/internal/lsp/cache/mod_tidy.go
@@ -486,7 +486,7 @@
//
// TODO(rfindley): this should key off source.ImportPath.
func parseImports(ctx context.Context, s *snapshot, files []source.FileHandle) (map[string]bool, error) {
- pgfs, err := s.parseCache.parseFiles(ctx, token.NewFileSet(), source.ParseHeader, files...)
+ pgfs, err := s.view.parseCache.parseFiles(ctx, token.NewFileSet(), source.ParseHeader, files...)
if err != nil { // e.g. context cancellation
return nil, err
}
diff --git a/gopls/internal/lsp/cache/parse.go b/gopls/internal/lsp/cache/parse.go
index 315d133..fa650ae 100644
--- a/gopls/internal/lsp/cache/parse.go
+++ b/gopls/internal/lsp/cache/parse.go
@@ -27,7 +27,7 @@
// ParseGo parses the file whose contents are provided by fh, using a cache.
// The resulting tree may have been fixed up.
func (s *snapshot) ParseGo(ctx context.Context, fh source.FileHandle, mode parser.Mode) (*source.ParsedGoFile, error) {
- pgfs, err := s.parseCache.parseFiles(ctx, token.NewFileSet(), mode, fh)
+ pgfs, err := s.view.parseCache.parseFiles(ctx, token.NewFileSet(), mode, fh)
if err != nil {
return nil, err
}
diff --git a/gopls/internal/lsp/cache/parse_cache.go b/gopls/internal/lsp/cache/parse_cache.go
index 016db4c..7e0298e 100644
--- a/gopls/internal/lsp/cache/parse_cache.go
+++ b/gopls/internal/lsp/cache/parse_cache.go
@@ -14,13 +14,22 @@
"math/bits"
"runtime"
"sync"
+ "time"
"golang.org/x/sync/errgroup"
"golang.org/x/tools/gopls/internal/lsp/source"
+ "golang.org/x/tools/gopls/internal/span"
"golang.org/x/tools/internal/memoize"
"golang.org/x/tools/internal/tokeninternal"
)
+// This file contains an implementation of an LRU parse cache, which offsets
+// the base token.Pos value of each cached file so that they may be later
+// described by a single dedicated FileSet.
+//
+// This is achieved by tracking a monotonic offset in the token.Pos space,
+// which is incremented before parsing to allow room for the resulting parsed
+// file.
+
// reservedForParsing defines the room in the token.Pos space reserved for
// cached parsed files.
//
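To make the offsetting concrete: when each file is registered at a
distinct base, the files' Pos ranges are disjoint, so one FileSet can
later describe all of them. In the sketch below the bases are chosen by
hand; the real cache tracks nextBase and reserves padded room per file
(and gopls rebuilds the shared FileSet via tokeninternal.AddExistingFiles):

    package main

    import (
        "fmt"
        "go/token"
    )

    func main() {
        // Two FileSets stand in for two separate parses; each file
        // is registered at a caller-chosen base so that their Pos
        // ranges do not overlap.
        const size = 100
        fset1 := token.NewFileSet()
        f1 := fset1.AddFile("a.go", 1, size) // occupies Pos [1, 101]

        fset2 := token.NewFileSet()
        f2 := fset2.AddFile("b.go", 1+size+1, size) // occupies Pos [102, 202]

        fmt.Println(f1.Base(), f1.Base()+f1.Size()) // 1 101
        fmt.Println(f2.Base(), f2.Base()+f2.Size()) // 102 202
    }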
@@ -58,21 +67,11 @@
return fset
}
-// This file contains an implementation of a bounded-size parse cache, that
-// offsets the base token.Pos value of each cached file so that they may be
-// later described by a single dedicated FileSet.
-//
-// This is achieved by tracking a monotonic offset in the token.Pos space, that
-// is incremented before parsing allow room for the resulting parsed file.
-
-// Keep 200 recently parsed files, based on the following rationale:
-// - One of the most important benefits of caching is avoiding re-parsing
-// everything in a package when working on a single file. No packages in
-// Kubernetes have > 200 files (only one has > 100).
-// - Experience has shown that ~1000 parsed files can use noticeable space.
-// 200 feels like a sweet spot between limiting cache size and optimizing
-// cache hits for low-latency operations.
-const parseCacheMaxFiles = 200
+const (
+ // Always keep 100 recent files, independent of their wall-clock age, to
+ // optimize the case where the user resumes editing after a delay.
+ parseCacheMinFiles = 100
+)
// parsePadding is additional padding allocated to allow for increases in
// length (such as appending missing braces) caused by fixAST.
@@ -89,13 +88,16 @@
// This value is mutable for testing, so that we can exercise the slow path.
var parsePadding = 1000 // mutable for testing
-// A parseCache holds a bounded number of recently accessed parsed Go files. As
-// new files are stored, older files may be evicted from the cache.
+// A parseCache holds recently accessed parsed Go files. After new files are
+// stored, older files may be evicted from the cache via garbage collection.
//
// The parseCache.parseFiles method exposes a batch API for parsing (and
// caching) multiple files. This is necessary for type-checking, where files
// must be parsed in a common fileset.
type parseCache struct {
+ maxAge time.Duration // maximum wall-clock age of an entry before it may be evicted
+ done chan struct{} // closed when GC is stopped
+
mu sync.Mutex
m map[parseKey]*parseCacheEntry
lru queue // min-atime priority queue of *parseCacheEntry
@@ -103,17 +105,38 @@
nextBase int // base offset for the next parsed file
}
+// newParseCache creates a new parse cache and starts a goroutine to garbage
+// collect entries older than maxAge.
+//
+// Callers must call parseCache.stop when the parse cache is no longer in use.
+func newParseCache(maxAge time.Duration) *parseCache {
+ c := &parseCache{
+ maxAge: maxAge,
+ m: make(map[parseKey]*parseCacheEntry),
+ done: make(chan struct{}),
+ }
+ go c.gc()
+ return c
+}
+
+// stop causes the GC goroutine to exit.
+func (c *parseCache) stop() {
+ close(c.done)
+}
+
// parseKey uniquely identifies a parsed Go file.
type parseKey struct {
- file source.FileIdentity
+ uri span.URI
mode parser.Mode
}
type parseCacheEntry struct {
key parseKey
+ hash source.Hash
promise *memoize.Promise // memoize.Promise[*source.ParsedGoFile]
- atime uint64 // clock time of last access
- lruIndex int
+ atime uint64 // logical clock time of last access, for LRU ordering
+ walltime time.Time // wall-clock time of last access, for time-based eviction; too coarse for LRU ordering on some systems
+ lruIndex int // owned by the queue implementation
}
// startParse prepares a parsing pass, creating new promises in the cache for
@@ -131,6 +154,7 @@
//
// All entries parsed from a single call get the same access time.
c.clock++
+ walltime := time.Now()
// Read file data and collect cacheable files.
var (
@@ -149,15 +173,22 @@
data[i] = content
key := parseKey{
- file: fh.FileIdentity(),
+ uri: fh.URI(),
mode: mode,
}
- if e, ok := c.m[key]; ok { // cache hit
- e.atime = c.clock
- heap.Fix(&c.lru, e.lruIndex)
- promises[i] = e.promise
- continue
+ if e, ok := c.m[key]; ok {
+ if e.hash == fh.FileIdentity().Hash { // cache hit
+ e.atime = c.clock
+ e.walltime = walltime
+ heap.Fix(&c.lru, e.lruIndex)
+ promises[i] = e.promise
+ continue
+ } else {
+ // The cache holds a different version of this file; delete the stale entry.
+ delete(c.m, e.key)
+ heap.Remove(&c.lru, e.lruIndex)
+ }
}
uri := fh.URI()
@@ -200,21 +231,14 @@
})
promises[i] = promise
- var e *parseCacheEntry
- if len(c.lru) < parseCacheMaxFiles {
- // add new entry
- e = new(parseCacheEntry)
- if c.m == nil {
- c.m = make(map[parseKey]*parseCacheEntry)
- }
- } else {
- // evict oldest entry
- e = heap.Pop(&c.lru).(*parseCacheEntry)
- delete(c.m, e.key)
+ // add new entry; entries are gc'ed asynchronously
+ e := &parseCacheEntry{
+ key: key,
+ hash: fh.FileIdentity().Hash,
+ promise: promise,
+ atime: c.clock,
+ walltime: walltime,
}
- e.key = key
- e.promise = promise
- e.atime = c.clock
c.m[e.key] = e
heap.Push(&c.lru, e)
}
@@ -226,6 +250,38 @@
return promises, firstReadError
}
+func (c *parseCache) gc() {
+ const period = 10 * time.Second // gc period
+ timer := time.NewTicker(period) // a Ticker, not a one-shot Timer, so collection repeats
+ defer timer.Stop()
+
+ for {
+ select {
+ case <-c.done:
+ return
+ case <-timer.C:
+ }
+
+ c.gcOnce()
+ }
+}
+
+func (c *parseCache) gcOnce() {
+ now := time.Now()
+ c.mu.Lock()
+ defer c.mu.Unlock()
+
+ for len(c.m) > parseCacheMinFiles {
+ e := heap.Pop(&c.lru).(*parseCacheEntry)
+ if now.Sub(e.walltime) > c.maxAge {
+ delete(c.m, e.key)
+ } else {
+ heap.Push(&c.lru, e)
+ break
+ }
+ }
+}
+
// allocateSpace reserves the next n bytes of token.Pos space in the
// cache.
//
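The gc goroutine above is an instance of the usual ticker plus
done-channel pattern. A standalone sketch of the same shape (the
collector type, period, and work function are illustrative, not part of
this CL):

    package main

    import (
        "fmt"
        "time"
    )

    // collector runs work on a fixed period until stop is called. As
    // in parseCache.gc, a time.Ticker (not a one-shot time.Timer)
    // drives repeated passes, and closing done exits the goroutine.
    type collector struct {
        done chan struct{}
    }

    func newCollector(period time.Duration, work func()) *collector {
        c := &collector{done: make(chan struct{})}
        go func() {
            ticker := time.NewTicker(period)
            defer ticker.Stop()
            for {
                select {
                case <-c.done:
                    return
                case <-ticker.C:
                }
                work()
            }
        }()
        return c
    }

    func (c *collector) stop() { close(c.done) }

    func main() {
        c := newCollector(10*time.Millisecond, func() { fmt.Println("gc pass") })
        time.Sleep(35 * time.Millisecond) // allow a few passes
        c.stop()
    }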
diff --git a/gopls/internal/lsp/cache/parse_cache_test.go b/gopls/internal/lsp/cache/parse_cache_test.go
index 2a69129..de48777 100644
--- a/gopls/internal/lsp/cache/parse_cache_test.go
+++ b/gopls/internal/lsp/cache/parse_cache_test.go
@@ -10,6 +10,7 @@
"go/token"
"math/bits"
"testing"
+ "time"
"golang.org/x/tools/gopls/internal/lsp/source"
"golang.org/x/tools/gopls/internal/span"
@@ -29,7 +30,7 @@
fh := makeFakeFileHandle(uri, []byte("package p\n\nconst _ = \"foo\""))
fset := token.NewFileSet()
- var cache parseCache
+ cache := newParseCache(0)
pgfs1, err := cache.parseFiles(ctx, fset, source.ParseFull, fh)
if err != nil {
t.Fatal(err)
@@ -45,8 +46,10 @@
}
// Fill up the cache with other files, but don't evict the file above.
+ cache.gcOnce()
files := []source.FileHandle{fh}
- files = append(files, dummyFileHandles(parseCacheMaxFiles-1)...)
+ files = append(files, dummyFileHandles(parseCacheMinFiles-1)...)
+
pgfs3, err := cache.parseFiles(ctx, fset, source.ParseFull, files...)
pgf3 := pgfs3[0]
if pgf3 != pgf1 {
@@ -60,11 +63,14 @@
}
// Now overwrite the cache, after which we should get new results.
- files = dummyFileHandles(parseCacheMaxFiles)
+ cache.gcOnce()
+ files = dummyFileHandles(parseCacheMinFiles)
_, err = cache.parseFiles(ctx, fset, source.ParseFull, files...)
if err != nil {
t.Fatal(err)
}
+ // Force a GC, which should collect the recently parsed files.
+ cache.gcOnce()
pgfs4, err := cache.parseFiles(ctx, fset, source.ParseFull, fh)
if err != nil {
t.Fatal(err)
@@ -82,13 +88,13 @@
}(parsePadding)
parsePadding = 0
- files := dummyFileHandles(parseCacheMaxFiles)
+ files := dummyFileHandles(parseCacheMinFiles)
danglingSelector := []byte("package p\nfunc _() {\n\tx.\n}")
files = append(files, makeFakeFileHandle("file:///bad1", danglingSelector))
files = append(files, makeFakeFileHandle("file:///bad2", danglingSelector))
// Parsing should succeed even though we overflow the padding.
- var cache parseCache
+ cache := newParseCache(0)
_, err := cache.parseFiles(context.Background(), token.NewFileSet(), source.ParseFull, files...)
if err != nil {
t.Fatal(err)
@@ -108,13 +114,64 @@
files := []source.FileHandle{makeFakeFileHandle("file:///bad", danglingSelector)}
// Parsing should succeed even though we overflow the padding.
- var cache parseCache
+ cache := newParseCache(0)
_, err := cache.parseFiles(context.Background(), token.NewFileSet(), source.ParseFull, files...)
if err != nil {
t.Fatal(err)
}
}
+func TestParseCache_TimeEviction(t *testing.T) {
+ skipIfNoParseCache(t)
+
+ ctx := context.Background()
+ fset := token.NewFileSet()
+ uri := span.URI("file:///myfile")
+ fh := makeFakeFileHandle(uri, []byte("package p\n\nconst _ = \"foo\""))
+
+ const gcDuration = 10 * time.Millisecond
+ cache := newParseCache(gcDuration)
+ cache.stop() // we'll manage GC manually, for testing.
+
+ pgfs0, err := cache.parseFiles(ctx, fset, source.ParseFull, fh, fh)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ files := dummyFileHandles(parseCacheMinFiles)
+ _, err = cache.parseFiles(ctx, fset, source.ParseFull, files...)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // Even after filling up the 'min' files, we get a cache hit for our original file.
+ pgfs1, err := cache.parseFiles(ctx, fset, source.ParseFull, fh, fh)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if pgfs0[0] != pgfs1[0] {
+ t.Errorf("before GC, got unexpected cache miss")
+ }
+
+ // But after GC, we get a cache miss.
+ _, err = cache.parseFiles(ctx, fset, source.ParseFull, files...) // mark dummy files as newer
+ if err != nil {
+ t.Fatal(err)
+ }
+ time.Sleep(gcDuration)
+ cache.gcOnce()
+
+ pgfs2, err := cache.parseFiles(ctx, fset, source.ParseFull, fh, fh)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if pgfs0[0] == pgfs2[0] {
+ t.Errorf("after GC, got unexpected cache hit for %s", pgfs0[0].URI)
+ }
+}
+
func TestParseCache_Duplicates(t *testing.T) {
skipIfNoParseCache(t)
@@ -122,7 +179,7 @@
uri := span.URI("file:///myfile")
fh := makeFakeFileHandle(uri, []byte("package p\n\nconst _ = \"foo\""))
- var cache parseCache
+ cache := newParseCache(0)
pgfs, err := cache.parseFiles(ctx, token.NewFileSet(), source.ParseFull, fh, fh)
if err != nil {
t.Fatal(err)
diff --git a/gopls/internal/lsp/cache/session.go b/gopls/internal/lsp/cache/session.go
index fd2e4d2..6b75f10 100644
--- a/gopls/internal/lsp/cache/session.go
+++ b/gopls/internal/lsp/cache/session.go
@@ -39,6 +39,8 @@
views []*View
viewMap map[span.URI]*View // map of URI->best view
+ parseCache *parseCache
+
*overlayFS
}
@@ -76,6 +78,7 @@
for _, view := range views {
view.shutdown()
}
+ s.parseCache.stop()
event.Log(ctx, "Shutdown session", KeyShutdownSession.Of(s))
}
@@ -138,6 +141,7 @@
folder: folder,
moduleUpgrades: map[span.URI]map[string]string{},
vulns: map[span.URI]*govulncheck.Result{},
+ parseCache: s.parseCache,
fs: s.overlayFS,
workspaceInformation: info,
}
@@ -168,7 +172,6 @@
packages: persistent.NewMap(packageIDLessInterface),
meta: new(metadataGraph),
files: newFilesMap(),
- parseCache: new(parseCache),
activePackages: persistent.NewMap(packageIDLessInterface),
symbolizeHandles: persistent.NewMap(uriLessInterface),
workspacePackages: make(map[PackageID]PackagePath),
diff --git a/gopls/internal/lsp/cache/snapshot.go b/gopls/internal/lsp/cache/snapshot.go
index dbb3a41..f2471c2 100644
--- a/gopls/internal/lsp/cache/snapshot.go
+++ b/gopls/internal/lsp/cache/snapshot.go
@@ -100,9 +100,6 @@
// It may be invalidated when a file's content changes.
files filesMap
- // parseCache holds an LRU cache of recently parsed files.
- parseCache *parseCache
-
// symbolizeHandles maps each file URI to a handle for the future
// result of computing the symbols declared in that file.
symbolizeHandles *persistent.Map // from span.URI to *memoize.Promise[symbolizeResult]
@@ -1989,7 +1986,6 @@
packages: s.packages.Clone(),
activePackages: s.activePackages.Clone(),
files: s.files.Clone(),
- parseCache: s.parseCache,
symbolizeHandles: s.symbolizeHandles.Clone(),
workspacePackages: make(map[PackageID]PackagePath, len(s.workspacePackages)),
unloadableFiles: make(map[span.URI]struct{}, len(s.unloadableFiles)),
@@ -2436,8 +2432,8 @@
fset := token.NewFileSet()
// Parse headers to compare package names and imports.
- oldHeads, oldErr := lockedSnapshot.parseCache.parseFiles(ctx, fset, source.ParseHeader, oldFH)
- newHeads, newErr := lockedSnapshot.parseCache.parseFiles(ctx, fset, source.ParseHeader, newFH)
+ oldHeads, oldErr := lockedSnapshot.view.parseCache.parseFiles(ctx, fset, source.ParseHeader, oldFH)
+ newHeads, newErr := lockedSnapshot.view.parseCache.parseFiles(ctx, fset, source.ParseHeader, newFH)
if oldErr != nil || newErr != nil {
// TODO(rfindley): we can get here if newFH does not exist. There is
@@ -2488,8 +2484,8 @@
// Note: if this affects performance we can probably avoid parsing in the
// common case by first scanning the source for potential comments.
if !invalidate {
- origFulls, oldErr := lockedSnapshot.parseCache.parseFiles(ctx, fset, source.ParseFull, oldFH)
- newFulls, newErr := lockedSnapshot.parseCache.parseFiles(ctx, fset, source.ParseFull, newFH)
+ origFulls, oldErr := lockedSnapshot.view.parseCache.parseFiles(ctx, fset, source.ParseFull, oldFH)
+ newFulls, newErr := lockedSnapshot.view.parseCache.parseFiles(ctx, fset, source.ParseFull, newFH)
if oldErr == nil && newErr == nil {
invalidate = magicCommentsChanged(origFulls[0].File, newFulls[0].File)
} else {
@@ -2574,7 +2570,7 @@
// For the builtin file only, we need syntactic object resolution
// (since we can't type check).
mode := source.ParseFull &^ source.SkipObjectResolution
- pgfs, err := s.parseCache.parseFiles(ctx, token.NewFileSet(), mode, fh)
+ pgfs, err := s.view.parseCache.parseFiles(ctx, token.NewFileSet(), mode, fh)
if err != nil {
return nil, err
}
diff --git a/gopls/internal/lsp/cache/symbols.go b/gopls/internal/lsp/cache/symbols.go
index 6f8c083..14874f1 100644
--- a/gopls/internal/lsp/cache/symbols.go
+++ b/gopls/internal/lsp/cache/symbols.go
@@ -63,7 +63,7 @@
// It may use a parsed file already present in the cache but
// otherwise does not populate the cache.
func symbolizeImpl(ctx context.Context, snapshot *snapshot, fh source.FileHandle) ([]source.Symbol, error) {
- pgfs, err := snapshot.parseCache.parseFiles(ctx, token.NewFileSet(), source.ParseFull, fh)
+ pgfs, err := snapshot.view.parseCache.parseFiles(ctx, token.NewFileSet(), source.ParseFull, fh)
if err != nil {
return nil, err
}
diff --git a/gopls/internal/lsp/cache/view.go b/gopls/internal/lsp/cache/view.go
index 74a07cf..70395d1 100644
--- a/gopls/internal/lsp/cache/view.go
+++ b/gopls/internal/lsp/cache/view.go
@@ -67,6 +67,9 @@
vulnsMu sync.Mutex
vulns map[span.URI]*govulncheck.Result
+ // parseCache holds an LRU cache of recently parsed files.
+ parseCache *parseCache
+
// fs is the file source used to populate this view.
fs *overlayFS