// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cache

import (
"container/heap"
"context"
"go/parser"
"go/token"
"runtime"
"sort"
"sync"
"golang.org/x/sync/errgroup"
"golang.org/x/tools/gopls/internal/lsp/source"
"golang.org/x/tools/internal/memoize"
)

// This file contains an implementation of a bounded-size parse cache that
// offsets the base token.Pos value of each cached file so that they may
// later be described by a single dedicated FileSet.
//
// This is achieved by tracking a monotonic offset in the token.Pos space,
// which is incremented before parsing to allow room for the resulting parsed
// file.
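//
// As a minimal illustration of the underlying token.FileSet mechanics (the
// file names and sizes are hypothetical), non-overlapping base offsets are
// what let many parsed files coexist in a single FileSet:
//
//	fset := token.NewFileSet()
//	a := fset.AddFile("a.go", -1, 100) // a.Base() == 1; occupies [1, 101]
//	b := fset.AddFile("b.go", -1, 50)  // b.Base() == 102; disjoint from a
//	_ = fset.File(a.Pos(0))            // a Pos in a's range maps back to a
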
// Keep 200 recently parsed files, based on the following rationale:
// - One of the most important benefits of caching is avoiding re-parsing
// everything in a package when working on a single file. No packages in
// Kubernetes have > 200 files (only one has > 100).
// - Experience has shown that ~1000 parsed files can use noticeable space.
// 200 feels like a sweet spot between limiting cache size and optimizing
// cache hits for low-latency operations.
const parseCacheMaxFiles = 200

// parsePadding is additional padding allocated between entries in the parse
// cache to allow for increases in length (such as appending missing braces)
// caused by fixAST.
//
// This is used to mitigate a chicken-and-egg problem: we must know the base
// offset of the file we're about to parse, before we start parsing, and yet
// src fixups may affect the actual size of the parsed content (and therefore
// the offsets of subsequent files).
//
// When we encounter a file that no longer fits in its allocated space in the
// fileset, we have no choice but to re-parse it. Leaving a generous padding
// reduces the likelihood of this "slow path".
//
// This value is mutable for testing, so that we can exercise the slow path.
var parsePadding = 1000
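
// To make the arithmetic concrete: each parsed file reserves
// len(src)+parsePadding+1 positions in the shared Pos space (see nextOffset
// below), so with the value above a 2000-byte file reserves 3001 positions.
// Only if fixAST grows a file beyond its reservation does it hit the
// re-parse slow path.
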
// A parseCache holds a bounded number of recently accessed parsed Go files. As
// new files are stored, older files may be evicted from the cache.
//
// The parseCache.parseFiles method exposes a batch API for parsing (and
// caching) multiple files. This is necessary for type-checking, where files
// must be parsed in a common fileset.
type parseCache struct {
mu sync.Mutex
m map[parseKey]*parseCacheEntry
lru queue // min-atime priority queue of *parseCacheEntry
clock uint64 // clock time, incremented when the cache is updated
nextOffset token.Pos // token.Pos offset for the next parsed file
}

// parseKey uniquely identifies a parsed Go file.
type parseKey struct {
file source.FileIdentity
mode parser.Mode
}

type parseCacheEntry struct {
key parseKey
promise *memoize.Promise // memoize.Promise[*source.ParsedGoFile]
atime uint64 // clock time of last access
lruIndex int
}

// startParse prepares a parsing pass, using the following steps:
// - search for cache hits
// - create new promises for cache misses
// - store as many new promises in the cache as space will allow
//
// The resulting slice has an entry for every given file handle, though some
// entries may be nil if there was an error reading the file (in which case the
// resulting error will be non-nil).
func (c *parseCache) startParse(mode parser.Mode, fhs ...source.FileHandle) ([]*memoize.Promise, error) {
c.mu.Lock()
defer c.mu.Unlock()
// Any parsing pass increments the clock, as we'll update access times.
// (technically, if fhs is empty this isn't necessary, but that's a degenerate case).
//
// All entries parsed from a single call get the same access time.
c.clock++
// Read file data and collect cacheable files.
var (
data = make([][]byte, len(fhs)) // file content for each readable file
promises = make([]*memoize.Promise, len(fhs))
firstReadError error // first error from fh.Read, or nil
)
for i, fh := range fhs {
content, err := fh.Content()
if err != nil {
if firstReadError == nil {
firstReadError = err
}
continue
}
data[i] = content
key := parseKey{
file: fh.FileIdentity(),
mode: mode,
}
// Check for a cache hit.
if e, ok := c.m[key]; ok {
e.atime = c.clock
heap.Fix(&c.lru, e.lruIndex)
promises[i] = e.promise
continue
}
// ...otherwise, create a new promise to parse with a non-overlapping offset
fset := token.NewFileSet()
if c.nextOffset > 0 {
// Add a dummy file so that this parsed file does not overlap with others.
fset.AddFile("", 1, int(c.nextOffset))
}
c.nextOffset += token.Pos(len(content) + parsePadding + 1) // leave room for src fixes
fh := fh // capture the loop variable for the promise closure below
promise := memoize.NewPromise(string(fh.URI()), func(ctx context.Context, _ interface{}) interface{} {
return parseGoSrc(ctx, fset, fh.URI(), content, mode)
})
promises[i] = promise
var e *parseCacheEntry
if len(c.lru) < parseCacheMaxFiles {
// add new entry
e = new(parseCacheEntry)
if c.m == nil {
c.m = make(map[parseKey]*parseCacheEntry)
}
} else {
// evict oldest entry
e = heap.Pop(&c.lru).(*parseCacheEntry)
delete(c.m, e.key)
}
e.key = key
e.promise = promise
e.atime = c.clock
c.m[e.key] = e
heap.Push(&c.lru, e)
}
if len(c.m) != len(c.lru) {
panic("map and LRU are inconsistent")
}
return promises, firstReadError
}

// parseFiles returns a ParsedGoFile for each of the given file handles in
// the requested parse mode.
//
// If parseFiles returns an error, it still returns a slice,
// but with a nil entry for each file that could not be parsed.
//
// The second result is a FileSet describing all resulting parsed files.
//
// For parsed files that already exist in the cache, the access time will be
// updated. For others, parseFiles will parse and store as many results in
// the cache as space allows.
func (c *parseCache) parseFiles(ctx context.Context, mode parser.Mode, fhs ...source.FileHandle) ([]*source.ParsedGoFile, *token.FileSet, error) {
promises, firstReadError := c.startParse(mode, fhs...)
// Await all parsing.
var g errgroup.Group
g.SetLimit(runtime.GOMAXPROCS(-1)) // parsing is CPU-bound.
pgfs := make([]*source.ParsedGoFile, len(fhs))
for i, promise := range promises {
if promise == nil {
continue
}
i := i
promise := promise // capture loop variables for the goroutine below
g.Go(func() error {
result, err := promise.Get(ctx, nil)
if err != nil {
return err
}
pgfs[i] = result.(*source.ParsedGoFile)
return nil
})
}
if err := g.Wait(); err != nil {
return nil, nil, err
}
// Construct a token.FileSet mapping all parsed files, and update their
// Tok to the corresponding file in the new fileset.
//
// In the unlikely event that a parsed file no longer fits in its allocated
// space in the FileSet range, it will need to be re-parsed.
var tokenFiles []*token.File
fileIndex := make(map[*token.File]int) // to look up original indexes after sorting
for i, pgf := range pgfs {
if pgf == nil {
continue
}
fileIndex[pgf.Tok] = i
tokenFiles = append(tokenFiles, pgf.Tok)
}
sort.Slice(tokenFiles, func(i, j int) bool {
return tokenFiles[i].Base() < tokenFiles[j].Base()
})
var needReparse []int // files requiring reparsing
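// Filter tokenFiles in place, keeping only files whose ranges end before the
// next file's base; overlapping files are recorded for re-parsing.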
out := tokenFiles[:0]
for i, f := range tokenFiles {
if i < len(tokenFiles)-1 && f.Base()+f.Size() >= tokenFiles[i+1].Base() {
if f != tokenFiles[i+1] { // duplicates (the same *token.File) need no re-parse
needReparse = append(needReparse, fileIndex[f])
}
} else {
out = append(out, f)
}
}
fset := source.FileSetFor(out...)
// Re-parse any remaining files using the stitched FileSet.
for _, i := range needReparse {
// Start from scratch, rather than using ParsedGoFile.Src, so that source
// fixing operates exactly the same (note that fixing stops after a limited
// number of tries).
fh := fhs[i]
content, err := fh.Content()
if err != nil {
if firstReadError == nil {
firstReadError = err
}
continue
}
pgfs[i] = parseGoSrc(ctx, fset, fh.URI(), content, mode)
}
// Ensure each PGF refers to a token.File from the new FileSet.
for i, pgf := range pgfs {
if pgf == nil {
continue
}
newTok := fset.File(token.Pos(pgf.Tok.Base()))
if newTok == nil {
panic("internal error: missing tok for " + pgf.URI)
}
if newTok.Base() != pgf.Tok.Base() || newTok.Size() != pgf.Tok.Size() {
panic("internal error: mismatching token.File in synthetic FileSet")
}
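// Update Tok on a copy: the original ParsedGoFile is shared via the cached
// promise and must not be mutated.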
pgf2 := *pgf
pgf2.Tok = newTok
pgfs[i] = &pgf2
}
return pgfs, fset, firstReadError
}
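
// For illustration, a sketch of how a caller might batch-parse files into a
// single shared FileSet (fh1 and fh2 are hypothetical source.FileHandles;
// error handling is abbreviated):
//
//	ctx := context.Background()
//	var cache parseCache
//	pgfs, fset, err := cache.parseFiles(ctx, parser.ParseComments, fh1, fh2)
//	if err != nil {
//		// Unreadable files leave nil entries in pgfs, per the doc above.
//	}
//	for _, pgf := range pgfs {
//		if pgf == nil {
//			continue
//		}
//		_ = fset.Position(pgf.File.Pos()) // positions resolve in the shared fset
//	}
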
// -- priority queue boilerplate --

// queue is a min-atime priority queue of cache entries.
type queue []*parseCacheEntry

func (q queue) Len() int { return len(q) }
func (q queue) Less(i, j int) bool { return q[i].atime < q[j].atime }
func (q queue) Swap(i, j int) {
q[i], q[j] = q[j], q[i]
q[i].lruIndex = i
q[j].lruIndex = j
}
func (q *queue) Push(x interface{}) {
e := x.(*parseCacheEntry)
e.lruIndex = len(*q)
*q = append(*q, e)
}
func (q *queue) Pop() interface{} {
last := len(*q) - 1
e := (*q)[last]
(*q)[last] = nil // aid GC
*q = (*q)[:last]
return e
}
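
// For illustration, how container/heap drives LRU eviction through this
// queue (a sketch with hypothetical atimes; real entries are managed by
// startParse):
//
//	var q queue
//	heap.Push(&q, &parseCacheEntry{atime: 3})
//	heap.Push(&q, &parseCacheEntry{atime: 1})
//	heap.Push(&q, &parseCacheEntry{atime: 2})
//	lru := heap.Pop(&q).(*parseCacheEntry) // lru.atime == 1: the eviction victim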