// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cache

import (
"container/heap"
"context"
"fmt"
"go/parser"
"go/token"
"math/bits"
"runtime"
"sync"
"golang.org/x/sync/errgroup"
"golang.org/x/tools/gopls/internal/lsp/source"
"golang.org/x/tools/internal/memoize"
"golang.org/x/tools/internal/tokeninternal"
)

// reservedForParsing defines the room in the token.Pos space reserved for
// cached parsed files.
//
// Files parsed through the parseCache are guaranteed not to have overlapping
// spans: the parseCache tracks a monotonic base for newly parsed files.
//
// By offsetting the initial base of a FileSet, we can allow other operations
// accepting the FileSet (such as the gcimporter) to add new files using the
// normal FileSet APIs without overlapping with cached parsed files.
//
// Note that 1<<60 represents an exabyte of parsed data, more than any gopls
// process can ever parse.
//
// On 32-bit systems we don't cache parse results (see parseFiles).
const reservedForParsing = 1 << (bits.UintSize - 4)
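
// For illustration, a sketch (not part of the original file) of how a caller
// can exploit this reservation: starting a FileSet's base beyond the reserved
// region guarantees that files it adds can never overlap cached parsed files,
// which all live below reservedForParsing.
//
//	fset := fileSetWithBase(reservedForParsing)
//	f := fset.AddFile("imported.go", -1, 100)
//	// f.Base() >= reservedForParsing, safely above all cached files.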

// fileSetWithBase returns a new token.FileSet with Base() equal to the
// requested base, or 1 if base < 1.
func fileSetWithBase(base int) *token.FileSet {
fset := token.NewFileSet()
if base > 1 {
// Add a dummy file to set the base of fset. We won't ever use the
// resulting FileSet, so it doesn't matter how we achieve this.
//
// FileSets leave a 1-byte padding between files, so we set the base by
// adding a zero-length file at base-1.
fset.AddFile("", base-1, 0)
}
if base >= 1 && fset.Base() != base {
panic("unexpected FileSet.Base")
}
return fset
}
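
// A minimal usage sketch, with assumed values:
//
//	fset := fileSetWithBase(100)
//	_ = fset.Base()                   // 100
//	f := fset.AddFile("a.go", -1, 10) // f spans Pos values [100, 110]
//	_ = fset.Base()                   // 111: 1 byte of padding after f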

// This file contains an implementation of a bounded-size parse cache that
// offsets the base token.Pos value of each cached file so that they may
// later be described by a single dedicated FileSet.
//
// This is achieved by tracking a monotonic offset in the token.Pos space,
// incremented before each parse to allow room for the resulting parsed file.

// Keep 200 recently parsed files, based on the following rationale:
// - One of the most important benefits of caching is avoiding re-parsing
// everything in a package when working on a single file. No packages in
// Kubernetes have > 200 files (only one has > 100).
// - Experience has shown that ~1000 parsed files can use noticeable space.
// 200 feels like a sweet spot between limiting cache size and optimizing
// cache hits for low-latency operations.
const parseCacheMaxFiles = 200

// parsePadding is additional padding allocated to allow for increases in
// length (such as appending missing braces) caused by fixAST.
//
// This is used to mitigate a chicken and egg problem: we must know the base
// offset of the file we're about to parse, before we start parsing, and yet
// src fixups may affect the actual size of the parsed content (and therefore
// the offsets of subsequent files).
//
// When we encounter a file that no longer fits in its allocated space in the
// fileset, we have no choice but to re-parse it. Leaving a generous padding
// reduces the likelihood of this "slow path".
//
// This value is mutable for testing, so that we can exercise the slow path.
var parsePadding = 1000 // mutable for testing
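
// To make the arithmetic concrete, a sketch with assumed sizes: per the
// allocation in startParse below, parsing n bytes of source reserves
// 2*n+parsePadding bytes of Pos space, leaving room for one full re-parse
// inside parseGoSrc plus up to parsePadding bytes of appended fixes.
//
//	// n == 1000: reserve 2*1000 + 1000 = 3000 bytes of Pos space.
//	// The first parse consumes ~1000 bytes; a re-parse, after fixAST
//	// appends missing tokens, consumes ~1000 more, still within bounds.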

// A parseCache holds a bounded number of recently accessed parsed Go files. As
// new files are stored, older files may be evicted from the cache.
//
// The parseCache.parseFiles method exposes a batch API for parsing (and
// caching) multiple files. This is necessary for type-checking, where files
// must be parsed in a common fileset.
type parseCache struct {
mu sync.Mutex
m map[parseKey]*parseCacheEntry
lru queue // min-atime priority queue of *parseCacheEntry
clock uint64 // clock time, incremented when the cache is updated
nextBase int // base offset for the next parsed file
}

// parseKey uniquely identifies a parsed Go file.
type parseKey struct {
file source.FileIdentity
mode parser.Mode
}

type parseCacheEntry struct {
key parseKey
promise *memoize.Promise // memoize.Promise[*source.ParsedGoFile]
atime uint64 // clock time of last access
lruIndex int
}

// startParse prepares a parsing pass, creating new promises in the cache for
// any cache misses.
//
// The resulting slice has an entry for every given file handle, though some
// entries may be nil if there was an error reading the file (in which case the
// resulting error will be non-nil).
func (c *parseCache) startParse(mode parser.Mode, fhs ...source.FileHandle) ([]*memoize.Promise, error) {
c.mu.Lock()
defer c.mu.Unlock()

// Any parsing pass increments the clock, as we'll update access times.
// (technically, if fhs is empty this isn't necessary, but that's a degenerate case).
//
// All entries parsed from a single call get the same access time.
c.clock++

// Read file data and collect cacheable files.
var (
data = make([][]byte, len(fhs)) // file content for each readable file
promises = make([]*memoize.Promise, len(fhs))
firstReadError error // first error from fh.Content, or nil
)

for i, fh := range fhs {
content, err := fh.Content()
if err != nil {
if firstReadError == nil {
firstReadError = err
}
continue
}
data[i] = content
key := parseKey{
file: fh.FileIdentity(),
mode: mode,
}

// Check for a cache hit.
if e, ok := c.m[key]; ok {
e.atime = c.clock
heap.Fix(&c.lru, e.lruIndex)
promises[i] = e.promise
continue
}

// ...otherwise, create a new promise to parse with a non-overlapping
// offset. We allocate 2*len(content)+parsePadding to allow for re-parsing
// once inside of parseGoSrc without exceeding the allocated space.
base, nextBase, fset := c.allocateSpaceLocked(2*len(content) + parsePadding)
uri := fh.URI()
promise := memoize.NewPromise(string(fh.URI()), func(ctx context.Context, _ interface{}) interface{} {
pgf := parseGoSrc(ctx, fset, uri, content, mode)
file := pgf.Tok
if file.Base()+file.Size()+1 > nextBase {
// The parsed file exceeds its allocated space, likely due to multiple
// passes of src fixing. In this case, we have no choice but to re-do
// the operation with the correct size.
//
// Even though the final successful parse requires only file.Size()
// bytes of Pos space, we need to accommodate all the missteps to get
// there, as parseGoSrc will repeat them.
actual := file.Base() + file.Size() - base // actual size consumed, after re-parsing
c.mu.Lock()
_, next, fset := c.allocateSpaceLocked(actual)
c.mu.Unlock()
pgf = parseGoSrc(ctx, fset, uri, content, mode)
if pgf.Tok.Base()+pgf.Tok.Size() != next-1 {
panic("internal error: non-deterministic parsing result")
}
}
return pgf
})
promises[i] = promise

var e *parseCacheEntry
if len(c.lru) < parseCacheMaxFiles {
// add new entry
e = new(parseCacheEntry)
if c.m == nil {
c.m = make(map[parseKey]*parseCacheEntry)
}
} else {
// evict oldest entry
e = heap.Pop(&c.lru).(*parseCacheEntry)
delete(c.m, e.key)
}
e.key = key
e.promise = promise
e.atime = c.clock
c.m[e.key] = e
heap.Push(&c.lru, e)
}

if len(c.m) != len(c.lru) {
panic("map and LRU are inconsistent")
}

return promises, firstReadError
}

// allocateSpaceLocked reserves the next n bytes of token.Pos space in the
// cache.
//
// It returns the resulting file base, next base, and an offset FileSet to use
// for parsing.
func (c *parseCache) allocateSpaceLocked(size int) (int, int, *token.FileSet) {
base := c.nextBase
c.nextBase += size + 1
return base, c.nextBase, fileSetWithBase(base)
}
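
// A sketch of the arithmetic, assuming c.nextBase is currently 100:
//
//	base, next, fset := c.allocateSpaceLocked(10)
//	// base == 100, next == 111, fset.Base() == 100.
//	// The parsed file may occupy Pos values [100, 110]; the extra byte
//	// preserves the FileSet convention of 1 byte of padding between files.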

// The parse cache is not supported on 32-bit systems, where reservedForParsing
// is too small to be viable.
func parseCacheSupported() bool {
return bits.UintSize != 32
}

// parseFiles returns a ParsedGoFile for each file handle in fhs, in the
// requested parse mode.
//
// For parsed files that already exist in the cache, the access time will be
// updated. For others, parseFiles will parse and store as many results in the
// cache as space allows.
//
// The token.File for each resulting parsed file will be added to the provided
// FileSet, using the tokeninternal.AddExistingFiles API. Consequently, the
// given fset should only be used in other APIs if its base is >=
// reservedForParsing.
//
// If parseFiles returns an error, it still returns a slice,
// but with a nil entry for each file that could not be parsed.
func (c *parseCache) parseFiles(ctx context.Context, fset *token.FileSet, mode parser.Mode, fhs ...source.FileHandle) ([]*source.ParsedGoFile, error) {
pgfs := make([]*source.ParsedGoFile, len(fhs))

// Temporary fall-back for 32-bit systems, where reservedForParsing is too
// small to be viable. We don't actually support 32-bit systems, so this
// workaround is only for tests and can be removed when we stop running
// 32-bit TryBots for gopls.
if bits.UintSize == 32 {
for i, fh := range fhs {
var err error
pgfs[i], err = parseGoImpl(ctx, fset, fh, mode)
if err != nil {
return pgfs, err
}
}
return pgfs, nil
}

promises, firstErr := c.startParse(mode, fhs...)

// Await all parsing.
var g errgroup.Group
g.SetLimit(runtime.GOMAXPROCS(-1)) // parsing is CPU-bound.
for i, promise := range promises {
if promise == nil {
continue
}
i := i
promise := promise
g.Go(func() error {
result, err := promise.Get(ctx, nil)
if err != nil {
return err
}
pgfs[i] = result.(*source.ParsedGoFile)
return nil
})
}

if err := g.Wait(); err != nil && firstErr == nil {
firstErr = err
}

// Augment the FileSet to map all parsed files.
var tokenFiles []*token.File
for _, pgf := range pgfs {
if pgf == nil {
continue
}
tokenFiles = append(tokenFiles, pgf.Tok)
}
tokeninternal.AddExistingFiles(fset, tokenFiles)

const debugIssue59080 = true
if debugIssue59080 {
for _, f := range tokenFiles {
pos := token.Pos(f.Base())
f2 := fset.File(pos)
if f2 != f {
panic(fmt.Sprintf("internal error: File(%d (start)) = %v, not %v", pos, f2, f))
}
pos = token.Pos(f.Base() + f.Size())
f2 = fset.File(pos)
if f2 != f {
panic(fmt.Sprintf("internal error: File(%d (end)) = %v, not %v", pos, f2, f))
}
}
}

return pgfs, firstErr
}
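
// A hypothetical caller, for illustration only (ctx, c, and fhs are assumed
// to be in scope; the mode shown is just a typical full-parse mode):
//
//	fset := fileSetWithBase(reservedForParsing)
//	mode := parser.AllErrors | parser.ParseComments
//	pgfs, err := c.parseFiles(ctx, fset, mode, fhs...)
//	if err != nil {
//		// Files that could not be parsed have nil entries in pgfs.
//	}
//	// Each non-nil pgfs[i].Tok is registered in fset, so fset.Position
//	// resolves positions from any of the parsed files.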

// -- priority queue boilerplate --

// queue is a min-atime priority queue of cache entries.
type queue []*parseCacheEntry

func (q queue) Len() int { return len(q) }

func (q queue) Less(i, j int) bool { return q[i].atime < q[j].atime }

func (q queue) Swap(i, j int) {
q[i], q[j] = q[j], q[i]
q[i].lruIndex = i
q[j].lruIndex = j
}

func (q *queue) Push(x interface{}) {
e := x.(*parseCacheEntry)
e.lruIndex = len(*q)
*q = append(*q, e)
}

func (q *queue) Pop() interface{} {
last := len(*q) - 1
e := (*q)[last]
(*q)[last] = nil // aid GC
*q = (*q)[:last]
return e
}
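
// A self-contained sketch (illustrative) of the queue's min-atime behavior:
//
//	var q queue
//	heap.Push(&q, &parseCacheEntry{atime: 2})
//	heap.Push(&q, &parseCacheEntry{atime: 1})
//	e := heap.Pop(&q).(*parseCacheEntry)
//	// e.atime == 1: Pop always yields the least-recently-used entry.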