// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cache

import (
"bytes"
"container/heap"
"context"
"fmt"
"go/parser"
"go/token"
"math/bits"
"runtime"
"sync"
"time"
"golang.org/x/sync/errgroup"
"golang.org/x/tools/gopls/internal/cache/parsego"
"golang.org/x/tools/gopls/internal/file"
"golang.org/x/tools/gopls/internal/protocol"
"golang.org/x/tools/internal/memoize"
"golang.org/x/tools/internal/tokeninternal"
)

// This file contains an implementation of an LRU parse cache that offsets the
// base token.Pos value of each cached file so that they may be later described
// by a single dedicated FileSet.
//
// This is achieved by tracking a monotonic offset in the token.Pos space,
// which is incremented before parsing to allow room for the resulting parsed
// file.

// reservedForParsing defines the room in the token.Pos space reserved for
// cached parsed files.
//
// Files parsed through the parseCache are guaranteed not to have overlapping
// spans: the parseCache tracks a monotonic base for newly parsed files.
//
// By offsetting the initial base of a FileSet, we can allow other operations
// accepting the FileSet (such as the gcimporter) to add new files using the
// normal FileSet APIs without overlapping with cached parsed files.
//
// Note that 1<<60 represents an exabyte of parsed data, more than any gopls
// process can ever parse.
//
// On 32-bit systems we don't cache parse results (see parseFiles).
const reservedForParsing = 1 << (bits.UintSize - 4)
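
// For example (an illustrative sketch, not part of the original comments): on
// a 64-bit system reservedForParsing is 1<<60, so the parse cache owns the
// Pos range [1, 1<<60) and callers position their own FileSets above it:
//
//	fset := fileSetWithBase(reservedForParsing)
//	// Files subsequently added to fset can never overlap cached parsed files.
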
// fileSetWithBase returns a new token.FileSet with Base() equal to the
// requested base.
//
// If base < 1, fileSetWithBase panics.
// (1 is the smallest permitted FileSet base).
func fileSetWithBase(base int) *token.FileSet {
fset := token.NewFileSet()
if base > 1 {
// Add a dummy file to set the base of fset. We won't ever use the
// resulting FileSet, so it doesn't matter how we achieve this.
//
// FileSets leave a 1-byte padding between files, so we set the base by
// adding a zero-length file at base-1.
fset.AddFile("", base-1, 0)
}
if fset.Base() != base {
panic("unexpected FileSet.Base")
}
return fset
}
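
// Example use of fileSetWithBase (a sketch; the values are hypothetical):
//
//	fset := fileSetWithBase(100)      // fset.Base() == 100
//	f := fset.AddFile("x.go", -1, 10) // base -1 means "use fset.Base()"
//	_ = f.Pos(0)                      // == token.Pos(100): offset by the chosen base
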
const (
// Always keep 100 recent files, independent of their wall-clock age, to
// optimize the case where the user resumes editing after a delay.
parseCacheMinFiles = 100
)

// parsePadding is additional padding allocated to allow for increases in
// length (such as appending missing braces) caused by fixAST.
//
// This is used to mitigate a chicken and egg problem: we must know the base
// offset of the file we're about to parse, before we start parsing, and yet
// src fixups may affect the actual size of the parsed content (and therefore
// the offsets of subsequent files).
//
// When we encounter a file that no longer fits in its allocated space in the
// fileset, we have no choice but to re-parse it. Leaving a generous padding
// reduces the likelihood of this "slow path".
//
// This value is mutable for testing, so that we can exercise the slow path.
var parsePadding = 1000 // mutable for testing
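
// To make the padding concrete (hypothetical numbers): for a 500-byte file
// the fast path in startParse reserves 2*500+1000 = 2000 bytes of Pos space,
// so only source fixups that push the parsed file beyond that reservation
// force the slow path of re-parsing into a correctly sized interval.
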
// A parseCache holds recently accessed parsed Go files. After new files are
// stored, older files may be evicted from the cache via garbage collection.
//
// The parseCache.parseFiles method exposes a batch API for parsing (and
// caching) multiple files. This is necessary for type-checking, where files
// must be parsed in a common fileset.
type parseCache struct {
expireAfter time.Duration // interval at which to collect expired cache entries
done chan struct{} // closed when GC is stopped
mu sync.Mutex
m map[parseKey]*parseCacheEntry
lru queue // min-atime priority queue of *parseCacheEntry
clock uint64 // clock time, incremented when the cache is updated
nextBase int // base offset for the next parsed file
}

// newParseCache creates a new parse cache and starts a goroutine to garbage
// collect entries whose age is at least expireAfter.
//
// Callers must call parseCache.stop when the parse cache is no longer in use.
func newParseCache(expireAfter time.Duration) *parseCache {
c := &parseCache{
expireAfter: expireAfter,
m: make(map[parseKey]*parseCacheEntry),
done: make(chan struct{}),
}
go c.gc()
return c
}
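
// Typical lifecycle (an illustrative sketch, not part of the original file):
//
//	c := newParseCache(5 * time.Minute)
//	defer c.stop() // terminate the GC goroutine when the cache is retired
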
// stop causes the GC goroutine to exit.
func (c *parseCache) stop() {
close(c.done)
}

// parseKey uniquely identifies a parsed Go file.
type parseKey struct {
uri protocol.DocumentURI
mode parser.Mode
purgeFuncBodies bool
}

type parseCacheEntry struct {
key parseKey
hash file.Hash
promise *memoize.Promise // memoize.Promise[*parsego.File]
atime uint64 // clock time of last access, for use in LRU sorting
walltime time.Time // actual time of last access, for use in time-based eviction; too coarse for LRU on some systems
lruIndex int // owned by the queue implementation
}

// startParse prepares a parsing pass, creating new promises in the cache for
// any cache misses.
//
// The resulting slice has an entry for every given file handle, though some
// entries may be nil if there was an error reading the file (in which case the
// resulting error will be non-nil).
func (c *parseCache) startParse(mode parser.Mode, purgeFuncBodies bool, fhs ...file.Handle) ([]*memoize.Promise, error) {
c.mu.Lock()
defer c.mu.Unlock()
// Any parsing pass increments the clock, as we'll update access times.
// (technically, if fhs is empty this isn't necessary, but that's a degenerate case).
//
// All entries parsed from a single call get the same access time.
c.clock++
walltime := time.Now()
// Read file data and collect cacheable files.
var (
data = make([][]byte, len(fhs)) // file content for each readable file
promises = make([]*memoize.Promise, len(fhs))
		firstReadError error // first error from fh.Content, or nil
)
for i, fh := range fhs {
content, err := fh.Content()
if err != nil {
if firstReadError == nil {
firstReadError = err
}
continue
}
data[i] = content
key := parseKey{
uri: fh.URI(),
mode: mode,
purgeFuncBodies: purgeFuncBodies,
}
if e, ok := c.m[key]; ok {
if e.hash == fh.Identity().Hash { // cache hit
e.atime = c.clock
e.walltime = walltime
heap.Fix(&c.lru, e.lruIndex)
promises[i] = e.promise
continue
			} else {
				// Same key, but a different content hash: the entry is stale.
				// Evict it.
delete(c.m, e.key)
heap.Remove(&c.lru, e.lruIndex)
}
}
uri := fh.URI()
promise := memoize.NewPromise("parseCache.parse", func(ctx context.Context, _ interface{}) interface{} {
			// Allocate 2*len(content)+parsePadding to allow for re-parsing once
			// inside parsego.Parse without exceeding the allocated space.
			base, nextBase := c.allocateSpace(2*len(content) + parsePadding)
pgf, fixes1 := parsego.Parse(ctx, fileSetWithBase(base), uri, content, mode, purgeFuncBodies)
file := pgf.Tok
if file.Base()+file.Size()+1 > nextBase {
// The parsed file exceeds its allocated space, likely due to multiple
// passes of src fixing. In this case, we have no choice but to re-do
// the operation with the correct size.
//
				// Even though the final successful parse requires only file.Size()
				// bytes of Pos space, we need to accommodate all the missteps to get
				// there, as parsego.Parse will repeat them.
				actual := file.Base() + file.Size() - base // Pos space consumed by the first parse, including fixup retries
base2, nextBase2 := c.allocateSpace(actual)
pgf2, fixes2 := parsego.Parse(ctx, fileSetWithBase(base2), uri, content, mode, purgeFuncBodies)
// In golang/go#59097 we observed that this panic condition was hit.
// One bug was found and fixed, but record more information here in
// case there is still a bug here.
if end := pgf2.Tok.Base() + pgf2.Tok.Size(); end != nextBase2-1 {
var errBuf bytes.Buffer
fmt.Fprintf(&errBuf, "internal error: non-deterministic parsing result:\n")
fmt.Fprintf(&errBuf, "\t%q (%d-%d) does not span %d-%d\n", uri, pgf2.Tok.Base(), base2, end, nextBase2-1)
fmt.Fprintf(&errBuf, "\tfirst %q (%d-%d)\n", pgf.URI, pgf.Tok.Base(), pgf.Tok.Base()+pgf.Tok.Size())
fmt.Fprintf(&errBuf, "\tfirst space: (%d-%d), second space: (%d-%d)\n", base, nextBase, base2, nextBase2)
fmt.Fprintf(&errBuf, "\tfirst mode: %v, second mode: %v", pgf.Mode, pgf2.Mode)
fmt.Fprintf(&errBuf, "\tfirst err: %v, second err: %v", pgf.ParseErr, pgf2.ParseErr)
fmt.Fprintf(&errBuf, "\tfirst fixes: %v, second fixes: %v", fixes1, fixes2)
panic(errBuf.String())
}
pgf = pgf2
}
return pgf
})
promises[i] = promise
// add new entry; entries are gc'ed asynchronously
e := &parseCacheEntry{
key: key,
hash: fh.Identity().Hash,
promise: promise,
atime: c.clock,
walltime: walltime,
}
c.m[e.key] = e
heap.Push(&c.lru, e)
}
if len(c.m) != len(c.lru) {
panic("map and LRU are inconsistent")
}
return promises, firstReadError
}

func (c *parseCache) gc() {
const period = 10 * time.Second // gc period
timer := time.NewTicker(period)
defer timer.Stop()
for {
select {
case <-c.done:
return
case <-timer.C:
}
c.gcOnce()
}
}

func (c *parseCache) gcOnce() {
now := time.Now()
c.mu.Lock()
defer c.mu.Unlock()
for len(c.m) > parseCacheMinFiles {
e := heap.Pop(&c.lru).(*parseCacheEntry)
if now.Sub(e.walltime) >= c.expireAfter {
delete(c.m, e.key)
} else {
heap.Push(&c.lru, e)
break
}
}
}
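
// For example (hypothetical numbers): with expireAfter = 5m and 150 cached
// entries, gcOnce pops entries in LRU order, deleting each entry idle for at
// least 5m, and stops at the first fresh entry or once only
// parseCacheMinFiles (100) entries remain.
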
// allocateSpace reserves the next size bytes of token.Pos space in the
// cache.
//
// It returns the base of the reserved interval and the next free base
// (base + size + 1, preserving the FileSet's 1-byte padding between files).
func (c *parseCache) allocateSpace(size int) (int, int) {
c.mu.Lock()
defer c.mu.Unlock()
if c.nextBase == 0 {
// FileSet base values must be at least 1.
c.nextBase = 1
}
base := c.nextBase
c.nextBase += size + 1
return base, c.nextBase
}
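
// For example (hypothetical sizes): starting from an empty cache,
//
//	base1, next1 := c.allocateSpace(10) // base1 == 1,  next1 == 12
//	base2, next2 := c.allocateSpace(20) // base2 == 12, next2 == 33
//
// The +1 preserves the 1-byte padding that FileSets leave between files.
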
// parseFiles returns a parsego.File for each file handle in fhs, in the
// requested parse mode.
//
// For parsed files that already exist in the cache, access time will be
// updated. For others, parseFiles will parse and store as many results in the
// cache as space allows.
//
// The token.File for each resulting parsed file will be added to the provided
// FileSet, using the tokeninternal.AddExistingFiles API. Consequently, the
// given fset should only be used in other APIs if its base is >=
// reservedForParsing.
//
// If parseFiles returns an error, it still returns a slice,
// but with a nil entry for each file that could not be parsed.
func (c *parseCache) parseFiles(ctx context.Context, fset *token.FileSet, mode parser.Mode, purgeFuncBodies bool, fhs ...file.Handle) ([]*parsego.File, error) {
pgfs := make([]*parsego.File, len(fhs))
// Temporary fall-back for 32-bit systems, where reservedForParsing is too
// small to be viable. We don't actually support 32-bit systems, so this
// workaround is only for tests and can be removed when we stop running
// 32-bit TryBots for gopls.
if bits.UintSize == 32 {
for i, fh := range fhs {
var err error
pgfs[i], err = parseGoImpl(ctx, fset, fh, mode, purgeFuncBodies)
if err != nil {
return pgfs, err
}
}
return pgfs, nil
}
promises, firstErr := c.startParse(mode, purgeFuncBodies, fhs...)
// Await all parsing.
var g errgroup.Group
g.SetLimit(runtime.GOMAXPROCS(-1)) // parsing is CPU-bound.
for i, promise := range promises {
if promise == nil {
continue
}
		i := i             // copy loop variables (no per-iteration scope before Go 1.22)
		promise := promise
g.Go(func() error {
result, err := promise.Get(ctx, nil)
if err != nil {
return err
}
pgfs[i] = result.(*parsego.File)
return nil
})
}
if err := g.Wait(); err != nil && firstErr == nil {
firstErr = err
}
// Augment the FileSet to map all parsed files.
var tokenFiles []*token.File
for _, pgf := range pgfs {
if pgf == nil {
continue
}
tokenFiles = append(tokenFiles, pgf.Tok)
}
tokeninternal.AddExistingFiles(fset, tokenFiles)
const debugIssue59080 = true
if debugIssue59080 {
for _, f := range tokenFiles {
pos := token.Pos(f.Base())
f2 := fset.File(pos)
if f2 != f {
panic(fmt.Sprintf("internal error: File(%d (start)) = %v, not %v", pos, f2, f))
}
pos = token.Pos(f.Base() + f.Size())
f2 = fset.File(pos)
if f2 != f {
panic(fmt.Sprintf("internal error: File(%d (end)) = %v, not %v", pos, f2, f))
}
}
}
return pgfs, firstErr
}
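
// Example use of the results (a sketch; fh is any file.Handle, and the parse
// mode is illustrative):
//
//	fset := fileSetWithBase(reservedForParsing)
//	pgfs, err := c.parseFiles(ctx, fset, parser.SkipObjectResolution, false, fh)
//	if err == nil {
//		pos := pgfs[0].File.Package // a token.Pos within pgfs[0].Tok
//		_ = fset.Position(pos)      // resolvable via fset, thanks to AddExistingFiles
//	}
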
// -- priority queue boilerplate --

// queue is a min-atime priority queue of cache entries.
type queue []*parseCacheEntry

func (q queue) Len() int { return len(q) }
func (q queue) Less(i, j int) bool { return q[i].atime < q[j].atime }

func (q queue) Swap(i, j int) {
q[i], q[j] = q[j], q[i]
q[i].lruIndex = i
q[j].lruIndex = j
}

func (q *queue) Push(x interface{}) {
e := x.(*parseCacheEntry)
e.lruIndex = len(*q)
*q = append(*q, e)
}

func (q *queue) Pop() interface{} {
last := len(*q) - 1
e := (*q)[last]
(*q)[last] = nil // aid GC
*q = (*q)[:last]
return e
}