blob: 44485cb80ac218543b3e83769499925a76eacf61 [file] [log] [blame]
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// The filecache package provides a file-based shared durable blob cache.
//
// The cache is a machine-global mapping from (kind string, key
// [32]byte) to []byte, where kind is an identifier describing the
// namespace or purpose (e.g. "analysis"), and key is a SHA-256 digest
// of the recipe of the value. (It need not be the digest of the value
// itself, so you can query the cache without knowing what value the
// recipe would produce.)
//
// The space budget of the cache can be controlled by [SetBudget].
// Cache entries may be evicted at any time or in any order.
//
// The Get and Set operations are concurrency-safe.
package filecache
import (
"bytes"
"crypto/sha256"
"encoding/binary"
"errors"
"fmt"
"io"
"log"
"os"
"path/filepath"
"sort"
"sync"
"sync/atomic"
"time"
"golang.org/x/tools/internal/lockedfile"
)
// Get retrieves from the cache and returns a newly allocated
// copy of the value most recently supplied to Set(kind, key),
// possibly by another process.
// Get returns ErrNotFound if the value was not found.
func Get(kind string, key [32]byte) ([]byte, error) {
name := filename(kind, key)
data, err := lockedfile.Read(name)
if err != nil {
if errors.Is(err, os.ErrNotExist) {
return nil, ErrNotFound
}
return nil, err
}
// Verify that the Write was complete
// by checking the recorded length.
if len(data) < 8 {
return nil, ErrNotFound // cache entry is incomplete
}
if length := binary.LittleEndian.Uint64(data); int(length) != len(data)-8 {
return nil, ErrNotFound // cache entry is incomplete (or too long!)
}
data = data[8:]
// Update file time for use by LRU eviction.
// (This turns every read into a write operation.
// If this is a performance problem, we should
// touch the files aynchronously.)
//
// (Traditionally the access time would be updated
// automatically, but for efficiency most POSIX systems have
// for many years set the noatime mount option to avoid every
// open or read operation entailing a metadata write.)
now := time.Now()
if err := os.Chtimes(name, now, now); err != nil {
return nil, fmt.Errorf("failed to update access time: %w", err)
}
return data, nil
}
// ErrNotFound is the distinguished error
// returned by Get when the key is not found.
var ErrNotFound = fmt.Errorf("not found")
// Set updates the value in the cache.
func Set(kind string, key [32]byte, value []byte) error {
name := filename(kind, key)
if err := os.MkdirAll(filepath.Dir(name), 0700); err != nil {
return err
}
// In the unlikely event of a short write (e.g. ENOSPC)
// followed by process termination (e.g. a power cut), we
// don't want a reader to see a short file, so we record
// the expected length first and verify it in Get.
var length [8]byte
binary.LittleEndian.PutUint64(length[:], uint64(len(value)))
header := bytes.NewReader(length[:])
payload := bytes.NewReader(value)
// Windows doesn't support atomic rename--we tried MoveFile,
// MoveFileEx, ReplaceFileEx, and SetFileInformationByHandle
// of RenameFileInfo, all to no avail--so instead we use
// advisory file locking, which is only about 2x slower even
// on POSIX platforms with atomic rename.
return lockedfile.Write(name, io.MultiReader(header, payload), 0600)
}
var budget int64 = 1e9 // 1GB
// SetBudget sets a soft limit on disk usage of the cache (in bytes)
// and returns the previous value. Supplying a negative value queries
// the current value without changing it.
//
// If two gopls processes have different budgets, the one with the
// lower budget will collect garbage more actively, but both will
// observe the effect.
func SetBudget(new int64) (old int64) {
if new < 0 {
return atomic.LoadInt64(&budget)
}
return atomic.SwapInt64(&budget, new)
}
// --- implementation ----
// filename returns the cache entry of the specified kind and key.
//
// A typical cache entry is a file name such as:
//
// $HOME/Library/Caches / gopls / VVVVVVVV / kind / KK / KKKK...KKKK
//
// The portions separated by spaces are as follows:
// - The user's preferred cache directory; the default value varies by OS.
// - The constant "gopls".
// - The "version", 32 bits of the digest of the gopls executable.
// - The kind or purpose of this cache subtree (e.g. "analysis").
// - The first 8 bits of the key, to avoid huge directories.
// - The full 256 bits of the key.
//
// Once a file is written its contents are never modified, though it
// may be atomically replaced or removed.
//
// New versions of gopls are free to reorganize the contents of the
// version directory as needs evolve. But all versions of gopls must
// in perpetuity treat the "gopls" directory in a common fashion.
//
// In particular, each gopls process attempts to garbage collect
// the entire gopls directory so that newer binaries can clean up
// after older ones: in the development cycle especially, new
// new versions may be created frequently.
func filename(kind string, key [32]byte) string {
hex := fmt.Sprintf("%x", key)
return filepath.Join(getCacheDir(), kind, hex[:2], hex)
}
// getCacheDir returns the persistent cache directory of all processes
// running this version of the gopls executable.
//
// It must incorporate the hash of the executable so that we needn't
// worry about incompatible changes to the file format or changes to
// the algorithm that produced the index.
func getCacheDir() string {
cacheDirOnce.Do(func() {
// Use user's preferred cache directory.
userDir := os.Getenv("GOPLS_CACHE")
if userDir == "" {
var err error
userDir, err = os.UserCacheDir()
if err != nil {
userDir = os.TempDir()
}
}
goplsDir := filepath.Join(userDir, "gopls")
// UserCacheDir may return a nonexistent directory
// (in which case we must create it, which may fail),
// or it may return a non-writable directory, in
// which case we should ideally respect the user's express
// wishes (e.g. XDG_CACHE_HOME) and not write somewhere else.
// Sadly UserCacheDir doesn't currently let us distinguish
// such intent from accidental misconfiguraton such as HOME=/
// in a CI builder. So, we check whether the gopls subdirectory
// can be created (or already exists) and not fall back to /tmp.
// See also https://github.com/golang/go/issues/57638.
if os.MkdirAll(goplsDir, 0700) != nil {
goplsDir = filepath.Join(os.TempDir(), "gopls")
}
// Start the garbage collector.
go gc(goplsDir)
// Compute the hash of this executable (~20ms) and create a subdirectory.
hash, err := hashExecutable()
if err != nil {
log.Fatalf("can't hash gopls executable: %v", err)
}
// Use only 32 bits of the digest to avoid unwieldy filenames.
// It's not an adversarial situation.
cacheDir = filepath.Join(goplsDir, fmt.Sprintf("%x", hash[:4]))
if err := os.MkdirAll(cacheDir, 0700); err != nil {
log.Fatalf("can't create cache: %v", err)
}
})
return cacheDir
}
var (
cacheDirOnce sync.Once
cacheDir string // only accessed by getCacheDir
)
func hashExecutable() (hash [32]byte, err error) {
exe, err := os.Executable()
if err != nil {
return hash, err
}
f, err := os.Open(exe)
if err != nil {
return hash, err
}
defer f.Close()
h := sha256.New()
if _, err := io.Copy(h, f); err != nil {
return hash, fmt.Errorf("can't read executable: %w", err)
}
h.Sum(hash[:0])
return hash, nil
}
// gc runs forever, periodically deleting files from the gopls
// directory until the space budget is no longer exceeded, and also
// deleting files older than the maximum age, regardless of budget.
//
// One gopls process may delete garbage created by a different gopls
// process, possibly running a different version of gopls, possibly
// running concurrently.
func gc(goplsDir string) {
const period = 1 * time.Minute // period between collections
const statDelay = 100 * time.Microsecond // delay between stats to smooth out I/O
const maxAge = 5 * 24 * time.Hour // max time since last access before file is deleted
// The macOS filesystem is strikingly slow, at least on some machines.
// /usr/bin/find achieves only about 25,000 stats per second
// at full speed (no pause between items), meaning a large
// cache may take several minutes to scan.
// We must ensure that short-lived processes (crucially,
// tests) are able to make progress sweeping garbage.
//
// (gopls' caches should never actually get this big in
// practise: the example mentioned above resulted from a bug
// that caused filecache to fail to delete any files.)
const debug = false
for {
// Enumerate all files in the cache.
type item struct {
path string
stat os.FileInfo
}
var files []item
var total int64 // bytes
_ = filepath.Walk(goplsDir, func(path string, stat os.FileInfo, err error) error {
// TODO(adonovan): opt: also collect empty directories,
// as they typically occupy around 1KB.
if err == nil && !stat.IsDir() {
// Unconditionally delete files we haven't used in ages.
// (We do this here, not in the second loop, so that we
// perform age-based collection even in short-lived processes.)
age := time.Since(stat.ModTime())
if age > maxAge {
if debug {
log.Printf("age: deleting stale file %s (%dB, age %v)",
path, stat.Size(), age)
}
os.Remove(path) // ignore error
} else {
files = append(files, item{path, stat})
total += stat.Size()
time.Sleep(statDelay)
}
}
return nil
})
// Sort oldest files first.
sort.Slice(files, func(i, j int) bool {
return files[i].stat.ModTime().Before(files[j].stat.ModTime())
})
// Delete oldest files until we're under budget.
budget := atomic.LoadInt64(&budget)
for _, file := range files {
if total < budget {
break
}
if debug {
age := time.Since(file.stat.ModTime())
log.Printf("budget: deleting stale file %s (%dB, age %v)",
file.path, file.stat.Size(), age)
}
os.Remove(file.path) // ignore error
total -= file.stat.Size()
}
time.Sleep(period)
}
}