gopls/internal/lsp/filecache/filecache.go - tools - Git at Google

 // Copyright 2022 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 // The filecache package provides a file-based shared durable blob cache.
 //
 // The cache is a machine-global mapping from (kind string, key
 // [32]byte) to []byte, where kind is an identifier describing the
 // namespace or purpose (e.g. "analysis"), and key is a SHA-256 digest
 // of the recipe of the value. (It need not be the digest of the value
 // itself, so you can query the cache without knowing what value the
 // recipe would produce.)
 //
 // The space budget of the cache can be controlled by [SetBudget].
 // Cache entries may be evicted at any time or in any order.
 // Note that "du -sh $GOPLSCACHE" may report a disk usage
 // figure that is rather larger (e.g. 50%) than the budget because
 // it rounds up partial disk blocks.
 //
 // The Get and Set operations are concurrency-safe.
 package filecache

 import (
 	"bytes"
 	"crypto/sha256"
 	"encoding/binary"
 	"encoding/hex"
 	"errors"
 	"fmt"
 	"hash/crc32"
 	"io"
 	"io/fs"
 	"log"
 	"os"
 	"path/filepath"
 	"runtime"
 	"sort"
 	"sync"
 	"sync/atomic"
 	"time"

 	"golang.org/x/tools/gopls/internal/bug"
 	"golang.org/x/tools/gopls/internal/lsp/lru"
 	"golang.org/x/tools/internal/lockedfile"
 )

 // Start kicks off initialization of the filecache, which in turn
 // starts garbage collection, without blocking the caller.
 //
 // The first call to Get performs the same initialization implicitly;
 // calling Start explicitly merely pre-initializes the cache.
 func Start() {
 	go func() {
 		// Only the side effects matter here, so the results are discarded.
 		_, _ = getCacheDir()
 	}()
 }

 // As an optimization, use a 100MB in-memory LRU cache in front of filecache
 // operations. This reduces I/O for operations such as diagnostics or
 // implementations that repeatedly access the same cache entries.
 //
 // Get consults it before touching the file system, and both Get and
 // Set populate it.
 var memCache = lru.New(100 * 1e6)

 // memKey is the key type used for memCache entries: the
 // (kind, key) pair that identifies a filecache entry.
 type memKey struct {
 	kind string
 	key  [32]byte
 }

 // Get retrieves from the cache the value most recently supplied to
 // Set(kind, key), possibly by another process.
 //
 // Get returns ErrNotFound if the value was not found, or if the file
 // that should hold it is too short or records an inconsistent length.
 // A checksum mismatch or any other I/O failure yields a different,
 // non-nil error.
 //
 // The returned slice may be shared with the in-memory cache that
 // fronts the file cache, so callers should treat it as read-only.
 func Get(kind string, key [32]byte) ([]byte, error) {
 	// First consult the read-through memory cache.
 	// Note that memory cache hits do not update the times
 	// used for LRU eviction of the file-based cache.
 	if value := memCache.Get(memKey{kind, key}); value != nil {
 		return value.([]byte), nil
 	}

 	iolimit <- struct{}{}        // acquire a token
 	defer func() { <-iolimit }() // release a token

 	name, err := filename(kind, key)
 	if err != nil {
 		return nil, err
 	}
 	data, err := lockedfile.Read(name)
 	if err != nil {
 		if errors.Is(err, os.ErrNotExist) {
 			return nil, ErrNotFound
 		}
 		return nil, err
 	}

 	// Verify that the Write was complete
 	// by checking the recorded length.
 	// The file layout written by Set is: an 8-byte little-endian
 	// length, the value, and a 4-byte little-endian CRC-32 of the value.
 	if len(data) < 8+4 {
 		return nil, ErrNotFound // cache entry is incomplete
 	}
 	length, value, checksum := data[:8], data[8:len(data)-4], data[len(data)-4:]
 	if binary.LittleEndian.Uint64(length) != uint64(len(value)) {
 		return nil, ErrNotFound // cache entry is incomplete (or too long!)
 	}

 	// Check for corruption and print the entire file content; see
 	// issue #59289. TODO(adonovan): stop printing the entire file
 	// once we've seen enough reports to understand the pattern.
 	if sum := binary.LittleEndian.Uint32(checksum); sum != crc32.ChecksumIEEE(value) {
 		// Darwin has repeatedly displayed a problem (#59895)
 		// whereby the checksum portion (and only it) is zero,
 		// which suggests a bug in its file system. Don't
 		// panic, but keep an eye on other failures for now.
 		errorf := bug.Errorf
 		if sum == 0 && runtime.GOOS == "darwin" {
 			errorf = fmt.Errorf
 		}

 		return nil, errorf("internal error in filecache.Get(%q, %x): invalid checksum at end of %d-byte file %s:\n%q",
 			kind, key, len(data), name, data)
 	}

 	// Update file time for use by LRU eviction.
 	// (This turns every read into a write operation.
 	// If this is a performance problem, we should
 	// touch the files asynchronously.)
 	//
 	// (Traditionally the access time would be updated
 	// automatically, but for efficiency most POSIX systems have
 	// for many years set the noatime mount option to avoid every
 	// open or read operation entailing a metadata write.)
 	//
 	// The file may have been removed since we read it (for example
 	// by the garbage collector of another gopls process). The value
 	// has already been validated, so a missing file is not a reason
 	// to discard it; other failures are still reported.
 	now := time.Now()
 	if err := os.Chtimes(name, now, now); err != nil && !errors.Is(err, os.ErrNotExist) {
 		return nil, fmt.Errorf("failed to update access time: %w", err)
 	}

 	memCache.Set(memKey{kind, key}, value, len(value))
 	return value, nil
 }

 // ErrNotFound is the distinguished error
 // returned by Get when the key is not found.
 // Callers can test for it with errors.Is.
 var ErrNotFound = errors.New("not found")

 // Set updates the value in the cache for (kind, key).
 //
 // The value is recorded in the in-memory cache first and is then
 // written to the file-based cache, framed by a length prefix and a
 // checksum suffix that Get verifies.
 func Set(kind string, key [32]byte, value []byte) error {
 	memCache.Set(memKey{kind, key}, value, len(value))

 	// Hold an I/O token for the remainder of the call.
 	iolimit <- struct{}{}
 	defer func() { <-iolimit }()

 	path, err := filename(kind, key)
 	if err != nil {
 		return err
 	}
 	if err = os.MkdirAll(filepath.Dir(path), 0700); err != nil {
 		return err
 	}

 	// Prefix: the expected length of the value. A short write
 	// (e.g. ENOSPC) followed by process termination (e.g. a power
 	// cut) is unlikely, but a reader must never mistake a short
 	// file for a complete one, so Get checks this length.
 	var prefix [8]byte
 	binary.LittleEndian.PutUint64(prefix[:], uint64(len(value)))

 	// Suffix: a 32-bit checksum of the value. Occasional file
 	// corruption (zero bytes in JSON files) has been reported on
 	// macOS (see issue #59289), assumed due to a nonatomicity
 	// problem in the file system. Ideally the macOS kernel would be
 	// fixed, or lockedfile would implement a workaround (its job is
 	// to provide the reliable mutual exclusion primitive that lets
 	// cooperating gopls processes implement transactional file
 	// replacement), but for now this is an extra integrity check.
 	var suffix [4]byte
 	binary.LittleEndian.PutUint32(suffix[:], crc32.ChecksumIEEE(value))

 	// Windows doesn't support atomic rename--we tried MoveFile,
 	// MoveFileEx, ReplaceFileEx, and SetFileInformationByHandle
 	// of RenameFileInfo, all to no avail--so instead we use
 	// advisory file locking, which is only about 2x slower even
 	// on POSIX platforms with atomic rename.
 	content := io.MultiReader(
 		bytes.NewReader(prefix[:]),
 		bytes.NewReader(value),
 		bytes.NewReader(suffix[:]),
 	)
 	return lockedfile.Write(path, content, 0600)
 }

 // iolimit is a counting semaphore that limits I/O concurrency:
 // Get and Set each hold one of its 128 tokens while they access files.
 var iolimit = make(chan struct{}, 128)

 // budget is the soft limit, in bytes, on the disk usage of the cache.
 // It is read and written with sync/atomic operations.
 var budget int64 = 1e9 // 1GB

 // SetBudget sets a soft limit on disk usage of the cache (in bytes)
 // and returns the previous value. A negative argument leaves the
 // limit unchanged and simply reports the current value.
 //
 // If two gopls processes have different budgets, the one with the
 // lower budget will collect garbage more actively, but both will
 // observe the effect.
 func SetBudget(new int64) (old int64) {
 	if new >= 0 {
 		return atomic.SwapInt64(&budget, new)
 	}
 	// Negative: pure query.
 	return atomic.LoadInt64(&budget)
 }

 // --- implementation ----

 // filename returns the cache entry of the specified kind and key.
 //
 // A typical cache entry is a file name such as:
 //
 //	$HOME/Library/Caches / gopls / VVVVVVVV / kind / KK / KKKK...KKKK
 //
 // The portions separated by spaces are as follows:
 // - The user's preferred cache directory; the default value varies by OS.
 // - The constant "gopls".
 // - The "version", 32 bits of the digest of the gopls executable.
 // - The kind or purpose of this cache subtree (e.g. "analysis").
 // - The first 8 bits of the key, to avoid huge directories.
 // - The full 256 bits of the key.
 //
 // Once a file is written its contents are never modified, though it
 // may be atomically replaced or removed.
 //
 // New versions of gopls are free to reorganize the contents of the
 // version directory as needs evolve.  But all versions of gopls must
 // in perpetuity treat the "gopls" directory in a common fashion.
 //
 // In particular, each gopls process attempts to garbage collect
 // the entire gopls directory so that newer binaries can clean up
 // after older ones: in the development cycle especially, new
 // new versions may be created frequently.
 func filename(kind string, key [32]byte) (string, error) {
 	hex := fmt.Sprintf("%x", key)
 	dir, err := getCacheDir()
 	if err != nil {
 		return "", err
 	}
 	return filepath.Join(dir, kind, hex[:2], hex), nil
 }

 // getCacheDir returns the persistent cache directory of all processes
 // running this version of the gopls executable.
 //
 // It must incorporate the hash of the executable so that we needn't
 // worry about incompatible changes to the file format or changes to
 // the algorithm that produced the index.
 //
 // The directory is computed and created by the first call, which also
 // starts the garbage collector; every call returns the same result.
 // On failure the returned directory is empty and the error is non-nil.
 func getCacheDir() (string, error) {
 	cacheDirOnce.Do(func() {
 		// Use user's preferred cache directory.
 		userDir := os.Getenv("GOPLSCACHE")
 		if userDir == "" {
 			var err error
 			userDir, err = os.UserCacheDir()
 			if err != nil {
 				userDir = os.TempDir()
 			}
 		}
 		goplsDir := filepath.Join(userDir, "gopls")

 		// UserCacheDir may return a nonexistent directory
 		// (in which case we must create it, which may fail),
 		// or it may return a non-writable directory, in
 		// which case we should ideally respect the user's express
 		// wishes (e.g. XDG_CACHE_HOME) and not write somewhere else.
 		// Sadly UserCacheDir doesn't currently let us distinguish
 		// such intent from accidental misconfiguration such as HOME=/
 		// in a CI builder. So, we check whether the gopls subdirectory
 		// can be created (or already exists) and, if not, fall back
 		// to the temporary directory.
 		// See also https://github.com/golang/go/issues/57638.
 		if os.MkdirAll(goplsDir, 0700) != nil {
 			goplsDir = filepath.Join(os.TempDir(), "gopls")
 		}

 		// Start the garbage collector.
 		go gc(goplsDir)

 		// Compute the hash of this executable (~20ms) and create a subdirectory.
 		hash, err := hashExecutable()
 		if err != nil {
 			// Without a hash there is no version directory to use.
 			// Stop here rather than create one named after the zero
 			// hash and risk masking this error with a later one.
 			cacheDirErr = fmt.Errorf("can't hash gopls executable: %w", err)
 			return
 		}
 		// Use only 32 bits of the digest to avoid unwieldy filenames.
 		// It's not an adversarial situation.
 		dir := filepath.Join(goplsDir, fmt.Sprintf("%x", hash[:4]))
 		if err := os.MkdirAll(dir, 0700); err != nil {
 			cacheDirErr = fmt.Errorf("can't create cache: %w", err)
 			return
 		}
 		cacheDir = dir
 	})
 	return cacheDir, cacheDirErr
 }

 // Result of the one-time initialization performed by getCacheDir.
 var (
 	cacheDirOnce sync.Once // runs the initialization at most once
 	cacheDir     string    // version-specific cache directory returned by getCacheDir
 	cacheDirErr  error     // initialization error returned by getCacheDir, if any
 )

 // hashExecutable returns the SHA-256 digest of the contents of the
 // file reported by os.Executable. It fails if that file cannot be
 // located, opened, or read in full.
 func hashExecutable() (hash [32]byte, err error) {
 	path, err := os.Executable()
 	if err != nil {
 		return hash, err
 	}
 	file, err := os.Open(path)
 	if err != nil {
 		return hash, err
 	}
 	defer file.Close()

 	digest := sha256.New()
 	if _, err = io.Copy(digest, file); err != nil {
 		return hash, fmt.Errorf("can't read executable: %w", err)
 	}
 	copy(hash[:], digest.Sum(nil))
 	return hash, nil
 }

 // gc runs forever, periodically deleting files from the gopls
 // directory until the space budget is no longer exceeded, and also
 // deleting files older than the maximum age, regardless of budget.
 //
 // One gopls process may delete garbage created by a different gopls
 // process, possibly running a different version of gopls, possibly
 // running concurrently.
 //
 // goplsDir is the root of the tree to collect. gc never returns, so
 // it is meant to be run in its own goroutine.
 func gc(goplsDir string) {
 	const period = 1 * time.Minute // period between collections
 	// Sleep statDelay*batchSize between stats to smooth out I/O.
 	//
 	// The constants below were chosen using the following heuristics:
 	//  - 1GB of filecache is on the order of ~100-200k files, in which case
 	//    100μs delay per file introduces 10-20s of additional walk time, less
 	//    than the 1m gc period.
 	//  - Processing batches of stats at once is much more efficient than
 	//    sleeping after every stat (due to OS optimizations).
 	const statDelay = 100 * time.Microsecond // average delay between stats, to smooth out I/O
 	const batchSize = 1000                   // # of stats to process before sleeping
 	maxAge := 5 * 24 * time.Hour             // max time since last access before file is deleted

 	// This environment variable is set when running under a Go test builder.
 	// We use it to trigger much more aggressive cache eviction to prevent
 	// filling of the tmp volume by short-lived test processes.
 	// A single run of the gopls tests takes on the order of a minute
 	// and produces <50MB of cache data, so these are still generous.
 	if os.Getenv("GO_BUILDER_NAME") != "" {
 		maxAge = 1 * time.Hour
 		SetBudget(250 * 1e6) // 250MB
 	}

 	// The macOS filesystem is strikingly slow, at least on some machines.
 	// /usr/bin/find achieves only about 25,000 stats per second
 	// at full speed (no pause between items), meaning a large
 	// cache may take several minutes to scan.
 	// We must ensure that short-lived processes (crucially,
 	// tests) are able to make progress sweeping garbage.
 	//
 	// (gopls' caches should never actually get this big in
 	// practice: the example mentioned above resulted from a bug
 	// that caused filecache to fail to delete any files.)

 	// debug enables logging of each deletion and of scan progress.
 	const debug = false

 	// Names of all directories found in first pass; nil thereafter.
 	dirs := make(map[string]bool)

 	for {
 		// Enumerate all files in the cache.
 		type item struct {
 			path string
 			stat os.FileInfo
 		}
 		var files []item
 		start := time.Now()
 		var total int64 // bytes
 		_ = filepath.Walk(goplsDir, func(path string, stat os.FileInfo, err error) error {
 			if err != nil {
 				return nil // ignore errors
 			}
 			if stat.IsDir() {
 				// Collect (potentially empty) directories.
 				if dirs != nil {
 					dirs[path] = true
 				}
 			} else {
 				// Unconditionally delete files we haven't used in ages.
 				// (We do this here, not in the second loop, so that we
 				// perform age-based collection even in short-lived processes.)
 				//
 				// The modification time serves as the time of last use
 				// because Get resets it with os.Chtimes after each file read.
 				age := time.Since(stat.ModTime())
 				if age > maxAge {
 					if debug {
 						log.Printf("age: deleting stale file %s (%dB, age %v)",
 							path, stat.Size(), age)
 					}
 					os.Remove(path) // ignore error
 				} else {
 					files = append(files, item{path, stat})
 					total += stat.Size()
 					if debug && len(files)%1000 == 0 {
 						log.Printf("filecache: checked %d files in %v", len(files), time.Since(start))
 					}
 					// Pause after each full batch of retained files.
 					if len(files)%batchSize == 0 {
 						time.Sleep(batchSize * statDelay)
 					}
 				}
 			}
 			return nil
 		})

 		// Sort oldest files first.
 		sort.Slice(files, func(i, j int) bool {
 			return files[i].stat.ModTime().Before(files[j].stat.ModTime())
 		})

 		// Delete oldest files until we're under budget.
 		// (The local variable is a snapshot that shadows the
 		// package-level budget for the rest of this pass.)
 		budget := atomic.LoadInt64(&budget)
 		for _, file := range files {
 			if total < budget {
 				break
 			}
 			if debug {
 				age := time.Since(file.stat.ModTime())
 				log.Printf("budget: deleting stale file %s (%dB, age %v)",
 					file.path, file.stat.Size(), age)
 			}
 			os.Remove(file.path) // ignore error
 			// The size is subtracted even if the removal failed,
 			// so total is only an estimate.
 			total -= file.stat.Size()
 		}

 		time.Sleep(period)

 		// Once only, delete all directories.
 		// This will succeed only for the empty ones,
 		// and ensures that stale directories (whose
 		// files have been deleted) are removed eventually.
 		// They don't take up much space but they do slow
 		// down the traversal.
 		//
 		// We do this after the sleep to minimize the
 		// race against Set, which may create a directory
 		// that is momentarily empty.
 		//
 		// (Test processes don't live that long, so
 		// this may not be reached on the CI builders.)
 		if dirs != nil {
 			dirnames := make([]string, 0, len(dirs))
 			for dir := range dirs {
 				dirnames = append(dirnames, dir)
 			}
 			dirs = nil

 			// Descending length order => children before parents.
 			sort.Slice(dirnames, func(i, j int) bool {
 				return len(dirnames[i]) > len(dirnames[j])
 			})
 			var deleted int
 			for _, dir := range dirnames {
 				if os.Remove(dir) == nil { // ignore error
 					deleted++
 				}
 			}
 			if debug {
 				log.Printf("deleted %d empty directories", deleted)
 			}
 		}
 	}
 }

 // bugKind is the cache kind reserved for gopls bug reports.
 const bugKind = "bug"

 func init() {
 	// Install a bug handler that durably stores each reported
 	// assertion failure in the cache, so that we can ask users to
 	// share this information via the stats command.
 	bug.Handle(func(b bug.Bug) {
 		// Wait for cache init (bugs in tests happen early).
 		_, _ = getCacheDir()

 		// The report is a timestamp followed by the bug's details;
 		// it is stored under the digest of its own content.
 		stamp := time.Now().Format(time.RFC3339)
 		report := []byte(fmt.Sprintf("%s: %+v", stamp, b))
 		_ = Set(bugKind, sha256.Sum256(report), report) // best effort
 	})
 }

 // BugReports returns a new unordered array of the contents
 // of all cached bug reports produced by this executable.
 //
 // It returns nil if the cache directory cannot be initialized.
 // Files whose names are not the hexadecimal form of a 32-byte key,
 // and entries that cannot be read, are silently skipped.
 func BugReports() [][]byte {
 	dir, err := getCacheDir()
 	if err != nil {
 		return nil // ignore initialization errors
 	}
 	var result [][]byte
 	_ = filepath.Walk(filepath.Join(dir, bugKind),
 		func(path string, info fs.FileInfo, err error) error {
 			if err != nil {
 				return nil // ignore readdir/stat errors
 			}
 			if info.IsDir() {
 				return nil
 			}
 			var key [32]byte
 			base := filepath.Base(path)
 			// hex.Decode stores one byte per two input characters, so
 			// a longer name would index past the end of key and panic.
 			// Check the length before decoding.
 			if len(base) != hex.EncodedLen(len(key)) {
 				return nil // ignore malformed file names
 			}
 			if _, err := hex.Decode(key[:], []byte(base)); err != nil {
 				return nil // ignore malformed file names
 			}
 			content, err := Get(bugKind, key)
 			if err == nil { // ignore read errors
 				result = append(result, content)
 			}
 			return nil
 		})
 	return result
 }
	// Copyright 2022 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	// The filecache package provides a file-based shared durable blob cache.
	//
	// The cache is a machine-global mapping from (kind string, key
	// [32]byte) to []byte, where kind is an identifier describing the
	// namespace or purpose (e.g. "analysis"), and key is a SHA-256 digest
	// of the recipe of the value. (It need not be the digest of the value
	// itself, so you can query the cache without knowing what value the
	// recipe would produce.)
	//
	// The space budget of the cache can be controlled by [SetBudget].
	// Cache entries may be evicted at any time or in any order.
	// Note that "du -sh $GOPLSCACHE" may report a disk usage
	// figure that is rather larger (e.g. 50%) than the budget because
	// it rounds up partial disk blocks.
	//
	// The Get and Set operations are concurrency-safe.
	package filecache

	import (
	"bytes"
	"crypto/sha256"
	"encoding/binary"
	"encoding/hex"
	"errors"
	"fmt"
	"hash/crc32"
	"io"
	"io/fs"
	"log"
	"os"
	"path/filepath"
	"runtime"
	"sort"
	"sync"
	"sync/atomic"
	"time"

	"golang.org/x/tools/gopls/internal/bug"
	"golang.org/x/tools/gopls/internal/lsp/lru"
	"golang.org/x/tools/internal/lockedfile"
	)

	// Start kicks off initialization of the filecache, which in turn
	// starts garbage collection, without blocking the caller.
	//
	// The first call to Get performs the same initialization implicitly;
	// calling Start explicitly merely pre-initializes the cache.
	func Start() {
		go func() {
			// Only the side effects matter here, so the results are discarded.
			_, _ = getCacheDir()
		}()
	}

	// As an optimization, use a 100MB in-memory LRU cache in front of filecache
	// operations. This reduces I/O for operations such as diagnostics or
	// implementations that repeatedly access the same cache entries.
	//
	// Get consults it before touching the file system, and both Get and
	// Set populate it.
	var memCache = lru.New(100 * 1e6)

	// memKey is the key type used for memCache entries: the
	// (kind, key) pair that identifies a filecache entry.
	type memKey struct {
	kind string
	key [32]byte
	}

	// Get retrieves from the cache the value most recently supplied to
	// Set(kind, key), possibly by another process.
	//
	// Get returns ErrNotFound if the value was not found, or if the file
	// that should hold it is too short or records an inconsistent length.
	// A checksum mismatch or any other I/O failure yields a different,
	// non-nil error.
	//
	// The returned slice may be shared with the in-memory cache that
	// fronts the file cache, so callers should treat it as read-only.
	func Get(kind string, key [32]byte) ([]byte, error) {
		// First consult the read-through memory cache.
		// Note that memory cache hits do not update the times
		// used for LRU eviction of the file-based cache.
		if value := memCache.Get(memKey{kind, key}); value != nil {
			return value.([]byte), nil
		}

		iolimit <- struct{}{}        // acquire a token
		defer func() { <-iolimit }() // release a token

		name, err := filename(kind, key)
		if err != nil {
			return nil, err
		}
		data, err := lockedfile.Read(name)
		if err != nil {
			if errors.Is(err, os.ErrNotExist) {
				return nil, ErrNotFound
			}
			return nil, err
		}

		// Verify that the Write was complete
		// by checking the recorded length.
		// The file layout written by Set is: an 8-byte little-endian
		// length, the value, and a 4-byte little-endian CRC-32 of the value.
		if len(data) < 8+4 {
			return nil, ErrNotFound // cache entry is incomplete
		}
		length, value, checksum := data[:8], data[8:len(data)-4], data[len(data)-4:]
		if binary.LittleEndian.Uint64(length) != uint64(len(value)) {
			return nil, ErrNotFound // cache entry is incomplete (or too long!)
		}

		// Check for corruption and print the entire file content; see
		// issue #59289. TODO(adonovan): stop printing the entire file
		// once we've seen enough reports to understand the pattern.
		if sum := binary.LittleEndian.Uint32(checksum); sum != crc32.ChecksumIEEE(value) {
			// Darwin has repeatedly displayed a problem (#59895)
			// whereby the checksum portion (and only it) is zero,
			// which suggests a bug in its file system. Don't
			// panic, but keep an eye on other failures for now.
			errorf := bug.Errorf
			if sum == 0 && runtime.GOOS == "darwin" {
				errorf = fmt.Errorf
			}

			return nil, errorf("internal error in filecache.Get(%q, %x): invalid checksum at end of %d-byte file %s:\n%q",
				kind, key, len(data), name, data)
		}

		// Update file time for use by LRU eviction.
		// (This turns every read into a write operation.
		// If this is a performance problem, we should
		// touch the files asynchronously.)
		//
		// (Traditionally the access time would be updated
		// automatically, but for efficiency most POSIX systems have
		// for many years set the noatime mount option to avoid every
		// open or read operation entailing a metadata write.)
		//
		// The file may have been removed since we read it (for example
		// by the garbage collector of another gopls process). The value
		// has already been validated, so a missing file is not a reason
		// to discard it; other failures are still reported.
		now := time.Now()
		if err := os.Chtimes(name, now, now); err != nil && !errors.Is(err, os.ErrNotExist) {
			return nil, fmt.Errorf("failed to update access time: %w", err)
		}

		memCache.Set(memKey{kind, key}, value, len(value))
		return value, nil
	}

	// ErrNotFound is the distinguished error
	// returned by Get when the key is not found.
	// Callers can test for it with errors.Is.
	var ErrNotFound = errors.New("not found")

	// Set updates the value in the cache for (kind, key).
	//
	// The value is recorded in the in-memory cache first and is then
	// written to the file-based cache, framed by a length prefix and a
	// checksum suffix that Get verifies.
	func Set(kind string, key [32]byte, value []byte) error {
		memCache.Set(memKey{kind, key}, value, len(value))

		// Hold an I/O token for the remainder of the call.
		iolimit <- struct{}{}
		defer func() { <-iolimit }()

		path, err := filename(kind, key)
		if err != nil {
			return err
		}
		if err = os.MkdirAll(filepath.Dir(path), 0700); err != nil {
			return err
		}

		// Prefix: the expected length of the value. A short write
		// (e.g. ENOSPC) followed by process termination (e.g. a power
		// cut) is unlikely, but a reader must never mistake a short
		// file for a complete one, so Get checks this length.
		var prefix [8]byte
		binary.LittleEndian.PutUint64(prefix[:], uint64(len(value)))

		// Suffix: a 32-bit checksum of the value. Occasional file
		// corruption (zero bytes in JSON files) has been reported on
		// macOS (see issue #59289), assumed due to a nonatomicity
		// problem in the file system. Ideally the macOS kernel would be
		// fixed, or lockedfile would implement a workaround (its job is
		// to provide the reliable mutual exclusion primitive that lets
		// cooperating gopls processes implement transactional file
		// replacement), but for now this is an extra integrity check.
		var suffix [4]byte
		binary.LittleEndian.PutUint32(suffix[:], crc32.ChecksumIEEE(value))

		// Windows doesn't support atomic rename--we tried MoveFile,
		// MoveFileEx, ReplaceFileEx, and SetFileInformationByHandle
		// of RenameFileInfo, all to no avail--so instead we use
		// advisory file locking, which is only about 2x slower even
		// on POSIX platforms with atomic rename.
		content := io.MultiReader(
			bytes.NewReader(prefix[:]),
			bytes.NewReader(value),
			bytes.NewReader(suffix[:]),
		)
		return lockedfile.Write(path, content, 0600)
	}

	// iolimit is a counting semaphore that limits I/O concurrency:
	// Get and Set each hold one of its 128 tokens while they access files.
	var iolimit = make(chan struct{}, 128)

	// budget is the soft limit, in bytes, on the disk usage of the cache.
	// It is read and written with sync/atomic operations.
	var budget int64 = 1e9 // 1GB

	// SetBudget sets a soft limit on disk usage of the cache (in bytes)
	// and returns the previous value. A negative argument leaves the
	// limit unchanged and simply reports the current value.
	//
	// If two gopls processes have different budgets, the one with the
	// lower budget will collect garbage more actively, but both will
	// observe the effect.
	func SetBudget(new int64) (old int64) {
		if new >= 0 {
			return atomic.SwapInt64(&budget, new)
		}
		// Negative: pure query.
		return atomic.LoadInt64(&budget)
	}

	// --- implementation ----

	// filename returns the cache entry of the specified kind and key.
	//
	// A typical cache entry is a file name such as:
	//
	// $HOME/Library/Caches / gopls / VVVVVVVV / kind / KK / KKKK...KKKK
	//
	// The portions separated by spaces are as follows:
	// - The user's preferred cache directory; the default value varies by OS.
	// - The constant "gopls".
	// - The "version", 32 bits of the digest of the gopls executable.
	// - The kind or purpose of this cache subtree (e.g. "analysis").
	// - The first 8 bits of the key, to avoid huge directories.
	// - The full 256 bits of the key.
	//
	// Once a file is written its contents are never modified, though it
	// may be atomically replaced or removed.
	//
	// New versions of gopls are free to reorganize the contents of the
	// version directory as needs evolve. But all versions of gopls must
	// in perpetuity treat the "gopls" directory in a common fashion.
	//
	// In particular, each gopls process attempts to garbage collect
	// the entire gopls directory so that newer binaries can clean up
	// after older ones: in the development cycle especially, new
	// new versions may be created frequently.
	func filename(kind string, key [32]byte) (string, error) {
	hex := fmt.Sprintf("%x", key)
	dir, err := getCacheDir()
	if err != nil {
	return "", err
	}
	return filepath.Join(dir, kind, hex[:2], hex), nil
	}

	// getCacheDir returns the persistent cache directory of all processes
	// running this version of the gopls executable.
	//
	// It must incorporate the hash of the executable so that we needn't
	// worry about incompatible changes to the file format or changes to
	// the algorithm that produced the index.
	//
	// The directory is computed and created by the first call, which also
	// starts the garbage collector; every call returns the same result.
	// On failure the returned directory is empty and the error is non-nil.
	func getCacheDir() (string, error) {
		cacheDirOnce.Do(func() {
			// Use user's preferred cache directory.
			userDir := os.Getenv("GOPLSCACHE")
			if userDir == "" {
				var err error
				userDir, err = os.UserCacheDir()
				if err != nil {
					userDir = os.TempDir()
				}
			}
			goplsDir := filepath.Join(userDir, "gopls")

			// UserCacheDir may return a nonexistent directory
			// (in which case we must create it, which may fail),
			// or it may return a non-writable directory, in
			// which case we should ideally respect the user's express
			// wishes (e.g. XDG_CACHE_HOME) and not write somewhere else.
			// Sadly UserCacheDir doesn't currently let us distinguish
			// such intent from accidental misconfiguration such as HOME=/
			// in a CI builder. So, we check whether the gopls subdirectory
			// can be created (or already exists) and, if not, fall back
			// to the temporary directory.
			// See also https://github.com/golang/go/issues/57638.
			if os.MkdirAll(goplsDir, 0700) != nil {
				goplsDir = filepath.Join(os.TempDir(), "gopls")
			}

			// Start the garbage collector.
			go gc(goplsDir)

			// Compute the hash of this executable (~20ms) and create a subdirectory.
			hash, err := hashExecutable()
			if err != nil {
				// Without a hash there is no version directory to use.
				// Stop here rather than create one named after the zero
				// hash and risk masking this error with a later one.
				cacheDirErr = fmt.Errorf("can't hash gopls executable: %w", err)
				return
			}
			// Use only 32 bits of the digest to avoid unwieldy filenames.
			// It's not an adversarial situation.
			dir := filepath.Join(goplsDir, fmt.Sprintf("%x", hash[:4]))
			if err := os.MkdirAll(dir, 0700); err != nil {
				cacheDirErr = fmt.Errorf("can't create cache: %w", err)
				return
			}
			cacheDir = dir
		})
		return cacheDir, cacheDirErr
	}

	// Result of the one-time initialization performed by getCacheDir.
	var (
	cacheDirOnce sync.Once // runs the initialization at most once
	cacheDir string // version-specific cache directory returned by getCacheDir
	cacheDirErr error // initialization error returned by getCacheDir, if any
	)

	// hashExecutable returns the SHA-256 digest of the contents of the
	// file reported by os.Executable. It fails if that file cannot be
	// located, opened, or read in full.
	func hashExecutable() (hash [32]byte, err error) {
		path, err := os.Executable()
		if err != nil {
			return hash, err
		}
		file, err := os.Open(path)
		if err != nil {
			return hash, err
		}
		defer file.Close()

		digest := sha256.New()
		if _, err = io.Copy(digest, file); err != nil {
			return hash, fmt.Errorf("can't read executable: %w", err)
		}
		copy(hash[:], digest.Sum(nil))
		return hash, nil
	}

	// gc runs forever, periodically deleting files from the gopls
	// directory until the space budget is no longer exceeded, and also
	// deleting files older than the maximum age, regardless of budget.
	//
	// One gopls process may delete garbage created by a different gopls
	// process, possibly running a different version of gopls, possibly
	// running concurrently.
	//
	// goplsDir is the root of the tree to collect. gc never returns, so
	// it is meant to be run in its own goroutine.
	func gc(goplsDir string) {
	const period = 1 * time.Minute // period between collections
	// Sleep statDelay*batchSize between stats to smooth out I/O.
	//
	// The constants below were chosen using the following heuristics:
	//  - 1GB of filecache is on the order of ~100-200k files, in which case
	//    100μs delay per file introduces 10-20s of additional walk time, less
	//    than the 1m gc period.
	//  - Processing batches of stats at once is much more efficient than
	//    sleeping after every stat (due to OS optimizations).
	const statDelay = 100 * time.Microsecond // average delay between stats, to smooth out I/O
	const batchSize = 1000 // # of stats to process before sleeping
	maxAge := 5 * 24 * time.Hour // max time since last access before file is deleted

	// This environment variable is set when running under a Go test builder.
	// We use it to trigger much more aggressive cache eviction to prevent
	// filling of the tmp volume by short-lived test processes.
	// A single run of the gopls tests takes on the order of a minute
	// and produces <50MB of cache data, so these are still generous.
	if os.Getenv("GO_BUILDER_NAME") != "" {
	maxAge = 1 * time.Hour
	SetBudget(250 * 1e6) // 250MB
	}

	// The macOS filesystem is strikingly slow, at least on some machines.
	// /usr/bin/find achieves only about 25,000 stats per second
	// at full speed (no pause between items), meaning a large
	// cache may take several minutes to scan.
	// We must ensure that short-lived processes (crucially,
	// tests) are able to make progress sweeping garbage.
	//
	// (gopls' caches should never actually get this big in
	// practice: the example mentioned above resulted from a bug
	// that caused filecache to fail to delete any files.)

	// debug enables logging of each deletion and of scan progress.
	const debug = false

	// Names of all directories found in first pass; nil thereafter.
	dirs := make(map[string]bool)

	for {
	// Enumerate all files in the cache.
	type item struct {
	path string
	stat os.FileInfo
	}
	var files []item
	start := time.Now()
	var total int64 // bytes
	_ = filepath.Walk(goplsDir, func(path string, stat os.FileInfo, err error) error {
	if err != nil {
	return nil // ignore errors
	}
	if stat.IsDir() {
	// Collect (potentially empty) directories.
	if dirs != nil {
	dirs[path] = true
	}
	} else {
	// Unconditionally delete files we haven't used in ages.
	// (We do this here, not in the second loop, so that we
	// perform age-based collection even in short-lived processes.)
	//
	// The modification time serves as the time of last use
	// because Get resets it with os.Chtimes after each file read.
	age := time.Since(stat.ModTime())
	if age > maxAge {
	if debug {
	log.Printf("age: deleting stale file %s (%dB, age %v)",
	path, stat.Size(), age)
	}
	os.Remove(path) // ignore error
	} else {
	files = append(files, item{path, stat})
	total += stat.Size()
	if debug && len(files)%1000 == 0 {
	log.Printf("filecache: checked %d files in %v", len(files), time.Since(start))
	}
	// Pause after each full batch of retained files.
	if len(files)%batchSize == 0 {
	time.Sleep(batchSize * statDelay)
	}
	}
	}
	return nil
	})

	// Sort oldest files first.
	sort.Slice(files, func(i, j int) bool {
	return files[i].stat.ModTime().Before(files[j].stat.ModTime())
	})

	// Delete oldest files until we're under budget.
	// (The local variable is a snapshot that shadows the
	// package-level budget for the rest of this pass.)
	budget := atomic.LoadInt64(&budget)
	for _, file := range files {
	if total < budget {
	break
	}
	if debug {
	age := time.Since(file.stat.ModTime())
	log.Printf("budget: deleting stale file %s (%dB, age %v)",
	file.path, file.stat.Size(), age)
	}
	os.Remove(file.path) // ignore error
	// The size is subtracted even if the removal failed,
	// so total is only an estimate.
	total -= file.stat.Size()
	}

	time.Sleep(period)

	// Once only, delete all directories.
	// This will succeed only for the empty ones,
	// and ensures that stale directories (whose
	// files have been deleted) are removed eventually.
	// They don't take up much space but they do slow
	// down the traversal.
	//
	// We do this after the sleep to minimize the
	// race against Set, which may create a directory
	// that is momentarily empty.
	//
	// (Test processes don't live that long, so
	// this may not be reached on the CI builders.)
	if dirs != nil {
	dirnames := make([]string, 0, len(dirs))
	for dir := range dirs {
	dirnames = append(dirnames, dir)
	}
	dirs = nil

	// Descending length order => children before parents.
	sort.Slice(dirnames, func(i, j int) bool {
	return len(dirnames[i]) > len(dirnames[j])
	})
	var deleted int
	for _, dir := range dirnames {
	if os.Remove(dir) == nil { // ignore error
	deleted++
	}
	}
	if debug {
	log.Printf("deleted %d empty directories", deleted)
	}
	}
	}
	}

	// bugKind is the cache kind reserved for gopls bug reports.
	const bugKind = "bug"

	func init() {
		// Install a bug handler that durably stores each reported
		// assertion failure in the cache, so that we can ask users to
		// share this information via the stats command.
		bug.Handle(func(b bug.Bug) {
			// Wait for cache init (bugs in tests happen early).
			_, _ = getCacheDir()

			// The report is a timestamp followed by the bug's details;
			// it is stored under the digest of its own content.
			stamp := time.Now().Format(time.RFC3339)
			report := []byte(fmt.Sprintf("%s: %+v", stamp, b))
			_ = Set(bugKind, sha256.Sum256(report), report) // best effort
		})
	}

	// BugReports returns a new unordered array of the contents
	// of all cached bug reports produced by this executable.
	func BugReports() [][]byte {
	dir, err := getCacheDir()
	if err != nil {
	return nil // ignore initialization errors
	}
	var result [][]byte
	_ = filepath.Walk(filepath.Join(dir, bugKind),
	func(path string, info fs.FileInfo, err error) error {
	if err != nil {
	return nil // ignore readdir/stat errors
	}
	if !info.IsDir() {
	var key [32]byte
	n, err := hex.Decode(key[:], []byte(filepath.Base(path)))
	if err != nil \|\| n != len(key) {
	return nil // ignore malformed file names
	}
	content, err := Get(bugKind, key)
	if err == nil { // ignore read errors
	result = append(result, content)
	}
	}
	return nil
	})
	return result
	}