gopls/internal/cache: use AddExistingFiles in the parse cache

Use the new tokeninternal.AddExistingFiles API to simplify the parse
cache and type-checking pass. Since we can now add parsed files to the
batch's FileSet even after importing has begun, we no longer need the
multi-phase type-checking pass and can evaluate promises in any order.

This broke on 32-bit systems, where the reserved token.Pos space is too
small to be viable, so add a temporary fallback on those systems that
does not cache parsed files.

For golang/go#57987

Change-Id: I465763acd8baf275675890de64975bdbc045f375
Reviewed-on: https://go-review.googlesource.com/c/tools/+/473165
Run-TryBot: Robert Findley <rfindley@google.com>
Reviewed-by: Alan Donovan <adonovan@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
gopls-CI: kokoro <noreply+kokoro@google.com>
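
To make the scheme concrete before the diff: the parse cache hands out
token.Pos bases below reservedForParsing, and consumers create FileSets
whose base starts at that boundary, so later AddFile calls (e.g. by the
importer) cannot collide with cached files. A minimal standalone sketch
of the reservation idea, using a small stand-in constant rather than the
real reservedForParsing:

package main

import (
	"fmt"
	"go/parser"
	"go/token"
)

func main() {
	// reserved stands in for reservedForParsing (really 1<<(bits.UintSize-4)).
	const reserved = 1 << 20

	// A zero-length dummy file at reserved-1 advances Base() to exactly
	// reserved, because FileSets leave one byte of padding between files.
	fset := token.NewFileSet()
	fset.AddFile("", reserved-1, 0)

	// Anything parsed into this FileSet lands above the reservation, so
	// the range [1, reserved) stays free for files cached elsewhere.
	f, err := parser.ParseFile(fset, "x.go", "package p\n", 0)
	if err != nil {
		panic(err)
	}
	fmt.Println(fset.Base() >= reserved, f.Pos() >= reserved) // true true
}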
diff --git a/gopls/internal/lsp/cache/check.go b/gopls/internal/lsp/cache/check.go
index be27518..529940a 100644
--- a/gopls/internal/lsp/cache/check.go
+++ b/gopls/internal/lsp/cache/check.go
@@ -45,10 +45,10 @@
 type typeCheckBatch struct {
 	meta *metadataGraph
 
-	cpulimit    chan struct{}                     // concurrency limiter for CPU-bound operations
-	needSyntax  map[PackageID]bool                // packages that need type-checked syntax
-	parsedFiles map[span.URI]*source.ParsedGoFile // parsed files necessary for type-checking
-	fset        *token.FileSet                    // FileSet describing all parsed files
+	fset       *token.FileSet     // FileSet describing all parsed files
+	parseCache *parseCache        // shared parsing cache
+	cpulimit   chan struct{}      // concurrency limiter for CPU-bound operations
+	needSyntax map[PackageID]bool // packages that need type-checked syntax
 
 	// Promises holds promises to either read export data for the package, or
 	// parse and type-check its syntax.
@@ -57,11 +57,15 @@
 	// awaited, they must write an entry into the imports map.
 	promises map[PackageID]*memoize.Promise
 
-	mu         sync.Mutex
-	needFiles  map[span.URI]source.FileHandle // de-duplicated file handles required for type-checking
-	imports    map[PackageID]pkgOrErr         // types.Packages to use for importing
-	exportData map[PackageID][]byte
-	packages   map[PackageID]*Package
+	mu sync.Mutex
+	// TODO(rfindley): parsedFiles, which holds every file needed for
+	// type-checking, may not be necessary given the parseCache.
+	//
+	// In fact, parsedFiles may be counter-productive due to pinning all files in
+	// memory during large operations.
+	parsedFiles map[span.URI]*source.ParsedGoFile // parsed files necessary for type-checking
+	imports     map[PackageID]pkgOrErr            // types.Packages to use for importing
+	packages    map[PackageID]*Package
 }
 
 type pkgOrErr struct {
@@ -79,18 +83,17 @@
 // indicates context cancellation or otherwise significant failure to perform
 // the type-checking operation.
 func (s *snapshot) TypeCheck(ctx context.Context, ids ...PackageID) ([]source.Package, error) {
-	// Build up shared state for efficient type-checking.
+	// Shared state for efficient type-checking.
 	b := &typeCheckBatch{
-		cpulimit:    make(chan struct{}, runtime.GOMAXPROCS(0)),
-		needSyntax:  make(map[PackageID]bool),
-		parsedFiles: make(map[span.URI]*source.ParsedGoFile),
-		// fset is built during the parsing pass.
+		fset:       fileSetWithBase(reservedForParsing),
+		parseCache: s.parseCache,
+		cpulimit:   make(chan struct{}, runtime.GOMAXPROCS(0)),
+		needSyntax: make(map[PackageID]bool),
 
-		needFiles:  make(map[span.URI]source.FileHandle),
-		promises:   make(map[PackageID]*memoize.Promise),
-		imports:    make(map[PackageID]pkgOrErr),
-		exportData: make(map[PackageID][]byte),
-		packages:   make(map[PackageID]*Package),
+		parsedFiles: make(map[span.URI]*source.ParsedGoFile),
+		promises:    make(map[PackageID]*memoize.Promise),
+		imports:     make(map[PackageID]pkgOrErr),
+		packages:    make(map[PackageID]*Package),
 	}
 
 	// Check for existing active packages.
@@ -120,12 +123,8 @@
 	b.meta = s.meta
 	s.mu.Unlock()
 
-	//  -- Step 1: assemble the promises graph --
-	var (
-		needExportData = make(map[PackageID]packageHandleKey)
-		packageHandles = make(map[PackageID]*packageHandle)
-	)
-
+	//  -- assemble the promises graph --
+	//
 	// collectPromises collects promises to load packages from export data or
 	// type-check.
 	var collectPromises func(PackageID) error
@@ -152,21 +151,11 @@
 		if err != nil {
 			return err
 		}
-		packageHandles[id] = ph
-
-		if b.needSyntax[id] {
-			// We will need to parse and type-check this package.
-			//
-			// We may also need to parse and type-check if export data is missing,
-			// but that is handled after fetching export data below.
-			b.addNeededFiles(ph)
-		} else if id != "unsafe" { // we can't load export data for unsafe
-			needExportData[id] = ph.key
-		}
 
 		debugName := fmt.Sprintf("check(%s)", id)
 		b.promises[id] = memoize.NewPromise(debugName, func(ctx context.Context, _ interface{}) interface{} {
 			pkg, err := b.processPackage(ctx, ph)
+
 			b.mu.Lock()
 			b.imports[m.ID] = pkgOrErr{pkg, err}
 			b.mu.Unlock()
@@ -178,57 +167,7 @@
 		collectPromises(id)
 	}
 
-	// -- Step 2: collect export data --
-	//
-	// This must be done before parsing in order to determine which files must be
-	// parsed.
-	{
-		var g errgroup.Group
-		for id, key := range needExportData {
-			id := id
-			key := key
-			g.Go(func() error {
-				data, err := filecache.Get(exportDataKind, key)
-				if err != nil {
-					if err == filecache.ErrNotFound {
-						ph := packageHandles[id]
-						b.addNeededFiles(ph) // we will need to parse and type check
-						return nil           // ok: we will type check later
-					}
-					return err
-				}
-				b.mu.Lock()
-				b.exportData[id] = data
-				b.mu.Unlock()
-				return nil
-			})
-		}
-		if err := g.Wait(); err != nil {
-			return pkgs, err
-		}
-	}
-
-	// -- Step 3: parse files required for type checking. --
-	//
-	// Parse all necessary files in parallel. Unfortunately we can't start
-	// parsing each package's file as soon as we discover that it is a syntax
-	// package, because the parseCache cannot add files to an existing FileSet.
-	{
-		var fhs []source.FileHandle
-		for _, fh := range b.needFiles {
-			fhs = append(fhs, fh)
-		}
-		pgfs, fset, err := s.parseCache.parseFiles(ctx, source.ParseFull, fhs...)
-		if err != nil {
-			return pkgs, err
-		}
-		for _, pgf := range pgfs {
-			b.parsedFiles[pgf.URI] = pgf
-		}
-		b.fset = fset
-	}
-
-	// -- Step 4: await results --
+	// -- await type-checking --
 	//
 	// Start a single goroutine for each promise.
 	{
@@ -277,20 +216,41 @@
 	return pkgs, firstErr
 }
 
-// addNeededFiles records the files necessary for type-checking ph, for later
-// parsing.
-func (b *typeCheckBatch) addNeededFiles(ph *packageHandle) {
+// parseFiles gets pre-existing parsed files for fhs from b.parsedFiles, or
+// parses as needed.
+func (b *typeCheckBatch) parseFiles(ctx context.Context, fhs []source.FileHandle) ([]*source.ParsedGoFile, error) {
+	var needFiles []source.FileHandle // files that need to be parsed
+	var needIndex []int               // indexes in fhs of the entries in needFiles
+
+	pgfs := make([]*source.ParsedGoFile, len(fhs))
+	b.mu.Lock()
+	for i, fh := range fhs {
+		if pgf, ok := b.parsedFiles[fh.URI()]; ok {
+			pgfs[i] = pgf
+		} else {
+			needFiles = append(needFiles, fh)
+			needIndex = append(needIndex, i)
+		}
+	}
+	b.mu.Unlock()
+
+	parsed, err := b.parseCache.parseFiles(ctx, b.fset, source.ParseFull, needFiles...)
+	if err != nil {
+		return nil, err
+	}
+
 	b.mu.Lock()
 	defer b.mu.Unlock()
-
-	// Technically for export-only packages we only need compiledGoFiles, but
-	// these slices are usually redundant.
-	for _, fh := range ph.inputs.goFiles {
-		b.needFiles[fh.URI()] = fh
+	for i, pgf := range parsed {
+		idx := needIndex[i]
+		if existing, ok := b.parsedFiles[pgf.URI]; ok { // lost a race
+			pgfs[idx] = existing
+		} else {
+			b.parsedFiles[pgf.URI] = pgf
+			pgfs[idx] = pgf
+		}
 	}
-	for _, fh := range ph.inputs.compiledGoFiles {
-		b.needFiles[fh.URI()] = fh
-	}
+	return pgfs, nil
 }
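
The lost-a-race branch above is the standard compute-outside-the-lock
idiom: check the map under the lock, parse without it, then prefer any
entry that appeared in the meantime, so every caller shares one
canonical *ParsedGoFile per URI. A standalone sketch of the same idiom
with a hypothetical string-keyed cache (not gopls code):

package main

import (
	"fmt"
	"sync"
)

// dedupCache is a hypothetical stand-in for typeCheckBatch.parsedFiles:
// values are computed outside the lock, and the first writer wins.
type dedupCache struct {
	mu sync.Mutex
	m  map[string]*string
}

func (c *dedupCache) get(key string, compute func() *string) *string {
	c.mu.Lock()
	if v, ok := c.m[key]; ok {
		c.mu.Unlock()
		return v
	}
	c.mu.Unlock()

	v := compute() // expensive work (parsing) happens without the lock

	c.mu.Lock()
	defer c.mu.Unlock()
	if existing, ok := c.m[key]; ok {
		return existing // lost a race: keep the canonical entry
	}
	c.m[key] = v
	return v
}

func main() {
	c := &dedupCache{m: make(map[string]*string)}
	mk := func(s string) func() *string { return func() *string { return &s } }
	a := c.get("file.go", mk("first"))
	b := c.get("file.go", mk("second"))
	fmt.Println(a == b, *a) // true first
}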
 
 // processPackage processes the package handle for the type checking batch,
@@ -315,31 +275,32 @@
 		}()
 	}
 
-	b.mu.Lock()
-	data, ok := b.exportData[ph.m.ID]
-	b.mu.Unlock()
+	if b.needSyntax[ph.m.ID] {
+		// We need a syntax package.
+		syntaxPkg, err := b.checkPackage(ctx, ph)
+		if err != nil {
+			return nil, err
+		}
 
-	if ok {
-		// We need export data, and have it.
-		return b.importPackage(ctx, ph.m, data)
+		b.mu.Lock()
+		b.packages[ph.m.ID] = syntaxPkg
+		b.mu.Unlock()
+		return syntaxPkg.pkg.types, nil
 	}
 
-	if !b.needSyntax[ph.m.ID] {
-		// We need only a types.Package, but don't have export data.
-		// Type-check as fast as possible (skipping function bodies).
+	if ph.m.ID == "unsafe" {
+		return types.Unsafe, nil
+	}
+
+	data, err := filecache.Get(exportDataKind, ph.key)
+	if err == filecache.ErrNotFound {
+		// No cached export data: type-check as fast as possible.
 		return b.checkPackageForImport(ctx, ph)
 	}
-
-	// We need a syntax package.
-	syntaxPkg, err := b.checkPackage(ctx, ph)
 	if err != nil {
-		return nil, err
+		return nil, fmt.Errorf("failed to read cache data for %s: %v", ph.m.ID, err)
 	}
-
-	b.mu.Lock()
-	b.packages[ph.m.ID] = syntaxPkg
-	b.mu.Unlock()
-	return syntaxPkg.pkg.types, nil
+	return b.importPackage(ctx, ph.m, data)
 }
 
 // importPackage loads the given package from its export data in p.exportData
@@ -364,26 +325,23 @@
 // checkPackageForImport type checks, but skips function bodies and does not
 // record syntax information.
 func (b *typeCheckBatch) checkPackageForImport(ctx context.Context, ph *packageHandle) (*types.Package, error) {
-	if ph.m.ID == "unsafe" {
-		return types.Unsafe, nil
-	}
 	impMap, errMap := b.importMap(ph.inputs.id)
 	onError := func(e error) {
 		// Ignore errors for exporting.
 	}
 	cfg := b.typesConfig(ph.inputs, onError, impMap, errMap)
-	var files []*ast.File
-	for _, fh := range ph.inputs.compiledGoFiles {
-		pgf := b.parsedFiles[fh.URI()]
-		if pgf == nil {
-			return nil, fmt.Errorf("compiled go file %q failed to parse", fh.URI().Filename())
-		}
-		files = append(files, pgf.File)
+	pgfs, err := b.parseFiles(ctx, ph.inputs.compiledGoFiles)
+	if err != nil {
+		return nil, err
 	}
 	cfg.IgnoreFuncBodies = true
 	pkg := types.NewPackage(string(ph.inputs.pkgPath), string(ph.inputs.name))
 	check := types.NewChecker(cfg, b.fset, pkg, nil)
 
+	files := make([]*ast.File, len(pgfs))
+	for i, pgf := range pgfs {
+		files[i] = pgf.File
+	}
 	_ = check.Files(files) // ignore errors
 
 	// If the context was cancelled, we may have returned a ton of transient
@@ -923,24 +881,19 @@
 
 	// Collect parsed files from the type check pass, capturing parse errors from
 	// compiled files.
-	for _, fh := range inputs.goFiles {
-		pgf := b.parsedFiles[fh.URI()]
-		if pgf == nil {
-			// If go/packages told us that a file is in a package, it should be
-			// parseable (after all, it was parsed by go list).
-			return nil, bug.Errorf("go file %q failed to parse", fh.URI().Filename())
-		}
-		pkg.goFiles = append(pkg.goFiles, pgf)
+	var err error
+	pkg.goFiles, err = b.parseFiles(ctx, inputs.goFiles)
+	if err != nil {
+		return nil, err
 	}
-	for _, fh := range inputs.compiledGoFiles {
-		pgf := b.parsedFiles[fh.URI()]
-		if pgf == nil {
-			return nil, fmt.Errorf("compiled go file %q failed to parse", fh.URI().Filename())
-		}
+	pkg.compiledGoFiles, err = b.parseFiles(ctx, inputs.compiledGoFiles)
+	if err != nil {
+		return nil, err
+	}
+	for _, pgf := range pkg.compiledGoFiles {
 		if pgf.ParseErr != nil {
 			pkg.parseErrors = append(pkg.parseErrors, pgf.ParseErr)
 		}
-		pkg.compiledGoFiles = append(pkg.compiledGoFiles, pgf)
 	}
 
 	// Use the default type information for the unsafe package.
diff --git a/gopls/internal/lsp/cache/mod_tidy.go b/gopls/internal/lsp/cache/mod_tidy.go
index 8818a28..00e0e5c 100644
--- a/gopls/internal/lsp/cache/mod_tidy.go
+++ b/gopls/internal/lsp/cache/mod_tidy.go
@@ -8,6 +8,7 @@
 	"context"
 	"fmt"
 	"go/ast"
+	"go/token"
 	"io/ioutil"
 	"os"
 	"path/filepath"
@@ -460,7 +461,7 @@
 //
 // TODO(rfindley): this should key off source.ImportPath.
 func parseImports(ctx context.Context, s *snapshot, files []source.FileHandle) (map[string]bool, error) {
-	pgfs, _, err := s.parseCache.parseFiles(ctx, source.ParseHeader, files...)
+	pgfs, err := s.parseCache.parseFiles(ctx, token.NewFileSet(), source.ParseHeader, files...)
 	if err != nil { // e.g. context cancellation
 		return nil, err
 	}
diff --git a/gopls/internal/lsp/cache/parse.go b/gopls/internal/lsp/cache/parse.go
index 55b1510..59420be 100644
--- a/gopls/internal/lsp/cache/parse.go
+++ b/gopls/internal/lsp/cache/parse.go
@@ -27,7 +27,7 @@
 // ParseGo parses the file whose contents are provided by fh, using a cache.
 // The resulting tree may have been fixed up.
 func (s *snapshot) ParseGo(ctx context.Context, fh source.FileHandle, mode parser.Mode) (*source.ParsedGoFile, error) {
-	pgfs, _, err := s.parseCache.parseFiles(ctx, mode, fh)
+	pgfs, err := s.parseCache.parseFiles(ctx, token.NewFileSet(), mode, fh)
 	if err != nil {
 		return nil, err
 	}
diff --git a/gopls/internal/lsp/cache/parse_cache.go b/gopls/internal/lsp/cache/parse_cache.go
index 8bd64a5..3511b75 100644
--- a/gopls/internal/lsp/cache/parse_cache.go
+++ b/gopls/internal/lsp/cache/parse_cache.go
@@ -9,15 +9,50 @@
 	"context"
 	"go/parser"
 	"go/token"
+	"math/bits"
 	"runtime"
-	"sort"
 	"sync"
 
 	"golang.org/x/sync/errgroup"
 	"golang.org/x/tools/gopls/internal/lsp/source"
 	"golang.org/x/tools/internal/memoize"
+	"golang.org/x/tools/internal/tokeninternal"
 )
 
+// reservedForParsing defines the room in the token.Pos space reserved for
+// cached parsed files.
+//
+// Files parsed through the parseCache are guaranteed not to have overlapping
+// spans: the parseCache tracks a monotonic base for newly parsed files.
+//
+// By offsetting the initial base of a FileSet, we can allow other operations
+// accepting the FileSet (such as the gcimporter) to add new files using the
+// normal FileSet APIs without overlapping with cached parsed files.
+//
+// Note that 1<<60 represents an exabyte of parsed data, more than any gopls
+// process can ever parse.
+//
+// On 32-bit systems we don't cache parse results (see parseFiles).
+const reservedForParsing = 1 << (bits.UintSize - 4)
+
+// fileSetWithBase returns a new token.FileSet with Base() equal to the
+// requested base, or 1 if base < 1.
+func fileSetWithBase(base int) *token.FileSet {
+	fset := token.NewFileSet()
+	if base > 1 {
+		// Add a dummy file to set the base of fset. We won't ever use the
+		// resulting FileSet, so it doesn't matter how we achieve this.
+		//
+		// FileSets leave a 1-byte padding between files, so we set the base by
+		// adding a zero-length file at base-1.
+		fset.AddFile("", base-1, 0)
+	}
+	if base >= 1 && fset.Base() != base {
+		panic("unexpected FileSet.Base")
+	}
+	return fset
+}
+
 // This file contains an implementation of a bounded-size parse cache, that
 // offsets the base token.Pos value of each cached file so that they may be
 // later described by a single dedicated FileSet.
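
The one-byte padding mentioned in fileSetWithBase is also what the
cache's base bookkeeping (allocateSpaceLocked, below) relies on: a file
of the given size added at base consumes Pos space through base+size,
and the next free base is base+size+1. A standalone check:

package main

import (
	"fmt"
	"go/token"
)

func main() {
	fset := token.NewFileSet()
	base := fset.Base() // 1 for a fresh FileSet

	const size = 100
	fset.AddFile("a.go", base, size)

	// The next free base accounts for the file plus one byte of padding,
	// matching nextBase += size + 1 in the parse cache.
	fmt.Println(fset.Base() == base+size+1) // true
}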
@@ -34,9 +69,8 @@
 //     cache hits for low-latency operations.
 const parseCacheMaxFiles = 200
 
-// parsePadding is additional padding allocated between entries in the parse
-// cache to allow for increases in length (such as appending missing braces)
-// caused by fixAST.
+// parsePadding is additional padding allocated to allow for increases in
+// length (such as appending missing braces) caused by fixAST.
 //
 // This is used to mitigate a chicken and egg problem: we must know the base
 // offset of the file we're about to parse, before we start parsing, and yet
@@ -57,11 +91,11 @@
 // caching) multiple files. This is necessary for type-checking, where files
 // must be parsed in a common fileset.
 type parseCache struct {
-	mu         sync.Mutex
-	m          map[parseKey]*parseCacheEntry
-	lru        queue     // min-atime priority queue of *parseCacheEntry
-	clock      uint64    // clock time, incremented when the cache is updated
-	nextOffset token.Pos // token.Pos offset for the next parsed file
+	mu       sync.Mutex
+	m        map[parseKey]*parseCacheEntry
+	lru      queue  // min-atime priority queue of *parseCacheEntry
+	clock    uint64 // clock time, incremented when the cache is updated
+	nextBase int    // base offset for the next parsed file
 }
 
 // parseKey uniquely identifies a parsed Go file.
@@ -77,10 +111,8 @@
 	lruIndex int
 }
 
-// startParse prepares a parsing pass, using the following steps:
-//   - search for cache hits
-//   - create new promises for cache misses
-//   - store as many new promises in the cache as space will allow
+// startParse prepares a parsing pass, creating new promises in the cache for
+// any cache misses.
 //
 // The resulting slice has an entry for every given file handle, though some
 // entries may be nil if there was an error reading the file (in which case the
@@ -124,16 +156,32 @@
 			continue
 		}
 
-		// ...otherwise, create a new promise to parse with a non-overlapping offset
-		fset := token.NewFileSet()
-		if c.nextOffset > 0 {
-			// Add a dummy file so that this parsed file does not overlap with others.
-			fset.AddFile("", 1, int(c.nextOffset))
-		}
-		c.nextOffset += token.Pos(len(content) + parsePadding + 1) // leave room for src fixes
-		fh := fh
+		// ...otherwise, create a new promise to parse with a non-overlapping
+		// offset. We allocate 2*len(content)+parsePadding to allow for re-parsing
+		// once inside of parseGoSrc without exceeding the allocated space.
+		base, nextBase, fset := c.allocateSpaceLocked(2*len(content) + parsePadding)
+		uri := fh.URI()
 		promise := memoize.NewPromise(string(fh.URI()), func(ctx context.Context, _ interface{}) interface{} {
-			return parseGoSrc(ctx, fset, fh.URI(), content, mode)
+			pgf := parseGoSrc(ctx, fset, uri, content, mode)
+			file := pgf.Tok
+			if file.Base()+file.Size()+1 > nextBase {
+				// The parsed file exceeds its allocated space, likely due to multiple
+				// passes of src fixing. In this case, we have no choice but to re-do
+				// the operation with the correct size.
+				//
+				// Even though the final successful parse requires only file.Size()
+				// bytes of Pos space, we need to accommodate all the missteps to get
+				// there, as parseGoSrc will repeat them.
+				actual := file.Base() + file.Size() - base // actual size consumed, after re-parsing
+				c.mu.Lock()
+				_, next, fset := c.allocateSpaceLocked(actual)
+				c.mu.Unlock()
+				pgf = parseGoSrc(ctx, fset, uri, content, mode)
+				if pgf.Tok.Base()+pgf.Tok.Size() != next-1 {
+					panic("internal error: non-deterministic parsing result")
+				}
+			}
+			return pgf
 		})
 		promises[i] = promise
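
The overflow check above compares the span the parse actually consumed
(Base() through Base()+Size(), plus the FileSet's one byte of padding)
against the end of the allocation. A standalone sketch of that
arithmetic, with a small stand-in for parsePadding (really 1<<10) and no
pre-allocation enforcement:

package main

import (
	"fmt"
	"go/parser"
	"go/token"
)

func main() {
	src := "package p\nvar x = 1\n"

	// startParse allocates 2*len(src)+parsePadding bytes of Pos space;
	// padding here is a small stand-in for the real parsePadding.
	const padding = 16
	fset := token.NewFileSet()
	base := fset.Base()
	nextBase := base + 2*len(src) + padding + 1

	f, err := parser.ParseFile(fset, "p.go", src, 0)
	if err != nil {
		panic(err)
	}

	// The overflow test from the promise body: did the parse stay within
	// its allocation (including the FileSet's one byte of padding)?
	tok := fset.File(f.Pos())
	fmt.Println(tok.Base()+tok.Size()+1 <= nextBase) // true: no re-parse
}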
 
@@ -163,24 +211,60 @@
 	return promises, firstReadError
 }
 
-// parseFiles returns a ParsedGoFile for the given file handles in the
+// allocateSpaceLocked reserves the next size bytes of token.Pos space in the
+// cache.
+//
+// It returns the resulting file base, next base, and an offset FileSet to use
+// for parsing.
+func (c *parseCache) allocateSpaceLocked(size int) (int, int, *token.FileSet) {
+	base := c.nextBase
+	c.nextBase += size + 1
+	return base, c.nextBase, fileSetWithBase(base)
+}
+
+// The parse cache is not supported on 32-bit systems, where reservedForParsing
+// is too small to be viable.
+func parseCacheSupported() bool {
+	return bits.UintSize != 32
+}
+
+// parseFiles returns a ParsedGoFile for each file handle in fhs, in the
 // requested parse mode.
 //
-// If parseFiles returns an error, it still returns a slice,
-// but with a nil entry for each file that could not be parsed.
-//
-// The second result is a FileSet describing all resulting parsed files.
-//
 // For parsed files that already exist in the cache, access time will be
 // updated. For others, parseFiles will parse and store as many results in the
 // cache as space allows.
-func (c *parseCache) parseFiles(ctx context.Context, mode parser.Mode, fhs ...source.FileHandle) ([]*source.ParsedGoFile, *token.FileSet, error) {
-	promises, firstReadError := c.startParse(mode, fhs...)
+//
+// The token.File for each resulting parsed file will be added to the provided
+// FileSet, using the tokeninternal.AddExistingFiles API. Consequently, the
+// given fset should only be used in other APIs if its base is >=
+// reservedForParsing.
+//
+// If parseFiles returns an error, it still returns a slice,
+// but with a nil entry for each file that could not be parsed.
+func (c *parseCache) parseFiles(ctx context.Context, fset *token.FileSet, mode parser.Mode, fhs ...source.FileHandle) ([]*source.ParsedGoFile, error) {
+	pgfs := make([]*source.ParsedGoFile, len(fhs))
+
+	// Temporary fall-back for 32-bit systems, where reservedForParsing is too
+	// small to be viable. We don't actually support 32-bit systems, so this
+	// workaround is only for tests and can be removed when we stop running
+	// 32-bit TryBots for gopls.
+	if bits.UintSize == 32 {
+		for i, fh := range fhs {
+			var err error
+			pgfs[i], err = parseGoImpl(ctx, fset, fh, mode)
+			if err != nil {
+				return pgfs, err
+			}
+		}
+		return pgfs, nil
+	}
+
+	promises, firstErr := c.startParse(mode, fhs...)
 
 	// Await all parsing.
 	var g errgroup.Group
 	g.SetLimit(runtime.GOMAXPROCS(-1)) // parsing is CPU-bound.
-	pgfs := make([]*source.ParsedGoFile, len(fhs))
 	for i, promise := range promises {
 		if promise == nil {
 			continue
@@ -196,77 +280,22 @@
 			return nil
 		})
 	}
-	if err := g.Wait(); err != nil {
-		return nil, nil, err
+
+	if err := g.Wait(); err != nil && firstErr == nil {
+		firstErr = err
 	}
 
-	// Construct a token.FileSet mapping all parsed files, and update their
-	// Tok to the corresponding file in the new fileset.
-	//
-	// In the unlikely event that a parsed file no longer fits in its allocated
-	// space in the FileSet range, it will need to be re-parsed.
-
+	// Augment the FileSet to map all parsed files.
 	var tokenFiles []*token.File
-	fileIndex := make(map[*token.File]int) // to look up original indexes after sorting
-	for i, pgf := range pgfs {
+	for _, pgf := range pgfs {
 		if pgf == nil {
 			continue
 		}
-		fileIndex[pgf.Tok] = i
 		tokenFiles = append(tokenFiles, pgf.Tok)
 	}
+	tokeninternal.AddExistingFiles(fset, tokenFiles)
 
-	sort.Slice(tokenFiles, func(i, j int) bool {
-		return tokenFiles[i].Base() < tokenFiles[j].Base()
-	})
-
-	var needReparse []int // files requiring reparsing
-	out := tokenFiles[:0]
-	for i, f := range tokenFiles {
-		if i < len(tokenFiles)-1 && f.Base()+f.Size() >= tokenFiles[i+1].Base() {
-			if f != tokenFiles[i+1] { // no need to re-parse duplicates
-				needReparse = append(needReparse, fileIndex[f])
-			}
-		} else {
-			out = append(out, f)
-		}
-	}
-	fset := source.FileSetFor(out...)
-
-	// Re-parse any remaining files using the stitched fileSet.
-	for _, i := range needReparse {
-		// Start from scratch, rather than using ParsedGoFile.Src, so that source
-		// fixing operates exactly the same (note that fixing stops after a limited
-		// number of tries).
-		fh := fhs[i]
-		content, err := fh.Content()
-		if err != nil {
-			if firstReadError == nil {
-				firstReadError = err
-			}
-			continue
-		}
-		pgfs[i] = parseGoSrc(ctx, fset, fh.URI(), content, mode)
-	}
-
-	// Ensure each PGF refers to a token.File from the new FileSet.
-	for i, pgf := range pgfs {
-		if pgf == nil {
-			continue
-		}
-		newTok := fset.File(token.Pos(pgf.Tok.Base()))
-		if newTok == nil {
-			panic("internal error: missing tok for " + pgf.URI)
-		}
-		if newTok.Base() != pgf.Tok.Base() || newTok.Size() != pgf.Tok.Size() {
-			panic("internal error: mismatching token.File in synthetic FileSet")
-		}
-		pgf2 := *pgf
-		pgf2.Tok = newTok
-		pgfs[i] = &pgf2
-	}
-
-	return pgfs, fset, firstReadError
+	return pgfs, firstErr
 }
 
 // -- priority queue boilerplate --
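
For reference, the AddExistingFiles call grafts already-built
token.Files into another FileSet. A minimal sketch; tokeninternal is
internal to golang.org/x/tools, so this compiles only inside that
module, and the claim about Base() advancing is my reading of the API:

package main

import (
	"fmt"
	"go/token"

	"golang.org/x/tools/internal/tokeninternal"
)

func main() {
	// Build a token.File in one FileSet, standing in for a cached parse.
	src := token.NewFileSet()
	f := src.AddFile("cached.go", 100, 10) // occupies Pos range [100, 110]

	// Graft the same *token.File into a second FileSet: dst can now
	// resolve positions inside f without re-parsing, and (assumption) its
	// Base() is advanced past f so future AddFile calls cannot overlap it.
	dst := token.NewFileSet()
	tokeninternal.AddExistingFiles(dst, []*token.File{f})

	fmt.Println(dst.File(token.Pos(105)) == f, dst.Base() > 110) // true true
}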
diff --git a/gopls/internal/lsp/cache/parse_cache_test.go b/gopls/internal/lsp/cache/parse_cache_test.go
index cc28a6b..817c466 100644
--- a/gopls/internal/lsp/cache/parse_cache_test.go
+++ b/gopls/internal/lsp/cache/parse_cache_test.go
@@ -8,43 +8,50 @@
 	"context"
 	"fmt"
 	"go/token"
+	"math/bits"
 	"testing"
 
 	"golang.org/x/tools/gopls/internal/lsp/source"
 	"golang.org/x/tools/gopls/internal/span"
 )
 
+func skipIfNoParseCache(t *testing.T) {
+	if bits.UintSize == 32 {
+		t.Skip("the parse cache is not supported on 32-bit systems")
+	}
+}
+
 func TestParseCache(t *testing.T) {
+	skipIfNoParseCache(t)
+
 	ctx := context.Background()
 	uri := span.URI("file:///myfile")
 	fh := makeFakeFileHandle(uri, []byte("package p\n\nconst _ = \"foo\""))
+	fset := token.NewFileSet()
 
 	var cache parseCache
-	pgfs1, _, err := cache.parseFiles(ctx, source.ParseFull, fh)
+	pgfs1, err := cache.parseFiles(ctx, fset, source.ParseFull, fh)
 	if err != nil {
 		t.Fatal(err)
 	}
 	pgf1 := pgfs1[0]
-	pgfs2, _, err := cache.parseFiles(ctx, source.ParseFull, fh)
+	pgfs2, err := cache.parseFiles(ctx, fset, source.ParseFull, fh)
 	pgf2 := pgfs2[0]
 	if err != nil {
 		t.Fatal(err)
 	}
-	if pgf1.File != pgf2.File {
+	if pgf1 != pgf2 {
 		t.Errorf("parseFiles(%q): unexpected cache miss on repeated call", uri)
 	}
 
 	// Fill up the cache with other files, but don't evict the file above.
 	files := []source.FileHandle{fh}
 	files = append(files, dummyFileHandles(parseCacheMaxFiles-1)...)
-	pgfs3, fset, err := cache.parseFiles(ctx, source.ParseFull, files...)
+	pgfs3, err := cache.parseFiles(ctx, fset, source.ParseFull, files...)
 	pgf3 := pgfs3[0]
-	if pgf3.File != pgf1.File {
+	if pgf3 != pgf1 {
 		t.Errorf("parseFiles(%q, ...): unexpected cache miss", uri)
 	}
-	if pgf3.Tok == pgf1.Tok {
-		t.Errorf("parseFiles(%q, ...): unexpectedly matching token file", uri)
-	}
 	if pgf3.Tok.Base() != pgf1.Tok.Base() || pgf3.Tok.Size() != pgf1.Tok.Size() {
 		t.Errorf("parseFiles(%q, ...): result.Tok has base: %d, size: %d, want (%d, %d)", uri, pgf3.Tok.Base(), pgf3.Tok.Size(), pgf1.Tok.Base(), pgf1.Tok.Size())
 	}
@@ -54,20 +61,22 @@
 
 	// Now overwrite the cache, after which we should get new results.
 	files = dummyFileHandles(parseCacheMaxFiles)
-	_, _, err = cache.parseFiles(ctx, source.ParseFull, files...)
+	_, err = cache.parseFiles(ctx, fset, source.ParseFull, files...)
 	if err != nil {
 		t.Fatal(err)
 	}
-	pgfs4, _, err := cache.parseFiles(ctx, source.ParseFull, fh)
+	pgfs4, err := cache.parseFiles(ctx, fset, source.ParseFull, fh)
 	if err != nil {
 		t.Fatal(err)
 	}
-	if pgfs4[0].File == pgf1.File {
+	if pgfs4[0] == pgf1 {
 		t.Errorf("parseFiles(%q): unexpected cache hit after overwriting cache", uri)
 	}
 }
 
 func TestParseCache_Reparsing(t *testing.T) {
+	skipIfNoParseCache(t)
+
 	defer func(padding int) {
 		parsePadding = padding
 	}(parsePadding)
@@ -80,23 +89,25 @@
 
 	// Parsing should succeed even though we overflow the padding.
 	var cache parseCache
-	_, _, err := cache.parseFiles(context.Background(), source.ParseFull, files...)
+	_, err := cache.parseFiles(context.Background(), token.NewFileSet(), source.ParseFull, files...)
 	if err != nil {
 		t.Fatal(err)
 	}
 }
 
 func TestParseCache_Duplicates(t *testing.T) {
+	skipIfNoParseCache(t)
+
 	ctx := context.Background()
 	uri := span.URI("file:///myfile")
 	fh := makeFakeFileHandle(uri, []byte("package p\n\nconst _ = \"foo\""))
 
 	var cache parseCache
-	pgfs, _, err := cache.parseFiles(ctx, source.ParseFull, fh, fh)
+	pgfs, err := cache.parseFiles(ctx, token.NewFileSet(), source.ParseFull, fh, fh)
 	if err != nil {
 		t.Fatal(err)
 	}
-	if pgfs[0].File != pgfs[1].File {
+	if pgfs[0] != pgfs[1] {
 		t.Errorf("parseFiles(fh, fh): = [%p, %p], want duplicate files", pgfs[0].File, pgfs[1].File)
 	}
 }
diff --git a/gopls/internal/lsp/cache/snapshot.go b/gopls/internal/lsp/cache/snapshot.go
index e0b9a23..406a428 100644
--- a/gopls/internal/lsp/cache/snapshot.go
+++ b/gopls/internal/lsp/cache/snapshot.go
@@ -2059,9 +2059,10 @@
 		return false, false, false
 	}
 
+	fset := token.NewFileSet()
 	// Parse headers to compare package names and imports.
-	oldHeads, _, oldErr := lockedSnapshot.parseCache.parseFiles(ctx, source.ParseHeader, oldFH)
-	newHeads, _, newErr := lockedSnapshot.parseCache.parseFiles(ctx, source.ParseHeader, newFH)
+	oldHeads, oldErr := lockedSnapshot.parseCache.parseFiles(ctx, fset, source.ParseHeader, oldFH)
+	newHeads, newErr := lockedSnapshot.parseCache.parseFiles(ctx, fset, source.ParseHeader, newFH)
 
 	if oldErr != nil || newErr != nil {
 		// TODO(rfindley): we can get here if newFH does not exist. There is
@@ -2112,8 +2113,8 @@
 	// Note: if this affects performance we can probably avoid parsing in the
 	// common case by first scanning the source for potential comments.
 	if !invalidate {
-		origFulls, _, oldErr := lockedSnapshot.parseCache.parseFiles(ctx, source.ParseFull, oldFH)
-		newFulls, _, newErr := lockedSnapshot.parseCache.parseFiles(ctx, source.ParseFull, newFH)
+		origFulls, oldErr := lockedSnapshot.parseCache.parseFiles(ctx, fset, source.ParseFull, oldFH)
+		newFulls, newErr := lockedSnapshot.parseCache.parseFiles(ctx, fset, source.ParseFull, newFH)
 		if oldErr == nil && newErr == nil {
 			invalidate = magicCommentsChanged(origFulls[0].File, newFulls[0].File)
 		} else {
diff --git a/gopls/internal/lsp/cache/symbols.go b/gopls/internal/lsp/cache/symbols.go
index 1611334..a254a0c 100644
--- a/gopls/internal/lsp/cache/symbols.go
+++ b/gopls/internal/lsp/cache/symbols.go
@@ -62,7 +62,7 @@
 // It may use a parsed file already present in the cache but
 // otherwise does not populate the cache.
 func symbolizeImpl(ctx context.Context, snapshot *snapshot, fh source.FileHandle) ([]source.Symbol, error) {
-	pgfs, _, err := snapshot.parseCache.parseFiles(ctx, source.ParseFull, fh)
+	pgfs, err := snapshot.parseCache.parseFiles(ctx, token.NewFileSet(), source.ParseFull, fh)
 	if err != nil {
 		return nil, err
 	}