gopls/internal/lsp/cache: simplify canonical URI cache
The View's URI canonicalization cache entry, fileBase,
used to hold a list of URIs even though only the first
is ever needed. This change simplifies it to a pair
of (URI, filename). This should reduce allocation slightly.
Also:
- Eliminate the unnecessary pointer type to further reduce allocation.
- Use a dedicated mutex for the two maps.
- Update the names to better reflect the purpose.
- Document various failed approaches to optimization
and two problems in the existing logic, one
of correctness, the other of performance.
Change-Id: Ic74ae4bae8864de292fe97d26c5f9aacf2367961
Reviewed-on: https://go-review.googlesource.com/c/tools/+/454435
Run-TryBot: Alan Donovan <adonovan@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Robert Findley <rfindley@google.com>
gopls-CI: kokoro <noreply+kokoro@google.com>
diff --git a/gopls/internal/lsp/cache/session.go b/gopls/internal/lsp/cache/session.go
index 51c8bb7..cb2570e 100644
--- a/gopls/internal/lsp/cache/session.go
+++ b/gopls/internal/lsp/cache/session.go
@@ -235,8 +235,8 @@
folder: folder,
moduleUpgrades: map[span.URI]map[string]string{},
vulns: map[span.URI]*govulncheck.Result{},
- filesByURI: map[span.URI]*fileBase{},
- filesByBase: map[string][]*fileBase{},
+ filesByURI: make(map[span.URI]span.URI),
+ filesByBase: make(map[string][]canonicalURI),
rootURI: root,
explicitGowork: goworkURI,
workspaceInformation: *wsInfo,
@@ -510,8 +510,8 @@
// Apply the changes to all affected views.
for _, view := range changedViews {
- // Make sure that the file is added to the view.
- _ = view.getFile(c.URI)
+ // Make sure that the file is added to the view's knownFiles set.
+ view.canonicalURI(c.URI, true) // ignore result
if _, ok := views[view]; !ok {
views[view] = make(map[span.URI]*fileChange)
}
diff --git a/gopls/internal/lsp/cache/snapshot.go b/gopls/internal/lsp/cache/snapshot.go
index 80de11a..1ef4622 100644
--- a/gopls/internal/lsp/cache/snapshot.go
+++ b/gopls/internal/lsp/cache/snapshot.go
@@ -1287,12 +1287,12 @@
}
func (s *snapshot) FindFile(uri span.URI) source.VersionedFileHandle {
- f := s.view.getFile(uri)
+ uri, _ = s.view.canonicalURI(uri, true)
s.mu.Lock()
defer s.mu.Unlock()
- result, _ := s.files.Get(f.URI())
+ result, _ := s.files.Get(uri)
return result
}
@@ -1302,11 +1302,22 @@
// GetVersionedFile succeeds even if the file does not exist. A non-nil error return
// indicates some type of internal error, for example if ctx is cancelled.
func (s *snapshot) GetVersionedFile(ctx context.Context, uri span.URI) (source.VersionedFileHandle, error) {
- f := s.view.getFile(uri)
+ uri, _ = s.view.canonicalURI(uri, true)
s.mu.Lock()
defer s.mu.Unlock()
- return s.getFileLocked(ctx, f)
+
+ if fh, ok := s.files.Get(uri); ok {
+ return fh, nil
+ }
+
+ fh, err := s.view.cache.getFile(ctx, uri) // read the file
+ if err != nil {
+ return nil, err
+ }
+ closed := &closedFile{fh}
+ s.files.Set(uri, closed)
+ return closed, nil
}
// GetFile implements the fileSource interface by wrapping GetVersionedFile.
@@ -1314,20 +1325,6 @@
return s.GetVersionedFile(ctx, uri)
}
-func (s *snapshot) getFileLocked(ctx context.Context, f *fileBase) (source.VersionedFileHandle, error) {
- if fh, ok := s.files.Get(f.URI()); ok {
- return fh, nil
- }
-
- fh, err := s.view.cache.getFile(ctx, f.URI()) // read the file
- if err != nil {
- return nil, err
- }
- closed := &closedFile{fh}
- s.files.Set(f.URI(), closed)
- return closed, nil
-}
-
func (s *snapshot) IsOpen(uri span.URI) bool {
s.mu.Lock()
defer s.mu.Unlock()
diff --git a/gopls/internal/lsp/cache/view.go b/gopls/internal/lsp/cache/view.go
index bcb6f3a..bc623ff 100644
--- a/gopls/internal/lsp/cache/view.go
+++ b/gopls/internal/lsp/cache/view.go
@@ -65,10 +65,12 @@
vulns map[span.URI]*govulncheck.Result
- // keep track of files by uri and by basename, a single file may be mapped
- // to multiple uris, and the same basename may map to multiple files
- filesByURI map[span.URI]*fileBase
- filesByBase map[string][]*fileBase
+ // filesByURI maps URIs to the canonical URI for the file it denotes.
+ // We also keep a set of candidates for a given basename
+ // to reduce the set of pairs that need to be tested for sameness.
+ filesByMu sync.Mutex
+ filesByURI map[span.URI]span.URI // key is noncanonical URI (alias)
+ filesByBase map[string][]canonicalURI // key is basename
// initCancelFirstAttempt can be used to terminate the view's first
// attempt at initialization.
@@ -205,28 +207,6 @@
tempModfile
)
-// fileBase holds the common functionality for all files.
-// It is intended to be embedded in the file implementations
-type fileBase struct {
- uris []span.URI
- fname string
-
- view *View
-}
-
-func (f *fileBase) URI() span.URI {
- return f.uris[0]
-}
-
-func (f *fileBase) filename() string {
- return f.fname
-}
-
-func (f *fileBase) addURI(uri span.URI) int {
- f.uris = append(f.uris, uri)
- return len(f.uris)
-}
-
func (v *View) ID() string { return v.id }
// tempModFile creates a temporary go.mod file based on the contents
@@ -493,18 +473,6 @@
}
}
-func (v *View) mapFile(uri span.URI, f *fileBase) {
- v.filesByURI[uri] = f
- if f.addURI(uri) == 1 {
- basename := basename(f.filename())
- v.filesByBase[basename] = append(v.filesByBase[basename], f)
- }
-}
-
-func basename(filename string) string {
- return strings.ToLower(filepath.Base(filename))
-}
-
func (v *View) relevantChange(c source.FileModification) bool {
// If the file is known to the view, the change is relevant.
if v.knownFile(c.URI) {
@@ -530,64 +498,73 @@
return v.contains(c.URI)
}
+// knownFile reports whether the specified valid URI (or an alias) is known to the view.
func (v *View) knownFile(uri span.URI) bool {
- v.mu.Lock()
- defer v.mu.Unlock()
-
- f, err := v.findFile(uri)
- return f != nil && err == nil
+ _, known := v.canonicalURI(uri, false)
+ return known
}
-// getFile returns a file for the given URI.
-func (v *View) getFile(uri span.URI) *fileBase {
- v.mu.Lock()
- defer v.mu.Unlock()
-
- f, _ := v.findFile(uri)
- if f != nil {
- return f
- }
- f = &fileBase{
- view: v,
- fname: uri.Filename(),
- }
- v.mapFile(uri, f)
- return f
+// TODO(adonovan): opt: eliminate 'filename' optimization. I doubt the
+// cost of allocation is significant relative to the
+// stat/open/fstat/close operations that follow on Windows.
+type canonicalURI struct {
+ uri span.URI
+ filename string // = uri.Filename(), an optimization (on Windows)
}
-// findFile checks the cache for any file matching the given uri.
+// canonicalURI returns the canonical URI that denotes the same file
+// as uri, which may differ due to case insensitivity, unclean paths,
+// soft or hard links, and so on. If no previous alias was found, or
+// the file is missing, insert determines whether to make uri the
+// canonical representative of the file or to return false.
//
-// An error is only returned for an irreparable failure, for example, if the
-// filename in question does not exist.
-func (v *View) findFile(uri span.URI) (*fileBase, error) {
- if f := v.filesByURI[uri]; f != nil {
- // a perfect match
- return f, nil
+// The cache grows indefinitely without invalidation: file system
+// operations may cause two URIs that used to denote the same file to
+// no longer to do so. Also, the basename cache grows without bound.
+// TODO(adonovan): fix both bugs.
+func (v *View) canonicalURI(uri span.URI, insert bool) (span.URI, bool) {
+ v.filesByMu.Lock()
+ defer v.filesByMu.Unlock()
+
+ // Have we seen this exact URI before?
+ if canonical, ok := v.filesByURI[uri]; ok {
+ return canonical, true
}
- // no exact match stored, time to do some real work
- // check for any files with the same basename
- fname := uri.Filename()
- basename := basename(fname)
+
+ // Inspect all candidates with the same lowercase basename.
+ // This heuristic is easily defeated by symbolic links to files.
+ // Files with some basenames (e.g. doc.go) are very numerous.
+ //
+ // The set of candidates grows without bound, and incurs a
+ // linear sequence of SameFile queries to the file system.
+ //
+ // It is tempting to fetch the device/inode pair that
+ // uniquely identifies a file once, and then compare those
+ // pairs, but that would cause us to cache stale file system
+ // state (in addition to the filesByURI staleness).
+ filename := uri.Filename()
+ basename := strings.ToLower(filepath.Base(filename))
if candidates := v.filesByBase[basename]; candidates != nil {
- pathStat, err := os.Stat(fname)
- if os.IsNotExist(err) {
- return nil, err
- }
- if err != nil {
- return nil, nil // the file may exist, return without an error
- }
- for _, c := range candidates {
- if cStat, err := os.Stat(c.filename()); err == nil {
- if os.SameFile(pathStat, cStat) {
- // same file, map it
- v.mapFile(uri, c)
- return c, nil
+ if pathStat, _ := os.Stat(filename); pathStat != nil {
+ for _, c := range candidates {
+ if cStat, _ := os.Stat(c.filename); cStat != nil {
+ // On Windows, SameFile is more expensive as it must
+ // open the file and use the equivalent of fstat(2).
+ if os.SameFile(pathStat, cStat) {
+ v.filesByURI[uri] = c.uri
+ return c.uri, true
+ }
}
}
}
}
- // no file with a matching name was found, it wasn't in our cache
- return nil, nil
+
+ // No candidates, stat failed, or no candidate matched.
+ if insert {
+ v.filesByURI[uri] = uri
+ v.filesByBase[basename] = append(v.filesByBase[basename], canonicalURI{uri, filename})
+ }
+ return uri, insert
}
// shutdown releases resources associated with the view, and waits for ongoing