gopls/internal/lsp: optimize checks for ignored files

Optimize checking for ignored files to avoid unnecessary checks, and
only build prefixes once. Along the way, fix a bug where path segments
were not handled correctly in the ignore check. Encapsulate the check to
make this easy to test.

As a result, the DiagnoseChange/google-cloud-go benchmark improved ~5x
from ~1.5s to 300ms.

Also remove span.Dir, which tended to lead to unnecessary
filepath->span->filepath conversions. Inline it in the one place where
it was correct.

For golang/go#60089

Change-Id: Id24d05b504b43e6a6d9b77b5b578583e1351de31
Reviewed-on: https://go-review.googlesource.com/c/tools/+/494097
Reviewed-by: Alan Donovan <adonovan@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Robert Findley <rfindley@google.com>
gopls-CI: kokoro <noreply+kokoro@google.com>
diff --git a/gopls/internal/lsp/cache/snapshot.go b/gopls/internal/lsp/cache/snapshot.go
index 6bd4be8..7e9a06f 100644
--- a/gopls/internal/lsp/cache/snapshot.go
+++ b/gopls/internal/lsp/cache/snapshot.go
@@ -185,6 +185,11 @@
 
 	// pkgIndex is an index of package IDs, for efficient storage of typerefs.
 	pkgIndex *typerefs.PackageIndex
+
+	// Only compute module prefixes once, as they are used with high frequency to
+	// detect ignored files.
+	ignoreFilterOnce sync.Once
+	ignoreFilter     *ignoreFilter
 }
 
 var globalSnapshotID uint64
@@ -1195,7 +1200,7 @@
 func moduleForURI(modFiles map[span.URI]struct{}, uri span.URI) span.URI {
 	var match span.URI
 	for modURI := range modFiles {
-		if !source.InDir(span.Dir(modURI).Filename(), uri.Filename()) {
+		if !source.InDir(filepath.Dir(modURI.Filename()), uri.Filename()) {
 			continue
 		}
 		if len(modURI) > len(match) {
diff --git a/gopls/internal/lsp/cache/view.go b/gopls/internal/lsp/cache/view.go
index 644a43d..ae98836 100644
--- a/gopls/internal/lsp/cache/view.go
+++ b/gopls/internal/lsp/cache/view.go
@@ -588,22 +588,57 @@
 	v.snapshotWG.Wait()
 }
 
+// While go list ./... skips directories starting with '.', '_', or 'testdata',
+// gopls may still load them via file queries. Explicitly filter them out.
 func (s *snapshot) IgnoredFile(uri span.URI) bool {
-	filename := uri.Filename()
-	var prefixes []string
-	if len(s.workspaceModFiles) == 0 {
-		for _, entry := range filepath.SplitList(s.view.gopath) {
-			prefixes = append(prefixes, filepath.Join(entry, "src"))
-		}
-	} else {
-		prefixes = append(prefixes, s.view.gomodcache)
-		for m := range s.workspaceModFiles {
-			prefixes = append(prefixes, span.Dir(m).Filename())
+	// Fast path: if uri doesn't contain '.', '_', or 'testdata', it is not
+	// possible that it is ignored.
+	{
+		uriStr := string(uri)
+		if !strings.Contains(uriStr, ".") && !strings.Contains(uriStr, "_") && !strings.Contains(uriStr, "testdata") {
+			return false
 		}
 	}
-	for _, prefix := range prefixes {
-		if strings.HasPrefix(filename, prefix) {
-			return checkIgnored(filename[len(prefix):])
+
+	s.ignoreFilterOnce.Do(func() {
+		var dirs []string
+		if len(s.workspaceModFiles) == 0 {
+			for _, entry := range filepath.SplitList(s.view.gopath) {
+				dirs = append(dirs, filepath.Join(entry, "src"))
+			}
+		} else {
+			dirs = append(dirs, s.view.gomodcache)
+			for m := range s.workspaceModFiles {
+				dirs = append(dirs, filepath.Dir(m.Filename()))
+			}
+		}
+		s.ignoreFilter = newIgnoreFilter(dirs)
+	})
+
+	return s.ignoreFilter.ignored(uri.Filename())
+}
+
+// An ignoreFilter implements go list's exclusion rules via its 'ignored' method.
+type ignoreFilter struct {
+	prefixes []string // root dirs, ending in filepath.Separator
+}
+
+// newIgnoreFilter returns a new ignoreFilter implementing exclusion rules
+// relative to the provided directories.
+func newIgnoreFilter(dirs []string) *ignoreFilter {
+	f := new(ignoreFilter)
+	for _, d := range dirs {
+		f.prefixes = append(f.prefixes, filepath.Clean(d)+string(filepath.Separator))
+	}
+	return f
+}
+
+func (f *ignoreFilter) ignored(filename string) bool {
+	for _, prefix := range f.prefixes {
+		if suffix := strings.TrimPrefix(filename, prefix); suffix != filename {
+			if checkIgnored(suffix) {
+				return true
+			}
 		}
 	}
 	return false
@@ -615,6 +650,8 @@
 //	Directory and file names that begin with "." or "_" are ignored
 //	by the go tool, as are directories named "testdata".
 func checkIgnored(suffix string) bool {
+	// Note: this could be further optimized by writing a HasSegment helper, a
+	// segment-boundary respecting variant of strings.Contains.
 	for _, component := range strings.Split(suffix, string(filepath.Separator)) {
 		if len(component) == 0 {
 			continue
@@ -911,7 +948,7 @@
 	// TODO(golang/go#57514): eliminate the expandWorkspaceToModule setting
 	// entirely.
 	if v.Options().ExpandWorkspaceToModule && v.gomod != "" {
-		return span.Dir(v.gomod)
+		return span.URIFromPath(filepath.Dir(v.gomod.Filename()))
 	}
 	return v.folder
 }
diff --git a/gopls/internal/lsp/cache/view_test.go b/gopls/internal/lsp/cache/view_test.go
index 9e6d23b..90471ed 100644
--- a/gopls/internal/lsp/cache/view_test.go
+++ b/gopls/internal/lsp/cache/view_test.go
@@ -276,3 +276,32 @@
 	b, _ := json.MarshalIndent(x, "", " ")
 	return string(b)
 }
+
+func TestIgnoreFilter(t *testing.T) {
+	tests := []struct {
+		dirs []string
+		path string
+		want bool
+	}{
+		{[]string{"a"}, "a/testdata/foo", true},
+		{[]string{"a"}, "a/_ignore/foo", true},
+		{[]string{"a"}, "a/.ignore/foo", true},
+		{[]string{"a"}, "b/testdata/foo", false},
+		{[]string{"a"}, "testdata/foo", false},
+		{[]string{"a", "b"}, "b/testdata/foo", true},
+		{[]string{"a"}, "atestdata/foo", false},
+	}
+
+	for _, test := range tests {
+		// convert to filepaths, for convenience
+		for i, dir := range test.dirs {
+			test.dirs[i] = filepath.FromSlash(dir)
+		}
+		test.path = filepath.FromSlash(test.path)
+
+		f := newIgnoreFilter(test.dirs)
+		if got := f.ignored(test.path); got != test.want {
+			t.Errorf("newIgnoreFilter(%q).ignore(%q) = %t, want %t", test.dirs, test.path, got, test.want)
+		}
+	}
+}