internal/lsp/cache: use cached parsed files for symbols, if available Optimize building the symbol index for a file, in two ways: - use the cached full parse tree, if it already exists - if it doesn't exist, optimize parsing by skipping both comments and object resolution, which aren't necessary for symbols This results in around 3x faster initial indexing of symbols. In my manual testing, indexing of Kubernetes went from 16s->5s, and indexing of x/tools went from 2.4s->700ms. Also fix a typo in gopls/internal/regtest/bench/bench_test.go. Fixes #52602 Change-Id: I0893e95410be96e94e5e9dee7a3aab30b59c19c5 Reviewed-on: https://go-review.googlesource.com/c/tools/+/403679 Run-TryBot: Robert Findley <rfindley@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> gopls-CI: kokoro <noreply+kokoro@google.com> Reviewed-by: Alan Donovan <adonovan@google.com>

commit: d30366863522954d3dd1358b141ea026f28dfec5 [log] [tgz]
author: Robert Findley <rfindley@google.com> Tue May 03 15:35:33 2022 -0400
committer: Robert Findley <rfindley@google.com> Mon May 09 17:04:06 2022 +0000
tree: be50f65d53bbd7ba70d0778cc729f87440ecf606
parent: 0fb1abf25a5f88a5791bd57a7084b3e61952f637 [diff]
diff --git a/gopls/internal/regtest/bench/bench_test.go b/gopls/internal/regtest/bench/bench_test.go
index 9d95ae0..44ab457 100644
--- a/gopls/internal/regtest/bench/bench_test.go
+++ b/gopls/internal/regtest/bench/bench_test.go

@@ -130,7 +130,7 @@
 }
 
 var (
-	benchDir     = flag.String("didchange_dir", "", "If set, run benchmarks in this dir. Must also set regtest_bench_file.")
+	benchDir     = flag.String("didchange_dir", "", "If set, run benchmarks in this dir. Must also set didchange_file.")
 	benchFile    = flag.String("didchange_file", "", "The file to modify")
 	benchProfile = flag.String("didchange_cpuprof", "", "file to write cpu profiling data to")
 )

diff --git a/internal/lsp/cache/parsemode_go116.go b/internal/lsp/cache/parsemode_go116.go
new file mode 100644
index 0000000..d365a91
--- /dev/null
+++ b/internal/lsp/cache/parsemode_go116.go

@@ -0,0 +1,11 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !go1.17
+// +build !go1.17
+
+package cache
+
+// The parser.SkipObjectResolution mode flag is not supported before Go 1.17.
+const skipObjectResolution = 0

diff --git a/internal/lsp/cache/parsemode_go117.go b/internal/lsp/cache/parsemode_go117.go
new file mode 100644
index 0000000..e2c9fb9
--- /dev/null
+++ b/internal/lsp/cache/parsemode_go117.go

@@ -0,0 +1,12 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build go1.17
+// +build go1.17
+
+package cache
+
+import "go/parser"
+
+const skipObjectResolution = parser.SkipObjectResolution

diff --git a/internal/lsp/cache/symbols.go b/internal/lsp/cache/symbols.go
index 8310172..087e211 100644
--- a/internal/lsp/cache/symbols.go
+++ b/internal/lsp/cache/symbols.go

@@ -7,14 +7,15 @@
 import (
 	"context"
 	"go/ast"
+	"go/parser"
 	"go/token"
 	"go/types"
 	"strings"
 
+	"golang.org/x/tools/internal/lsp/lsppos"
 	"golang.org/x/tools/internal/lsp/protocol"
 	"golang.org/x/tools/internal/lsp/source"
 	"golang.org/x/tools/internal/memoize"
-	"golang.org/x/tools/internal/span"
 )
 
 type symbolHandle struct {
@@ -54,22 +55,58 @@
 	return s.addSymbolHandle(sh)
 }
 
-// symbolize extracts symbols from a file. It does not parse the file through the cache.
+// symbolize extracts symbols from a file. It uses a parsed file already
+// present in the cache but otherwise does not populate the cache.
 func symbolize(ctx context.Context, snapshot *snapshot, fh source.FileHandle) ([]source.Symbol, error) {
-	var w symbolWalker
-	fset := token.NewFileSet() // don't use snapshot.FileSet, as that would needlessly leak memory.
-	data := parseGo(ctx, fset, fh, source.ParseFull)
-	if data.parsed != nil && data.parsed.File != nil {
-		w.curFile = data.parsed
-		w.curURI = protocol.URIFromSpanURI(data.parsed.URI)
-		w.fileDecls(data.parsed.File.Decls)
+	src, err := fh.Read()
+	if err != nil {
+		return nil, err
 	}
+
+	var (
+		file     *ast.File
+		fileDesc *token.File
+	)
+
+	// If the file has already been fully parsed through the cache, we can just
+	// use the result.
+	key := parseKey{file: fh.FileIdentity(), mode: source.ParseFull}
+	if pgh := snapshot.getGoFile(key); pgh != nil {
+		cached := pgh.handle.Cached(snapshot.generation)
+		if cached != nil {
+			cached := cached.(*parseGoData)
+			if cached.parsed != nil {
+				file = cached.parsed.File
+				fileDesc = cached.parsed.Tok
+			}
+		}
+	}
+
+	// Otherwise, we parse the file ourselves. Notably we don't use parseGo here,
+	// so that we can avoid parsing comments and can skip object resolution,
+	// which has a meaningful impact on performance. Neither comments nor objects
+	// are necessary for symbol construction.
+	if file == nil {
+		fset := token.NewFileSet()
+		file, err = parser.ParseFile(fset, fh.URI().Filename(), src, skipObjectResolution)
+		if file == nil {
+			return nil, err
+		}
+		fileDesc = fset.File(file.Package)
+	}
+
+	w := &symbolWalker{
+		mapper: lsppos.NewTokenMapper(src, fileDesc),
+	}
+
+	w.fileDecls(file.Decls)
+
 	return w.symbols, w.firstError
 }
 
 type symbolWalker struct {
-	curFile    *source.ParsedGoFile
-	curURI     protocol.DocumentURI
+	mapper *lsppos.TokenMapper // for computing positions
+
 	symbols    []source.Symbol
 	firstError error
 }
@@ -84,7 +121,7 @@
 	}
 	b.WriteString(name)
 
-	rng, err := fileRange(w.curFile, node.Pos(), node.End())
+	rng, err := w.mapper.Range(node.Pos(), node.End())
 	if err != nil {
 		w.error(err)
 		return
@@ -103,14 +140,6 @@
 	}
 }
 
-func fileRange(pgf *source.ParsedGoFile, start, end token.Pos) (protocol.Range, error) {
-	s, err := span.FileSpan(pgf.Tok, pgf.Mapper.Converter, start, end)
-	if err != nil {
-		return protocol.Range{}, nil
-	}
-	return pgf.Mapper.Range(s)
-}
-
 func (w *symbolWalker) fileDecls(decls []ast.Decl) {
 	for _, decl := range decls {
 		switch decl := decl.(type) {
commit	d30366863522954d3dd1358b141ea026f28dfec5	[log] [tgz]
author	Robert Findley <rfindley@google.com>	Tue May 03 15:35:33 2022 -0400
committer	Robert Findley <rfindley@google.com>	Mon May 09 17:04:06 2022 +0000
tree	be50f65d53bbd7ba70d0778cc729f87440ecf606
parent	0fb1abf25a5f88a5791bd57a7084b3e61952f637 [diff]