internal/lsp/cache: use [256]byte Hash instead of hex digit string
I had hoped to see a reduction in total allocation, but it does not
appear to be significant according to the included crude benchmark.
Nonetheless this is a slight code clarity improvement.
Change-Id: I94a503b377dd1146eb371ff11222a351cb5a43b7
Reviewed-on: https://go-review.googlesource.com/c/tools/+/411655
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Robert Findley <rfindley@google.com>
gopls-CI: kokoro <noreply+kokoro@google.com>
Reviewed-by: Robert Findley <rfindley@google.com>
diff --git a/gopls/internal/regtest/bench/bench_test.go b/gopls/internal/regtest/bench/bench_test.go
index 5e4eb5f..22f157f 100644
--- a/gopls/internal/regtest/bench/bench_test.go
+++ b/gopls/internal/regtest/bench/bench_test.go
@@ -8,6 +8,7 @@
"flag"
"fmt"
"os"
+ "runtime"
"runtime/pprof"
"testing"
@@ -66,6 +67,7 @@
results := testing.Benchmark(func(b *testing.B) {
for i := 0; i < b.N; i++ {
WithOptions(opts...).Run(t, "", func(t *testing.T, env *Env) {})
+
}
})
@@ -192,3 +194,31 @@
printBenchmarkResults(result)
})
}
+
+// TestPrintMemStats measures the memory usage of loading a project.
+// It uses the same -didchange_dir flag as above.
+// Always run it in isolation since it measures global heap usage.
+//
+// Kubernetes example:
+// $ go test -run=TestPrintMemStats -didchange_dir=$HOME/w/kubernetes
+// TotalAlloc: 5766 MB
+// HeapAlloc: 1984 MB
+//
+// Both figures exhibit variance of less than 1%.
+func TestPrintMemStats(t *testing.T) {
+ if *benchDir == "" {
+ t.Skip("-didchange_dir is not set")
+ }
+
+ // Load the program...
+ opts := benchmarkOptions(*benchDir)
+ WithOptions(opts...).Run(t, "", func(_ *testing.T, env *Env) {
+ // ...and print the memory usage.
+ runtime.GC()
+ runtime.GC()
+ var mem runtime.MemStats
+ runtime.ReadMemStats(&mem)
+ t.Logf("TotalAlloc:\t%d MB", mem.TotalAlloc/1e6)
+ t.Logf("HeapAlloc:\t%d MB", mem.HeapAlloc/1e6)
+ })
+}
diff --git a/internal/lsp/cache/analysis.go b/internal/lsp/cache/analysis.go
index e882fb4..9f7a19c 100644
--- a/internal/lsp/cache/analysis.go
+++ b/internal/lsp/cache/analysis.go
@@ -54,7 +54,7 @@
return results, nil
}
-type actionHandleKey string
+type actionHandleKey source.Hash
// An action represents one unit of analysis work: the application of
// one analysis to one package. Actions form a DAG, both within a
@@ -170,7 +170,7 @@
}
func buildActionKey(a *analysis.Analyzer, ph *packageHandle) actionHandleKey {
- return actionHandleKey(hashContents([]byte(fmt.Sprintf("%p %s", a, string(ph.key)))))
+ return actionHandleKey(source.Hashf("%p%s", a, ph.key[:]))
}
func (act *actionHandle) String() string {
diff --git a/internal/lsp/cache/cache.go b/internal/lsp/cache/cache.go
index f5796df..2a8a169 100644
--- a/internal/lsp/cache/cache.go
+++ b/internal/lsp/cache/cache.go
@@ -6,7 +6,6 @@
import (
"context"
- "crypto/sha256"
"fmt"
"go/ast"
"go/token"
@@ -55,7 +54,7 @@
modTime time.Time
uri span.URI
bytes []byte
- hash string
+ hash source.Hash
err error
// size is the file length as reported by Stat, for the purpose of
@@ -139,7 +138,7 @@
size: fi.Size(),
uri: uri,
bytes: data,
- hash: hashContents(data),
+ hash: source.HashOf(data),
}, nil
}
@@ -168,10 +167,6 @@
return h.uri
}
-func (h *fileHandle) Hash() string {
- return h.hash
-}
-
func (h *fileHandle) FileIdentity() source.FileIdentity {
return source.FileIdentity{
URI: h.uri,
@@ -183,16 +178,6 @@
return h.bytes, h.err
}
-// hashContents returns a string of hex digits denoting the hash of contents.
-//
-// TODO(adonovan): opt: use [32]byte array as a value more widely and convert
-// to hex digits on demand (rare). The array is larger when it appears as a
-// struct field (32B vs 16B) but smaller overall (string data is 64B), has
-// better locality, and is more efficiently hashed by runtime maps.
-func hashContents(contents []byte) string {
- return fmt.Sprintf("%64x", sha256.Sum256(contents))
-}
-
var cacheIndex, sessionIndex, viewIndex int64
func (c *Cache) ID() string { return c.id }
diff --git a/internal/lsp/cache/check.go b/internal/lsp/cache/check.go
index b8a3655..f09fc29 100644
--- a/internal/lsp/cache/check.go
+++ b/internal/lsp/cache/check.go
@@ -32,7 +32,7 @@
"golang.org/x/tools/internal/typesinternal"
)
-type packageHandleKey string
+type packageHandleKey source.Hash
type packageHandle struct {
handle *memoize.Handle
@@ -187,7 +187,7 @@
}
// One bad dependency should not prevent us from checking the entire package.
// Add a special key to mark a bad dependency.
- depKeys = append(depKeys, packageHandleKey(fmt.Sprintf("%s import not found", depID)))
+ depKeys = append(depKeys, packageHandleKey(source.Hashf("%s import not found", depID)))
continue
}
deps[depHandle.m.PkgPath] = depHandle
@@ -215,6 +215,8 @@
}
func checkPackageKey(id PackageID, pghs []*parseGoHandle, m *KnownMetadata, deps []packageHandleKey, mode source.ParseMode, experimentalKey bool) packageHandleKey {
+ // TODO(adonovan): opt: no need to materalize the bytes; hash them directly.
+ // Also, use field separators to avoid spurious collisions.
b := bytes.NewBuffer(nil)
b.WriteString(string(id))
if m.Module != nil {
@@ -225,38 +227,37 @@
// files, and deps). It should not otherwise affect the inputs to the type
// checker, so this experiment omits it. This should increase cache hits on
// the daemon as cfg contains the environment and working directory.
- b.WriteString(hashConfig(m.Config))
+ hc := hashConfig(m.Config)
+ b.Write(hc[:])
}
b.WriteByte(byte(mode))
for _, dep := range deps {
- b.WriteString(string(dep))
+ b.Write(dep[:])
}
for _, cgf := range pghs {
b.WriteString(cgf.file.FileIdentity().String())
}
- return packageHandleKey(hashContents(b.Bytes()))
+ return packageHandleKey(source.HashOf(b.Bytes()))
}
// hashEnv returns a hash of the snapshot's configuration.
-func hashEnv(s *snapshot) string {
+func hashEnv(s *snapshot) source.Hash {
s.view.optionsMu.Lock()
env := s.view.options.EnvSlice()
s.view.optionsMu.Unlock()
- b := &bytes.Buffer{}
- for _, e := range env {
- b.WriteString(e)
- }
- return hashContents(b.Bytes())
+ return source.Hashf("%s", env)
}
// hashConfig returns the hash for the *packages.Config.
-func hashConfig(config *packages.Config) string {
- b := bytes.NewBuffer(nil)
+func hashConfig(config *packages.Config) source.Hash {
+ // TODO(adonovan): opt: don't materialize the bytes; hash them directly.
+ // Also, use sound field separators to avoid collisions.
+ var b bytes.Buffer
// Dir, Mode, Env, BuildFlags are the parts of the config that can change.
b.WriteString(config.Dir)
- b.WriteString(string(rune(config.Mode)))
+ b.WriteRune(rune(config.Mode))
for _, e := range config.Env {
b.WriteString(e)
@@ -264,7 +265,7 @@
for _, f := range config.BuildFlags {
b.WriteString(f)
}
- return hashContents(b.Bytes())
+ return source.HashOf(b.Bytes())
}
func (ph *packageHandle) Check(ctx context.Context, s source.Snapshot) (source.Package, error) {
diff --git a/internal/lsp/cache/imports.go b/internal/lsp/cache/imports.go
index 01a2468..f333f70 100644
--- a/internal/lsp/cache/imports.go
+++ b/internal/lsp/cache/imports.go
@@ -27,7 +27,7 @@
cleanupProcessEnv func()
cacheRefreshDuration time.Duration
cacheRefreshTimer *time.Timer
- cachedModFileHash string
+ cachedModFileHash source.Hash
cachedBuildFlags []string
}
@@ -38,7 +38,7 @@
// Find the hash of the active mod file, if any. Using the unsaved content
// is slightly wasteful, since we'll drop caches a little too often, but
// the mod file shouldn't be changing while people are autocompleting.
- var modFileHash string
+ var modFileHash source.Hash
// If we are using 'legacyWorkspace' mode, we can just read the modfile from
// the snapshot. Otherwise, we need to get the synthetic workspace mod file.
//
@@ -61,7 +61,7 @@
if err != nil {
return err
}
- modFileHash = hashContents(modBytes)
+ modFileHash = source.HashOf(modBytes)
}
// view.goEnv is immutable -- changes make a new view. Options can change.
diff --git a/internal/lsp/cache/mod.go b/internal/lsp/cache/mod.go
index 5ac199b..c076f42 100644
--- a/internal/lsp/cache/mod.go
+++ b/internal/lsp/cache/mod.go
@@ -201,9 +201,11 @@
// modKey is uniquely identifies cached data for `go mod why` or dependencies
// to upgrade.
type modKey struct {
- sessionID, env, view string
- mod source.FileIdentity
- verb modAction
+ sessionID string
+ env source.Hash
+ view string
+ mod source.FileIdentity
+ verb modAction
}
type modAction int
diff --git a/internal/lsp/cache/mod_tidy.go b/internal/lsp/cache/mod_tidy.go
index aa525e7..bd2ff0c 100644
--- a/internal/lsp/cache/mod_tidy.go
+++ b/internal/lsp/cache/mod_tidy.go
@@ -29,10 +29,10 @@
type modTidyKey struct {
sessionID string
- env string
+ env source.Hash
gomod source.FileIdentity
- imports string
- unsavedOverlays string
+ imports source.Hash
+ unsavedOverlays source.Hash
view string
}
@@ -81,10 +81,6 @@
if err != nil {
return nil, err
}
- importHash, err := s.hashImports(ctx, workspacePkgs)
- if err != nil {
- return nil, err
- }
s.mu.Lock()
overlayHash := hashUnsavedOverlays(s.files)
@@ -93,7 +89,7 @@
key := modTidyKey{
sessionID: s.view.session.id,
view: s.view.folder.Filename(),
- imports: importHash,
+ imports: s.hashImports(ctx, workspacePkgs),
unsavedOverlays: overlayHash,
gomod: fh.FileIdentity(),
env: hashEnv(s),
@@ -152,7 +148,7 @@
return mth.tidy(ctx, s)
}
-func (s *snapshot) hashImports(ctx context.Context, wsPackages []*packageHandle) (string, error) {
+func (s *snapshot) hashImports(ctx context.Context, wsPackages []*packageHandle) source.Hash {
seen := map[string]struct{}{}
var imports []string
for _, ph := range wsPackages {
@@ -164,8 +160,7 @@
}
}
sort.Strings(imports)
- hashed := strings.Join(imports, ",")
- return hashContents([]byte(hashed)), nil
+ return source.Hashf("%s", imports)
}
// modTidyDiagnostics computes the differences between the original and tidied
diff --git a/internal/lsp/cache/session.go b/internal/lsp/cache/session.go
index 9da5c1e..cbb5874 100644
--- a/internal/lsp/cache/session.go
+++ b/internal/lsp/cache/session.go
@@ -44,7 +44,7 @@
session *Session
uri span.URI
text []byte
- hash string
+ hash source.Hash
version int32
kind source.FileKind
@@ -637,7 +637,7 @@
if c.OnDisk || c.Action == source.Save {
version = o.version
}
- hash := hashContents(text)
+ hash := source.HashOf(text)
var sameContentOnDisk bool
switch c.Action {
case source.Delete:
diff --git a/internal/lsp/cache/snapshot.go b/internal/lsp/cache/snapshot.go
index 0d3c869..6edd1db 100644
--- a/internal/lsp/cache/snapshot.go
+++ b/internal/lsp/cache/snapshot.go
@@ -466,7 +466,7 @@
return overlays
}
-func hashUnsavedOverlays(files map[span.URI]source.VersionedFileHandle) string {
+func hashUnsavedOverlays(files map[span.URI]source.VersionedFileHandle) source.Hash {
var unsaved []string
for uri, fh := range files {
if overlay, ok := fh.(*overlay); ok && !overlay.saved {
@@ -474,7 +474,7 @@
}
}
sort.Strings(unsaved)
- return hashContents([]byte(strings.Join(unsaved, "")))
+ return source.Hashf("%s", unsaved)
}
func (s *snapshot) PackagesForFile(ctx context.Context, uri span.URI, mode source.TypecheckMode, includeTestVariants bool) ([]source.Package, error) {
@@ -2652,25 +2652,7 @@
// -- internal--
// hash returns 8 bits from the key's file digest.
-func (m *goFileMap) hash(k parseKey) int {
- h := k.file.Hash
- if h == "" {
- // Sadly the Hash isn't always a hash because cache.GetFile may
- // successfully return a *fileHandle containing an error and no hash.
- // Lump the duds together for now.
- // TODO(adonovan): fix the underlying bug.
- return 0
- }
- return unhex(h[0])<<4 | unhex(h[1])
-}
-
-// unhex returns the value of a valid hex digit.
-func unhex(b byte) int {
- if '0' <= b && b <= '9' {
- return int(b - '0')
- }
- return int(b) & ^0x20 - 'A' + 0xA // [a-fA-F]
-}
+func (*goFileMap) hash(k parseKey) byte { return k.file.Hash[0] }
// unshare makes k's stripe exclusive, allocating a copy if needed, and returns it.
func (m *goFileMap) unshare(k parseKey) map[parseKey]*parseGoHandle {
diff --git a/internal/lsp/cache/symbols.go b/internal/lsp/cache/symbols.go
index db68912..bf5e00b 100644
--- a/internal/lsp/cache/symbols.go
+++ b/internal/lsp/cache/symbols.go
@@ -33,7 +33,7 @@
err error
}
-type symbolHandleKey string
+type symbolHandleKey source.Hash
func (s *snapshot) buildSymbolHandle(ctx context.Context, fh source.FileHandle) *symbolHandle {
if h := s.getSymbolHandle(fh.URI()); h != nil {
diff --git a/internal/lsp/cache/view.go b/internal/lsp/cache/view.go
index b0390a3f..0ed9883 100644
--- a/internal/lsp/cache/view.go
+++ b/internal/lsp/cache/view.go
@@ -165,7 +165,7 @@
// given go.mod file. It is the caller's responsibility to clean up the files
// when they are done using them.
func tempModFile(modFh source.FileHandle, gosum []byte) (tmpURI span.URI, cleanup func(), err error) {
- filenameHash := hashContents([]byte(modFh.URI().Filename()))
+ filenameHash := source.Hashf("%s", modFh.URI().Filename())
tmpMod, err := ioutil.TempFile("", fmt.Sprintf("go.%s.*.mod", filenameHash))
if err != nil {
return "", nil, err
diff --git a/internal/lsp/debug/serve.go b/internal/lsp/debug/serve.go
index 0bdee92..d343a6d 100644
--- a/internal/lsp/debug/serve.go
+++ b/internal/lsp/debug/serve.go
@@ -320,7 +320,8 @@
return nil
}
for _, o := range s.Overlays() {
- if o.FileIdentity().Hash == identifier {
+ // TODO(adonovan): understand and document this comparison.
+ if o.FileIdentity().Hash.String() == identifier {
return o
}
}
diff --git a/internal/lsp/source/view.go b/internal/lsp/source/view.go
index 5b908bc..7960b0c 100644
--- a/internal/lsp/source/view.go
+++ b/internal/lsp/source/view.go
@@ -7,6 +7,7 @@
import (
"bytes"
"context"
+ "crypto/sha256"
"errors"
"fmt"
"go/ast"
@@ -528,12 +529,32 @@
Saved() bool
}
+// A Hash is a cryptographic digest of the contents of a file.
+// (Although at 32B it is larger than a 16B string header, it is smaller
+// and has better locality than the string header + 64B of hex digits.)
+type Hash [sha256.Size]byte
+
+// HashOf returns the hash of some data.
+func HashOf(data []byte) Hash {
+ return Hash(sha256.Sum256(data))
+}
+
+// Hashf returns the hash of a printf-formatted string.
+func Hashf(format string, args ...interface{}) Hash {
+ // Although this looks alloc-heavy, it is faster than using
+ // Fprintf on sha256.New() because the allocations don't escape.
+ return HashOf([]byte(fmt.Sprintf(format, args...)))
+}
+
+// String returns the digest as a string of hex digits.
+func (h Hash) String() string {
+ return fmt.Sprintf("%64x", [sha256.Size]byte(h))
+}
+
// FileIdentity uniquely identifies a file at a version from a FileSystem.
type FileIdentity struct {
- URI span.URI
-
- // Hash is a string of hex digits denoting the cryptographic digest of the file's content.
- Hash string
+ URI span.URI
+ Hash Hash // digest of file contents
}
func (id FileIdentity) String() string {