internal/lsp/cache: use [256]byte Hash instead of hex digit string

I had hoped to see a reduction in total allocation, but it does not
appear to be significant according to the included crude benchmark.
Nonetheless this is a slight code clarity improvement.

Change-Id: I94a503b377dd1146eb371ff11222a351cb5a43b7
Reviewed-on: https://go-review.googlesource.com/c/tools/+/411655
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Robert Findley <rfindley@google.com>
gopls-CI: kokoro <noreply+kokoro@google.com>
Reviewed-by: Robert Findley <rfindley@google.com>
diff --git a/gopls/internal/regtest/bench/bench_test.go b/gopls/internal/regtest/bench/bench_test.go
index 5e4eb5f..22f157f 100644
--- a/gopls/internal/regtest/bench/bench_test.go
+++ b/gopls/internal/regtest/bench/bench_test.go
@@ -8,6 +8,7 @@
 	"flag"
 	"fmt"
 	"os"
+	"runtime"
 	"runtime/pprof"
 	"testing"
 
@@ -66,6 +67,7 @@
 	results := testing.Benchmark(func(b *testing.B) {
 		for i := 0; i < b.N; i++ {
 			WithOptions(opts...).Run(t, "", func(t *testing.T, env *Env) {})
+
 		}
 	})
 
@@ -192,3 +194,31 @@
 		printBenchmarkResults(result)
 	})
 }
+
+// TestPrintMemStats measures the memory usage of loading a project.
+// It uses the same -didchange_dir flag as above.
+// Always run it in isolation since it measures global heap usage.
+//
+// Kubernetes example:
+//   $ go test -run=TestPrintMemStats -didchange_dir=$HOME/w/kubernetes
+//   TotalAlloc:      5766 MB
+//   HeapAlloc:       1984 MB
+//
+// Both figures exhibit variance of less than 1%.
+func TestPrintMemStats(t *testing.T) {
+	if *benchDir == "" {
+		t.Skip("-didchange_dir is not set")
+	}
+
+	// Load the program...
+	opts := benchmarkOptions(*benchDir)
+	WithOptions(opts...).Run(t, "", func(_ *testing.T, env *Env) {
+		// ...and print the memory usage.
+		runtime.GC()
+		runtime.GC()
+		var mem runtime.MemStats
+		runtime.ReadMemStats(&mem)
+		t.Logf("TotalAlloc:\t%d MB", mem.TotalAlloc/1e6)
+		t.Logf("HeapAlloc:\t%d MB", mem.HeapAlloc/1e6)
+	})
+}
diff --git a/internal/lsp/cache/analysis.go b/internal/lsp/cache/analysis.go
index e882fb4..9f7a19c 100644
--- a/internal/lsp/cache/analysis.go
+++ b/internal/lsp/cache/analysis.go
@@ -54,7 +54,7 @@
 	return results, nil
 }
 
-type actionHandleKey string
+type actionHandleKey source.Hash
 
 // An action represents one unit of analysis work: the application of
 // one analysis to one package. Actions form a DAG, both within a
@@ -170,7 +170,7 @@
 }
 
 func buildActionKey(a *analysis.Analyzer, ph *packageHandle) actionHandleKey {
-	return actionHandleKey(hashContents([]byte(fmt.Sprintf("%p %s", a, string(ph.key)))))
+	return actionHandleKey(source.Hashf("%p%s", a, ph.key[:]))
 }
 
 func (act *actionHandle) String() string {
diff --git a/internal/lsp/cache/cache.go b/internal/lsp/cache/cache.go
index f5796df..2a8a169 100644
--- a/internal/lsp/cache/cache.go
+++ b/internal/lsp/cache/cache.go
@@ -6,7 +6,6 @@
 
 import (
 	"context"
-	"crypto/sha256"
 	"fmt"
 	"go/ast"
 	"go/token"
@@ -55,7 +54,7 @@
 	modTime time.Time
 	uri     span.URI
 	bytes   []byte
-	hash    string
+	hash    source.Hash
 	err     error
 
 	// size is the file length as reported by Stat, for the purpose of
@@ -139,7 +138,7 @@
 		size:    fi.Size(),
 		uri:     uri,
 		bytes:   data,
-		hash:    hashContents(data),
+		hash:    source.HashOf(data),
 	}, nil
 }
 
@@ -168,10 +167,6 @@
 	return h.uri
 }
 
-func (h *fileHandle) Hash() string {
-	return h.hash
-}
-
 func (h *fileHandle) FileIdentity() source.FileIdentity {
 	return source.FileIdentity{
 		URI:  h.uri,
@@ -183,16 +178,6 @@
 	return h.bytes, h.err
 }
 
-// hashContents returns a string of hex digits denoting the hash of contents.
-//
-// TODO(adonovan): opt: use [32]byte array as a value more widely and convert
-// to hex digits on demand (rare). The array is larger when it appears as a
-// struct field (32B vs 16B) but smaller overall (string data is 64B), has
-// better locality, and is more efficiently hashed by runtime maps.
-func hashContents(contents []byte) string {
-	return fmt.Sprintf("%64x", sha256.Sum256(contents))
-}
-
 var cacheIndex, sessionIndex, viewIndex int64
 
 func (c *Cache) ID() string                     { return c.id }
diff --git a/internal/lsp/cache/check.go b/internal/lsp/cache/check.go
index b8a3655..f09fc29 100644
--- a/internal/lsp/cache/check.go
+++ b/internal/lsp/cache/check.go
@@ -32,7 +32,7 @@
 	"golang.org/x/tools/internal/typesinternal"
 )
 
-type packageHandleKey string
+type packageHandleKey source.Hash
 
 type packageHandle struct {
 	handle *memoize.Handle
@@ -187,7 +187,7 @@
 			}
 			// One bad dependency should not prevent us from checking the entire package.
 			// Add a special key to mark a bad dependency.
-			depKeys = append(depKeys, packageHandleKey(fmt.Sprintf("%s import not found", depID)))
+			depKeys = append(depKeys, packageHandleKey(source.Hashf("%s import not found", depID)))
 			continue
 		}
 		deps[depHandle.m.PkgPath] = depHandle
@@ -215,6 +215,8 @@
 }
 
 func checkPackageKey(id PackageID, pghs []*parseGoHandle, m *KnownMetadata, deps []packageHandleKey, mode source.ParseMode, experimentalKey bool) packageHandleKey {
+	// TODO(adonovan): opt: no need to materalize the bytes; hash them directly.
+	// Also, use field separators to avoid spurious collisions.
 	b := bytes.NewBuffer(nil)
 	b.WriteString(string(id))
 	if m.Module != nil {
@@ -225,38 +227,37 @@
 		// files, and deps). It should not otherwise affect the inputs to the type
 		// checker, so this experiment omits it. This should increase cache hits on
 		// the daemon as cfg contains the environment and working directory.
-		b.WriteString(hashConfig(m.Config))
+		hc := hashConfig(m.Config)
+		b.Write(hc[:])
 	}
 	b.WriteByte(byte(mode))
 	for _, dep := range deps {
-		b.WriteString(string(dep))
+		b.Write(dep[:])
 	}
 	for _, cgf := range pghs {
 		b.WriteString(cgf.file.FileIdentity().String())
 	}
-	return packageHandleKey(hashContents(b.Bytes()))
+	return packageHandleKey(source.HashOf(b.Bytes()))
 }
 
 // hashEnv returns a hash of the snapshot's configuration.
-func hashEnv(s *snapshot) string {
+func hashEnv(s *snapshot) source.Hash {
 	s.view.optionsMu.Lock()
 	env := s.view.options.EnvSlice()
 	s.view.optionsMu.Unlock()
 
-	b := &bytes.Buffer{}
-	for _, e := range env {
-		b.WriteString(e)
-	}
-	return hashContents(b.Bytes())
+	return source.Hashf("%s", env)
 }
 
 // hashConfig returns the hash for the *packages.Config.
-func hashConfig(config *packages.Config) string {
-	b := bytes.NewBuffer(nil)
+func hashConfig(config *packages.Config) source.Hash {
+	// TODO(adonovan): opt: don't materialize the bytes; hash them directly.
+	// Also, use sound field separators to avoid collisions.
+	var b bytes.Buffer
 
 	// Dir, Mode, Env, BuildFlags are the parts of the config that can change.
 	b.WriteString(config.Dir)
-	b.WriteString(string(rune(config.Mode)))
+	b.WriteRune(rune(config.Mode))
 
 	for _, e := range config.Env {
 		b.WriteString(e)
@@ -264,7 +265,7 @@
 	for _, f := range config.BuildFlags {
 		b.WriteString(f)
 	}
-	return hashContents(b.Bytes())
+	return source.HashOf(b.Bytes())
 }
 
 func (ph *packageHandle) Check(ctx context.Context, s source.Snapshot) (source.Package, error) {
diff --git a/internal/lsp/cache/imports.go b/internal/lsp/cache/imports.go
index 01a2468..f333f70 100644
--- a/internal/lsp/cache/imports.go
+++ b/internal/lsp/cache/imports.go
@@ -27,7 +27,7 @@
 	cleanupProcessEnv    func()
 	cacheRefreshDuration time.Duration
 	cacheRefreshTimer    *time.Timer
-	cachedModFileHash    string
+	cachedModFileHash    source.Hash
 	cachedBuildFlags     []string
 }
 
@@ -38,7 +38,7 @@
 	// Find the hash of the active mod file, if any. Using the unsaved content
 	// is slightly wasteful, since we'll drop caches a little too often, but
 	// the mod file shouldn't be changing while people are autocompleting.
-	var modFileHash string
+	var modFileHash source.Hash
 	// If we are using 'legacyWorkspace' mode, we can just read the modfile from
 	// the snapshot. Otherwise, we need to get the synthetic workspace mod file.
 	//
@@ -61,7 +61,7 @@
 		if err != nil {
 			return err
 		}
-		modFileHash = hashContents(modBytes)
+		modFileHash = source.HashOf(modBytes)
 	}
 
 	// view.goEnv is immutable -- changes make a new view. Options can change.
diff --git a/internal/lsp/cache/mod.go b/internal/lsp/cache/mod.go
index 5ac199b..c076f42 100644
--- a/internal/lsp/cache/mod.go
+++ b/internal/lsp/cache/mod.go
@@ -201,9 +201,11 @@
 // modKey is uniquely identifies cached data for `go mod why` or dependencies
 // to upgrade.
 type modKey struct {
-	sessionID, env, view string
-	mod                  source.FileIdentity
-	verb                 modAction
+	sessionID string
+	env       source.Hash
+	view      string
+	mod       source.FileIdentity
+	verb      modAction
 }
 
 type modAction int
diff --git a/internal/lsp/cache/mod_tidy.go b/internal/lsp/cache/mod_tidy.go
index aa525e7..bd2ff0c 100644
--- a/internal/lsp/cache/mod_tidy.go
+++ b/internal/lsp/cache/mod_tidy.go
@@ -29,10 +29,10 @@
 
 type modTidyKey struct {
 	sessionID       string
-	env             string
+	env             source.Hash
 	gomod           source.FileIdentity
-	imports         string
-	unsavedOverlays string
+	imports         source.Hash
+	unsavedOverlays source.Hash
 	view            string
 }
 
@@ -81,10 +81,6 @@
 	if err != nil {
 		return nil, err
 	}
-	importHash, err := s.hashImports(ctx, workspacePkgs)
-	if err != nil {
-		return nil, err
-	}
 
 	s.mu.Lock()
 	overlayHash := hashUnsavedOverlays(s.files)
@@ -93,7 +89,7 @@
 	key := modTidyKey{
 		sessionID:       s.view.session.id,
 		view:            s.view.folder.Filename(),
-		imports:         importHash,
+		imports:         s.hashImports(ctx, workspacePkgs),
 		unsavedOverlays: overlayHash,
 		gomod:           fh.FileIdentity(),
 		env:             hashEnv(s),
@@ -152,7 +148,7 @@
 	return mth.tidy(ctx, s)
 }
 
-func (s *snapshot) hashImports(ctx context.Context, wsPackages []*packageHandle) (string, error) {
+func (s *snapshot) hashImports(ctx context.Context, wsPackages []*packageHandle) source.Hash {
 	seen := map[string]struct{}{}
 	var imports []string
 	for _, ph := range wsPackages {
@@ -164,8 +160,7 @@
 		}
 	}
 	sort.Strings(imports)
-	hashed := strings.Join(imports, ",")
-	return hashContents([]byte(hashed)), nil
+	return source.Hashf("%s", imports)
 }
 
 // modTidyDiagnostics computes the differences between the original and tidied
diff --git a/internal/lsp/cache/session.go b/internal/lsp/cache/session.go
index 9da5c1e..cbb5874 100644
--- a/internal/lsp/cache/session.go
+++ b/internal/lsp/cache/session.go
@@ -44,7 +44,7 @@
 	session *Session
 	uri     span.URI
 	text    []byte
-	hash    string
+	hash    source.Hash
 	version int32
 	kind    source.FileKind
 
@@ -637,7 +637,7 @@
 		if c.OnDisk || c.Action == source.Save {
 			version = o.version
 		}
-		hash := hashContents(text)
+		hash := source.HashOf(text)
 		var sameContentOnDisk bool
 		switch c.Action {
 		case source.Delete:
diff --git a/internal/lsp/cache/snapshot.go b/internal/lsp/cache/snapshot.go
index 0d3c869..6edd1db 100644
--- a/internal/lsp/cache/snapshot.go
+++ b/internal/lsp/cache/snapshot.go
@@ -466,7 +466,7 @@
 	return overlays
 }
 
-func hashUnsavedOverlays(files map[span.URI]source.VersionedFileHandle) string {
+func hashUnsavedOverlays(files map[span.URI]source.VersionedFileHandle) source.Hash {
 	var unsaved []string
 	for uri, fh := range files {
 		if overlay, ok := fh.(*overlay); ok && !overlay.saved {
@@ -474,7 +474,7 @@
 		}
 	}
 	sort.Strings(unsaved)
-	return hashContents([]byte(strings.Join(unsaved, "")))
+	return source.Hashf("%s", unsaved)
 }
 
 func (s *snapshot) PackagesForFile(ctx context.Context, uri span.URI, mode source.TypecheckMode, includeTestVariants bool) ([]source.Package, error) {
@@ -2652,25 +2652,7 @@
 // -- internal--
 
 // hash returns 8 bits from the key's file digest.
-func (m *goFileMap) hash(k parseKey) int {
-	h := k.file.Hash
-	if h == "" {
-		// Sadly the Hash isn't always a hash because cache.GetFile may
-		// successfully return a *fileHandle containing an error and no hash.
-		// Lump the duds together for now.
-		// TODO(adonovan): fix the underlying bug.
-		return 0
-	}
-	return unhex(h[0])<<4 | unhex(h[1])
-}
-
-// unhex returns the value of a valid hex digit.
-func unhex(b byte) int {
-	if '0' <= b && b <= '9' {
-		return int(b - '0')
-	}
-	return int(b) & ^0x20 - 'A' + 0xA // [a-fA-F]
-}
+func (*goFileMap) hash(k parseKey) byte { return k.file.Hash[0] }
 
 // unshare makes k's stripe exclusive, allocating a copy if needed, and returns it.
 func (m *goFileMap) unshare(k parseKey) map[parseKey]*parseGoHandle {
diff --git a/internal/lsp/cache/symbols.go b/internal/lsp/cache/symbols.go
index db68912..bf5e00b 100644
--- a/internal/lsp/cache/symbols.go
+++ b/internal/lsp/cache/symbols.go
@@ -33,7 +33,7 @@
 	err     error
 }
 
-type symbolHandleKey string
+type symbolHandleKey source.Hash
 
 func (s *snapshot) buildSymbolHandle(ctx context.Context, fh source.FileHandle) *symbolHandle {
 	if h := s.getSymbolHandle(fh.URI()); h != nil {
diff --git a/internal/lsp/cache/view.go b/internal/lsp/cache/view.go
index b0390a3f..0ed9883 100644
--- a/internal/lsp/cache/view.go
+++ b/internal/lsp/cache/view.go
@@ -165,7 +165,7 @@
 // given go.mod file. It is the caller's responsibility to clean up the files
 // when they are done using them.
 func tempModFile(modFh source.FileHandle, gosum []byte) (tmpURI span.URI, cleanup func(), err error) {
-	filenameHash := hashContents([]byte(modFh.URI().Filename()))
+	filenameHash := source.Hashf("%s", modFh.URI().Filename())
 	tmpMod, err := ioutil.TempFile("", fmt.Sprintf("go.%s.*.mod", filenameHash))
 	if err != nil {
 		return "", nil, err
diff --git a/internal/lsp/debug/serve.go b/internal/lsp/debug/serve.go
index 0bdee92..d343a6d 100644
--- a/internal/lsp/debug/serve.go
+++ b/internal/lsp/debug/serve.go
@@ -320,7 +320,8 @@
 		return nil
 	}
 	for _, o := range s.Overlays() {
-		if o.FileIdentity().Hash == identifier {
+		// TODO(adonovan): understand and document this comparison.
+		if o.FileIdentity().Hash.String() == identifier {
 			return o
 		}
 	}
diff --git a/internal/lsp/source/view.go b/internal/lsp/source/view.go
index 5b908bc..7960b0c 100644
--- a/internal/lsp/source/view.go
+++ b/internal/lsp/source/view.go
@@ -7,6 +7,7 @@
 import (
 	"bytes"
 	"context"
+	"crypto/sha256"
 	"errors"
 	"fmt"
 	"go/ast"
@@ -528,12 +529,32 @@
 	Saved() bool
 }
 
+// A Hash is a cryptographic digest of the contents of a file.
+// (Although at 32B it is larger than a 16B string header, it is smaller
+// and has better locality than the string header + 64B of hex digits.)
+type Hash [sha256.Size]byte
+
+// HashOf returns the hash of some data.
+func HashOf(data []byte) Hash {
+	return Hash(sha256.Sum256(data))
+}
+
+// Hashf returns the hash of a printf-formatted string.
+func Hashf(format string, args ...interface{}) Hash {
+	// Although this looks alloc-heavy, it is faster than using
+	// Fprintf on sha256.New() because the allocations don't escape.
+	return HashOf([]byte(fmt.Sprintf(format, args...)))
+}
+
+// String returns the digest as a string of hex digits.
+func (h Hash) String() string {
+	return fmt.Sprintf("%64x", [sha256.Size]byte(h))
+}
+
 // FileIdentity uniquely identifies a file at a version from a FileSystem.
 type FileIdentity struct {
-	URI span.URI
-
-	// Hash is a string of hex digits denoting the cryptographic digest of the file's content.
-	Hash string
+	URI  span.URI
+	Hash Hash // digest of file contents
 }
 
 func (id FileIdentity) String() string {