maintner: change representation of gitHash to string, intern them

Saves a tiny amount of memory, but many fewer allocation.

name   old time/op    new time/op    delta
Get-4     4.97s ±19%     5.22s ± 9%    ~     (p=0.421 n=5+5)

name   old alloc/op   new alloc/op   delta
Get-4    1.10GB ± 0%    1.09GB ± 0%  -0.67%  (p=0.008 n=5+5)

name   old allocs/op  new allocs/op  delta
Get-4     14.2M ± 0%     12.9M ± 0%  -9.38%  (p=0.008 n=5+5)

Change-Id: Ia4d383b7115a856de517002357c069e6b3eee7ef
Reviewed-on: https://go-review.googlesource.com/38782
Reviewed-by: Kevin Burke <kev@inburke.com>
diff --git a/maintner/gerrit.go b/maintner/gerrit.go
index 1fa99f5..94d344f 100644
--- a/maintner/gerrit.go
+++ b/maintner/gerrit.go
@@ -165,7 +165,7 @@
 		if !ok || err != nil {
 			continue
 		}
-		hash := gitHashFromHexStr(refp.Sha1)
+		hash := c.gitHashFromHexStr(refp.Sha1)
 		gc, ok := c.gitCommit[hash]
 		if !ok {
 			gp.logf("ERROR: ref %v references unknown hash %v; ignoring", refp, hash)
@@ -300,7 +300,12 @@
 	var toFetch []gitHash
 
 	bs := bufio.NewScanner(bytes.NewReader(out))
-	c.mu.RLock() // to access gp.remote; okay because ls-remote output all in out already
+
+	// Take the lock here to access gp.remote and call c.gitHashFromHex.
+	// It's acceptable to take such a coarse-looking lock because
+	// it's not actually around I/O: all the input from ls-remote has
+	// already been slurped into memory.
+	c.mu.Lock()
 	for bs.Scan() {
 		m := rxRemoteRef.FindSubmatch(bs.Bytes())
 		if m == nil {
@@ -312,7 +317,7 @@
 			continue
 		}
 		sha1 := m[1]
-		hash := gitHashFromHex(sha1)
+		hash := c.gitHashFromHex(sha1)
 
 		curHash := gp.remote[gerritCLVersion{int32(clNum), version}]
 
@@ -324,7 +329,7 @@
 			})
 		}
 	}
-	c.mu.RUnlock()
+	c.mu.Unlock()
 	if err := bs.Err(); err != nil {
 		return err
 	}
@@ -370,7 +375,7 @@
 	lastLog := time.Now()
 	for {
 		hash := gp.commitToIndex()
-		if hash == nil {
+		if hash == "" {
 			return n, nil
 		}
 		now := time.Now()
@@ -400,7 +405,7 @@
 	for hash := range gp.need {
 		return hash
 	}
-	return nil
+	return ""
 }
 
 var (
diff --git a/maintner/git.go b/maintner/git.go
index 8063af4..106972b 100644
--- a/maintner/git.go
+++ b/maintner/git.go
@@ -21,48 +21,38 @@
 	"golang.org/x/build/maintner/maintpb"
 )
 
-type gitHash interface {
-	String() string
-	Less(gitHash) bool
-}
+// gitHash is a git commit in binary form (NOT hex form).
+// They are currently always 20 bytes long. (for SHA-1 refs)
+// That may change in the future.
+type gitHash string
 
-// gitSHA1 (the value type) is the current (only) implementation of
-// the gitHash interface.
-type gitSHA1 [20]byte
+func (h gitHash) String() string { return fmt.Sprintf("%x", string(h)) }
 
-func (h gitSHA1) String() string { return fmt.Sprintf("%x", h[:]) }
-
-func (h gitSHA1) Less(h2 gitHash) bool {
-	switch h2 := h2.(type) {
-	case gitSHA1:
-		return bytes.Compare(h[:], h2[:]) < 0
-	default:
-		panic("unsupported type")
-	}
-}
-
-func gitHashFromHexStr(s string) gitHash {
+// requires c.mu be held for writing
+func (c *Corpus) gitHashFromHexStr(s string) gitHash {
 	if len(s) != 40 {
 		panic(fmt.Sprintf("bogus git hash %q", s))
 	}
-	var hash gitSHA1
-	n, err := hex.Decode(hash[:], []byte(s)) // TODO: garbage
-	if n != 20 || err != nil {
-		panic(fmt.Sprintf("bogus git hash %q", s))
+	var buf [40]byte
+	copy(buf[:], s)
+	_, err := hex.Decode(buf[:20], buf[:]) // aliasing is safe
+	if err != nil {
+		panic(fmt.Sprintf("bogus git hash %q: %v", s, err))
 	}
-	return hash
+	return gitHash(c.strb(buf[:20]))
 }
 
-func gitHashFromHex(s []byte) gitHash {
+// requires c.mu be held for writing
+func (c *Corpus) gitHashFromHex(s []byte) gitHash {
 	if len(s) != 40 {
 		panic(fmt.Sprintf("bogus git hash %q", s))
 	}
-	var hash gitSHA1
-	n, err := hex.Decode(hash[:], s)
-	if n != 20 || err != nil {
-		panic(fmt.Sprintf("bogus git hash %q", s))
+	var buf [20]byte
+	_, err := hex.Decode(buf[:], s)
+	if err != nil {
+		panic(fmt.Sprintf("bogus git hash %q: %v", s, err))
 	}
-	return hash
+	return gitHash(c.strb(buf[:20]))
 }
 
 type gitCommit struct {
@@ -105,15 +95,15 @@
 		return fmt.Errorf("no remote found for refs/remotes/origin/master")
 	}
 	ref := strings.Fields(outs)[0]
-	refHash := gitHashFromHexStr(ref)
 	c.mu.Lock()
+	refHash := c.gitHashFromHexStr(ref)
 	c.enqueueCommitLocked(refHash)
 	c.mu.Unlock()
 
 	idle := false
 	for {
 		hash := c.gitCommitToIndex()
-		if hash == nil {
+		if hash == "" {
 			if !loop {
 				return nil
 			}
@@ -147,7 +137,7 @@
 		}
 		log.Printf("Warning: git commit %v in todo map, but already known; ignoring", hash)
 	}
-	return nil
+	return ""
 }
 
 var (
@@ -217,11 +207,11 @@
 		Raw:      catFile,
 		DiffTree: diffTree,
 	}
-	switch hash.(type) {
-	case gitSHA1:
+	switch len(hash) {
+	case 20:
 		commit.Sha1 = hash.String()
 	default:
-		return nil, fmt.Errorf("unsupported git hash type %T", hash)
+		return nil, fmt.Errorf("unsupported git hash %q", hash.String())
 	}
 	return commit, nil
 }
@@ -244,7 +234,7 @@
 	return nil
 }
 
-// Note: c.mu is held for writing.
+// c.mu is held for writing.
 func (c *Corpus) processGitMutation(m *maintpb.GitMutation) {
 	commit := m.Commit
 	if commit == nil {
@@ -254,11 +244,12 @@
 	c.processGitCommit(commit)
 }
 
+// c.mu is held for writing.
 func (c *Corpus) processGitCommit(commit *maintpb.GitCommit) (*gitCommit, error) {
 	if len(commit.Sha1) != 40 {
 		return nil, fmt.Errorf("bogus git sha1 %q", commit.Sha1)
 	}
-	hash := gitHashFromHexStr(commit.Sha1)
+	hash := c.gitHashFromHexStr(commit.Sha1)
 
 	catFile := commit.Raw
 	i := bytes.Index(catFile, nlnl)
@@ -281,7 +272,7 @@
 	err := foreachLine(hdr, func(ln []byte) error {
 		if bytes.HasPrefix(ln, parentSpace) {
 			parents++
-			parentHash := gitHashFromHex(ln[len(parentSpace):])
+			parentHash := c.gitHashFromHex(ln[len(parentSpace):])
 			gc.parents = append(gc.parents, parentHash)
 			c.enqueueCommitLocked(parentHash)
 			return nil
@@ -305,7 +296,7 @@
 			return nil
 		}
 		if bytes.HasPrefix(ln, treeSpace) {
-			gc.tree = gitHashFromHex(ln[len(treeSpace):])
+			gc.tree = c.gitHashFromHex(ln[len(treeSpace):])
 			return nil
 		}
 		if bytes.HasPrefix(ln, golangHgSpace) {
diff --git a/maintner/maintner.go b/maintner/maintner.go
index e5f18a5..dbb3ece 100644
--- a/maintner/maintner.go
+++ b/maintner/maintner.go
@@ -38,7 +38,8 @@
 	// corpus state:
 	didInit   bool // true after Initialize completes successfully
 	debug     bool
-	strIntern map[string]string // interned strings
+	strIntern map[string]string // interned strings, including binary githashes
+
 	// github-specific
 	github             *GitHub
 	gerrit             *Gerrit