maintner: change representation of gitHash to string, intern them
Saves a tiny amount of memory, but many fewer allocation.
name old time/op new time/op delta
Get-4 4.97s ±19% 5.22s ± 9% ~ (p=0.421 n=5+5)
name old alloc/op new alloc/op delta
Get-4 1.10GB ± 0% 1.09GB ± 0% -0.67% (p=0.008 n=5+5)
name old allocs/op new allocs/op delta
Get-4 14.2M ± 0% 12.9M ± 0% -9.38% (p=0.008 n=5+5)
Change-Id: Ia4d383b7115a856de517002357c069e6b3eee7ef
Reviewed-on: https://go-review.googlesource.com/38782
Reviewed-by: Kevin Burke <kev@inburke.com>
diff --git a/maintner/gerrit.go b/maintner/gerrit.go
index 1fa99f5..94d344f 100644
--- a/maintner/gerrit.go
+++ b/maintner/gerrit.go
@@ -165,7 +165,7 @@
if !ok || err != nil {
continue
}
- hash := gitHashFromHexStr(refp.Sha1)
+ hash := c.gitHashFromHexStr(refp.Sha1)
gc, ok := c.gitCommit[hash]
if !ok {
gp.logf("ERROR: ref %v references unknown hash %v; ignoring", refp, hash)
@@ -300,7 +300,12 @@
var toFetch []gitHash
bs := bufio.NewScanner(bytes.NewReader(out))
- c.mu.RLock() // to access gp.remote; okay because ls-remote output all in out already
+
+ // Take the lock here to access gp.remote and call c.gitHashFromHex.
+ // It's acceptable to take such a coarse-looking lock because
+ // it's not actually around I/O: all the input from ls-remote has
+ // already been slurped into memory.
+ c.mu.Lock()
for bs.Scan() {
m := rxRemoteRef.FindSubmatch(bs.Bytes())
if m == nil {
@@ -312,7 +317,7 @@
continue
}
sha1 := m[1]
- hash := gitHashFromHex(sha1)
+ hash := c.gitHashFromHex(sha1)
curHash := gp.remote[gerritCLVersion{int32(clNum), version}]
@@ -324,7 +329,7 @@
})
}
}
- c.mu.RUnlock()
+ c.mu.Unlock()
if err := bs.Err(); err != nil {
return err
}
@@ -370,7 +375,7 @@
lastLog := time.Now()
for {
hash := gp.commitToIndex()
- if hash == nil {
+ if hash == "" {
return n, nil
}
now := time.Now()
@@ -400,7 +405,7 @@
for hash := range gp.need {
return hash
}
- return nil
+ return ""
}
var (
diff --git a/maintner/git.go b/maintner/git.go
index 8063af4..106972b 100644
--- a/maintner/git.go
+++ b/maintner/git.go
@@ -21,48 +21,38 @@
"golang.org/x/build/maintner/maintpb"
)
-type gitHash interface {
- String() string
- Less(gitHash) bool
-}
+// gitHash is a git commit in binary form (NOT hex form).
+// They are currently always 20 bytes long. (for SHA-1 refs)
+// That may change in the future.
+type gitHash string
-// gitSHA1 (the value type) is the current (only) implementation of
-// the gitHash interface.
-type gitSHA1 [20]byte
+func (h gitHash) String() string { return fmt.Sprintf("%x", string(h)) }
-func (h gitSHA1) String() string { return fmt.Sprintf("%x", h[:]) }
-
-func (h gitSHA1) Less(h2 gitHash) bool {
- switch h2 := h2.(type) {
- case gitSHA1:
- return bytes.Compare(h[:], h2[:]) < 0
- default:
- panic("unsupported type")
- }
-}
-
-func gitHashFromHexStr(s string) gitHash {
+// requires c.mu be held for writing
+func (c *Corpus) gitHashFromHexStr(s string) gitHash {
if len(s) != 40 {
panic(fmt.Sprintf("bogus git hash %q", s))
}
- var hash gitSHA1
- n, err := hex.Decode(hash[:], []byte(s)) // TODO: garbage
- if n != 20 || err != nil {
- panic(fmt.Sprintf("bogus git hash %q", s))
+ var buf [40]byte
+ copy(buf[:], s)
+ _, err := hex.Decode(buf[:20], buf[:]) // aliasing is safe
+ if err != nil {
+ panic(fmt.Sprintf("bogus git hash %q: %v", s, err))
}
- return hash
+ return gitHash(c.strb(buf[:20]))
}
-func gitHashFromHex(s []byte) gitHash {
+// requires c.mu be held for writing
+func (c *Corpus) gitHashFromHex(s []byte) gitHash {
if len(s) != 40 {
panic(fmt.Sprintf("bogus git hash %q", s))
}
- var hash gitSHA1
- n, err := hex.Decode(hash[:], s)
- if n != 20 || err != nil {
- panic(fmt.Sprintf("bogus git hash %q", s))
+ var buf [20]byte
+ _, err := hex.Decode(buf[:], s)
+ if err != nil {
+ panic(fmt.Sprintf("bogus git hash %q: %v", s, err))
}
- return hash
+ return gitHash(c.strb(buf[:20]))
}
type gitCommit struct {
@@ -105,15 +95,15 @@
return fmt.Errorf("no remote found for refs/remotes/origin/master")
}
ref := strings.Fields(outs)[0]
- refHash := gitHashFromHexStr(ref)
c.mu.Lock()
+ refHash := c.gitHashFromHexStr(ref)
c.enqueueCommitLocked(refHash)
c.mu.Unlock()
idle := false
for {
hash := c.gitCommitToIndex()
- if hash == nil {
+ if hash == "" {
if !loop {
return nil
}
@@ -147,7 +137,7 @@
}
log.Printf("Warning: git commit %v in todo map, but already known; ignoring", hash)
}
- return nil
+ return ""
}
var (
@@ -217,11 +207,11 @@
Raw: catFile,
DiffTree: diffTree,
}
- switch hash.(type) {
- case gitSHA1:
+ switch len(hash) {
+ case 20:
commit.Sha1 = hash.String()
default:
- return nil, fmt.Errorf("unsupported git hash type %T", hash)
+ return nil, fmt.Errorf("unsupported git hash %q", hash.String())
}
return commit, nil
}
@@ -244,7 +234,7 @@
return nil
}
-// Note: c.mu is held for writing.
+// c.mu is held for writing.
func (c *Corpus) processGitMutation(m *maintpb.GitMutation) {
commit := m.Commit
if commit == nil {
@@ -254,11 +244,12 @@
c.processGitCommit(commit)
}
+// c.mu is held for writing.
func (c *Corpus) processGitCommit(commit *maintpb.GitCommit) (*gitCommit, error) {
if len(commit.Sha1) != 40 {
return nil, fmt.Errorf("bogus git sha1 %q", commit.Sha1)
}
- hash := gitHashFromHexStr(commit.Sha1)
+ hash := c.gitHashFromHexStr(commit.Sha1)
catFile := commit.Raw
i := bytes.Index(catFile, nlnl)
@@ -281,7 +272,7 @@
err := foreachLine(hdr, func(ln []byte) error {
if bytes.HasPrefix(ln, parentSpace) {
parents++
- parentHash := gitHashFromHex(ln[len(parentSpace):])
+ parentHash := c.gitHashFromHex(ln[len(parentSpace):])
gc.parents = append(gc.parents, parentHash)
c.enqueueCommitLocked(parentHash)
return nil
@@ -305,7 +296,7 @@
return nil
}
if bytes.HasPrefix(ln, treeSpace) {
- gc.tree = gitHashFromHex(ln[len(treeSpace):])
+ gc.tree = c.gitHashFromHex(ln[len(treeSpace):])
return nil
}
if bytes.HasPrefix(ln, golangHgSpace) {
diff --git a/maintner/maintner.go b/maintner/maintner.go
index e5f18a5..dbb3ece 100644
--- a/maintner/maintner.go
+++ b/maintner/maintner.go
@@ -38,7 +38,8 @@
// corpus state:
didInit bool // true after Initialize completes successfully
debug bool
- strIntern map[string]string // interned strings
+ strIntern map[string]string // interned strings, including binary githashes
+
// github-specific
github *GitHub
gerrit *Gerrit