maintner: sync gerrit commits

Change-Id: Ia77fe7f509d9b9f8e211ae4fae7591b1182b8048
Reviewed-on: https://go-review.googlesource.com/38659
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
diff --git a/maintner/gerrit.go b/maintner/gerrit.go
index f842a2f..15a619a 100644
--- a/maintner/gerrit.go
+++ b/maintner/gerrit.go
@@ -62,6 +62,7 @@
 	gitDir string
 	cls    map[int32]*gerritCL
 	remote map[gerritCLVersion]gitHash
+	need   map[gitHash]bool
 }
 
 func (gp *GerritProject) logf(format string, args ...interface{}) {
@@ -123,6 +124,7 @@
 	})
 }
 
+// processGerritMutation must be called with c.mu locked.
 func (c *Corpus) processGerritMutation(gm *maintpb.GerritMutation) {
 	if c.gerrit == nil {
 		// Untracked.
@@ -133,6 +135,11 @@
 		// Untracked.
 		return
 	}
+	gp.processMutation(gm)
+}
+
+// processMutation must be called with gp.gerrit.c.mu locked.
+func (gp *GerritProject) processMutation(gm *maintpb.GerritMutation) {
 	for _, refp := range gm.Refs {
 		m := rxChangeRef.FindStringSubmatch(refp.Ref)
 		if m == nil {
@@ -143,10 +150,37 @@
 		if !ok || err != nil {
 			continue
 		}
-		gp.remote[gerritCLVersion{int32(cl), version}] = gitHashFromHexStr(refp.Sha1)
+		hash := gitHashFromHexStr(refp.Sha1)
+		gp.remote[gerritCLVersion{int32(cl), version}] = hash
+		gp.markNeededCommit(hash)
 	}
 
-	// TODO: commits
+	c := gp.gerrit.c
+	for _, commitp := range gm.Commits {
+		gc, err := c.processGitCommit(commitp)
+		if err != nil {
+			continue
+		}
+		if gp.need != nil {
+			delete(gp.need, gc.hash)
+		}
+		for _, p := range gc.parents {
+			gp.markNeededCommit(p)
+		}
+	}
+}
+
+// markNeededCommit queues hash in gp.need unless the corpus already has that
+// commit. The caller must hold gp.gerrit.c.mu.
+func (gp *GerritProject) markNeededCommit(hash gitHash) {
+	if _, known := gp.gerrit.c.gitCommit[hash]; known {
+		return // nothing to fetch
+	}
+	// gp.need is created lazily on first use.
+	if gp.need == nil {
+		gp.need = make(map[gitHash]bool)
+	}
+	gp.need[hash] = true
 }
 
 func gerritVersionNumber(s string) (version int32, ok bool) {
@@ -202,6 +236,13 @@
 }
 
 func (gp *GerritProject) syncOnce(ctx context.Context) error {
+	err := gp.syncRefs(ctx)
+	if err == nil {
+		err = gp.syncCommits(ctx) // only reached when the ref sync succeeded
+	}
+	return err
+}
+
+func (gp *GerritProject) syncRefs(ctx context.Context) error {
 	c := gp.gerrit.c
 
 	fetchCtx, cancel := context.WithTimeout(ctx, time.Minute)
@@ -221,6 +262,7 @@
 	}
 
 	var changedRefs []*maintpb.GitRef
+	var toFetch []gitHash
 
 	bs := bufio.NewScanner(bytes.NewReader(out))
 	for bs.Scan() {
@@ -241,6 +283,7 @@
 		c.mu.RUnlock()
 
 		if curHash != hash {
+			toFetch = append(toFetch, hash)
 			changedRefs = append(changedRefs, &maintpb.GitRef{
 				Ref:  strings.TrimSpace(bs.Text()[len(sha1):]),
 				Sha1: string(sha1),
@@ -253,7 +296,12 @@
 	if len(changedRefs) == 0 {
 		return nil
 	}
-	gp.logf("%d new refs.", len(changedRefs))
+	gp.logf("%d new refs; fetching...", len(changedRefs))
+	if err := gp.fetchHashes(ctx, toFetch); err != nil {
+		return err
+	}
+	gp.logf("fetched %d new refs.", len(changedRefs))
+
 	c.addMutation(&maintpb.Mutation{
 		Gerrit: &maintpb.GerritMutation{
 			Project: gp.proj,
@@ -263,6 +311,37 @@
 	return nil
 }
 
+// syncCommits parses each commit returned by commitToIndex and records it via
+// addMutation until none is left; it returns ctx.Err() once ctx is done.
+func (gp *GerritProject) syncCommits(ctx context.Context) error {
+	for ctx.Err() == nil {
+		hash := gp.commitToIndex()
+		if hash == nil {
+			return nil
+		}
+		commit, err := parseCommitFromGit(gp.gitDir, hash)
+		if err != nil {
+			return err
+		}
+		gp.gerrit.c.addMutation(&maintpb.Mutation{Gerrit: &maintpb.GerritMutation{
+			Project: gp.proj,
+			Commits: []*maintpb.GitCommit{commit},
+		}})
+	}
+	return ctx.Err()
+}
+
+// commitToIndex returns some hash from gp.need, or nil when gp.need is empty.
+func (gp *GerritProject) commitToIndex() gitHash {
+	corpus := gp.gerrit.c
+	corpus.mu.RLock()
+	defer corpus.mu.RUnlock()
+	for pending := range gp.need {
+		return pending
+	}
+	return nil
+}
+
 var (
 	statusSpace = []byte("Status: ")
 )
diff --git a/maintner/git.go b/maintner/git.go
index abd7274..aac538e 100644
--- a/maintner/git.go
+++ b/maintner/git.go
@@ -146,7 +146,10 @@
 	c.mu.RLock()
 	defer c.mu.RUnlock()
 	for hash := range c.gitCommitTodo {
-		return hash
+		if _, ok := c.gitCommit[hash]; !ok {
+			return hash
+		}
+		log.Printf("Warning: git commit %v in todo map, but already known; ignoring", hash)
 	}
 	return nil
 }
@@ -158,31 +161,24 @@
 	committerSpace = []byte("committer ")
 	treeSpace      = []byte("tree ")
 	golangHgSpace  = []byte("golang-hg ")
+	gpgSigSpace    = []byte("gpgsig ")
+	space          = []byte(" ")
 )
 
-func (c *Corpus) indexCommit(conf polledGitCommits, hash gitHash) error {
-	if conf.repo == nil {
-		panic("bogus config; nil repo")
-	}
+func parseCommitFromGit(dir string, hash gitHash) (*maintpb.GitCommit, error) {
 	cmd := exec.Command("git", "cat-file", "commit", hash.String())
-	cmd.Dir = conf.dir
+	cmd.Dir = dir
 	catFile, err := cmd.Output()
 	if err != nil {
-		return fmt.Errorf("git cat-file -p %v: %v", hash, err)
+		return nil, fmt.Errorf("git cat-file -p %v: %v", hash, err)
 	}
 	cmd = exec.Command("git", "diff-tree", "--numstat", hash.String())
-	cmd.Dir = conf.dir
+	cmd.Dir = dir
 	diffTreeOut, err := cmd.Output()
 	if err != nil {
-		return fmt.Errorf("git diff-tree --numstat %v: %v", hash, err)
+		return nil, fmt.Errorf("git diff-tree --numstat %v: %v", hash, err)
 	}
 
-	c.mu.Lock()
-	if _, ok := c.gitCommit[hash]; ok {
-		c.mu.Unlock()
-		return nil
-	}
-	c.mu.Unlock()
 	diffTree := &maintpb.GitDiffTree{}
 	bs := bufio.NewScanner(bytes.NewReader(diffTreeOut))
 	lineNum := 0
@@ -218,7 +214,7 @@
 		})
 	}
 	if err := bs.Err(); err != nil {
-		return err
+		return nil, err
 	}
 	commit := &maintpb.GitCommit{
 		Raw:      catFile,
@@ -228,7 +224,18 @@
 	case gitSHA1:
 		commit.Sha1 = hash.String()
 	default:
-		return fmt.Errorf("unsupported git hash type %T", hash)
+		return nil, fmt.Errorf("unsupported git hash type %T", hash)
+	}
+	return commit, nil
+}
+
+func (c *Corpus) indexCommit(conf polledGitCommits, hash gitHash) error {
+	if conf.repo == nil {
+		panic("bogus config; nil repo")
+	}
+	commit, err := parseCommitFromGit(conf.dir, hash)
+	if err != nil {
+		return err
 	}
 	m := &maintpb.Mutation{
 		Git: &maintpb.GitMutation{
@@ -246,16 +253,20 @@
 	if commit == nil {
 		return
 	}
+	// TODO: care about m.Repo?
+	c.processGitCommit(commit)
+}
+
+func (c *Corpus) processGitCommit(commit *maintpb.GitCommit) (*gitCommit, error) {
 	if len(commit.Sha1) != 40 {
-		return
+		return nil, fmt.Errorf("bogus git sha1 %q", commit.Sha1)
 	}
 	hash := gitHashFromHexStr(commit.Sha1)
 
 	catFile := commit.Raw
 	i := bytes.Index(catFile, nlnl)
 	if i == 0 {
-		log.Printf("Unparseable commit %q", hash)
-		return
+		return nil, fmt.Errorf("commit %v lacks double newline", hash)
 	}
 	hdr, msg := catFile[:i], catFile[i+2:]
 	gc := &gitCommit{
@@ -307,12 +318,17 @@
 			c.gitOfHg[string(ln[len(golangHgSpace):])] = hash
 			return nil
 		}
+		if bytes.HasPrefix(ln, gpgSigSpace) || bytes.HasPrefix(ln, space) {
+			// Skip gpgsig headers and any header line that starts with a space.
+			return nil
+
+		}
 		log.Printf("in commit %s, unrecognized line %q", hash, ln)
 		return nil
 	})
 	if err != nil {
 		log.Printf("Unparseable commit %q: %v", hash, err)
-		return
+		return nil, fmt.Errorf("Unparseable commit %q: %v", hash, err)
 	}
 	if c.gitCommit == nil {
 		c.gitCommit = map[gitHash]*gitCommit{}
@@ -324,6 +340,7 @@
 	if n := len(c.gitCommit); n%100 == 0 && c.Verbose {
 		log.Printf("Num git commits = %v", n)
 	}
+	return gc, nil
 }
 
 // calls f on each non-empty line in v, without the trailing \n. the