maintner: add GitCommit.HasAncestor method

maintnerd will have an API to query this in a subsequent CL.

Updates golang/go#20222

Change-Id: I3cb0ce2897eea782f627c2e27c4d02ef0eb6c66b
Reviewed-on: https://go-review.googlesource.com/43554
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
diff --git a/maintner/gerrit.go b/maintner/gerrit.go
index 85c5273..493fb7e 100644
--- a/maintner/gerrit.go
+++ b/maintner/gerrit.go
@@ -339,7 +339,7 @@
 		if len(commit.Parents) == 0 {
 			return "new"
 		}
-		parentHash := commit.Parents[0] // meta tree has no merge commits
+		parentHash := commit.Parents[0].Hash // meta tree has no merge commits
 		commit = c.gitCommit[parentHash]
 		if commit == nil {
 			gp.logf("getGerritStatus: did not find parent commit %s", parentHash)
@@ -402,7 +402,7 @@
 			delete(gp.need, gc.Hash)
 		}
 		for _, p := range gc.Parents {
-			gp.markNeededCommit(p)
+			gp.markNeededCommit(p.Hash)
 		}
 	}
 }
diff --git a/maintner/git.go b/maintner/git.go
index 6d95ee6..13a3e0c 100644
--- a/maintner/git.go
+++ b/maintner/git.go
@@ -54,11 +54,17 @@
 	return GitHash(c.strb(buf[:20]))
 }
 
+// placeholderCommitter is a sentinel value for GitCommit.Committer to
+// mean that the GitCommit is a placeholder. It's used for commits we
+// know should exist (because they're referenced as parents) but we
+// haven't yet seen in the log.
+var placeholderCommitter = new(GitPerson)
+
 // GitCommit represents a single commit in a git repository.
 type GitCommit struct {
 	Hash       GitHash
 	Tree       GitHash
-	Parents    []GitHash
+	Parents    []*GitCommit
 	Author     *GitPerson
 	AuthorTime time.Time
 	Committer  *GitPerson
@@ -67,6 +73,32 @@
 	Files      []*maintpb.GitDiffTreeFile
 }
 
+// HasAncestor reports whether gc contains the provided ancestor
+// commit in gc's history.
+func (gc *GitCommit) HasAncestor(ancestor *GitCommit) bool {
+	return gc.hasAncestor(ancestor, make(map[*GitCommit]bool))
+}
+
+func (gc *GitCommit) hasAncestor(ancestor *GitCommit, checked map[*GitCommit]bool) bool {
+	if v, ok := checked[ancestor]; ok {
+		return v
+	}
+	checked[gc] = false
+	for _, pc := range gc.Parents {
+		if pc == nil {
+			panic("nil parent")
+		}
+		if pc.Committer == placeholderCommitter {
+			panic("found placeholder")
+		}
+		if pc.Hash == ancestor.Hash || pc.hasAncestor(ancestor, checked) {
+			checked[gc] = true
+			return true
+		}
+	}
+	return false
+}
+
 // GitPerson is a person in a git commit.
 type GitPerson struct {
 	Str string // "Foo Bar <foo@bar.com>"
@@ -266,6 +298,9 @@
 
 // c.mu is held for writing.
 func (c *Corpus) processGitCommit(commit *maintpb.GitCommit) (*GitCommit, error) {
+	if c.gitCommit == nil {
+		c.gitCommit = map[GitHash]*GitCommit{}
+	}
 	if len(commit.Sha1) != 40 {
 		return nil, fmt.Errorf("bogus git sha1 %q", commit.Sha1)
 	}
@@ -279,7 +314,7 @@
 	hdr, msg := catFile[:i], catFile[i+2:]
 	gc := &GitCommit{
 		Hash:    hash,
-		Parents: make([]GitHash, 0, bytes.Count(hdr, parentSpace)),
+		Parents: make([]*GitCommit, 0, bytes.Count(hdr, parentSpace)),
 		Msg:     c.strb(msg),
 	}
 	if commit.DiffTree != nil {
@@ -293,7 +328,16 @@
 		if bytes.HasPrefix(ln, parentSpace) {
 			parents++
 			parentHash := c.gitHashFromHex(ln[len(parentSpace):])
-			gc.Parents = append(gc.Parents, parentHash)
+			parent := c.gitCommit[parentHash]
+			if parent == nil {
+				// Install a placeholder to be filled in later.
+				parent = &GitCommit{
+					Hash:      parentHash,
+					Committer: placeholderCommitter,
+				}
+				c.gitCommit[parentHash] = parent
+			}
+			gc.Parents = append(gc.Parents, parent)
 			c.enqueueCommitLocked(parentHash)
 			return nil
 		}
@@ -344,10 +388,12 @@
 		log.Printf("Unparseable commit %q: %v", hash, err)
 		return nil, fmt.Errorf("Unparseable commit %q: %v", hash, err)
 	}
-	if c.gitCommit == nil {
-		c.gitCommit = map[GitHash]*GitCommit{}
+	if ph, ok := c.gitCommit[hash]; ok {
+		// Update placeholder.
+		*ph = *gc
+	} else {
+		c.gitCommit[hash] = gc
 	}
-	c.gitCommit[hash] = gc
 	if c.gitCommitTodo != nil {
 		delete(c.gitCommitTodo, hash)
 	}
diff --git a/maintner/godata/godata_test.go b/maintner/godata/godata_test.go
index a645fea..76f6a6d 100644
--- a/maintner/godata/godata_test.go
+++ b/maintner/godata/godata_test.go
@@ -6,7 +6,10 @@
 
 import (
 	"context"
+	"sync"
 	"testing"
+
+	"golang.org/x/build/maintner"
 )
 
 func BenchmarkGet(b *testing.B) {
@@ -18,3 +21,76 @@
 		}
 	}
 }
+
+var (
+	corpusMu    sync.Mutex
+	corpusCache *maintner.Corpus
+)
+
+func getGoData(tb testing.TB) *maintner.Corpus {
+	corpusMu.Lock()
+	defer corpusMu.Unlock()
+	if corpusCache != nil {
+		return corpusCache
+	}
+	var err error
+	corpusCache, err = Get(context.Background())
+	if err != nil {
+		tb.Fatalf("getting corpus: %v", err)
+	}
+	return corpusCache
+}
+
+func TestCorpusCheck(t *testing.T) {
+	c := getGoData(t)
+	if err := c.Check(); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestGitAncestor(t *testing.T) {
+	c := getGoData(t)
+	tests := []struct {
+		subject, ancestor string
+		want              bool
+	}{
+		{"3b5637ff2bd5c03479780995e7a35c48222157c1", "0bb0b61d6a85b2a1a33dcbc418089656f2754d32", true},
+		{"0bb0b61d6a85b2a1a33dcbc418089656f2754d32", "3b5637ff2bd5c03479780995e7a35c48222157c1", false},
+
+		// Same on both sides:
+		{"0bb0b61d6a85b2a1a33dcbc418089656f2754d32", "0bb0b61d6a85b2a1a33dcbc418089656f2754d32", false},
+		{"3b5637ff2bd5c03479780995e7a35c48222157c1", "3b5637ff2bd5c03479780995e7a35c48222157c1", false},
+	}
+	for i, tt := range tests {
+		subject := c.GitCommit(tt.subject)
+		if subject == nil {
+			t.Errorf("%d. missing subject commit %q", i, tt.subject)
+			continue
+		}
+		anc := c.GitCommit(tt.ancestor)
+		if anc == nil {
+			t.Errorf("%d. missing ancestor commit %q", i, tt.ancestor)
+			continue
+		}
+		got := subject.HasAncestor(anc)
+		if got != tt.want {
+			t.Errorf("HasAncestor(%q, %q) = %v; want %v", tt.subject, tt.ancestor, got, tt.want)
+		}
+	}
+}
+
+func BenchmarkGitAncestor(b *testing.B) {
+	c := getGoData(b)
+	subject := c.GitCommit("3b5637ff2bd5c03479780995e7a35c48222157c1")
+	anc := c.GitCommit("0bb0b61d6a85b2a1a33dcbc418089656f2754d32")
+	if subject == nil || anc == nil {
+		b.Fatal("missing commit(s)")
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		if !subject.HasAncestor(anc) {
+			b.Fatal("wrong answer")
+		}
+	}
+}
diff --git a/maintner/maintner.go b/maintner/maintner.go
index 1935a47..cfdd688 100644
--- a/maintner/maintner.go
+++ b/maintner/maintner.go
@@ -60,6 +60,15 @@
 	zoneCache     map[string]*time.Location // "+0530" => location
 }
 
+// RLock grabs the corpus's read lock. Grabbing the read lock prevents
+// any concurrent writes from mutation the corpus. This is only
+// necessary if the application is querying the corpus and calling its
+// Update method concurrently.
+func (c *Corpus) RLock() { c.mu.RLock() }
+
+// RUnlock unlocks the corpus's read lock.
+func (c *Corpus) RUnlock() { c.mu.RUnlock() }
+
 type polledGitCommits struct {
 	repo *maintpb.GitRepo
 	dir  string
@@ -74,6 +83,7 @@
 	c.dataDir = scratchDir
 }
 
+// SetVerbose enables or disables verbose logging.
 func (c *Corpus) SetVerbose(v bool) { c.verbose = v }
 
 func (c *Corpus) getDataDir() string {
@@ -99,6 +109,20 @@
 	return new(Gerrit)
 }
 
+// Check verifies the internal structure of the Corpus data structures.
+// It is intended for tests and debugging.
+func (c *Corpus) Check() error {
+	for hash, gc := range c.gitCommit {
+		if gc.Committer == placeholderCommitter {
+			return fmt.Errorf("git commit for key %q was placeholder", hash)
+		}
+		if gc.Hash != hash {
+			return fmt.Errorf("git commit for key %q had GitCommit.Hash %q", hash, gc.Hash)
+		}
+	}
+	return nil
+}
+
 // GitCommit returns the provided git commit, or nil if it's unknown.
 func (c *Corpus) GitCommit(hash string) *GitCommit {
 	if len(hash) != 40 {