maintner: parse Gerrit reply bodies

The contents of a Gerrit reply are stored in the body of the commit
message for a meta commit, but we currently ignore them. Parse the
message text when we walk the tree of meta commits, and return it when
we return a CL.

The API returns these in very raw fashion - what you see in a reply
in the UI is what you get in the git commit. We don't do any parsing
either. We could add helpers on the GerritMessage object to try to
return you better formatted information.

To see what the API returns for these messages, visit this URL:
https://go-review.googlesource.com/changes/42180?o=MESSAGES. The
messages attached to a specific line and file are stored a different
way and returned via a different API. It's trickier to get those so
I'll add them in a separate commit.

Change-Id: Ibd56fb828c225b57650157d08b27034ece75848a
Reviewed-on: https://go-review.googlesource.com/42452
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
diff --git a/maintner/gerrit.go b/maintner/gerrit.go
index c1f791b..fc19817 100644
--- a/maintner/gerrit.go
+++ b/maintner/gerrit.go
@@ -13,6 +13,7 @@
 	"bufio"
 	"bytes"
 	"context"
+	"errors"
 	"fmt"
 	"log"
 	"net/url"
@@ -233,6 +234,30 @@
 
 	// GitHubIssueRefs are parsed references to GitHub issues.
 	GitHubIssueRefs []GitHubIssueRef
+
+	// Messages contains all of the messages for this CL, in sorted order.
+	Messages []*GerritMessage
+}
+
+// GerritMessage is a Gerrit reply that is attached to the CL as a whole, and
+// not to a file or line of a patch set.
+//
+// Maintner does very little parsing or formatting of a Message body. Messages
+// are stored the same way they are stored in the API.
+type GerritMessage struct {
+	// Version is the patch set version this message was sent on.
+	Version int32
+
+	// Message is the raw message contents from Gerrit (a subset
+	// of the raw git commit message), starting with "Patch Set
+	// nnnn".
+	Message string
+
+	// Date is when this message was stored (the commit time of
+	// the git commit).
+	Date time.Time
+
+	// TODO author id etc.
 }
 
 // References reports whether cl includes a commit message reference
@@ -297,7 +322,6 @@
 	if c.mutationLogger == nil {
 		panic("can't TrackGerrit in non-leader mode")
 	}
-
 	c.mu.Lock()
 	defer c.mu.Unlock()
 
@@ -340,9 +364,8 @@
 //     git push origin HEAD:refs/drafts/master
 var statuses = []string{"merged", "abandoned", "draft", "new"}
 
-// getGerritStatus takes a current and previous commit, and returns a Gerrit
-// status, or the empty string to indicate the status did not change between the
-// two commits.
+// getGerritStatus returns a Gerrit status for a commit, or the empty string to
+// indicate the commit did not show a status.
 //
 // getGerritStatus relies on the Gerrit code review convention of amending
 // the meta commit to include the current status of the CL. The Gerrit search
@@ -351,33 +374,90 @@
 // returns only "NEW", "DRAFT", "ABANDONED", "MERGED". Gerrit attaches "draft",
 // "abandoned", "new", and "merged" statuses to some meta commits; you may have
 // to search the current meta commit's parents to find the last good commit.
+func getGerritStatus(commit *GitCommit) string {
+	idx := strings.Index(commit.Msg, statusIndicator)
+	if idx == -1 {
+		return ""
+	}
+	off := idx + len(statusIndicator)
+	for _, status := range statuses {
+		if strings.HasPrefix(commit.Msg[off:], status) {
+			return status
+		}
+	}
+	return ""
+}
+
+var errStopIteration = errors.New("stop iteration")
+var errTooManyParents = errors.New("maintner: too many commit parents")
+
+// foreachCommitParent walks a commit's parents, calling f for each commit until
+// an error is returned from f or a commit has no parent.
+//
+// foreachCommitParent returns errTooManyParents (and stops processing) if a commit
+// has more than one parent.
 //
 // Corpus.mu must be held.
-func (gp *GerritProject) getGerritStatus(currentMeta, oldMeta *GitCommit) string {
-	commit := currentMeta
+func (gp *GerritProject) foreachCommitParent(hash GitHash, f func(*GitCommit) error) error {
 	c := gp.gerrit.c
+	commit := c.gitCommit[hash]
 	for {
-		idx := strings.Index(commit.Msg, statusIndicator)
-		if idx > -1 {
-			off := idx + len(statusIndicator)
-			for _, status := range statuses {
-				if strings.HasPrefix(commit.Msg[off:], status) {
-					return status
-				}
-			}
+		if commit == nil {
+			return nil
 		}
-		if len(commit.Parents) == 0 {
-			return "new"
+		if err := f(commit); err != nil {
+			return err
+		}
+		if commit.Parents == nil || len(commit.Parents) == 0 {
+			return nil
+		}
+		if len(commit.Parents) > 1 {
+			return errTooManyParents
 		}
 		parentHash := commit.Parents[0].Hash // meta tree has no merge commits
 		commit = c.gitCommit[parentHash]
-		if commit == nil {
-			gp.logf("getGerritStatus: did not find parent commit %s", parentHash)
-			return "new"
+	}
+}
+
+// getGerritMessage parses a Gerrit comment from the given commit or returns nil
+// if there wasn't one.
+//
+// Corpus.mu must be held.
+func (gp *GerritProject) getGerritMessage(commit *GitCommit) *GerritMessage {
+	start := strings.Index(commit.Msg, "\nPatch Set ")
+	if start == -1 {
+		return nil
+	}
+	numStart := start + len("\nPatch Set ")
+	colon := strings.IndexByte(commit.Msg[numStart:], ':')
+	if colon == -1 {
+		return nil
+	}
+	version, err := strconv.ParseInt(commit.Msg[numStart:numStart+colon], 10, 32)
+	if err != nil {
+		gp.logf("unexpected patch set number in %s; err: %v", commit.Hash, err)
+		return nil
+	}
+	start++
+	v := commit.Msg[start:]
+	l := 0
+	for {
+		i := strings.IndexByte(v, '\n')
+		if i < 0 {
+			return nil
 		}
-		if oldMeta != nil && commit.Hash == oldMeta.Hash {
-			return ""
+		if strings.HasPrefix(v[:i], "Patch-set:") {
+			// two newlines before the Patch-set message
+			v = commit.Msg[start : start+l-2]
+			break
 		}
+		v = v[i+1:]
+		l = l + i + 1
+	}
+	return &GerritMessage{
+		Date:    commit.CommitTime,
+		Message: v,
+		Version: int32(version),
 	}
 }
 
@@ -420,12 +500,35 @@
 		if clv.Version == 0 {
 			oldMeta := cl.Meta
 			cl.Meta = gc
-			if status := gp.getGerritStatus(cl.Meta, oldMeta); status != "" {
-				cl.Status = status
+			foundStatus := ""
+			var messages []*GerritMessage
+			gp.foreachCommitParent(cl.Meta.Hash, func(gc *GitCommit) error {
+				if status := getGerritStatus(gc); status != "" && foundStatus == "" {
+					foundStatus = status
+				}
+				if message := gp.getGerritMessage(gc); message != nil {
+					// Walk from the newest commit backwards, so we need to
+					// insert all messages at the beginning of the array.
+					// TODO: store these in reverse order instead and avoid
+					// the quadratic inserting at the beginning.
+					messages = append(messages, nil)
+					copy(messages[1:], messages[:])
+					messages[0] = message
+				}
+				if oldMeta != nil && gc.Hash == oldMeta.Hash {
+					return errStopIteration
+				}
+				return nil
+			})
+			if foundStatus != "" {
+				cl.Status = foundStatus
+			} else if cl.Status == "" {
+				cl.Status = "new"
 			}
 			if cl.Created.IsZero() || gc.CommitTime.Before(cl.Created) {
 				cl.Created = gc.CommitTime
 			}
+			cl.Messages = append(cl.Messages, messages...)
 		} else {
 			cl.Commit = gc
 			cl.Version = clv.Version
diff --git a/maintner/gerrit_test.go b/maintner/gerrit_test.go
index 81b0ecd..180c519 100644
--- a/maintner/gerrit_test.go
+++ b/maintner/gerrit_test.go
@@ -4,7 +4,10 @@
 
 package maintner
 
-import "testing"
+import (
+	"testing"
+	"time"
+)
 
 var statusTests = []struct {
 	msg  string
@@ -36,17 +39,13 @@
 Subject: devapp: initial support for App Engine Flex
 Commit: 17839a9f284b473986f235ad2757a2b445d05068
 Tag: autogenerated:gerrit:newPatchSet
-Groups: 17839a9f284b473986f235ad2757a2b445d05068`, "new"},
+Groups: 17839a9f284b473986f235ad2757a2b445d05068`, ""},
 }
 
 func TestGetGerritStatus(t *testing.T) {
-	var c Corpus
-	c.EnableLeaderMode(new(dummyMutationLogger), "/fake/dir")
-	c.TrackGerrit("go.googlesource.com/build")
-	gp := c.gerrit.projects["go.googlesource.com/build"]
 	for _, tt := range statusTests {
 		gc := &GitCommit{Msg: tt.msg}
-		got := gp.getGerritStatus(gc, nil)
+		got := getGerritStatus(gc)
 		if got != tt.want {
 			t.Errorf("getGerritStatus msg:\n%s\ngot %s, want %s", tt.msg, got, tt.want)
 		}
@@ -85,3 +84,43 @@
 		t.Errorf("expected go-review.googlesource.com to return nil, got a project")
 	}
 }
+
+var messageTests = []struct {
+	in  string
+	out string
+}{
+	{`Update patch set 1
+
+Patch Set 1: Code-Review+2
+
+Just to confirm, "go test" will consider an empty test file to be passing?
+
+Patch-set: 1
+Reviewer: Quentin Smith <13020@62eb7196-b449-3ce5-99f1-c037f21e1705>
+Label: Code-Review=+2
+`, "Patch Set 1: Code-Review+2\n\nJust to confirm, \"go test\" will consider an empty test file to be passing?"},
+}
+
+func TestGetGerritMessage(t *testing.T) {
+	var c Corpus
+	c.EnableLeaderMode(new(dummyMutationLogger), "/fake/dir")
+	c.TrackGerrit("go.googlesource.com/build")
+	gp := c.gerrit.projects["go.googlesource.com/build"]
+	for _, tt := range messageTests {
+		gc := &GitCommit{
+			Msg:        tt.in,
+			CommitTime: time.Now().UTC(),
+		}
+		msg := gp.getGerritMessage(gc)
+		// just checking these get copied through appropriately
+		if msg.Version != 1 {
+			t.Errorf("getGerritMessage: want Version 1, got %d", msg.Version)
+		}
+		if msg.Date.IsZero() {
+			t.Errorf("getGerritMessage: expected Date to be non-zero, got zero")
+		}
+		if msg.Message != tt.out {
+			t.Errorf("getGerritMessage: want %q, got %q", tt.out, msg.Message)
+		}
+	}
+}
diff --git a/maintner/maintpb/maintner.pb.go b/maintner/maintpb/maintner.pb.go
index 69b3e62..ce78efa 100644
--- a/maintner/maintpb/maintner.pb.go
+++ b/maintner/maintpb/maintner.pb.go
@@ -816,9 +816,6 @@
 	return false
 }
 
-// GerritMutation represents an individual Gerrit CL. The URL and Number must
-// always be present, to identify the CL. The other fields may or may not be
-// present.
 type GerritMutation struct {
 	// Project is the Gerrit server and project, without scheme (https implied) or
 	// trailing slash.
diff --git a/maintner/maintpb/maintner.proto b/maintner/maintpb/maintner.proto
index b7469ed..1c5858c 100644
--- a/maintner/maintpb/maintner.proto
+++ b/maintner/maintpb/maintner.proto
@@ -200,9 +200,6 @@
   bool   binary = 4;
 }
 
-// GerritMutation represents an individual Gerrit CL. The URL and Number must
-// always be present, to identify the CL. The other fields may or may not be
-// present.
 message GerritMutation {
   // Project is the Gerrit server and project, without scheme (https implied) or
   // trailing slash.