git-codereview: allow short user names in git mail

In Mercurial, hg mail expanded Rietveld user names.
There are no Gerrit user names.
There are GitHub user names, but then I'd have to type
randall77,griesemer,robpike,ianlancetaylor
instead of khr,gri,r,iant.

The algorithm for expanding short user names is as follows:
Look at the git commit log for the current repository,
extracting all the email addresses in Reviewed-By lines
and sorting by how many times each address appears.
For each short user name, walk the list, most common
address first, and use the first address found that has
the short user name on the left side of the @.

This is a purely local operation, it adjusts automatically as
new reviewers come on board, it avoids a separate database,
it adjusts to repo-specific reviewer patterns, and it resolves
potential ambiguity in favor of the most common reviewers.
(For example, r@golang.org will beat any other r@mail.com
in the main repo.)

Change-Id: I53afea2a86ba4cfa8fd7f31d56b90a3e12cc2b48
Reviewed-on: https://go-review.googlesource.com/2111
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
Reviewed-by: Austin Clements <austin@google.com>
Reviewed-by: Andrew Gerrand <adg@golang.org>
diff --git a/git-codereview/mail.go b/git-codereview/mail.go
index d6c1108..c6eee7e 100644
--- a/git-codereview/mail.go
+++ b/git-codereview/mail.go
@@ -8,6 +8,7 @@
 	"fmt"
 	"os"
 	"regexp"
+	"sort"
 	"strings"
 )
 
@@ -80,25 +81,116 @@
 
 // mailAddressRE matches the mail addresses we admit. It's restrictive but admits
 // all the addresses in the Go CONTRIBUTORS file at time of writing (tested separately).
-var mailAddressRE = regexp.MustCompile(`^[a-zA-Z0-9][-_.a-zA-Z0-9]*@[-_.a-zA-Z0-9]+$`)
+var mailAddressRE = regexp.MustCompile(`^([a-zA-Z0-9][-_.a-zA-Z0-9]*)(@[-_.a-zA-Z0-9]+)?$`)
 
 // mailList turns the list of mail addresses from the flag value into the format
 // expected by gerrit. The start argument is a % or , depending on where we
 // are in the processing sequence.
 func mailList(start, tag string, flagList string) string {
+	errors := false
 	spec := start
+	short := ""
+	long := ""
 	for i, addr := range strings.Split(flagList, ",") {
-		if !mailAddressRE.MatchString(addr) {
-			dief("%q is not a valid reviewer mail address", addr)
+		m := mailAddressRE.FindStringSubmatch(addr)
+		if m == nil {
+			printf("invalid reviewer mail address: %s", addr)
+			errors = true
+			continue
+		}
+		if m[2] == "" {
+			email := mailLookup(addr)
+			if email == "" {
+				printf("unknown reviewer: %s", addr)
+				errors = true
+				continue
+			}
+			short += "," + addr
+			long += "," + email
+			addr = email
 		}
 		if i > 0 {
 			spec += ","
 		}
 		spec += tag + "=" + addr
 	}
+	if short != "" {
+		verbosef("expanded %s to %s", short[1:], long[1:])
+	}
+	if errors {
+		die()
+	}
 	return spec
 }
 
+// reviewers is the list of reviewers for the current repository,
+// sorted by how many reviews each has done.
+var reviewers []reviewer
+
+type reviewer struct {
+	addr  string
+	count int
+}
+
+// mailLookup translates the short name (like adg) into a full
+// email address (like adg@golang.org).
+// It returns "" if no translation is found.
+// The algorithm for expanding short user names is as follows:
+// Look at the git commit log for the current repository,
+// extracting all the email addresses in Reviewed-By lines
+// and sorting by how many times each address appears.
+// For each short user name, walk the list, most common
+// address first, and use the first address found that has
+// the short user name on the left side of the @.
+func mailLookup(short string) string {
+	loadReviewers()
+
+	short += "@"
+	for _, r := range reviewers {
+		if strings.HasPrefix(r.addr, short) {
+			return r.addr
+		}
+	}
+	return ""
+}
+
+// loadReviewers reads the reviewer list from the current git repo
+// and leaves it in the global variable reviewers.
+// See the comment on mailLookup for a description of how the
+// list is generated and used.
+func loadReviewers() {
+	if reviewers != nil {
+		return
+	}
+	countByAddr := map[string]int{}
+	for _, line := range getLines("git", "log", "--format=format:%B") {
+		if strings.HasPrefix(line, "Reviewed-by:") {
+			f := strings.Fields(line)
+			addr := f[len(f)-1]
+			if strings.HasPrefix(addr, "<") && strings.Contains(addr, "@") && strings.HasSuffix(addr, ">") {
+				countByAddr[addr[1:len(addr)-1]]++
+			}
+		}
+	}
+
+	reviewers = []reviewer{}
+	for addr, count := range countByAddr {
+		reviewers = append(reviewers, reviewer{addr, count})
+	}
+	sort.Sort(reviewersByCount(reviewers))
+}
+
+type reviewersByCount []reviewer
+
+func (x reviewersByCount) Len() int      { return len(x) }
+func (x reviewersByCount) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
+func (x reviewersByCount) Less(i, j int) bool {
+	if x[i].count != x[j].count {
+		return x[i].count > x[j].count
+	}
+	return x[i].addr < x[j].addr
+}
+
 // stringList is a flag.Value that is like flag.String, but if repeated
 // keeps appending to the old value, inserting commas as separators.
 // This allows people to write -r rsc,adg (like the old hg command)
diff --git a/git-codereview/mail_test.go b/git-codereview/mail_test.go
index e3543df..f2baafe 100644
--- a/git-codereview/mail_test.go
+++ b/git-codereview/mail_test.go
@@ -4,7 +4,10 @@
 
 package main
 
-import "testing"
+import (
+	"fmt"
+	"testing"
+)
 
 func TestMail(t *testing.T) {
 	gt := newGitTest(t)
@@ -48,3 +51,57 @@
 
 	testMain(t, "mail", "-diff")
 }
+
+var reviewerLog = []string{
+	"Fake 1 <r1@fake.com>",
+	"Fake 1 <r1@fake.com>",
+	"Fake 1 <r1@fake.com>",
+	"Reviewer 1 <r1@golang.org>",
+	"Reviewer 1 <r1@golang.org>",
+	"Reviewer 1 <r1@golang.org>",
+	"Reviewer 1 <r1@golang.org>",
+	"Reviewer 1 <r1@golang.org>",
+	"Other <other@golang.org>",
+	"<anon@golang.org>",
+}
+
+func TestMailShort(t *testing.T) {
+	gt := newGitTest(t)
+	defer gt.done()
+
+	// fake auth information to avoid Gerrit error
+	auth.host = "gerrit.fake"
+	auth.user = "not-a-user"
+	defer func() {
+		auth.host = ""
+		auth.user = ""
+	}()
+
+	// Seed commit history with reviewers.
+	for i, addr := range reviewerLog {
+		write(t, gt.server+"/file", fmt.Sprintf("v%d", i))
+		trun(t, gt.server, "git", "commit", "-a", "-m", "msg\n\nReviewed-by: "+addr+"\n")
+	}
+	trun(t, gt.client, "git", "pull")
+
+	// Do some work.
+	gt.work(t)
+
+	testMain(t, "mail")
+	testRan(t,
+		"git push -q origin HEAD:refs/for/master",
+		"git tag -f work.mailed")
+
+	testMain(t, "mail", "-r", "r1")
+	testRan(t,
+		"git push -q origin HEAD:refs/for/master%r=r1@golang.org",
+		"git tag -f work.mailed")
+
+	testMain(t, "mail", "-r", "other,anon", "-cc", "r1,full@email.com")
+	testRan(t,
+		"git push -q origin HEAD:refs/for/master%r=other@golang.org,r=anon@golang.org,cc=r1@golang.org,cc=full@email.com",
+		"git tag -f work.mailed")
+
+	testMainDied(t, "mail", "-r", "other", "-r", "anon,r1,missing")
+	testPrintedStderr(t, "unknown reviewer: missing")
+}
diff --git a/git-codereview/review.go b/git-codereview/review.go
index cc8b937..6d1f6ac 100644
--- a/git-codereview/review.go
+++ b/git-codereview/review.go
@@ -2,8 +2,6 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// TODO(adg): translate email addresses without @ by looking up somewhere
-
 // Command git-codereview provides a simple command-line user interface for
 // working with git repositories and the Gerrit code review system.
 // See "git-codereview help" for details.
@@ -237,6 +235,10 @@
 
 func dief(format string, args ...interface{}) {
 	printf(format, args...)
+	die()
+}
+
+func die() {
 	if dieTrap != nil {
 		dieTrap()
 	}
diff --git a/git-codereview/util_test.go b/git-codereview/util_test.go
index d9933ad..bc44e2f 100644
--- a/git-codereview/util_test.go
+++ b/git-codereview/util_test.go
@@ -147,6 +147,12 @@
 	}
 }
 
+func remove(t *testing.T, file string) {
+	if err := os.RemoveAll(file); err != nil {
+		t.Fatal(err)
+	}
+}
+
 func trun(t *testing.T, dir string, cmdline ...string) string {
 	cmd := exec.Command(cmdline[0], cmdline[1:]...)
 	cmd.Dir = dir