cmd/gopherbot: mention gerrit CLs on Github (cl2issue port)
This adds indexing of Github mentions to maintner, then adds gopherbot
functionality to say "CL https://golang.org/cl/NNNN mentions this issue."
on Github when a Gerrit CL references it.
Also the start of the cherry-pick milestone bot which needs the same
Github issue reference tracking. But that part's only barely started
and still disabled.
Fixes golang/go#18818
Updates golang/go#19776
Change-Id: Ie5f7f6845317a7c39cc36d7397c7539bf99c3f92
Reviewed-on: https://go-review.googlesource.com/39352
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
diff --git a/cmd/gopherbot/gopherbot.go b/cmd/gopherbot/gopherbot.go
index 7a83818..b976519 100644
--- a/cmd/gopherbot/gopherbot.go
+++ b/cmd/gopherbot/gopherbot.go
@@ -8,6 +8,7 @@
import (
"context"
+ "errors"
"flag"
"fmt"
"io/ioutil"
@@ -15,6 +16,7 @@
"net/http"
"os"
"path/filepath"
+ "sort"
"strings"
"time"
@@ -117,6 +119,17 @@
fail = true
}
+ // "CL nnnn mentions this issue"
+ if err := bot.cl2issue(ctx); err != nil {
+ log.Printf("cl2issue: %v", err)
+ fail = true
+ }
+
+ if err := bot.checkCherryPicks(ctx); err != nil {
+ log.Printf("checking cherry picks: %v", err)
+ fail = true
+ }
+
if fail {
os.Exit(1)
}
@@ -133,6 +146,54 @@
return err
}
+func (b *gopherbot) addGitHubComment(ctx context.Context, org, repo string, issueNum int32, msg string) error {
+ gr := b.corpus.GitHub().Repo(org, repo)
+ if gr == nil {
+ return fmt.Errorf("unknown github repo %s/%s", org, repo)
+ }
+ var since time.Time
+ if gi := gr.Issue(issueNum); gi != nil {
+ dup := false
+ gi.ForeachComment(func(c *maintner.GitHubComment) error {
+ since = c.Updated
+ // TODO: check for gopherbot as author? check for exact match?
+ // This seems fine for now.
+ if strings.Contains(c.Body, msg) {
+ dup = true
+ return errStopIteration
+ }
+ return nil
+ })
+ if dup {
+ // Comment's already been posted. Nothing to do.
+ return nil
+ }
+ }
+ // See if there is a dup comment from when gopherbot last got
+ // its data from maintner.
+ ics, _, err := b.ghc.Issues.ListComments(ctx, org, repo, int(issueNum), &github.IssueListCommentsOptions{
+ Since: since,
+ ListOptions: github.ListOptions{PerPage: 1000},
+ })
+ if err != nil {
+ return err
+ }
+ for _, ic := range ics {
+ if strings.Contains(ic.GetBody(), msg) {
+ // Dup.
+ return nil
+ }
+ }
+ if *dryRun {
+ log.Printf("[dry run] would add comment to github.com/%s/%s/issues/%d: %v", org, repo, issueNum, msg)
+ return nil
+ }
+ _, _, err = b.ghc.Issues.CreateComment(ctx, org, repo, int(issueNum), &github.IssueComment{
+ Body: github.String(msg),
+ })
+ return err
+}
+
// freezeOldIssues locks any issue that's old and closed.
// (Otherwise people find ancient bugs via searches and start asking questions
// into a void and it's sad for everybody.)
@@ -337,18 +398,90 @@
}
// TODO: write a task that reopens issues if the OP speaks up.
- _, _, err := b.ghc.Issues.CreateComment(ctx, "golang", "go", int(gi.Number), &github.IssueComment{
- Body: github.String("Timed out in state WaitingForInfo. Closing.\n\n(I am just a bot, though. Please speak up if this is a mistake or you have the requested information.)"),
- })
- if err != nil {
+ if err := b.addGitHubComment(ctx, "golang", "go", gi.Number,
+ "Timed out in state WaitingForInfo. Closing.\n\n(I am just a bot, though. Please speak up if this is a mistake or you have the requested information.)"); err != nil {
return err
}
- _, _, err = b.ghc.Issues.Edit(ctx, "golang", "go", int(gi.Number), &github.IssueRequest{State: github.String("closed")})
+ _, _, err := b.ghc.Issues.Edit(ctx, "golang", "go", int(gi.Number), &github.IssueRequest{State: github.String("closed")})
return err
})
}
+// Issue 19776: assist with cherry picks based on Milestones
+func (b *gopherbot) checkCherryPicks(ctx context.Context) error {
+ // TODO(bradfitz): write this. Debugging stuff below only.
+ return nil
+
+ sum := map[string]int{}
+ b.gorepo.ForeachIssue(func(gi *maintner.GitHubIssue) error {
+ if gi.Milestone.IsNone() || gi.Milestone.IsUnknown() || gi.Milestone.Closed {
+ return nil
+ }
+ title := gi.Milestone.Title
+ if !strings.HasPrefix(title, "Go") || strings.Count(title, ".") != 2 {
+ return nil
+ }
+ sum[title]++
+ return nil
+ })
+ var titles []string
+ for k := range sum {
+ titles = append(titles, k)
+ }
+ sort.Slice(titles, func(i, j int) bool { return sum[titles[i]] < sum[titles[j]] })
+ for _, title := range titles {
+ fmt.Printf(" %10d %s\n", sum[title], title)
+ }
+ return nil
+}
+
+// Write "CL https://golang.org/issue/NNNN mentions this issue" on
+// Github when a new Gerrit CL references a Github issue.
+func (b *gopherbot) cl2issue(ctx context.Context) error {
+ monthAgo := time.Now().Add(-30 * 24 * time.Hour)
+ b.corpus.Gerrit().ForeachProjectUnsorted(func(gp *maintner.GerritProject) error {
+ if gp.Server() != "go.googlesource.com" {
+ return nil
+ }
+ return gp.ForeachCLUnsorted(func(cl *maintner.GerritCL) error {
+ if cl.Meta == nil || cl.Meta.AuthorTime.Before(monthAgo) {
+ // If the CL was last updated over a
+ // month ago, assume (as an
+ // optimization) that gopherbot
+ // already processed this issue.
+ return nil
+ }
+ for _, ref := range cl.GitHubIssueRefs {
+ gi := ref.Repo.Issue(ref.Number)
+ if gi == nil || gi.Closed {
+ continue
+ }
+ hasComment := false
+ substr := fmt.Sprintf("%d mentions this issue.", cl.Number)
+ gi.ForeachComment(func(c *maintner.GitHubComment) error {
+ if strings.Contains(c.Body, substr) {
+ hasComment = true
+ return errStopIteration
+ }
+ return nil
+ })
+ if !hasComment {
+ if err := b.addGitHubComment(ctx, "golang", "go", gi.Number, fmt.Sprintf("CL https://golang.org/cl/%d mentions this issue.", cl.Number)); err != nil {
+ return err
+ }
+ }
+ }
+ return nil
+ })
+ })
+ return nil
+}
+
+// errStopIteration is used to stop iteration over issues or comments.
+// It has no special meaning.
+var errStopIteration = errors.New("stop iteration")
+
func isDocumentationTitle(t string) bool {
if !strings.Contains(t, "doc") && !strings.Contains(t, "Doc") {
return false
diff --git a/maintner/gerrit.go b/maintner/gerrit.go
index 809b8a2..599ac1e 100644
--- a/maintner/gerrit.go
+++ b/maintner/gerrit.go
@@ -30,10 +30,11 @@
// Gerrit holds information about a number of Gerrit projects.
type Gerrit struct {
- c *Corpus
- dataDir string // the root Corpus data directory
- // keys are like "https://go.googlesource.com/build"
- projects map[string]*GerritProject
+ c *Corpus
+ dataDir string // the root Corpus data directory
+ projects map[string]*GerritProject // keyed by "go.googlesource.com/build"
+
+ clsReferencingGithubIssue map[GitHubIssueRef][]*GerritCL
}
// c.mu must be held
@@ -53,6 +54,17 @@
return proj
}
+// ForeachProjectUnsorted calls fn for each known Gerrit project.
+// Iteration ends if fn returns a non-nil value.
+func (g *Gerrit) ForeachProjectUnsorted(fn func(*GerritProject) error) error {
+ for _, p := range g.projects {
+ if err := fn(p); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
// GerritProject represents a single Gerrit project.
type GerritProject struct {
gerrit *Gerrit
@@ -66,6 +78,24 @@
need map[GitHash]bool
}
+func (gp *GerritProject) ServerSlashProject() string { return gp.proj }
+
+// Server returns the Gerrit server, such as "go.googlesource.com".
+func (gp *GerritProject) Server() string {
+ if i := strings.IndexByte(gp.proj, '/'); i != -1 {
+ return gp.proj[:i]
+ }
+ return ""
+}
+
+// Project returns the Gerrit project on the server, such as "go" or "crypto".
+func (gp *GerritProject) Project() string {
+ if i := strings.IndexByte(gp.proj, '/'); i != -1 {
+ return gp.proj[i+1:]
+ }
+ return ""
+}
+
// ForeachOpenCL calls fn for each open CL in the repo.
//
// If fn returns an error, iteration ends and ForeachIssue returns
@@ -90,10 +120,22 @@
return nil
}
+func (gp *GerritProject) ForeachCLUnsorted(fn func(*GerritCL) error) error {
+ for _, cl := range gp.cls {
+ if err := fn(cl); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
func (gp *GerritProject) logf(format string, args ...interface{}) {
log.Printf("gerrit "+gp.proj+": "+format, args...)
}
+// gerritCLVersion is a value type used as a map key to store a CL
+// number and a patchset version. Its Version field is overloaded
+// to reference the "meta" metadata commit if the Version is 0.
type gerritCLVersion struct {
CLNumber int32
Version int32 // version 0 is used for the "meta" ref.
@@ -101,6 +143,9 @@
// A GerritCL represents a single change in Gerrit.
type GerritCL struct {
+ // Project is the project this CL is part of.
+ Project *GerritProject
+
// Number is the CL number on the Gerrit
// server. (e.g. 1, 2, 3)
Number int32
@@ -120,6 +165,47 @@
// Status will be "merged", "abandoned", "new", or "draft".
Status string
+
+ // GitHubIssueRefs are parsed references to GitHub issues.
+ GitHubIssueRefs []GitHubIssueRef
+}
+
+// References reports whether cl includes a commit message reference
+// to the provided Github issue ref.
+func (cl *GerritCL) References(ref GitHubIssueRef) bool {
+ for _, eref := range cl.GitHubIssueRefs {
+ if eref == ref {
+ return true
+ }
+ }
+ return false
+}
+
+func (cl *GerritCL) updateGithubIssueRefs() {
+ gp := cl.Project
+ gerrit := gp.gerrit
+ gc := cl.Commit
+
+ oldRefs := cl.GitHubIssueRefs
+ newRefs := gerrit.c.parseGithubRefs(gp.proj, gc.Msg)
+ cl.GitHubIssueRefs = newRefs
+ for _, ref := range newRefs {
+ if !clSliceContains(gerrit.clsReferencingGithubIssue[ref], cl) {
+ // TODO: make this as small as
+ // possible? Most will have length
+ // 1. Care about default capacity of
+ // 2?
+ gerrit.clsReferencingGithubIssue[ref] = append(gerrit.clsReferencingGithubIssue[ref], cl)
+ }
+ }
+ for _, ref := range oldRefs {
+ if !cl.References(ref) {
+ // TODO: remove ref from gerrit.clsReferencingGithubIssue
+ // It could be a map of maps I suppose, but not as compact.
+ // So uses a slice as the second layer, since there will normally
+ // be one item.
+ }
+ }
}
// c.mu must be held
@@ -128,9 +214,10 @@
return
}
c.gerrit = &Gerrit{
- c: c,
- dataDir: c.dataDir,
- projects: map[string]*GerritProject{},
+ c: c,
+ dataDir: c.dataDir,
+ projects: map[string]*GerritProject{},
+ clsReferencingGithubIssue: map[GitHubIssueRef][]*GerritCL{},
}
}
@@ -256,6 +343,7 @@
} else {
cl.Commit = gc
cl.Version = clv.Version
+ cl.updateGithubIssueRefs()
}
if c.didInit {
gp.logf("Ref %+v => %v", clv, hash)
@@ -276,6 +364,16 @@
}
}
+// clSliceContains reports whether cls contains cl.
+func clSliceContains(cls []*GerritCL, cl *GerritCL) bool {
+ for _, v := range cls {
+ if v == cl {
+ return true
+ }
+ }
+ return false
+}
+
// c.mu must be held
func (gp *GerritProject) markNeededCommit(hash GitHash) {
c := gp.gerrit.c
@@ -296,7 +394,8 @@
return cl
}
cl = &GerritCL{
- Number: num,
+ Project: gp,
+ Number: num,
}
gp.cls[num] = cl
return cl
diff --git a/maintner/github.go b/maintner/github.go
index 61fd8a9..ca2b1e0 100644
--- a/maintner/github.go
+++ b/maintner/github.go
@@ -35,14 +35,14 @@
// package for responses fulfilled from cache due to a 304 from the server.
const xFromCache = "X-From-Cache"
-// githubRepoID is a github org & repo, lowercase.
-type githubRepoID struct {
+// GithubRepoID is a github org & repo, lowercase.
+type GithubRepoID struct {
Owner, Repo string
}
-func (id githubRepoID) String() string { return id.Owner + "/" + id.Repo }
+func (id GithubRepoID) String() string { return id.Owner + "/" + id.Repo }
-func (id githubRepoID) valid() bool {
+func (id GithubRepoID) valid() bool {
if id.Owner == "" || id.Repo == "" {
// TODO: more validation. whatever github requires.
return false
@@ -53,15 +53,16 @@
type GitHub struct {
c *Corpus
users map[int64]*GitHubUser
- repos map[githubRepoID]*GitHubRepo
+ repos map[GithubRepoID]*GitHubRepo
}
+// Repo returns the repo if it's known. Otherwise it returns nil.
func (g *GitHub) Repo(owner, repo string) *GitHubRepo {
- return g.repos[githubRepoID{owner, repo}]
+ return g.repos[GithubRepoID{owner, repo}]
}
func (g *GitHub) getOrCreateRepo(owner, repo string) *GitHubRepo {
- id := githubRepoID{owner, repo}
+ id := GithubRepoID{owner, repo}
if !id.valid() {
return nil
}
@@ -80,12 +81,17 @@
type GitHubRepo struct {
github *GitHub
- id githubRepoID
+ id GithubRepoID
issues map[int32]*GitHubIssue // num -> issue
milestones map[int64]*GitHubMilestone
labels map[int64]*GitHubLabel
}
+func (gr *GitHubRepo) ID() GithubRepoID { return gr.id }
+
+// Issue returns the the provided issue number, or nil if it's not known.
+func (gr *GitHubRepo) Issue(n int32) *GitHubIssue { return gr.issues[n] }
+
// ForeachIssue calls fn for each issue in the repo.
//
// If fn returns an error, iteration ends and ForeachIssue returns
@@ -146,6 +152,14 @@
Login string
}
+// GitHubIssueRef is a reference to an issue (or pull request) number
+// in a repo. These are parsed from text making references such as
+// "golang/go#1234" or just "#1234" (with an implicit Repo).
+type GitHubIssueRef struct {
+ Repo *GitHubRepo // must be non-nil
+ Number int32 // GitHubIssue.Number
+}
+
// GitHubIssue represents a github issue.
// This is maintner's in-memory representation. It differs slightly
// from the API's *github.Issue type, notably in the lack of pointers
@@ -314,6 +328,10 @@
// IsNone reports whether ms represents the sentinel "no milestone" milestone.
func (ms *GitHubMilestone) IsNone() bool { return ms == noMilestone }
+// IsUnknown reports whether ms is nil, which represents the unknown
+// state. Milestones should never be in this state, though.
+func (ms *GitHubMilestone) IsUnknown() bool { return ms == nil }
+
// emptyMilestone is a non-nil *githubMilestone with zero values for
// all fields.
var emptyMilestone = new(GitHubMilestone)
@@ -509,7 +527,7 @@
}
c.github = &GitHub{
c: c,
- repos: map[githubRepoID]*GitHubRepo{},
+ repos: map[GithubRepoID]*GitHubRepo{},
}
}
@@ -1891,3 +1909,58 @@
}
return gr
}
+
+var rxReferences = regexp.MustCompile(`\b(?:([\w\-]+)/([\w\-]+))?\#(\d+)\b`)
+
+func (c *Corpus) parseGithubRefs(gerritProj string, commitMsg string) []GitHubIssueRef {
+ // Use of rxReferences by itself caused this function to take 20% of the CPU time.
+ // TODO(bradfitz): stop using regexps here.
+ // But in the meantime, help the regexp engine with this one weird trick:
+ // Reduce the length of the string given to FindAllStringSubmatch.
+ // Discard all lines before the first line containing a '#'.
+ // The "Fixes #nnnn" is usually at the end, so this discards most of the input.
+ // Now CPU is only 2% instead of 20%.
+ hash := strings.IndexByte(commitMsg, '#')
+ if hash == -1 {
+ return nil
+ }
+ nl := strings.LastIndexByte(commitMsg[:hash], '\n')
+ commitMsg = commitMsg[nl+1:]
+
+ // TODO: use FindAllStringSubmatchIndex instead, so we can
+ // back up and see what's behind it and ignore "#1", "#2",
+ // "#3" 'references' which are actually bullets or ARM
+ // disassembly, and only respect them as real if they have the
+ // word "Fixes " or "Issue " or similar before them.
+ ms := rxReferences.FindAllStringSubmatch(commitMsg, -1)
+ if len(ms) == 0 {
+ return nil
+ }
+ /* e.g.
+ 2017/03/30 21:42:07 matches: [["golang/go#9327" "golang" "go" "9327"]]
+ 2017/03/30 21:42:07 matches: [["golang/go#16512" "golang" "go" "16512"] ["golang/go#18404" "golang" "go" "18404"]]
+ 2017/03/30 21:42:07 matches: [["#1" "" "" "1"]]
+ 2017/03/30 21:42:07 matches: [["#10234" "" "" "10234"]]
+ 2017/03/30 21:42:31 matches: [["GoogleCloudPlatform/gcloud-golang#262" "GoogleCloudPlatform" "gcloud-golang" "262"]]
+ 2017/03/30 21:42:31 matches: [["GoogleCloudPlatform/google-cloud-go#481" "GoogleCloudPlatform" "google-cloud-go" "481"]]
+ */
+ github := c.GitHub()
+ refs := make([]GitHubIssueRef, 0, len(ms))
+ for _, m := range ms {
+ owner, repo, numStr := strings.ToLower(m[1]), strings.ToLower(m[2]), m[3]
+ num, err := strconv.ParseInt(numStr, 10, 32)
+ if err != nil {
+ continue
+ }
+ if owner == "" {
+ if gerritProj == "go.googlesource.com/go" {
+ owner, repo = "golang", "go"
+ } else {
+ continue
+ }
+ }
+ gr := github.getOrCreateRepo(owner, repo)
+ refs = append(refs, GitHubIssueRef{gr, int32(num)})
+ }
+ return refs
+}
diff --git a/maintner/logger.go b/maintner/logger.go
index 8cc4a3f..d22c9fa 100644
--- a/maintner/logger.go
+++ b/maintner/logger.go
@@ -7,7 +7,6 @@
import (
"context"
"fmt"
- "log"
"os"
"path/filepath"
"strings"
@@ -87,7 +86,6 @@
go func() {
defer close(ch)
err := d.ForeachFile(func(fullPath string, fi os.FileInfo) error {
- log.Printf("File %s has size %v", fullPath, fi.Size())
return reclog.ForeachFileRecord(fullPath, func(off int64, hdr, rec []byte) error {
m := new(maintpb.Mutation)
if err := proto.Unmarshal(rec, m); err != nil {
diff --git a/maintner/maintner.go b/maintner/maintner.go
index 94fcb16..0d3cd87 100644
--- a/maintner/maintner.go
+++ b/maintner/maintner.go
@@ -77,6 +77,14 @@
return new(GitHub)
}
+// Gerrit returns the corpus's Gerrit data.
+func (c *Corpus) Gerrit() *Gerrit {
+ if c.gerrit != nil {
+ return c.gerrit
+ }
+ return new(Gerrit)
+}
+
// mustProtoFromTime turns a time.Time into a *timestamp.Timestamp or panics if
// in is invalid.
func mustProtoFromTime(in time.Time) *timestamp.Timestamp {
diff --git a/maintner/maintner_test.go b/maintner/maintner_test.go
index d3eb200..be05cef 100644
--- a/maintner/maintner_test.go
+++ b/maintner/maintner_test.go
@@ -80,10 +80,10 @@
github.users = map[int64]*GitHubUser{
u1.ID: u1,
}
- github.repos = map[githubRepoID]*GitHubRepo{
- githubRepoID{"golang", "go"}: &GitHubRepo{
+ github.repos = map[GithubRepoID]*GitHubRepo{
+ GithubRepoID{"golang", "go"}: &GitHubRepo{
github: github,
- id: githubRepoID{"golang", "go"},
+ id: GithubRepoID{"golang", "go"},
issues: map[int32]*GitHubIssue{
3: &GitHubIssue{
Number: 3,
@@ -121,7 +121,7 @@
State: github.String("closed"),
}
gr := &GitHubRepo{
- id: githubRepoID{"golang", "go"},
+ id: GithubRepoID{"golang", "go"},
}
is := gr.newMutationFromIssue(nil, gh)
want := &maintpb.Mutation{GithubIssue: &maintpb.GithubIssueMutation{
@@ -167,7 +167,7 @@
Assignees: assignees,
}
gr := &GitHubRepo{
- id: githubRepoID{"golang", "go"},
+ id: GithubRepoID{"golang", "go"},
issues: map[int32]*GitHubIssue{
3: issue,
},
@@ -176,8 +176,8 @@
users: map[int64]*GitHubUser{
u1.ID: u1,
},
- repos: map[githubRepoID]*GitHubRepo{
- githubRepoID{"golang", "go"}: gr,
+ repos: map[GithubRepoID]*GitHubRepo{
+ GithubRepoID{"golang", "go"}: gr,
},
}