| // Copyright 2017 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Logic to interact with a Gerrit server. Gerrit has an entire Git-based |
| // protocol for fetching metadata about CL's, reviewers, patch comments, which |
| // is used here - we don't use the x/build/gerrit client, which hits the API. |
| // TODO: write about Gerrit's Git API. |
| |
| package maintner |
| |
| import ( |
| "bufio" |
| "bytes" |
| "context" |
| "fmt" |
| "log" |
| "net/url" |
| "os" |
| "os/exec" |
| "path/filepath" |
| "regexp" |
| "sort" |
| "strconv" |
| "strings" |
| "time" |
| |
| "golang.org/x/build/maintner/maintpb" |
| ) |
| |
| // Gerrit holds information about a number of Gerrit projects. |
| type Gerrit struct { |
| c *Corpus |
| dataDir string // the root Corpus data directory |
| // keys are like "https://go.googlesource.com/build" |
| projects map[string]*GerritProject |
| } |
| |
| // c.mu must be held |
| func (g *Gerrit) getOrCreateProject(gerritProj string) *GerritProject { |
| proj, ok := g.projects[gerritProj] |
| if ok { |
| return proj |
| } |
| proj = &GerritProject{ |
| gerrit: g, |
| proj: gerritProj, |
| gitDir: filepath.Join(g.dataDir, url.PathEscape(gerritProj)), |
| cls: map[int32]*GerritCL{}, |
| remote: map[gerritCLVersion]GitHash{}, |
| } |
| g.projects[gerritProj] = proj |
| return proj |
| } |
| |
| // GerritProject represents a single Gerrit project. |
| type GerritProject struct { |
| gerrit *Gerrit |
| proj string // "go.googlesource.com/net" |
| // TODO: Many different Git remotes can share the same Gerrit instance, e.g. |
| // the Go Gerrit instance supports build, gddo, go. For the moment these are |
| // all treated separately, since the remotes are separate. |
| gitDir string |
| cls map[int32]*GerritCL |
| remote map[gerritCLVersion]GitHash |
| need map[GitHash]bool |
| } |
| |
| // ForeachOpenCL calls fn for each open CL in the repo. |
| // |
| // If fn returns an error, iteration ends and ForeachIssue returns |
| // with that error. |
| // |
| // The fn function is called serially, with increasingly numbered |
| // CLs. |
| func (gp *GerritProject) ForeachOpenCL(fn func(*GerritCL) error) error { |
| var s []*GerritCL |
| for _, cl := range gp.cls { |
| if cl.Status != "new" { |
| continue |
| } |
| s = append(s, cl) |
| } |
| sort.Slice(s, func(i, j int) bool { return s[i].Number < s[j].Number }) |
| for _, cl := range s { |
| if err := fn(cl); err != nil { |
| return err |
| } |
| } |
| return nil |
| } |
| |
| func (gp *GerritProject) logf(format string, args ...interface{}) { |
| log.Printf("gerrit "+gp.proj+": "+format, args...) |
| } |
| |
| type gerritCLVersion struct { |
| CLNumber int32 |
| Version int32 // version 0 is used for the "meta" ref. |
| } |
| |
| // A GerritCL represents a single change in Gerrit. |
| type GerritCL struct { |
| // Number is the CL number on the Gerrit |
| // server. (e.g. 1, 2, 3) |
| Number int32 |
| |
| // Version is the number of versions of the patchset for this |
| // CL seen so far. It starts at 1. |
| Version int32 |
| |
| // Commit is the git commit of the latest version of this CL. |
| // Previous versions are available via GerritProject.remote. |
| Commit *GitCommit |
| |
| // Meta is the head of the most recent Gerrit "meta" commit |
| // for this CL. This is guaranteed to be a linear history |
| // back to a CL-specific root commit for this meta branch. |
| Meta *GitCommit |
| |
| // Status will be "merged", "abandoned", "new", or "draft". |
| Status string |
| } |
| |
| // c.mu must be held |
| func (c *Corpus) initGerrit() { |
| if c.gerrit != nil { |
| return |
| } |
| c.gerrit = &Gerrit{ |
| c: c, |
| dataDir: c.dataDir, |
| projects: map[string]*GerritProject{}, |
| } |
| } |
| |
| type watchedGerritRepo struct { |
| project *GerritProject |
| } |
| |
| // AddGerrit adds the Gerrit project with the given project to the corpus. |
| // The provided string should be of the form "hostname/project", without a scheme |
| // or trailing slash. |
| func (c *Corpus) AddGerrit(gerritProj string) { |
| c.mu.Lock() |
| defer c.mu.Unlock() |
| if strings.Count(gerritProj, "/") != 1 { |
| panic(fmt.Sprintf("gerrit project argument %q expected to contain exactly 1 slash", gerritProj)) |
| } |
| c.initGerrit() |
| if _, dup := c.gerrit.projects[gerritProj]; dup { |
| panic("duplicated watched gerrit project " + gerritProj) |
| } |
| project := c.gerrit.getOrCreateProject(gerritProj) |
| if project == nil { |
| panic("gerrit project not created") |
| } |
| c.watchedGerritRepos = append(c.watchedGerritRepos, watchedGerritRepo{ |
| project: project, |
| }) |
| } |
| |
| // called with c.mu Locked |
| func (c *Corpus) processGerritMutation(gm *maintpb.GerritMutation) { |
| if c.gerrit == nil { |
| // TODO: option to ignore mutation if user isn't interested. |
| c.initGerrit() |
| } |
| gp, ok := c.gerrit.projects[gm.Project] |
| if !ok { |
| // TODO: option to ignore mutation if user isn't interested. |
| // For now, always process the record. |
| gp = c.gerrit.getOrCreateProject(gm.Project) |
| } |
| gp.processMutation(gm) |
| } |
| |
| var statusIndicator = "\n Status: " |
| |
| // The Go Gerrit site does not really use the "draft" status much, but if |
| // you need to test it, create a dummy commit and then run |
| // |
| // git push origin HEAD:refs/drafts/master |
| var statuses = []string{"merged", "abandoned", "draft", "new"} |
| |
| // getGerritStatus takes a current and previous commit, and returns a Gerrit |
| // status, or the empty string to indicate the status did not change between the |
| // two commits. |
| // |
| // getGerritStatus relies on the Gerrit code review convention of amending |
| // the meta commit to include the current status of the CL. The Gerrit search |
| // bar allows you to search for changes with the following statuses: "open", |
| // "reviewed", "closed", "abandoned", "merged", "draft", "pending". The REST API |
| // returns only "NEW", "DRAFT", "ABANDONED", "MERGED". Gerrit attaches "draft", |
| // "abandoned", "new", and "merged" statuses to some meta commits; you may have |
| // to search the current meta commit's parents to find the last good commit. |
| // |
| // Corpus.mu must be held. |
| func (gp *GerritProject) getGerritStatus(currentMeta, oldMeta *GitCommit) string { |
| commit := currentMeta |
| c := gp.gerrit.c |
| for { |
| idx := strings.Index(commit.Msg, statusIndicator) |
| if idx > -1 { |
| off := idx + len(statusIndicator) |
| for _, status := range statuses { |
| if strings.HasPrefix(commit.Msg[off:], status) { |
| return status |
| } |
| } |
| } |
| if len(commit.Parents) == 0 { |
| return "new" |
| } |
| parentHash := commit.Parents[0] // meta tree has no merge commits |
| commit = c.gitCommit[parentHash] |
| if commit == nil { |
| gp.logf("getGerritStatus: did not find parent commit %s", parentHash) |
| return "new" |
| } |
| if oldMeta != nil && commit.Hash == oldMeta.Hash { |
| return "" |
| } |
| } |
| } |
| |
| // called with c.mu Locked |
| func (gp *GerritProject) processMutation(gm *maintpb.GerritMutation) { |
| c := gp.gerrit.c |
| |
| for _, refp := range gm.Refs { |
| m := rxChangeRef.FindStringSubmatch(refp.Ref) |
| if m == nil { |
| continue |
| } |
| clNum64, err := strconv.ParseInt(m[1], 10, 32) |
| version, ok := gerritVersionNumber(m[2]) |
| if !ok || err != nil { |
| continue |
| } |
| hash := c.gitHashFromHexStr(refp.Sha1) |
| gc, ok := c.gitCommit[hash] |
| if !ok { |
| gp.logf("ERROR: ref %v references unknown hash %v; ignoring", refp, hash) |
| continue |
| } |
| clv := gerritCLVersion{int32(clNum64), version} |
| gp.remote[clv] = hash |
| cl := gp.getOrCreateCL(clv.CLNumber) |
| if clv.Version == 0 { |
| oldMeta := cl.Meta |
| cl.Meta = gc |
| if status := gp.getGerritStatus(cl.Meta, oldMeta); status != "" { |
| cl.Status = status |
| } |
| } else { |
| cl.Commit = gc |
| cl.Version = clv.Version |
| } |
| if c.didInit { |
| gp.logf("Ref %+v => %v", clv, hash) |
| } |
| } |
| |
| for _, commitp := range gm.Commits { |
| gc, err := c.processGitCommit(commitp) |
| if err != nil { |
| continue |
| } |
| if gp.need != nil { |
| delete(gp.need, gc.Hash) |
| } |
| for _, p := range gc.Parents { |
| gp.markNeededCommit(p) |
| } |
| } |
| } |
| |
| // c.mu must be held |
| func (gp *GerritProject) markNeededCommit(hash GitHash) { |
| c := gp.gerrit.c |
| if _, ok := c.gitCommit[hash]; ok { |
| // Already have it. |
| return |
| } |
| if gp.need == nil { |
| gp.need = map[GitHash]bool{} |
| } |
| gp.need[hash] = true |
| } |
| |
| // c.mu must be held |
| func (gp *GerritProject) getOrCreateCL(num int32) *GerritCL { |
| cl, ok := gp.cls[num] |
| if ok { |
| return cl |
| } |
| cl = &GerritCL{ |
| Number: num, |
| } |
| gp.cls[num] = cl |
| return cl |
| } |
| |
| func gerritVersionNumber(s string) (version int32, ok bool) { |
| if s == "meta" { |
| return 0, true |
| } |
| v, err := strconv.ParseInt(s, 10, 32) |
| if err != nil { |
| return 0, false |
| } |
| return int32(v), true |
| } |
| |
| // rxRemoteRef matches "git ls-remote" lines. |
| // |
| // sample row: |
| // fd1e71f1594ce64941a85428ddef2fbb0ad1023e refs/changes/99/30599/3 |
| // |
| // Capture values: |
| // $0: whole match |
| // $1: "fd1e71f1594ce64941a85428ddef2fbb0ad1023e" |
| // $2: "30599" (CL number) |
| // $3: "1", "2" (patchset number) or "meta" (a/ special commit |
| // holding the comments for a commit) |
| // |
| // The "99" in the middle covers all CL's that end in "99", so |
| // refs/changes/99/99/1, refs/changes/99/199/meta. |
| var rxRemoteRef = regexp.MustCompile(`^([0-9a-f]{40,})\s+refs/changes/[0-9a-f]{2}/([0-9]+)/(.+)$`) |
| |
| // $1: change num |
| // $2: version or "meta" |
| var rxChangeRef = regexp.MustCompile(`^refs/changes/[0-9a-f]{2}/([0-9]+)/(meta|(?:\d+))`) |
| |
| func (gp *GerritProject) sync(ctx context.Context, loop bool) error { |
| if err := gp.init(ctx); err != nil { |
| gp.logf("init: %v", err) |
| return err |
| } |
| activityCh := gp.gerrit.c.activityChan("gerrit:" + gp.proj) |
| for { |
| if err := gp.syncOnce(ctx); err != nil { |
| gp.logf("sync: %v", err) |
| return err |
| } |
| if !loop { |
| return nil |
| } |
| timer := time.NewTimer(15 * time.Minute) |
| select { |
| case <-ctx.Done(): |
| timer.Stop() |
| return ctx.Err() |
| case <-activityCh: |
| timer.Stop() |
| case <-timer.C: |
| } |
| } |
| } |
| |
| func (gp *GerritProject) syncOnce(ctx context.Context) error { |
| c := gp.gerrit.c |
| |
| fetchCtx, cancel := context.WithTimeout(ctx, time.Minute) |
| cmd := exec.CommandContext(fetchCtx, "git", "fetch", "origin") |
| cmd.Dir = gp.gitDir |
| out, err := cmd.CombinedOutput() |
| cancel() |
| if err != nil { |
| return fmt.Errorf("git fetch origin: %v, %s", err, out) |
| } |
| |
| cmd = exec.CommandContext(ctx, "git", "ls-remote") |
| cmd.Dir = gp.gitDir |
| out, err = cmd.CombinedOutput() |
| if err != nil { |
| return fmt.Errorf("git ls-remote: %v, %s", err, out) |
| } |
| |
| var changedRefs []*maintpb.GitRef |
| var toFetch []GitHash |
| |
| bs := bufio.NewScanner(bytes.NewReader(out)) |
| |
| // Take the lock here to access gp.remote and call c.gitHashFromHex. |
| // It's acceptable to take such a coarse-looking lock because |
| // it's not actually around I/O: all the input from ls-remote has |
| // already been slurped into memory. |
| c.mu.Lock() |
| for bs.Scan() { |
| m := rxRemoteRef.FindSubmatch(bs.Bytes()) |
| if m == nil { |
| continue |
| } |
| clNum, err := strconv.ParseInt(string(m[2]), 10, 32) |
| version, ok := gerritVersionNumber(string(m[3])) |
| if err != nil || !ok { |
| continue |
| } |
| sha1 := m[1] |
| hash := c.gitHashFromHex(sha1) |
| |
| curHash := gp.remote[gerritCLVersion{int32(clNum), version}] |
| |
| if curHash != hash { |
| toFetch = append(toFetch, hash) |
| changedRefs = append(changedRefs, &maintpb.GitRef{ |
| Ref: strings.TrimSpace(bs.Text()[len(sha1):]), |
| Sha1: string(sha1), |
| }) |
| } |
| } |
| c.mu.Unlock() |
| if err := bs.Err(); err != nil { |
| return err |
| } |
| if len(changedRefs) == 0 { |
| return nil |
| } |
| gp.logf("%d new refs", len(changedRefs)) |
| const batchSize = 250 |
| for len(toFetch) > 0 { |
| batch := toFetch |
| if len(batch) > batchSize { |
| batch = batch[:batchSize] |
| } |
| if err := gp.fetchHashes(ctx, batch); err != nil { |
| return err |
| } |
| |
| c.mu.Lock() |
| for _, hash := range batch { |
| gp.markNeededCommit(hash) |
| } |
| c.mu.Unlock() |
| |
| n, err := gp.syncCommits(ctx) |
| if err != nil { |
| return err |
| } |
| toFetch = toFetch[len(batch):] |
| gp.logf("synced %v commits for %d new hashes, %d hashes remain", n, len(batch), len(toFetch)) |
| |
| c.addMutation(&maintpb.Mutation{ |
| Gerrit: &maintpb.GerritMutation{ |
| Project: gp.proj, |
| Refs: changedRefs[:len(batch)], |
| }}) |
| changedRefs = changedRefs[len(batch):] |
| } |
| return nil |
| } |
| |
| func (gp *GerritProject) syncCommits(ctx context.Context) (n int, err error) { |
| c := gp.gerrit.c |
| lastLog := time.Now() |
| for { |
| hash := gp.commitToIndex() |
| if hash == "" { |
| return n, nil |
| } |
| now := time.Now() |
| if lastLog.Before(now.Add(-1 * time.Second)) { |
| lastLog = now |
| gp.logf("parsing commits (%v done)", n) |
| } |
| commit, err := parseCommitFromGit(gp.gitDir, hash) |
| if err != nil { |
| return n, err |
| } |
| c.addMutation(&maintpb.Mutation{ |
| Gerrit: &maintpb.GerritMutation{ |
| Project: gp.proj, |
| Commits: []*maintpb.GitCommit{commit}, |
| }, |
| }) |
| n++ |
| } |
| } |
| |
| func (gp *GerritProject) commitToIndex() GitHash { |
| c := gp.gerrit.c |
| |
| c.mu.RLock() |
| defer c.mu.RUnlock() |
| for hash := range gp.need { |
| return hash |
| } |
| return "" |
| } |
| |
| var ( |
| statusSpace = []byte("Status: ") |
| ) |
| |
| func (gp *GerritProject) fetchHashes(ctx context.Context, hashes []GitHash) error { |
| args := []string{"fetch", "--quiet", "origin"} |
| for _, hash := range hashes { |
| args = append(args, hash.String()) |
| } |
| gp.logf("fetching %v hashes...", len(hashes)) |
| cmd := exec.CommandContext(ctx, "git", args...) |
| cmd.Dir = gp.gitDir |
| if out, err := cmd.CombinedOutput(); err != nil { |
| log.Printf("error fetching %d hashes from gerrit project %s: %s", len(hashes), gp.proj, out) |
| return err |
| } |
| gp.logf("fetched %v hashes.", len(hashes)) |
| return nil |
| } |
| |
| func (gp *GerritProject) init(ctx context.Context) error { |
| if err := os.MkdirAll(gp.gitDir, 0755); err != nil { |
| return err |
| } |
| // try to short circuit a git init error, since the init error matching is |
| // brittle |
| if _, err := exec.LookPath("git"); err != nil { |
| return err |
| } |
| |
| if _, err := os.Stat(filepath.Join(gp.gitDir, ".git", "config")); err == nil { |
| remoteBytes, err := exec.CommandContext(ctx, "git", "remote", "-v").Output() |
| if err != nil { |
| return err |
| } |
| if !strings.Contains(string(remoteBytes), "origin") && !strings.Contains(string(remoteBytes), "https://"+gp.proj) { |
| return fmt.Errorf("didn't find origin & gp.url in remote output %s", string(remoteBytes)) |
| } |
| gp.logf("git directory exists.") |
| return nil |
| } |
| |
| cmd := exec.CommandContext(ctx, "git", "init") |
| buf := new(bytes.Buffer) |
| cmd.Stdout = buf |
| cmd.Stderr = buf |
| cmd.Dir = gp.gitDir |
| if err := cmd.Run(); err != nil { |
| log.Printf(`Error running "git init": %s`, buf.String()) |
| return err |
| } |
| buf.Reset() |
| cmd = exec.CommandContext(ctx, "git", "remote", "add", "origin", "https://"+gp.proj) |
| cmd.Stdout = buf |
| cmd.Stderr = buf |
| cmd.Dir = gp.gitDir |
| if err := cmd.Run(); err != nil { |
| log.Printf(`Error running "git remote add origin": %s`, buf.String()) |
| return err |
| } |
| |
| return nil |
| } |