blob: 9f445bb46c5245974a82c80775590820c6756cea [file] [log] [blame]
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Logic to interact with a Gerrit server. Gerrit has an entire Git-based
// protocol for fetching metadata about CL's, reviewers, patch comments, which
// is used here - we don't use the x/build/gerrit client, which hits the API.
// TODO: write about Gerrit's Git API.
package maintner
import (
"bufio"
"bytes"
"context"
"fmt"
"log"
"net/url"
"os"
"os/exec"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"
"golang.org/x/build/maintner/maintpb"
)
// Gerrit holds information about a number of Gerrit projects.
type Gerrit struct {
c *Corpus
dataDir string // the root Corpus data directory
// keys are like "https://go.googlesource.com/build"
projects map[string]*GerritProject
}
// c.mu must be held
func (g *Gerrit) getOrCreateProject(gerritProj string) *GerritProject {
proj, ok := g.projects[gerritProj]
if ok {
return proj
}
proj = &GerritProject{
gerrit: g,
proj: gerritProj,
gitDir: filepath.Join(g.dataDir, url.PathEscape(gerritProj)),
cls: map[int32]*gerritCL{},
remote: map[gerritCLVersion]gitHash{},
}
g.projects[gerritProj] = proj
return proj
}
// GerritProject represents a single Gerrit project.
type GerritProject struct {
gerrit *Gerrit
proj string // "go.googlesource.com/net"
// TODO: Many different Git remotes can share the same Gerrit instance, e.g.
// the Go Gerrit instance supports build, gddo, go. For the moment these are
// all treated separately, since the remotes are separate.
gitDir string
cls map[int32]*gerritCL
remote map[gerritCLVersion]gitHash
need map[gitHash]bool
}
func (gp *GerritProject) logf(format string, args ...interface{}) {
log.Printf("gerrit "+gp.proj+": "+format, args...)
}
type gerritCLVersion struct {
CLNumber int32
Version int32 // version 0 is used for the "meta" ref.
}
type gerritCL struct {
// Number is the CL number on the Gerrit
// server. (e.g. 1, 2, 3)
Number int32
// Version is the number of versions of the patchset for this
// CL seen so far. It starts at 1.
Version int32
// Commit is the git commit of the latest version of this CL.
// Previous versions are available via GerritProject.remote.
Commit *gitCommit
// Meta is the head of the most recent Gerrit "meta" commit
// for this CL. This is guaranteed to be a linear history
// back to a CL-specific root commit for this meta branch.
Meta *gitCommit
// Status is TODO.
// Will be something like "merged", "abandoned", "new"
// Or maybe bools.
// Status string
}
// c.mu must be held
func (c *Corpus) initGerrit() {
if c.gerrit != nil {
return
}
c.gerrit = &Gerrit{
c: c,
dataDir: c.dataDir,
projects: map[string]*GerritProject{},
}
}
type watchedGerritRepo struct {
project *GerritProject
}
// AddGerrit adds the Gerrit project with the given project to the corpus.
// The provided string should be of the form "hostname/project", without a scheme
// or trailing slash.
func (c *Corpus) AddGerrit(gerritProj string) {
c.mu.Lock()
defer c.mu.Unlock()
if strings.Count(gerritProj, "/") != 1 {
panic(fmt.Sprintf("gerrit project argument %q expected to contain exactly 1 slash", gerritProj))
}
c.initGerrit()
if _, dup := c.gerrit.projects[gerritProj]; dup {
panic("duplicated watched gerrit project " + gerritProj)
}
project := c.gerrit.getOrCreateProject(gerritProj)
if project == nil {
panic("gerrit project not created")
}
c.watchedGerritRepos = append(c.watchedGerritRepos, watchedGerritRepo{
project: project,
})
}
// called with c.mu Locked
func (c *Corpus) processGerritMutation(gm *maintpb.GerritMutation) {
if c.gerrit == nil {
// Untracked.
return
}
gp, ok := c.gerrit.projects[gm.Project]
if !ok {
// Untracked.
return
}
gp.processMutation(gm)
}
// called with c.mu Locked
func (gp *GerritProject) processMutation(gm *maintpb.GerritMutation) {
c := gp.gerrit.c
for _, refp := range gm.Refs {
m := rxChangeRef.FindStringSubmatch(refp.Ref)
if m == nil {
continue
}
clNum64, err := strconv.ParseInt(m[1], 10, 32)
version, ok := gerritVersionNumber(m[2])
if !ok || err != nil {
continue
}
hash := gitHashFromHexStr(refp.Sha1)
gc, ok := c.gitCommit[hash]
if !ok {
gp.logf("ERROR: ref %v references unknown hash %v; ignoring", refp, hash)
continue
}
clv := gerritCLVersion{int32(clNum64), version}
gp.remote[clv] = hash
cl := gp.getOrCreateCL(clv.CLNumber)
if clv.Version == 0 {
cl.Meta = gc
} else {
cl.Commit = gc
cl.Version = clv.Version
}
gp.logf("Ref %+v => %v", clv, hash)
}
for _, commitp := range gm.Commits {
gc, err := c.processGitCommit(commitp)
if err != nil {
continue
}
if gp.need != nil {
delete(gp.need, gc.hash)
}
for _, p := range gc.parents {
gp.markNeededCommit(p)
}
}
}
// c.mu must be held
func (gp *GerritProject) markNeededCommit(hash gitHash) {
c := gp.gerrit.c
if _, ok := c.gitCommit[hash]; ok {
// Already have it.
return
}
if gp.need == nil {
gp.need = map[gitHash]bool{}
}
gp.need[hash] = true
}
// c.mu must be held
func (gp *GerritProject) getOrCreateCL(num int32) *gerritCL {
cl, ok := gp.cls[num]
if ok {
return cl
}
cl = &gerritCL{
Number: num,
}
gp.cls[num] = cl
return cl
}
func gerritVersionNumber(s string) (version int32, ok bool) {
if s == "meta" {
return 0, true
}
v, err := strconv.ParseInt(s, 10, 32)
if err != nil {
return 0, false
}
return int32(v), true
}
// rxRemoteRef matches "git ls-remote" lines.
//
// sample row:
// fd1e71f1594ce64941a85428ddef2fbb0ad1023e refs/changes/99/30599/3
//
// Capture values:
// $0: whole match
// $1: "fd1e71f1594ce64941a85428ddef2fbb0ad1023e"
// $2: "30599" (CL number)
// $3: "1", "2" (patchset number) or "meta" (a/ special commit
// holding the comments for a commit)
//
// The "99" in the middle covers all CL's that end in "99", so
// refs/changes/99/99/1, refs/changes/99/199/meta.
var rxRemoteRef = regexp.MustCompile(`^([0-9a-f]{40,})\s+refs/changes/[0-9a-f]{2}/([0-9]+)/(.+)$`)
// $1: change num
// $2: version or "meta"
var rxChangeRef = regexp.MustCompile(`^refs/changes/[0-9a-f]{2}/([0-9]+)/(meta|(?:\d+))`)
func (gp *GerritProject) sync(ctx context.Context, loop bool) error {
if err := gp.init(ctx); err != nil {
gp.logf("init: %v", err)
return err
}
for {
if err := gp.syncOnce(ctx); err != nil {
gp.logf("sync: %v", err)
return err
}
if !loop {
return nil
}
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(1 * time.Minute):
}
}
}
func (gp *GerritProject) syncOnce(ctx context.Context) error {
c := gp.gerrit.c
fetchCtx, cancel := context.WithTimeout(ctx, time.Minute)
cmd := exec.CommandContext(fetchCtx, "git", "fetch", "origin")
cmd.Dir = gp.gitDir
out, err := cmd.CombinedOutput()
cancel()
if err != nil {
return fmt.Errorf("git fetch origin: %v, %s", err, out)
}
cmd = exec.CommandContext(ctx, "git", "ls-remote")
cmd.Dir = gp.gitDir
out, err = cmd.CombinedOutput()
if err != nil {
return fmt.Errorf("git ls-remote: %v, %s", err, out)
}
var changedRefs []*maintpb.GitRef
var toFetch []gitHash
bs := bufio.NewScanner(bytes.NewReader(out))
c.mu.RLock() // to access gp.remote; okay because ls-remote output all in out already
for bs.Scan() {
m := rxRemoteRef.FindSubmatch(bs.Bytes())
if m == nil {
continue
}
clNum, err := strconv.ParseInt(string(m[2]), 10, 32)
version, ok := gerritVersionNumber(string(m[3]))
if err != nil || !ok {
continue
}
sha1 := m[1]
hash := gitHashFromHex(sha1)
curHash := gp.remote[gerritCLVersion{int32(clNum), version}]
if curHash != hash {
toFetch = append(toFetch, hash)
changedRefs = append(changedRefs, &maintpb.GitRef{
Ref: strings.TrimSpace(bs.Text()[len(sha1):]),
Sha1: string(sha1),
})
}
}
c.mu.RUnlock()
if err := bs.Err(); err != nil {
return err
}
if len(changedRefs) == 0 {
return nil
}
gp.logf("%d new refs", len(changedRefs))
const batchSize = 250
for len(toFetch) > 0 {
batch := toFetch
if len(batch) > batchSize {
batch = batch[:batchSize]
}
if err := gp.fetchHashes(ctx, batch); err != nil {
return err
}
c.mu.Lock()
for _, hash := range batch {
gp.markNeededCommit(hash)
}
c.mu.Unlock()
n, err := gp.syncCommits(ctx)
if err != nil {
return err
}
toFetch = toFetch[len(batch):]
gp.logf("synced %v commits for %d new hashes, %d hashes remain", n, len(batch), len(toFetch))
c.addMutation(&maintpb.Mutation{
Gerrit: &maintpb.GerritMutation{
Project: gp.proj,
Refs: changedRefs[:len(batch)],
}})
changedRefs = changedRefs[len(batch):]
}
return nil
}
func (gp *GerritProject) syncCommits(ctx context.Context) (n int, err error) {
c := gp.gerrit.c
lastLog := time.Now()
for {
hash := gp.commitToIndex()
if hash == nil {
return n, nil
}
now := time.Now()
if lastLog.Before(now.Add(-1 * time.Second)) {
lastLog = now
gp.logf("parsing commits (%v done)", n)
}
commit, err := parseCommitFromGit(gp.gitDir, hash)
if err != nil {
return n, err
}
c.addMutation(&maintpb.Mutation{
Gerrit: &maintpb.GerritMutation{
Project: gp.proj,
Commits: []*maintpb.GitCommit{commit},
},
})
n++
}
}
func (gp *GerritProject) commitToIndex() gitHash {
c := gp.gerrit.c
c.mu.RLock()
defer c.mu.RUnlock()
for hash := range gp.need {
return hash
}
return nil
}
var (
statusSpace = []byte("Status: ")
)
func (gp *GerritProject) fetchHashes(ctx context.Context, hashes []gitHash) error {
args := []string{"fetch", "--quiet", "origin"}
for _, hash := range hashes {
args = append(args, hash.String())
}
gp.logf("fetching %v hashes...", len(hashes))
cmd := exec.CommandContext(ctx, "git", args...)
cmd.Dir = gp.gitDir
if out, err := cmd.CombinedOutput(); err != nil {
log.Printf("error fetching %d hashes from gerrit project %s: %s", len(hashes), gp.proj, out)
return err
}
gp.logf("fetched %v hashes.", len(hashes))
return nil
}
func (gp *GerritProject) init(ctx context.Context) error {
if err := os.MkdirAll(gp.gitDir, 0755); err != nil {
return err
}
// try to short circuit a git init error, since the init error matching is
// brittle
if _, err := exec.LookPath("git"); err != nil {
return err
}
if _, err := os.Stat(filepath.Join(gp.gitDir, ".git", "config")); err == nil {
remoteBytes, err := exec.CommandContext(ctx, "git", "remote", "-v").Output()
if err != nil {
return err
}
if !strings.Contains(string(remoteBytes), "origin") && !strings.Contains(string(remoteBytes), "https://"+gp.proj) {
return fmt.Errorf("didn't find origin & gp.url in remote output %s", string(remoteBytes))
}
gp.logf("git directory exists.")
return nil
}
cmd := exec.CommandContext(ctx, "git", "init")
buf := new(bytes.Buffer)
cmd.Stdout = buf
cmd.Stderr = buf
cmd.Dir = gp.gitDir
if err := cmd.Run(); err != nil {
log.Printf(`Error running "git init": %s`, buf.String())
return err
}
buf.Reset()
cmd = exec.CommandContext(ctx, "git", "remote", "add", "origin", "https://"+gp.proj)
cmd.Stdout = buf
cmd.Stderr = buf
cmd.Dir = gp.gitDir
if err := cmd.Run(); err != nil {
log.Printf(`Error running "git remote add origin": %s`, buf.String())
return err
}
return nil
}