blob: 15a619a675955cb261a3192d2da1e819dc5a67f5 [file] [log] [blame]
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Logic to interact with a Gerrit server. Gerrit has an entire Git-based
// protocol for fetching metadata about CL's, reviewers, patch comments, which
// is used here - we don't use the x/build/gerrit client, which hits the API.
// TODO: write about Gerrit's Git API.
package maintner
import (
"bufio"
"bytes"
"context"
"fmt"
"log"
"net/url"
"os"
"os/exec"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"
"golang.org/x/build/maintner/maintpb"
)
// Gerrit holds information about a number of Gerrit projects.
type Gerrit struct {
	c       *Corpus // the owning corpus; set by Corpus.initGerrit
	dataDir string  // the root Corpus data directory
	// projects holds every project created through getOrCreateProject.
	// Keys are the project strings passed to Corpus.AddGerrit, which must
	// contain exactly one slash, e.g. "go.googlesource.com/build" (no URL
	// scheme; "https://" is prepended when the Git remote is configured).
	projects map[string]*GerritProject
}
// getOrCreateProject returns the GerritProject registered under gerritProj,
// creating and registering a new one the first time a name is seen. A new
// project keeps its Git data in a directory under g.dataDir named after the
// path-escaped project string.
//
// c.mu must be held.
func (g *Gerrit) getOrCreateProject(gerritProj string) *GerritProject {
	if existing, found := g.projects[gerritProj]; found {
		return existing
	}

	created := &GerritProject{
		gerrit: g,
		proj:   gerritProj,
		gitDir: filepath.Join(g.dataDir, url.PathEscape(gerritProj)),
		cls:    make(map[int32]*gerritCL),
		remote: make(map[gerritCLVersion]gitHash),
	}
	g.projects[gerritProj] = created
	return created
}
// GerritProject represents a single Gerrit project.
type GerritProject struct {
	gerrit *Gerrit // the Gerrit instance this project was created by
	proj   string  // "go.googlesource.com/net"
	// TODO: Many different Git remotes can share the same Gerrit instance, e.g.
	// the Go Gerrit instance supports build, gddo, go. For the moment these are
	// all treated separately, since the remotes are separate.
	gitDir string                      // directory holding this project's local Git repository
	cls    map[int32]*gerritCL         // known CLs, keyed by CL number
	remote map[gerritCLVersion]gitHash // hash last recorded for each change ref (CL number + version)
	need   map[gitHash]bool            // commits referenced but not yet in the corpus; nil until first use
}
// logf logs a formatted message via the standard logger, prefixed with
// "gerrit <project>: ".
//
// The project name is supplied as an argument instead of being spliced into
// the format string, so a '%' in gp.proj can never be misinterpreted as a
// formatting verb. For project names without '%' the output is unchanged.
func (gp *GerritProject) logf(format string, args ...interface{}) {
	log.Printf("gerrit %s: %s", gp.proj, fmt.Sprintf(format, args...))
}
// gerritCLVersion identifies one ref of a CL: the CL number plus the
// patch set version parsed from the last component of the change ref.
// It is the key type of GerritProject.remote.
type gerritCLVersion struct {
	CLNumber int32
	Version  int32 // version 0 is used for the "meta" ref.
}
// gerritCL is the in-memory record kept for a CL; values are stored in
// GerritProject.cls keyed by CL number.
type gerritCL struct {
	Hash       gitHash // NOTE(review): presumably the CL's current commit hash — confirm
	Number     int32   // CL number
	Author     *gitPerson
	AuthorTime time.Time
	Status     string // "merged", "abandoned", "new"
	// TODO...
}
// gerritMetaCommit holds data about the "meta commit" object that Gerrit
// returns for a given CL.
type gerritMetaCommit struct {
	Hash   gitHash // hash of the meta commit object
	Number int32   // CL number the meta commit belongs to
	Raw    []byte  // output of "git cat-file -p" for the commit (see updateCL)
}
// initGerrit lazily creates the corpus's Gerrit state, rooted at the
// corpus data directory and with no projects. It does nothing when the
// state already exists.
//
// c.mu must be held.
func (c *Corpus) initGerrit() {
	if c.gerrit == nil {
		c.gerrit = &Gerrit{
			c:        c,
			dataDir:  c.dataDir,
			projects: make(map[string]*GerritProject),
		}
	}
}
// watchedGerritRepo records a Gerrit project registered through
// Corpus.AddGerrit; entries are appended to Corpus.watchedGerritRepos.
type watchedGerritRepo struct {
	project *GerritProject
}
// AddGerrit adds the Gerrit project with the given URL to the corpus.
//
// gerritURL must contain exactly one slash (for example
// "go.googlesource.com/build"); AddGerrit panics otherwise. The project is
// created on first use and appended to the corpus's list of watched Gerrit
// repositories. AddGerrit acquires c.mu.
func (c *Corpus) AddGerrit(gerritURL string) {
	c.mu.Lock()
	defer c.mu.Unlock()

	if slashes := strings.Count(gerritURL, "/"); slashes != 1 {
		panic(fmt.Sprintf("gerrit URL %q expected to contain exactly 1 slash", gerritURL))
	}

	c.initGerrit()
	gp := c.gerrit.getOrCreateProject(gerritURL)
	if gp == nil {
		panic("gerrit project not created")
	}

	watched := watchedGerritRepo{project: gp}
	c.watchedGerritRepos = append(c.watchedGerritRepos, watched)
}
// processGerritMutation applies gm to the project it names. Mutations are
// ignored when no Gerrit state exists or when the project is not tracked.
//
// Called with c.mu locked.
func (c *Corpus) processGerritMutation(gm *maintpb.GerritMutation) {
	if c.gerrit == nil {
		return // Untracked.
	}
	if gp, tracked := c.gerrit.projects[gm.Project]; tracked {
		gp.processMutation(gm)
	}
}
// processMutation applies a Gerrit mutation to this project's in-memory
// state.
//
// For every ref in gm.Refs that looks like a change ref, the ref's hash is
// recorded in gp.remote under its (CL number, version) key and the commit is
// marked as needed unless the corpus already has it. Refs that do not match
// rxChangeRef, or whose numbers do not parse, are skipped.
//
// Every commit in gm.Commits is handed to the corpus; on success it is
// removed from the needed set and its parents are marked as needed in turn.
// A commit the corpus rejects is logged and skipped rather than silently
// dropped, since it otherwise stays in the needed set with no trace of why.
//
// Called with c.mu locked.
func (gp *GerritProject) processMutation(gm *maintpb.GerritMutation) {
	for _, refp := range gm.Refs {
		m := rxChangeRef.FindStringSubmatch(refp.Ref)
		if m == nil {
			continue
		}
		cl, err := strconv.ParseInt(m[1], 10, 32)
		version, ok := gerritVersionNumber(m[2])
		if !ok || err != nil {
			continue
		}
		hash := gitHashFromHexStr(refp.Sha1)
		gp.remote[gerritCLVersion{int32(cl), version}] = hash
		gp.markNeededCommit(hash)
	}

	c := gp.gerrit.c
	for _, commitp := range gm.Commits {
		gc, err := c.processGitCommit(commitp)
		if err != nil {
			gp.logf("processing commit from mutation: %v", err)
			continue
		}
		// delete is a no-op on a nil map, so gp.need needs no guard here.
		delete(gp.need, gc.hash)
		for _, p := range gc.parents {
			gp.markNeededCommit(p)
		}
	}
}
// markNeededCommit records hash in gp.need unless the corpus already holds
// that commit. The need map is allocated on first use.
//
// c.mu must be held.
func (gp *GerritProject) markNeededCommit(hash gitHash) {
	if _, have := gp.gerrit.c.gitCommit[hash]; have {
		return // Already have it.
	}
	if gp.need == nil {
		gp.need = make(map[gitHash]bool)
	}
	gp.need[hash] = true
}
// gerritVersionNumber converts the final component of a Gerrit change ref
// into a version number.
//
// The literal "meta" maps to version 0. Any other input must be an unsigned
// decimal integer in the range 1..math.MaxInt32 and is returned as-is.
// ok is false for everything else, including "0" (which would otherwise be
// indistinguishable from the meta ref), signed values such as "-1" or "+3",
// the empty string, and non-numeric suffixes.
func gerritVersionNumber(s string) (version int32, ok bool) {
	if s == "meta" {
		return 0, true
	}
	// ParseUint rejects sign prefixes; a bit size of 31 caps the result at
	// math.MaxInt32 so the conversion to int32 below cannot overflow.
	v, err := strconv.ParseUint(s, 10, 31)
	if err != nil || v == 0 {
		return 0, false
	}
	return int32(v), true
}
// rxRemoteRef matches "git ls-remote" lines that name a Gerrit change ref.
//
// sample row:
// fd1e71f1594ce64941a85428ddef2fbb0ad1023e refs/changes/99/30599/3
//
// Capture values:
// $0: whole match
// $1: "fd1e71f1594ce64941a85428ddef2fbb0ad1023e" (object hash, 40 or more
// hex digits)
// $2: "30599" (CL number)
// $3: the rest of the ref, e.g. "1", "2" (patchset number) or "meta" (a
// special commit holding the comments for a commit). Any non-empty suffix
// matches here; callers validate it with gerritVersionNumber.
//
// The "99" in the middle covers all CL's that end in "99", so
// refs/changes/99/99/1, refs/changes/99/199/meta.
var rxRemoteRef = regexp.MustCompile(`^([0-9a-f]{40,})\s+refs/changes/[0-9a-f]{2}/([0-9]+)/(.+)$`)
// rxChangeRef matches Gerrit change ref names such as
// "refs/changes/99/30599/3" or "refs/changes/99/30599/meta". The pattern is
// anchored at the start only, so text following the version is not rejected.
//
// $1: change num
// $2: version or "meta"
var rxChangeRef = regexp.MustCompile(`^refs/changes/[0-9a-f]{2}/([0-9]+)/(meta|(?:\d+))`)
// sync initializes the project's local Git directory and then synchronizes
// it with the remote.
//
// With loop false a single synchronization pass is made. With loop true the
// pass is repeated with a one-minute pause between passes until ctx is done,
// in which case ctx.Err() is returned. Any initialization or sync error is
// logged and returned immediately.
func (gp *GerritProject) sync(ctx context.Context, loop bool) error {
	err := gp.init(ctx)
	if err != nil {
		gp.logf("init: %v", err)
		return err
	}

	for {
		err = gp.syncOnce(ctx)
		if err != nil {
			gp.logf("sync: %v", err)
			return err
		}
		if !loop {
			return nil
		}

		pause := time.After(1 * time.Minute)
		select {
		case <-pause:
			// Time for the next pass.
		case <-ctx.Done():
			return ctx.Err()
		}
	}
}
// syncOnce performs one synchronization pass: refs first, then commits.
// Commits are not synchronized if the ref pass fails.
func (gp *GerritProject) syncOnce(ctx context.Context) error {
	err := gp.syncRefs(ctx)
	if err == nil {
		err = gp.syncCommits(ctx)
	}
	return err
}
// syncRefs brings the recorded change refs up to date with the remote.
//
// It runs "git fetch origin" (limited to one minute) and "git ls-remote" in
// the project's Git directory, then compares every change ref reported by
// ls-remote against the hash recorded in gp.remote. The hashes of refs that
// differ are fetched, and a single Gerrit mutation listing those refs is
// added to the corpus. Nothing is fetched or recorded when no ref changed.
func (gp *GerritProject) syncRefs(ctx context.Context) error {
	corpus := gp.gerrit.c

	// The timeout context is released as soon as the fetch has finished.
	fetchCtx, cancelFetch := context.WithTimeout(ctx, time.Minute)
	fetch := exec.CommandContext(fetchCtx, "git", "fetch", "origin")
	fetch.Dir = gp.gitDir
	fetchOut, fetchErr := fetch.CombinedOutput()
	cancelFetch()
	if fetchErr != nil {
		return fmt.Errorf("git fetch origin: %v, %s", fetchErr, fetchOut)
	}

	lsRemote := exec.CommandContext(ctx, "git", "ls-remote")
	lsRemote.Dir = gp.gitDir
	listing, err := lsRemote.CombinedOutput()
	if err != nil {
		return fmt.Errorf("git ls-remote: %v, %s", err, listing)
	}

	var (
		changedRefs []*maintpb.GitRef
		toFetch     []gitHash
	)
	lines := bufio.NewScanner(bytes.NewReader(listing))
	for lines.Scan() {
		fields := rxRemoteRef.FindSubmatch(lines.Bytes())
		if fields == nil {
			continue
		}
		hexHash, clField, versionField := fields[1], fields[2], fields[3]

		clNum, parseErr := strconv.ParseInt(string(clField), 10, 32)
		version, ok := gerritVersionNumber(string(versionField))
		if parseErr != nil || !ok {
			continue
		}

		hash := gitHashFromHex(hexHash)
		key := gerritCLVersion{int32(clNum), version}

		corpus.mu.RLock()
		known := gp.remote[key]
		corpus.mu.RUnlock()
		if known == hash {
			continue
		}

		toFetch = append(toFetch, hash)
		changedRefs = append(changedRefs, &maintpb.GitRef{
			// The ref name is whatever follows the hash on the line.
			Ref:  strings.TrimSpace(lines.Text()[len(hexHash):]),
			Sha1: string(hexHash),
		})
	}
	if scanErr := lines.Err(); scanErr != nil {
		return scanErr
	}

	if len(changedRefs) == 0 {
		return nil
	}

	gp.logf("%d new refs; fetching...", len(changedRefs))
	if fetchErr := gp.fetchHashes(ctx, toFetch); fetchErr != nil {
		return fetchErr
	}
	gp.logf("fetched %d new refs.", len(changedRefs))

	mutation := &maintpb.Mutation{
		Gerrit: &maintpb.GerritMutation{
			Project: gp.proj,
			Refs:    changedRefs,
		},
	}
	corpus.addMutation(mutation)
	return nil
}
// syncCommits indexes needed commits until none remain.
//
// Each iteration picks one hash from the needed set, parses that commit
// from the local Git directory, and adds it to the corpus as a Gerrit
// mutation. It returns nil once no commit is needed, the parse error if a
// commit cannot be read, or ctx.Err() if ctx is done while work remains, so
// a long backlog does not keep running after cancellation.
func (gp *GerritProject) syncCommits(ctx context.Context) error {
	c := gp.gerrit.c
	for {
		hash := gp.commitToIndex()
		if hash == nil {
			return nil
		}
		// Checked only when there is still work, so an already-complete
		// sync keeps returning nil even under a cancelled context.
		if err := ctx.Err(); err != nil {
			return err
		}
		commit, err := parseCommitFromGit(gp.gitDir, hash)
		if err != nil {
			return err
		}
		c.addMutation(&maintpb.Mutation{
			Gerrit: &maintpb.GerritMutation{
				Project: gp.proj,
				Commits: []*maintpb.GitCommit{commit},
			},
		})
	}
}
// commitToIndex returns one hash from the set of needed commits, or nil
// when that set is empty. Which hash is returned is unspecified because it
// comes from map iteration. The corpus read lock is held while looking.
func (gp *GerritProject) commitToIndex() gitHash {
	corpus := gp.gerrit.c
	corpus.mu.RLock()
	defer corpus.mu.RUnlock()

	var next gitHash
	for candidate := range gp.need {
		next = candidate
		break
	}
	return next
}
var (
	// statusSpace is the byte form of the "Status: " prefix.
	// NOTE(review): not referenced anywhere in the visible code; presumably
	// meant for parsing status lines out of meta commits — confirm.
	statusSpace = []byte("Status: ")
)
// newMutationFromCL generates a GerritCLMutation using the smallest possible
// diff between a (the state we have in memory) and b (the current Gerrit
// state).
//
// If newMutationFromCL returns nil, the provided gerrit CL is no newer than
// the data we have in the corpus. 'a' may be nil.
//
// The function is not implemented yet: it panics if b is nil, if a is nil
// and b.Hash is not a gitSHA1, and with "TODO" for any other call where a
// is nil. When a is non-nil it currently always returns nil.
func (gp *GerritProject) newMutationFromCL(a *gerritCL, b *gerritMetaCommit) *maintpb.Mutation {
	if b == nil {
		panic("newMutationFromCL: provided nil gerritMetaCommit")
	}
	if a == nil {
		// Only SHA-1 hashes are supported for now.
		if _, ok := b.Hash.(gitSHA1); !ok {
			panic(fmt.Sprintf("unsupported git hash type %T", b.Hash))
		}
		// TODO: build the initial mutation for a CL we have not seen
		// before, starting from
		//	&maintpb.Mutation{Gerrit: &maintpb.GerritMutation{Project: gp.proj}}
		// and filling it in from b.
		panic("TODO")
	}
	// TODO: update the existing proto
	return nil
}
// updateCL updates the local CL.
//
// It reads the commit object named by hash with "git cat-file -p" in the
// project's Git directory, wraps the output in a gerritMetaCommit for CL
// clNum, and asks newMutationFromCL for the mutation that brings the stored
// CL up to date. A non-nil mutation is added to the corpus; a nil mutation
// means there is nothing newer to record, so nothing is added.
//
// If git fails, its stderr is logged (it was previously captured but
// discarded) and the error from running the command is returned unchanged.
func (gp *GerritProject) updateCL(ctx context.Context, clNum int32, hash gitHash) error {
	cmd := exec.CommandContext(ctx, "git", "cat-file", "-p", hash.String())
	cmd.Dir = gp.gitDir
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		gp.logf("git cat-file -p %s: %v, %s", hash.String(), err, stderr.Bytes())
		return err
	}
	cl := &gerritMetaCommit{
		Number: clNum,
		Hash:   hash,
		Raw:    stdout.Bytes(),
	}
	proto := gp.newMutationFromCL(gp.cls[clNum], cl)
	if proto == nil {
		// No newer data than what the corpus already has.
		return nil
	}
	gp.gerrit.c.addMutation(proto)
	return nil
}
// fetchHashes fetches the given commit hashes from the "origin" remote into
// the project's Git directory. The hashes are fetched in batches of at most
// 500 per "git fetch" invocation. On the first failing batch the command
// output is logged and the error is returned; later batches are not tried.
func (gp *GerritProject) fetchHashes(ctx context.Context, hashes []gitHash) error {
	const batchSize = 500

	for start := 0; start < len(hashes); start += batchSize {
		end := start + batchSize
		if end > len(hashes) {
			end = len(hashes)
		}
		batch := hashes[start:end]

		args := make([]string, 0, 3+len(batch))
		args = append(args, "fetch", "--quiet", "origin")
		for _, h := range batch {
			args = append(args, h.String())
		}

		cmd := exec.CommandContext(ctx, "git", args...)
		cmd.Dir = gp.gitDir
		out, err := cmd.CombinedOutput()
		if err != nil {
			log.Printf("error fetching %d hashes from gerrit project %s: %s", len(batch), gp.proj, out)
			return err
		}
	}
	return nil
}
// init prepares the project's local Git directory.
//
// The directory is created if missing. If it already contains a
// .git/config file, the configured remotes are inspected and init returns
// without modifying the repository; otherwise a new repository is created
// with "git init" and "https://<proj>" is added as the "origin" remote.
// An error is returned if git is not on PATH or if any git command fails.
func (gp *GerritProject) init(ctx context.Context) error {
	if err := os.MkdirAll(gp.gitDir, 0755); err != nil {
		return err
	}
	// try to short circuit a git init error, since the init error matching is
	// brittle
	if _, err := exec.LookPath("git"); err != nil {
		return err
	}
	if _, err := os.Stat(filepath.Join(gp.gitDir, ".git", "config")); err == nil {
		// Run inside the project's Git directory; without cmd.Dir the
		// command would inspect whatever repository (if any) contains the
		// process's current working directory.
		cmd := exec.CommandContext(ctx, "git", "remote", "-v")
		cmd.Dir = gp.gitDir
		remoteBytes, err := cmd.Output()
		if err != nil {
			return err
		}
		// NOTE(review): this only fails when neither "origin" nor the
		// project URL appears in the output, although the error message
		// suggests both were meant to be required — confirm the intent.
		if !strings.Contains(string(remoteBytes), "origin") && !strings.Contains(string(remoteBytes), "https://"+gp.proj) {
			return fmt.Errorf("didn't find origin & gp.url in remote output %s", string(remoteBytes))
		}
		gp.logf("git directory exists.")
		return nil
	}

	cmd := exec.CommandContext(ctx, "git", "init")
	buf := new(bytes.Buffer)
	cmd.Stdout = buf
	cmd.Stderr = buf
	cmd.Dir = gp.gitDir
	if err := cmd.Run(); err != nil {
		log.Printf(`Error running "git init": %s`, buf.String())
		return err
	}

	buf.Reset()
	cmd = exec.CommandContext(ctx, "git", "remote", "add", "origin", "https://"+gp.proj)
	cmd.Stdout = buf
	cmd.Stderr = buf
	cmd.Dir = gp.gitDir
	if err := cmd.Run(); err != nil {
		log.Printf(`Error running "git remote add origin": %s`, buf.String())
		return err
	}
	return nil
}