// blob: c93054859378c6f98797c96930dceec3497f2e5d [file] [log] [blame]
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// The gitmirror binary watches the specified Gerrit repositories for
// new commits and reports them to the build dashboard.
//
// It also serves tarballs over HTTP for the build system, and pushes
// new commits to GitHub.
package main
import (
"bytes"
"context"
"crypto/sha1"
"encoding/json"
"errors"
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"net"
"net/http"
"net/url"
"os"
"os/exec"
"path"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
"sync"
"time"
"cloud.google.com/go/compute/metadata"
"golang.org/x/build/buildenv"
"golang.org/x/build/gerrit"
"golang.org/x/build/internal/gitauth"
"golang.org/x/build/maintner"
"golang.org/x/build/maintner/godata"
)
// Fixed settings shared across the watcher.
const (
// goBase is the URL prefix of the source repositories; a repo name
// (e.g. "go", "net") is appended to it to form the clone URL.
goBase = "https://go.googlesource.com/"
watcherVersion = 3 // must match dashboard/app/build/handler.go's watcherVersion
master = "master" // name of the master branch
)
// Command-line flags.
var (
httpAddr = flag.String("http", "", "If non-empty, the listen address to run an HTTP server on")
cacheDir = flag.String("cachedir", "", "git cache directory. If empty a temp directory is made.")
dashFlag = flag.String("dash", "", "Dashboard URL (must end in /). If unset, will be automatically derived from the GCE project name.")
keyFile = flag.String("key", defaultKeyFile, "Build dashboard key file. If empty, automatic from GCE project metadata")
pollInterval = flag.Duration("poll", 60*time.Second, "Remote repo poll interval")
// TODO(bradfitz): these three are all kinda the same and
// redundant. Unify after research.
network = flag.Bool("network", true, "Enable network calls (disable for testing)")
mirror = flag.Bool("mirror", false, "whether to mirror to github")
report = flag.Bool("report", true, "Report updates to build dashboard (use false for development dry-run mode)")
filter = flag.String("filter", "", "If non-empty, a comma-separated list of directories or files to watch for new commits (only works on main repo). If empty, watch all files in repo.")
branches = flag.String("branches", "", "If non-empty, a comma-separated list of branches to watch. If empty, watch changes on every branch.")
)
var (
// defaultKeyFile is the default value of the -key flag:
// ".gobuildkey" inside the directory returned by homeDir.
defaultKeyFile = filepath.Join(homeDir(), ".gobuildkey")
// dashboardKey is the key sent along with commits posted to the
// dashboard. It is filled in from readKey when -report is enabled.
dashboardKey = ""
networkSeen = make(map[string]bool) // testing mode only (-network=false); known hashes
)
// httpClient is the shared HTTP client, with a request timeout, used for
// dashboard lookups and subrepo probing.
var httpClient = &http.Client{
Timeout: 30 * time.Second, // overkill
}
// gerritClient talks to the Go Gerrit instance using gerrit.NoAuth.
var gerritClient = gerrit.NewClient("https://go-review.googlesource.com", gerrit.NoAuth)
var (
// gitLogFn returns the list of unseen Commits on a Repo,
// typically by shelling out to `git log`.
// gitLogFn is a global var so we can stub it out for tests.
gitLogFn = gitLog
// gitRemotesFn returns a slice of remote branches known to the git repo.
// gitRemotesFn is a global var so we can stub it out for tests.
gitRemotesFn = gitRemotes
)
// main parses flags, initializes git authentication, determines the
// dashboard URL (from -dash or, on GCE, from the project ID), starts the
// background tickle goroutines and then runs the mirror until it fails.
func main() {
	flag.Parse()

	if initErr := gitauth.Init(); initErr != nil {
		log.Fatalf("gitauth: %v", initErr)
	}

	if *dashFlag == "" {
		// No explicit dashboard; try to derive one from the GCE project.
		if metadata.OnGCE() {
			projectID, err := metadata.ProjectID()
			if err != nil {
				log.Fatalf("metadata.ProjectID: %v", err)
			}
			*dashFlag = buildenv.ByProjectID(projectID).DashBase()
		}
		if *dashFlag == "" {
			log.Fatal("-dash must be specified and could not be autodetected")
		}
	}

	log.Printf("gitmirror running.")
	go pollGerritAndTickle()
	go subscribeToMaintnerAndTickleLoop()

	// runGitMirror only returns on failure.
	log.Fatalf("gitmirror exiting after failure: %v", runGitMirror())
}
// runGitMirror is a little wrapper so we can use defer and return to signal
// errors. It should only return a non-nil error.
//
// It validates the dashboard URL, makes sure a GitHub SSH key is available
// when mirroring, prepares the git cache directory, loads the dashboard key
// when reporting, optionally starts the HTTP server, and then starts one
// watcher goroutine per repository. It blocks until one of those goroutines
// fails to construct its Repo and returns that error.
func runGitMirror() error {
	if !strings.HasSuffix(*dashFlag, "/") {
		// The flag is registered as -dash; name it correctly in the message.
		return errors.New("dashboard URL (-dash) must end in /")
	}

	if *mirror {
		sshDir := filepath.Join(homeDir(), ".ssh")
		sshKey := filepath.Join(sshDir, "id_ed25519")
		if _, err := os.Stat(sshKey); err == nil {
			log.Printf("Using github ssh key at %v", sshKey)
		} else {
			// No key on disk; fall back to the GCE project attribute.
			if privKey, err := metadata.ProjectAttributeValue("github-ssh"); err == nil && len(privKey) > 0 {
				if err := os.MkdirAll(sshDir, 0700); err != nil {
					return err
				}
				if err := ioutil.WriteFile(sshKey, []byte(privKey+"\n"), 0600); err != nil {
					return err
				}
				log.Printf("Wrote %s from GCE metadata.", sshKey)
			} else {
				return fmt.Errorf("Can't mirror to github without 'github-ssh' GCE metadata or file %v", sshKey)
			}
		}
	}

	if *cacheDir == "" {
		dir, err := ioutil.TempDir("", "gitmirror")
		if err != nil {
			// Return rather than log.Fatal so that this function keeps its
			// "signal errors by returning" contract.
			return fmt.Errorf("failed to create temporary git cache dir: %v", err)
		}
		defer os.RemoveAll(dir)
		*cacheDir = dir
	} else {
		fi, err := os.Stat(*cacheDir)
		if os.IsNotExist(err) {
			if err := os.MkdirAll(*cacheDir, 0755); err != nil {
				return fmt.Errorf("failed to create watcher's git cache dir: %v", err)
			}
		} else {
			if err != nil {
				return fmt.Errorf("invalid -cachedir: %v", err)
			}
			if !fi.IsDir() {
				return fmt.Errorf("invalid -cachedir=%q; not a directory", *cacheDir)
			}
		}
	}

	if *report {
		k, err := readKey()
		if err != nil {
			return err
		}
		dashboardKey = k
	}

	if *httpAddr != "" {
		http.HandleFunc("/debug/env", handleDebugEnv)
		http.HandleFunc("/debug/goroutines", handleDebugGoroutines)
		ln, err := net.Listen("tcp", *httpAddr)
		if err != nil {
			return err
		}
		go http.Serve(ln, nil)
	}

	errc := make(chan error)

	subrepos, err := subrepoList()
	if err != nil {
		return err
	}

	// startRepo creates the Repo for name, registers its tarball handler,
	// records it in the global repos list (kept sorted by name) and then
	// runs its watch loop forever. A NewRepo failure is reported on errc.
	startRepo := func(name, path string, dash bool) {
		log.Printf("Starting watch of repo %s", name)
		url := goBase + name
		var dst string
		if *mirror {
			dst = shouldMirrorTo(name)
			if dst != "" {
				log.Printf("Starting mirror of subrepo %s", name)
			} else {
				log.Printf("Not mirroring repo %s", name)
			}
		}
		r, err := NewRepo(url, dst, path, dash)
		if err != nil {
			errc <- err
			return
		}
		http.Handle("/"+name+".tar.gz", r)
		reposMu.Lock()
		repos = append(repos, r)
		sort.Slice(repos, func(i, j int) bool { return repos[i].name() < repos[j].name() })
		reposMu.Unlock()
		r.Loop()
	}

	go startRepo("go", "", true)

	seen := map[string]bool{"go": true}
	for _, path := range subrepos {
		name := strings.TrimPrefix(path, "golang.org/x/")
		seen[name] = true
		go startRepo(name, path, true)
	}
	if *mirror {
		gerritRepos, err := gerritMetaMap()
		if err != nil {
			return fmt.Errorf("gerritMetaMap: %v", err)
		}
		for name := range gerritRepos {
			if seen[name] {
				// Repo already picked up by dashboard list.
				continue
			}
			path := "golang.org/x/" + name
			switch name {
			case "dl":
				// This subrepo is different from others in that
				// it doesn't use the /x/ path element.
				path = "golang.org/" + name
			case "protobuf":
				path = "google.golang.org/" + name
			}
			go startRepo(name, path, false)
		}
	}

	http.HandleFunc("/", handleRoot)

	// Blocks forever if all the NewRepo calls succeed:
	return <-errc
}
var (
	// reposMu guards repos.
	reposMu sync.Mutex
	// repos is the list of repositories being watched.
	repos []*Repo
)

// handleRoot serves an HTML index with one line per watched repository,
// linking to its status page and showing its current status line.
//
// GET /
// or:
// GET /debug/watcher/
func handleRoot(w http.ResponseWriter, r *http.Request) {
	switch r.URL.Path {
	case "/", "/debug/watcher/":
		// Served below.
	default:
		http.NotFound(w, r)
		return
	}

	reposMu.Lock()
	defer reposMu.Unlock()

	w.Header().Set("Content-Type", "text/html; charset=utf-8")
	fmt.Fprint(w, "<html><body><pre>")
	for _, repo := range repos {
		repoName := repo.name()
		fmt.Fprintf(w, "<a href='/debug/watcher/%s'>%s</a> - %s\n", repoName, repoName, repo.statusLine())
	}
	fmt.Fprint(w, "</pre></body></html>")
}
// shouldMirrorTo returns the GitHub repository the named repo should be
// mirrored to or "" if it should not be mirrored.
//
// Repos on the built-in list map to github.com/golang/<name>, "protobuf"
// has a dedicated destination, and any other name is mirrored only if
// https://golang.org/x/<name> answers with a 2xx status.
func shouldMirrorTo(name string) (dst string) {
	golangDst := "git@github.com:golang/" + name + ".git"

	switch name {
	case "protobuf":
		return "git@github.com:protocolbuffers/protobuf-go.git"
	case
		"arch",
		"benchmarks",
		"blog",
		"build",
		"crypto",
		"debug",
		"dl",
		"example",
		"exp",
		"gddo",
		"go",
		"gofrontend",
		"image",
		"lint",
		"mobile",
		"mod",
		"net",
		"oauth2",
		"playground",
		"proposal",
		"review",
		"scratch",
		"sync",
		"sys",
		"talks",
		"term",
		"text",
		"time",
		"tools",
		"tour",
		"vgo",
		"website",
		"xerrors":
		// Known repo: mirror this.
		return golangDst
	}

	// Unknown name: see if it appears to be a subrepo.
	resp, err := httpClient.Get("https://golang.org/x/" + name)
	if err != nil {
		log.Printf("repo %v doesn't seem to exist: %v", name, err)
		return ""
	}
	resp.Body.Close()
	if code := resp.StatusCode; code < 200 || code >= 300 {
		return ""
	}
	return golangDst
}
// a statusEntry is a status string at a specific time.
type statusEntry struct {
	status string
	t      time.Time
}

// statusRing is a ring buffer of timestamped status messages.
type statusRing struct {
	mu   sync.Mutex      // guards rest
	head int             // next position to fill
	ent  [50]statusEntry // ring buffer of entries; zero time means unpopulated
}

// add records status with the current time, overwriting the oldest entry
// once the ring is full.
func (r *statusRing) add(status string) {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.ent[r.head] = statusEntry{status, time.Now()}
	r.head++
	if r.head == len(r.ent) {
		r.head = 0
	}
}

// foreachDesc calls fn for every populated entry, newest first.
// fn is called with the ring's mutex held, so it must not call back into
// the ring.
func (r *statusRing) foreachDesc(fn func(statusEntry)) {
	r.mu.Lock()
	defer r.mu.Unlock()

	n := len(r.ent)
	// Walk back exactly n slots starting at the newest one (head-1). The
	// slot at head itself holds the oldest entry when the ring is full, so
	// it must be visited too; an unpopulated slot ends the walk early.
	for back := 1; back <= n; back++ {
		e := r.ent[(r.head-back+n)%n]
		if e.t.IsZero() {
			return
		}
		fn(e)
	}
}
// Repo represents a repository to be watched.
type Repo struct {
root string // on-disk location of the git repo, *cacheDir/name
path string // base import path for repo (blank for main repo)
commits map[string]*Commit // keyed by full commit hash (40 lowercase hex digits)
branches map[string]*Branch // keyed by branch name, eg "release-branch.go1.3" (or empty for default)
dash bool // push new commits to the dashboard
mirror bool // push new commits to 'dest' remote
// status is the ring of recent status messages shown on the repo's
// debug page.
status statusRing
// mu is held by setErr and statusLine while they access the fields below.
mu sync.Mutex
// err is the error passed to the most recent setErr call (nil on success).
err error
// firstBad is when the repo last went from healthy to failing.
firstBad time.Time
// lastBad is when setErr was last called with a non-nil error.
lastBad time.Time
// firstGood is when the repo last went from failing to healthy.
firstGood time.Time
// lastGood is when setErr was last called with a nil error.
lastGood time.Time
}
// NewRepo prepares a local mirror clone of the git repository at srcURL
// and returns a Repo for it.
//
// An existing cache directory is reused (after a successful "git fetch")
// when possible; otherwise it is wiped and cloned afresh.
//
// If dstURL is not empty, changes from the source repository will
// be mirrored to the specified destination repository.
// The importPath argument is the base import path of the repository,
// and should be empty for the main Go repo.
// The dash argument should be set true if commits to this
// repo should be reported to the build dashboard.
func NewRepo(srcURL, dstURL, importPath string, dash bool) (*Repo, error) {
	repoName := path.Base(srcURL) // "go", "net", etc
	r := &Repo{
		path:     importPath,
		root:     filepath.Join(*cacheDir, repoName),
		commits:  make(map[string]*Commit),
		branches: make(map[string]*Branch),
		mirror:   dstURL != "",
		dash:     dash,
	}

	http.Handle("/debug/watcher/"+r.name(), r)

	// Try to refresh an existing checkout before falling back to a clone.
	reused := false
	if r.shouldTryReuseGitDir(dstURL) {
		r.setStatus("reusing git dir; running git fetch")
		fetch := exec.Command("git", "fetch", "--prune", "origin")
		fetch.Dir = r.root
		r.logf("running git fetch")
		began := time.Now()
		var stderr bytes.Buffer
		fetch.Stderr = &stderr
		if err := fetch.Run(); err != nil {
			r.logf("git fetch failed; proceeding to wipe + clone instead; err: %v, stderr: %s", err, stderr.Bytes())
		} else {
			reused = true
			r.logf("ran git fetch in %v", time.Since(began))
		}
	}

	if !reused {
		r.setStatus("need clone; removing cache root")
		os.RemoveAll(r.root)
		began := time.Now()
		r.setStatus("running fresh git clone --mirror")
		r.logf("cloning %v into %s", srcURL, r.root)
		clone := exec.Command("git", "clone", "--mirror", srcURL, r.root)
		out, err := clone.CombinedOutput()
		if err != nil {
			return nil, fmt.Errorf("cloning %s: %v\n\n%s", srcURL, err, out)
		}
		r.setStatus("cloned")
		r.logf("cloned in %v", time.Since(began))
	}

	if r.mirror {
		r.setStatus("adding dest remote")
		// We want to include only the refs/heads/* and refs/tags/* namespaces
		// in the GitHub mirrors. They correspond to published branches and tags.
		// Leave out internal Gerrit namespaces such as refs/changes/*,
		// refs/users/*, etc., because they're not helpful on github.com/golang.
		pushSpecs := []string{
			"push = +refs/heads/*:refs/heads/*",
			"push = +refs/tags/*:refs/tags/*",
		}
		if err := r.addRemote("dest", dstURL, pushSpecs...); err != nil {
			r.setStatus("failed to add dest")
			return nil, fmt.Errorf("adding remote: %v", err)
		}
		r.setStatus("added dest remote")
	}

	if r.dash {
		r.logf("loading commit log")
		if err := r.update(false); err != nil {
			return nil, err
		}
		r.logf("found %v branches among %v commits\n", len(r.branches), len(r.commits))
	}

	return r, nil
}
// setErr records the outcome of the latest sync attempt: err is nil on
// success. It updates the last-good/last-bad timestamps, and the
// first-good/first-bad timestamps whenever the health state flips.
func (r *Repo) setErr(err error) {
	r.mu.Lock()
	defer r.mu.Unlock()

	wasBad, isBad := r.err != nil, err != nil
	flipped := wasBad != isBad
	now := time.Now()

	switch {
	case isBad:
		if flipped {
			r.firstBad = now
		}
		r.lastBad = now
	default:
		if flipped {
			r.firstGood = now
		}
		r.lastGood = now
	}
	r.err = err
}
// startTime is when the process started; statusLine uses it to tell
// "still starting up" apart from "hung at start-up".
var startTime = time.Now()

// statusLine returns a one-line, human-readable health summary for the
// repo, based on the error and timestamps recorded by setErr.
func (r *Repo) statusLine() string {
	r.mu.Lock()
	defer r.mu.Unlock()

	neverGood := r.lastGood.IsZero()
	failing := r.err != nil

	switch {
	case neverGood && failing:
		return fmt.Sprintf("broken; permanently? always failing, for %v", time.Since(r.firstBad))
	case neverGood && time.Since(startTime) < 5*time.Minute:
		return "ok; starting up, no report yet"
	case neverGood:
		return fmt.Sprintf("hung; hang at start-up? no report since start %v ago", time.Since(startTime))
	case failing:
		return fmt.Sprintf("broken for %v", time.Since(r.lastGood))
	}

	// Healthy: the last attempt succeeded.
	if idle := time.Since(r.lastGood); idle > 6*time.Minute {
		return fmt.Sprintf("hung? no activity since last success %v ago", idle)
	}
	if r.lastBad.After(time.Now().Add(-1 * time.Hour)) {
		return fmt.Sprintf("ok; recent failure %v ago", time.Since(r.lastBad))
	}
	return "ok"
}
// setStatus appends status, timestamped with the current time, to the
// repo's ring of recent status messages.
func (r *Repo) setStatus(status string) {
r.status.add(status)
}
// shouldTryReuseGitDir reports whether we should try to reuse r.root as the git
// directory. (The directory may be corrupt, though.)
// dstURL is optional, and is the desired remote URL for a remote named "dest".
//
// The directory is considered reusable when it contains a FETCH_HEAD file
// and, if dstURL is non-empty, "git remote -v" lists a "dest" remote whose
// URL equals dstURL.
func (r *Repo) shouldTryReuseGitDir(dstURL string) bool {
	if _, err := os.Stat(filepath.Join(r.root, "FETCH_HEAD")); err != nil {
		if os.IsNotExist(err) {
			r.logf("not reusing git dir; no FETCH_HEAD at %s", r.root)
		} else {
			r.logf("not reusing git dir; %v", err)
		}
		return false
	}
	if dstURL == "" {
		// Nothing to compare against, so the existing directory is fine.
		r.logf("reusing git dir; no dest remote URL to check")
		return true
	}

	// Does the "dest" remote match? If not, we return false and nuke
	// the world and re-clone out of laziness.
	cmd := exec.Command("git", "remote", "-v")
	cmd.Dir = r.root
	out, err := cmd.Output()
	if err != nil {
		// Without the remote list we cannot confirm a match; re-clone.
		r.logf("not reusing git dir; git remote -v: %v", err)
		return false
	}
	foundWrong := false
	for _, ln := range strings.Split(string(out), "\n") {
		f := strings.Fields(ln)
		if len(f) < 2 || f[0] != "dest" {
			continue
		}
		if f[1] == dstURL {
			return true
		}
		if !foundWrong {
			// Log the first mismatch only.
			foundWrong = true
			r.logf("found dest of %q, which doesn't equal sought %q", f[1], dstURL)
		}
	}
	r.logf("not reusing old repo: remote \"dest\" URL doesn't match")
	return false
}
// addRemote appends a [remote "name"] section to the repository's git
// config file, with the given url and one extra line per entry in opts
// (each written verbatim after a tab).
func (r *Repo) addRemote(name, url string, opts ...string) error {
	cfg, err := os.OpenFile(filepath.Join(r.root, "config"), os.O_WRONLY|os.O_APPEND, os.ModePerm)
	if err != nil {
		return err
	}

	// Write the section header, then each option; stop at the first failure.
	writeErr := func() error {
		if _, err := fmt.Fprintf(cfg, "\n[remote %q]\n\turl = %v\n", name, url); err != nil {
			return err
		}
		for _, opt := range opts {
			if _, err := fmt.Fprintf(cfg, "\t%s\n", opt); err != nil {
				return err
			}
		}
		return nil
	}()

	if writeErr != nil {
		cfg.Close()
		return writeErr
	}
	return cfg.Close()
}
// Loop runs forever: each round it fetches from origin, pushes to the
// mirror destination (if enabled) and posts new commits to the dashboard
// (if enabled). After a failed round it records the error and retries in
// ten seconds; after a successful one it waits for a tickle or a slow
// fallback timer.
func (r *Repo) Loop() {
	tickler := repoTickler(r.name())

	// syncOnce performs one full round and returns the first error hit.
	syncOnce := func() error {
		if err := r.fetch(); err != nil {
			return err
		}
		if r.mirror {
			if err := r.push(); err != nil {
				return err
			}
		}
		if r.dash {
			if err := r.updateDashboard(); err != nil {
				return err
			}
		}
		return nil
	}

	for {
		if err := syncOnce(); err != nil {
			r.setErr(err)
			time.Sleep(10 * time.Second)
			continue
		}

		r.setErr(nil)
		r.setStatus("waiting")

		// We still run a timer but a very slow one, just
		// in case the mechanism updating the repo tickler
		// breaks for some reason.
		fallback := time.NewTimer(5 * time.Minute)
		select {
		case <-tickler:
			r.setStatus("got update tickle")
			fallback.Stop()
		case <-fallback.C:
			r.setStatus("poll timer fired")
		}
	}
}
// updateDashboard refreshes the in-memory commit graph and then posts the
// not-yet-seen commits of every known branch to the dashboard. It records
// a status message before starting and another one on success.
func (r *Repo) updateDashboard() (err error) {
	r.setStatus("updating dashboard")
	defer func() {
		if err != nil {
			return
		}
		r.setStatus("updated dashboard")
	}()

	if err = r.update(true); err != nil {
		return err
	}

	var branchNames []string
	if branchNames, err = gitRemotesFn(r); err != nil {
		return err
	}
	for _, branchName := range branchNames {
		// Branches missing from r.branches are skipped; must be already merged.
		if b, known := r.branches[branchName]; known {
			if err = r.postNewCommits(b); err != nil {
				return err
			}
		}
	}
	return nil
}
// name returns the short repo name: the last element of the import path,
// or "go" for the main repo (whose path is empty).
func (r *Repo) name() string {
	if r.path != "" {
		return path.Base(r.path)
	}
	return "go"
}
// logf logs a formatted message prefixed with the repo name and ": ".
func (r *Repo) logf(format string, args ...interface{}) {
	prefixed := r.name() + ": " + format
	log.Printf(prefixed, args...)
}
// postNewCommits posts to the dashboard every commit on branch b that
// comes after b.LastSeen, then marks the branch head as seen.
// It does nothing when the head has already been seen.
func (r *Repo) postNewCommits(b *Branch) error {
	if b.Head == b.LastSeen {
		return nil
	}

	// start is the commit whose descendants are to be posted.
	start := b.LastSeen
	switch {
	case start != nil:
		// Resume from the last commit the dashboard has seen.
	case b.Name == master:
		// Nothing seen yet on master: bootstrap by creating a dummy
		// commit with a lone child that is the initial commit.
		dummy := &Commit{}
		for _, candidate := range r.commits {
			if candidate.Parent == "" {
				dummy.children = []*Commit{candidate}
				break
			}
		}
		if dummy.children == nil {
			return fmt.Errorf("couldn't find initial commit")
		}
		start = dummy
	default:
		// Nothing seen yet on another branch: start from the commit
		// that this branch forked from.
		base, err := r.mergeBase("heads/"+b.Name, master)
		if err != nil {
			return err
		}
		forkPoint, found := r.commits[base]
		if !found {
			return fmt.Errorf("couldn't find base commit: %v", base)
		}
		start = forkPoint
	}

	if err := r.postChildren(b, start); err != nil {
		return err
	}
	b.LastSeen = b.Head
	return nil
}
// postChildren posts to the dashboard all descendants of the given parent.
// It ignores descendants that are not on the given branch.
//
// The direct children are posted first, then each child's own descendants
// are processed recursively. If posting fails with the dashboard's
// "already has a first commit" message, the walk stops without error.
func (r *Repo) postChildren(b *Branch, parent *Commit) error {
	const alreadyHasFirst = "this package already has a first commit; aborting"

	for _, child := range parent.children {
		if child.Branch != b.Name {
			continue
		}
		err := r.postCommit(child)
		if err == nil {
			continue
		}
		if strings.Contains(err.Error(), alreadyHasFirst) {
			return nil
		}
		return err
	}

	for _, child := range parent.children {
		if err := r.postChildren(b, child); err != nil {
			return err
		}
	}
	return nil
}
// postCommit sends a commit to the build dashboard.
//
// In dry-run mode (-report=false) it only logs. With -network=false it
// records the commit in networkSeen instead of posting, after checking
// that the parent has been recorded and the commit itself has not.
//
// NOTE(review): the POST goes through http.Post, i.e. the default client
// with no timeout, unlike the lookups that use httpClient — confirm whether
// that is intended.
func (r *Repo) postCommit(c *Commit) error {
	if !*report {
		r.logf("dry-run mode; NOT posting commit to dashboard: %v", c)
		return nil
	}
	r.logf("sending commit to dashboard: %v", c)

	when, err := time.Parse("Mon, 2 Jan 2006 15:04:05 -0700", c.Date)
	if err != nil {
		return fmt.Errorf("postCommit: parsing date %q for commit %v: %v", c.Date, c, err)
	}

	// dashCommit is the JSON request body sent to the dashboard.
	type dashCommit struct {
		PackagePath string // (empty for main repo commits)
		Hash        string
		ParentHash  string

		User   string
		Desc   string
		Time   time.Time
		Branch string

		NeedsBenchmarking bool
	}
	payload, err := json.Marshal(dashCommit{
		PackagePath: r.path,
		Hash:        c.Hash,
		ParentHash:  c.Parent,

		User:   c.Author,
		Desc:   c.Desc,
		Time:   when,
		Branch: c.Branch,

		NeedsBenchmarking: c.NeedsBenchmarking(),
	})
	if err != nil {
		return fmt.Errorf("postCommit: marshaling request body: %v", err)
	}

	if !*network {
		// Testing mode: emulate the dashboard's bookkeeping locally.
		if c.Parent != "" && !networkSeen[c.Parent] {
			r.logf("%v: %v", c.Parent, r.commits[c.Parent])
			return fmt.Errorf("postCommit: no parent %v found on dashboard for %v", c.Parent, c)
		}
		if networkSeen[c.Hash] {
			return fmt.Errorf("postCommit: already seen %v", c)
		}
		networkSeen[c.Hash] = true
		return nil
	}

	query := url.Values{}
	query.Set("version", fmt.Sprint(watcherVersion))
	query.Set("key", dashboardKey)
	resp, err := http.Post(*dashFlag+"commit?"+query.Encode(), "text/json", bytes.NewReader(payload))
	if err != nil {
		return err
	}
	body, err := ioutil.ReadAll(resp.Body)
	resp.Body.Close()
	if err != nil {
		return fmt.Errorf("postCommit: reading body: %v", err)
	}
	if resp.StatusCode != 200 {
		return fmt.Errorf("postCommit: status: %v\nbody: %s", resp.Status, body)
	}

	var reply struct {
		Error string
	}
	if err := json.Unmarshal(body, &reply); err != nil {
		return fmt.Errorf("postCommit: decoding response: %v", err)
	}
	if reply.Error != "" {
		return fmt.Errorf("postCommit: error: %v", reply.Error)
	}
	return nil
}
// update looks for new commits and branches,
// and updates the commits and branches maps.
//
// For every branch reported by gitRemotesFn it asks gitLogFn for the
// commits not yet known on that branch, stores them in r.commits, links
// each one to its parent and children, and then moves the branch head (or
// registers the branch if it is new). If noisy is true every commit
// returned by the log is logged.
func (r *Repo) update(noisy bool) error {
	remotes, err := gitRemotesFn(r)
	if err != nil {
		return err
	}
	for _, name := range remotes {
		b := r.branches[name]

		// Find all unseen commits on this branch.
		revspec := "heads/" + name
		if b != nil {
			// If we know about this branch,
			// only log commits down to the known head.
			revspec = b.Head.Hash + ".." + revspec
		}
		// gitLogFn's second parameter is the dir argument, not a git
		// flag. Pass it explicitly (empty) so that --topo-order really
		// reaches "git log"; the head lookup below relies on log[0]
		// being the newest commit of the branch.
		log, err := gitLogFn(r, "", "--topo-order", revspec)
		if err != nil {
			return err
		}
		if len(log) == 0 {
			// No commits to handle; carry on.
			continue
		}

		var nDups, nDrops int

		// Add unknown commits to r.commits.
		var added []*Commit
		for _, c := range log {
			if noisy {
				r.logf("found new commit %v", c)
			}
			// If we've already seen this commit,
			// only store the master one in r.commits.
			if _, ok := r.commits[c.Hash]; ok {
				nDups++
				if name != master {
					nDrops++
					continue
				}
			}
			c.Branch = name
			r.commits[c.Hash] = c
			added = append(added, c)
		}

		if nDups > 0 {
			r.logf("saw %v duplicate commits; dropped %v of them", nDups, nDrops)
		}

		// Link added commits.
		for _, c := range added {
			if c.Parent == "" {
				// This is the initial commit; no parent.
				r.logf("no parents for initial commit %v", c)
				continue
			}
			// Find parent commit.
			p, ok := r.commits[c.Parent]
			if !ok {
				return fmt.Errorf("can't find parent %q for %v", c.Parent, c)
			}
			// Link parent Commit.
			c.parent = p
			// Link child Commits.
			p.children = append(p.children, c)
		}

		// Update branch head, or add newly discovered branch.

		// If we had already seen log[0], eg. on a different branch,
		// we would never have linked it in the loop above.
		// We therefore fetch the commit from r.commits to ensure we have
		// the right address.
		head := r.commits[log[0].Hash]

		if b != nil {
			// Known branch; update head.
			b.Head = head
			r.logf("updated branch head: %v", b)
		} else {
			// It's a new branch; add it.
			seen, err := r.lastSeen(head.Hash)
			if err != nil {
				return err
			}
			b = &Branch{Name: name, Head: head, LastSeen: seen}
			r.branches[name] = b
			r.logf("found branch: %v", b)
		}
	}

	return nil
}
// lastSeen finds the most recent commit the dashboard has seen,
// starting at the specified head. If the dashboard hasn't seen
// any of the commits from head to the beginning, it returns nil.
//
// It follows parent links from head to build the history (newest first)
// and binary-searches it with dashSeen for the first commit that the
// dashboard knows about.
func (r *Repo) lastSeen(head string) (*Commit, error) {
	start, found := r.commits[head]
	if !found {
		return nil, fmt.Errorf("lastSeen: can't find %q in commits", head)
	}

	var history []*Commit
	for c := start; c != nil; c = c.parent {
		history = append(history, c)
	}

	var probeErr error
	idx := sort.Search(len(history), func(i int) bool {
		// After the first failure, stop querying the dashboard.
		if probeErr != nil {
			return false
		}
		var known bool
		known, probeErr = r.dashSeen(history[i].Hash)
		return known
	})

	if probeErr != nil {
		return nil, fmt.Errorf("lastSeen: %v", probeErr)
	}
	if idx < len(history) {
		return history[idx], nil
	}
	// Dashboard saw no commits.
	return nil, nil
}
// dashSeen reports whether the build dashboard knows the specified commit.
// With -network=false it consults the in-memory networkSeen set instead of
// querying the dashboard.
func (r *Repo) dashSeen(hash string) (bool, error) {
	if !*network {
		return networkSeen[hash], nil
	}

	query := url.Values{}
	query.Set("hash", hash)
	query.Set("packagePath", r.path)
	resp, err := httpClient.Get(*dashFlag + "commit?" + query.Encode())
	if err != nil {
		return false, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != 200 {
		return false, fmt.Errorf("status: %v", resp.Status)
	}

	var reply struct {
		Error string
	}
	if err := json.NewDecoder(resp.Body).Decode(&reply); err != nil {
		return false, err
	}

	if reply.Error == "" {
		// Found one.
		return true, nil
	}
	if reply.Error == "Commit not found" {
		// Commit not found, keep looking for earlier commits.
		return false, nil
	}
	return false, fmt.Errorf("dashboard: %v", reply.Error)
}
// mergeBase runs "git merge-base a b" in the repository root and returns
// the resulting hash with surrounding whitespace removed.
func (r *Repo) mergeBase(a, b string) (string, error) {
	cmd := exec.Command("git", "merge-base", a, b)
	cmd.Dir = r.root
	out, err := cmd.CombinedOutput()
	if err == nil {
		return string(bytes.TrimSpace(out)), nil
	}
	return "", fmt.Errorf("git merge-base %s..%s: %v", a, b, err)
}
// gitRemotes returns the names of the branches to watch.
//
// If the -branches flag is set, its comma-separated values are returned
// as-is. Otherwise the list comes from "git branch" run in the repository
// root, with master always first and with aliases and pre-go1 release
// branches left out.
func gitRemotes(r *Repo) ([]string, error) {
	if *branches != "" {
		return strings.Split(*branches, ","), nil
	}

	cmd := exec.Command("git", "branch")
	cmd.Dir = r.root
	out, err := cmd.CombinedOutput()
	if err != nil {
		return nil, fmt.Errorf("git branch: %v", err)
	}

	names := []string{master}
	for _, line := range strings.Split(string(out), "\n") {
		name := strings.TrimSpace(strings.TrimPrefix(line, "* "))
		switch {
		case name == "", name == master, strings.Contains(name, "->"):
			// Ignore aliases, blank lines, and master (it's already in names).
			continue
		case strings.HasPrefix(name, "release-branch.r"):
			// Ignore pre-go1 release branches; they are just noise.
			continue
		}
		names = append(names, name)
	}
	return names, nil
}
// logFormat is the --format argument passed to "git log". Each commit is
// printed as logBoundary followed by hash, parent hashes, author, commit
// date and raw body on separate lines, and then fileBoundary; the file
// names added by --name-only follow the file boundary.
const logFormat = `--format=format:` + logBoundary + `%H
%P
%an <%ae>
%cD
%B
` + fileBoundary

// logBoundary separates commits in the "git log" output.
const logBoundary = `_-_- magic boundary -_-_`

// fileBoundary separates a commit's description from its file list.
const fileBoundary = `_-_- file boundary -_-_`

// gitLog runs "git log" with the supplied arguments
// and parses the output into Commit values.
//
// The command always runs in r.root; the dir parameter is currently not
// used. For the main repo (empty r.path) the -filter flag, if set,
// restricts the log to the listed paths.
func gitLog(r *Repo, dir string, args ...string) ([]*Commit, error) {
	args = append([]string{"log", "--date=rfc", "--name-only", "--parents", logFormat}, args...)
	if r.path == "" && *filter != "" {
		paths := strings.Split(*filter, ",")
		args = append(args, "--")
		args = append(args, paths...)
	}
	cmd := exec.Command("git", args...)
	cmd.Dir = r.root
	out, err := cmd.CombinedOutput()
	if err != nil {
		return nil, fmt.Errorf("git %v: %v\n%s", strings.Join(args, " "), err, out)
	}

	// We have a commit with description that contains 0x1b byte.
	// Mercurial does not escape it, but xml.Unmarshal does not accept it.
	// TODO(adg): do we still need to scrub this? Probably.
	out = bytes.Replace(out, []byte{0x1b}, []byte{'?'}, -1)

	var cs []*Commit
	for _, text := range strings.Split(string(out), logBoundary) {
		text = strings.TrimSpace(text)
		if text == "" {
			continue
		}
		p := strings.SplitN(text, "\n", 5)
		if len(p) != 5 {
			return nil, fmt.Errorf("git log %v: malformed commit: %q", strings.Join(args, " "), text)
		}

		// The change summary contains the change description and files
		// modified in this commit. There is no way to directly refer
		// to the modified files in the log formatting string, so we look
		// for the file boundary after the description.
		changeSummary := p[4]
		descAndFiles := strings.SplitN(changeSummary, fileBoundary, 2)
		desc := strings.TrimSpace(descAndFiles[0])

		// For branch merges, the list of files can still be empty
		// because there are no changed files. Also tolerate a record
		// without a file boundary instead of indexing out of range.
		files := ""
		if len(descAndFiles) > 1 {
			files = strings.Replace(strings.TrimSpace(descAndFiles[1]), "\n", " ", -1)
		}

		cs = append(cs, &Commit{
			Hash: p[0],
			// TODO(adg): This may break with branch merges.
			Parent: strings.Split(p[1], " ")[0],
			Author: p[2],
			Date:   p[3],
			Desc:   desc,
			Files:  files,
		})
	}
	return cs, nil
}
// fetch runs "git fetch --prune origin" in the repository root.
// It makes up to three attempts, just in case a failure was caused by a
// transient error, and records the progress as status messages.
func (r *Repo) fetch() (err error) {
	r.setStatus("running git fetch origin")
	defer func() {
		outcome := "ran git fetch"
		if err != nil {
			outcome = "git fetch failed"
		}
		r.setStatus(outcome)
	}()

	attempt := 0
	return try(3, func() error {
		if attempt++; attempt > 1 {
			r.setStatus(fmt.Sprintf("running git fetch origin, attempt %d", attempt))
		}
		cmd := exec.Command("git", "fetch", "--prune", "origin")
		cmd.Dir = r.root
		out, runErr := cmd.CombinedOutput()
		if runErr == nil {
			return nil
		}
		// Include the command output in the reported error.
		wrapped := fmt.Errorf("%v\n\n%s", runErr, out)
		r.logf("git fetch: %v", wrapped)
		return wrapped
	})
}
// push runs "git push -f --mirror dest" in the repository root.
// It makes up to three attempts, just in case a failure was caused by a
// transient error, and records the progress as status messages.
func (r *Repo) push() (err error) {
	r.setStatus("syncing to github")
	defer func() {
		outcome := "did sync to github"
		if err != nil {
			outcome = "sync to github failed"
		}
		r.setStatus(outcome)
	}()

	attempt := 0
	return try(3, func() error {
		if attempt++; attempt > 1 {
			r.setStatus(fmt.Sprintf("syncing to github, attempt %d", attempt))
		}
		cmd := exec.Command("git", "push", "-f", "--mirror", "dest")
		cmd.Dir = r.root
		out, runErr := cmd.CombinedOutput()
		if runErr == nil {
			return nil
		}
		// Include the command output in the reported error.
		wrapped := fmt.Errorf("%v\n\n%s", runErr, out)
		r.logf("git push failed: %v", wrapped)
		return wrapped
	})
}
// hasRev reports whether "git cat-file -t rev" succeeds in the repository
// root, i.e. whether the repo contains the commit-ish rev.
func (r *Repo) hasRev(ctx context.Context, rev string) bool {
	probe := exec.CommandContext(ctx, "git", "cat-file", "-t", rev)
	probe.Dir = r.root
	err := probe.Run()
	return err == nil
}
// testHookArchiveCmd, if non-nil, is used by r.archive instead of
// exec.CommandContext to create the "git archive" command.
var testHookArchiveCmd func(context.Context, string, ...string) *exec.Cmd
// testHookFetchCmd, if non-nil, is used by r.fetchRev instead of
// exec.CommandContext to create the "git fetch" command.
var testHookFetchCmd func(context.Context, string, ...string) *exec.Cmd
// archive runs "git archive --format=tgz rev" in the repository root and
// returns the command's standard output, the compressed tarball.
// The command is built by testHookArchiveCmd when that hook is set.
func (r *Repo) archive(ctx context.Context, rev string) ([]byte, error) {
	newCmd := exec.CommandContext
	if testHookArchiveCmd != nil {
		newCmd = testHookArchiveCmd
	}
	cmd := newCmd(ctx, "git", "archive", "--format=tgz", rev)
	cmd.Dir = r.root
	return cmd.Output()
}
// fetchRev runs "git fetch remote rev" in the repository root.
// The command is built by testHookFetchCmd when that hook is set.
func (r *Repo) fetchRev(ctx context.Context, remote, rev string) error {
	newCmd := exec.CommandContext
	if testHookFetchCmd != nil {
		newCmd = testHookFetchCmd
	}
	cmd := newCmd(ctx, "git", "fetch", remote, rev)
	cmd.Dir = r.root
	return cmd.Run()
}
// fetchRevIfNeeded fetches rev from origin unless the repository already
// contains it.
func (r *Repo) fetchRevIfNeeded(ctx context.Context, rev string) error {
	if !r.hasRev(ctx, rev) {
		r.logf("attempting to fetch missing revision %s from origin", rev)
		return r.fetchRev(ctx, "origin", rev)
	}
	return nil
}
// ServeHTTP serves the repo's two endpoints:
//
// GET /<name>.tar.gz
// GET /debug/watcher/<name>
//
// The debug path shows the repo's recent status messages. The tarball path
// requires a "rev" form value and responds with a gzipped tar archive of
// the repository at that revision, fetching the revision from origin first
// if it is missing locally. Methods other than GET and HEAD, and missing or
// invalid revs, get a 400 response.
func (r *Repo) ServeHTTP(w http.ResponseWriter, req *http.Request) {
	if req.Method != "GET" && req.Method != "HEAD" {
		w.WriteHeader(http.StatusBadRequest)
		return
	}
	if strings.HasPrefix(req.URL.Path, "/debug/watcher/") {
		r.serveStatus(w, req)
		return
	}
	rev := req.FormValue("rev")
	// rev comes from the request and is handed to git as a command-line
	// argument. Refuse anything git would parse as an option (for example
	// "--output=..." or "--remote=..." for git archive).
	if rev == "" || strings.HasPrefix(rev, "-") {
		w.WriteHeader(http.StatusBadRequest)
		return
	}
	ctx, cancel := context.WithTimeout(req.Context(), 30*time.Second)
	defer cancel()
	if err := r.fetchRevIfNeeded(ctx, rev); err != nil {
		// Try the archive anyway, it might work
		r.logf("error fetching revision %s: %v", rev, err)
	}
	tgz, err := r.archive(ctx, rev)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	w.Header().Set("Content-Length", strconv.Itoa(len(tgz)))
	w.Header().Set("Content-Type", "application/x-compressed")
	w.Write(tgz)
}
// serveStatus writes an HTML page listing the repo's recorded status
// messages, newest first, each with its UTC timestamp and its age rounded
// to the second.
func (r *Repo) serveStatus(w http.ResponseWriter, req *http.Request) {
	w.Header().Set("Content-Type", "text/html")

	repoName := r.name()
	fmt.Fprintf(w, "<html><head><title>watcher: %s</title><body><h1>watcher status for repo: %q</h1>\n",
		repoName, repoName)
	fmt.Fprintf(w, "<pre>\n")

	now := time.Now().Round(time.Second)
	writeEntry := func(e statusEntry) {
		stamp := e.t.In(time.UTC).Format(time.RFC3339)
		age := now.Sub(e.t.Round(time.Second))
		fmt.Fprintf(w, "%v %-20s %v\n", stamp, age.String()+" ago", e.status)
	}
	r.status.foreachDesc(writeEntry)

	fmt.Fprintf(w, "\n</pre></body></html>")
}
// try calls fn up to n times, stopping at the first success. Before the
// k-th attempt (counting from zero) it sleeps k*5 seconds, so the first
// attempt runs immediately. It returns nil on success, otherwise the error
// from the last attempt; with n <= 0, fn is never called and nil is
// returned.
func try(n int, fn func() error) error {
	var lastErr error
	for attempt := 0; attempt < n; attempt++ {
		backoff := time.Duration(attempt) * 5 * time.Second // Linear back-off.
		time.Sleep(backoff)
		lastErr = fn()
		if lastErr == nil {
			return nil
		}
	}
	return lastErr
}
// Branch represents a Git branch being watched.
type Branch struct {
Name string
Head *Commit
LastSeen *Commit // the last commit posted to the dashboard
}
// String returns the quoted branch name followed by its Head and LastSeen
// commits, for use in log messages.
func (b *Branch) String() string {
return fmt.Sprintf("%q(Head: %v LastSeen: %v)", b.Name, b.Head, b.LastSeen)
}
// Commit represents a single Git commit.
type Commit struct {
	Hash   string
	Author string
	Date   string // Format: "Mon, 2 Jan 2006 15:04:05 -0700"
	Desc   string // Plain text, first line is a short description.
	Parent string
	Branch string
	Files  string

	// For walking the graph.
	parent   *Commit
	children []*Commit
}

// String returns the commit hash, then the branch in square brackets (if
// set), then the quoted first line of the description in parentheses.
func (c *Commit) String() string {
	summary := c.Desc
	if nl := strings.Index(summary, "\n"); nl >= 0 {
		summary = summary[:nl]
	}

	branch := ""
	if c.Branch != "" {
		branch = fmt.Sprintf("[%v]", c.Branch)
	}
	return c.Hash + branch + fmt.Sprintf("(%q)", summary)
}
// NeedsBenchmarking reports whether the Commit needs benchmarking.
// That is the case for a master-branch commit that touches at least one
// file under "include" or "src" that is neither a _test.go file nor in a
// testdata path.
func (c *Commit) NeedsBenchmarking() bool {
	// Do not benchmark branch commits, they are usually not interesting
	// and fall out of the trunk succession.
	if c.Branch != master {
		return false
	}

	// touchesSource reports whether f is a benchmark-relevant source file.
	touchesSource := func(f string) bool {
		if !strings.HasPrefix(f, "include") && !strings.HasPrefix(f, "src") {
			return false
		}
		return !strings.HasSuffix(f, "_test.go") && !strings.Contains(f, "testdata")
	}

	// Do not benchmark commits that do not touch source files (e.g. CONTRIBUTORS).
	for _, file := range strings.Split(c.Files, " ") {
		if touchesSource(file) {
			return true
		}
	}
	return false
}
// homeDir returns the user's home directory as given by the environment:
// $home on Plan 9, %HOMEDRIVE%%HOMEPATH% on Windows and $HOME elsewhere.
func homeDir() string {
	goos := runtime.GOOS
	if goos == "windows" {
		return os.Getenv("HOMEDRIVE") + os.Getenv("HOMEPATH")
	}
	envName := "HOME"
	if goos == "plan9" {
		envName = "home"
	}
	return os.Getenv(envName)
}
// readKey returns the build dashboard key.
//
// It reads the file named by the -key flag and returns its first line
// with surrounding whitespace removed. If that file does not exist and
// the program is running on GCE, the key is taken from the
// "builder-master-key" project metadata attribute instead. Any other
// read error is returned unchanged.
func readKey() (string, error) {
	contents, readErr := ioutil.ReadFile(*keyFile)
	switch {
	case os.IsNotExist(readErr) && metadata.OnGCE():
		// No key file on disk: fall back to the GCE project metadata.
		key, err := metadata.ProjectAttributeValue("builder-master-key")
		if err != nil {
			return "", fmt.Errorf("-key=%s doesn't exist, and key can't be loaded from GCE metadata: %v", *keyFile, err)
		}
		return strings.TrimSpace(key), nil
	case readErr != nil:
		return "", readErr
	}
	// Only the first line of the file is the key.
	firstLine := bytes.SplitN(contents, []byte("\n"), 2)[0]
	return string(bytes.TrimSpace(firstLine)), nil
}
// subrepoList asks the dashboard for its list of sub-repositories and
// returns their base import paths,
// e.g. []string{"golang.org/x/tools", "golang.org/x/net"}.
//
// When network calls are disabled (-network=false) it returns nil, nil
// without contacting the dashboard. A transport failure, a non-200
// status, an undecodable body, or a non-empty Error field in the
// response is reported as an error prefixed with "subrepo list:".
func subrepoList() ([]string, error) {
	if !*network {
		return nil, nil
	}

	res, err := httpClient.Get(*dashFlag + "packages?kind=subrepo")
	if err != nil {
		return nil, fmt.Errorf("subrepo list: %v", err)
	}
	defer res.Body.Close()
	if res.StatusCode != 200 {
		return nil, fmt.Errorf("subrepo list: got status %v", res.Status)
	}

	// Shape of the dashboard's JSON reply.
	type pkg struct {
		Path string
	}
	var payload struct {
		Response []pkg
		Error    string
	}
	if err := json.NewDecoder(res.Body).Decode(&payload); err != nil {
		return nil, fmt.Errorf("subrepo list: %v", err)
	}
	if payload.Error != "" {
		return nil, fmt.Errorf("subrepo list: %v", payload.Error)
	}

	var paths []string
	for i := range payload.Response {
		paths = append(paths, payload.Response[i].Path)
	}
	return paths, nil
}
var (
	ticklerMu sync.Mutex                // guards ticklers
	ticklers  = map[string]chan bool{} // repo name -> that repo's tickle channel
)

// repoTickler returns the tickle channel for repo, creating it on first
// use. repo is the gerrit repo: e.g. "go", "net", "crypto", ...
//
// Each repo gets exactly one channel, with a buffer of one, and every
// call for the same repo returns that same channel.
func repoTickler(repo string) chan bool {
	ticklerMu.Lock()
	defer ticklerMu.Unlock()

	ch, found := ticklers[repo]
	if !found {
		ch = make(chan bool, 1)
		ticklers[repo] = ch
	}
	return ch
}
// pollGerritAndTickle periodically asks Gerrit (via gerritMetaMap) for
// every repo's current master hash. When a repo's hash differs from
// the one seen on the previous poll, it tickles that repo's channel,
// which wakes up the repo's poller if the poller is sleeping.
//
// The tickle is a non-blocking send, so a pending tickle is never
// duplicated. A failed poll is logged and skipped. The function sleeps
// for the -poll interval between rounds and never returns.
func pollGerritAndTickle() {
	lastSeen := make(map[string]string) // repo -> last seen hash
	for {
		heads, err := gerritMetaMap()
		if err != nil {
			log.Printf("pollGerritAndTickle: gerritMetaMap failed, skipping: %v", err)
		} else {
			for name, head := range heads {
				if lastSeen[name] == head {
					continue
				}
				lastSeen[name] = head
				select {
				case repoTickler(name) <- true:
				default:
					// A tickle is already pending for this repo.
				}
			}
		}
		time.Sleep(*pollInterval)
	}
}
// subscribeToMaintnerAndTickleLoop keeps a maintner subscription
// running forever so that ref changes are noticed in realtime.
// Whenever subscribeToMaintnerAndTickle fails, the error is logged and
// the subscription is restarted after 30 seconds. It never returns.
func subscribeToMaintnerAndTickleLoop() {
	for {
		err := subscribeToMaintnerAndTickle()
		if err == nil {
			continue
		}
		log.Printf("maintner loop: %v; retrying in 30 seconds", err)
		time.Sleep(30 * time.Second)
	}
}
// subscribeToMaintnerAndTickle loads the maintner corpus and then, in a
// loop, checks every Gerrit project for ref changes and waits for the
// next corpus update.
//
// For each project it computes a SHA-1 digest over the hashes of all
// non-change refs. The first digest seen for a project is only
// recorded; each later change of the digest is logged and tickles the
// project's repo channel (non-blocking). The function returns only
// when loading or updating the corpus fails, with that error.
func subscribeToMaintnerAndTickle() error {
	log.Printf("Loading maintner data.")
	start := time.Now()
	ctx := context.Background()
	corpus, err := godata.Get(ctx)
	if err != nil {
		return err
	}
	log.Printf("Loaded maintner data in %v", time.Since(start))

	// go.googlesource.com repo base => digest of all refs
	digests := make(map[string]string)

	// checkProject compares one project's current ref digest with the
	// previous one and tickles the repo when it has changed.
	checkProject := func(gp *maintner.GerritProject) error {
		name := path.Base(gp.ServerSlashProject())

		hasher := sha1.New()
		gp.ForeachNonChangeRef(func(ref string, hash maintner.GitHash) error {
			io.WriteString(hasher, string(hash))
			return nil
		})
		digest := fmt.Sprintf("%x", hasher.Sum(nil))

		previous := digests[name]
		if previous == digest {
			return nil
		}
		digests[name] = digest
		if previous != "" {
			// Not the initial observation: the refs really changed.
			log.Printf("maintner refs for %s changed", gp.ServerSlashProject())
			select {
			case repoTickler(name) <- true:
			default:
			}
		}
		return nil
	}

	for {
		corpus.Gerrit().ForeachProjectUnsorted(checkProject)
		if err := corpus.Update(ctx); err != nil {
			return err
		}
	}
}
// gerritMetaMap queries Gerrit, with a 30 second timeout, and returns a
// map from repo name (e.g. "go") to that repo's latest master hash.
// Repos that report no "master" branch are left out of the map.
func gerritMetaMap() (map[string]string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	projects, err := gerritClient.GetProjects(ctx, "master")
	if err != nil {
		return nil, fmt.Errorf("gerritClient.GetProjects: %v", err)
	}

	heads := make(map[string]string)
	for name, info := range projects {
		head, ok := info.Branches["master"]
		if !ok {
			continue
		}
		heads[name] = head
	}
	return heads, nil
}
// handleDebugGoroutines serves GET /debug/goroutines.
//
// It writes the stack traces of all goroutines as plain text. The dump
// is captured into a fixed 1 MiB buffer, so at most that much is sent.
func handleDebugGoroutines(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "text/plain; charset=utf-8")

	const maxDump = 1 << 20
	dump := make([]byte, maxDump)
	n := runtime.Stack(dump, true) // true: include all goroutines
	w.Write(dump[:n])
}
// handleDebugEnv serves GET /debug/env.
//
// It writes the process environment as plain text, one KEY=value entry
// per line.
func handleDebugEnv(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "text/plain; charset=utf-8")

	env := os.Environ()
	for i := range env {
		fmt.Fprintf(w, "%s\n", env[i])
	}
}