cmd/gitmirror: start of splitting off the git mirror code into separate service
Currently the Gerrit polling & git mirroring & serving & syncing
functionality is in the "watcher" process, which is in the same binary
as the cmd/coordinator (farmer.golang.org), but runs as a separate
process on that machine, and actually run in a separate Docker
container so it has a cache volume and the /usr/bin/git binary, etc. A
flag determines where func main delegates to: the build coordinator,
or the git mirror.
Start cleaning this up, in prep to run on Kubernetes as a separate service.
This copies the env/watcher-world/Dockerfile into a new gitmirror
Dockerfile, and copies the existing cmd/coordinator/watcher_process.go
into a new cmd/gitmirror/gitmirror.go process.
None of this is deployed or working yet. Once it's running and happy,
I'll then switch the coordinator to find & use it on GKE, and then
delete the old code.
Updates golang/go#18817
Change-Id: Ibaa87f244f08ce61662e689b815740c72c997536
Reviewed-on: https://go-review.googlesource.com/35911
Reviewed-by: Jaana Burcu Dogan <jbd@google.com>
diff --git a/cmd/gitmirror/Dockerfile b/cmd/gitmirror/Dockerfile
new file mode 100644
index 0000000..d68b5c7
--- /dev/null
+++ b/cmd/gitmirror/Dockerfile
@@ -0,0 +1,47 @@
+# Copyright 2017 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+FROM golang:1.8-wheezy
+LABEL maintainer "golang-dev@googlegroups.com"
+
+# todo(bradfitz): remove duplication between this and coordinator.
+# Maybe make both derive FROM "golang_org:buildbase" or something. But
+# then I want to make sure that FROM base is rebuilt always when things in
+# x/build change.
+
+RUN go get -d cloud.google.com/go/compute/metadata
+RUN cd /go/src/cloud.google.com/go/compute/metadata && git reset --hard cd0da878c66091060d2e7403abd62192b3e387e0
+
+RUN go get -d golang.org/x/time/rate
+RUN cd /go/src/golang.org/x/time/rate && git reset --hard f51c12702a4d776e4c1fa9b0fabab841babae631
+
+RUN go get -d golang.org/x/oauth2
+RUN cd /go/src/golang.org/x/oauth2 && git reset --hard 314dd2c0bf3ebd592ec0d20847d27e79d0dbe8dd
+
+RUN go get -d go4.org/syncutil
+RUN cd /go/src/go4.org && git reset --hard 7ce08ca145dbe0e66a127c447b80ee7914f3e4f9
+
+RUN go get -d golang.org/x/net/context
+RUN cd /go/src/golang.org/x/net && git reset --hard f2499483f923065a842d38eb4c7f1927e6fc6e6d
+
+RUN go get -d google.golang.org/api/container/v1
+RUN cd /go/src/google.golang.org/api && git reset --hard dfa61ae24628a06502b9c2805d983b57e89399b5
+
+RUN go get -d google.golang.org/genproto/googleapis/type/latlng
+RUN cd /go/src/google.golang.org/genproto && git reset --hard b3e7c2fb04031add52c4817f53f43757ccbf9c18
+RUN go get -d google.golang.org/genproto/googleapis/datastore/v1
+
+RUN go get -d gopkg.in/inf.v0
+RUN cd /go/src/gopkg.in/inf.v0 && git reset --hard 3887ee99ecf07df5b447e9b00d9c0b2adaa9f3e4
+
+RUN cd /go/src/google.golang.org/grpc && git reset --hard 50955793b0183f9de69bd78e2ec251cf20aab121
+
+COPY /cmd/gitmirror/install-apt-deps.sh /scripts/install-apt-deps.sh
+RUN /scripts/install-apt-deps.sh
+
+COPY . /go/src/golang.org/x/build/
+
+RUN go install golang.org/x/build/cmd/gitmirror
+
+ENTRYPOINT ["/go/bin/gitmirror"]
diff --git a/cmd/gitmirror/Makefile b/cmd/gitmirror/Makefile
new file mode 100644
index 0000000..e0854fe
--- /dev/null
+++ b/cmd/gitmirror/Makefile
@@ -0,0 +1,13 @@
+# Copyright 2014 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+docker-prod: Dockerfile
+ docker build -f Dockerfile --tag=gcr.io/symbolic-datum-552/gitmirror:latest ../..
+docker-dev: Dockerfile
+ docker build -f Dockerfile --tag=gcr.io/go-dashboard-dev/gitmirror:latest ../..
+
+push-prod: docker-prod
+ gcloud docker push gcr.io/symbolic-datum-552/gitmirror:latest
+push-dev: docker-dev
+ gcloud docker push gcr.io/go-dashboard-dev/gitmirror:latest
diff --git a/cmd/gitmirror/gitmirror.go b/cmd/gitmirror/gitmirror.go
new file mode 100644
index 0000000..68dcaaf
--- /dev/null
+++ b/cmd/gitmirror/gitmirror.go
@@ -0,0 +1,1322 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The gitmirror binary watches the specified Gerrit repositories for
+// new commits and reports them to the build dashboard.
+//
+// It also serves tarballs over HTTP for the build system, and pushes
+// new commits to Github.
+package main
+
+import (
+ "bufio"
+ "bytes"
+ "context"
+ "encoding/json"
+ "errors"
+ "flag"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "log"
+ "net"
+ "net/http"
+ "net/url"
+ "os"
+ "os/exec"
+ "path"
+ "path/filepath"
+ "runtime"
+ "sort"
+ "strconv"
+ "strings"
+ "sync"
+ "time"
+)
+
+const (
+ goBase = "https://go.googlesource.com/"
+ watcherVersion = 3 // must match dashboard/app/build/handler.go's watcherVersion
+ master = "master" // name of the master branch
+ metaURL = goBase + "?b=master&format=JSON"
+)
+
+var (
+ httpAddr = flag.String("http", "", "If non-empty, the listen address to run an HTTP server on")
+ cacheDir = flag.String("cachedir", "/var/cache/git-mirror", "git cache directory. If empty a temp directory is made.")
+
+ repoURL = flag.String("repo", goBase+"go", "main Repository URL (but subrepos are also mirrored)") // TODO: delete this?
+ dashFlag = flag.String("dash", "https://build.golang.org/", "Dashboard URL (must end in /)")
+ keyFile = flag.String("key", defaultKeyFile, "Build dashboard key file") // TODO: make automatic from metadata/k8s secrets
+
+ pollInterval = flag.Duration("poll", 10*time.Second, "Remote repo poll interval")
+
+ // TODO(bradfitz): these three are all kinda the same and
+ // redundant. Unify after research.
+ network = flag.Bool("network", true, "Enable network calls (disable for testing)")
+ mirror = flag.Bool("mirror", false, "whether to mirror to github")
+ report = flag.Bool("report", true, "Report updates to build dashboard (use false for development dry-run mode)")
+
+ filter = flag.String("filter", "", "If non-empty, a comma-separated list of directories or files to watch for new commits (only works on main repo). If empty, watch all files in repo.")
+ branches = flag.String("branches", "", "If non-empty, a comma-separated list of branches to watch. If empty, watch changes on every branch.")
+)
+
+var (
+ defaultKeyFile = filepath.Join(homeDir(), ".gobuildkey") // TODO: use k8s secrets or GCE proj metadata
+ dashboardKey = ""
+ networkSeen = make(map[string]bool) // testing mode only (-network=false); known hashes
+)
+
+func main() {
+ flag.Parse()
+ log.Printf("gitmirror running.")
+
+ if err := os.MkdirAll(*cacheDir, 0755); err != nil {
+ log.Fatalf("Failed to created watcher's git cache dir: %v", err)
+ }
+
+ go pollGerritAndTickle()
+ err := runGitMirror()
+ log.Fatalf("gitmirror exiting after failure: %v", err)
+}
+
+// runGitMirror is a little wrapper so we can use defer and return to signal
+// errors. It should only return a non-nil error.
+func runGitMirror() error {
+ if !strings.HasSuffix(*dashFlag, "/") {
+ return errors.New("dashboard URL (-dashboard) must end in /")
+ }
+
+ if *cacheDir == "" {
+ dir, err := ioutil.TempDir("", "gitmirror")
+ if err != nil {
+ log.Fatal(err)
+ }
+ defer os.RemoveAll(dir)
+ *cacheDir = dir
+ } else {
+ fi, err := os.Stat(*cacheDir)
+ if err != nil {
+ return fmt.Errorf("invalid -cachedir: %v", err)
+ }
+ if !fi.IsDir() {
+ return fmt.Errorf("invalid -cachedir=%q; not a directory", *cacheDir)
+ }
+ }
+
+ if *report {
+ if k, err := readKey(); err != nil {
+ return err
+ } else {
+ dashboardKey = k
+ }
+ }
+
+ if *httpAddr != "" {
+ ln, err := net.Listen("tcp", *httpAddr)
+ if err != nil {
+ return err
+ }
+ go http.Serve(ln, nil)
+ }
+
+ errc := make(chan error)
+ dir := *cacheDir
+ go func() {
+ dst := ""
+ if *mirror {
+ name := (*repoURL)[strings.LastIndex(*repoURL, "/")+1:]
+ dst = "git@github.com:golang/" + name + ".git"
+ }
+ name := strings.TrimPrefix(*repoURL, goBase)
+ r, err := NewRepo(dir, *repoURL, dst, "", true)
+ if err != nil {
+ errc <- err
+ return
+ }
+ http.Handle("/"+name+".tar.gz", r)
+ errc <- r.Watch()
+ }()
+
+ subrepos, err := subrepoList()
+ if err != nil {
+ return err
+ }
+
+ start := func(name, path string, dash bool) {
+ log.Printf("Starting watch of repo %s", name)
+ url := goBase + name
+ var dst string
+ if *mirror {
+ if shouldMirror(name) {
+ log.Printf("Starting mirror of subrepo %s", name)
+ dst = "git@github.com:golang/" + name + ".git"
+ } else {
+ log.Printf("Not mirroring repo %s", name)
+ }
+ }
+ r, err := NewRepo(dir, url, dst, path, dash)
+ if err != nil {
+ errc <- err
+ return
+ }
+ http.Handle("/"+name+".tar.gz", r)
+ errc <- r.Watch()
+ }
+
+ seen := map[string]bool{"go": true}
+ for _, path := range subrepos {
+ name := strings.TrimPrefix(path, "golang.org/x/")
+ seen[name] = true
+ go start(name, path, true)
+ }
+ if *mirror {
+ for name := range gerritMetaMap() {
+ if seen[name] {
+ // Repo already picked up by dashboard list.
+ continue
+ }
+ go start(name, "golang.org/x/"+name, false)
+ }
+ }
+
+ // Must be non-nil.
+ return <-errc
+}
+
+// shouldReport reports whether the named repo should be mirrored from
+// Gerrit to Github.
+func shouldMirror(name string) bool {
+ switch name {
+ case
+ "arch",
+ "benchmarks",
+ "blog",
+ "build",
+ "crypto",
+ "debug",
+ "example",
+ "exp",
+ "gddo",
+ "go",
+ "gofrontend",
+ "image",
+ "mobile",
+ "net",
+ "oauth2",
+ "playground",
+ "proposal",
+ "review",
+ "sync",
+ "sys",
+ "talks",
+ "term",
+ "text",
+ "time",
+ "tools",
+ "tour":
+ return true
+ }
+ // Else, see if it appears to be a subrepo:
+ r, err := http.Get("https://golang.org/x/" + name)
+ if err != nil {
+ log.Printf("repo %v doesn't seem to exist: %v", name, err)
+ return false
+ }
+ r.Body.Close()
+ return r.StatusCode/100 == 2
+}
+
+// a statusEntry is a status string at a specific time.
+type statusEntry struct {
+ status string
+ t time.Time
+}
+
+// statusRing is a ring buffer of timestamped status messages.
+type statusRing struct {
+ mu sync.Mutex // guards rest
+ head int // next position to fill
+ ent [50]statusEntry // ring buffer of entries; zero time means unpopulated
+}
+
+func (r *statusRing) add(status string) {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+
+ r.ent[r.head] = statusEntry{status, time.Now()}
+ r.head++
+ if r.head == len(r.ent) {
+ r.head = 0
+ }
+}
+
+func (r *statusRing) foreachDesc(fn func(statusEntry)) {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+
+ i := r.head
+ for {
+ i--
+ if i < 0 {
+ i = len(r.ent) - 1
+ }
+ if i == r.head || r.ent[i].t.IsZero() {
+ return
+ }
+ fn(r.ent[i])
+ }
+}
+
+// Repo represents a repository to be watched.
+type Repo struct {
+ root string // on-disk location of the git repo
+ path string // base import path for repo (blank for main repo)
+ commits map[string]*Commit // keyed by full commit hash (40 lowercase hex digits)
+ branches map[string]*Branch // keyed by branch name, eg "release-branch.go1.3" (or empty for default)
+ dash bool // push new commits to the dashboard
+ mirror bool // push new commits to 'dest' remote
+ status statusRing
+}
+
+// NewRepo checks out a new instance of the Mercurial repository
+// specified by srcURL to a new directory inside dir.
+// If dstURL is not empty, changes from the source repository will
+// be mirrored to the specified destination repository.
+// The importPath argument is the base import path of the repository,
+// and should be empty for the main Go repo.
+// The dash argument should be set true if commits to this
+// repo should be reported to the build dashboard.
+func NewRepo(dir, srcURL, dstURL, importPath string, dash bool) (*Repo, error) {
+ var root string
+ if importPath == "" {
+ root = filepath.Join(dir, "go")
+ } else {
+ root = filepath.Join(dir, path.Base(importPath))
+ }
+ r := &Repo{
+ path: importPath,
+ root: root,
+ commits: make(map[string]*Commit),
+ branches: make(map[string]*Branch),
+ mirror: dstURL != "",
+ dash: dash,
+ }
+
+ http.Handle("/debug/watcher/"+r.name(), r)
+
+ needClone := true
+ if r.shouldTryReuseGitDir(dstURL) {
+ r.setStatus("reusing git dir; running git fetch")
+ cmd := exec.Command("git", "fetch", "origin")
+ cmd.Dir = r.root
+ r.logf("running git fetch")
+ t0 := time.Now()
+ var stderr bytes.Buffer
+ cmd.Stderr = &stderr
+ err := cmd.Run()
+ if err != nil {
+ r.logf("git fetch failed; proceeding to wipe + clone instead; err: %v, stderr: %s", err, stderr.Bytes())
+ } else {
+ needClone = false
+ r.logf("ran git fetch in %v", time.Since(t0))
+ }
+ }
+ if needClone {
+ r.setStatus("need clone; removing cache root")
+ os.RemoveAll(r.root)
+ t0 := time.Now()
+ r.setStatus("running fresh git clone --mirror")
+ r.logf("cloning %v", srcURL)
+ cmd := exec.Command("git", "clone", "--mirror", srcURL, r.root)
+ if out, err := cmd.CombinedOutput(); err != nil {
+ return nil, fmt.Errorf("cloning %s: %v\n\n%s", srcURL, err, out)
+ }
+ r.setStatus("cloned")
+ r.logf("cloned in %v", time.Since(t0))
+ }
+
+ if r.mirror {
+ r.setStatus("adding dest remote")
+ if err := r.addRemote("dest", dstURL); err != nil {
+ r.setStatus("failed to add dest")
+ return nil, fmt.Errorf("adding remote: %v", err)
+ }
+ r.setStatus("added dest remote")
+ r.logf("starting initial push to %v", dstURL)
+ if err := r.push(); err != nil {
+ return nil, err
+ }
+ r.logf("did initial push to %v", dstURL)
+ }
+
+ if r.dash {
+ r.logf("loading commit log")
+ if err := r.update(false); err != nil {
+ return nil, err
+ }
+ r.logf("found %v branches among %v commits\n", len(r.branches), len(r.commits))
+ }
+
+ return r, nil
+}
+
+func (r *Repo) setStatus(status string) {
+ r.status.add(status)
+}
+
+// shouldTryReuseGitDir reports whether we should try to reuse r.root as the git
+// directory. (The directory may be corrupt, though.)
+// dstURL is optional, and is the desired remote URL for a remote named "dest".
+func (r *Repo) shouldTryReuseGitDir(dstURL string) bool {
+ if _, err := os.Stat(filepath.Join(r.root, "FETCH_HEAD")); err != nil {
+ if os.IsNotExist(err) {
+ r.logf("not reusing git dir; no FETCH_HEAD at %s", r.root)
+ } else {
+ r.logf("not reusing git dir; %v", err)
+ }
+ return false
+ }
+ if dstURL == "" {
+ r.logf("not reusing git dir because dstURL is empty")
+ return true
+ }
+
+ // Does the "dest" remote match? If not, we return false and nuke
+ // the world and re-clone out of laziness.
+ cmd := exec.Command("git", "remote", "-v")
+ cmd.Dir = r.root
+ out, err := cmd.Output()
+ if err != nil {
+ log.Printf("git remote -v: %v", err)
+ }
+ foundWrong := false
+ for _, ln := range strings.Split(string(out), "\n") {
+ if !strings.HasPrefix(ln, "dest") {
+ continue
+ }
+ f := strings.Fields(ln)
+ if len(f) < 2 {
+ continue
+ }
+ if f[0] == "dest" {
+ if f[1] == dstURL {
+ return true
+ }
+ if !foundWrong {
+ foundWrong = true
+ r.logf("found dest of %q, which doesn't equal sought %q", f[1], dstURL)
+ }
+ }
+ }
+ r.logf("not reusing old repo: remote \"dest\" URL doesn't match")
+ return false
+}
+
+func (r *Repo) addRemote(name, url string) error {
+ gitConfig := filepath.Join(r.root, "config")
+ f, err := os.OpenFile(gitConfig, os.O_WRONLY|os.O_APPEND, os.ModePerm)
+ if err != nil {
+ return err
+ }
+ _, err = fmt.Fprintf(f, "\n[remote %q]\n\turl = %v\n", name, url)
+ if err != nil {
+ f.Close()
+ return err
+ }
+ return f.Close()
+}
+
+// Watch continuously runs "git fetch" in the repo, checks for
+// new commits, posts any new commits to the dashboard (if enabled),
+// and mirrors commits to a destination repo (if enabled).
+// It only returns a non-nil error.
+func (r *Repo) Watch() error {
+ tickler := repoTickler(r.name())
+ for {
+ if err := r.fetch(); err != nil {
+ return err
+ }
+ if r.mirror {
+ if err := r.push(); err != nil {
+ return err
+ }
+ }
+ if r.dash {
+ if err := r.updateDashboard(); err != nil {
+ return err
+ }
+ }
+
+ r.setStatus("waiting")
+ // We still run a timer but a very slow one, just
+ // in case the mechanism updating the repo tickler
+ // breaks for some reason.
+ timer := time.NewTimer(5 * time.Minute)
+ select {
+ case <-tickler:
+ r.setStatus("got update tickle")
+ timer.Stop()
+ case <-timer.C:
+ r.setStatus("poll timer fired")
+ }
+ }
+}
+
+func (r *Repo) updateDashboard() (err error) {
+ r.setStatus("updating dashboard")
+ defer func() {
+ if err == nil {
+ r.setStatus("updated dashboard")
+ }
+ }()
+ if err := r.update(true); err != nil {
+ return err
+ }
+ remotes, err := r.remotes()
+ if err != nil {
+ return err
+ }
+ for _, name := range remotes {
+ b, ok := r.branches[name]
+ if !ok {
+ // skip branch; must be already merged
+ continue
+ }
+ if err := r.postNewCommits(b); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func (r *Repo) name() string {
+ if r.path == "" {
+ return "go"
+ }
+ return path.Base(r.path)
+}
+
+func (r *Repo) logf(format string, args ...interface{}) {
+ log.Printf(r.name()+": "+format, args...)
+}
+
+// postNewCommits looks for unseen commits on the specified branch and
+// posts them to the dashboard.
+func (r *Repo) postNewCommits(b *Branch) error {
+ if b.Head == b.LastSeen {
+ return nil
+ }
+ c := b.LastSeen
+ if c == nil {
+ // Haven't seen anything on this branch yet:
+ if b.Name == master {
+ // For the master branch, bootstrap by creating a dummy
+ // commit with a lone child that is the initial commit.
+ c = &Commit{}
+ for _, c2 := range r.commits {
+ if c2.Parent == "" {
+ c.children = []*Commit{c2}
+ break
+ }
+ }
+ if c.children == nil {
+ return fmt.Errorf("couldn't find initial commit")
+ }
+ } else {
+ // Find the commit that this branch forked from.
+ base, err := r.mergeBase("heads/"+b.Name, master)
+ if err != nil {
+ return err
+ }
+ var ok bool
+ c, ok = r.commits[base]
+ if !ok {
+ return fmt.Errorf("couldn't find base commit: %v", base)
+ }
+ }
+ }
+ if err := r.postChildren(b, c); err != nil {
+ return err
+ }
+ b.LastSeen = b.Head
+ return nil
+}
+
+// postChildren posts to the dashboard all descendants of the given parent.
+// It ignores descendants that are not on the given branch.
+func (r *Repo) postChildren(b *Branch, parent *Commit) error {
+ for _, c := range parent.children {
+ if c.Branch != b.Name {
+ continue
+ }
+ if err := r.postCommit(c); err != nil {
+ if strings.Contains(err.Error(), "this package already has a first commit; aborting") {
+ return nil
+ }
+ return err
+ }
+ }
+ for _, c := range parent.children {
+ if err := r.postChildren(b, c); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// postCommit sends a commit to the build dashboard.
+func (r *Repo) postCommit(c *Commit) error {
+ if !*report {
+ r.logf("dry-run mode; NOT posting commit to dashboard: %v", c)
+ return nil
+ }
+ r.logf("sending commit to dashboard: %v", c)
+
+ t, err := time.Parse("Mon, 2 Jan 2006 15:04:05 -0700", c.Date)
+ if err != nil {
+ return fmt.Errorf("postCommit: parsing date %q for commit %v: %v", c.Date, c, err)
+ }
+ dc := struct {
+ PackagePath string // (empty for main repo commits)
+ Hash string
+ ParentHash string
+
+ User string
+ Desc string
+ Time time.Time
+ Branch string
+
+ NeedsBenchmarking bool
+ }{
+ PackagePath: r.path,
+ Hash: c.Hash,
+ ParentHash: c.Parent,
+
+ User: c.Author,
+ Desc: c.Desc,
+ Time: t,
+ Branch: c.Branch,
+
+ NeedsBenchmarking: c.NeedsBenchmarking(),
+ }
+ b, err := json.Marshal(dc)
+ if err != nil {
+ return fmt.Errorf("postCommit: marshaling request body: %v", err)
+ }
+
+ if !*network {
+ if c.Parent != "" {
+ if !networkSeen[c.Parent] {
+ r.logf("%v: %v", c.Parent, r.commits[c.Parent])
+ return fmt.Errorf("postCommit: no parent %v found on dashboard for %v", c.Parent, c)
+ }
+ }
+ if networkSeen[c.Hash] {
+ return fmt.Errorf("postCommit: already seen %v", c)
+ }
+ networkSeen[c.Hash] = true
+ return nil
+ }
+
+ v := url.Values{"version": {fmt.Sprint(watcherVersion)}, "key": {dashboardKey}}
+ u := *dashFlag + "commit?" + v.Encode()
+ resp, err := http.Post(u, "text/json", bytes.NewReader(b))
+ if err != nil {
+ return err
+ }
+ body, err := ioutil.ReadAll(resp.Body)
+ resp.Body.Close()
+ if err != nil {
+ return fmt.Errorf("postCommit: reading body: %v", err)
+ }
+ if resp.StatusCode != 200 {
+ return fmt.Errorf("postCommit: status: %v\nbody: %s", resp.Status, body)
+ }
+
+ var s struct {
+ Error string
+ }
+ if err := json.Unmarshal(body, &s); err != nil {
+ return fmt.Errorf("postCommit: decoding response: %v", err)
+ }
+ if s.Error != "" {
+ return fmt.Errorf("postCommit: error: %v", s.Error)
+ }
+ return nil
+}
+
+// update looks for new commits and branches,
+// and updates the commits and branches maps.
+func (r *Repo) update(noisy bool) error {
+ remotes, err := r.remotes()
+ if err != nil {
+ return err
+ }
+ for _, name := range remotes {
+ b := r.branches[name]
+
+ // Find all unseen commits on this branch.
+ revspec := "heads/" + name
+ if b != nil {
+ // If we know about this branch,
+ // only log commits down to the known head.
+ revspec = b.Head.Hash + ".." + revspec
+ }
+ log, err := r.log("--topo-order", revspec)
+ if err != nil {
+ return err
+ }
+ if len(log) == 0 {
+ // No commits to handle; carry on.
+ continue
+ }
+
+ var nDups, nDrops int
+
+ // Add unknown commits to r.commits.
+ var added []*Commit
+ for _, c := range log {
+ if noisy {
+ r.logf("found new commit %v", c)
+ }
+ // If we've already seen this commit,
+ // only store the master one in r.commits.
+ if _, ok := r.commits[c.Hash]; ok {
+ nDups++
+ if name != master {
+ nDrops++
+ continue
+ }
+ }
+ c.Branch = name
+ r.commits[c.Hash] = c
+ added = append(added, c)
+ }
+
+ if nDups > 0 {
+ r.logf("saw %v duplicate commits; dropped %v of them", nDups, nDrops)
+ }
+
+ // Link added commits.
+ for _, c := range added {
+ if c.Parent == "" {
+ // This is the initial commit; no parent.
+ r.logf("no parents for initial commit %v", c)
+ continue
+ }
+ // Find parent commit.
+ p, ok := r.commits[c.Parent]
+ if !ok {
+ return fmt.Errorf("can't find parent %q for %v", c.Parent, c)
+ }
+ // Link parent Commit.
+ c.parent = p
+ // Link child Commits.
+ p.children = append(p.children, c)
+ }
+
+ // Update branch head, or add newly discovered branch.
+ head := log[0]
+ if b != nil {
+ // Known branch; update head.
+ b.Head = head
+ r.logf("updated branch head: %v", b)
+ } else {
+ // It's a new branch; add it.
+ seen, err := r.lastSeen(head.Hash)
+ if err != nil {
+ return err
+ }
+ b = &Branch{Name: name, Head: head, LastSeen: seen}
+ r.branches[name] = b
+ r.logf("found branch: %v", b)
+ }
+ }
+
+ return nil
+}
+
+// lastSeen finds the most recent commit the dashboard has seen,
+// starting at the specified head. If the dashboard hasn't seen
+// any of the commits from head to the beginning, it returns nil.
+func (r *Repo) lastSeen(head string) (*Commit, error) {
+ h, ok := r.commits[head]
+ if !ok {
+ return nil, fmt.Errorf("lastSeen: can't find %q in commits", head)
+ }
+
+ var s []*Commit
+ for c := h; c != nil; c = c.parent {
+ s = append(s, c)
+ }
+
+ var err error
+ i := sort.Search(len(s), func(i int) bool {
+ if err != nil {
+ return false
+ }
+ ok, err = r.dashSeen(s[i].Hash)
+ return ok
+ })
+ switch {
+ case err != nil:
+ return nil, fmt.Errorf("lastSeen: %v", err)
+ case i < len(s):
+ return s[i], nil
+ default:
+ // Dashboard saw no commits.
+ return nil, nil
+ }
+}
+
+// dashSeen reports whether the build dashboard knows the specified commit.
+func (r *Repo) dashSeen(hash string) (bool, error) {
+ if !*network {
+ return networkSeen[hash], nil
+ }
+ v := url.Values{"hash": {hash}, "packagePath": {r.path}}
+ u := *dashFlag + "commit?" + v.Encode()
+ resp, err := http.Get(u)
+ if err != nil {
+ return false, err
+ }
+ defer resp.Body.Close()
+ if resp.StatusCode != 200 {
+ return false, fmt.Errorf("status: %v", resp.Status)
+ }
+ var s struct {
+ Error string
+ }
+ err = json.NewDecoder(resp.Body).Decode(&s)
+ if err != nil {
+ return false, err
+ }
+ switch s.Error {
+ case "":
+ // Found one.
+ return true, nil
+ case "Commit not found":
+ // Commit not found, keep looking for earlier commits.
+ return false, nil
+ default:
+ return false, fmt.Errorf("dashboard: %v", s.Error)
+ }
+}
+
+// mergeBase returns the hash of the merge base for revspecs a and b.
+func (r *Repo) mergeBase(a, b string) (string, error) {
+ cmd := exec.Command("git", "merge-base", a, b)
+ cmd.Dir = r.root
+ out, err := cmd.CombinedOutput()
+ if err != nil {
+ return "", fmt.Errorf("git merge-base %s..%s: %v", a, b, err)
+ }
+ return string(bytes.TrimSpace(out)), nil
+}
+
+// remotes returns a slice of remote branches known to the git repo.
+// It always puts "origin/master" first.
+func (r *Repo) remotes() ([]string, error) {
+ if *branches != "" {
+ return strings.Split(*branches, ","), nil
+ }
+
+ cmd := exec.Command("git", "branch")
+ cmd.Dir = r.root
+ out, err := cmd.CombinedOutput()
+ if err != nil {
+ return nil, fmt.Errorf("git branch: %v", err)
+ }
+ bs := []string{master}
+ for _, b := range strings.Split(string(out), "\n") {
+ b = strings.TrimPrefix(b, "* ")
+ b = strings.TrimSpace(b)
+ // Ignore aliases, blank lines, and master (it's already in bs).
+ if b == "" || strings.Contains(b, "->") || b == master {
+ continue
+ }
+ // Ignore pre-go1 release branches; they are just noise.
+ if strings.HasPrefix(b, "release-branch.r") {
+ continue
+ }
+ bs = append(bs, b)
+ }
+ return bs, nil
+}
+
+const logFormat = `--format=format:` + logBoundary + `%H
+%P
+%an <%ae>
+%cD
+%B
+` + fileBoundary
+
+const logBoundary = `_-_- magic boundary -_-_`
+const fileBoundary = `_-_- file boundary -_-_`
+
+// log runs "git log" with the supplied arguments
+// and parses the output into Commit values.
+func (r *Repo) log(dir string, args ...string) ([]*Commit, error) {
+ args = append([]string{"log", "--date=rfc", "--name-only", "--parents", logFormat}, args...)
+ if r.path == "" && *filter != "" {
+ paths := strings.Split(*filter, ",")
+ args = append(args, "--")
+ args = append(args, paths...)
+ }
+ cmd := exec.Command("git", args...)
+ cmd.Dir = r.root
+ out, err := cmd.CombinedOutput()
+ if err != nil {
+ return nil, fmt.Errorf("git %v: %v\n%s", strings.Join(args, " "), err, out)
+ }
+
+ // We have a commit with description that contains 0x1b byte.
+ // Mercurial does not escape it, but xml.Unmarshal does not accept it.
+ // TODO(adg): do we still need to scrub this? Probably.
+ out = bytes.Replace(out, []byte{0x1b}, []byte{'?'}, -1)
+
+ var cs []*Commit
+ for _, text := range strings.Split(string(out), logBoundary) {
+ text = strings.TrimSpace(text)
+ if text == "" {
+ continue
+ }
+ p := strings.SplitN(text, "\n", 5)
+ if len(p) != 5 {
+ return nil, fmt.Errorf("git log %v: malformed commit: %q", strings.Join(args, " "), text)
+ }
+
+ // The change summary contains the change description and files
+ // modified in this commit. There is no way to directly refer
+ // to the modified files in the log formatting string, so we look
+ // for the file boundary after the description.
+ changeSummary := p[4]
+ descAndFiles := strings.SplitN(changeSummary, fileBoundary, 2)
+ desc := strings.TrimSpace(descAndFiles[0])
+
+ // For branch merges, the list of files can still be empty
+ // because there are no changed files.
+ files := strings.Replace(strings.TrimSpace(descAndFiles[1]), "\n", " ", -1)
+
+ cs = append(cs, &Commit{
+ Hash: p[0],
+ // TODO(adg): This may break with branch merges.
+ Parent: strings.Split(p[1], " ")[0],
+ Author: p[2],
+ Date: p[3],
+ Desc: desc,
+ Files: files,
+ })
+ }
+ return cs, nil
+}
+
+// fetch runs "git fetch" in the repository root.
+// It tries three times, just in case it failed because of a transient error.
+func (r *Repo) fetch() (err error) {
+ n := 0
+ r.setStatus("running git fetch origin")
+ defer func() {
+ if err != nil {
+ r.setStatus("git fetch failed")
+ } else {
+ r.setStatus("ran git fetch")
+ }
+ }()
+ return try(3, func() error {
+ n++
+ if n > 1 {
+ r.setStatus(fmt.Sprintf("running git fetch origin, attempt %d", n))
+ }
+ cmd := exec.Command("git", "fetch", "origin")
+ cmd.Dir = r.root
+ if out, err := cmd.CombinedOutput(); err != nil {
+ err = fmt.Errorf("%v\n\n%s", err, out)
+ r.logf("git fetch: %v", err)
+ return err
+ }
+ return nil
+ })
+}
+
+// push runs "git push -f --mirror dest" in the repository root.
+// It tries three times, just in case it failed because of a transient error.
+func (r *Repo) push() (err error) {
+ n := 0
+ r.setStatus("syncing to github")
+ defer func() {
+ if err != nil {
+ r.setStatus("sync to github failed")
+ } else {
+ r.setStatus("did sync to github")
+ }
+ }()
+ return try(3, func() error {
+ n++
+ if n > 1 {
+ r.setStatus(fmt.Sprintf("syncing to github, attempt %d", n))
+ }
+ r.setStatus("sync: fetching local refs")
+ local, err := r.getLocalRefs()
+ if err != nil {
+ r.logf("failed to get local refs: %v", err)
+ return err
+ }
+ r.setStatus(fmt.Sprintf("sync: got %d local refs", len(local)))
+
+ r.setStatus("sync: fetching remote refs")
+ remote, err := r.getRemoteRefs("dest")
+ if err != nil {
+ r.logf("failed to get local refs: %v", err)
+ return err
+ }
+ r.setStatus(fmt.Sprintf("sync: got %d remote refs", len(remote)))
+
+ var pushRefs []string
+ for ref, hash := range local {
+ if strings.Contains(ref, "refs/users/") ||
+ strings.Contains(ref, "refs/cache-auto") ||
+ strings.Contains(ref, "refs/changes/") {
+ continue
+ }
+ if remote[ref] != hash {
+ pushRefs = append(pushRefs, ref)
+ }
+ }
+ sort.Sort(refByPriority(pushRefs))
+ if len(pushRefs) == 0 {
+ r.setStatus("nothing to sync")
+ return nil
+ }
+ for len(pushRefs) > 0 {
+ r.setStatus(fmt.Sprintf("%d refs to push; pushing batch", len(pushRefs)))
+ r.logf("%d refs remain to sync to github", len(pushRefs))
+ args := []string{"push", "-f", "dest"}
+ n := 0
+ for _, ref := range pushRefs {
+ args = append(args, "+"+local[ref]+":"+ref)
+ n++
+ if n == 200 {
+ break
+ }
+ }
+ pushRefs = pushRefs[n:]
+ cmd := exec.Command("git", args...)
+ cmd.Dir = r.root
+ cmd.Stderr = os.Stderr
+ out, err := cmd.Output()
+ if err != nil {
+ r.logf("git push failed, running git %s: %s", args, out)
+ r.setStatus("git push failure")
+ return err
+ }
+ }
+ r.setStatus("sync complete")
+ return nil
+ })
+}
+
+func (r *Repo) ServeHTTP(w http.ResponseWriter, req *http.Request) {
+ if req.Method != "GET" && req.Method != "HEAD" {
+ w.WriteHeader(http.StatusBadRequest)
+ return
+ }
+ if strings.HasPrefix(req.URL.Path, "/debug/watcher/") {
+ r.serveStatus(w, req)
+ return
+ }
+ rev := req.FormValue("rev")
+ if rev == "" {
+ w.WriteHeader(http.StatusBadRequest)
+ return
+ }
+ cmd := exec.Command("git", "archive", "--format=tgz", rev)
+ cmd.Dir = r.root
+ tgz, err := cmd.Output()
+ if err != nil {
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return
+ }
+ w.Header().Set("Content-Length", strconv.Itoa(len(tgz)))
+ w.Header().Set("Content-Type", "application/x-compressed")
+ w.Write(tgz)
+}
+
+func (r *Repo) serveStatus(w http.ResponseWriter, req *http.Request) {
+ w.Header().Set("Content-Type", "text/html")
+ fmt.Fprintf(w, "<html><head><title>watcher: %s</title><body><h1>watcher status for repo: %q</h1>\n",
+ r.name(), r.name())
+ fmt.Fprintf(w, "<pre>\n")
+ nowRound := time.Now().Round(time.Second)
+ r.status.foreachDesc(func(ent statusEntry) {
+ fmt.Fprintf(w, "%v %-20s %v\n",
+ ent.t.In(time.UTC).Format(time.RFC3339),
+ nowRound.Sub(ent.t.Round(time.Second)).String()+" ago",
+ ent.status)
+ })
+}
+
+func try(n int, fn func() error) error {
+ var err error
+ for tries := 0; tries < n; tries++ {
+ time.Sleep(time.Duration(tries) * 5 * time.Second) // Linear back-off.
+ if err = fn(); err == nil {
+ break
+ }
+ }
+ return err
+}
+
+// Branch represents a Mercurial branch.
+type Branch struct {
+ Name string
+ Head *Commit
+ LastSeen *Commit // the last commit posted to the dashboard
+}
+
+func (b *Branch) String() string {
+ return fmt.Sprintf("%q(Head: %v LastSeen: %v)", b.Name, b.Head, b.LastSeen)
+}
+
+// Commit represents a single Git commit.
+type Commit struct {
+ Hash string
+ Author string
+ Date string // Format: "Mon, 2 Jan 2006 15:04:05 -0700"
+ Desc string // Plain text, first line is a short description.
+ Parent string
+ Branch string
+ Files string
+
+ // For walking the graph.
+ parent *Commit
+ children []*Commit
+}
+
+func (c *Commit) String() string {
+ s := c.Hash
+ if c.Branch != "" {
+ s += fmt.Sprintf("[%v]", c.Branch)
+ }
+ s += fmt.Sprintf("(%q)", strings.SplitN(c.Desc, "\n", 2)[0])
+ return s
+}
+
+// NeedsBenchmarking reports whether the Commit needs benchmarking.
+func (c *Commit) NeedsBenchmarking() bool {
+ // Do not benchmark branch commits, they are usually not interesting
+ // and fall out of the trunk succession.
+ if c.Branch != master {
+ return false
+ }
+ // Do not benchmark commits that do not touch source files (e.g. CONTRIBUTORS).
+ for _, f := range strings.Split(c.Files, " ") {
+ if (strings.HasPrefix(f, "include") || strings.HasPrefix(f, "src")) &&
+ !strings.HasSuffix(f, "_test.go") && !strings.Contains(f, "testdata") {
+ return true
+ }
+ }
+ return false
+}
+
+func homeDir() string {
+ switch runtime.GOOS {
+ case "plan9":
+ return os.Getenv("home")
+ case "windows":
+ return os.Getenv("HOMEDRIVE") + os.Getenv("HOMEPATH")
+ }
+ return os.Getenv("HOME")
+}
+
+func readKey() (string, error) {
+ c, err := ioutil.ReadFile(*keyFile)
+ if err != nil {
+ return "", err
+ }
+ return string(bytes.TrimSpace(bytes.SplitN(c, []byte("\n"), 2)[0])), nil
+}
+
+// subrepoList fetches a list of sub-repositories from the dashboard
+// and returns them as a slice of base import paths.
+// Eg, []string{"golang.org/x/tools", "golang.org/x/net"}.
+func subrepoList() ([]string, error) {
+ if !*network {
+ return nil, nil
+ }
+
+ // TODO: timeout on all http requests.
+ r, err := http.Get(*dashFlag + "packages?kind=subrepo")
+ if err != nil {
+ return nil, fmt.Errorf("subrepo list: %v", err)
+ }
+ defer r.Body.Close()
+ if r.StatusCode != 200 {
+ return nil, fmt.Errorf("subrepo list: got status %v", r.Status)
+ }
+ var resp struct {
+ Response []struct {
+ Path string
+ }
+ Error string
+ }
+ err = json.NewDecoder(r.Body).Decode(&resp)
+ if err != nil {
+ return nil, fmt.Errorf("subrepo list: %v", err)
+ }
+ if resp.Error != "" {
+ return nil, fmt.Errorf("subrepo list: %v", resp.Error)
+ }
+ var pkgs []string
+ for _, r := range resp.Response {
+ pkgs = append(pkgs, r.Path)
+ }
+ return pkgs, nil
+}
+
+var (
+ ticklerMu sync.Mutex
+ ticklers = make(map[string]chan bool)
+)
+
+// repo is the gerrit repo: e.g. "go", "net", "crypto", ...
+func repoTickler(repo string) chan bool {
+ ticklerMu.Lock()
+ defer ticklerMu.Unlock()
+ if c, ok := ticklers[repo]; ok {
+ return c
+ }
+ c := make(chan bool, 1)
+ ticklers[repo] = c
+ return c
+}
+
+// pollGerritAndTickle polls Gerrit's JSON meta URL of all its URLs
+// and their current branch heads. When this sees that one has
+// changed, it tickles the channel for that repo and wakes up its
+// poller, if its poller is in a sleep.
+func pollGerritAndTickle() {
+ last := map[string]string{} // repo -> last seen hash
+ for {
+ for repo, hash := range gerritMetaMap() {
+ if hash != last[repo] {
+ last[repo] = hash
+ select {
+ case repoTickler(repo) <- true:
+ default:
+ }
+ }
+ }
+ time.Sleep(*pollInterval)
+ }
+}
+
+// gerritMetaMap returns the map from repo name (e.g. "go") to its
+// latest master hash.
+// The returned map is nil on any transient error.
+func gerritMetaMap() map[string]string {
+ // TODO: timeout
+ res, err := http.Get(metaURL)
+ if err != nil {
+ return nil
+ }
+ defer res.Body.Close()
+ defer io.Copy(ioutil.Discard, res.Body) // ensure EOF for keep-alive
+ if res.StatusCode != 200 {
+ return nil
+ }
+ var meta map[string]struct {
+ Branches map[string]string
+ }
+ br := bufio.NewReader(res.Body)
+ // For security reasons or something, this URL starts with ")]}'\n" before
+ // the JSON object. So ignore that.
+ // Shawn Pearce says it's guaranteed to always be just one line, ending in '\n'.
+ for {
+ b, err := br.ReadByte()
+ if err != nil {
+ return nil
+ }
+ if b == '\n' {
+ break
+ }
+ }
+ if err := json.NewDecoder(br).Decode(&meta); err != nil {
+ log.Printf("JSON decoding error from %v: %s", metaURL, err)
+ return nil
+ }
+ m := map[string]string{}
+ for repo, v := range meta {
+ if master, ok := v.Branches["master"]; ok {
+ m[repo] = master
+ }
+ }
+ return m
+}
+
+func (r *Repo) getLocalRefs() (map[string]string, error) {
+ cmd := exec.Command("git", "show-ref")
+ cmd.Dir = r.root
+ return parseRefs(cmd)
+}
+
+func (r *Repo) getRemoteRefs(dest string) (map[string]string, error) {
+ ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+ defer cancel()
+ cmd := exec.CommandContext(ctx, "git", "ls-remote", dest)
+ cmd.Dir = r.root
+ return parseRefs(cmd)
+}
+
+func parseRefs(cmd *exec.Cmd) (map[string]string, error) {
+ refHash := map[string]string{}
+ out, err := cmd.StdoutPipe()
+ if err != nil {
+ return nil, err
+ }
+ bs := bufio.NewScanner(out)
+ if err := cmd.Start(); err != nil {
+ return nil, err
+ }
+ for bs.Scan() {
+ f := strings.Fields(bs.Text())
+ refHash[f[1]] = f[0]
+ }
+ if err := bs.Err(); err != nil {
+ go cmd.Wait() // prevent zombies
+ return nil, err
+ }
+ if err := cmd.Wait(); err != nil {
+ return nil, err
+ }
+ return refHash, bs.Err()
+}
+
+type refByPriority []string
+
+func (s refByPriority) Len() int { return len(s) }
+func (s refByPriority) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+func (s refByPriority) Less(i, j int) bool {
+ p1 := priority[refType(s[i])]
+ p2 := priority[refType(s[j])]
+ if p1 != p2 {
+ return p1 > p2
+ }
+ return s[i] <= s[j]
+}
+
+func refType(s string) string {
+ s = strings.TrimPrefix(s, "refs/")
+ if i := strings.IndexByte(s, '/'); i != -1 {
+ return s[:i]
+ }
+ return s
+}
+
+var priority = map[string]int{
+ "heads": 5,
+ "tags": 4,
+ "changes": 3,
+}
diff --git a/cmd/gitmirror/install-apt-deps.sh b/cmd/gitmirror/install-apt-deps.sh
new file mode 100755
index 0000000..4d5254f
--- /dev/null
+++ b/cmd/gitmirror/install-apt-deps.sh
@@ -0,0 +1,23 @@
+# Copyright 2015 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+set -ex
+
+apt-get update
+apt-get install -y --no-install-recommends ca-certificates
+# For interacting with the Go source & subrepos:
+apt-get install -y --no-install-recommends git-core openssh-client
+
+apt-get clean
+rm -fr /var/lib/apt/lists
+
+mkdir -p ~/.ssh/
+chmod 0700 ~/.ssh/
+
+# Add Github.com's known_hosts entries, so git push calls later don't
+# prompt, and don't need to have their strict host key checking
+# disabled.
+echo "|1|SFEvEAqYsJ18JCr+0iV4GtlwS4w=|P6oCZUUd/5t9pH4Om7ShlfltRyE= ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ==" > ~/.ssh/known_hosts
+echo "|1|HygGkfOGLovavKfixjXWFJ7Yk1I=|lb/724row8KDTMC1dZiJlHyjxWM= ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ==" >> ~/.ssh/known_hosts
+chmod 0600 ~/.ssh/known_hosts