// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// The gitmirror binary watches the specified Gerrit repositories for
// new commits and reports them to the build dashboard.
//
// It also serves tarballs over HTTP for the build system, and pushes
// new commits to GitHub.
package main

import (
	"bytes"
	"context"
	"crypto/sha1"
	"encoding/json"
	"errors"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"net"
	"net/http"
	"net/url"
	"os"
	"os/exec"
	"path"
	"path/filepath"
	"runtime"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"

	"cloud.google.com/go/compute/metadata"
	"golang.org/x/build/buildenv"
	"golang.org/x/build/gerrit"
	"golang.org/x/build/internal/gitauth"
	"golang.org/x/build/maintner"
	"golang.org/x/build/maintner/godata"
)

const (
	goBase         = "https://go.googlesource.com/"
	watcherVersion = 3        // must match dashboard/app/build/handler.go's watcherVersion
	master         = "master" // name of the master branch
)

var (
	httpAddr = flag.String("http", "", "If non-empty, the listen address to run an HTTP server on")
	cacheDir = flag.String("cachedir", "", "git cache directory. If empty a temp directory is made.")

	dashFlag = flag.String("dash", "", "Dashboard URL (must end in /). If unset, will be automatically derived from the GCE project name.")
	keyFile  = flag.String("key", defaultKeyFile, "Build dashboard key file. If empty, automatic from GCE project metadata")

	pollInterval = flag.Duration("poll", 60*time.Second, "Remote repo poll interval")

	// TODO(bradfitz): these three are all kinda the same and
	// redundant. Unify after research.
	network = flag.Bool("network", true, "Enable network calls (disable for testing)")
	mirror  = flag.Bool("mirror", false, "whether to mirror to github")
	report  = flag.Bool("report", true, "Report updates to build dashboard (use false for development dry-run mode)")

	filter   = flag.String("filter", "", "If non-empty, a comma-separated list of directories or files to watch for new commits (only works on main repo). If empty, watch all files in repo.")
	branches = flag.String("branches", "", "If non-empty, a comma-separated list of branches to watch. If empty, watch changes on every branch.")
)

var (
	defaultKeyFile = filepath.Join(homeDir(), ".gobuildkey")
	dashboardKey   = ""
	networkSeen    = make(map[string]bool) // testing mode only (-network=false); known hashes
)

var httpClient = &http.Client{
	Timeout: 30 * time.Second, // overkill
}

var gerritClient = gerrit.NewClient("https://go-review.googlesource.com", gerrit.NoAuth)

var (
	// gitLogFn returns the list of unseen Commits on a Repo,
	// typically by shelling out to `git log`.
	// gitLogFn is a global var so we can stub it out for tests.
	gitLogFn = gitLog

	// gitRemotesFn returns a slice of remote branches known to the git repo.
	// gitRemotesFn is a global var so we can stub it out for tests.
	gitRemotesFn = gitRemotes
)

func main() {
	flag.Parse()
	if err := gitauth.Init(); err != nil {
		log.Fatalf("gitauth: %v", err)
	}

	if *dashFlag == "" && metadata.OnGCE() {
		project, err := metadata.ProjectID()
		if err != nil {
			log.Fatalf("metadata.ProjectID: %v", err)
		}
		*dashFlag = buildenv.ByProjectID(project).DashBase()
	}
	if *dashFlag == "" {
		log.Fatal("-dash must be specified and could not be autodetected")
	}

	log.Printf("gitmirror running.")

	go pollGerritAndTickle()
	go subscribeToMaintnerAndTickleLoop()
	err := runGitMirror()
	log.Fatalf("gitmirror exiting after failure: %v", err)
}

// runGitMirror is a little wrapper so we can use defer and return to signal
// errors. It should only return a non-nil error.
func runGitMirror() error {
	if !strings.HasSuffix(*dashFlag, "/") {
		return errors.New("dashboard URL (-dashboard) must end in /")
	}

	if *mirror {
		sshDir := filepath.Join(homeDir(), ".ssh")
		sshKey := filepath.Join(sshDir, "id_ed25519")
		if _, err := os.Stat(sshKey); err == nil {
			log.Printf("Using github ssh key at %v", sshKey)
		} else {
			if privKey, err := metadata.ProjectAttributeValue("github-ssh"); err == nil && len(privKey) > 0 {
				if err := os.MkdirAll(sshDir, 0700); err != nil {
					return err
				}
				if err := ioutil.WriteFile(sshKey, []byte(privKey+"\n"), 0600); err != nil {
					return err
				}
				log.Printf("Wrote %s from GCE metadata.", sshKey)
			} else {
				return fmt.Errorf("Can't mirror to github without 'github-ssh' GCE metadata or file %v", sshKey)
			}
		}
	}

	if *cacheDir == "" {
		dir, err := ioutil.TempDir("", "gitmirror")
		if err != nil {
			log.Fatal(err)
		}
		defer os.RemoveAll(dir)
		*cacheDir = dir
	} else {
		fi, err := os.Stat(*cacheDir)
		if os.IsNotExist(err) {
			if err := os.MkdirAll(*cacheDir, 0755); err != nil {
				return fmt.Errorf("failed to create watcher's git cache dir: %v", err)
			}
		} else {
			if err != nil {
				return fmt.Errorf("invalid -cachedir: %v", err)
			}
			if !fi.IsDir() {
				return fmt.Errorf("invalid -cachedir=%q; not a directory", *cacheDir)
			}
		}
	}

	if *report {
		if k, err := readKey(); err != nil {
			return err
		} else {
			dashboardKey = k
		}
	}

	if *httpAddr != "" {
		http.HandleFunc("/debug/env", handleDebugEnv)
		http.HandleFunc("/debug/goroutines", handleDebugGoroutines)
		ln, err := net.Listen("tcp", *httpAddr)
		if err != nil {
			return err
		}
		go http.Serve(ln, nil)
	}

	errc := make(chan error)

	subrepos, err := subrepoList()
	if err != nil {
		return err
	}

	startRepo := func(name, path string, dash bool) {
		log.Printf("Starting watch of repo %s", name)
		url := goBase + name
		var dst string
		if *mirror {
			dst = shouldMirrorTo(name)
			if dst != "" {
				log.Printf("Starting mirror of subrepo %s", name)
			} else {
				log.Printf("Not mirroring repo %s", name)
			}
		}
		r, err := NewRepo(url, dst, path, dash)
		if err != nil {
			errc <- err
			return
		}
		http.Handle("/"+name+".tar.gz", r)
		reposMu.Lock()
		repos = append(repos, r)
		sort.Slice(repos, func(i, j int) bool { return repos[i].name() < repos[j].name() })
		reposMu.Unlock()
		r.Loop()
	}

	go startRepo("go", "", true)

	seen := map[string]bool{"go": true}
	for _, path := range subrepos {
		name := strings.TrimPrefix(path, "golang.org/x/")
		seen[name] = true
		go startRepo(name, path, true)
	}
	if *mirror {
		gerritRepos, err := gerritMetaMap()
		if err != nil {
			return fmt.Errorf("gerritMetaMap: %v", err)
		}
		for name := range gerritRepos {
			if seen[name] {
				// Repo already picked up by dashboard list.
				continue
			}
			path := "golang.org/x/" + name
			switch name {
			case "dl":
				// This subrepo is different from others in that
				// it doesn't use the /x/ path element.
				path = "golang.org/" + name
			case "protobuf":
				path = "google.golang.org/" + name
			}
			go startRepo(name, path, false)
		}
	}

	http.HandleFunc("/", handleRoot)

	// Blocks forever if all the NewRepo calls succeed:
	return <-errc
}

var (
	reposMu sync.Mutex
	repos   []*Repo
)

// GET /
// or:
// GET /debug/watcher/
func handleRoot(w http.ResponseWriter, r *http.Request) {
	if r.URL.Path != "/" && r.URL.Path != "/debug/watcher/" {
		http.NotFound(w, r)
		return
	}
	reposMu.Lock()
	defer reposMu.Unlock()
	w.Header().Set("Content-Type", "text/html; charset=utf-8")
	fmt.Fprint(w, "<html><body><pre>")
	for _, r := range repos {
		fmt.Fprintf(w, "<a href='/debug/watcher/%s'>%s</a> - %s\n", r.name(), r.name(), r.statusLine())
	}
	fmt.Fprint(w, "</pre></body></html>")
}

// shouldMirrorTo returns the GitHub repository the named repo should be
// mirrored to or "" if it should not be mirrored.
func shouldMirrorTo(name string) (dst string) {
	switch name {
	case
		"arch",
		"benchmarks",
		"blog",
		"build",
		"crypto",
		"debug",
		"dl",
		"example",
		"exp",
		"gddo",
		"go",
		"gofrontend",
		"image",
		"lint",
		"mobile",
		"mod",
		"net",
		"oauth2",
		"playground",
		"proposal",
		"review",
		"scratch",
		"sync",
		"sys",
		"talks",
		"term",
		"text",
		"time",
		"tools",
		"tour",
		"vgo",
		"website",
		"xerrors":
		// Mirror this.
	case "protobuf":
		return "git@github.com:protocolbuffers/protobuf-go.git"
	default:
		// Else, see if it appears to be a subrepo:
		r, err := httpClient.Get("https://golang.org/x/" + name)
		if err != nil {
			log.Printf("repo %v doesn't seem to exist: %v", name, err)
			return ""
		}
		r.Body.Close()
		if r.StatusCode/100 != 2 {
			return ""
		}
		// Mirror this.
	}
	return "git@github.com:golang/" + name + ".git"
}

// a statusEntry is a status string at a specific time.
type statusEntry struct {
	status string
	t      time.Time
}

// statusRing is a ring buffer of timestamped status messages.
type statusRing struct {
	mu   sync.Mutex      // guards rest
	head int             // next position to fill
	ent  [50]statusEntry // ring buffer of entries; zero time means unpopulated
}

func (r *statusRing) add(status string) {
	r.mu.Lock()
	defer r.mu.Unlock()

	r.ent[r.head] = statusEntry{status, time.Now()}
	r.head++
	if r.head == len(r.ent) {
		r.head = 0
	}
}

func (r *statusRing) foreachDesc(fn func(statusEntry)) {
	r.mu.Lock()
	defer r.mu.Unlock()

	i := r.head
	for {
		i--
		if i < 0 {
			i = len(r.ent) - 1
		}
		if i == r.head || r.ent[i].t.IsZero() {
			return
		}
		fn(r.ent[i])
	}
}

// Repo represents a repository to be watched.
type Repo struct {
	root     string             // on-disk location of the git repo, *cacheDir/name
	path     string             // base import path for repo (blank for main repo)
	commits  map[string]*Commit // keyed by full commit hash (40 lowercase hex digits)
	branches map[string]*Branch // keyed by branch name, eg "release-branch.go1.3" (or empty for default)
	dash     bool               // push new commits to the dashboard
	mirror   bool               // push new commits to 'dest' remote
	status   statusRing

	mu        sync.Mutex
	err       error
	firstBad  time.Time
	lastBad   time.Time
	firstGood time.Time
	lastGood  time.Time
}

// NewRepo checks out a new instance of the git repository
// specified by srcURL.
//
// If dstURL is not empty, changes from the source repository will
// be mirrored to the specified destination repository.
// The importPath argument is the base import path of the repository,
// and should be empty for the main Go repo.
// The dash argument should be set true if commits to this
// repo should be reported to the build dashboard.
func NewRepo(srcURL, dstURL, importPath string, dash bool) (*Repo, error) {
	name := path.Base(srcURL) // "go", "net", etc
	root := filepath.Join(*cacheDir, name)
	r := &Repo{
		path:     importPath,
		root:     root,
		commits:  make(map[string]*Commit),
		branches: make(map[string]*Branch),
		mirror:   dstURL != "",
		dash:     dash,
	}

	http.Handle("/debug/watcher/"+r.name(), r)

	needClone := true
	if r.shouldTryReuseGitDir(dstURL) {
		r.setStatus("reusing git dir; running git fetch")
		cmd := exec.Command("git", "fetch", "--prune", "origin")
		cmd.Dir = r.root
		r.logf("running git fetch")
		t0 := time.Now()
		var stderr bytes.Buffer
		cmd.Stderr = &stderr
		err := cmd.Run()
		if err != nil {
			r.logf("git fetch failed; proceeding to wipe + clone instead; err: %v, stderr: %s", err, stderr.Bytes())
		} else {
			needClone = false
			r.logf("ran git fetch in %v", time.Since(t0))
		}
	}
	if needClone {
		r.setStatus("need clone; removing cache root")
		os.RemoveAll(r.root)
		t0 := time.Now()
		r.setStatus("running fresh git clone --mirror")
		r.logf("cloning %v into %s", srcURL, r.root)
		cmd := exec.Command("git", "clone", "--mirror", srcURL, r.root)
		if out, err := cmd.CombinedOutput(); err != nil {
			return nil, fmt.Errorf("cloning %s: %v\n\n%s", srcURL, err, out)
		}
		r.setStatus("cloned")
		r.logf("cloned in %v", time.Since(t0))
	}

	if r.mirror {
		r.setStatus("adding dest remote")
		if err := r.addRemote("dest", dstURL,
			// We want to include only the refs/heads/* and refs/tags/* namespaces
			// in the GitHub mirrors. They correspond to published branches and tags.
			// Leave out internal Gerrit namespaces such as refs/changes/*,
			// refs/users/*, etc., because they're not helpful on github.com/golang.
			"push = +refs/heads/*:refs/heads/*",
			"push = +refs/tags/*:refs/tags/*",
		); err != nil {
			r.setStatus("failed to add dest")
			return nil, fmt.Errorf("adding remote: %v", err)
		}
		r.setStatus("added dest remote")
	}

	if r.dash {
		r.logf("loading commit log")
		if err := r.update(false); err != nil {
			return nil, err
		}
		r.logf("found %v branches among %v commits\n", len(r.branches), len(r.commits))
	}

	return r, nil
}

func (r *Repo) setErr(err error) {
	r.mu.Lock()
	defer r.mu.Unlock()
	change := (r.err != nil) != (err != nil)
	now := time.Now()
	if err != nil {
		if change {
			r.firstBad = now
		}
		r.lastBad = now
	} else {
		if change {
			r.firstGood = now
		}
		r.lastGood = now
	}
	r.err = err
}

var startTime = time.Now()

func (r *Repo) statusLine() string {
	r.mu.Lock()
	defer r.mu.Unlock()

	if r.lastGood.IsZero() {
		if r.err != nil {
			return fmt.Sprintf("broken; permanently? always failing, for %v", time.Since(r.firstBad))
		}
		if time.Since(startTime) < 5*time.Minute {
			return "ok; starting up, no report yet"
		}
		return fmt.Sprintf("hung; hang at start-up? no report since start %v ago", time.Since(startTime))
	}
	if r.err == nil {
		if sinceGood := time.Since(r.lastGood); sinceGood > 6*time.Minute {
			return fmt.Sprintf("hung? no activity since last success %v ago", sinceGood)
		}
		if r.lastBad.After(time.Now().Add(-1 * time.Hour)) {
			return fmt.Sprintf("ok; recent failure %v ago", time.Since(r.lastBad))
		}
		return "ok"
	}
	return fmt.Sprintf("broken for %v", time.Since(r.lastGood))
}

func (r *Repo) setStatus(status string) {
	r.status.add(status)
}

// shouldTryReuseGitDir reports whether we should try to reuse r.root as the git
// directory. (The directory may be corrupt, though.)
// dstURL is optional, and is the desired remote URL for a remote named "dest".
func (r *Repo) shouldTryReuseGitDir(dstURL string) bool {
	if _, err := os.Stat(filepath.Join(r.root, "FETCH_HEAD")); err != nil {
		if os.IsNotExist(err) {
			r.logf("not reusing git dir; no FETCH_HEAD at %s", r.root)
		} else {
			r.logf("not reusing git dir; %v", err)
		}
		return false
	}
	if dstURL == "" {
		r.logf("not reusing git dir because dstURL is empty")
		return true
	}

	// Does the "dest" remote match? If not, we return false and nuke
	// the world and re-clone out of laziness.
	cmd := exec.Command("git", "remote", "-v")
	cmd.Dir = r.root
	out, err := cmd.Output()
	if err != nil {
		log.Printf("git remote -v: %v", err)
	}
	foundWrong := false
	for _, ln := range strings.Split(string(out), "\n") {
		if !strings.HasPrefix(ln, "dest") {
			continue
		}
		f := strings.Fields(ln)
		if len(f) < 2 {
			continue
		}
		if f[0] == "dest" {
			if f[1] == dstURL {
				return true
			}
			if !foundWrong {
				foundWrong = true
				r.logf("found dest of %q, which doesn't equal sought %q", f[1], dstURL)
			}
		}
	}
	r.logf("not reusing old repo: remote \"dest\" URL doesn't match")
	return false
}

func (r *Repo) addRemote(name, url string, opts ...string) error {
	gitConfig := filepath.Join(r.root, "config")
	f, err := os.OpenFile(gitConfig, os.O_WRONLY|os.O_APPEND, os.ModePerm)
	if err != nil {
		return err
	}
	_, err = fmt.Fprintf(f, "\n[remote %q]\n\turl = %v\n", name, url)
	if err != nil {
		f.Close()
		return err
	}
	for _, o := range opts {
		_, err := fmt.Fprintf(f, "\t%s\n", o)
		if err != nil {
			f.Close()
			return err
		}
	}
	return f.Close()
}

// Loop continuously runs "git fetch" in the repo, checks for
// new commits, posts any new commits to the dashboard (if enabled),
// and mirrors commits to a destination repo (if enabled).
func (r *Repo) Loop() {
	tickler := repoTickler(r.name())
	for {
		if err := r.fetch(); err != nil {
			r.setErr(err)
			time.Sleep(10 * time.Second)
			continue
		}
		if r.mirror {
			if err := r.push(); err != nil {
				r.setErr(err)
				time.Sleep(10 * time.Second)
				continue
			}
		}
		if r.dash {
			if err := r.updateDashboard(); err != nil {
				r.setErr(err)
				time.Sleep(10 * time.Second)
				continue
			}
		}

		r.setErr(nil)
		r.setStatus("waiting")
		// We still run a timer but a very slow one, just
		// in case the mechanism updating the repo tickler
		// breaks for some reason.
		timer := time.NewTimer(5 * time.Minute)
		select {
		case <-tickler:
			r.setStatus("got update tickle")
			timer.Stop()
		case <-timer.C:
			r.setStatus("poll timer fired")
		}
	}
}

func (r *Repo) updateDashboard() (err error) {
	r.setStatus("updating dashboard")
	defer func() {
		if err == nil {
			r.setStatus("updated dashboard")
		}
	}()
	if err := r.update(true); err != nil {
		return err
	}
	remotes, err := gitRemotesFn(r)
	if err != nil {
		return err
	}
	for _, name := range remotes {
		b, ok := r.branches[name]
		if !ok {
			// skip branch; must be already merged
			continue
		}
		if err := r.postNewCommits(b); err != nil {
			return err
		}
	}
	return nil
}

func (r *Repo) name() string {
	if r.path == "" {
		return "go"
	}
	return path.Base(r.path)
}

func (r *Repo) logf(format string, args ...interface{}) {
	log.Printf(r.name()+": "+format, args...)
}

// postNewCommits looks for unseen commits on the specified branch and
// posts them to the dashboard.
func (r *Repo) postNewCommits(b *Branch) error {
	if b.Head == b.LastSeen {
		return nil
	}
	c := b.LastSeen
	if c == nil {
		// Haven't seen anything on this branch yet:
		if b.Name == master {
			// For the master branch, bootstrap by creating a dummy
			// commit with a lone child that is the initial commit.
			c = &Commit{}
			for _, c2 := range r.commits {
				if c2.Parent == "" {
					c.children = []*Commit{c2}
					break
				}
			}
			if c.children == nil {
				return fmt.Errorf("couldn't find initial commit")
			}
		} else {
			// Find the commit that this branch forked from.
			base, err := r.mergeBase("heads/"+b.Name, master)
			if err != nil {
				return err
			}
			var ok bool
			c, ok = r.commits[base]
			if !ok {
				return fmt.Errorf("couldn't find base commit: %v", base)
			}
		}
	}
	if err := r.postChildren(b, c); err != nil {
		return err
	}
	b.LastSeen = b.Head
	return nil
}

// postChildren posts to the dashboard all descendants of the given parent.
// It ignores descendants that are not on the given branch.
func (r *Repo) postChildren(b *Branch, parent *Commit) error {
	for _, c := range parent.children {
		if c.Branch != b.Name {
			continue
		}
		if err := r.postCommit(c); err != nil {
			if strings.Contains(err.Error(), "this package already has a first commit; aborting") {
				return nil
			}
			return err
		}
	}
	for _, c := range parent.children {
		if err := r.postChildren(b, c); err != nil {
			return err
		}
	}
	return nil
}

// postCommit sends a commit to the build dashboard.
func (r *Repo) postCommit(c *Commit) error {
	if !*report {
		r.logf("dry-run mode; NOT posting commit to dashboard: %v", c)
		return nil
	}
	r.logf("sending commit to dashboard: %v", c)

	t, err := time.Parse("Mon, 2 Jan 2006 15:04:05 -0700", c.Date)
	if err != nil {
		return fmt.Errorf("postCommit: parsing date %q for commit %v: %v", c.Date, c, err)
	}
	dc := struct {
		PackagePath string // (empty for main repo commits)
		Hash        string
		ParentHash  string

		User   string
		Desc   string
		Time   time.Time
		Branch string

		NeedsBenchmarking bool
	}{
		PackagePath: r.path,
		Hash:        c.Hash,
		ParentHash:  c.Parent,

		User:   c.Author,
		Desc:   c.Desc,
		Time:   t,
		Branch: c.Branch,

		NeedsBenchmarking: c.NeedsBenchmarking(),
	}
	b, err := json.Marshal(dc)
	if err != nil {
		return fmt.Errorf("postCommit: marshaling request body: %v", err)
	}

	if !*network {
		if c.Parent != "" {
			if !networkSeen[c.Parent] {
				r.logf("%v: %v", c.Parent, r.commits[c.Parent])
				return fmt.Errorf("postCommit: no parent %v found on dashboard for %v", c.Parent, c)
			}
		}
		if networkSeen[c.Hash] {
			return fmt.Errorf("postCommit: already seen %v", c)
		}
		networkSeen[c.Hash] = true
		return nil
	}

	v := url.Values{"version": {fmt.Sprint(watcherVersion)}, "key": {dashboardKey}}
	u := *dashFlag + "commit?" + v.Encode()
	resp, err := http.Post(u, "text/json", bytes.NewReader(b))
	if err != nil {
		return err
	}
	body, err := ioutil.ReadAll(resp.Body)
	resp.Body.Close()
	if err != nil {
		return fmt.Errorf("postCommit: reading body: %v", err)
	}
	if resp.StatusCode != 200 {
		return fmt.Errorf("postCommit: status: %v\nbody: %s", resp.Status, body)
	}

	var s struct {
		Error string
	}
	if err := json.Unmarshal(body, &s); err != nil {
		return fmt.Errorf("postCommit: decoding response: %v", err)
	}
	if s.Error != "" {
		return fmt.Errorf("postCommit: error: %v", s.Error)
	}
	return nil
}

// update looks for new commits and branches,
// and updates the commits and branches maps.
func (r *Repo) update(noisy bool) error {
	remotes, err := gitRemotesFn(r)
	if err != nil {
		return err
	}
	for _, name := range remotes {
		b := r.branches[name]

		// Find all unseen commits on this branch.
		revspec := "heads/" + name
		if b != nil {
			// If we know about this branch,
			// only log commits down to the known head.
			revspec = b.Head.Hash + ".." + revspec
		}
		log, err := gitLogFn(r, "--topo-order", revspec)
		if err != nil {
			return err
		}
		if len(log) == 0 {
			// No commits to handle; carry on.
			continue
		}

		var nDups, nDrops int

		// Add unknown commits to r.commits.
		var added []*Commit
		for _, c := range log {
			if noisy {
				r.logf("found new commit %v", c)
			}
			// If we've already seen this commit,
			// only store the master one in r.commits.
			if _, ok := r.commits[c.Hash]; ok {
				nDups++
				if name != master {
					nDrops++
					continue
				}
			}
			c.Branch = name
			r.commits[c.Hash] = c
			added = append(added, c)
		}

		if nDups > 0 {
			r.logf("saw %v duplicate commits; dropped %v of them", nDups, nDrops)
		}

		// Link added commits.
		for _, c := range added {
			if c.Parent == "" {
				// This is the initial commit; no parent.
				r.logf("no parents for initial commit %v", c)
				continue
			}
			// Find parent commit.
			p, ok := r.commits[c.Parent]
			if !ok {
				return fmt.Errorf("can't find parent %q for %v", c.Parent, c)
			}
			// Link parent Commit.
			c.parent = p
			// Link child Commits.
			p.children = append(p.children, c)
		}

		// Update branch head, or add newly discovered branch.
		// If we had already seen log[0], eg. on a different branch,
		// we would never have linked it in the loop above.
		// We therefore fetch the commit from r.commits to ensure we have
		// the right address.
		head := r.commits[log[0].Hash]
		if b != nil {
			// Known branch; update head.
			b.Head = head
			r.logf("updated branch head: %v", b)
		} else {
			// It's a new branch; add it.
			seen, err := r.lastSeen(head.Hash)
			if err != nil {
				return err
			}
			b = &Branch{Name: name, Head: head, LastSeen: seen}
			r.branches[name] = b
			r.logf("found branch: %v", b)
		}
	}

	return nil
}

// lastSeen finds the most recent commit the dashboard has seen,
// starting at the specified head. If the dashboard hasn't seen
// any of the commits from head to the beginning, it returns nil.
func (r *Repo) lastSeen(head string) (*Commit, error) {
	h, ok := r.commits[head]
	if !ok {
		return nil, fmt.Errorf("lastSeen: can't find %q in commits", head)
	}

	var s []*Commit
	for c := h; c != nil; c = c.parent {
		s = append(s, c)
	}

	var err error
	i := sort.Search(len(s), func(i int) bool {
		if err != nil {
			return false
		}
		ok, err = r.dashSeen(s[i].Hash)
		return ok
	})
	switch {
	case err != nil:
		return nil, fmt.Errorf("lastSeen: %v", err)
	case i < len(s):
		return s[i], nil
	default:
		// Dashboard saw no commits.
		return nil, nil
	}
}

// dashSeen reports whether the build dashboard knows the specified commit.
func (r *Repo) dashSeen(hash string) (bool, error) {
	if !*network {
		return networkSeen[hash], nil
	}
	v := url.Values{"hash": {hash}, "packagePath": {r.path}}
	u := *dashFlag + "commit?" + v.Encode()
	resp, err := httpClient.Get(u)
	if err != nil {
		return false, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != 200 {
		return false, fmt.Errorf("status: %v", resp.Status)
	}
	var s struct {
		Error string
	}
	err = json.NewDecoder(resp.Body).Decode(&s)
	if err != nil {
		return false, err
	}
	switch s.Error {
	case "":
		// Found one.
		return true, nil
	case "Commit not found":
		// Commit not found, keep looking for earlier commits.
		return false, nil
	default:
		return false, fmt.Errorf("dashboard: %v", s.Error)
	}
}

// mergeBase returns the hash of the merge base for revspecs a and b.
func (r *Repo) mergeBase(a, b string) (string, error) {
	cmd := exec.Command("git", "merge-base", a, b)
	cmd.Dir = r.root
	out, err := cmd.CombinedOutput()
	if err != nil {
		return "", fmt.Errorf("git merge-base %s..%s: %v", a, b, err)
	}
	return string(bytes.TrimSpace(out)), nil
}

// gitRemotes returns a slice of remote branches known to the git repo.
// It always puts "origin/master" first.
func gitRemotes(r *Repo) ([]string, error) {
	if *branches != "" {
		return strings.Split(*branches, ","), nil
	}

	cmd := exec.Command("git", "branch")
	cmd.Dir = r.root
	out, err := cmd.CombinedOutput()
	if err != nil {
		return nil, fmt.Errorf("git branch: %v", err)
	}
	bs := []string{master}
	for _, b := range strings.Split(string(out), "\n") {
		b = strings.TrimPrefix(b, "* ")
		b = strings.TrimSpace(b)
		// Ignore aliases, blank lines, and master (it's already in bs).
		if b == "" || strings.Contains(b, "->") || b == master {
			continue
		}
		// Ignore pre-go1 release branches; they are just noise.
		if strings.HasPrefix(b, "release-branch.r") {
			continue
		}
		bs = append(bs, b)
	}
	return bs, nil
}

const logFormat = `--format=format:` + logBoundary + `%H
%P
%an <%ae>
%cD
%B
` + fileBoundary

const logBoundary = `_-_- magic boundary -_-_`
const fileBoundary = `_-_- file boundary -_-_`

// gitLog runs "git log" with the supplied arguments
// and parses the output into Commit values.
func gitLog(r *Repo, dir string, args ...string) ([]*Commit, error) {
	args = append([]string{"log", "--date=rfc", "--name-only", "--parents", logFormat}, args...)
	if r.path == "" && *filter != "" {
		paths := strings.Split(*filter, ",")
		args = append(args, "--")
		args = append(args, paths...)
	}
	cmd := exec.Command("git", args...)
	cmd.Dir = r.root
	out, err := cmd.CombinedOutput()
	if err != nil {
		return nil, fmt.Errorf("git %v: %v\n%s", strings.Join(args, " "), err, out)
	}

	// We have a commit with description that contains 0x1b byte.
	// Mercurial does not escape it, but xml.Unmarshal does not accept it.
	// TODO(adg): do we still need to scrub this? Probably.
	out = bytes.Replace(out, []byte{0x1b}, []byte{'?'}, -1)

	var cs []*Commit
	for _, text := range strings.Split(string(out), logBoundary) {
		text = strings.TrimSpace(text)
		if text == "" {
			continue
		}
		p := strings.SplitN(text, "\n", 5)
		if len(p) != 5 {
			return nil, fmt.Errorf("git log %v: malformed commit: %q", strings.Join(args, " "), text)
		}

		// The change summary contains the change description and files
		// modified in this commit.  There is no way to directly refer
		// to the modified files in the log formatting string, so we look
		// for the file boundary after the description.
		changeSummary := p[4]
		descAndFiles := strings.SplitN(changeSummary, fileBoundary, 2)
		desc := strings.TrimSpace(descAndFiles[0])

		// For branch merges, the list of files can still be empty
		// because there are no changed files.
		files := strings.Replace(strings.TrimSpace(descAndFiles[1]), "\n", " ", -1)

		cs = append(cs, &Commit{
			Hash: p[0],
			// TODO(adg): This may break with branch merges.
			Parent: strings.Split(p[1], " ")[0],
			Author: p[2],
			Date:   p[3],
			Desc:   desc,
			Files:  files,
		})
	}
	return cs, nil
}

// fetch runs "git fetch" in the repository root.
// It tries three times, just in case it failed because of a transient error.
func (r *Repo) fetch() (err error) {
	n := 0
	r.setStatus("running git fetch origin")
	defer func() {
		if err != nil {
			r.setStatus("git fetch failed")
		} else {
			r.setStatus("ran git fetch")
		}
	}()
	return try(3, func() error {
		n++
		if n > 1 {
			r.setStatus(fmt.Sprintf("running git fetch origin, attempt %d", n))
		}
		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
		defer cancel()
		cmd := exec.CommandContext(ctx, "git", "fetch", "--prune", "origin")
		cmd.Dir = r.root
		if out, err := cmd.CombinedOutput(); err != nil {
			err = fmt.Errorf("%v\n\n%s", err, out)
			r.logf("git fetch: %v", err)
			return err
		}
		return nil
	})
}

// push runs "git push -f --mirror dest" in the repository root.
// It tries three times, just in case it failed because of a transient error.
func (r *Repo) push() (err error) {
	n := 0
	r.setStatus("syncing to github")
	defer func() {
		if err != nil {
			r.setStatus("sync to github failed")
		} else {
			r.setStatus("did sync to github")
		}
	}()
	return try(3, func() error {
		n++
		if n > 1 {
			r.setStatus(fmt.Sprintf("syncing to github, attempt %d", n))
		}
		cmd := exec.Command("git", "push", "-f", "--mirror", "dest")
		cmd.Dir = r.root
		if out, err := cmd.CombinedOutput(); err != nil {
			err = fmt.Errorf("%v\n\n%s", err, out)
			r.logf("git push failed: %v", err)
			return err
		}
		return nil
	})
}

// hasRev returns true if the repo contains the commit-ish rev.
func (r *Repo) hasRev(ctx context.Context, rev string) bool {
	cmd := exec.CommandContext(ctx, "git", "cat-file", "-t", rev)
	cmd.Dir = r.root
	return cmd.Run() == nil
}

// if non-nil, used by r.archive to create a "git archive" command.
var testHookArchiveCmd func(context.Context, string, ...string) *exec.Cmd

// if non-nil, used by r.archive to create a "git fetch" command.
var testHookFetchCmd func(context.Context, string, ...string) *exec.Cmd

// archive exports the git repository at the given rev and returns the
// compressed repository.
func (r *Repo) archive(ctx context.Context, rev string) ([]byte, error) {
	var cmd *exec.Cmd
	if testHookArchiveCmd == nil {
		cmd = exec.CommandContext(ctx, "git", "archive", "--format=tgz", rev)
	} else {
		cmd = testHookArchiveCmd(ctx, "git", "archive", "--format=tgz", rev)
	}
	cmd.Dir = r.root
	return cmd.Output()
}

// fetchRev attempts to fetch rev from remote.
func (r *Repo) fetchRev(ctx context.Context, remote, rev string) error {
	var cmd *exec.Cmd
	if testHookFetchCmd == nil {
		cmd = exec.CommandContext(ctx, "git", "fetch", remote, rev)
	} else {
		cmd = testHookFetchCmd(ctx, "git", "fetch", remote, rev)
	}
	cmd.Dir = r.root
	return cmd.Run()
}

func (r *Repo) fetchRevIfNeeded(ctx context.Context, rev string) error {
	if r.hasRev(ctx, rev) {
		return nil
	}
	r.logf("attempting to fetch missing revision %s from origin", rev)
	return r.fetchRev(ctx, "origin", rev)
}

// GET /<name>.tar.gz
// GET /debug/watcher/<name>
func (r *Repo) ServeHTTP(w http.ResponseWriter, req *http.Request) {
	if req.Method != "GET" && req.Method != "HEAD" {
		w.WriteHeader(http.StatusBadRequest)
		return
	}
	if strings.HasPrefix(req.URL.Path, "/debug/watcher/") {
		r.serveStatus(w, req)
		return
	}
	rev := req.FormValue("rev")
	if rev == "" {
		w.WriteHeader(http.StatusBadRequest)
		return
	}
	ctx, cancel := context.WithTimeout(req.Context(), 30*time.Second)
	defer cancel()
	if err := r.fetchRevIfNeeded(ctx, rev); err != nil {
		// Try the archive anyway, it might work
		r.logf("error fetching revision %s: %v", rev, err)
	}
	tgz, err := r.archive(ctx, rev)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	w.Header().Set("Content-Length", strconv.Itoa(len(tgz)))
	w.Header().Set("Content-Type", "application/x-compressed")
	w.Write(tgz)
}

func (r *Repo) serveStatus(w http.ResponseWriter, req *http.Request) {
	w.Header().Set("Content-Type", "text/html")
	fmt.Fprintf(w, "<html><head><title>watcher: %s</title><body><h1>watcher status for repo: %q</h1>\n",
		r.name(), r.name())
	fmt.Fprintf(w, "<pre>\n")
	nowRound := time.Now().Round(time.Second)
	r.status.foreachDesc(func(ent statusEntry) {
		fmt.Fprintf(w, "%v   %-20s %v\n",
			ent.t.In(time.UTC).Format(time.RFC3339),
			nowRound.Sub(ent.t.Round(time.Second)).String()+" ago",
			ent.status)
	})
	fmt.Fprintf(w, "\n</pre></body></html>")
}

func try(n int, fn func() error) error {
	var err error
	for tries := 0; tries < n; tries++ {
		time.Sleep(time.Duration(tries) * 5 * time.Second) // Linear back-off.
		if err = fn(); err == nil {
			break
		}
	}
	return err
}

// Branch represents a Mercurial branch.
type Branch struct {
	Name     string
	Head     *Commit
	LastSeen *Commit // the last commit posted to the dashboard
}

func (b *Branch) String() string {
	return fmt.Sprintf("%q(Head: %v LastSeen: %v)", b.Name, b.Head, b.LastSeen)
}

// Commit represents a single Git commit.
type Commit struct {
	Hash   string
	Author string
	Date   string // Format: "Mon, 2 Jan 2006 15:04:05 -0700"
	Desc   string // Plain text, first line is a short description.
	Parent string
	Branch string
	Files  string

	// For walking the graph.
	parent   *Commit
	children []*Commit
}

func (c *Commit) String() string {
	s := c.Hash
	if c.Branch != "" {
		s += fmt.Sprintf("[%v]", c.Branch)
	}
	s += fmt.Sprintf("(%q)", strings.SplitN(c.Desc, "\n", 2)[0])
	return s
}

// NeedsBenchmarking reports whether the Commit needs benchmarking.
func (c *Commit) NeedsBenchmarking() bool {
	// Do not benchmark branch commits, they are usually not interesting
	// and fall out of the trunk succession.
	if c.Branch != master {
		return false
	}
	// Do not benchmark commits that do not touch source files (e.g. CONTRIBUTORS).
	for _, f := range strings.Split(c.Files, " ") {
		if (strings.HasPrefix(f, "include") || strings.HasPrefix(f, "src")) &&
			!strings.HasSuffix(f, "_test.go") && !strings.Contains(f, "testdata") {
			return true
		}
	}
	return false
}

func homeDir() string {
	switch runtime.GOOS {
	case "plan9":
		return os.Getenv("home")
	case "windows":
		return os.Getenv("HOMEDRIVE") + os.Getenv("HOMEPATH")
	}
	return os.Getenv("HOME")
}

func readKey() (string, error) {
	c, err := ioutil.ReadFile(*keyFile)
	if os.IsNotExist(err) && metadata.OnGCE() {
		key, err := metadata.ProjectAttributeValue("builder-master-key")
		if err != nil {
			return "", fmt.Errorf("-key=%s doesn't exist, and key can't be loaded from GCE metadata: %v", *keyFile, err)
		}
		return strings.TrimSpace(key), nil
	}
	if err != nil {
		return "", err
	}
	return string(bytes.TrimSpace(bytes.SplitN(c, []byte("\n"), 2)[0])), nil
}

// subrepoList fetches a list of sub-repositories from the dashboard
// and returns them as a slice of base import paths.
// Eg, []string{"golang.org/x/tools", "golang.org/x/net"}.
func subrepoList() ([]string, error) {
	if !*network {
		return nil, nil
	}

	r, err := httpClient.Get(*dashFlag + "packages?kind=subrepo")
	if err != nil {
		return nil, fmt.Errorf("subrepo list: %v", err)
	}
	defer r.Body.Close()
	if r.StatusCode != 200 {
		return nil, fmt.Errorf("subrepo list: got status %v", r.Status)
	}
	var resp struct {
		Response []struct {
			Path string
		}
		Error string
	}
	err = json.NewDecoder(r.Body).Decode(&resp)
	if err != nil {
		return nil, fmt.Errorf("subrepo list: %v", err)
	}
	if resp.Error != "" {
		return nil, fmt.Errorf("subrepo list: %v", resp.Error)
	}
	var pkgs []string
	for _, r := range resp.Response {
		pkgs = append(pkgs, r.Path)
	}
	return pkgs, nil
}

var (
	ticklerMu sync.Mutex
	ticklers  = make(map[string]chan bool)
)

// repo is the gerrit repo: e.g. "go", "net", "crypto", ...
func repoTickler(repo string) chan bool {
	ticklerMu.Lock()
	defer ticklerMu.Unlock()
	if c, ok := ticklers[repo]; ok {
		return c
	}
	c := make(chan bool, 1)
	ticklers[repo] = c
	return c
}

// pollGerritAndTickle polls Gerrit's JSON meta URL of all its URLs
// and their current branch heads.  When this sees that one has
// changed, it tickles the channel for that repo and wakes up its
// poller, if its poller is in a sleep.
func pollGerritAndTickle() {
	last := map[string]string{} // repo -> last seen hash
	for {
		gerritRepos, err := gerritMetaMap()
		if err != nil {
			log.Printf("pollGerritAndTickle: gerritMetaMap failed, skipping: %v", err)
			gerritRepos = nil
		}
		for repo, hash := range gerritRepos {
			if hash != last[repo] {
				last[repo] = hash
				select {
				case repoTickler(repo) <- true:
				default:
				}
			}
		}
		time.Sleep(*pollInterval)
	}
}

// subscribeToMaintnerAndTickleLoop subscribes to maintner.golang.org
// and watches for any ref changes in realtime.
func subscribeToMaintnerAndTickleLoop() {
	for {
		if err := subscribeToMaintnerAndTickle(); err != nil {
			log.Printf("maintner loop: %v; retrying in 30 seconds", err)
			time.Sleep(30 * time.Second)
		}
	}
}

func subscribeToMaintnerAndTickle() error {
	log.Printf("Loading maintner data.")
	t0 := time.Now()
	ctx := context.Background()
	corpus, err := godata.Get(ctx)
	if err != nil {
		return err
	}
	log.Printf("Loaded maintner data in %v", time.Since(t0))
	last := map[string]string{} // go.googlesource.com repo base => digest of all refs
	for {
		corpus.Gerrit().ForeachProjectUnsorted(func(gp *maintner.GerritProject) error {
			proj := path.Base(gp.ServerSlashProject())
			s1 := sha1.New()
			gp.ForeachNonChangeRef(func(ref string, hash maintner.GitHash) error {
				io.WriteString(s1, string(hash))
				return nil
			})
			sum := fmt.Sprintf("%x", s1.Sum(nil))
			lastSum := last[proj]
			if lastSum == sum {
				return nil
			}
			last[proj] = sum
			if lastSum == "" {
				return nil
			}
			log.Printf("maintner refs for %s changed", gp.ServerSlashProject())
			select {
			case repoTickler(proj) <- true:
			default:
			}
			return nil
		})
		if err := corpus.Update(ctx); err != nil {
			return err
		}
	}
}

// gerritMetaMap returns the map from repo name (e.g. "go") to its
// latest master hash.
func gerritMetaMap() (map[string]string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	meta, err := gerritClient.GetProjects(ctx, "master")
	if err != nil {
		return nil, fmt.Errorf("gerritClient.GetProjects: %v", err)
	}
	m := map[string]string{}
	for repo, v := range meta {
		if master, ok := v.Branches["master"]; ok {
			m[repo] = master
		}
	}
	return m, nil
}

// GET /debug/goroutines
func handleDebugGoroutines(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "text/plain; charset=utf-8")
	buf := make([]byte, 1<<20)
	w.Write(buf[:runtime.Stack(buf, true)])
}

// GET /debug/env
func handleDebugEnv(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "text/plain; charset=utf-8")
	for _, kv := range os.Environ() {
		fmt.Fprintf(w, "%s\n", kv)
	}
}
