cmd/coordinator, internal: move benchmark building to separate package

This moves getSourceTgz* to a new package called sourcecache. The
original functions couldn't be left in coordinator.go because that
would cause a single process to have multiple caches.

The code for building and running benchmarks is moved into the buildgo
package, and the public API becomes GoBuilder.EnumerateBenchmarks and
Run on the resulting BenchmarkItem objects.

Updates golang/go#19871

Change-Id: I28b660e1cdaa6d1c6b0378c08de30f5e58316cc6
Reviewed-on: https://go-review.googlesource.com/44211
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
diff --git a/cmd/coordinator/coordinator.go b/cmd/coordinator/coordinator.go
index 092b738..679e5c6 100644
--- a/cmd/coordinator/coordinator.go
+++ b/cmd/coordinator/coordinator.go
@@ -55,8 +55,7 @@
 	"golang.org/x/build/dashboard"
 	"golang.org/x/build/gerrit"
 	"golang.org/x/build/internal/buildgo"
-	"golang.org/x/build/internal/lru"
-	"golang.org/x/build/internal/singleflight"
+	"golang.org/x/build/internal/sourcecache"
 	"golang.org/x/build/livelog"
 	"golang.org/x/build/maintner/maintnerd/apipb"
 	"golang.org/x/build/types"
@@ -365,7 +364,12 @@
 		log.Fatal(err)
 	}
 	watcherProxy = httputil.NewSingleHostReverseProxy(u)
-	watcherProxy.Transport = gitMirrorClient.Transport
+	watcherProxy.Transport = &http.Transport{
+		IdleConnTimeout: 30 * time.Second,
+		DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
+			return goKubeClient.DialServicePort(ctx, "gitmirror", "")
+		},
+	}
 }
 
 func handleDebugWatcher(w http.ResponseWriter, r *http.Request) {
@@ -1348,6 +1352,25 @@
 	return poolForConf(st.conf)
 }
 
+// parentRev returns the parent of this build's commit (but only if this build comes from a trySet).
+func (st *buildStatus) parentRev() (pbr buildgo.BuilderRev, err error) {
+	// TODO(quentin): Support non-try commits by asking maintnerd
+	// (at which point this no longer needs to be a method on buildStatus).
+	pbr = st.BuilderRev // copy
+	rev := st.trySet.ci.Revisions[st.trySet.ci.CurrentRevision]
+	if rev.Commit == nil {
+		err = fmt.Errorf("commit information missing for revision %q", st.trySet.ci.CurrentRevision)
+		return
+	}
+	if len(rev.Commit.Parents) == 0 {
+		// TODO(quentin): Log?
+		err = errors.New("commit has no parent")
+		return
+	}
+	pbr.Rev = rev.Commit.Parents[0].CommitID
+	return
+}
+
 func (st *buildStatus) expectedMakeBashDuration() time.Duration {
 	// TODO: base this on historical measurements, instead of statically configured.
 	// TODO: move this to dashboard/builders.go? But once we based on on historical
@@ -1688,6 +1711,16 @@
 	return st.isTry() && !st.IsSubrepo() && st.conf.RunBench
 }
 
+// goBuilder returns a GoBuilder for this buildStatus.
+func (st *buildStatus) goBuilder() buildgo.GoBuilder {
+	return buildgo.GoBuilder{
+		Logger:     st,
+		BuilderRev: st.BuilderRev,
+		Conf:       st.conf,
+		Goroot:     "go",
+	}
+}
+
 // runAllSharded runs make.bash and then shards the test execution.
 // remoteErr and err are as described at the top of this file.
 //
@@ -1698,13 +1731,7 @@
 	st.getHelpersReadySoon()
 
 	if !st.useSnapshot() {
-		builder := buildgo.GoBuilder{
-			Logger:     st,
-			BuilderRev: st.BuilderRev,
-			Conf:       st.conf,
-			Goroot:     "go",
-		}
-		remoteErr, err = builder.RunMake(st.bc, st)
+		remoteErr, err = st.goBuilder().RunMake(st.bc, st)
 		if err != nil {
 			return nil, err
 		}
@@ -1876,7 +1903,7 @@
 		return sp.Done(fmt.Errorf("writing VERSION tgz: %v", err))
 	}
 
-	srcTar, err := getSourceTgz(st, "go", st.Rev)
+	srcTar, err := sourcecache.GetSourceTgz(st, "go", st.Rev)
 	if err != nil {
 		return err
 	}
@@ -1995,7 +2022,7 @@
 	return false
 }
 
-func (st *buildStatus) newTestSet(names []string, benchmarks []*benchmarkItem) *testSet {
+func (st *buildStatus) newTestSet(names []string, benchmarks []*buildgo.BenchmarkItem) *testSet {
 	set := &testSet{
 		st: st,
 	}
@@ -2009,7 +2036,7 @@
 		})
 	}
 	for _, bench := range benchmarks {
-		name := "bench:" + bench.name()
+		name := "bench:" + bench.Name()
 		set.items = append(set.items, &testItem{
 			set:      set,
 			name:     name,
@@ -2393,19 +2420,11 @@
 	}
 	if strings.HasPrefix(testName, "bench:") {
 		// Assume benchmarks are roughly 20 seconds per run.
-		return 2 * benchRuns * 20 * time.Second
+		return 2 * 5 * 20 * time.Second
 	}
 	return minGoTestSpeed * 2
 }
 
-func fetchSubrepo(sl spanlog.Logger, bc *buildlet.Client, repo, rev string) error {
-	tgz, err := getSourceTgz(sl, repo, rev)
-	if err != nil {
-		return err
-	}
-	return bc.PutTar(tgz, "gopath/src/"+subrepoPrefix+repo)
-}
-
 func (st *buildStatus) runSubrepoTests() (remoteErr, err error) {
 	st.LogEventTime("fetching_subrepo", st.SubName)
 
@@ -2423,7 +2442,7 @@
 	// fetch checks out the provided sub-repo to the buildlet's workspace.
 	fetch := func(repo, rev string) error {
 		fetched[repo] = true
-		return fetchSubrepo(st, st.bc, repo, rev)
+		return buildgo.FetchSubrepo(st, st.bc, repo, rev)
 	}
 
 	// findDeps uses 'go list' on the checked out repo to find its
@@ -2503,6 +2522,26 @@
 	})
 }
 
+// affectedPkgs returns the name of every package affected by this commit.
+// The returned list may contain duplicates and is unsorted.
+// It is safe to call this on a nil trySet.
+func (ts *trySet) affectedPkgs() (pkgs []string) {
+	// TODO(quentin): Support non-try commits by asking maintnerd for the affected files.
+	if ts == nil || ts.ci == nil {
+		return
+	}
+	rev := ts.ci.Revisions[ts.ci.CurrentRevision]
+	for p := range rev.Files {
+		if strings.HasPrefix(p, "src/") {
+			pkg := path.Dir(p[len("src/"):])
+			if pkg != "" {
+				pkgs = append(pkgs, pkg)
+			}
+		}
+	}
+	return
+}
+
 // runTests is only called for builders which support a split make/run
 // (should be everything, at least soon). Currently (2015-05-27) iOS
 // and Android and Nacl do not.
@@ -2517,10 +2556,14 @@
 	if err != nil {
 		return nil, fmt.Errorf("distTestList exec: %v", err)
 	}
-	var benches []*benchmarkItem
+	var benches []*buildgo.BenchmarkItem
 	if st.shouldBench() {
 		sp := st.CreateSpan("enumerate_benchmarks")
-		b, err := enumerateBenchmarks(st, st.conf, st.bc, "go", st.trySet)
+		rev := getRepoHead("benchmarks")
+		if rev == "" {
+			rev = "master" // should happen rarely; ok if it does.
+		}
+		b, err := st.goBuilder().EnumerateBenchmarks(st.bc, rev, st.trySet.affectedPkgs())
 		sp.Done(err)
 		if err == nil {
 			benches = b
@@ -2626,7 +2669,7 @@
 		}
 
 		if ti.bench != nil {
-			for i, s := range ti.bench.output {
+			for i, s := range ti.bench.Output {
 				if i < len(benchFiles) {
 					benchFiles[i].out.WriteString(s)
 				}
@@ -2710,7 +2753,7 @@
 	if !st.shouldBench() {
 		return nil
 	}
-	// We know rev and rev.Commit.Parents[0] exist because benchmarkItem.buildParent has checked.
+	// We know rev and rev.Commit.Parents[0] exist because BenchmarkItem.buildParent has checked.
 	rev := st.trySet.ci.Revisions[st.trySet.ci.CurrentRevision]
 	ps := rev.PatchSetNumber
 	benchFiles := []*benchFile{
@@ -2794,7 +2837,11 @@
 	timeout := execTimeout(names)
 	var remoteErr, err error
 	if ti := tis[0]; ti.bench != nil {
-		remoteErr, err = ti.bench.run(st, bc, &buf)
+		pbr, perr := st.parentRev()
+		// TODO(quentin): Error if parent commit could not be determined?
+		if perr == nil {
+			remoteErr, err = ti.bench.Run(buildEnv, st, st.conf, bc, &buf, []buildgo.BuilderRev{st.BuilderRev, pbr})
+		}
 	} else {
 		remoteErr, err = bc.Exec(path.Join("go", "bin", "go"), buildlet.ExecOpts{
 			// We set Dir to "." instead of the default ("go/bin") so when the dist tests
@@ -2941,7 +2988,7 @@
 	name     string        // "go_test:sort"
 	duration time.Duration // optional approximate size
 
-	bench *benchmarkItem // If populated, this is a benchmark instead of a regular test.
+	bench *buildgo.BenchmarkItem // If populated, this is a benchmark instead of a regular test.
 
 	take chan token // buffered size 1: sending takes ownership of rest of fields:
 
@@ -3277,110 +3324,10 @@
 	return bytes.NewReader(buf.Bytes())
 }
 
-var sourceGroup singleflight.Group
-
-var sourceCache = lru.New(40) // git rev -> []byte
-
 func useGitMirror() bool {
 	return *mode != "dev"
 }
 
-// repo is go.googlesource.com repo ("go", "net", etc)
-// rev is git revision.
-func getSourceTgz(sl spanlog.Logger, repo, rev string) (tgz io.Reader, err error) {
-	sp := sl.CreateSpan("get_source")
-	defer func() { sp.Done(err) }()
-
-	key := fmt.Sprintf("%v-%v", repo, rev)
-	vi, err, _ := sourceGroup.Do(key, func() (interface{}, error) {
-		if tgzBytes, ok := sourceCache.Get(key); ok {
-			return tgzBytes, nil
-		}
-
-		if useGitMirror() {
-			sp := sl.CreateSpan("get_source_from_gitmirror")
-			tgzBytes, err := getSourceTgzFromGitMirror(repo, rev)
-			if err == nil {
-				sourceCache.Add(key, tgzBytes)
-				sp.Done(nil)
-				return tgzBytes, nil
-			}
-			log.Printf("Error fetching source %s/%s from watcher (after %v uptime): %v",
-				repo, rev, time.Since(processStartTime), err)
-			sp.Done(errors.New("timeout"))
-		}
-
-		sp := sl.CreateSpan("get_source_from_gerrit", fmt.Sprintf("%v from gerrit", key))
-		tgzBytes, err := getSourceTgzFromGerrit(repo, rev)
-		sp.Done(err)
-		if err == nil {
-			sourceCache.Add(key, tgzBytes)
-		}
-		return tgzBytes, err
-	})
-	if err != nil {
-		return nil, err
-	}
-	return bytes.NewReader(vi.([]byte)), nil
-}
-
-var gitMirrorClient = &http.Client{
-	Timeout: 30 * time.Second,
-	Transport: &http.Transport{
-		IdleConnTimeout: 30 * time.Second,
-		DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
-			return goKubeClient.DialServicePort(ctx, "gitmirror", "")
-		},
-	},
-}
-
-var gerritHTTPClient = &http.Client{
-	Timeout: 30 * time.Second,
-}
-
-func getSourceTgzFromGerrit(repo, rev string) (tgz []byte, err error) {
-	return getSourceTgzFromURL(gerritHTTPClient, "gerrit", repo, rev, "https://go.googlesource.com/"+repo+"/+archive/"+rev+".tar.gz")
-}
-
-func getSourceTgzFromGitMirror(repo, rev string) (tgz []byte, err error) {
-	for i := 0; i < 2; i++ { // two tries; different pods maybe?
-		if i > 0 {
-			time.Sleep(1 * time.Second)
-		}
-		// The "gitmirror" hostname is unused:
-		tgz, err = getSourceTgzFromURL(gitMirrorClient, "gitmirror", repo, rev, "http://gitmirror/"+repo+".tar.gz?rev="+rev)
-		if err == nil {
-			return tgz, nil
-		}
-		if tr, ok := http.DefaultTransport.(*http.Transport); ok {
-			tr.CloseIdleConnections()
-		}
-	}
-	return nil, err
-}
-
-func getSourceTgzFromURL(hc *http.Client, source, repo, rev, urlStr string) (tgz []byte, err error) {
-	res, err := hc.Get(urlStr)
-	if err != nil {
-		return nil, fmt.Errorf("fetching %s/%s from %s: %v", repo, rev, source, err)
-	}
-	defer res.Body.Close()
-	if res.StatusCode/100 != 2 {
-		slurp, _ := ioutil.ReadAll(io.LimitReader(res.Body, 4<<10))
-		return nil, fmt.Errorf("fetching %s/%s from %s: %v; body: %s", repo, rev, source, res.Status, slurp)
-	}
-	// TODO(bradfitz): finish golang.org/issue/11224
-	const maxSize = 50 << 20 // talks repo is over 25MB; go source is 7.8MB on 2015-06-15
-	slurp, err := ioutil.ReadAll(io.LimitReader(res.Body, maxSize+1))
-	if len(slurp) > maxSize && err == nil {
-		err = fmt.Errorf("body over %d bytes", maxSize)
-	}
-	if err != nil {
-		return nil, fmt.Errorf("reading %s/%s from %s: %v", repo, rev, source, err)
-	}
-	return slurp, nil
-}
-
 var nl = []byte("\n")
 
 // repoHead contains the hashes of the latest master HEAD
diff --git a/cmd/coordinator/kube.go b/cmd/coordinator/kube.go
index 13d8f38..1268a72 100644
--- a/cmd/coordinator/kube.go
+++ b/cmd/coordinator/kube.go
@@ -10,6 +10,7 @@
 	"fmt"
 	"io"
 	"log"
+	"net"
 	"sort"
 	"strconv"
 	"strings"
@@ -18,6 +19,7 @@
 
 	"golang.org/x/build/buildlet"
 	"golang.org/x/build/dashboard"
+	"golang.org/x/build/internal/sourcecache"
 	"golang.org/x/build/kubernetes"
 	"golang.org/x/build/kubernetes/api"
 	"golang.org/x/build/kubernetes/gke"
@@ -70,6 +72,10 @@
 		return err
 	}
 
+	sourcecache.RegisterGitMirrorDial(func(ctx context.Context) (net.Conn, error) {
+		return goKubeClient.DialServicePort(ctx, "gitmirror", "")
+	})
+
 	go kubePool.pollCapacityLoop()
 	return nil
 }
diff --git a/cmd/coordinator/benchmarks.go b/internal/buildgo/benchmarks.go
similarity index 67%
rename from cmd/coordinator/benchmarks.go
rename to internal/buildgo/benchmarks.go
index dddb207..843fe04 100644
--- a/cmd/coordinator/benchmarks.go
+++ b/internal/buildgo/benchmarks.go
@@ -2,37 +2,40 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-package main
+package buildgo
 
 import (
 	"bytes"
 	"context"
-	"errors"
 	"fmt"
 	"io"
 	"path"
 	"strings"
 	"time"
 
+	"golang.org/x/build/buildenv"
 	"golang.org/x/build/buildlet"
 	"golang.org/x/build/cmd/coordinator/spanlog"
 	"golang.org/x/build/dashboard"
-	"golang.org/x/build/internal/buildgo"
+	"golang.org/x/build/internal/sourcecache"
 )
 
 // benchRuns is the number of times to run each benchmark binary
 const benchRuns = 5
 
-type benchmarkItem struct {
+// BenchmarkItem represents a single package's benchmarks, to be run on one or more commit.
+// After Run is called, the output of each commit can be retrieved from the Output field.
+type BenchmarkItem struct {
 	binary   string   // name of binary relative to goroot
 	args     []string // args to run binary with
 	preamble string   // string to print before benchmark results (e.g. "pkg: test/bench/go1\n")
-	output   []string // benchmark output for each commit
+	Output   []string // benchmark output for each commit
 
 	build func(bc *buildlet.Client, goroot string, w io.Writer) (remoteErr, err error) // how to build benchmark binary
 }
 
-func (b *benchmarkItem) name() string {
+// Name returns a string that uniquely identifies this benchmark.
+func (b *BenchmarkItem) Name() string {
 	return b.binary + " " + strings.Join(b.args, " ")
 }
 
@@ -82,7 +85,7 @@
 		return nil, err
 	}
 	if err := bc.ListDir("gopath/src/golang.org/x/benchmarks", buildlet.ListDirOpts{}, func(buildlet.DirEntry) {}); err != nil {
-		if err := fetchSubrepo(sl, bc, "benchmarks", rev); err != nil {
+		if err := FetchSubrepo(sl, bc, "benchmarks", rev); err != nil {
 			return nil, err
 		}
 	}
@@ -96,80 +99,73 @@
 	})
 }
 
-func enumerateBenchmarks(sl spanlog.Logger, conf dashboard.BuildConfig, bc *buildlet.Client, goroot string, trySet *trySet) ([]*benchmarkItem, error) {
+// EnumerateBenchmarks returns a slice of the benchmarks to be run for the built Go distribution found in gb.Goroot.
+// If benchmarksRev is non-empty, it is the revision of x/benchmarks to check out for additional benchmarks.
+// pkgs contains a list of possibly duplicate packages that will be searched for benchmarks.
+func (gb GoBuilder) EnumerateBenchmarks(bc *buildlet.Client, benchmarksRev string, pkgs []string) ([]*BenchmarkItem, error) {
 	workDir, err := bc.WorkDir()
 	if err != nil {
 		err = fmt.Errorf("buildBench, WorkDir: %v", err)
 		return nil, err
 	}
+
 	// Fetch x/benchmarks
-	rev := getRepoHead("benchmarks")
-	if rev == "" {
-		rev = "master" // should happen rarely; ok if it does.
+	if benchmarksRev != "" {
+		if err := FetchSubrepo(gb.Logger, bc, "benchmarks", benchmarksRev); err != nil {
+			return nil, err
+		}
 	}
 
-	if err := fetchSubrepo(sl, bc, "benchmarks", rev); err != nil {
-		return nil, err
-	}
-
-	var out []*benchmarkItem
+	var out []*BenchmarkItem
 
 	// These regexes shard the go1 tests so each shard takes about 20s, ensuring no test runs for
 	for _, re := range []string{`^Benchmark[BF]`, `^Benchmark[HR]`, `^Benchmark[^BFHR]`} {
-		out = append(out, &benchmarkItem{
+		out = append(out, &BenchmarkItem{
 			binary:   "test/bench/go1/go1.test",
 			args:     []string{"-test.bench", re, "-test.benchmem"},
 			preamble: "pkg: test/bench/go1\n",
 			build: func(bc *buildlet.Client, goroot string, w io.Writer) (error, error) {
-				return buildGo1(conf, bc, goroot, w)
+				return buildGo1(gb.Conf, bc, goroot, w)
 			},
 		})
 	}
 
 	// Enumerate x/benchmarks
-	var buf bytes.Buffer
-	remoteErr, err := bc.Exec(path.Join(goroot, "bin/go"), buildlet.ExecOpts{
-		Output: &buf,
-		ExtraEnv: []string{
-			"GOROOT=" + conf.FilePathJoin(workDir, goroot),
-			"GOPATH=" + conf.FilePathJoin(workDir, "gopath"),
-		},
-		Args: []string{"list", "-f", `{{if eq .Name "main"}}{{.ImportPath}}{{end}}`, "golang.org/x/benchmarks/..."},
-	})
-	if remoteErr != nil {
-		return nil, remoteErr
-	}
-	if err != nil {
-		return nil, err
-	}
-	for _, pkg := range strings.Fields(buf.String()) {
-		pkg := pkg
-		name := "bench-" + path.Base(pkg) + ".exe"
-		out = append(out, &benchmarkItem{
-			binary: name, args: nil, build: func(bc *buildlet.Client, goroot string, w io.Writer) (error, error) {
-				return buildXBenchmark(sl, conf, bc, goroot, w, rev, pkg, name)
-			}})
-	}
-	// Enumerate package benchmarks that were affected by the CL
-	if trySet != nil && trySet.ci != nil {
-		rev := trySet.ci.Revisions[trySet.ci.CurrentRevision]
-		var args []string
-		for p := range rev.Files {
-			if strings.HasPrefix(p, "src/") {
-				pkg := path.Dir(p[len("src/"):])
-				if pkg != "" {
-					args = append(args, pkg)
-				}
-			}
-		}
-		// Find packages that actually have benchmarks or tests.
+	if benchmarksRev != "" {
 		var buf bytes.Buffer
-		remoteErr, err := bc.Exec(path.Join(goroot, "bin/go"), buildlet.ExecOpts{
+		remoteErr, err := bc.Exec(path.Join(gb.Goroot, "bin/go"), buildlet.ExecOpts{
 			Output: &buf,
 			ExtraEnv: []string{
-				"GOROOT=" + conf.FilePathJoin(workDir, goroot),
+				"GOROOT=" + gb.Conf.FilePathJoin(workDir, gb.Goroot),
+				"GOPATH=" + gb.Conf.FilePathJoin(workDir, "gopath"),
 			},
-			Args: append([]string{"list", "-e", "-f", "{{if or (len .TestGoFiles) (len .XTestGoFiles)}}{{.ImportPath}}{{end}}"}, args...),
+			Args: []string{"list", "-f", `{{if eq .Name "main"}}{{.ImportPath}}{{end}}`, "golang.org/x/benchmarks/..."},
+		})
+		if remoteErr != nil {
+			return nil, remoteErr
+		}
+		if err != nil {
+			return nil, err
+		}
+		for _, pkg := range strings.Fields(buf.String()) {
+			pkg := pkg
+			name := "bench-" + path.Base(pkg) + ".exe"
+			out = append(out, &BenchmarkItem{
+				binary: name, args: nil, build: func(bc *buildlet.Client, goroot string, w io.Writer) (error, error) {
+					return buildXBenchmark(gb.Logger, gb.Conf, bc, goroot, w, benchmarksRev, pkg, name)
+				}})
+		}
+	}
+	// Enumerate package benchmarks that were affected by the CL
+	if len(pkgs) > 0 {
+		// Find packages that actually have benchmarks or tests.
+		var buf bytes.Buffer
+		remoteErr, err := bc.Exec(path.Join(gb.Goroot, "bin/go"), buildlet.ExecOpts{
+			Output: &buf,
+			ExtraEnv: []string{
+				"GOROOT=" + gb.Conf.FilePathJoin(workDir, gb.Goroot),
+			},
+			Args: append([]string{"list", "-e", "-f", "{{if or (len .TestGoFiles) (len .XTestGoFiles)}}{{.ImportPath}}{{end}}"}, pkgs...),
 		})
 		if remoteErr != nil {
 			return nil, remoteErr
@@ -186,11 +182,11 @@
 				continue
 			}
 			name := "bench-" + strings.Replace(pkg, "/", "-", -1) + ".exe"
-			out = append(out, &benchmarkItem{
+			out = append(out, &BenchmarkItem{
 				binary: name,
 				args:   []string{"-test.bench", ".", "-test.benchmem", "-test.run", "^$", "-test.benchtime", "100ms"},
 				build: func(bc *buildlet.Client, goroot string, w io.Writer) (error, error) {
-					return buildPkg(conf, bc, goroot, w, pkg, name)
+					return buildPkg(gb.Conf, bc, goroot, w, pkg, name)
 				}})
 		}
 	}
@@ -225,38 +221,21 @@
 	})
 }
 
-// parentRev returns the parent of this build's commit (but only if this build comes from a trySet).
-func (st *buildStatus) parentRev() (pbr buildgo.BuilderRev, err error) {
-	pbr = st.BuilderRev // copy
-	rev := st.trySet.ci.Revisions[st.trySet.ci.CurrentRevision]
-	if rev.Commit == nil {
-		err = fmt.Errorf("commit information missing for revision %q", st.trySet.ci.CurrentRevision)
-		return
-	}
-	if len(rev.Commit.Parents) == 0 {
-		// TODO(quentin): Log?
-		err = errors.New("commit has no parent")
-		return
-	}
-	pbr.Rev = rev.Commit.Parents[0].CommitID
-	return
-}
-
-func buildRev(sl spanlog.Logger, conf dashboard.BuildConfig, bc *buildlet.Client, w io.Writer, goroot string, br buildgo.BuilderRev) error {
+func buildRev(buildEnv *buildenv.Environment, sl spanlog.Logger, conf dashboard.BuildConfig, bc *buildlet.Client, w io.Writer, goroot string, br BuilderRev) error {
 	if br.SnapshotExists(context.TODO(), buildEnv) {
 		return bc.PutTarFromURL(br.SnapshotURL(buildEnv), goroot)
 	}
-	if err := bc.PutTar(versionTgz(br.Rev), goroot); err != nil {
+	if err := bc.PutTar(VersionTgz(br.Rev), goroot); err != nil {
 		return err
 	}
-	srcTar, err := getSourceTgz(sl, "go", br.Rev)
+	srcTar, err := sourcecache.GetSourceTgz(sl, "go", br.Rev)
 	if err != nil {
 		return err
 	}
 	if err := bc.PutTar(srcTar, goroot); err != nil {
 		return err
 	}
-	builder := buildgo.GoBuilder{
+	builder := GoBuilder{
 		Logger:     sl,
 		BuilderRev: br,
 		Conf:       conf,
@@ -269,18 +248,16 @@
 	return remoteErr
 }
 
-// run runs all the iterations of this benchmark on bc.
+// Run runs all the iterations of this benchmark on bc.
 // Build output is sent to w. Benchmark output is stored in b.output.
-// TODO(quentin): Take a list of commits so this can be used for non-try runs.
-func (b *benchmarkItem) run(st *buildStatus, bc *buildlet.Client, w io.Writer) (remoteErr, err error) {
+// revs must contain exactly two revs. The first rev is assumed to be present in "go", and the second will be placed into "go-parent".
+// TODO(quentin): Support len(revs) != 2.
+func (b *BenchmarkItem) Run(buildEnv *buildenv.Environment, sl spanlog.Logger, conf dashboard.BuildConfig, bc *buildlet.Client, w io.Writer, revs []BuilderRev) (remoteErr, err error) {
 	// Ensure we have a built parent repo.
 	if err := bc.ListDir("go-parent", buildlet.ListDirOpts{}, func(buildlet.DirEntry) {}); err != nil {
-		pbr, err := st.parentRev()
-		if err != nil {
-			return nil, err
-		}
-		sp := st.CreateSpan("bench_build_parent", bc.Name())
-		err = buildRev(st, st.conf, bc, w, "go-parent", pbr)
+		pbr := revs[1]
+		sp := sl.CreateSpan("bench_build_parent", bc.Name())
+		err = buildRev(buildEnv, sl, conf, bc, w, "go-parent", pbr)
 		sp.Done(err)
 		if err != nil {
 			return nil, err
@@ -288,7 +265,7 @@
 	}
 	// Build benchmark.
 	for _, goroot := range []string{"go", "go-parent"} {
-		sp := st.CreateSpan("bench_build", fmt.Sprintf("%s/%s: %s", goroot, b.binary, bc.Name()))
+		sp := sl.CreateSpan("bench_build", fmt.Sprintf("%s/%s: %s", goroot, b.binary, bc.Name()))
 		remoteErr, err = b.build(bc, goroot, w)
 		sp.Done(err)
 		if remoteErr != nil || err != nil {
@@ -314,8 +291,8 @@
 		for _, c := range commits {
 			fmt.Fprintf(&c.out, "iteration: %d\nstart-time: %s\n", i, time.Now().UTC().Format(time.RFC3339))
 			p := path.Join(c.path, b.binary)
-			sp := st.CreateSpan("run_one_bench", p)
-			remoteErr, err = runOneBenchBinary(st.conf, bc, &c.out, c.path, p, b.args)
+			sp := sl.CreateSpan("run_one_bench", p)
+			remoteErr, err = runOneBenchBinary(conf, bc, &c.out, c.path, p, b.args)
 			sp.Done(err)
 			if err != nil || remoteErr != nil {
 				c.out.WriteTo(w)
@@ -323,7 +300,7 @@
 			}
 		}
 	}
-	b.output = []string{
+	b.Output = []string{
 		commits[0].out.String(),
 		commits[1].out.String(),
 	}
diff --git a/internal/buildgo/buildgo.go b/internal/buildgo/buildgo.go
index bf1ae43..c5dd598 100644
--- a/internal/buildgo/buildgo.go
+++ b/internal/buildgo/buildgo.go
@@ -7,6 +7,9 @@
 package buildgo
 
 import (
+	"archive/tar"
+	"bytes"
+	"compress/gzip"
 	"context"
 	"fmt"
 	"io"
@@ -19,8 +22,11 @@
 	"golang.org/x/build/buildlet"
 	"golang.org/x/build/cmd/coordinator/spanlog"
 	"golang.org/x/build/dashboard"
+	"golang.org/x/build/internal/sourcecache"
 )
 
+const subrepoPrefix = "golang.org/x/"
+
 // BuilderRev is a build configuration type and a revision.
 type BuilderRev struct {
 	Name string // e.g. "linux-amd64-race"
@@ -163,3 +169,39 @@
 	span.Done(nil)
 	return nil, nil
 }
+
+func FetchSubrepo(sl spanlog.Logger, bc *buildlet.Client, repo, rev string) error {
+	tgz, err := sourcecache.GetSourceTgz(sl, repo, rev)
+	if err != nil {
+		return err
+	}
+	return bc.PutTar(tgz, "gopath/src/"+subrepoPrefix+repo)
+}
+
+// VersionTgz returns an io.Reader of a *.tar.gz file containing only
+// a VERSION file containing the contents of the provided rev string.
+func VersionTgz(rev string) io.Reader {
+	var buf bytes.Buffer
+	zw := gzip.NewWriter(&buf)
+	tw := tar.NewWriter(zw)
+
+	// Writing to a bytes.Buffer should never fail, so check
+	// errors with an explosion:
+	check := func(err error) {
+		if err != nil {
+			panic("previously assumed to never fail: " + err.Error())
+		}
+	}
+
+	contents := fmt.Sprintf("devel " + rev)
+	check(tw.WriteHeader(&tar.Header{
+		Name: "VERSION",
+		Mode: 0644,
+		Size: int64(len(contents)),
+	}))
+	_, err := io.WriteString(tw, contents)
+	check(err)
+	check(tw.Close())
+	check(zw.Close())
+	return bytes.NewReader(buf.Bytes())
+}
diff --git a/internal/sourcecache/source.go b/internal/sourcecache/source.go
new file mode 100644
index 0000000..c701541
--- /dev/null
+++ b/internal/sourcecache/source.go
@@ -0,0 +1,134 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package sourcecache provides a cache of code found in Git repositories.
+// It takes directly to the Gerrit instance at go.googlesource.com.
+// If RegisterGitMirrorDial is called, it will first try to get code from gitmirror before falling back on Gerrit.
+package sourcecache
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"log"
+	"net"
+	"net/http"
+	"time"
+
+	"golang.org/x/build/cmd/coordinator/spanlog"
+	"golang.org/x/build/internal/lru"
+	"golang.org/x/build/internal/singleflight"
+)
+
+var processStartTime = time.Now()
+
+var sourceGroup singleflight.Group
+
+var sourceCache = lru.New(40) // git rev -> []byte
+
+// GetSourceTgz returns a Reader that provides a tgz of the requested source revision.
+// repo is go.googlesource.com repo ("go", "net", etc)
+// rev is git revision.
+func GetSourceTgz(sl spanlog.Logger, repo, rev string) (tgz io.Reader, err error) {
+	sp := sl.CreateSpan("get_source")
+	defer func() { sp.Done(err) }()
+
+	key := fmt.Sprintf("%v-%v", repo, rev)
+	vi, err, _ := sourceGroup.Do(key, func() (interface{}, error) {
+		if tgzBytes, ok := sourceCache.Get(key); ok {
+			return tgzBytes, nil
+		}
+
+		if gitMirrorClient != nil {
+			sp := sl.CreateSpan("get_source_from_gitmirror")
+			tgzBytes, err := getSourceTgzFromGitMirror(repo, rev)
+			if err == nil {
+				sourceCache.Add(key, tgzBytes)
+				sp.Done(nil)
+				return tgzBytes, nil
+			}
+			log.Printf("Error fetching source %s/%s from watcher (after %v uptime): %v",
+				repo, rev, time.Since(processStartTime), err)
+			sp.Done(errors.New("timeout"))
+		}
+
+		sp := sl.CreateSpan("get_source_from_gerrit", fmt.Sprintf("%v from gerrit", key))
+		tgzBytes, err := getSourceTgzFromGerrit(repo, rev)
+		sp.Done(err)
+		if err == nil {
+			sourceCache.Add(key, tgzBytes)
+		}
+		return tgzBytes, err
+	})
+	if err != nil {
+		return nil, err
+	}
+	return bytes.NewReader(vi.([]byte)), nil
+}
+
+var gitMirrorClient *http.Client
+
+// RegisterGitMirrorDial registers a dial function which will be used to reach gitmirror.
+// If used, this function must be called before GetSourceTgz.
+func RegisterGitMirrorDial(dial func(context.Context) (net.Conn, error)) {
+	gitMirrorClient = &http.Client{
+		Timeout: 30 * time.Second,
+		Transport: &http.Transport{
+			IdleConnTimeout: 30 * time.Second,
+			DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
+				return dial(ctx)
+			},
+		},
+	}
+}
+
+var gerritHTTPClient = &http.Client{
+	Timeout: 30 * time.Second,
+}
+
+func getSourceTgzFromGerrit(repo, rev string) (tgz []byte, err error) {
+	return getSourceTgzFromURL(gerritHTTPClient, "gerrit", repo, rev, "https://go.googlesource.com/"+repo+"/+archive/"+rev+".tar.gz")
+}
+
+func getSourceTgzFromGitMirror(repo, rev string) (tgz []byte, err error) {
+	for i := 0; i < 2; i++ { // two tries; different pods maybe?
+		if i > 0 {
+			time.Sleep(1 * time.Second)
+		}
+		// The "gitmirror" hostname is unused:
+		tgz, err = getSourceTgzFromURL(gitMirrorClient, "gitmirror", repo, rev, "http://gitmirror/"+repo+".tar.gz?rev="+rev)
+		if err == nil {
+			return tgz, nil
+		}
+		if tr, ok := http.DefaultTransport.(*http.Transport); ok {
+			tr.CloseIdleConnections()
+		}
+	}
+	return nil, err
+}
+
+func getSourceTgzFromURL(hc *http.Client, source, repo, rev, urlStr string) (tgz []byte, err error) {
+	res, err := hc.Get(urlStr)
+	if err != nil {
+		return nil, fmt.Errorf("fetching %s/%s from %s: %v", repo, rev, source, err)
+	}
+	defer res.Body.Close()
+	if res.StatusCode/100 != 2 {
+		slurp, _ := ioutil.ReadAll(io.LimitReader(res.Body, 4<<10))
+		return nil, fmt.Errorf("fetching %s/%s from %s: %v; body: %s", repo, rev, source, res.Status, slurp)
+	}
+	// TODO(bradfitz): finish golang.org/issue/11224
+	const maxSize = 50 << 20 // talks repo is over 25MB; go source is 7.8MB on 2015-06-15
+	slurp, err := ioutil.ReadAll(io.LimitReader(res.Body, maxSize+1))
+	if len(slurp) > maxSize && err == nil {
+		err = fmt.Errorf("body over %d bytes", maxSize)
+	}
+	if err != nil {
+		return nil, fmt.Errorf("reading %s/%s from %s: %v", repo, rev, source, err)
+	}
+	return slurp, nil
+}