// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build go1.16 && (linux || darwin)
// +build go1.16
// +build linux darwin

package main

import (
"bytes"
"context"
"errors"
"fmt"
"html"
"html/template"
"io"
"log"
"os"
"path"
"strings"
"sync"
"sync/atomic"
"time"
"cloud.google.com/go/errorreporting"
"go4.org/syncutil"
"golang.org/x/build/buildenv"
"golang.org/x/build/buildlet"
"golang.org/x/build/dashboard"
"golang.org/x/build/internal/buildgo"
"golang.org/x/build/internal/buildstats"
"golang.org/x/build/internal/coordinator/pool"
"golang.org/x/build/internal/coordinator/pool/queue"
"golang.org/x/build/internal/coordinator/schedule"
"golang.org/x/build/internal/singleflight"
"golang.org/x/build/internal/sourcecache"
"golang.org/x/build/internal/spanlog"
"golang.org/x/build/livelog"
"golang.org/x/build/maintner/maintnerd/apipb"
"golang.org/x/build/types"
perfstorage "golang.org/x/perf/storage"
)
// newBuild constructs a new *buildStatus from rev and commit details.
// detail may be only partially populated, but it must have at least RevBranch set.
// If rev.SubRev is set, then detail.SubRevBranch must also be set.
func newBuild(rev buildgo.BuilderRev, detail commitDetail) (*buildStatus, error) {
// Note: can't acquire statusMu in newBuild, as this is called
// from findTryWork -> newTrySet, which holds statusMu.
conf, ok := dashboard.Builders[rev.Name]
if !ok {
return nil, fmt.Errorf("unknown builder type %q", rev.Name)
}
if rev.Rev == "" {
return nil, fmt.Errorf("required field Rev is empty; got %+v", rev)
}
if detail.RevBranch == "" {
return nil, fmt.Errorf("required field RevBranch is empty; got %+v", detail)
}
if rev.SubRev != "" && detail.SubRevBranch == "" {
return nil, fmt.Errorf("field SubRevBranch is empty, required because SubRev is present; got %+v", detail)
}
ctx, cancel := context.WithCancel(context.Background())
return &buildStatus{
buildID: "B" + randHex(9),
BuilderRev: rev,
commitDetail: detail,
conf: conf,
startTime: time.Now(),
ctx: ctx,
cancel: cancel,
}, nil
}
// buildStatus is the status of a build.
type buildStatus struct {
// Immutable:
buildgo.BuilderRev
commitDetail
buildID string // "B" + 9 random hex
conf *dashboard.BuildConfig
startTime time.Time // actually time of newBuild (~same thing)
trySet *trySet // or nil
onceInitHelpers sync.Once // guards call of onceInitHelpersFunc
helpers <-chan buildlet.Client
ctx context.Context // used to start the build
cancel context.CancelFunc // used to cancel context; for use by setDone only
hasBuildlet int32 // atomic: non-zero if this build has a buildlet; for status.go.
mu sync.Mutex // guards following
canceled bool // whether this build was forcefully canceled, so errors should be ignored
schedItem *queue.SchedItem // for the initial buildlet (ignoring helpers for now)
logURL string // if non-empty, permanent URL of log
bc buildlet.Client // nil initially, until pool returns one
done time.Time // finished running
succeeded bool // set when done
output livelog.Buffer // stdout and stderr
events []eventAndTime
useSnapshotMemo map[string]bool // memoized result of useSnapshotFor(rev), where the key is rev
}
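// For illustration (hypothetical values), NameAndBranch below renders a
// builder named "linux-amd64" as follows:
//
//	RevBranch == "master"                => "linux-amd64"
//	RevBranch == "release-branch.go1.15" => "linux-amd64 (Go 1.15.x)"
//	RevBranch == "dev.typeparams"        => "linux-amd64 (go branch dev.typeparams)"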
func (st *buildStatus) NameAndBranch() string {
result := st.Name
if st.RevBranch != "master" {
// For the common and currently-only case of
// "release-branch.go1.15" say "linux-amd64 (Go 1.15.x)"
const releasePrefix = "release-branch.go"
if strings.HasPrefix(st.RevBranch, releasePrefix) {
result = fmt.Sprintf("%s (Go %s.x)", st.Name, strings.TrimPrefix(st.RevBranch, releasePrefix))
} else {
// But if we ever support building other branches,
// fall back to something verbose until we add a
// special case:
result = fmt.Sprintf("%s (go branch %s)", st.Name, st.RevBranch)
}
}
// For an x repo running on a CL in a different repo,
// add a prefix specifying the name of the x repo.
if st.SubName != "" && st.trySet != nil && st.SubName != st.trySet.Project {
result = "(x/" + st.SubName + ") " + result
}
return result
}
// cancelBuild marks a build as no longer wanted, cancels its context,
// and tears down its buildlet.
func (st *buildStatus) cancelBuild() {
st.mu.Lock()
if st.canceled {
// Already done. Shouldn't happen currently, but make
// it safe for duplicate calls in the future.
st.mu.Unlock()
return
}
st.canceled = true
st.output.Close()
// cancel the context, which stops the creation of helper
// buildlets, etc. The context isn't plumbed everywhere yet,
// so we also forcefully close its buildlet out from under it
// to trigger a failure. When we get the failure later, we
// just ignore it (knowing that the canceled bit was set
// true).
st.cancel()
bc := st.bc
st.mu.Unlock()
if bc != nil {
// Closing the buildlet may be slow (up to ~10 seconds on a
// wedged buildlet), which is why st.bc was copied and st.mu
// released above before calling Close here.
bc.Close()
}
}
func (st *buildStatus) setDone(succeeded bool) {
st.mu.Lock()
defer st.mu.Unlock()
if st.canceled {
return
}
st.succeeded = succeeded
st.done = time.Now()
st.output.Close()
st.cancel()
}
func (st *buildStatus) isRunning() bool {
st.mu.Lock()
defer st.mu.Unlock()
return st.isRunningLocked()
}
func (st *buildStatus) isRunningLocked() bool { return st.done.IsZero() }
func (st *buildStatus) logf(format string, args ...interface{}) {
log.Printf("[build %s %s]: %s", st.Name, st.Rev, fmt.Sprintf(format, args...))
}
// start starts the build in a new goroutine.
// The buildStatus's context is closed when the build is complete,
// successfully or not.
func (st *buildStatus) start() {
setStatus(st.BuilderRev, st)
go func() {
err := st.build()
if err == errSkipBuildDueToDeps {
st.setDone(true)
} else {
if err != nil {
fmt.Fprintf(st, "\n\nError: %v\n", err)
log.Println(st.BuilderRev, "failed:", err)
}
st.setDone(err == nil)
pool.CoordinatorProcess().PutBuildRecord(st.buildRecord())
}
markDone(st.BuilderRev)
}()
}
func (st *buildStatus) buildletPool() pool.Buildlet {
return pool.ForHost(st.conf.HostConfig())
}
func (st *buildStatus) expectedMakeBashDuration() time.Duration {
// TODO: base this on historical measurements, instead of statically configured.
// TODO: move this to dashboard/builders.go? But once we base this on historical
// measurements, it'll need GCE services (bigtable/bigquery?), so it's probably
// better in this file.
goos, goarch := st.conf.GOOS(), st.conf.GOARCH()
if goos == "linux" {
if goarch == "arm" {
return 4 * time.Minute
}
return 45 * time.Second
}
return 60 * time.Second
}
func (st *buildStatus) expectedBuildletStartDuration() time.Duration {
// TODO: move this to dashboard/builders.go? But once we base this on historical
// measurements, it'll need GCE services (bigtable/bigquery?), so it's probably
// better in this file.
p := st.buildletPool()
switch p.(type) {
case *pool.GCEBuildlet:
if strings.HasPrefix(st.Name, "android-") {
// about a minute for buildlet + minute for Android emulator to be usable
return 2 * time.Minute
}
return time.Minute
case *pool.EC2Buildlet:
// We lack historical data, so 2 * time.Minute is a safe overestimate.
return 2 * time.Minute
case *pool.ReverseBuildletPool:
goos, arch := st.conf.GOOS(), st.conf.GOARCH()
if goos == "darwin" {
if arch == "arm" || arch == "arm64" {
// iOS devices are reverse buildlets: one is either idle and ready now, or it isn't available at all.
return 0
}
if arch == "amd64" || arch == "386" {
return 0 // TODO: remove this once we're using VMware
// return 1 * time.Minute // VMware boot of hermetic OS X
}
}
}
return 0
}
// getHelpersReadySoon waits a bit (as a function of the build
// configuration) and starts getting the buildlets for test sharding
// ready, such that they're ready when make.bash is done. But we don't
// want to start too early, lest we waste idle resources during make.bash.
func (st *buildStatus) getHelpersReadySoon() {
if st.IsSubrepo() || st.conf.NumTestHelpers(st.isTry()) == 0 || st.conf.IsReverse() {
return
}
time.AfterFunc(st.expectedMakeBashDuration()-st.expectedBuildletStartDuration(),
func() {
st.LogEventTime("starting_helpers")
st.getHelpers() // and ignore the result.
})
}
// getHelpers returns a channel of buildlet test helpers, with an item
// sent as they become available. The channel is closed at the end.
func (st *buildStatus) getHelpers() <-chan buildlet.Client {
st.onceInitHelpers.Do(st.onceInitHelpersFunc)
return st.helpers
}
func (st *buildStatus) onceInitHelpersFunc() {
schedTmpl := &queue.SchedItem{
BuilderRev: st.BuilderRev,
HostType: st.conf.HostType,
IsTry: st.isTry(),
CommitTime: st.commitTime(),
Branch: st.branch(),
Repo: st.RepoOrGo(),
User: st.AuthorEmail,
}
st.helpers = getBuildlets(st.ctx, st.conf.NumTestHelpers(st.isTry()), schedTmpl, st)
}
// useSnapshot reports whether this type of build can use a snapshot
// of the make.bash output (which requires that the build config
// supports SplitMakeRun) and whether that snapshot exists.
func (st *buildStatus) useSnapshot() bool {
return st.useSnapshotFor(st.Rev)
}
func (st *buildStatus) useSnapshotFor(rev string) bool {
if st.conf.SkipSnapshot {
return false
}
st.mu.Lock()
defer st.mu.Unlock()
if b, ok := st.useSnapshotMemo[rev]; ok {
return b
}
br := st.BuilderRev
br.Rev = rev
b := st.conf.SplitMakeRun() && br.SnapshotExists(context.TODO(), pool.NewGCEConfiguration().BuildEnv())
if st.useSnapshotMemo == nil {
st.useSnapshotMemo = make(map[string]bool)
}
st.useSnapshotMemo[rev] = b
return b
}
func (st *buildStatus) forceSnapshotUsage() {
st.mu.Lock()
defer st.mu.Unlock()
if st.useSnapshotMemo == nil {
st.useSnapshotMemo = make(map[string]bool)
}
st.useSnapshotMemo[st.Rev] = true
}
func (st *buildStatus) getCrossCompileConfig() *dashboard.CrossCompileConfig {
if pool.KubeErr() != nil {
return nil
}
config := st.conf.CrossCompileConfig
if config == nil {
return nil
}
if config.AlwaysCrossCompile {
return config
}
if pool.NewGCEConfiguration().InStaging() || st.isTry() {
return config
}
return nil
}
func (st *buildStatus) checkDep(ctx context.Context, dep string) (have bool, err error) {
span := st.CreateSpan("ask_maintner_has_ancestor")
defer func() { span.Done(err) }()
tries := 0
for {
tries++
res, err := maintnerClient.HasAncestor(ctx, &apipb.HasAncestorRequest{
Commit: st.Rev,
Ancestor: dep,
})
if err != nil {
if tries == 3 {
span.Done(err)
return false, err
}
time.Sleep(1 * time.Second)
continue
}
if res.UnknownCommit {
select {
case <-ctx.Done():
return false, ctx.Err()
case <-time.After(1 * time.Second):
}
continue
}
return res.HasAncestor, nil
}
}
var errSkipBuildDueToDeps = errors.New("build was skipped due to missing deps")
func (st *buildStatus) getBuildlet() (buildlet.Client, error) {
schedItem := &queue.SchedItem{
HostType: st.conf.HostType,
IsTry: st.trySet != nil,
BuilderRev: st.BuilderRev,
CommitTime: st.commitTime(),
Repo: st.RepoOrGo(),
Branch: st.branch(),
User: st.AuthorEmail,
}
st.mu.Lock()
st.schedItem = schedItem
st.mu.Unlock()
sp := st.CreateSpan("get_buildlet")
bc, err := sched.GetBuildlet(st.ctx, schedItem)
sp.Done(err)
if err != nil {
err = fmt.Errorf("failed to get a buildlet: %v", err)
go st.reportErr(err)
return nil, err
}
atomic.StoreInt32(&st.hasBuildlet, 1)
st.mu.Lock()
st.bc = bc
st.mu.Unlock()
st.LogEventTime("using_buildlet", bc.IPPort())
return bc, nil
}
func (st *buildStatus) build() error {
if deps := st.conf.GoDeps; len(deps) > 0 {
ctx, cancel := context.WithTimeout(st.ctx, 30*time.Second)
defer cancel()
for _, dep := range deps {
has, err := st.checkDep(ctx, dep)
if err != nil {
fmt.Fprintf(st, "Error checking whether commit %s includes ancestor %s: %v\n", st.Rev, dep, err)
return err
}
if !has {
st.LogEventTime(eventSkipBuildMissingDep)
fmt.Fprintf(st, "skipping build; commit %s lacks ancestor %s\n", st.Rev, dep)
return errSkipBuildDueToDeps
}
}
cancel()
}
pool.CoordinatorProcess().PutBuildRecord(st.buildRecord())
sp := st.CreateSpan("checking_for_snapshot")
if pool.NewGCEConfiguration().InStaging() {
err := pool.NewGCEConfiguration().StorageClient().Bucket(pool.NewGCEConfiguration().BuildEnv().SnapBucket).Object(st.SnapshotObjectName()).Delete(context.Background())
st.LogEventTime("deleted_snapshot", fmt.Sprint(err))
}
snapshotExists := st.useSnapshot()
sp.Done(nil)
if config := st.getCrossCompileConfig(); !snapshotExists && config != nil {
if err := st.crossCompileMakeAndSnapshot(config); err != nil {
return err
}
st.forceSnapshotUsage()
}
bc, err := st.getBuildlet()
if err != nil {
return err
}
defer bc.Close()
if st.useSnapshot() {
if err := st.writeGoSnapshot(); err != nil {
return err
}
} else {
// Write the Go source and bootstrap tool chain in parallel.
var grp syncutil.Group
grp.Go(st.writeGoSource)
grp.Go(st.writeBootstrapToolchain)
if err := grp.Err(); err != nil {
return err
}
}
execStartTime := time.Now()
fmt.Fprintf(st, "%s at %v", st.Name, st.Rev)
if st.IsSubrepo() {
fmt.Fprintf(st, " building %v at %v", st.SubName, st.SubRev)
}
fmt.Fprint(st, "\n\n")
makeTest := st.CreateSpan("make_and_test") // warning: magic event name used by handleLogs
var remoteErr error
if st.conf.SplitMakeRun() {
remoteErr, err = st.runAllSharded()
} else {
remoteErr, err = st.runAllLegacy()
}
makeTest.Done(err)
// bc (aka st.bc) may be invalid past this point, so let's
// close it to make sure we don't accidentally use it.
bc.Close()
doneMsg := "all tests passed"
if remoteErr != nil {
doneMsg = "with test failures"
} else if err != nil {
doneMsg = "comm error: " + err.Error()
}
// If a build fails multiple times due to communication
// problems with the buildlet, assume something's wrong with
// the buildlet or machine and fail the build, rather than
// looping forever. This promotes the err (communication
// error) to a remoteErr (an error that occurred remotely and
// is terminal).
if rerr := st.repeatedCommunicationError(err); rerr != nil {
remoteErr = rerr
err = nil
doneMsg = "communication error to buildlet (promoted to terminal error): " + rerr.Error()
fmt.Fprintf(st, "\n%s\n", doneMsg)
}
if err != nil {
// Return the error *before* we create the magic
// "done" event (which the try coordinator looks for).
return err
}
st.LogEventTime(eventDone, doneMsg)
if devPause {
st.LogEventTime("DEV_MAIN_SLEEP")
time.Sleep(5 * time.Minute)
}
if st.trySet == nil {
buildLog := st.logs()
if remoteErr != nil {
// If we just have the line-or-so little
// banner at top, that means we didn't get any
// interesting output from the remote side, so
// include the remoteErr text. Otherwise,
// assume that remoteErr is redundant with the
// buildlog text itself.
if strings.Count(buildLog, "\n") < 10 {
buildLog += "\n" + remoteErr.Error()
}
}
if err := recordResult(st.BuilderRev, remoteErr == nil, buildLog, time.Since(execStartTime)); err != nil {
if remoteErr != nil {
return fmt.Errorf("Remote error was %q but failed to report it to the dashboard: %v", remoteErr, err)
}
return fmt.Errorf("Build succeeded but failed to report it to the dashboard: %v", err)
}
}
if remoteErr != nil {
return remoteErr
}
return nil
}
func (st *buildStatus) HasBuildlet() bool { return atomic.LoadInt32(&st.hasBuildlet) != 0 }
// useKeepGoingFlag reports whether this build should use the -k flag of 'go tool
// dist test', which makes it keep going even when some tests have failed.
func (st *buildStatus) useKeepGoingFlag() bool {
// For now, keep going for post-submit builders on release branches,
// because we prioritize seeing more complete test results over failing fast.
// Later on, we may start doing this for all post-submit builders on all branches.
// See golang.org/issue/14305.
//
// TODO(golang.org/issue/36181): A more ideal long term solution is one that reports
// a failure fast, but still keeps going to make all other test results available.
return !st.isTry() && strings.HasPrefix(st.branch(), "release-branch.go")
}
// isTry reports whether the build is a part of a TryBot (pre-submit) run.
// It may be a normal TryBot (part of the default try set) or a SlowBot.
func (st *buildStatus) isTry() bool { return st.trySet != nil }
// isSlowBot reports whether the build is an explicitly requested SlowBot.
func (st *buildStatus) isSlowBot() bool {
if st.trySet == nil {
return false
}
for _, conf := range st.trySet.slowBots {
if st.conf == conf {
return true
}
}
return false
}
func (st *buildStatus) buildRecord() *types.BuildRecord {
rec := &types.BuildRecord{
ID: st.buildID,
ProcessID: processID,
StartTime: st.startTime,
IsTry: st.isTry(),
IsSlowBot: st.isSlowBot(),
GoRev: st.Rev,
Rev: st.SubRevOrGoRev(),
Repo: st.RepoOrGo(),
Builder: st.Name,
OS: st.conf.GOOS(),
Arch: st.conf.GOARCH(),
}
// Log whether we used COS, so we can do queries to analyze
// Kubernetes vs COS performance for containers.
if st.conf.IsContainer() && pool.ForHost(st.conf.HostConfig()) == pool.NewGCEConfiguration().BuildletPool() {
rec.ContainerHost = "cos"
}
st.mu.Lock()
defer st.mu.Unlock()
// TODO: buildlet instance name
if !st.done.IsZero() {
rec.EndTime = st.done
rec.LogURL = st.logURL
rec.Seconds = rec.EndTime.Sub(rec.StartTime).Seconds()
if st.succeeded {
rec.Result = "ok"
} else {
rec.Result = "fail"
}
}
return rec
}
func (st *buildStatus) SpanRecord(sp *schedule.Span, err error) *types.SpanRecord {
rec := &types.SpanRecord{
BuildID: st.buildID,
IsTry: st.isTry(),
GoRev: st.Rev,
Rev: st.SubRevOrGoRev(),
Repo: st.RepoOrGo(),
Builder: st.Name,
OS: st.conf.GOOS(),
Arch: st.conf.GOARCH(),
Event: sp.Event(),
Detail: sp.OptText(),
StartTime: sp.Start(),
EndTime: sp.End(),
Seconds: sp.End().Sub(sp.Start()).Seconds(),
}
if err != nil {
rec.Error = err.Error()
}
return rec
}
// goBuilder returns a GoBuilder for this buildStatus.
func (st *buildStatus) goBuilder() buildgo.GoBuilder {
return buildgo.GoBuilder{
Logger: st,
BuilderRev: st.BuilderRev,
Conf: st.conf,
Goroot: "go",
}
}
// runAllSharded runs make.bash and then shards the test execution.
// remoteErr and err are as described at the top of this file.
//
// After runAllSharded returns, the caller must assume that st.bc
// might be invalid (It's possible that only one of the helper
// buildlets survived).
func (st *buildStatus) runAllSharded() (remoteErr, err error) {
st.getHelpersReadySoon()
if !st.useSnapshot() {
remoteErr, err = st.goBuilder().RunMake(st.ctx, st.bc, st)
if err != nil {
return nil, err
}
if remoteErr != nil {
return fmt.Errorf("build failed: %v", remoteErr), nil
}
}
if st.conf.StopAfterMake {
return nil, nil
}
if err := st.doSnapshot(st.bc); err != nil {
return nil, err
}
if st.conf.RunBench {
remoteErr, err = st.runBenchmarkTests()
} else if st.IsSubrepo() {
remoteErr, err = st.runSubrepoTests()
} else {
remoteErr, err = st.runTests(st.getHelpers())
}
if err == errBuildletsGone {
// Don't wrap this error. TODO: use xerrors.
return nil, errBuildletsGone
}
if err != nil {
return nil, fmt.Errorf("runTests: %v", err)
}
if remoteErr != nil {
return fmt.Errorf("tests failed: %v", remoteErr), nil
}
return nil, nil
}
func (st *buildStatus) crossCompileMakeAndSnapshot(config *dashboard.CrossCompileConfig) (err error) {
// TODO: currently we ditch this buildlet when we're done with
// the make.bash & snapshot. For extra speed later, we could
// keep it around and use it to "go test -c" each stdlib
// package's tests, and push the binary to each ARM helper
// machine. That might be too little gain for the complexity,
// though, or slower once we ship everything around.
ctx, cancel := context.WithCancel(st.ctx)
defer cancel()
sp := st.CreateSpan("get_buildlet_cross")
kubeBC, err := sched.GetBuildlet(ctx, &queue.SchedItem{
HostType: config.CompileHostType,
IsTry: st.trySet != nil,
BuilderRev: st.BuilderRev,
CommitTime: st.commitTime(),
Repo: st.RepoOrGo(),
Branch: st.branch(),
User: st.AuthorEmail,
})
sp.Done(err)
if err != nil {
err = fmt.Errorf("cross-compile and snapshot: failed to get a buildlet: %v", err)
go st.reportErr(err)
return err
}
defer kubeBC.Close()
if err := st.writeGoSourceTo(kubeBC, st.Rev, "go"); err != nil {
return err
}
makeSpan := st.CreateSpan("make_cross_compile_kube")
defer func() { makeSpan.Done(err) }()
goos, goarch := st.conf.GOOS(), st.conf.GOARCH()
remoteErr, err := kubeBC.Exec(st.ctx, "/bin/bash", buildlet.ExecOpts{
SystemLevel: true,
Args: []string{
"-c",
"cd $WORKDIR/go/src && " +
"./make.bash && " +
"cd .. && " +
"mv bin/*_*/* bin && " +
"rmdir bin/*_* && " +
"rm -rf pkg/linux_amd64 pkg/tool/linux_amd64 pkg/bootstrap pkg/obj",
},
Output: st,
ExtraEnv: []string{
"GOROOT_BOOTSTRAP=/go1.4",
"CGO_ENABLED=1",
"CC_FOR_TARGET=" + config.CCForTarget,
"GOOS=" + goos,
"GOARCH=" + goarch,
"GOARM=" + config.GOARM, // harmless if GOARCH != "arm"
},
Debug: true,
})
if err != nil {
return err
}
if remoteErr != nil {
// Add the "done" event if make.bash fails, otherwise
// try builders will loop forever:
st.LogEventTime(eventDone, fmt.Sprintf("make.bash failed: %v", remoteErr))
return fmt.Errorf("remote error: %v", remoteErr)
}
if err := st.doSnapshot(kubeBC); err != nil {
return err
}
return nil
}
// runAllLegacy executes all.bash (or .bat, or whatever) in the traditional way.
// remoteErr and err are as described at the top of this file.
//
// TODO(bradfitz,adg): delete this function when all builders
// can split make & run (and then delete the SplitMakeRun method)
func (st *buildStatus) runAllLegacy() (remoteErr, err error) {
allScript := st.conf.AllScript()
sp := st.CreateSpan("legacy_all_path", allScript)
remoteErr, err = st.bc.Exec(st.ctx, "./go/"+allScript, buildlet.ExecOpts{
Output: st,
ExtraEnv: st.conf.Env(),
Debug: true,
Args: st.conf.AllScriptArgs(),
})
if err != nil {
sp.Done(err)
return nil, err
}
if remoteErr != nil {
sp.Done(err)
return fmt.Errorf("all script failed: %v", remoteErr), nil
}
sp.Done(nil)
return nil, nil
}
func (st *buildStatus) doSnapshot(bc buildlet.Client) error {
// If we're using a pre-built snapshot, don't make another.
if st.useSnapshot() {
return nil
}
if st.conf.SkipSnapshot {
return nil
}
if pool.NewGCEConfiguration().BuildEnv().SnapBucket == "" {
// Build environment isn't configured to do snapshots.
return nil
}
if err := st.cleanForSnapshot(bc); err != nil {
return fmt.Errorf("cleanForSnapshot: %v", err)
}
if err := st.writeSnapshot(bc); err != nil {
return fmt.Errorf("writeSnapshot: %v", err)
}
return nil
}
func (st *buildStatus) writeGoSnapshot() (err error) {
return st.writeGoSnapshotTo(st.Rev, "go")
}
func (st *buildStatus) writeGoSnapshotTo(rev, dir string) (err error) {
sp := st.CreateSpan("write_snapshot_tar")
defer func() { sp.Done(err) }()
snapshotURL := pool.NewGCEConfiguration().BuildEnv().SnapshotURL(st.Name, rev)
if err := st.bc.PutTarFromURL(st.ctx, snapshotURL, dir); err != nil {
return fmt.Errorf("failed to put baseline snapshot to buildlet: %v", err)
}
return nil
}
func (st *buildStatus) writeGoSource() error {
return st.writeGoSourceTo(st.bc, st.Rev, "go")
}
func (st *buildStatus) writeGoSourceTo(bc buildlet.Client, rev, dir string) error {
// Write the VERSION file.
sp := st.CreateSpan("write_version_tar")
if err := bc.PutTar(st.ctx, buildgo.VersionTgz(rev), dir); err != nil {
return sp.Done(fmt.Errorf("writing VERSION tgz: %v", err))
}
srcTar, err := sourcecache.GetSourceTgz(st, "go", rev)
if err != nil {
return err
}
sp = st.CreateSpan("write_go_src_tar")
if err := bc.PutTar(st.ctx, srcTar, dir); err != nil {
return sp.Done(fmt.Errorf("writing tarball from Gerrit: %v", err))
}
return sp.Done(nil)
}
func (st *buildStatus) writeBootstrapToolchain() error {
u := st.conf.GoBootstrapURL(pool.NewGCEConfiguration().BuildEnv())
if u == "" {
return nil
}
const bootstrapDir = "go1.4" // might be newer; name is the default
sp := st.CreateSpan("write_go_bootstrap_tar")
return sp.Done(st.bc.PutTarFromURL(st.ctx, u, bootstrapDir))
}
func (st *buildStatus) cleanForSnapshot(bc buildlet.Client) error {
sp := st.CreateSpan("clean_for_snapshot")
return sp.Done(bc.RemoveAll(st.ctx,
"go/doc/gopher",
"go/pkg/bootstrap",
))
}
func (st *buildStatus) writeSnapshot(bc buildlet.Client) (err error) {
sp := st.CreateSpan("write_snapshot_to_gcs")
defer func() { sp.Done(err) }()
// A typical Go snapshot tarball in April 2022 is around 150 MB in size.
// Builders with a fast uplink speed can upload the tar within seconds or minutes.
// Reverse builders might be far away on the network, so be more lenient for them.
// (Fast builds require a sufficiently fast uplink speed or turning off snapshots,
// so the timeout here is mostly an upper bound to prevent infinite hangs.)
timeout := 5 * time.Minute
if st.conf.IsReverse() {
timeout *= 3
}
ctx, cancel := context.WithTimeout(st.ctx, timeout)
defer cancel()
tsp := st.CreateSpan("fetch_snapshot_reader_from_buildlet")
tgz, err := bc.GetTar(ctx, "go")
tsp.Done(err)
if err != nil {
return err
}
defer tgz.Close()
sc := pool.NewGCEConfiguration().StorageClient()
if sc == nil {
return errors.New("GCE configuration missing storage client")
}
bucket := pool.NewGCEConfiguration().BuildEnv().SnapBucket
if bucket == "" {
return errors.New("build environment missing snapshot bucket")
}
wr := sc.Bucket(bucket).Object(st.SnapshotObjectName()).NewWriter(ctx)
wr.ContentType = "application/octet-stream"
if n, err := io.Copy(wr, tgz); err != nil {
st.logf("failed to write snapshot to GCS after copying %d bytes: %v", n, err)
return err
}
return wr.Close()
}
// baselineCommit determines the baseline commit for this benchmark run.
func (st *buildStatus) baselineCommit() (baseline string, err error) {
sp := st.CreateSpan("list_go_releases")
defer func() { sp.Done(err) }()
// TODO(prattmic): Cache responses for a while. These won't change often.
res, err := maintnerClient.ListGoReleases(st.ctx, &apipb.ListGoReleasesRequest{})
if err != nil {
return "", err
}
releases := res.GetReleases()
if len(releases) == 0 {
return "", fmt.Errorf("no Go releases: %v", res)
}
if st.RevBranch == "master" {
// Testing master, baseline is latest release.
return releases[0].GetTagCommit(), nil
}
// Testing release branch. Baseline is latest patch version of this
// release.
for _, r := range releases {
if st.RevBranch == r.GetBranchName() {
return r.GetTagCommit(), nil
}
}
return "", fmt.Errorf("cannot find latest release for %s", st.RevBranch)
}
// reportErr reports an error to Stackdriver.
func (st *buildStatus) reportErr(err error) {
gceErrsClient := pool.NewGCEConfiguration().ErrorsClient()
if gceErrsClient == nil {
// errorsClient is nil in dev environments.
return
}
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
err = fmt.Errorf("buildID: %v, name: %s, hostType: %s, error: %v", st.buildID, st.conf.Name, st.conf.HostType, err)
gceErrsClient.ReportSync(ctx, errorreporting.Entry{Error: err})
}
func (st *buildStatus) distTestList() (names []string, remoteErr, err error) {
workDir, err := st.bc.WorkDir(st.ctx)
if err != nil {
err = fmt.Errorf("distTestList, WorkDir: %v", err)
return
}
goroot := st.conf.FilePathJoin(workDir, "go")
args := []string{"tool", "dist", "test", "--no-rebuild", "--list"}
if st.conf.IsRace() {
args = append(args, "--race")
}
if st.conf.CompileOnly {
args = append(args, "--compile-only")
}
var buf bytes.Buffer
remoteErr, err = st.bc.Exec(st.ctx, "./go/bin/go", buildlet.ExecOpts{
Output: &buf,
ExtraEnv: append(st.conf.Env(), "GOROOT="+goroot),
OnStartExec: func() { st.LogEventTime("discovering_tests") },
Path: []string{st.conf.FilePathJoin("$WORKDIR", "go", "bin"), "$PATH"},
Args: args,
})
if remoteErr != nil {
remoteErr = fmt.Errorf("Remote error: %v, %s", remoteErr, buf.Bytes())
err = nil
return
}
if err != nil {
err = fmt.Errorf("Exec error: %v, %s", err, buf.Bytes())
return
}
for _, test := range strings.Fields(buf.String()) {
isNormalTry := st.isTry() && !st.isSlowBot()
if !st.conf.ShouldRunDistTest(test, isNormalTry) {
continue
}
names = append(names, test)
}
return names, nil, nil
}
type token struct{}
// newTestSet returns a new testSet for the given dist test names
// (strings from "go tool dist test -list"), using testStats to
// estimate the duration of each test for scheduling purposes.
func (st *buildStatus) newTestSet(testStats *buildstats.TestStats, distTestNames []string) (*testSet, error) {
set := &testSet{
st: st,
testStats: testStats,
}
for _, name := range distTestNames {
set.items = append(set.items, &testItem{
set: set,
name: name,
duration: testStats.Duration(st.BuilderRev.Name, name),
take: make(chan token, 1),
done: make(chan token),
})
}
return set, nil
}
var (
testStats atomic.Value // of *buildstats.TestStats
testStatsLoader singleflight.Group
)
func getTestStats(sl spanlog.Logger) *buildstats.TestStats {
sp := sl.CreateSpan("get_test_stats")
ts, ok := testStats.Load().(*buildstats.TestStats)
if ok && ts.AsOf.After(time.Now().Add(-1*time.Hour)) {
sp.Done(nil)
return ts
}
v, err, _ := testStatsLoader.Do("", func() (interface{}, error) {
log.Printf("getTestStats: reloading from BigQuery...")
sp := sl.CreateSpan("query_test_stats")
ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
defer cancel()
ts, err := buildstats.QueryTestStats(ctx, pool.NewGCEConfiguration().BuildEnv())
sp.Done(err)
if err != nil {
log.Printf("getTestStats: error: %v", err)
return nil, err
}
testStats.Store(ts)
return ts, nil
})
if err != nil {
sp.Done(err)
return nil
}
sp.Done(nil)
return v.(*buildstats.TestStats)
}
func (st *buildStatus) runSubrepoTests() (remoteErr, err error) {
st.LogEventTime("fetching_subrepo", st.SubName)
workDir, err := st.bc.WorkDir(st.ctx)
if err != nil {
err = fmt.Errorf("error discovering workdir for helper %s: %v", st.bc.IPPort(), err)
return nil, err
}
goroot := st.conf.FilePathJoin(workDir, "go")
gopath := st.conf.FilePathJoin(workDir, "gopath")
// A goTestRun represents a single invocation of the 'go test' command.
type goTestRun struct {
Dir string // Directory where 'go test' should be executed.
Patterns []string // Import path patterns to provide to 'go test'.
}
// Test all packages selected by the "./..." pattern at the repository root.
// (If there are modules in subdirectories, they'll be found and handled below.)
repoPath := importPathOfRepo(st.SubName)
testRuns := []goTestRun{{
Dir: "gopath/src/" + repoPath,
Patterns: []string{"./..."},
}}
// Check out the provided sub-repo to the buildlet's workspace so we
// can find go.mod files and run tests in it.
{
tgz, err := sourcecache.GetSourceTgz(st, st.SubName, st.SubRev)
if errors.As(err, new(sourcecache.TooBigError)) {
// Source being too big is a non-retryable error.
return err, nil
} else if err != nil {
return nil, err
}
err = st.bc.PutTar(st.ctx, tgz, "gopath/src/"+repoPath)
if err != nil {
return nil, err
}
}
// Look for inner modules, in order to test them too. See golang.org/issue/32528.
sp := st.CreateSpan("listing_subrepo_modules", st.SubName)
err = st.bc.ListDir(st.ctx, "gopath/src/"+repoPath, buildlet.ListDirOpts{Recursive: true}, func(e buildlet.DirEntry) {
goModFile := path.Base(e.Name()) == "go.mod" && !e.IsDir()
if !goModFile {
return
}
// Found a go.mod file in a subdirectory, which indicates the root of a module.
modulePath := path.Join(repoPath, path.Dir(e.Name()))
if modulePath == repoPath {
// This is the go.mod file at the repository root.
// It's already a part of testRuns, so skip it.
return
} else if ignoredByGoTool(modulePath) || isVendored(modulePath) {
// go.mod file is in a directory we're not looking to support, so skip it.
return
}
// Add an additional test run entry that will test all packages in this module.
testRuns = append(testRuns, goTestRun{
Dir: "gopath/src/" + modulePath,
Patterns: []string{"./..."},
})
})
sp.Done(err)
if err != nil {
return nil, err
}
// Finally, execute all of the test runs.
// If any fail, keep going so that all test results are included in the output.
sp = st.CreateSpan("running_subrepo_tests", st.SubName)
defer func() { sp.Done(err) }()
env := append(st.conf.Env(),
"GOROOT="+goroot,
"GOPATH="+gopath,
)
if !st.conf.IsReverse() {
// GKE value but will be ignored/overwritten by reverse buildlets
env = append(env, "GOPROXY="+moduleProxy())
}
env = append(env, st.conf.ModulesEnv(st.SubName)...)
args := []string{"test"}
if !st.conf.IsLongTest() {
args = append(args, "-short")
}
if st.conf.IsRace() {
args = append(args, "-race")
}
var remoteErrors []error
for _, tr := range testRuns {
rErr, err := st.bc.Exec(st.ctx, "./go/bin/go", buildlet.ExecOpts{
Debug: true, // make buildlet print extra debug in output for failures
Output: st,
Dir: tr.Dir,
ExtraEnv: env,
Path: []string{st.conf.FilePathJoin("$WORKDIR", "go", "bin"), "$PATH"},
Args: append(args, tr.Patterns...),
})
if err != nil {
// A network/communication error. Give up here;
// the caller can retry as it sees fit.
return nil, err
} else if rErr != nil {
// An error occurred remotely and is terminal, but we want to
// keep testing other packages and report their failures too,
// rather than stopping short.
remoteErrors = append(remoteErrors, rErr)
}
}
if len(remoteErrors) > 0 {
return multiError(remoteErrors), nil
}
return nil, nil
}
// ignoredByGoTool reports whether the given import path corresponds
// to a directory that would be ignored by the go tool.
//
// The logic of the go tool for ignoring directories is documented at
// https://golang.org/cmd/go/#hdr-Package_lists_and_patterns:
//
// Directory and file names that begin with "." or "_" are ignored
// by the go tool, as are directories named "testdata".
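//
// For example (hypothetical import paths), ignoredByGoTool reports true for
// "golang.org/x/tools/internal/_asm" and "golang.org/x/tools/cmd/foo/testdata/mod",
// and false for "golang.org/x/tools/cmd/godoc".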
func ignoredByGoTool(importPath string) bool {
for _, el := range strings.Split(importPath, "/") {
if strings.HasPrefix(el, ".") || strings.HasPrefix(el, "_") || el == "testdata" {
return true
}
}
return false
}
// isVendored reports whether the given import path corresponds
// to a Go package that is inside a vendor directory.
//
// The logic for what is considered a vendor directory is documented at
// https://golang.org/cmd/go/#hdr-Vendor_Directories.
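//
// For example (hypothetical import paths), isVendored reports true for
// "vendor/golang.org/x/net/http2" and "cmd/coordinator/vendor/foo",
// and false for "golang.org/x/net/http2".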
func isVendored(importPath string) bool {
return strings.HasPrefix(importPath, "vendor/") ||
strings.Contains(importPath, "/vendor/")
}
// multiError is a concatenation of multiple errors.
// There must be one or more errors, and all must be non-nil.
type multiError []error
// Error concatenates all error strings into a single string,
// using a semicolon and space as a separator.
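// For example, a hypothetical multiError{errors.New("go test timed out"), errors.New("build failed")}
// has the Error value "go test timed out; build failed".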
func (m multiError) Error() string {
if len(m) == 1 {
return m[0].Error()
}
var b strings.Builder
for i, e := range m {
if i != 0 {
b.WriteString("; ")
}
b.WriteString(e.Error())
}
return b.String()
}
// moduleProxy returns the GOPROXY environment value to use for module-enabled
// tests.
//
// We go through an internal (10.0.0.0/8) proxy that then hits
// https://proxy.golang.org/ so we're still able to firewall
// non-internal outbound connections on builder nodes.
//
// This moduleProxy func in prod mode (when running on GKE) returns an http
// URL to the current GKE pod's IP with a Kubernetes NodePort service
// port that forwards back to the coordinator's 8123. See comment below.
//
// In localhost dev mode it just returns the value of GOPROXY.
func moduleProxy() string {
// If we're running on localhost, just use the current environment's value.
if pool.NewGCEConfiguration().BuildEnv() == nil || !pool.NewGCEConfiguration().BuildEnv().IsProd {
// If empty, use installed VCS tools as usual to fetch modules.
return os.Getenv("GOPROXY")
}
// We run a NodePort service on each GKE node
// (cmd/coordinator/module-proxy-service.yaml) on port 30157
// that maps back to the coordinator's port 8123. (We could round
// robin over all the GKE nodes' IPs if we wanted, but the
// coordinator is running on GKE so our node by definition is
// up, so just use it. It won't be much traffic.)
// TODO: migrate to a GKE internal load balancer with an internal static IP
// once we migrate symbolic-datum-552 off a Legacy VPC network to the modern
// scheme that supports internal static IPs.
return "http://" + pool.NewGCEConfiguration().GKENodeHostname() + ":30157"
}
// runBenchmarkTests runs benchmarks from x/benchmarks when RunBench is set.
func (st *buildStatus) runBenchmarkTests() (remoteErr, err error) {
if st.SubName != "benchmarks" {
return nil, fmt.Errorf("benchmark tests only supported in x/benchmarks")
}
const baselineDir = "gobaseline"
workDir, err := st.bc.WorkDir(st.ctx)
if err != nil {
err = fmt.Errorf("error discovering workdir for helper %s: %v", st.bc.IPPort(), err)
return nil, err
}
goroot := st.conf.FilePathJoin(workDir, "go")
baselineGoroot := st.conf.FilePathJoin(workDir, baselineDir)
gopath := st.conf.FilePathJoin(workDir, "gopath")
repoPath := importPathOfRepo(st.SubName)
// Install baseline toolchain in addition to the experiment toolchain.
sp := st.CreateSpan("install_baseline")
baseline, err := st.baselineCommit()
if err != nil {
return nil, sp.Done(fmt.Errorf("error finding baseline commit: %w", err))
}
fmt.Fprintf(st, "Baseline toolchain %v\n", baseline)
if st.useSnapshotFor(baseline) {
if err := st.writeGoSnapshotTo(baseline, baselineDir); err != nil {
return nil, sp.Done(fmt.Errorf("error writing baseline snapshot: %w", err))
}
} else {
if err := st.writeGoSourceTo(st.bc, baseline, baselineDir); err != nil {
return nil, sp.Done(fmt.Errorf("error writing baseline source: %w", err))
}
br := st.BuilderRev
br.Rev = baseline
builder := buildgo.GoBuilder{
Logger: st,
BuilderRev: br,
Conf: st.conf,
Goroot: baselineDir,
// Use the primary GOROOT as GOROOT_BOOTSTRAP. The
// typical bootstrap toolchain may not be available if
// the primary toolchain was installed from a snapshot.
GorootBootstrap: goroot,
}
remoteErr, err = builder.RunMake(st.ctx, st.bc, st)
if err != nil {
return nil, sp.Done(err)
}
if remoteErr != nil {
return sp.Done(remoteErr), nil
}
}
sp.Done(nil)
st.LogEventTime("fetching_subrepo", st.SubName)
// Check out the provided sub-repo to the buildlet's workspace so we
// can run scripts from the repo.
{
tgz, err := sourcecache.GetSourceTgz(st, st.SubName, st.SubRev)
if errors.As(err, new(sourcecache.TooBigError)) {
// Source being too big is a non-retryable error.
return err, nil
} else if err != nil {
return nil, err
}
err = st.bc.PutTar(st.ctx, tgz, "gopath/src/"+repoPath)
if err != nil {
return nil, err
}
}
// Run golang.org/x/benchmarks/cmd/bench to perform benchmarks.
sp = st.CreateSpan("running_benchmark_tests", st.SubName)
defer func() { sp.Done(err) }()
env := append(st.conf.Env(),
"BENCH_BASELINE_GOROOT="+baselineGoroot,
"BENCH_BRANCH="+st.RevBranch,
"GOROOT="+goroot,
"GOPATH="+gopath,
"GOPROXY="+moduleProxy(), // GKE value but will be ignored/overwritten by reverse buildlets
)
env = append(env, st.conf.ModulesEnv(st.SubName)...)
rErr, err := st.bc.Exec(st.ctx, "./go/bin/go", buildlet.ExecOpts{
Debug: true, // make buildlet print extra debug in output for failures
Output: st,
Dir: "gopath/src/" + repoPath,
ExtraEnv: env,
Path: []string{st.conf.FilePathJoin("$WORKDIR", "go", "bin"), "$PATH"},
Args: []string{"run", repoPath + "/cmd/bench"},
})
if err != nil || rErr != nil {
return rErr, err
}
// Upload benchmark results on success.
if err := st.uploadBenchResults(baseline); err != nil {
return nil, err
}
return nil, nil
}
func (st *buildStatus) uploadBenchResults(baseline string) (err error) {
sp := st.CreateSpan("upload_bench_results")
defer func() { sp.Done(err) }()
s := pool.NewGCEConfiguration().BuildEnv().PerfDataURL
if s == "" {
log.Printf("No perfdata URL, skipping benchmark upload")
return nil
}
client := &perfstorage.Client{BaseURL: s, HTTPClient: pool.NewGCEConfiguration().OAuthHTTPClient()}
u := client.NewUpload(st.ctx)
w, err := u.CreateFile("results")
if err != nil {
u.Abort()
return fmt.Errorf("error creating perfdata file: %w", err)
}
// Prepend some useful metadata.
var b strings.Builder
fmt.Fprintf(&b, "experiment-commit: %s\n", st.Rev)
fmt.Fprintf(&b, "experiment-commit-time: %s\n", st.RevCommitTime.In(time.UTC).Format(time.RFC3339Nano))
fmt.Fprintf(&b, "baseline-commit: %s\n", baseline)
fmt.Fprintf(&b, "benchmarks-commit: %s\n", st.SubRev)
fmt.Fprintf(&b, "post-submit: %t\n", st.trySet == nil)
if _, err := w.Write([]byte(b.String())); err != nil {
u.Abort()
return fmt.Errorf("error writing perfdata metadata with contents %q: %w", b.String(), err)
}
// TODO(prattmic): Full log output may contain non-benchmark output
// that can be erroneously parsed as benchfmt.
if _, err := w.Write([]byte(st.logs())); err != nil {
u.Abort()
return fmt.Errorf("error writing perfdata file with contents %q: %w", st.logs(), err)
}
status, err := u.Commit()
if err != nil {
return fmt.Errorf("error committing perfdata file: %w", err)
}
st.LogEventTime("bench_upload", status.UploadID)
return nil
}
var errBuildletsGone = errors.New("runTests: dist test failed: all buildlets had network errors or timeouts, yet tests remain")
// runTests is only called for builders which support a split make/run
// (should be everything, at least soon). Currently (2015-05-27) iOS
// and Android do not.
//
// After runTests completes, the caller must assume that st.bc might be invalid
// (It's possible that only one of the helper buildlets survived).
func (st *buildStatus) runTests(helpers <-chan buildlet.Client) (remoteErr, err error) {
testNames, remoteErr, err := st.distTestList()
if remoteErr != nil {
return fmt.Errorf("distTestList remote: %v", remoteErr), nil
}
if err != nil {
return nil, fmt.Errorf("distTestList exec: %v", err)
}
testStats := getTestStats(st)
set, err := st.newTestSet(testStats, testNames)
if err != nil {
return nil, err
}
st.LogEventTime("starting_tests", fmt.Sprintf("%d tests", len(set.items)))
startTime := time.Now()
workDir, err := st.bc.WorkDir(st.ctx)
if err != nil {
return nil, fmt.Errorf("error discovering workdir for main buildlet, %s: %v", st.bc.Name(), err)
}
mainBuildletGoroot := st.conf.FilePathJoin(workDir, "go")
mainBuildletGopath := st.conf.FilePathJoin(workDir, "gopath")
// We use our original buildlet to run the tests in order, to
// make the streaming somewhat smooth and not incredibly
// lumpy. The rest of the buildlets run the largest tests
// first (critical path scheduling).
// The buildletActivity WaitGroup is used to track when all
// the buildlets are dead or done.
var buildletActivity sync.WaitGroup
buildletActivity.Add(2) // one per goroutine below (main + helper launcher goroutine)
go func() {
defer buildletActivity.Done() // for the per-goroutine Add(2) above
for !st.bc.IsBroken() {
tis, ok := set.testsToRunInOrder()
if !ok {
select {
case <-st.ctx.Done():
return
case <-time.After(5 * time.Second):
}
continue
}
st.runTestsOnBuildlet(st.bc, tis, mainBuildletGoroot, mainBuildletGopath)
}
st.LogEventTime("main_buildlet_broken", st.bc.Name())
}()
go func() {
defer buildletActivity.Done() // for the per-goroutine Add(2) above
for helper := range helpers {
buildletActivity.Add(1)
go func(bc buildlet.Client) {
defer buildletActivity.Done() // for the per-helper Add(1) above
defer st.LogEventTime("closed_helper", bc.Name())
defer bc.Close()
if devPause {
defer time.Sleep(5 * time.Minute)
defer st.LogEventTime("DEV_HELPER_SLEEP", bc.Name())
}
st.LogEventTime("got_empty_test_helper", bc.String())
if err := bc.PutTarFromURL(st.ctx, st.SnapshotURL(pool.NewGCEConfiguration().BuildEnv()), "go"); err != nil {
log.Printf("failed to extract snapshot for helper %s: %v", bc.Name(), err)
return
}
workDir, err := bc.WorkDir(st.ctx)
if err != nil {
log.Printf("error discovering workdir for helper %s: %v", bc.Name(), err)
return
}
st.LogEventTime("test_helper_set_up", bc.Name())
goroot := st.conf.FilePathJoin(workDir, "go")
gopath := st.conf.FilePathJoin(workDir, "gopath")
for !bc.IsBroken() {
tis, ok := set.testsToRunBiggestFirst()
if !ok {
st.LogEventTime("no_new_tests_remain", bc.Name())
return
}
st.runTestsOnBuildlet(bc, tis, goroot, gopath)
}
st.LogEventTime("test_helper_is_broken", bc.Name())
}(helper)
}
}()
// Convert a sync.WaitGroup into a channel.
// Aside: https://groups.google.com/forum/#!topic/golang-dev/7fjGWuImu5k
buildletsGone := make(chan struct{})
go func() {
buildletActivity.Wait()
close(buildletsGone)
}()
var lastMetadata string
var lastHeader string
var serialDuration time.Duration
for _, ti := range set.items {
AwaitDone:
for {
timer := time.NewTimer(30 * time.Second)
select {
case <-ti.done: // wait for success
timer.Stop()
break AwaitDone
case <-timer.C:
st.LogEventTime("still_waiting_on_test", ti.name)
case <-buildletsGone:
set.cancelAll()
return nil, errBuildletsGone
}
}
serialDuration += ti.execDuration
if len(ti.output) > 0 {
metadata, header, out := parseOutputAndHeader(ti.output)
printHeader := false
if metadata != lastMetadata {
lastMetadata = metadata
fmt.Fprintf(st, "\n%s\n", metadata)
// Always include the test header after
// metadata changes. This is a readability
// optimization that ensures that tests are
// always immediately preceded by their test
// banner, even if it is a duplicate banner
// because the test metadata changed.
printHeader = true
}
if header != lastHeader {
lastHeader = header
printHeader = true
}
if printHeader {
fmt.Fprintf(st, "\n%s\n", header)
}
if pool.NewGCEConfiguration().InStaging() {
out = bytes.TrimSuffix(out, nl)
st.Write(out)
fmt.Fprintf(st, " (shard %s; par=%d)\n", ti.shardIPPort, ti.groupSize)
} else {
st.Write(out)
}
}
if ti.remoteErr != nil {
set.cancelAll()
return fmt.Errorf("dist test failed: %s: %v", ti.name, ti.remoteErr), nil
}
}
elapsed := time.Since(startTime)
var msg string
if st.conf.NumTestHelpers(st.isTry()) > 0 {
msg = fmt.Sprintf("took %v; aggregate %v; saved %v", elapsed, serialDuration, serialDuration-elapsed)
} else {
msg = fmt.Sprintf("took %v", elapsed)
}
st.LogEventTime("tests_complete", msg)
fmt.Fprintf(st, "\nAll tests passed.\n")
return nil, nil
}
const (
banner = "XXXBANNERXXX:" // flag passed to dist
bannerPrefix = "\n" + banner // with the newline added by dist
metadataBannerPrefix = bannerPrefix + "Test execution environment."
outputBanner = "##### " // banner to display in output.
)
var (
bannerPrefixBytes = []byte(bannerPrefix)
metadataBannerPrefixBytes = []byte(metadataBannerPrefix)
)
// parseOutputAndHeader parses b and returns the optional test execution
// environment metadata, the display header (e.g., "##### Testing packages."),
// and the following output.
//
// metadata is the optional execution environment metadata block. e.g.,
//
// ##### Test execution environment.
// # GOARCH: amd64
// # CPU: Intel(R) Xeon(R) W-2135 CPU @ 3.70GHz
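//
// For illustration (hypothetical dist output), an input of
//
//	"\nXXXBANNERXXX:Testing packages.\nok  \tarchive/tar\t0.5s\n"
//
// yields metadata == "", header == "##### Testing packages.", and
// out == "ok  \tarchive/tar\t0.5s\n".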
func parseOutputAndHeader(b []byte) (metadata, header string, out []byte) {
if !bytes.HasPrefix(b, bannerPrefixBytes) {
return "", "", b
}
if bytes.HasPrefix(b, metadataBannerPrefixBytes) {
// Header includes everything up to and including the next
// banner.
rem := b[len(metadataBannerPrefixBytes):]
i := bytes.Index(rem, bannerPrefixBytes)
if i == -1 {
// Metadata block without a following block doesn't
// make sense. Bail.
return "", "", b
}
bi := i + len(metadataBannerPrefixBytes)
// Metadata portion of header, skipping initial and trailing newlines.
metadata = strings.Trim(string(b[:bi]), "\n")
metadata = strings.Replace(metadata, banner, outputBanner, 1)
b = b[bi+1:] // skip newline at start of next banner.
} else {
b = b[1:] // skip newline
}
// Find end of primary test banner.
nl := bytes.IndexByte(b, '\n')
if nl == -1 {
// No newline, everything is header.
header = string(b)
b = nil
} else {
header = string(b[:nl])
b = b[nl+1:]
}
// Replace internal marker banner with the human-friendly version.
header = strings.Replace(header, banner, outputBanner, 1)
return metadata, header, b
}
// maxTestExecErrors is the number of test execution failures at which
// we give up and stop trying and instead permanently fail the test.
// Note that this is not related to whether the test failed remotely,
// but whether we were unable to start or complete watching it run.
// (A communication error)
const maxTestExecErrors = 3
// runTestsOnBuildlet runs tis on bc, using the optional goroot & gopath environment variables.
func (st *buildStatus) runTestsOnBuildlet(bc buildlet.Client, tis []*testItem, goroot, gopath string) {
names := make([]string, len(tis))
for i, ti := range tis {
names[i] = ti.name
if i > 0 && (!strings.HasPrefix(ti.name, "go_test:") || !strings.HasPrefix(names[0], "go_test:")) {
panic("only go_test:* tests may be merged")
}
}
var spanName string
var detail string
if len(names) == 1 {
spanName = "run_test:" + names[0]
detail = bc.Name()
} else {
spanName = "run_tests_multi"
detail = fmt.Sprintf("%s: %v", bc.Name(), names)
}
sp := st.CreateSpan(spanName, detail)
args := []string{"tool", "dist", "test", "--no-rebuild", "--banner=" + banner}
if st.conf.IsRace() {
args = append(args, "--race")
}
if st.conf.CompileOnly {
args = append(args, "--compile-only")
}
if st.useKeepGoingFlag() {
args = append(args, "-k")
}
args = append(args, names...)
var buf bytes.Buffer
t0 := time.Now()
timeout := st.conf.DistTestsExecTimeout(names)
ctx, cancel := context.WithTimeout(st.ctx, timeout)
defer cancel()
env := append(st.conf.Env(),
"GOROOT="+goroot,
"GOPATH="+gopath,
"GOPROXY="+moduleProxy(),
)
env = append(env, st.conf.ModulesEnv("go")...)
remoteErr, err := bc.Exec(ctx, "./go/bin/go", buildlet.ExecOpts{
// We set Dir to "." instead of the default ("go/bin") so when the dist tests
// try to run os/exec.Command("go", "test", ...), the LookPath of "go" doesn't
// return "./go.exe" (which exists in the current directory: "go/bin") and then
// fail when dist tries to run the binary in dir "$GOROOT/src", since
// "$GOROOT/src" + "./go.exe" doesn't exist. Perhaps LookPath should return
// an absolute path.
Dir: ".",
Output: &buf, // see "maybe stream lines" TODO below
ExtraEnv: env,
Path: []string{st.conf.FilePathJoin("$WORKDIR", "go", "bin"), "$PATH"},
Args: args,
})
execDuration := time.Since(t0)
sp.Done(err)
if err != nil {
bc.MarkBroken() // prevents reuse
for _, ti := range tis {
ti.numFail++
st.logf("Execution error running %s on %s: %v (numFails = %d)", ti.name, bc, err, ti.numFail)
if err == buildlet.ErrTimeout {
ti.failf("Test %q ran over %v limit (%v); saw output:\n%s", ti.name, timeout, execDuration, buf.Bytes())
} else if ti.numFail >= maxTestExecErrors {
ti.failf("Failed to schedule %q test after %d tries.\n", ti.name, maxTestExecErrors)
} else {
ti.retry()
}
}
return
}
out := buf.Bytes()
out = bytes.Replace(out, []byte("\nALL TESTS PASSED (some were excluded)\n"), nil, 1)
out = bytes.Replace(out, []byte("\nALL TESTS PASSED\n"), nil, 1)
for _, ti := range tis {
ti.output = out
ti.remoteErr = remoteErr
ti.execDuration = execDuration
ti.groupSize = len(tis)
ti.shardIPPort = bc.IPPort()
close(ti.done)
// After the first one, make the rest succeed with no output.
// TODO: maybe stream lines (set Output to a line-reading
// Writer instead of &buf). for now we just wait for them in
// ~10 second batches. Doesn't look as smooth on the output,
// though.
out = nil
remoteErr = nil
execDuration = 0
}
}
func (st *buildStatus) CreateSpan(event string, optText ...string) spanlog.Span {
return schedule.CreateSpan(st, event, optText...)
}
func (st *buildStatus) LogEventTime(event string, optText ...string) {
if len(optText) > 1 {
panic("usage")
}
if pool.NewGCEConfiguration().InStaging() {
st.logf("%s %v", event, optText)
}
st.mu.Lock()
defer st.mu.Unlock()
var text string
if len(optText) > 0 {
text = optText[0]
}
st.events = append(st.events, eventAndTime{
t: time.Now(),
evt: event,
text: text,
})
}
func (st *buildStatus) hasEvent(event string) bool {
st.mu.Lock()
defer st.mu.Unlock()
for _, e := range st.events {
if e.evt == event {
return true
}
}
return false
}
// HTMLStatusLine returns the HTML to show within the <pre> block on
// the main page's list of active builds.
func (st *buildStatus) HTMLStatusLine() template.HTML { return st.htmlStatus(singleLine) }
func (st *buildStatus) HTMLStatusTruncated() template.HTML { return st.htmlStatus(truncated) }
func (st *buildStatus) HTMLStatus() template.HTML { return st.htmlStatus(full) }
func strSliceTo(s string, n int) string {
if len(s) <= n {
return s
}
return s[:n]
}
type buildStatusDetail int
const (
singleLine buildStatusDetail = iota
truncated
full
)
func (st *buildStatus) htmlStatus(detail buildStatusDetail) template.HTML {
if st == nil {
return "[nil]"
}
st.mu.Lock()
defer st.mu.Unlock()
urlPrefix := "https://go-review.googlesource.com/#/q/"
if st.Rev == "" {
log.Printf("warning: st.Rev is empty")
}
var buf bytes.Buffer
fmt.Fprintf(&buf, "<a href='https://github.com/golang/go/wiki/DashboardBuilders'>%s</a> rev <a href='%s%s'>%s</a>",
st.Name, urlPrefix, st.Rev, strSliceTo(st.Rev, 8))
if st.IsSubrepo() {
if st.SubRev == "" {
log.Printf("warning: st.SubRev is empty on subrepo")
}
fmt.Fprintf(&buf, " (sub-repo %s rev <a href='%s%s'>%s</a>)",
st.SubName, urlPrefix, st.SubRev, strSliceTo(st.SubRev, 8))
}
if ts := st.trySet; ts != nil {
if ts.ChangeID == "" {
log.Printf("warning: ts.ChangeID is empty")
}
fmt.Fprintf(&buf, " (<a href='/try?commit=%v'>trybot set</a> for <a href='https://go-review.googlesource.com/#/q/%s'>%s</a>)",
strSliceTo(ts.Commit, 8),
ts.ChangeTriple(), strSliceTo(ts.ChangeID, 8))
}
var state string
if st.canceled {
state = "canceled"
} else if st.done.IsZero() {
if st.HasBuildlet() {
state = "running"
} else {
state = "waiting_for_machine"
}
} else if st.succeeded {
state = "succeeded"
} else {
state = "<font color='#700000'>failed</font>"
}
if detail > singleLine && st.bc != nil {
fmt.Fprintf(&buf, "; <a href='%s'>%s</a>; %s", html.EscapeString(st.logsURLLocked()), state, html.EscapeString(st.bc.String()))
} else {
fmt.Fprintf(&buf, "; <a href='%s'>%s</a>", html.EscapeString(st.logsURLLocked()), state)
}
t := st.done
if t.IsZero() {
t = st.startTime
}
fmt.Fprintf(&buf, ", %v ago", time.Since(t).Round(time.Second))
if detail > singleLine {
buf.WriteByte('\n')
lastLines := 0
if detail == truncated {
lastLines = 3
}
st.writeEventsLocked(&buf, true, lastLines)
}
return template.HTML(buf.String())
}
func (st *buildStatus) logsURLLocked() string {
if st.logURL != "" {
return st.logURL
}
var urlPrefix string
if pool.NewGCEConfiguration().BuildEnv() == buildenv.Production {
urlPrefix = "https://farmer.golang.org"
} else {
urlPrefix = "http://" + pool.NewGCEConfiguration().BuildEnv().StaticIP
}
if *mode == "dev" {
urlPrefix = "https://localhost:8119"
}
u := fmt.Sprintf("%v/temporarylogs?name=%s&rev=%s&st=%p", urlPrefix, st.Name, st.Rev, st)
if st.IsSubrepo() {
u += fmt.Sprintf("&subName=%v&subRev=%v", st.SubName, st.SubRev)
}
return u
}
// st.mu must be held.
// If numLines is greater than zero, it's the number of final lines to truncate to.
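// For illustration, the plain-text (htmlMode == false) rendering of a running
// build looks roughly like (timestamps and events hypothetical):
//
//	 2021-06-01T12:00:00Z get_buildlet
//	 2021-06-01T12:00:30Z using_buildlet 10.0.0.1:443
//	  +12.3s (now)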
func (st *buildStatus) writeEventsLocked(w io.Writer, htmlMode bool, numLines int) {
startAt := 0
if numLines > 0 {
startAt = len(st.events) - numLines
if startAt > 0 {
io.WriteString(w, "...\n")
} else {
startAt = 0
}
}
for i := startAt; i < len(st.events); i++ {
evt := st.events[i]
e := evt.evt
text := evt.text
if htmlMode {
if e == "running_exec" {
e = fmt.Sprintf("<a href='%s'>%s</a>", html.EscapeString(st.logsURLLocked()), e)
}
e = "<b>" + e + "</b>"
text = "<i>" + html.EscapeString(text) + "</i>"
}
fmt.Fprintf(w, " %v %s %s\n", evt.t.Format(time.RFC3339), e, text)
}
if st.isRunningLocked() && len(st.events) > 0 {
lastEvt := st.events[len(st.events)-1]
fmt.Fprintf(w, " %7s (now)\n", fmt.Sprintf("+%0.1fs", time.Since(lastEvt.t).Seconds()))
}
}
func (st *buildStatus) logs() string {
return st.output.String()
}
func (st *buildStatus) Write(p []byte) (n int, err error) {
return st.output.Write(p)
}
// repeatedCommunicationError takes a buildlet execution error (a
// network/communication error, as opposed to a remote execution that
// ran and had a non-zero exit status and we heard about) and
// conditionally promotes it to a terminal error. If this returns a
// non-nil value, the execErr should be considered terminal with the
// returned error.
func (st *buildStatus) repeatedCommunicationError(execErr error) error {
if execErr == nil {
return nil
}
// For now, only do this for plan9, which is flaky (Issue 31261)
if strings.HasPrefix(st.Name, "plan9-") && execErr == errBuildletsGone {
// TODO: give it two tries at least later (store state
// somewhere; global map?). But for now we're going to
// only give it one try.
return fmt.Errorf("network error promoted to terminal error: %v", execErr)
}
return nil
}
// commitTime returns the later of Rev's and SubRev's commit times.
func (st *buildStatus) commitTime() time.Time {
if st.RevCommitTime.Before(st.SubRevCommitTime) {
return st.SubRevCommitTime
}
return st.RevCommitTime
}
// branch returns the branch of SubRev if it is set, and the branch of Rev otherwise.
func (st *buildStatus) branch() string {
if st.SubRev != "" {
return st.SubRevBranch
}
return st.RevBranch
}