internal/coordinator/pool: move the gce buildlet pool into a pool package

This CL creates the internal/coordinator/pool package, which is
intended to contain all buildlet pool implementations. In order to
keep this change small and to carefully discover the interactions
between the gce buildlet pool and the rest of the coordinator, this
change only moves the gce buildlet pool over to the new package.

The next steps will be to move the rest of the buildlet pools over
to this package. After that, we will restructure the implementations
themselves in order to increase test coverage and make the pools
easier to test.
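
A minimal sketch of how a caller is expected to reach the GCE pool
after this change, using only the accessors added in this CL
(illustrative only; the printed project name depends on configuration):

    package main

    import (
        "fmt"

        "golang.org/x/build/internal/coordinator/pool"
    )

    func main() {
        // The GCE pool now satisfies the exported Buildlet interface and
        // is reached via an accessor instead of the old gcePool variable.
        var p pool.Buildlet = pool.GetGCEBuildletPool()
        fmt.Println(p.String()) // "GCE pool capacity: 0/0 instances; 0/0 CPUs"

        // Former coordinator globals (buildEnv, storageClient, ...) are
        // read through accessors; they remain nil until pool.InitGCE runs.
        if env := pool.GCEBuildEnv(); env != nil {
            fmt.Println(env.ProjectName)
        }
    }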

Updates golang/go#36841
Updates golang/go#38337

Change-Id: If82ae1b584bd77c697aa84fadf9011c9e79fa409
Reviewed-on: https://go-review.googlesource.com/c/build/+/227141
Run-TryBot: Carlos Amedee <carlos@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Alexander Rakoczy <alex@golang.org>
diff --git a/cmd/coordinator/coordinator.go b/cmd/coordinator/coordinator.go
index 7b166e8..6dc50a6 100644
--- a/cmd/coordinator/coordinator.go
+++ b/cmd/coordinator/coordinator.go
@@ -60,6 +60,7 @@
 	"golang.org/x/build/gerrit"
 	"golang.org/x/build/internal/buildgo"
 	"golang.org/x/build/internal/buildstats"
+	"golang.org/x/build/internal/coordinator/pool"
 	"golang.org/x/build/internal/secret"
 	"golang.org/x/build/internal/singleflight"
 	"golang.org/x/build/internal/sourcecache"
@@ -174,12 +175,12 @@
 	if autocertManager != nil {
 		config.GetCertificate = autocertManager.GetCertificate
 	} else {
-		certPEM, err := readGCSFile("farmer-cert.pem")
+		certPEM, err := pool.ReadGCSFile("farmer-cert.pem")
 		if err != nil {
 			log.Printf("cannot load TLS cert, skipping https: %v", err)
 			return
 		}
-		keyPEM, err := readGCSFile("farmer-key.pem")
+		keyPEM, err := pool.ReadGCSFile("farmer-key.pem")
 		if err != nil {
 			log.Printf("cannot load TLS key, skipping https: %v", err)
 			return
@@ -232,16 +233,6 @@
 	http.DefaultServeMux.ServeHTTP(w, r)
 }
 
-type loggerFunc func(event string, optText ...string)
-
-func (fn loggerFunc) LogEventTime(event string, optText ...string) {
-	fn(event, optText...)
-}
-
-func (fn loggerFunc) CreateSpan(event string, optText ...string) spanlog.Span {
-	return createSpan(fn, event, optText...)
-}
-
 // autocertManager is non-nil if LetsEncrypt is in use.
 var autocertManager *autocert.Manager
 
@@ -263,7 +254,7 @@
 
 	mustInitMasterKeyCache(sc)
 
-	err := initGCE(sc)
+	err := pool.InitGCE(sc, vmDeleteTimeout, testFiles, &basePinErr, isGCERemoteBuildlet, *buildEnvName, *mode)
 	if err != nil {
 		if *mode == "" {
 			*mode = "dev"
@@ -275,8 +266,8 @@
 		}
 	}
 
-	if bucket := buildEnv.AutoCertCacheBucket; bucket != "" {
-		if storageClient == nil {
+	if bucket := pool.GCEBuildEnv().AutoCertCacheBucket; bucket != "" {
+		if pool.GCEStorageClient() == nil {
 			log.Fatalf("expected storage client to be non-nil")
 		}
 		autocertManager = &autocert.Manager{
@@ -287,7 +278,7 @@
 				}
 				return nil
 			},
-			Cache: autocertcache.NewGoogleCloudStorageCache(storageClient, bucket),
+			Cache: autocertcache.NewGoogleCloudStorageCache(pool.GCEStorageClient(), bucket),
 		}
 	}
 
@@ -319,7 +310,7 @@
 		log.Printf("Failed to load static resources: %v", err)
 	}
 
-	dh := &builddash.Handler{Datastore: goDSClient, Maintner: maintnerClient}
+	dh := &builddash.Handler{Datastore: pool.GCEGoDSClient(), Maintner: maintnerClient}
 	gs := &gRPCServer{dashboardURL: "https://build.golang.org"}
 	protos.RegisterCoordinatorServer(grpcServer, gs)
 	http.HandleFunc("/", handleStatus)
@@ -352,14 +343,14 @@
 
 	if *mode == "dev" {
 		// TODO(crawshaw): do more in dev mode
-		gcePool.SetEnabled(*devEnableGCE)
+		pool.GetGCEBuildletPool().SetEnabled(*devEnableGCE)
 	} else {
-		go gcePool.cleanUpOldVMs()
+		go pool.GetGCEBuildletPool().CleanUpOldVMs()
 		if kubeErr == nil {
 			go kubePool.cleanUpOldPodsLoop(context.Background())
 		}
 
-		if inStaging {
+		if pool.GCEInStaging() {
 			dashboard.Builders = stagingClusterBuilders()
 		}
 
@@ -411,7 +402,7 @@
 		return
 	}
 	if !mayBuildRev(work) {
-		if inStaging {
+		if pool.GCEInStaging() {
 			if _, ok := dashboard.Builders[work.Name]; ok && logCantBuildStaging.Allow() {
 				log.Printf("may not build %v; skipping", work)
 			}
@@ -500,7 +491,7 @@
 		}
 		return false
 	}
-	if buildEnv.MaxBuilds > 0 && numCurrentBuilds() >= buildEnv.MaxBuilds {
+	if pool.GCEBuildEnv().MaxBuilds > 0 && numCurrentBuilds() >= pool.GCEBuildEnv().MaxBuilds {
 		return false
 	}
 	if buildConf.IsReverse() && !reversePool.CanBuild(buildConf.HostType) {
@@ -863,7 +854,7 @@
 // findTryWorkLoop.
 func findWorkLoop() {
 	// Useful for debugging a single run:
-	if inStaging && false {
+	if pool.GCEInStaging() && false {
 		const debugSubrepo = false
 		if debugSubrepo {
 			addWork(buildgo.BuilderRev{
@@ -1006,7 +997,7 @@
 					// then skip. But some builders on slow networks
 					// don't snapshot, so don't wait for them. They'll
 					// need to run make.bash first for x/ repos tests.
-					!builderInfo.SkipSnapshot && !rev.SnapshotExists(context.TODO(), buildEnv) {
+					!builderInfo.SkipSnapshot && !rev.SnapshotExists(context.TODO(), pool.GCEBuildEnv()) {
 					continue
 				}
 			}
@@ -1032,7 +1023,7 @@
 // findTryWorkLoop is a goroutine which loops periodically and queries
 // Gerrit for TryBot work.
 func findTryWorkLoop() {
-	if errTryDeps != nil {
+	if pool.GCETryDepsErr() != nil {
 		return
 	}
 	ticker := time.NewTicker(1 * time.Second)
@@ -1045,12 +1036,12 @@
 }
 
 func findTryWork() error {
-	if inStaging && !stagingTryWork {
+	if pool.GCEInStaging() && !stagingTryWork {
 		return nil
 	}
 	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) // should be milliseconds
 	defer cancel()
-	tryRes, err := maintnerClient.GoFindTryWork(ctx, &apipb.GoFindTryWorkRequest{ForStaging: inStaging})
+	tryRes, err := maintnerClient.GoFindTryWork(ctx, &apipb.GoFindTryWorkRequest{ForStaging: pool.GCEInStaging()})
 	if err != nil {
 		return err
 	}
@@ -1376,7 +1367,7 @@
 	msg := name + " beginning. Status page: https://farmer.golang.org/try?commit=" + ts.Commit[:8]
 
 	ctx := context.Background()
-	if ci, err := gerritClient.GetChangeDetail(ctx, ts.ChangeTriple()); err == nil {
+	if ci, err := pool.GCEGerritClient().GetChangeDetail(ctx, ts.ChangeTriple()); err == nil {
 		if len(ci.Messages) == 0 {
 			log.Printf("No Gerrit comments retrieved on %v", ts.ChangeTriple())
 		}
@@ -1391,7 +1382,7 @@
 	}
 
 	// Ignore error. This isn't critical.
-	gerritClient.SetReview(ctx, ts.ChangeTriple(), ts.Commit, gerrit.ReviewInput{Message: msg})
+	pool.GCEGerritClient().SetReview(ctx, ts.ChangeTriple(), ts.Commit, gerrit.ReviewInput{Message: msg})
 }
 
 // awaitTryBuild runs in its own goroutine and waits for a build in a
@@ -1524,7 +1515,7 @@
 		ts.mu.Unlock()
 
 		if numFail == 1 && remain > 0 {
-			if err := gerritClient.SetReview(context.Background(), ts.ChangeTriple(), ts.Commit, gerrit.ReviewInput{
+			if err := pool.GCEGerritClient().SetReview(context.Background(), ts.ChangeTriple(), ts.Commit, gerrit.ReviewInput{
 				Message: fmt.Sprintf(
 					"Build is still in progress...\n"+
 						"This change failed on %s:\n"+
@@ -1575,7 +1566,7 @@
 			// TODO: restore this functionality
 			// msg += fmt.Sprintf("\nBenchmark results are available at:\nhttps://perf.golang.org/search?q=cl:%d+try:%s", ts.ci.ChangeNumber, ts.tryID)
 		}
-		if err := gerritClient.SetReview(context.Background(), ts.ChangeTriple(), ts.Commit, gerrit.ReviewInput{
+		if err := pool.GCEGerritClient().SetReview(context.Background(), ts.ChangeTriple(), ts.Commit, gerrit.ReviewInput{
 			Message: buf.String(),
 			Labels: map[string]int{
 				"TryBot-Result": score,
@@ -1587,42 +1578,9 @@
 	}
 }
 
-type eventTimeLogger interface {
-	LogEventTime(event string, optText ...string)
-}
-
-// logger is the logging interface used within the coordinator.
-// It can both log a message at a point in time, as well
-// as log a span (something having a start and end time, as well as
-// a final success status).
-type logger interface {
-	eventTimeLogger // point in time
-	spanlog.Logger  // action spanning time
-}
-
-// buildletTimeoutOpt is a context.Value key for BuildletPool.GetBuildlet.
-type buildletTimeoutOpt struct{} // context Value key; value is time.Duration
-
-type BuildletPool interface {
-	// GetBuildlet returns a new buildlet client.
-	//
-	// The hostType is the key into the dashboard.Hosts
-	// map (such as "host-linux-jessie"), NOT the buidler type
-	// ("linux-386").
-	//
-	// Users of GetBuildlet must both call Client.Close when done
-	// with the client as well as cancel the provided Context.
-	//
-	// The ctx may have context values of type buildletTimeoutOpt
-	// and highPriorityOpt.
-	GetBuildlet(ctx context.Context, hostType string, lg logger) (*buildlet.Client, error)
-
-	String() string // TODO(bradfitz): more status stuff
-}
-
 // getBuildlets creates up to n buildlets and sends them on the returned channel
 // before closing the channel.
-func getBuildlets(ctx context.Context, n int, schedTmpl *SchedItem, lg logger) <-chan *buildlet.Client {
+func getBuildlets(ctx context.Context, n int, schedTmpl *SchedItem, lg pool.Logger) <-chan *buildlet.Client {
 	ch := make(chan *buildlet.Client) // NOT buffered
 	var wg sync.WaitGroup
 	wg.Add(n)
@@ -1657,9 +1615,9 @@
 	return ch
 }
 
-var testPoolHook func(*dashboard.HostConfig) BuildletPool
+var testPoolHook func(*dashboard.HostConfig) pool.Buildlet
 
-func poolForConf(conf *dashboard.HostConfig) BuildletPool {
+func poolForConf(conf *dashboard.HostConfig) pool.Buildlet {
 	if testPoolHook != nil {
 		return testPoolHook(conf)
 	}
@@ -1668,10 +1626,10 @@
 	}
 	switch {
 	case conf.IsVM():
-		return gcePool
+		return pool.GetGCEBuildletPool()
 	case conf.IsContainer():
-		if buildEnv.PreferContainersOnCOS || kubeErr != nil {
-			return gcePool // it also knows how to do containers.
+		if pool.GCEBuildEnv().PreferContainersOnCOS || kubeErr != nil {
+			return pool.GetGCEBuildletPool() // it also knows how to do containers.
 		} else {
 			return kubePool
 		}
@@ -1746,7 +1704,7 @@
 	}()
 }
 
-func (st *buildStatus) buildletPool() BuildletPool {
+func (st *buildStatus) buildletPool() pool.Buildlet {
 	return poolForConf(st.conf.HostConfig())
 }
 
@@ -1776,9 +1734,9 @@
 	// TODO: move this to dashboard/builders.go? But once we based on on historical
 	// measurements, it'll need GCE services (bigtable/bigquery?), so it's probably
 	// better in this file.
-	pool := st.buildletPool()
-	switch pool.(type) {
-	case *gceBuildletPool:
+	p := st.buildletPool()
+	switch p.(type) {
+	case *pool.GCEBuildlet:
 		if strings.HasPrefix(st.Name, "android-") {
 			// about a minute for buildlet + minute for Android emulator to be usable
 			return 2 * time.Minute
@@ -1849,7 +1807,7 @@
 	if st.useSnapshotMemo != nil {
 		return *st.useSnapshotMemo
 	}
-	b := st.conf.SplitMakeRun() && st.BuilderRev.SnapshotExists(context.TODO(), buildEnv)
+	b := st.conf.SplitMakeRun() && st.BuilderRev.SnapshotExists(context.TODO(), pool.GCEBuildEnv())
 	st.useSnapshotMemo = &b
 	return b
 }
@@ -1872,7 +1830,7 @@
 	if config.AlwaysCrossCompile {
 		return config
 	}
-	if inStaging || st.isTry() {
+	if pool.GCEInStaging() || st.isTry() {
 		return config
 	}
 	return nil
@@ -1962,8 +1920,8 @@
 	putBuildRecord(st.buildRecord())
 
 	sp := st.CreateSpan("checking_for_snapshot")
-	if inStaging {
-		err := storageClient.Bucket(buildEnv.SnapBucket).Object(st.SnapshotObjectName()).Delete(context.Background())
+	if pool.GCEInStaging() {
+		err := pool.GCEStorageClient().Bucket(pool.GCEBuildEnv().SnapBucket).Object(st.SnapshotObjectName()).Delete(context.Background())
 		st.LogEventTime("deleted_snapshot", fmt.Sprint(err))
 	}
 	snapshotExists := st.useSnapshot()
@@ -1984,7 +1942,7 @@
 
 	if st.useSnapshot() {
 		sp := st.CreateSpan("write_snapshot_tar")
-		if err := bc.PutTarFromURL(st.ctx, st.SnapshotURL(buildEnv), "go"); err != nil {
+		if err := bc.PutTarFromURL(st.ctx, st.SnapshotURL(pool.GCEBuildEnv()), "go"); err != nil {
 			return sp.Done(fmt.Errorf("failed to put snapshot to buildlet: %v", err))
 		}
 		sp.Done(nil)
@@ -2124,7 +2082,7 @@
 
 	// Log whether we used COS, so we can do queries to analyze
 	// Kubernetes vs COS performance for containers.
-	if st.conf.IsContainer() && poolForConf(st.conf.HostConfig()) == gcePool {
+	if st.conf.IsContainer() && poolForConf(st.conf.HostConfig()) == pool.GetGCEBuildletPool() {
 		rec.ContainerHost = "cos"
 	}
 
@@ -2369,7 +2327,7 @@
 }
 
 func (st *buildStatus) writeBootstrapToolchain() error {
-	u := st.conf.GoBootstrapURL(buildEnv)
+	u := st.conf.GoBootstrapURL(pool.GCEBuildEnv())
 	if u == "" {
 		return nil
 	}
@@ -2404,7 +2362,7 @@
 	}
 	defer tgz.Close()
 
-	wr := storageClient.Bucket(buildEnv.SnapBucket).Object(st.SnapshotObjectName()).NewWriter(ctx)
+	wr := pool.GCEStorageClient().Bucket(pool.GCEBuildEnv().SnapBucket).Object(st.SnapshotObjectName()).NewWriter(ctx)
 	wr.ContentType = "application/octet-stream"
 	wr.ACL = append(wr.ACL, storage.ACLRule{Entity: storage.AllUsers, Role: storage.RoleReader})
 	if _, err := io.Copy(wr, tgz); err != nil {
@@ -2418,7 +2376,7 @@
 
 // reportErr reports an error to Stackdriver.
 func (st *buildStatus) reportErr(err error) {
-	if errorsClient == nil {
+	if pool.GCEErrorsClient() == nil {
 		// errorsClient is nil in dev environments.
 		return
 	}
@@ -2427,7 +2385,7 @@
 	defer cancel()
 
 	err = fmt.Errorf("buildID: %v, name: %s, hostType: %s, error: %v", st.buildID, st.conf.Name, st.conf.HostType, err)
-	errorsClient.ReportSync(ctx, errorreporting.Entry{Error: err})
+	pool.GCEErrorsClient().ReportSync(ctx, errorreporting.Entry{Error: err})
 }
 
 func (st *buildStatus) distTestList() (names []string, remoteErr, err error) {
@@ -2558,7 +2516,7 @@
 		sp := sl.CreateSpan("query_test_stats")
 		ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
 		defer cancel()
-		ts, err := buildstats.QueryTestStats(ctx, buildEnv)
+		ts, err := buildstats.QueryTestStats(ctx, pool.GCEBuildEnv())
 		sp.Done(err)
 		if err != nil {
 			log.Printf("getTestStats: error: %v", err)
@@ -2927,7 +2885,7 @@
 // In localhost dev mode it just returns the value of GOPROXY.
 func moduleProxy() string {
 	// If we're running on localhost, just use the current environment's value.
-	if buildEnv == nil || !buildEnv.IsProd {
+	if pool.GCEBuildEnv() == nil || !pool.GCEBuildEnv().IsProd {
 		// If empty, use installed VCS tools as usual to fetch modules.
 		return os.Getenv("GOPROXY")
 	}
@@ -2940,7 +2898,7 @@
 	// TODO: migrate to a GKE internal load balancer with an internal static IP
 	// once we migrate symbolic-datum-552 off a Legacy VPC network to the modern
 	// scheme that supports internal static IPs.
-	return "http://" + gkeNodeIP + ":30157"
+	return "http://" + pool.GKENodeIP() + ":30157"
 }
 
 // affectedPkgs returns the name of every package affected by this commit.
@@ -3053,7 +3011,7 @@
 					defer st.LogEventTime("DEV_HELPER_SLEEP", bc.Name())
 				}
 				st.LogEventTime("got_empty_test_helper", bc.String())
-				if err := bc.PutTarFromURL(st.ctx, st.SnapshotURL(buildEnv), "go"); err != nil {
+				if err := bc.PutTarFromURL(st.ctx, st.SnapshotURL(pool.GCEBuildEnv()), "go"); err != nil {
 					log.Printf("failed to extract snapshot for helper %s: %v", bc.Name(), err)
 					return
 				}
@@ -3121,7 +3079,7 @@
 				lastBanner = banner
 				fmt.Fprintf(st, "\n##### %s\n", banner)
 			}
-			if inStaging {
+			if pool.GCEInStaging() {
 				out = bytes.TrimSuffix(out, nl)
 				st.Write(out)
 				fmt.Fprintf(st, " (shard %s; par=%d)\n", ti.shardIPPort, ti.groupSize)
@@ -3159,9 +3117,9 @@
 func (st *buildStatus) uploadBenchResults(ctx context.Context, files []*benchFile) error {
 	s := *perfServer
 	if s == "" {
-		s = buildEnv.PerfDataURL
+		s = pool.GCEBuildEnv().PerfDataURL
 	}
-	client := &perfstorage.Client{BaseURL: s, HTTPClient: oAuthHTTPClient}
+	client := &perfstorage.Client{BaseURL: s, HTTPClient: pool.GCEOAuthHTTPClient()}
 	u := client.NewUpload(ctx)
 	for _, b := range files {
 		w, err := u.CreateFile(b.name)
@@ -3207,7 +3165,7 @@
 			st.trySet.ci.ChangeNumber, ps, st.trySet.tryID,
 			st.Name, st.trySet.ci.Branch, st.trySet.ci.Project,
 		)
-		if inStaging {
+		if pool.GCEInStaging() {
 			benchFiles[0].out.WriteString("staging: true\n")
 		}
 		benchFiles[1].out.Write(benchFiles[0].out.Bytes())
@@ -3287,7 +3245,7 @@
 		pbr, perr := st.parentRev()
 		// TODO(quentin): Error if parent commit could not be determined?
 		if perr == nil {
-			remoteErr, err = ti.bench.Run(st.ctx, buildEnv, st, st.conf, bc, &buf, []buildgo.BuilderRev{st.BuilderRev, pbr})
+			remoteErr, err = ti.bench.Run(st.ctx, pool.GCEBuildEnv(), st, st.conf, bc, &buf, []buildgo.BuilderRev{st.BuilderRev, pbr})
 		}
 	} else {
 		env := append(st.conf.Env(),
@@ -3625,10 +3583,10 @@
 	optText string // optional details for event
 	start   time.Time
 	end     time.Time
-	el      eventTimeLogger // where we log to at the end; TODO: this will change
+	el      pool.EventTimeLogger // where we log to at the end; TODO: this will change
 }
 
-func createSpan(el eventTimeLogger, event string, optText ...string) *span {
+func createSpan(el pool.EventTimeLogger, event string, optText ...string) *span {
 	if len(optText) > 1 {
 		panic("usage")
 	}
@@ -3680,7 +3638,7 @@
 	if len(optText) > 1 {
 		panic("usage")
 	}
-	if inStaging {
+	if pool.GCEInStaging() {
 		st.logf("%s %v", event, optText)
 	}
 	st.mu.Lock()
@@ -3801,10 +3759,10 @@
 		return st.logURL
 	}
 	var urlPrefix string
-	if buildEnv == buildenv.Production {
+	if pool.GCEBuildEnv() == buildenv.Production {
 		urlPrefix = "https://farmer.golang.org"
 	} else {
-		urlPrefix = "http://" + buildEnv.StaticIP
+		urlPrefix = "http://" + pool.GCEBuildEnv().StaticIP
 	}
 	if *mode == "dev" {
 		urlPrefix = "https://localhost:8119"
@@ -3918,12 +3876,12 @@
 			ioutil.NopCloser(nil),
 		}, "devmode://build-log/" + objName
 	}
-	if storageClient == nil {
+	if pool.GCEStorageClient() == nil {
 		panic("nil storageClient in newFailureBlob")
 	}
-	bucket := buildEnv.LogBucket
+	bucket := pool.GCEBuildEnv().LogBucket
 
-	wr := storageClient.Bucket(bucket).Object(objName).NewWriter(context.Background())
+	wr := pool.GCEStorageClient().Bucket(bucket).Object(objName).NewWriter(context.Background())
 	wr.ContentType = "text/plain; charset=utf-8"
 	wr.ACL = append(wr.ACL, storage.ACLRule{
 		Entity: storage.AllUsers,
diff --git a/cmd/coordinator/coordinator_test.go b/cmd/coordinator/coordinator_test.go
index 3e9b0e2..b373389 100644
--- a/cmd/coordinator/coordinator_test.go
+++ b/cmd/coordinator/coordinator_test.go
@@ -21,6 +21,7 @@
 	"golang.org/x/build/buildenv"
 	"golang.org/x/build/dashboard"
 	"golang.org/x/build/internal/buildgo"
+	"golang.org/x/build/internal/coordinator/pool"
 	"golang.org/x/build/maintner/maintnerd/apipb"
 )
 
@@ -230,8 +231,9 @@
 	if testing.Short() {
 		t.Skip("skipping in short mode")
 	}
-	defer func(old *buildenv.Environment) { buildEnv = old }(buildEnv)
-	buildEnv = buildenv.Production
+	buildEnv := pool.GCEBuildEnv()
+	defer func(old *buildenv.Environment) { pool.SetGCEBuildEnv(old) }(buildEnv)
+	pool.SetGCEBuildEnv(buildenv.Production)
 	defer func() { buildgo.TestHookSnapshotExists = nil }()
 	buildgo.TestHookSnapshotExists = func(br *buildgo.BuilderRev) bool {
 		if strings.Contains(br.Name, "android") {
diff --git a/cmd/coordinator/dash.go b/cmd/coordinator/dash.go
index 50f4955..b28cbd7 100644
--- a/cmd/coordinator/dash.go
+++ b/cmd/coordinator/dash.go
@@ -28,6 +28,7 @@
 
 	"cloud.google.com/go/compute/metadata"
 	"golang.org/x/build/internal/buildgo"
+	"golang.org/x/build/internal/coordinator/pool"
 	"golang.org/x/build/internal/secret"
 )
 
@@ -50,7 +51,7 @@
 	}
 	var r *http.Response
 	var err error
-	cmd = buildEnv.DashBase() + cmd + "?" + argsCopy.Encode()
+	cmd = pool.GCEBuildEnv().DashBase() + cmd + "?" + argsCopy.Encode()
 	switch meth {
 	case "GET":
 		if req != nil {
diff --git a/cmd/coordinator/debug.go b/cmd/coordinator/debug.go
index 912ab82..d5c9c9d 100644
--- a/cmd/coordinator/debug.go
+++ b/cmd/coordinator/debug.go
@@ -19,6 +19,7 @@
 	"text/template"
 
 	"golang.org/x/build/internal/buildgo"
+	"golang.org/x/build/internal/coordinator/pool"
 	"golang.org/x/build/types"
 )
 
@@ -50,8 +51,8 @@
 
 		// Cap number of jobs that can be scheduled from debug UI. If
 		// buildEnv.MaxBuilds is zero, there is no cap.
-		if buildEnv.MaxBuilds > 0 && count > buildEnv.MaxBuilds {
-			count = buildEnv.MaxBuilds
+		if pool.GCEBuildEnv().MaxBuilds > 0 && count > pool.GCEBuildEnv().MaxBuilds {
+			count = pool.GCEBuildEnv().MaxBuilds
 		}
 		log.Printf("looking for %v work items for %q", count, mode)
 
diff --git a/cmd/coordinator/kube.go b/cmd/coordinator/kube.go
index db2bde3..c901d79 100644
--- a/cmd/coordinator/kube.go
+++ b/cmd/coordinator/kube.go
@@ -22,6 +22,7 @@
 
 	"golang.org/x/build/buildlet"
 	"golang.org/x/build/dashboard"
+	"golang.org/x/build/internal/coordinator/pool"
 	"golang.org/x/build/internal/sourcecache"
 	"golang.org/x/build/kubernetes"
 	"golang.org/x/build/kubernetes/api"
@@ -44,12 +45,12 @@
 
 // initGCE must be called before initKube
 func initKube() error {
-	if buildEnv.KubeBuild.MaxNodes == 0 {
+	if pool.GCEBuildEnv().KubeBuild.MaxNodes == 0 {
 		return errors.New("Kubernetes builders disabled due to KubeBuild.MaxNodes == 0")
 	}
 
 	// projectID was set by initGCE
-	registryPrefix += "/" + buildEnv.ProjectName
+	registryPrefix += "/" + pool.GCEBuildEnv().ProjectName
 	if !hasCloudPlatformScope() {
 		return errors.New("coordinator not running with access to the Cloud Platform scope.")
 	}
@@ -58,19 +59,19 @@
 	defer cancel() // ctx is only used for discovery and connect; not retained.
 	var err error
 	buildletsKubeClient, err = gke.NewClient(ctx,
-		buildEnv.KubeBuild.Name,
-		gke.OptZone(buildEnv.ControlZone),
-		gke.OptProject(buildEnv.ProjectName),
-		gke.OptTokenSource(gcpCreds.TokenSource))
+		pool.GCEBuildEnv().KubeBuild.Name,
+		gke.OptZone(pool.GCEBuildEnv().ControlZone),
+		gke.OptProject(pool.GCEBuildEnv().ProjectName),
+		gke.OptTokenSource(pool.GCPCredentials().TokenSource))
 	if err != nil {
 		return err
 	}
 
 	goKubeClient, err = gke.NewClient(ctx,
-		buildEnv.KubeTools.Name,
-		gke.OptZone(buildEnv.ControlZone),
-		gke.OptProject(buildEnv.ProjectName),
-		gke.OptTokenSource(gcpCreds.TokenSource))
+		pool.GCEBuildEnv().KubeTools.Name,
+		gke.OptZone(pool.GCEBuildEnv().ControlZone),
+		gke.OptProject(pool.GCEBuildEnv().ProjectName),
+		gke.OptTokenSource(pool.GCPCredentials().TokenSource))
 	if err != nil {
 		return err
 	}
@@ -135,12 +136,12 @@
 func (p *kubeBuildletPool) pollCapacity(ctx context.Context) {
 	nodes, err := buildletsKubeClient.GetNodes(ctx)
 	if err != nil {
-		log.Printf("failed to retrieve nodes to calculate cluster capacity for %s/%s: %v", buildEnv.ProjectName, buildEnv.Region(), err)
+		log.Printf("failed to retrieve nodes to calculate cluster capacity for %s/%s: %v", pool.GCEBuildEnv().ProjectName, pool.GCEBuildEnv().Region(), err)
 		return
 	}
 	pods, err := buildletsKubeClient.GetPods(ctx)
 	if err != nil {
-		log.Printf("failed to retrieve pods to calculate cluster capacity for %s/%s: %v", buildEnv.ProjectName, buildEnv.Region(), err)
+		log.Printf("failed to retrieve pods to calculate cluster capacity for %s/%s: %v", pool.GCEBuildEnv().ProjectName, pool.GCEBuildEnv().Region(), err)
 		return
 	}
 
@@ -209,7 +210,7 @@
 
 }
 
-func (p *kubeBuildletPool) GetBuildlet(ctx context.Context, hostType string, lg logger) (*buildlet.Client, error) {
+func (p *kubeBuildletPool) GetBuildlet(ctx context.Context, hostType string, lg pool.Logger) (*buildlet.Client, error) {
 	hconf, ok := dashboard.Hosts[hostType]
 	if !ok || !hconf.IsContainer() {
 		return nil, fmt.Errorf("kubepool: invalid host type %q", hostType)
@@ -221,7 +222,7 @@
 		panic("expect non-nil buildletsKubeClient")
 	}
 
-	deleteIn, ok := ctx.Value(buildletTimeoutOpt{}).(time.Duration)
+	deleteIn, ok := ctx.Value(pool.BuildletTimeoutOpt{}).(time.Duration)
 	if !ok {
 		deleteIn = podDeleteTimeout
 	}
@@ -236,7 +237,7 @@
 	log.Printf("Creating Kubernetes pod %q for %s", podName, hostType)
 
 	bc, err := buildlet.StartPod(ctx, buildletsKubeClient, podName, hostType, buildlet.PodOpts{
-		ProjectID:     buildEnv.ProjectName,
+		ProjectID:     pool.GCEBuildEnv().ProjectName,
 		ImageRegistry: registryPrefix,
 		Description:   fmt.Sprintf("Go Builder for %s", hostType),
 		DeleteIn:      deleteIn,
@@ -291,7 +292,7 @@
 		fmt.Fprintf(w, "<ul>")
 		for i, pod := range active {
 			if i < show/2 || i >= len(active)-(show/2) {
-				fmt.Fprintf(w, "<li>%v, %v</li>\n", pod.name, time.Since(pod.creation))
+				fmt.Fprintf(w, "<li>%v, %v</li>\n", pod.Name, time.Since(pod.Creation))
 			} else if i == show/2 {
 				fmt.Fprintf(w, "<li>... %d of %d total omitted ...</li>\n", len(active)-show, len(active))
 			}
@@ -353,16 +354,16 @@
 	return ok
 }
 
-func (p *kubeBuildletPool) podsActive() (ret []resourceTime) {
+func (p *kubeBuildletPool) podsActive() (ret []pool.ResourceTime) {
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	for name, ph := range p.pods {
-		ret = append(ret, resourceTime{
-			name:     name,
-			creation: ph.requestedAt,
+		ret = append(ret, pool.ResourceTime{
+			Name:     name,
+			Creation: ph.requestedAt,
 		})
 	}
-	sort.Sort(byCreationTime(ret))
+	sort.Sort(pool.ByCreationTime(ret))
 	return ret
 }
 
@@ -437,7 +438,7 @@
 				}
 				if err == nil && time.Now().Unix() > unixDeadline {
 					stats.DeletedOld++
-					log.Printf("cleanUpOldPods: Deleting expired pod %q in zone %q ...", pod.Name, buildEnv.ControlZone)
+					log.Printf("cleanUpOldPods: Deleting expired pod %q in zone %q ...", pod.Name, pool.GCEBuildEnv().ControlZone)
 					err = buildletsKubeClient.DeletePod(ctx, pod.Name)
 					if err != nil {
 						log.Printf("cleanUpOldPods: problem deleting old pod %q: %v", pod.Name, err)
@@ -467,5 +468,5 @@
 }
 
 func hasCloudPlatformScope() bool {
-	return hasScope(container.CloudPlatformScope)
+	return pool.HasScope(container.CloudPlatformScope)
 }
diff --git a/cmd/coordinator/log.go b/cmd/coordinator/log.go
index b642fbc..90e29ce 100644
--- a/cmd/coordinator/log.go
+++ b/cmd/coordinator/log.go
@@ -15,6 +15,7 @@
 
 	"cloud.google.com/go/datastore"
 
+	"golang.org/x/build/internal/coordinator/pool"
 	"golang.org/x/build/types"
 )
 
@@ -32,13 +33,13 @@
 }
 
 func updateInstanceRecord() {
-	if dsClient == nil {
+	if pool.GCEDSClient() == nil {
 		return
 	}
 	ctx := context.Background()
 	for {
 		key := datastore.NameKey("Process", processID, nil)
-		_, err := dsClient.Put(ctx, key, &ProcessRecord{
+		_, err := pool.GCEDSClient().Put(ctx, key, &ProcessRecord{
 			ID:            processID,
 			Start:         processStartTime,
 			LastHeartbeat: time.Now(),
@@ -51,23 +52,23 @@
 }
 
 func putBuildRecord(br *types.BuildRecord) {
-	if dsClient == nil {
+	if pool.GCEDSClient() == nil {
 		return
 	}
 	ctx := context.Background()
 	key := datastore.NameKey("Build", br.ID, nil)
-	if _, err := dsClient.Put(ctx, key, br); err != nil {
+	if _, err := pool.GCEDSClient().Put(ctx, key, br); err != nil {
 		log.Printf("datastore Build Put: %v", err)
 	}
 }
 
 func putSpanRecord(sr *types.SpanRecord) {
-	if dsClient == nil {
+	if pool.GCEDSClient() == nil {
 		return
 	}
 	ctx := context.Background()
 	key := datastore.NameKey("Span", fmt.Sprintf("%s-%v-%v", sr.BuildID, sr.StartTime.UnixNano(), sr.Event), nil)
-	if _, err := dsClient.Put(ctx, key, sr); err != nil {
+	if _, err := pool.GCEDSClient().Put(ctx, key, sr); err != nil {
 		log.Printf("datastore Span Put: %v", err)
 	}
 }
diff --git a/cmd/coordinator/metrics.go b/cmd/coordinator/metrics.go
index 91c4963..dcd4b9d 100644
--- a/cmd/coordinator/metrics.go
+++ b/cmd/coordinator/metrics.go
@@ -13,6 +13,7 @@
 	"time"
 
 	"golang.org/x/build/cmd/coordinator/metrics"
+	"golang.org/x/build/internal/coordinator/pool"
 
 	"github.com/golang/protobuf/ptypes"
 	metpb "google.golang.org/genproto/googleapis/api/metric"
@@ -66,8 +67,8 @@
 		})
 	}
 
-	return metricsClient.CreateTimeSeries(ctx, &monpb.CreateTimeSeriesRequest{
-		Name:       m.DescriptorPath(buildEnv.ProjectName),
+	return pool.MetricsClient().CreateTimeSeries(ctx, &monpb.CreateTimeSeriesRequest{
+		Name:       m.DescriptorPath(pool.GCEBuildEnv().ProjectName),
 		TimeSeries: ts,
 	})
 }
diff --git a/cmd/coordinator/remote.go b/cmd/coordinator/remote.go
index 681af31..aef65fc 100644
--- a/cmd/coordinator/remote.go
+++ b/cmd/coordinator/remote.go
@@ -37,6 +37,7 @@
 	"github.com/kr/pty"
 	"golang.org/x/build/buildlet"
 	"golang.org/x/build/dashboard"
+	"golang.org/x/build/internal/coordinator/pool"
 	"golang.org/x/build/internal/gophers"
 	"golang.org/x/build/internal/secret"
 	"golang.org/x/build/types"
@@ -525,11 +526,11 @@
 			log.Fatal(err)
 		}
 	} else {
-		if storageClient == nil {
+		if pool.StorageClient() == nil {
 			log.Printf("GCS storage client not available; not running SSH server.")
 			return
 		}
-		r, err := storageClient.Bucket(buildEnv.BuildletBucket).Object("coordinator-gomote-ssh.key").NewReader(context.Background())
+		r, err := pool.StorageClient().Bucket(pool.GCEBuildEnv().BuildletBucket).Object("coordinator-gomote-ssh.key").NewReader(context.Background())
 		if err != nil {
 			log.Printf("Failed to read ssh host key: %v; not running SSH server.", err)
 			return
diff --git a/cmd/coordinator/remote_test.go b/cmd/coordinator/remote_test.go
index 13236cd..4f7918e 100644
--- a/cmd/coordinator/remote_test.go
+++ b/cmd/coordinator/remote_test.go
@@ -21,6 +21,7 @@
 
 	"golang.org/x/build/buildlet"
 	"golang.org/x/build/dashboard"
+	"golang.org/x/build/internal/coordinator/pool"
 )
 
 type TestBuildletPool struct {
@@ -30,7 +31,7 @@
 
 // GetBuildlet finds the first available buildlet for the hostType and returns
 // it, or an error if no buildlets are available for that hostType.
-func (tp *TestBuildletPool) GetBuildlet(ctx context.Context, hostType string, lg logger) (*buildlet.Client, error) {
+func (tp *TestBuildletPool) GetBuildlet(ctx context.Context, hostType string, lg pool.Logger) (*buildlet.Client, error) {
 	tp.mu.Lock()
 	defer tp.mu.Unlock()
 	c, ok := tp.clients[hostType]
@@ -123,7 +124,7 @@
 	defer log.SetOutput(os.Stderr)
 	addBuilder(buildName)
 	remoteBuildlets.m = map[string]*remoteBuildlet{}
-	testPoolHook = func(_ *dashboard.HostConfig) BuildletPool { return testPool }
+	testPoolHook = func(_ *dashboard.HostConfig) pool.Buildlet { return testPool }
 	defer func() {
 		timeNow = time.Now
 		removeBuilder(buildName)
@@ -152,7 +153,7 @@
 	defer log.SetOutput(os.Stderr)
 	addBuilder(buildName)
 	remoteBuildlets.m = map[string]*remoteBuildlet{}
-	testPoolHook = func(_ *dashboard.HostConfig) BuildletPool { return testPool }
+	testPoolHook = func(_ *dashboard.HostConfig) pool.Buildlet { return testPool }
 	defer func() {
 		timeNow = time.Now
 		removeBuilder(buildName)
diff --git a/cmd/coordinator/reverse.go b/cmd/coordinator/reverse.go
index c694702..ab2f3c4 100644
--- a/cmd/coordinator/reverse.go
+++ b/cmd/coordinator/reverse.go
@@ -47,6 +47,7 @@
 
 	"golang.org/x/build/buildlet"
 	"golang.org/x/build/dashboard"
+	"golang.org/x/build/internal/coordinator/pool"
 	"golang.org/x/build/revdial/v2"
 	"golang.org/x/build/types"
 )
@@ -295,7 +296,7 @@
 	p.waiters[hostType] += delta
 }
 
-func (p *reverseBuildletPool) GetBuildlet(ctx context.Context, hostType string, lg logger) (*buildlet.Client, error) {
+func (p *reverseBuildletPool) GetBuildlet(ctx context.Context, hostType string, lg pool.Logger) (*buildlet.Client, error) {
 	p.updateWaiterCounter(hostType, 1)
 	defer p.updateWaiterCounter(hostType, -1)
 	seenErrInUse := false
@@ -324,7 +325,7 @@
 	}
 }
 
-func (p *reverseBuildletPool) cleanedBuildlet(b *buildlet.Client, lg logger) (*buildlet.Client, error) {
+func (p *reverseBuildletPool) cleanedBuildlet(b *buildlet.Client, lg pool.Logger) (*buildlet.Client, error) {
 	// Clean up any files from previous builds.
 	sp := lg.CreateSpan("clean_buildlet", b.String())
 	err := b.RemoveAll(context.Background(), ".")
diff --git a/cmd/coordinator/sched.go b/cmd/coordinator/sched.go
index 64e8d38..05f7a11 100644
--- a/cmd/coordinator/sched.go
+++ b/cmd/coordinator/sched.go
@@ -18,6 +18,7 @@
 	"golang.org/x/build/buildlet"
 	"golang.org/x/build/dashboard"
 	"golang.org/x/build/internal/buildgo"
+	"golang.org/x/build/internal/coordinator/pool"
 	"golang.org/x/build/internal/spanlog"
 	"golang.org/x/build/types"
 )
@@ -43,7 +44,7 @@
 // A getBuildletResult is a buildlet that was just created and is up and
 // is ready to be assigned to a caller based on priority.
 type getBuildletResult struct {
-	Pool     BuildletPool
+	Pool     pool.Buildlet
 	HostType string
 
 	// One of Client or Err gets set:
@@ -142,7 +143,7 @@
 
 // getPoolBuildlet is launched as its own goroutine to do a
 // potentially long blocking cal to pool.GetBuildlet.
-func (s *Scheduler) getPoolBuildlet(pool BuildletPool, hostType string) {
+func (s *Scheduler) getPoolBuildlet(pool pool.Buildlet, hostType string) {
 	res := getBuildletResult{
 		Pool:     pool,
 		HostType: hostType,
@@ -341,7 +342,7 @@
 	s           *Scheduler
 	requestTime time.Time
 	tryFor      string // TODO: which user. (user with 1 trybot >> user with 50 trybots)
-	pool        BuildletPool
+	pool        pool.Buildlet
 	ctxDone     <-chan struct{}
 
 	// wantRes is the unbuffered channel that's passed
diff --git a/cmd/coordinator/sched_test.go b/cmd/coordinator/sched_test.go
index 67ba1df..51812ea 100644
--- a/cmd/coordinator/sched_test.go
+++ b/cmd/coordinator/sched_test.go
@@ -16,6 +16,7 @@
 
 	"golang.org/x/build/buildlet"
 	"golang.org/x/build/dashboard"
+	cpool "golang.org/x/build/internal/coordinator/pool"
 	"golang.org/x/build/internal/spanlog"
 )
 
@@ -207,7 +208,7 @@
 
 type poolChan map[string]chan interface{} // hostType -> { *buildlet.Client | error}
 
-func (m poolChan) GetBuildlet(ctx context.Context, hostType string, lg logger) (*buildlet.Client, error) {
+func (m poolChan) GetBuildlet(ctx context.Context, hostType string, lg cpool.Logger) (*buildlet.Client, error) {
 	c, ok := m[hostType]
 	if !ok {
 		return nil, fmt.Errorf("pool doesn't support host type %q", hostType)
@@ -307,7 +308,7 @@
 		pool["test-host-foo"] = make(chan interface{}, 1)
 		pool["test-host-bar"] = make(chan interface{}, 1)
 
-		testPoolHook = func(*dashboard.HostConfig) BuildletPool { return pool }
+		testPoolHook = func(*dashboard.HostConfig) cpool.Buildlet { return pool }
 		t.Run(tt.name, func(t *testing.T) {
 			s := NewScheduler()
 			for i, st := range tt.steps() {
diff --git a/cmd/coordinator/status.go b/cmd/coordinator/status.go
index 66a92e4..063113c 100644
--- a/cmd/coordinator/status.go
+++ b/cmd/coordinator/status.go
@@ -32,6 +32,7 @@
 
 	"golang.org/x/build/cmd/coordinator/internal"
 	"golang.org/x/build/dashboard"
+	"golang.org/x/build/internal/coordinator/pool"
 	"golang.org/x/build/internal/foreach"
 	"golang.org/x/build/kubernetes/api"
 )
@@ -646,8 +647,8 @@
 	sort.Sort(byAge(data.Active))
 	sort.Sort(byAge(data.Pending))
 	sort.Sort(sort.Reverse(byAge(data.Recent)))
-	if errTryDeps != nil {
-		data.TrybotsErr = errTryDeps.Error()
+	if pool.GCETryDepsErr() != nil {
+		data.TrybotsErr = pool.GCETryDepsErr().Error()
 	} else {
 		if buf.Len() == 0 {
 			data.Trybots = template.HTML("<i>(none)</i>")
@@ -657,7 +658,7 @@
 	}
 
 	buf.Reset()
-	gcePool.WriteHTMLStatus(&buf)
+	pool.GetGCEBuildletPool().WriteHTMLStatus(&buf)
 	data.GCEPoolStatus = template.HTML(buf.String())
 	buf.Reset()
 
diff --git a/cmd/coordinator/gce.go b/internal/coordinator/pool/gce.go
similarity index 76%
rename from cmd/coordinator/gce.go
rename to internal/coordinator/pool/gce.go
index 762a4b5..3cbbd47 100644
--- a/cmd/coordinator/gce.go
+++ b/internal/coordinator/pool/gce.go
@@ -8,7 +8,7 @@
 // Code interacting with Google Compute Engine (GCE) and
 // a GCE implementation of the BuildletPool interface.
 
-package main
+package pool
 
 import (
 	"context"
@@ -25,6 +25,7 @@
 	"strconv"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"time"
 
 	"cloud.google.com/go/compute/metadata"
@@ -60,7 +61,13 @@
 	<-apiCallTicker.C
 }
 
+// IsGCERemoteBuildletFunc should return true if the buildlet instance name
+// is a GCE remote buildlet.
+type IsGCERemoteBuildletFunc func(instanceName string) bool
+
 // Initialized by initGCE:
+// TODO(http://golang.org/issue/38337): These should be moved into a struct as
+// part of the effort to reduce package level variables.
 var (
 	buildEnv *buildenv.Environment
 
@@ -78,6 +85,13 @@
 	errorsClient   *errorreporting.Client // Stackdriver errors client
 	gkeNodeIP      string
 
+	// values created due to separating the buildlet pools into a separate package
+	gceMode             string
+	deleteTimeout       time.Duration
+	testFiles           map[string]string
+	basePinErr          *atomic.Value
+	isGCERemoteBuildlet IsGCERemoteBuildletFunc
+
 	initGCECalled bool
 )
 
@@ -85,26 +99,31 @@
 // It is initialized by initGCE.
 var oAuthHTTPClient *http.Client
 
-func initGCE(sc *secret.Client) error {
+// InitGCE initializes the GCE build environment, its clients, and the GCE buildlet pool.
+func InitGCE(sc *secret.Client, vmDeleteTimeout time.Duration, tFiles map[string]string, basePin *atomic.Value, fn IsGCERemoteBuildletFunc, buildEnvName, mode string) error {
 	initGCECalled = true
+	deleteTimeout = vmDeleteTimeout
+	testFiles = tFiles
+	basePinErr = basePin
+	isGCERemoteBuildlet = fn
 	var err error
 	ctx := context.Background()
 
 	// If the coordinator is running on a GCE instance and a
 	// buildEnv was not specified with the env flag, set the
 	// buildEnvName to the project ID
-	if *buildEnvName == "" {
-		if *mode == "dev" {
-			*buildEnvName = "dev"
+	if buildEnvName == "" {
+		if mode == "dev" {
+			buildEnvName = "dev"
 		} else if metadata.OnGCE() {
-			*buildEnvName, err = metadata.ProjectID()
+			buildEnvName, err = metadata.ProjectID()
 			if err != nil {
 				log.Fatalf("metadata.ProjectID: %v", err)
 			}
 		}
 	}
 
-	buildEnv = buildenv.ByProjectID(*buildEnvName)
+	buildEnv = buildenv.ByProjectID(buildEnvName)
 	inStaging = buildEnv == buildenv.Staging
 
 	// If running on GCE, override the zone and static IP, and check service account permissions.
@@ -133,19 +152,12 @@
 		if !hasComputeScope() {
 			return errors.New("coordinator is not running with access to read and write Compute resources. VM support disabled")
 		}
-
-		ctxSec, cancel := context.WithTimeout(ctx, 10*time.Second)
-		defer cancel()
-
-		if value, err := sc.Retrieve(ctxSec, secret.NameFarmerRunBench); err == nil {
-			*shouldRunBench, _ = strconv.ParseBool(value)
-		}
 	}
 
 	cfgDump, _ := json.MarshalIndent(buildEnv, "", "  ")
-	log.Printf("Loaded configuration %q for project %q:\n%s", *buildEnvName, buildEnv.ProjectName, cfgDump)
+	log.Printf("Loaded configuration %q for project %q:\n%s", buildEnvName, buildEnv.ProjectName, cfgDump)
 
-	if *mode != "dev" {
+	if mode != "dev" {
 		storageClient, err = storage.NewClient(ctx)
 		if err != nil {
 			log.Fatalf("storage.NewClient: %v", err)
@@ -159,7 +171,7 @@
 
 	dsClient, err = datastore.NewClient(ctx, buildEnv.ProjectName)
 	if err != nil {
-		if *mode == "dev" {
+		if mode == "dev" {
 			log.Printf("Error creating datastore client for %q: %v", buildEnv.ProjectName, err)
 		} else {
 			log.Fatalf("Error creating datastore client for %q: %v", buildEnv.ProjectName, err)
@@ -167,7 +179,7 @@
 	}
 	goDSClient, err = datastore.NewClient(ctx, buildEnv.GoProjectName)
 	if err != nil {
-		if *mode == "dev" {
+		if mode == "dev" {
 			log.Printf("Error creating datastore client for %q: %v", buildEnv.GoProjectName, err)
 		} else {
 			log.Fatalf("Error creating datastore client for %q: %v", buildEnv.GoProjectName, err)
@@ -175,7 +187,7 @@
 	}
 
 	// don't send dev errors to Stackdriver.
-	if *mode != "dev" {
+	if mode != "dev" {
 		errorsClient, err = errorreporting.NewClient(ctx, buildEnv.ProjectName, errorreporting.Config{
 			ServiceName: "coordinator",
 		})
@@ -187,7 +199,7 @@
 
 	gcpCreds, err = buildEnv.Credentials(ctx)
 	if err != nil {
-		if *mode == "dev" {
+		if mode == "dev" {
 			// don't try to do anything else with GCE, as it will likely fail
 			return nil
 		}
@@ -202,20 +214,101 @@
 		log.Printf("TryBot builders enabled.")
 	}
 
-	if *mode != "dev" {
+	if mode != "dev" {
 		go syncBuildStatsLoop(buildEnv)
 	}
 
+	gceMode = mode
+
 	go gcePool.pollQuotaLoop()
 	go createBasepinDisks(context.Background())
 	return nil
 }
 
+// TODO(http://golang.org/issue/38337): These should be moved into a struct as
+// part of the effort to reduce package level variables.
+
+// GCEStorageClient retrieves the GCE storage client.
+func GCEStorageClient() *storage.Client {
+	return storageClient
+}
+
+// GCEBuildEnv retrieves the GCE build env.
+func GCEBuildEnv() *buildenv.Environment {
+	return buildEnv
+}
+
+// SetGCEBuildEnv sets the GCE build env. This is primarily reserved for
+// testing purposes.
+func SetGCEBuildEnv(b *buildenv.Environment) {
+	buildEnv = b
+}
+
+// GetGCEBuildletPool retrieves the GCE buildlet pool.
+func GetGCEBuildletPool() *GCEBuildlet {
+	return gcePool
+}
+
+// GCEInStaging reports whether the build environment is staging.
+func GCEInStaging() bool {
+	return inStaging
+}
+
+// GCEGerritClient retrieves a gerrit client.
+func GCEGerritClient() *gerrit.Client {
+	return gerritClient
+}
+
+// GKENodeIP retrieves the GKE node IP.
+func GKENodeIP() string {
+	return gkeNodeIP
+}
+
+// GCEDSClient retrieves the datastore client.
+func GCEDSClient() *datastore.Client {
+	return dsClient
+}
+
+// GCEGoDSClient retrieves the datastore client for golang.org project.
+func GCEGoDSClient() *datastore.Client {
+	return goDSClient
+}
+
+// GCETryDepsErr retrieves any TryBot dependency error.
+func GCETryDepsErr() error {
+	return errTryDeps
+}
+
+// GCEErrorsClient retrieves the Stackdriver errors client.
+func GCEErrorsClient() *errorreporting.Client {
+	return errorsClient
+}
+
+// GCEOAuthHTTPClient retrieves an OAuth2 HTTP client used to make API calls to GCP.
+func GCEOAuthHTTPClient() *http.Client {
+	return oAuthHTTPClient
+}
+
+// GCPCredentials retrieves the GCP credentials.
+func GCPCredentials() *google.Credentials {
+	return gcpCreds
+}
+
+// MetricsClient retrieves a metrics client.
+func MetricsClient() *monapi.MetricClient {
+	return metricsClient
+}
+
+// StorageClient retrieves a storage client.
+func StorageClient() *storage.Client {
+	return storageClient
+}
+
 func checkTryBuildDeps(ctx context.Context, sc *secret.Client) error {
 	if !hasStorageScope() {
 		return errors.New("coordinator's GCE instance lacks the storage service scope")
 	}
-	if *mode == "dev" {
+	if gceMode == "dev" {
 		return errors.New("running in dev mode")
 	}
 	wr := storageClient.Bucket(buildEnv.LogBucket).Object("hello.txt").NewWriter(context.Background())
@@ -241,16 +334,17 @@
 	return nil
 }
 
-var gcePool = &gceBuildletPool{}
+var gcePool = &GCEBuildlet{}
 
-var _ BuildletPool = (*gceBuildletPool)(nil)
+var _ Buildlet = (*GCEBuildlet)(nil)
 
 // maxInstances is a temporary hack because we can't get buildlets to boot
 // without IPs, and we only have 200 IP addresses.
 // TODO(bradfitz): remove this once fixed.
 const maxInstances = 190
 
-type gceBuildletPool struct {
+// GCEBuildlet manages a pool of GCE buildlets.
+type GCEBuildlet struct {
 	mu sync.Mutex // guards all following
 
 	disabled bool
@@ -264,7 +358,7 @@
 	inst      map[string]time.Time // GCE VM instance name -> creationTime
 }
 
-func (p *gceBuildletPool) pollQuotaLoop() {
+func (p *GCEBuildlet) pollQuotaLoop() {
 	if computeService == nil {
 		log.Printf("pollQuotaLoop: no GCE access; not checking quota.")
 		return
@@ -279,7 +373,7 @@
 	}
 }
 
-func (p *gceBuildletPool) pollQuota() {
+func (p *GCEBuildlet) pollQuota() {
 	gceAPIGate()
 	reg, err := computeService.Regions.Get(buildEnv.ProjectName, buildEnv.Region()).Do()
 	if err != nil {
@@ -302,13 +396,15 @@
 	}
 }
 
-func (p *gceBuildletPool) SetEnabled(enabled bool) {
+// SetEnabled enables or disables the buildlet pool.
+func (p *GCEBuildlet) SetEnabled(enabled bool) {
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	p.disabled = !enabled
 }
 
-func (p *gceBuildletPool) GetBuildlet(ctx context.Context, hostType string, lg logger) (bc *buildlet.Client, err error) {
+// GetBuildlet retrieves a buildlet client for an available buildlet.
+func (p *GCEBuildlet) GetBuildlet(ctx context.Context, hostType string, lg Logger) (bc *buildlet.Client, err error) {
 	hconf, ok := dashboard.Hosts[hostType]
 	if !ok {
 		return nil, fmt.Errorf("gcepool: unknown host type %q", hostType)
@@ -320,9 +416,9 @@
 		return nil, err
 	}
 
-	deleteIn, ok := ctx.Value(buildletTimeoutOpt{}).(time.Duration)
+	deleteIn, ok := ctx.Value(BuildletTimeoutOpt{}).(time.Duration)
 	if !ok {
-		deleteIn = vmDeleteTimeout
+		deleteIn = deleteTimeout
 	}
 
 	instName := "buildlet-" + strings.TrimPrefix(hostType, "host-") + "-rn" + randHex(7)
@@ -378,7 +474,7 @@
 	return bc, nil
 }
 
-func (p *gceBuildletPool) putBuildlet(bc *buildlet.Client, hostType, zone, instName string) error {
+func (p *GCEBuildlet) putBuildlet(bc *buildlet.Client, hostType, zone, instName string) error {
 	// TODO(bradfitz): add the buildlet to a freelist (of max N
 	// items) for up to 10 minutes since when it got started if
 	// it's never seen a command execution failure, and we can
@@ -399,7 +495,8 @@
 	return nil
 }
 
-func (p *gceBuildletPool) WriteHTMLStatus(w io.Writer) {
+// WriteHTMLStatus writes the status of the buildlet pool to an io.Writer.
+func (p *GCEBuildlet) WriteHTMLStatus(w io.Writer) {
 	fmt.Fprintf(w, "<b>GCE pool</b> capacity: %s", p.capacityString())
 	const show = 6 // must be even
 	active := p.instancesActive()
@@ -407,7 +504,7 @@
 		fmt.Fprintf(w, "<ul>")
 		for i, inst := range active {
 			if i < show/2 || i >= len(active)-(show/2) {
-				fmt.Fprintf(w, "<li>%v, %s</li>\n", inst.name, friendlyDuration(time.Since(inst.creation)))
+				fmt.Fprintf(w, "<li>%v, %s</li>\n", inst.Name, friendlyDuration(time.Since(inst.Creation)))
 			} else if i == show/2 {
 				fmt.Fprintf(w, "<li>... %d of %d total omitted ...</li>\n", len(active)-show, len(active))
 			}
@@ -416,11 +513,11 @@
 	}
 }
 
-func (p *gceBuildletPool) String() string {
+func (p *GCEBuildlet) String() string {
 	return fmt.Sprintf("GCE pool capacity: %s", p.capacityString())
 }
 
-func (p *gceBuildletPool) capacityString() string {
+func (p *GCEBuildlet) capacityString() string {
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	return fmt.Sprintf("%d/%d instances; %d/%d CPUs",
@@ -430,7 +527,7 @@
 
 // awaitVMCountQuota waits for numCPU CPUs of quota to become available,
 // or returns ctx.Err.
-func (p *gceBuildletPool) awaitVMCountQuota(ctx context.Context, numCPU int) error {
+func (p *GCEBuildlet) awaitVMCountQuota(ctx context.Context, numCPU int) error {
 	// Poll every 2 seconds, which could be better, but works and
 	// is simple.
 	for {
@@ -449,11 +546,11 @@
 // starting numCPU more CPUs.
 //
 // precondition: p.mu must be held.
-func (p *gceBuildletPool) haveQuotaLocked(numCPU int) bool {
+func (p *GCEBuildlet) haveQuotaLocked(numCPU int) bool {
 	return p.cpuLeft >= numCPU && p.instLeft >= 1 && len(p.inst) < maxInstances && p.addrUsage < maxInstances
 }
 
-func (p *gceBuildletPool) tryAllocateQuota(numCPU int) bool {
+func (p *GCEBuildlet) tryAllocateQuota(numCPU int) bool {
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	if p.disabled {
@@ -471,7 +568,7 @@
 
 // putVMCountQuota adjusts the dead-reckoning of our quota usage by
 // one instance and cpu CPUs.
-func (p *gceBuildletPool) putVMCountQuota(cpu int) {
+func (p *GCEBuildlet) putVMCountQuota(cpu int) {
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	p.cpuUsage -= cpu
@@ -479,7 +576,7 @@
 	p.instLeft++
 }
 
-func (p *gceBuildletPool) setInstanceUsed(instName string, used bool) {
+func (p *GCEBuildlet) setInstanceUsed(instName string, used bool) {
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	if p.inst == nil {
@@ -492,39 +589,41 @@
 	}
 }
 
-func (p *gceBuildletPool) instanceUsed(instName string) bool {
+func (p *GCEBuildlet) instanceUsed(instName string) bool {
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	_, ok := p.inst[instName]
 	return ok
 }
 
-func (p *gceBuildletPool) instancesActive() (ret []resourceTime) {
+func (p *GCEBuildlet) instancesActive() (ret []ResourceTime) {
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	for name, create := range p.inst {
-		ret = append(ret, resourceTime{
-			name:     name,
-			creation: create,
+		ret = append(ret, ResourceTime{
+			Name:     name,
+			Creation: create,
 		})
 	}
-	sort.Sort(byCreationTime(ret))
+	sort.Sort(ByCreationTime(ret))
 	return ret
 }
 
-// resourceTime is a GCE instance or Kube pod name and its creation time.
-type resourceTime struct {
-	name     string
-	creation time.Time
+// ResourceTime is a GCE instance or Kube pod name and its creation time.
+type ResourceTime struct {
+	Name     string
+	Creation time.Time
 }
 
-type byCreationTime []resourceTime
+// ByCreationTime implements sort.Interface to order resource times by
+// creation time.
+type ByCreationTime []ResourceTime
 
-func (s byCreationTime) Len() int           { return len(s) }
-func (s byCreationTime) Less(i, j int) bool { return s[i].creation.Before(s[j].creation) }
-func (s byCreationTime) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
+func (s ByCreationTime) Len() int           { return len(s) }
+func (s ByCreationTime) Less(i, j int) bool { return s[i].Creation.Before(s[j].Creation) }
+func (s ByCreationTime) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
 
-// cleanUpOldVMs loops forever and periodically enumerates virtual
+// CleanUpOldVMs loops forever and periodically enumerates virtual
 // machines and deletes those which have expired.
 //
 // A VM is considered expired if it has a "delete-at" metadata
@@ -537,8 +636,8 @@
 // stranded and wasting resources forever, we instead set the
 // "delete-at" metadata attribute on them when created to some time
 // that's well beyond their expected lifetime.
-func (p *gceBuildletPool) cleanUpOldVMs() {
-	if *mode == "dev" {
+func (p *GCEBuildlet) CleanUpOldVMs() {
+	if gceMode == "dev" {
 		return
 	}
 	if computeService == nil {
@@ -560,7 +659,7 @@
 }
 
 // cleanZoneVMs is part of cleanUpOldVMs, operating on a single zone.
-func (p *gceBuildletPool) cleanZoneVMs(zone string) error {
+func (p *GCEBuildlet) cleanZoneVMs(zone string) error {
 	// Fetch the first 500 (default) running instances and clean
 	// those. We expect that we'll be running many fewer than
 	// that. Even if we have more, eventually the first 500 will
@@ -631,6 +730,8 @@
 
 var deletedVMCache = lru.New(100) // keyed by instName
 
+type token struct{}
+
 // deleteVM starts a delete of an instance in a given zone.
 //
 // It either returns an operation name (if delete is pending) or the
@@ -653,7 +754,8 @@
 	return op.Name, nil
 }
 
-func hasScope(want string) bool {
+// HasScope reports whether the GCE instance's service account has the requested scope.
+func HasScope(want string) bool {
 	// If not on GCE, assume full access
 	if !metadata.OnGCE() {
 		return true
@@ -672,15 +774,16 @@
 }
 
 func hasComputeScope() bool {
-	return hasScope(compute.ComputeScope) || hasScope(compute.CloudPlatformScope)
+	return HasScope(compute.ComputeScope) || HasScope(compute.CloudPlatformScope)
 }
 
 func hasStorageScope() bool {
-	return hasScope(storage.ScopeReadWrite) || hasScope(storage.ScopeFullControl) || hasScope(compute.CloudPlatformScope)
+	return HasScope(storage.ScopeReadWrite) || HasScope(storage.ScopeFullControl) || HasScope(compute.CloudPlatformScope)
 }
 
-func readGCSFile(name string) ([]byte, error) {
-	if *mode == "dev" {
+// ReadGCSFile reads the named file from the GCS bucket.
+func ReadGCSFile(name string) ([]byte, error) {
+	if gceMode == "dev" {
 		b, ok := testFiles[name]
 		if !ok {
 			return nil, &os.PathError{
diff --git a/internal/coordinator/pool/log.go b/internal/coordinator/pool/log.go
new file mode 100644
index 0000000..e1ad2cd
--- /dev/null
+++ b/internal/coordinator/pool/log.go
@@ -0,0 +1,22 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package pool
+
+import "golang.org/x/build/internal/spanlog"
+
+// EventTimeLogger is the logging interface used to log
+// an event at a point in time.
+type EventTimeLogger interface {
+	LogEventTime(event string, optText ...string)
+}
+
+// Logger is the logging interface used within the coordinator.
+// It can both log a message at a point in time, as well
+// as log a span (something having a start and end time, as well as
+// a final success status).
+type Logger interface {
+	EventTimeLogger // point in time
+	spanlog.Logger  // action spanning time
+}
diff --git a/internal/coordinator/pool/pool.go b/internal/coordinator/pool/pool.go
new file mode 100644
index 0000000..03c3a7a
--- /dev/null
+++ b/internal/coordinator/pool/pool.go
@@ -0,0 +1,58 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package pool
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"math/rand"
+	"time"
+
+	"golang.org/x/build/buildlet"
+)
+
+// BuildletTimeoutOpt is a context.Value key for Buildlet.GetBuildlet.
+type BuildletTimeoutOpt struct{} // context Value key; value is time.Duration
+
+// Buildlet defines an interface for a pool of buildlets.
+type Buildlet interface {
+	// GetBuildlet returns a new buildlet client.
+	//
+	// The hostType is the key into the dashboard.Hosts
+	// map (such as "host-linux-jessie"), NOT the builder type
+	// ("linux-386").
+	//
+	// Users of GetBuildlet must both call Client.Close when done
+	// with the client as well as cancel the provided Context.
+	//
+	// The ctx may have context values of type BuildletTimeoutOpt
+	// and highPriorityOpt.
+	GetBuildlet(ctx context.Context, hostType string, lg Logger) (*buildlet.Client, error)
+
+	String() string // TODO(bradfitz): more status stuff
+}
+
+// randHex generates a random hex string.
+func randHex(n int) string {
+	buf := make([]byte, n/2+1)
+	if _, err := rand.Read(buf); err != nil {
+		log.Fatalf("randHex: %v", err)
+	}
+	return fmt.Sprintf("%x", buf)[:n]
+}
+
+func friendlyDuration(d time.Duration) string {
+	if d > 10*time.Second {
+		d2 := ((d + 50*time.Millisecond) / (100 * time.Millisecond)) * (100 * time.Millisecond)
+		return d2.String()
+	}
+	if d > time.Second {
+		d2 := ((d + 5*time.Millisecond) / (10 * time.Millisecond)) * (10 * time.Millisecond)
+		return d2.String()
+	}
+	d2 := ((d + 50*time.Microsecond) / (100 * time.Microsecond)) * (100 * time.Microsecond)
+	return d2.String()
+}