all: move services to new Kubernetes cluster

Our makefiles install xb from the repository, so this has to be checked
in before I do anything else, and nobody else should deploy anything
until I'm done.

Because the new cluster is an Autopilot cluster, it lives in a region
(us-central1) and that's what you pass to the kubectl command, etc.
Move Region/Zone into the individual KubeConfigs and use the correct
ones as appropriate.

For golang/go#48408.

Change-Id: Iceacfe68305a3744aa87ce0fef777b977a252586
Reviewed-on: https://go-review.googlesource.com/c/build/+/350137
Trust: Heschi Kreinick <heschi@google.com>
Run-TryBot: Heschi Kreinick <heschi@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
diff --git a/buildenv/envs.go b/buildenv/envs.go
index f737feb..fbbd247 100644
--- a/buildenv/envs.go
+++ b/buildenv/envs.go
@@ -14,7 +14,6 @@
 	"math/rand"
 	"os"
 	"path/filepath"
-	"strings"
 	"sync"
 
 	"golang.org/x/oauth2"
@@ -42,6 +41,12 @@
 	// MachineType is the GCE machine type to use for the Kubernetes cluster nodes.
 	MachineType string
 
+	// Zone is the GCE zone of the cluster. Autopilot clusters have no single zone.
+	Zone string
+
+	// Region is the GCE region of the cluster.
+	Region string
+
 	// Name is the name of the Kubernetes cluster that will be created.
 	Name string
 
@@ -49,6 +54,19 @@
 	Namespace string
 }
 
+// ZoneOrRegion returns the zone, or if unset, the region of the cluster.
+// This is the string to use as the "zone" of the cluster when connecting to it
+// with the Kubernetes API.
+func (kc KubeConfig) ZoneOrRegion() string {
+	if kc.Zone != "" {
+		return kc.Zone
+	}
+	if kc.Region != "" {
+		return kc.Region
+	}
+	panic(fmt.Sprintf("KubeConfig has neither zone nor region: %#v", kc))
+}
+
 // Environment describes the configuration of the infrastructure for a
 // coordinator and its buildlet resources running on Google Cloud Platform.
 // Staging and Production are the two common build environments.
@@ -72,11 +90,6 @@
 	// disabled and the coordinator serves on 8119.
 	IsProd bool
 
-	// ControlZone is the GCE zone that the coordinator instance and Kubernetes cluster
-	// will run in. This field may be overridden as necessary without impacting
-	// other fields.
-	ControlZone string
-
 	// VMZones are the GCE zones that the VMs will be deployed to. These
 	// GCE zones will be periodically cleaned by deleting old VMs. The zones
 	// should all exist within a single region.
@@ -90,10 +103,10 @@
 	// MachineType is the GCE machine type to use for the coordinator.
 	MachineType string
 
-	// KubeBuild is the Kubernetes config for the buildlet cluster.
+	// KubeBuild is the cluster that runs buildlets.
 	KubeBuild KubeConfig
-	// KubeTools is the Kubernetes config for the tools cluster.
-	KubeTools KubeConfig
+	// KubeServices is the cluster that runs the coordinator and other services.
+	KubeServices KubeConfig
 
 	// PreferContainersOnCOS controls whether we do most builds on
 	// Google's Container-Optimized OS Linux image running on a VM
@@ -154,19 +167,10 @@
 }
 
 // RandomVMZone returns a randomly selected zone from the zones in VMZones.
-// The Zone value will be returned if VMZones is not set.
 func (e Environment) RandomVMZone() string {
-	if len(e.VMZones) == 0 {
-		return e.ControlZone
-	}
 	return e.VMZones[rand.Intn(len(e.VMZones))]
 }
 
-// Region returns the GCE region, derived from its zone.
-func (e Environment) Region() string {
-	return e.ControlZone[:strings.LastIndex(e.ControlZone, "-")]
-}
-
 // SnapshotURL returns the absolute URL of the .tar.gz containing a
 // built Go tree for the builderType and Go rev (40 character Git
 // commit hash). The tarball is suitable for passing to
@@ -248,7 +252,6 @@
 	ProjectNumber:         302018677728,
 	GoProjectName:         "go-dashboard-dev",
 	IsProd:                true,
-	ControlZone:           "us-central1-f",
 	VMZones:               []string{"us-central1-a", "us-central1-b", "us-central1-c", "us-central1-f"},
 	StaticIP:              "104.154.113.235",
 	MachineType:           "n1-standard-1",
@@ -256,12 +259,16 @@
 	KubeBuild: KubeConfig{
 		MinNodes:    1,
 		MaxNodes:    1, // auto-scaling disabled
+		Zone:        "us-central1-f",
+		Region:      "us-central1",
 		Name:        "buildlets",
 		MachineType: "n1-standard-4", // only used for make.bash due to PreferContainersOnCOS
 	},
-	KubeTools: KubeConfig{
+	KubeServices: KubeConfig{
 		MinNodes:    3,
 		MaxNodes:    3,
+		Zone:        "us-central1-f",
+		Region:      "us-central1",
 		Name:        "go",
 		MachineType: "n1-standard-4",
 		Namespace:   "default",
@@ -284,7 +291,6 @@
 	ProjectNumber:         872405196845,
 	GoProjectName:         "golang-org",
 	IsProd:                true,
-	ControlZone:           "us-central1-f",
 	VMZones:               []string{"us-central1-a", "us-central1-b", "us-central1-c", "us-central1-f"},
 	StaticIP:              "107.178.219.46",
 	MachineType:           "n1-standard-4",
@@ -292,13 +298,16 @@
 	KubeBuild: KubeConfig{
 		MinNodes:    2,
 		MaxNodes:    2, // auto-scaling disabled
+		Zone:        "us-central1-f",
+		Region:      "us-central1",
 		Name:        "buildlets",
 		MachineType: "n1-standard-4", // only used for make.bash due to PreferContainersOnCOS
 	},
-	KubeTools: KubeConfig{
+	KubeServices: KubeConfig{
 		MinNodes:    4,
 		MaxNodes:    4,
-		Name:        "go",
+		Region:      "us-central1",
+		Name:        "services",
 		MachineType: "n1-standard-4",
 		Namespace:   "prod",
 	},
diff --git a/buildenv/envs_test.go b/buildenv/envs_test.go
index bbee9aa..07e6f61 100644
--- a/buildenv/envs_test.go
+++ b/buildenv/envs_test.go
@@ -9,44 +9,13 @@
 )
 
 func TestEnvironmentNextZone(t *testing.T) {
-	testCases := []struct {
-		name      string
-		env       Environment
-		wantOneOf []string // desired zone should appear in this slice
-	}{
-		{
-			name: "zones-not-set",
-			env: Environment{
-				ControlZone: "kentucky",
-				VMZones:     []string{},
-			},
-			wantOneOf: []string{"kentucky"},
-		},
-		{
-			name: "zone-and-zones-set",
-			env: Environment{
-				ControlZone: "kentucky",
-				VMZones:     []string{"texas", "california", "washington"},
-			},
-
-			wantOneOf: []string{"texas", "california", "washington"},
-		},
-		{
-			name: "zones-only-contains-one-entry",
-			env: Environment{
-				ControlZone: "kentucky",
-				VMZones:     []string{"texas"},
-			},
-			wantOneOf: []string{"texas"},
-		},
+	env := Environment{
+		VMZones: []string{"texas", "california", "washington"},
 	}
-	for _, tc := range testCases {
-		t.Run(tc.name, func(t *testing.T) {
-			got := tc.env.RandomVMZone()
-			if !containsString(got, tc.wantOneOf) {
-				t.Errorf("got=%q; want %v", got, tc.wantOneOf)
-			}
-		})
+	wantOneOf := []string{"texas", "california", "washington"}
+	got := env.RandomVMZone()
+	if !containsString(got, wantOneOf) {
+		t.Errorf("got=%q; want %v", got, wantOneOf)
 	}
 }
 
diff --git a/cmd/gcpinit/gcpinit.go b/cmd/gcpinit/gcpinit.go
index dfee59f..9f60852 100644
--- a/cmd/gcpinit/gcpinit.go
+++ b/cmd/gcpinit/gcpinit.go
@@ -32,7 +32,7 @@
 - name: "{{ .Kube.Name }}"
   type: container.v1.cluster
   properties:
-    zone: "{{ .Env.ControlZone }}"
+    zone: "{{ .Env.KubeServices.Zone }}"
     cluster:
       initial_node_count: {{ .Kube.MinNodes }}
       network: "default"
@@ -77,7 +77,7 @@
 		log.Fatalf("could not create client: %v", err)
 	}
 
-	for _, c := range []*buildenv.KubeConfig{&buildEnv.KubeBuild, &buildEnv.KubeTools} {
+	for _, c := range []*buildenv.KubeConfig{&buildEnv.KubeBuild, &buildEnv.KubeServices} {
 		err := createCluster(bgc, c)
 		if err != nil {
 			log.Fatalf("Error creating Kubernetes cluster %q: %v", c.Name, err)
diff --git a/cmd/xb/xb.go b/cmd/xb/xb.go
index 1a320d2..dbf457d 100644
--- a/cmd/xb/xb.go
+++ b/cmd/xb/xb.go
@@ -56,16 +56,14 @@
 	case "kubectl":
 		env := getEnv()
 		curCtx := kubeCurrentContext()
-		wantCtx := fmt.Sprintf("gke_%s_%s_go", env.ProjectName, env.ControlZone)
+		wantCtx := fmt.Sprintf("gke_%s_%s_%s", env.ProjectName, env.KubeServices.ZoneOrRegion(), env.KubeServices.Name)
 		if curCtx != wantCtx {
 			log.SetFlags(0)
-			log.Fatalf("Wrong kubectl context; currently using %q; want %q\nRun:\n  gcloud container clusters get-credentials --project=%s --zone=%s go",
+			log.Fatalf("Wrong kubectl context; currently using %q; want %q\nRun:\n  gcloud container clusters get-credentials --project=%s --zone=%s %s",
 				curCtx, wantCtx,
-				env.ProjectName, env.ControlZone,
+				env.ProjectName, env.KubeServices.ZoneOrRegion(), env.KubeServices.Name,
 			)
 		}
-		// gcloud container clusters get-credentials --zone=us-central1-f go
-		// gcloud container clusters get-credentials --zone=us-central1-f buildlets
 		runCmd()
 	case "docker":
 		runDocker()
diff --git a/internal/buildgo/basepin.go b/internal/buildgo/basepin.go
index 06d12c0..319c9ad 100644
--- a/internal/buildgo/basepin.go
+++ b/internal/buildgo/basepin.go
@@ -82,7 +82,7 @@
 				delete(need, d.SourceImage)
 				continue
 			}
-			if zone != c.Env.ControlZone {
+			if zone != c.Env.KubeBuild.Zone {
 				log.Printf("basepin: deleting unnecessary disk %v in zone %v", d.Name, zone)
 				op, err := svc.Disks.Delete(c.Env.ProjectName, zone, d.Name).Do()
 				if err != nil {
diff --git a/internal/coordinator/pool/gce.go b/internal/coordinator/pool/gce.go
index e4e3357..97ecce2 100644
--- a/internal/coordinator/pool/gce.go
+++ b/internal/coordinator/pool/gce.go
@@ -133,7 +133,7 @@
 
 		// Convert the zone from "projects/1234/zones/us-central1-a" to "us-central1-a".
 		projectZone = path.Base(projectZone)
-		buildEnv.ControlZone = projectZone
+		buildEnv.KubeBuild.Zone = projectZone
 
 		if buildEnv.StaticIP == "" {
 			buildEnv.StaticIP, err = metadata.ExternalIP()
@@ -349,9 +349,9 @@
 
 func (p *GCEBuildlet) pollQuota() {
 	gceAPIGate()
-	reg, err := computeService.Regions.Get(buildEnv.ProjectName, buildEnv.Region()).Do()
+	reg, err := computeService.Regions.Get(buildEnv.ProjectName, buildEnv.KubeBuild.Region).Do()
 	if err != nil {
-		log.Printf("Failed to get quota for %s/%s: %v", buildEnv.ProjectName, buildEnv.Region(), err)
+		log.Printf("Failed to get quota for %s/%s: %v", buildEnv.ProjectName, buildEnv.KubeBuild.Region, err)
 		return
 	}
 	p.mu.Lock()
diff --git a/internal/coordinator/pool/kube.go b/internal/coordinator/pool/kube.go
index 32aaf5c..c5e2e38 100644
--- a/internal/coordinator/pool/kube.go
+++ b/internal/coordinator/pool/kube.go
@@ -74,7 +74,7 @@
 	var err error
 	buildletsKubeClient, err = gke.NewClient(ctx,
 		gceBuildEnv.KubeBuild.Name,
-		gke.OptZone(gceBuildEnv.ControlZone),
+		gke.OptZone(gceBuildEnv.KubeBuild.ZoneOrRegion()),
 		gke.OptProject(gceBuildEnv.ProjectName),
 		gke.OptTokenSource(gce.GCPCredentials().TokenSource))
 	if err != nil {
@@ -82,9 +82,9 @@
 	}
 
 	goKubeClient, err = gke.NewClient(ctx,
-		gceBuildEnv.KubeTools.Name,
-		gke.OptNamespace(gceBuildEnv.KubeTools.Namespace),
-		gke.OptZone(gceBuildEnv.ControlZone),
+		gceBuildEnv.KubeServices.Name,
+		gke.OptNamespace(gceBuildEnv.KubeServices.Namespace),
+		gke.OptZone(gceBuildEnv.KubeServices.ZoneOrRegion()),
 		gke.OptProject(gceBuildEnv.ProjectName),
 		gke.OptTokenSource(gce.GCPCredentials().TokenSource))
 	if err != nil {
@@ -172,12 +172,12 @@
 	gceBuildEnv := NewGCEConfiguration().BuildEnv()
 	nodes, err := buildletsKubeClient.GetNodes(ctx)
 	if err != nil {
-		log.Printf("failed to retrieve nodes to calculate cluster capacity for %s/%s: %v", gceBuildEnv.ProjectName, gceBuildEnv.Region(), err)
+		log.Printf("failed to retrieve nodes to calculate cluster capacity for %s/%s: %v", gceBuildEnv.ProjectName, gceBuildEnv.KubeBuild.Region, err)
 		return
 	}
 	pods, err := buildletsKubeClient.GetPods(ctx)
 	if err != nil {
-		log.Printf("failed to retrieve pods to calculate cluster capacity for %s/%s: %v", gceBuildEnv.ProjectName, gceBuildEnv.Region(), err)
+		log.Printf("failed to retrieve pods to calculate cluster capacity for %s/%s: %v", gceBuildEnv.ProjectName, gceBuildEnv.KubeBuild.Region, err)
 		return
 	}
 
@@ -474,7 +474,7 @@
 				}
 				if err == nil && time.Now().Unix() > unixDeadline {
 					stats.DeletedOld++
-					log.Printf("cleanUpOldPods: Deleting expired pod %q in zone %q ...", pod.Name, NewGCEConfiguration().BuildEnv().ControlZone)
+					log.Printf("cleanUpOldPods: Deleting expired pod %q...", pod.Name)
 					err = buildletsKubeClient.DeletePod(ctx, pod.Name)
 					if err != nil {
 						log.Printf("cleanUpOldPods: problem deleting old pod %q: %v", pod.Name, err)