all: move services to new Kubernetes cluster
Our makefiles install xb from the repository, so this change has to be
checked in before I do anything else, and nobody else should deploy
anything until I'm done.
Because the new cluster is an Autopilot cluster, it lives in a region
(us-central1) rather than a zone, and the region is what gets passed to
kubectl, gcloud, etc. Move Region/Zone into the individual KubeConfigs
and use the correct one for each cluster as appropriate.
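For example (using the prod values from the config below), fetching
credentials for the new regional services cluster looks roughly like:

  gcloud container clusters get-credentials --region=us-central1 --project=golang-org services

which creates the kubectl context gke_golang-org_us-central1_services.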
For golang/go#48408.
Change-Id: Iceacfe68305a3744aa87ce0fef777b977a252586
Reviewed-on: https://go-review.googlesource.com/c/build/+/350137
Trust: Heschi Kreinick <heschi@google.com>
Run-TryBot: Heschi Kreinick <heschi@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
diff --git a/buildenv/envs.go b/buildenv/envs.go
index f737feb..fbbd247 100644
--- a/buildenv/envs.go
+++ b/buildenv/envs.go
@@ -14,7 +14,6 @@
"math/rand"
"os"
"path/filepath"
- "strings"
"sync"
"golang.org/x/oauth2"
@@ -42,6 +41,12 @@
// MachineType is the GCE machine type to use for the Kubernetes cluster nodes.
MachineType string
+ // Zone is the GCE zone of the cluster. Autopilot clusters have no single zone.
+ Zone string
+
+ // Region is the GCE region of the cluster.
+ Region string
+
// Name is the name of the Kubernetes cluster that will be created.
Name string
@@ -49,6 +54,19 @@
Namespace string
}
+// ZoneOrRegion returns the zone or, if unset, the region of the cluster.
+// This is the string to use as the "zone" of the cluster when connecting to it
+// with the Kubernetes API.
+func (kc KubeConfig) ZoneOrRegion() string {
+ if kc.Zone != "" {
+ return kc.Zone
+ }
+ if kc.Region != "" {
+ return kc.Region
+ }
+ panic(fmt.Sprintf("KubeConfig has neither zone nor region: %#v", kc))
+}
+
// Environment describes the configuration of the infrastructure for a
// coordinator and its buildlet resources running on Google Cloud Platform.
// Staging and Production are the two common build environments.
@@ -72,11 +90,6 @@
// disabled and the coordinator serves on 8119.
IsProd bool
- // ControlZone is the GCE zone that the coordinator instance and Kubernetes cluster
- // will run in. This field may be overridden as necessary without impacting
- // other fields.
- ControlZone string
-
// VMZones are the GCE zones that the VMs will be deployed to. These
// GCE zones will be periodically cleaned by deleting old VMs. The zones
// should all exist within a single region.
@@ -90,10 +103,10 @@
// MachineType is the GCE machine type to use for the coordinator.
MachineType string
- // KubeBuild is the Kubernetes config for the buildlet cluster.
+ // KubeBuild is the cluster that runs buildlets.
KubeBuild KubeConfig
- // KubeTools is the Kubernetes config for the tools cluster.
- KubeTools KubeConfig
+ // KubeServices is the cluster that runs the coordinator and other services.
+ KubeServices KubeConfig
// PreferContainersOnCOS controls whether we do most builds on
// Google's Container-Optimized OS Linux image running on a VM
@@ -154,19 +167,10 @@
}
// RandomVMZone returns a randomly selected zone from the zones in VMZones.
-// The Zone value will be returned if VMZones is not set.
func (e Environment) RandomVMZone() string {
- if len(e.VMZones) == 0 {
- return e.ControlZone
- }
return e.VMZones[rand.Intn(len(e.VMZones))]
}
-// Region returns the GCE region, derived from its zone.
-func (e Environment) Region() string {
- return e.ControlZone[:strings.LastIndex(e.ControlZone, "-")]
-}
-
// SnapshotURL returns the absolute URL of the .tar.gz containing a
// built Go tree for the builderType and Go rev (40 character Git
// commit hash). The tarball is suitable for passing to
@@ -248,7 +252,6 @@
ProjectNumber: 302018677728,
GoProjectName: "go-dashboard-dev",
IsProd: true,
- ControlZone: "us-central1-f",
VMZones: []string{"us-central1-a", "us-central1-b", "us-central1-c", "us-central1-f"},
StaticIP: "104.154.113.235",
MachineType: "n1-standard-1",
@@ -256,12 +259,16 @@
KubeBuild: KubeConfig{
MinNodes: 1,
MaxNodes: 1, // auto-scaling disabled
+ Zone: "us-central1-f",
+ Region: "us-central1",
Name: "buildlets",
MachineType: "n1-standard-4", // only used for make.bash due to PreferContainersOnCOS
},
- KubeTools: KubeConfig{
+ KubeServices: KubeConfig{
MinNodes: 3,
MaxNodes: 3,
+ Zone: "us-central1-f",
+ Region: "us-central1",
Name: "go",
MachineType: "n1-standard-4",
Namespace: "default",
@@ -284,7 +291,6 @@
ProjectNumber: 872405196845,
GoProjectName: "golang-org",
IsProd: true,
- ControlZone: "us-central1-f",
VMZones: []string{"us-central1-a", "us-central1-b", "us-central1-c", "us-central1-f"},
StaticIP: "107.178.219.46",
MachineType: "n1-standard-4",
@@ -292,13 +298,16 @@
KubeBuild: KubeConfig{
MinNodes: 2,
MaxNodes: 2, // auto-scaling disabled
+ Zone: "us-central1-f",
+ Region: "us-central1",
Name: "buildlets",
MachineType: "n1-standard-4", // only used for make.bash due to PreferContainersOnCOS
},
- KubeTools: KubeConfig{
+ KubeServices: KubeConfig{
MinNodes: 4,
MaxNodes: 4,
- Name: "go",
+ Region: "us-central1",
+ Name: "services",
MachineType: "n1-standard-4",
Namespace: "prod",
},
diff --git a/buildenv/envs_test.go b/buildenv/envs_test.go
index bbee9aa..07e6f61 100644
--- a/buildenv/envs_test.go
+++ b/buildenv/envs_test.go
@@ -9,44 +9,13 @@
)
func TestEnvironmentNextZone(t *testing.T) {
- testCases := []struct {
- name string
- env Environment
- wantOneOf []string // desired zone should appear in this slice
- }{
- {
- name: "zones-not-set",
- env: Environment{
- ControlZone: "kentucky",
- VMZones: []string{},
- },
- wantOneOf: []string{"kentucky"},
- },
- {
- name: "zone-and-zones-set",
- env: Environment{
- ControlZone: "kentucky",
- VMZones: []string{"texas", "california", "washington"},
- },
-
- wantOneOf: []string{"texas", "california", "washington"},
- },
- {
- name: "zones-only-contains-one-entry",
- env: Environment{
- ControlZone: "kentucky",
- VMZones: []string{"texas"},
- },
- wantOneOf: []string{"texas"},
- },
+ env := Environment{
+ VMZones: []string{"texas", "california", "washington"},
}
- for _, tc := range testCases {
- t.Run(tc.name, func(t *testing.T) {
- got := tc.env.RandomVMZone()
- if !containsString(got, tc.wantOneOf) {
- t.Errorf("got=%q; want %v", got, tc.wantOneOf)
- }
- })
+ wantOneOf := []string{"texas", "california", "washington"}
+ got := env.RandomVMZone()
+ if !containsString(got, wantOneOf) {
+ t.Errorf("got=%q; want %v", got, wantOneOf)
}
}
diff --git a/cmd/gcpinit/gcpinit.go b/cmd/gcpinit/gcpinit.go
index dfee59f..9f60852 100644
--- a/cmd/gcpinit/gcpinit.go
+++ b/cmd/gcpinit/gcpinit.go
@@ -32,7 +32,7 @@
- name: "{{ .Kube.Name }}"
type: container.v1.cluster
properties:
- zone: "{{ .Env.ControlZone }}"
+ zone: "{{ .Env.KubeServices.Zone }}"
cluster:
initial_node_count: {{ .Kube.MinNodes }}
network: "default"
@@ -77,7 +77,7 @@
log.Fatalf("could not create client: %v", err)
}
- for _, c := range []*buildenv.KubeConfig{&buildEnv.KubeBuild, &buildEnv.KubeTools} {
+ for _, c := range []*buildenv.KubeConfig{&buildEnv.KubeBuild, &buildEnv.KubeServices} {
err := createCluster(bgc, c)
if err != nil {
log.Fatalf("Error creating Kubernetes cluster %q: %v", c.Name, err)
diff --git a/cmd/xb/xb.go b/cmd/xb/xb.go
index 1a320d2..dbf457d 100644
--- a/cmd/xb/xb.go
+++ b/cmd/xb/xb.go
@@ -56,16 +56,14 @@
case "kubectl":
env := getEnv()
curCtx := kubeCurrentContext()
- wantCtx := fmt.Sprintf("gke_%s_%s_go", env.ProjectName, env.ControlZone)
+ wantCtx := fmt.Sprintf("gke_%s_%s_%s", env.ProjectName, env.KubeServices.ZoneOrRegion(), env.KubeServices.Name)
if curCtx != wantCtx {
log.SetFlags(0)
- log.Fatalf("Wrong kubectl context; currently using %q; want %q\nRun:\n gcloud container clusters get-credentials --project=%s --zone=%s go",
+ log.Fatalf("Wrong kubectl context; currently using %q; want %q\nRun:\n gcloud container clusters get-credentials --project=%s --zone=%s %s",
curCtx, wantCtx,
- env.ProjectName, env.ControlZone,
+ env.ProjectName, env.KubeServices.ZoneOrRegion(), env.KubeServices.Name,
)
}
- // gcloud container clusters get-credentials --zone=us-central1-f go
- // gcloud container clusters get-credentials --zone=us-central1-f buildlets
runCmd()
case "docker":
runDocker()
diff --git a/internal/buildgo/basepin.go b/internal/buildgo/basepin.go
index 06d12c0..319c9ad 100644
--- a/internal/buildgo/basepin.go
+++ b/internal/buildgo/basepin.go
@@ -82,7 +82,7 @@
delete(need, d.SourceImage)
continue
}
- if zone != c.Env.ControlZone {
+ if zone != c.Env.KubeBuild.Zone {
log.Printf("basepin: deleting unnecessary disk %v in zone %v", d.Name, zone)
op, err := svc.Disks.Delete(c.Env.ProjectName, zone, d.Name).Do()
if err != nil {
diff --git a/internal/coordinator/pool/gce.go b/internal/coordinator/pool/gce.go
index e4e3357..97ecce2 100644
--- a/internal/coordinator/pool/gce.go
+++ b/internal/coordinator/pool/gce.go
@@ -133,7 +133,7 @@
// Convert the zone from "projects/1234/zones/us-central1-a" to "us-central1-a".
projectZone = path.Base(projectZone)
- buildEnv.ControlZone = projectZone
+ buildEnv.KubeBuild.Zone = projectZone
if buildEnv.StaticIP == "" {
buildEnv.StaticIP, err = metadata.ExternalIP()
@@ -349,9 +349,9 @@
func (p *GCEBuildlet) pollQuota() {
gceAPIGate()
- reg, err := computeService.Regions.Get(buildEnv.ProjectName, buildEnv.Region()).Do()
+ reg, err := computeService.Regions.Get(buildEnv.ProjectName, buildEnv.KubeBuild.Region).Do()
if err != nil {
- log.Printf("Failed to get quota for %s/%s: %v", buildEnv.ProjectName, buildEnv.Region(), err)
+ log.Printf("Failed to get quota for %s/%s: %v", buildEnv.ProjectName, buildEnv.KubeBuild.Region, err)
return
}
p.mu.Lock()
diff --git a/internal/coordinator/pool/kube.go b/internal/coordinator/pool/kube.go
index 32aaf5c..c5e2e38 100644
--- a/internal/coordinator/pool/kube.go
+++ b/internal/coordinator/pool/kube.go
@@ -74,7 +74,7 @@
var err error
buildletsKubeClient, err = gke.NewClient(ctx,
gceBuildEnv.KubeBuild.Name,
- gke.OptZone(gceBuildEnv.ControlZone),
+ gke.OptZone(gceBuildEnv.KubeBuild.ZoneOrRegion()),
gke.OptProject(gceBuildEnv.ProjectName),
gke.OptTokenSource(gce.GCPCredentials().TokenSource))
if err != nil {
@@ -82,9 +82,9 @@
}
goKubeClient, err = gke.NewClient(ctx,
- gceBuildEnv.KubeTools.Name,
- gke.OptNamespace(gceBuildEnv.KubeTools.Namespace),
- gke.OptZone(gceBuildEnv.ControlZone),
+ gceBuildEnv.KubeServices.Name,
+ gke.OptNamespace(gceBuildEnv.KubeServices.Namespace),
+ gke.OptZone(gceBuildEnv.KubeServices.ZoneOrRegion()),
gke.OptProject(gceBuildEnv.ProjectName),
gke.OptTokenSource(gce.GCPCredentials().TokenSource))
if err != nil {
@@ -172,12 +172,12 @@
gceBuildEnv := NewGCEConfiguration().BuildEnv()
nodes, err := buildletsKubeClient.GetNodes(ctx)
if err != nil {
- log.Printf("failed to retrieve nodes to calculate cluster capacity for %s/%s: %v", gceBuildEnv.ProjectName, gceBuildEnv.Region(), err)
+ log.Printf("failed to retrieve nodes to calculate cluster capacity for %s/%s: %v", gceBuildEnv.ProjectName, gceBuildEnv.KubeBuild.Region, err)
return
}
pods, err := buildletsKubeClient.GetPods(ctx)
if err != nil {
- log.Printf("failed to retrieve pods to calculate cluster capacity for %s/%s: %v", gceBuildEnv.ProjectName, gceBuildEnv.Region(), err)
+ log.Printf("failed to retrieve pods to calculate cluster capacity for %s/%s: %v", gceBuildEnv.ProjectName, gceBuildEnv.KubeBuild.Region, err)
return
}
@@ -474,7 +474,7 @@
}
if err == nil && time.Now().Unix() > unixDeadline {
stats.DeletedOld++
- log.Printf("cleanUpOldPods: Deleting expired pod %q in zone %q ...", pod.Name, NewGCEConfiguration().BuildEnv().ControlZone)
+ log.Printf("cleanUpOldPods: Deleting expired pod %q...", pod.Name)
err = buildletsKubeClient.DeletePod(ctx, pod.Name)
if err != nil {
log.Printf("cleanUpOldPods: problem deleting old pod %q: %v", pod.Name, err)