cmd/coordinator: modernize Kubernetes access

It turns out that querying a regional Kubernetes cluster requires a
different API. Require an explicit location to create a client and use
that new API.
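
As a rough sketch of the new shape (the real call is in
internal/coordinator/pool/kube.go below; "env" and "ts" here stand in
for a *buildenv.Environment and an oauth2.TokenSource, they are not
names from this change):

	kc, err := gke.NewClient(ctx,
		env.KubeBuild.Name,
		env.KubeBuild.Location(), // zone, or region for a regional cluster
		gke.OptProject(env.ProjectName),
		gke.OptTokenSource(ts))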

Also, modern Kubernetes clusters won't have client cert auth enabled.
Use OAuth instead.
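
Concretely, the GKE client now wraps its transport in an OAuth one and
trusts only the cluster CA, roughly as below (a sketch of what
kubernetes/gke/gke.go does after this change; caPool and tokenSource
are assumed to be set up as in that file):

	httpClient := &http.Client{
		Transport: &oauth2.Transport{
			Source: tokenSource, // bearer tokens instead of a client cert
			Base: &http.Transport{
				TLSClientConfig: &tls.Config{RootCAs: caPool},
			},
		},
	}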

Finally, don't require that the buildlets cluster be in the same zone as
the coordinator.

For golang/go#48408.

Change-Id: Ic3f9525b9bffa89d779e684c8ea1be116d3f983f
Reviewed-on: https://go-review.googlesource.com/c/build/+/350754
Trust: Heschi Kreinick <heschi@google.com>
Run-TryBot: Heschi Kreinick <heschi@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Alexander Rakoczy <alex@golang.org>
diff --git a/buildenv/envs.go b/buildenv/envs.go
index 677b774..9414b8a 100644
--- a/buildenv/envs.go
+++ b/buildenv/envs.go
@@ -41,10 +41,10 @@
 	Namespace string
 }
 
-// ZoneOrRegion returns the zone or if unset, the region of the cluster.
+// Location returns the zone, or if unset, the region of the cluster.
 // This is the string to use as the "zone" of the cluster when connecting to it
-// with the Kubernetes API.
-func (kc KubeConfig) ZoneOrRegion() string {
+// with kubectl.
+func (kc KubeConfig) Location() string {
 	if kc.Zone != "" {
 		return kc.Zone
 	}
@@ -277,8 +277,9 @@
 		Name:   "buildlets",
 	},
 	KubeServices: KubeConfig{
+		Zone:      "us-central1-f",
 		Region:    "us-central1",
-		Name:      "services",
+		Name:      "go",
 		Namespace: "prod",
 	},
 	DashURL:             "https://build.golang.org/",
diff --git a/cmd/xb/xb.go b/cmd/xb/xb.go
index dbf457d..93261c6 100644
--- a/cmd/xb/xb.go
+++ b/cmd/xb/xb.go
@@ -56,12 +56,12 @@
 	case "kubectl":
 		env := getEnv()
 		curCtx := kubeCurrentContext()
-		wantCtx := fmt.Sprintf("gke_%s_%s_%s", env.ProjectName, env.KubeServices.ZoneOrRegion(), env.KubeServices.Name)
+		wantCtx := fmt.Sprintf("gke_%s_%s_%s", env.ProjectName, env.KubeServices.Location(), env.KubeServices.Name)
 		if curCtx != wantCtx {
 			log.SetFlags(0)
 			log.Fatalf("Wrong kubectl context; currently using %q; want %q\nRun:\n  gcloud container clusters get-credentials --project=%s --zone=%s %s",
 				curCtx, wantCtx,
-				env.ProjectName, env.KubeServices.ZoneOrRegion(), env.KubeServices.Name,
+				env.ProjectName, env.KubeServices.Location(), env.KubeServices.Name,
 			)
 		}
 		runCmd()
diff --git a/internal/coordinator/pool/gce.go b/internal/coordinator/pool/gce.go
index 97ecce2..9f70505 100644
--- a/internal/coordinator/pool/gce.go
+++ b/internal/coordinator/pool/gce.go
@@ -121,19 +121,20 @@
 
 	// If running on GCE, override the zone and static IP, and check service account permissions.
 	if metadata.OnGCE() {
-		projectZone, err := metadata.Get("instance/zone")
-		if err != nil || projectZone == "" {
-			return fmt.Errorf("failed to get current GCE zone: %v", err)
-		}
-
 		gkeNodeHostname, err = metadata.Get("instance/hostname")
 		if err != nil {
 			return fmt.Errorf("failed to get current instance hostname: %v", err)
 		}
 
-		// Convert the zone from "projects/1234/zones/us-central1-a" to "us-central1-a".
-		projectZone = path.Base(projectZone)
-		buildEnv.KubeBuild.Zone = projectZone
+		if buildEnv.KubeBuild.Zone == "" {
+			projectZone, err := metadata.Get("instance/zone")
+			if err != nil || projectZone == "" {
+				return fmt.Errorf("failed to get current GCE zone: %v", err)
+			}
+			// Convert the zone from "projects/1234/zones/us-central1-a" to "us-central1-a".
+			projectZone = path.Base(projectZone)
+			buildEnv.KubeBuild.Zone = projectZone
+		}
 
 		if buildEnv.StaticIP == "" {
 			buildEnv.StaticIP, err = metadata.ExternalIP()
diff --git a/internal/coordinator/pool/kube.go b/internal/coordinator/pool/kube.go
index 5e336ae..fba51f8 100644
--- a/internal/coordinator/pool/kube.go
+++ b/internal/coordinator/pool/kube.go
@@ -74,7 +74,7 @@
 	var err error
 	buildletsKubeClient, err = gke.NewClient(ctx,
 		gceBuildEnv.KubeBuild.Name,
-		gke.OptZone(gceBuildEnv.KubeBuild.ZoneOrRegion()),
+		gceBuildEnv.KubeBuild.Location(),
 		gke.OptProject(gceBuildEnv.ProjectName),
 		gke.OptTokenSource(gce.GCPCredentials().TokenSource))
 	if err != nil {
@@ -83,8 +83,8 @@
 
 	goKubeClient, err = gke.NewClient(ctx,
 		gceBuildEnv.KubeServices.Name,
+		gceBuildEnv.KubeServices.Location(),
 		gke.OptNamespace(gceBuildEnv.KubeServices.Namespace),
-		gke.OptZone(gceBuildEnv.KubeServices.ZoneOrRegion()),
 		gke.OptProject(gceBuildEnv.ProjectName),
 		gke.OptTokenSource(gce.GCPCredentials().TokenSource))
 	if err != nil {
diff --git a/kubernetes/gke/gke.go b/kubernetes/gke/gke.go
index a7ab1ac..ea3fd68 100644
--- a/kubernetes/gke/gke.go
+++ b/kubernetes/gke/gke.go
@@ -34,7 +34,6 @@
 type clientOpt struct {
 	Project     string
 	TokenSource oauth2.TokenSource
-	Zone        string
 	Namespace   string
 }
 
@@ -52,15 +51,6 @@
 	})
 }
 
-// OptZone specifies the GCP zone the cluster is located in.
-// This is necessary if and only if there are multiple GKE clusters with
-// the same name in different zones.
-func OptZone(zoneName string) ClientOpt {
-	return clientOptFunc(func(o *clientOpt) {
-		o.Zone = zoneName
-	})
-}
-
 // OptTokenSource sets the oauth2 token source for making
 // authenticated requests to the GKE API. If unset, the default token
 // source is used (https://godoc.org/golang.org/x/oauth2/google#DefaultTokenSource).
@@ -78,7 +68,7 @@
 }
 
 // NewClient returns a Kubernetes client to a GKE cluster.
-func NewClient(ctx context.Context, clusterName string, opts ...ClientOpt) (*kubernetes.Client, error) {
+func NewClient(ctx context.Context, clusterName string, location string, opts ...ClientOpt) (*kubernetes.Client, error) {
 	opt := clientOpt{Namespace: "default"}
 	for _, o := range opts {
 		o.modify(&opt)
@@ -104,76 +94,28 @@
 		return nil, fmt.Errorf("could not create client for Google Container Engine: %v", err)
 	}
 
-	var cluster *container.Cluster
-	if opt.Zone == "" {
-		clusters, err := containerService.Projects.Zones.Clusters.List(opt.Project, "-").Context(ctx).Do()
-		if err != nil {
-			return nil, err
-		}
-		if len(clusters.MissingZones) > 0 {
-			return nil, fmt.Errorf("GKE cluster list response contains missing zones: %v", clusters.MissingZones)
-		}
-		matches := 0
-		for _, cl := range clusters.Clusters {
-			if cl.Name == clusterName {
-				cluster = cl
-				matches++
-			}
-		}
-		if matches == 0 {
-			return nil, fmt.Errorf("cluster %q not found in any zone", clusterName)
-		}
-		if matches > 1 {
-			return nil, fmt.Errorf("cluster %q is ambiguous without using gke.OptZone to specify a zone", clusterName)
-		}
-	} else {
-		cluster, err = containerService.Projects.Zones.Clusters.Get(opt.Project, opt.Zone, clusterName).Context(ctx).Do()
-		if err != nil {
-			return nil, fmt.Errorf("cluster %q could not be found in project %q, zone %q: %v", clusterName, opt.Project, opt.Zone, err)
-		}
-	}
-
-	// Decode certs
-	decode := func(which string, cert string) []byte {
-		if err != nil {
-			return nil
-		}
-		s, decErr := base64.StdEncoding.DecodeString(cert)
-		if decErr != nil {
-			err = fmt.Errorf("error decoding %s cert: %v", which, decErr)
-		}
-		return []byte(s)
-	}
-	clientCert := decode("client cert", cluster.MasterAuth.ClientCertificate)
-	clientKey := decode("client key", cluster.MasterAuth.ClientKey)
-	caCert := decode("cluster cert", cluster.MasterAuth.ClusterCaCertificate)
+	cluster, err := containerService.Projects.Locations.Clusters.Get(fmt.Sprintf("projects/%s/locations/%s/clusters/%s", opt.Project, location, clusterName)).Context(ctx).Do()
 	if err != nil {
-		return nil, err
+		return nil, fmt.Errorf("cluster %q could not be found in project %q, location %q: %v", clusterName, opt.Project, location, err)
 	}
 
-	// HTTPS client
-	cert, err := tls.X509KeyPair(clientCert, clientKey)
+	// Connect to Kubernetes using OAuth authentication, trusting its CA.
+	caPool := x509.NewCertPool()
+	caCertPEM, err := base64.StdEncoding.DecodeString(cluster.MasterAuth.ClusterCaCertificate)
 	if err != nil {
-		return nil, fmt.Errorf("x509 client key pair could not be generated: %v", err)
+		return nil, fmt.Errorf("invalid base64 in ClusterCaCertificate: %v", err)
 	}
-
-	// CA Cert from kube master
-	caCertPool := x509.NewCertPool()
-	caCertPool.AppendCertsFromPEM([]byte(caCert))
-
-	// Setup TLS config
-	tlsConfig := &tls.Config{
-		Certificates: []tls.Certificate{cert},
-		RootCAs:      caCertPool,
-	}
-	tlsConfig.BuildNameToCertificate()
-
+	caPool.AppendCertsFromPEM(caCertPEM)
 	kubeHTTPClient := &http.Client{
-		Transport: &http.Transport{
-			TLSClientConfig: tlsConfig,
+		Transport: &oauth2.Transport{
+			Source: opt.TokenSource,
+			Base: &http.Transport{
+				TLSClientConfig: &tls.Config{
+					RootCAs: caPool,
+				},
+			},
 		},
 	}
-
 	kubeClient, err := kubernetes.NewClient("https://"+cluster.Endpoint, opt.Namespace, kubeHTTPClient)
 	if err != nil {
 		return nil, fmt.Errorf("kubernetes HTTP client could not be created: %v", err)
diff --git a/kubernetes/gke/gke_test.go b/kubernetes/gke/gke_test.go
index 8376718..f8453cc 100644
--- a/kubernetes/gke/gke_test.go
+++ b/kubernetes/gke/gke_test.go
@@ -154,7 +154,7 @@
 		t.Skipf("default token source doesn't work; skipping test: %v", err)
 	}
 
-	clusters, err := containerService.Projects.Zones.Clusters.List(proj, "-").Context(ctx).Do()
+	clusters, err := containerService.Projects.Locations.Clusters.List("projects/" + proj + "/locations/-").Context(ctx).Do()
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -163,7 +163,7 @@
 		t.Skip("no GKE clusters")
 	}
 	for _, cl := range clusters.Clusters {
-		kc, err := gke.NewClient(ctx, cl.Name, gke.OptZone(cl.Zone))
+		kc, err := gke.NewClient(ctx, cl.Name, cl.Zone)
 		if err != nil {
 			t.Fatal(err)
 		}