dashboard: create buildlet client package, move coordinator code into it

Operation Packification, step 2 of tons.

Eventually the buildlet client binary will use this stuff too.

Change-Id: I4cf5f3e6beb9e56bdc795ed513ce6daaf61425e3
Reviewed-on: https://go-review.googlesource.com/2921
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
diff --git a/builders.go b/builders.go
index edeadc2..bfa4d25 100644
--- a/builders.go
+++ b/builders.go
@@ -36,6 +36,17 @@
 	tool    string   // the tool this configuration is for
 }
 
+func (c *BuildConfig) GOOS() string { return strings.SplitN(c.Name, "-", 2)[0] } // text before the first "-"; the whole Name (no panic) if there is none
+
+func (c *BuildConfig) GOARCH() string {
+	arch := c.Name[strings.Index(c.Name, "-")+1:]
+	i := strings.Index(arch, "-")
+	if i == -1 {
+		return arch
+	}
+	return arch[:i]
+}
+
 func (c *BuildConfig) UsesDocker() bool { return c.VMImage == "" }
 func (c *BuildConfig) UsesVM() bool     { return c.VMImage != "" }
 
diff --git a/builders_test.go b/builders_test.go
new file mode 100644
index 0000000..96effe5
--- /dev/null
+++ b/builders_test.go
@@ -0,0 +1,22 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package dashboard
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestOSARCHAccessors(t *testing.T) {
+	valid := func(s string) bool { return s != "" && !strings.Contains(s, "-") }
+	for _, conf := range Builders {
+		os := conf.GOOS()
+		arch := conf.GOARCH()
+		osArch := os + "-" + arch
+		if !valid(os) || !valid(arch) || !(conf.Name == osArch || strings.HasPrefix(conf.Name, osArch+"-")) {
+			t.Errorf("OS+ARCH(%q) = %q, %q; invalid", conf.Name, os, arch)
+		}
+	}
+}
diff --git a/buildlet/buildletclient.go b/buildlet/buildletclient.go
new file mode 100644
index 0000000..ae0e87b
--- /dev/null
+++ b/buildlet/buildletclient.go
@@ -0,0 +1,72 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build extdep
+
+// Package buildlet contains client tools for working with a buildlet
+// server.
+package buildlet // import "golang.org/x/tools/dashboard/buildlet"
+
+import (
+	"fmt"
+	"io"
+	"io/ioutil"
+	"net/http"
+	"strings"
+)
+
+// KeyPair is the TLS public certificate PEM file and its associated
+// private key PEM file that a builder will use for its HTTPS
+// server. The zero value means no HTTPS, which is used by the
+// coordinator for machines running within a firewall.
+type KeyPair struct {
+	CertPEM string
+	KeyPEM  string
+}
+
+// NoKeyPair is used by the coordinator to speak http directly to buildlets,
+// inside their firewall, without TLS.
+var NoKeyPair = KeyPair{}
+
+// NewClient returns a *Client that will manipulate ipPort,
+// authenticated using the provided keypair.
+//
+// This constructor returns immediately without testing the host or auth.
+func NewClient(ipPort string, tls KeyPair) *Client {
+	return &Client{
+		ipPort: ipPort,
+		tls:    tls,
+	}
+}
+
+// A Client interacts with a single buildlet.
+type Client struct {
+	ipPort string
+	tls    KeyPair
+}
+
+// URL returns the buildlet's URL prefix (https if a TLS key pair is set, else http), without a trailing slash.
+func (c *Client) URL() string {
+	if c.tls != NoKeyPair { // a key pair means the buildlet serves HTTPS; drop the default port
+		return "https://" + strings.TrimSuffix(c.ipPort, ":443")
+	}
+	return "http://" + strings.TrimSuffix(c.ipPort, ":80")
+}
+
+func (c *Client) PutTarball(r io.Reader) error {
+	req, err := http.NewRequest("PUT", c.URL()+"/writetgz", r)
+	if err != nil {
+		return err
+	}
+	res, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return err
+	}
+	defer res.Body.Close()
+	if res.StatusCode/100 != 2 {
+		slurp, _ := ioutil.ReadAll(io.LimitReader(res.Body, 4<<10))
+		return fmt.Errorf("%v; body: %s", res.Status, slurp)
+	}
+	return nil
+}
diff --git a/buildlet/gce.go b/buildlet/gce.go
new file mode 100644
index 0000000..325eb35
--- /dev/null
+++ b/buildlet/gce.go
@@ -0,0 +1,245 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build extdep
+
+package buildlet
+
+import (
+	"crypto/tls"
+	"errors"
+	"fmt"
+	"net/http"
+	"strings"
+	"time"
+
+	"golang.org/x/oauth2"
+	"golang.org/x/tools/dashboard"
+	"google.golang.org/api/compute/v1"
+)
+
+type VMOpts struct {
+	// Zone is the GCE zone to create the VM in. Required.
+	Zone string
+
+	// ProjectID is the GCE project ID. Required.
+	ProjectID string
+
+	// TLS optionally specifies the TLS keypair to use.
+	// If zero, http without auth is used.
+	TLS KeyPair
+
+	// Optional description of the VM.
+	Description string
+
+	// Optional metadata to put on the instance.
+	Meta map[string]string
+
+	// DeleteIn optionally specifies a duration at which
+	// to delete the VM.
+	DeleteIn time.Duration
+
+	// OnInstanceRequested optionally specifies a hook to run synchronously
+	// after the computeService.Instances.Insert call, but before
+	// waiting for its operation to proceed.
+	OnInstanceRequested func()
+
+	// OnInstanceCreated optionally specifies a hook to run synchronously
+	// after the instance operation succeeds.
+	OnInstanceCreated func()
+
+	// OnGotInstanceInfo optionally specifies a hook to run synchronously
+	// after the computeService.Instances.Get call.
+	OnGotInstanceInfo func()
+}
+
+// StartNewVM boots a new VM on GCE and returns a buildlet client
+// configured to speak to it.
+func StartNewVM(ts oauth2.TokenSource, instName, builderType string, opts VMOpts) (*Client, error) {
+	computeService, _ := compute.New(oauth2.NewClient(oauth2.NoContext, ts))
+
+	conf, ok := dashboard.Builders[builderType]
+	if !ok {
+		return nil, fmt.Errorf("invalid builder type %q", builderType)
+	}
+
+	zone := opts.Zone
+	if zone == "" {
+		// TODO: automatic? maybe that's not useful.
+		// For now just return an error.
+		return nil, errors.New("buildlet: missing required Zone option")
+	}
+	projectID := opts.ProjectID
+	if projectID == "" {
+		return nil, errors.New("buildlet: missing required ProjectID option")
+	}
+
+	prefix := "https://www.googleapis.com/compute/v1/projects/" + projectID
+	machType := prefix + "/zones/" + zone + "/machineTypes/" + conf.MachineType()
+
+	instance := &compute.Instance{
+		Name:        instName,
+		Description: opts.Description,
+		MachineType: machType,
+		Disks: []*compute.AttachedDisk{
+			{
+				AutoDelete: true,
+				Boot:       true,
+				Type:       "PERSISTENT",
+				InitializeParams: &compute.AttachedDiskInitializeParams{
+					DiskName:    instName,
+					SourceImage: "https://www.googleapis.com/compute/v1/projects/" + projectID + "/global/images/" + conf.VMImage,
+					DiskType:    "https://www.googleapis.com/compute/v1/projects/" + projectID + "/zones/" + zone + "/diskTypes/pd-ssd",
+				},
+			},
+		},
+		Tags: &compute.Tags{
+			// Warning: do NOT list "http-server" or "allow-ssh" (our
+			// project's custom tag to allow ssh access) here; the
+			// buildlet provides full remote code execution.
+			// The https-server is authenticated, though.
+			Items: []string{"https-server"},
+		},
+		Metadata: &compute.Metadata{
+			Items: []*compute.MetadataItems{
+				// The buildlet-binary-url is the URL of the buildlet binary
+				// which the VMs are configured to download at boot and run.
+				// This lets us update the buildlet more easily than
+				// rebuilding the whole VM image.
+				{
+					Key:   "buildlet-binary-url",
+					Value: "http://storage.googleapis.com/go-builder-data/buildlet." + conf.GOOS() + "-" + conf.GOARCH(),
+				},
+			},
+		},
+		NetworkInterfaces: []*compute.NetworkInterface{
+			&compute.NetworkInterface{
+				AccessConfigs: []*compute.AccessConfig{
+					&compute.AccessConfig{
+						Type: "ONE_TO_ONE_NAT",
+						Name: "External NAT",
+					},
+				},
+				Network: prefix + "/global/networks/default",
+			},
+		},
+	}
+
+	if opts.DeleteIn != 0 {
+		// In case the VM gets away from us (generally: if the
+		// coordinator dies while a build is running), then we
+		// set this attribute of when it should be killed so
+		// we can kill it later when the coordinator is
+		// restarted. The cleanUpOldVMs goroutine loop handles
+		// that killing.
+		instance.Metadata.Items = append(instance.Metadata.Items, &compute.MetadataItems{
+			Key:   "delete-at",
+			Value: fmt.Sprint(time.Now().Add(opts.DeleteIn).Unix()),
+		})
+	}
+	for k, v := range opts.Meta {
+		instance.Metadata.Items = append(instance.Metadata.Items, &compute.MetadataItems{
+			Key:   k,
+			Value: v,
+		})
+	}
+
+	op, err := computeService.Instances.Insert(projectID, zone, instance).Do()
+	if err != nil {
+		return nil, fmt.Errorf("Failed to create instance: %v", err)
+	}
+	if fn := opts.OnInstanceRequested; fn != nil {
+		fn()
+	}
+	createOp := op.Name
+
+	// Wait for instance create operation to succeed.
+OpLoop:
+	for {
+		time.Sleep(2 * time.Second)
+		op, err := computeService.ZoneOperations.Get(projectID, zone, createOp).Do()
+		if err != nil {
+			return nil, fmt.Errorf("Failed to get op %s: %v", createOp, err)
+		}
+		switch op.Status {
+		case "PENDING", "RUNNING":
+			continue
+		case "DONE":
+			if op.Error != nil {
+				for _, operr := range op.Error.Errors {
+					return nil, fmt.Errorf("Error creating instance: %+v", operr)
+				}
+				return nil, errors.New("Failed to start.")
+			}
+			break OpLoop
+		default:
+			return nil, fmt.Errorf("Unknown create status %q: %+v", op.Status, op)
+		}
+	}
+	if fn := opts.OnInstanceCreated; fn != nil {
+		fn()
+	}
+
+	inst, err := computeService.Instances.Get(projectID, zone, instName).Do()
+	if err != nil {
+		return nil, fmt.Errorf("Error getting instance %s details after creation: %v", instName, err)
+	}
+
+	// Find its internal IP.
+	var ip string
+	for _, iface := range inst.NetworkInterfaces {
+		if strings.HasPrefix(iface.NetworkIP, "10.") {
+			ip = iface.NetworkIP
+		}
+	}
+	if ip == "" {
+		return nil, errors.New("didn't find its internal IP address")
+	}
+
+	// Wait for it to boot and its buildlet to come up.
+	var buildletURL string
+	var ipPort string
+	if opts.TLS != NoKeyPair {
+		buildletURL = "https://" + ip
+		ipPort = ip + ":443"
+	} else {
+		buildletURL = "http://" + ip
+		ipPort = ip + ":80"
+	}
+	if fn := opts.OnGotInstanceInfo; fn != nil {
+		fn()
+	}
+
+	const timeout = 90 * time.Second
+	var alive bool
+	impatientClient := &http.Client{
+		Timeout: 5 * time.Second,
+		Transport: &http.Transport{
+			TLSClientConfig: &tls.Config{
+				InsecureSkipVerify: true,
+			},
+		},
+	}
+	deadline := time.Now().Add(timeout)
+	try := 0
+	for time.Now().Before(deadline) {
+		try++
+		res, err := impatientClient.Get(buildletURL)
+		if err != nil {
+			time.Sleep(1 * time.Second)
+			continue
+		}
+		res.Body.Close()
+		if res.StatusCode != 200 {
+			return nil, fmt.Errorf("buildlet returned HTTP status code %d on try number %d", res.StatusCode, try)
+		}
+		alive = true
+		break
+	}
+	if !alive {
+		return nil, fmt.Errorf("buildlet didn't come up in %v", timeout)
+	}
+
+	return NewClient(ipPort, opts.TLS), nil
+}
diff --git a/cmd/coordinator/coordinator.go b/cmd/coordinator/coordinator.go
index 2a8ad08..40d58fe 100644
--- a/cmd/coordinator/coordinator.go
+++ b/cmd/coordinator/coordinator.go
@@ -27,7 +27,6 @@
 	"os"
 	"os/exec"
 	"path"
-	"regexp"
 	"sort"
 	"strconv"
 	"strings"
@@ -37,6 +36,7 @@
 	"golang.org/x/oauth2"
 	"golang.org/x/oauth2/google"
 	"golang.org/x/tools/dashboard"
+	"golang.org/x/tools/dashboard/buildlet"
 	"golang.org/x/tools/dashboard/types"
 	"google.golang.org/api/compute/v1"
 	"google.golang.org/cloud/compute/metadata"
@@ -80,6 +80,7 @@
 	projectZone    string
 	computeService *compute.Service
 	externalIP     string
+	tokenSource    oauth2.TokenSource
 )
 
 func initGCE() error {
@@ -105,8 +106,8 @@
 	if err != nil {
 		return fmt.Errorf("ExternalIP: %v", err)
 	}
-	ts := google.ComputeTokenSource("default")
-	computeService, _ = compute.New(oauth2.NewClient(oauth2.NoContext, ts))
+	tokenSource = google.ComputeTokenSource("default")
+	computeService, _ = compute.New(oauth2.NewClient(oauth2.NoContext, tokenSource))
 	return nil
 }
 
@@ -669,8 +670,6 @@
 	return st, nil
 }
 
-var osArchRx = regexp.MustCompile(`^(\w+-\w+)`)
-
 func randHex(n int) string {
 	buf := make([]byte, n/2)
 	_, err := rand.Read(buf)
@@ -687,95 +686,22 @@
 		name: conf.Name,
 		rev:  rev,
 	}
-	st := &buildStatus{
-		builderRev: brev,
-		start:      time.Now(),
-	}
-
 	// name is the project-wide unique name of the GCE instance. It can't be longer
 	// than 61 bytes, so we only use the first 8 bytes of the rev.
 	name := "buildlet-" + conf.Name + "-" + rev[:8] + "-rn" + randHex(6)
 
-	// buildletURL is the URL of the buildlet binary which the VMs
-	// are configured to download at boot and run. This lets us
-	// update the buildlet more easily than rebuilding the whole
-	// VM image. We put this URL in a well-known GCE metadata attribute.
-	// The value will be of the form:
-	//  http://storage.googleapis.com/go-builder-data/buildlet.GOOS-GOARCH
-	m := osArchRx.FindStringSubmatch(conf.Name)
-	if m == nil {
-		return nil, fmt.Errorf("invalid builder name %q", conf.Name)
+	st := &buildStatus{
+		builderRev: brev,
+		start:      time.Now(),
+		instName:   name,
 	}
-	buildletURL := "http://storage.googleapis.com/go-builder-data/buildlet." + m[1]
 
-	prefix := "https://www.googleapis.com/compute/v1/projects/" + projectID
-	machType := prefix + "/zones/" + projectZone + "/machineTypes/" + conf.MachineType()
-
-	instance := &compute.Instance{
-		Name:        name,
-		Description: fmt.Sprintf("Go Builder building %s %s", conf.Name, rev),
-		MachineType: machType,
-		Disks: []*compute.AttachedDisk{
-			{
-				AutoDelete: true,
-				Boot:       true,
-				Type:       "PERSISTENT",
-				InitializeParams: &compute.AttachedDiskInitializeParams{
-					DiskName:    name,
-					SourceImage: "https://www.googleapis.com/compute/v1/projects/" + projectID + "/global/images/" + conf.VMImage,
-					DiskType:    "https://www.googleapis.com/compute/v1/projects/" + projectID + "/zones/" + projectZone + "/diskTypes/pd-ssd",
-				},
-			},
-		},
-		Tags: &compute.Tags{
-			// Warning: do NOT list "http-server" or "allow-ssh" (our
-			// project's custom tag to allow ssh access) here; the
-			// buildlet provides full remote code execution.
-			Items: []string{},
-		},
-		Metadata: &compute.Metadata{
-			Items: []*compute.MetadataItems{
-				{
-					Key:   "buildlet-binary-url",
-					Value: buildletURL,
-				},
-				// In case the VM gets away from us (generally: if the
-				// coordinator dies while a build is running), then we
-				// set this attribute of when it should be killed so
-				// we can kill it later when the coordinator is
-				// restarted. The cleanUpOldVMs goroutine loop handles
-				// that killing.
-				{
-					Key:   "delete-at",
-					Value: fmt.Sprint(time.Now().Add(vmDeleteTimeout).Unix()),
-				},
-			},
-		},
-		NetworkInterfaces: []*compute.NetworkInterface{
-			&compute.NetworkInterface{
-				AccessConfigs: []*compute.AccessConfig{
-					&compute.AccessConfig{
-						Type: "ONE_TO_ONE_NAT",
-						Name: "External NAT",
-					},
-				},
-				Network: prefix + "/global/networks/default",
-			},
-		},
-	}
-	op, err := computeService.Instances.Insert(projectID, projectZone, instance).Do()
-	if err != nil {
-		return nil, fmt.Errorf("Failed to create instance: %v", err)
-	}
-	st.createOp = op.Name
-	st.instName = name
-	log.Printf("%v now building in VM %v", brev, st.instName)
-	// Start the goroutine to monitor the VM now that it's booting. This might
-	// take minutes for it to come up, and then even more time to do the build.
 	go func() {
-		err := watchVM(st)
-		if st.hasEvent("instance_created") {
-			deleteVM(projectZone, st.instName)
+		err := buildInVM(conf, st)
+		if err != nil {
+			if st.hasEvent("instance_created") {
+				go deleteVM(projectZone, st.instName)
+			}
 		}
 		st.setDone(err == nil)
 		if err != nil {
@@ -786,8 +712,27 @@
 	return st, nil
 }
 
-// watchVM monitors a VM doing a build.
-func watchVM(st *buildStatus) (retErr error) {
+func buildInVM(conf dashboard.BuildConfig, st *buildStatus) (retErr error) {
+	bc, err := buildlet.StartNewVM(tokenSource, st.instName, conf.Name, buildlet.VMOpts{
+		ProjectID:   projectID,
+		Zone:        projectZone,
+		Description: fmt.Sprintf("Go Builder building %s %s", conf.Name, st.rev),
+		DeleteIn:    vmDeleteTimeout,
+		OnInstanceRequested: func() {
+			st.logEventTime("instance_create_requested")
+			log.Printf("%v now booting VM %v for build", st.builderRev, st.instName)
+		},
+		OnInstanceCreated: func() {
+			st.logEventTime("instance_created")
+		},
+		OnGotInstanceInfo: func() {
+			st.logEventTime("waiting_for_buildlet")
+		},
+	})
+	if err != nil {
+		return err
+	}
+	st.logEventTime("buildlet_up")
 	goodRes := func(res *http.Response, err error, what string) bool {
 		if err != nil {
 			retErr = fmt.Errorf("%s: %v", what, err)
@@ -802,82 +747,11 @@
 		}
 		return true
 	}
-	st.logEventTime("instance_create_requested")
-	// Wait for instance create operation to succeed.
-OpLoop:
-	for {
-		time.Sleep(2 * time.Second)
-		op, err := computeService.ZoneOperations.Get(projectID, projectZone, st.createOp).Do()
-		if err != nil {
-			return fmt.Errorf("Failed to get op %s: %v", st.createOp, err)
-		}
-		switch op.Status {
-		case "PENDING", "RUNNING":
-			continue
-		case "DONE":
-			if op.Error != nil {
-				for _, operr := range op.Error.Errors {
-					return fmt.Errorf("Error creating instance: %+v", operr)
-				}
-				return errors.New("Failed to start.")
-			}
-			break OpLoop
-		default:
-			log.Fatalf("Unknown status %q: %+v", op.Status, op)
-		}
-	}
-	st.logEventTime("instance_created")
-
-	inst, err := computeService.Instances.Get(projectID, projectZone, st.instName).Do()
-	if err != nil {
-		return fmt.Errorf("Error getting instance %s details after creation: %v", st.instName, err)
-	}
-	st.logEventTime("got_instance_info")
-
-	// Find its internal IP.
-	var ip string
-	for _, iface := range inst.NetworkInterfaces {
-		if strings.HasPrefix(iface.NetworkIP, "10.") {
-			ip = iface.NetworkIP
-		}
-	}
-	if ip == "" {
-		return errors.New("didn't find its internal IP address")
-	}
-
-	// Wait for it to boot and its buildlet to come up on port 80.
-	st.logEventTime("waiting_for_buildlet")
-	buildletURL := "http://" + ip
-	const numTries = 60
-	var alive bool
-	impatientClient := &http.Client{Timeout: 2 * time.Second}
-	for i := 1; i <= numTries; i++ {
-		res, err := impatientClient.Get(buildletURL)
-		if err != nil {
-			time.Sleep(1 * time.Second)
-			continue
-		}
-		res.Body.Close()
-		if res.StatusCode != 200 {
-			return fmt.Errorf("buildlet returned HTTP status code %d on try number %d", res.StatusCode, i)
-		}
-		st.logEventTime("buildlet_up")
-		alive = true
-		break
-	}
-	if !alive {
-		return fmt.Errorf("buildlet didn't come up in %d seconds", numTries)
-	}
 
 	// Write the VERSION file.
 	st.logEventTime("start_write_version_tar")
-	verReq, err := http.NewRequest("PUT", buildletURL+"/writetgz", versionTgz(st.rev))
-	if err != nil {
-		return err
-	}
-	verRes, err := http.DefaultClient.Do(verReq)
-	if !goodRes(verRes, err, "writing VERSION tgz") {
-		return
+	if err := bc.PutTarball(versionTgz(st.rev)); err != nil {
+		return fmt.Errorf("writing VERSION tgz: %v", err)
 	}
 
 	// Feed the buildlet a tar file for it to extract.
@@ -889,18 +763,13 @@
 	}
 
 	st.logEventTime("start_write_tar")
-	putReq, err := http.NewRequest("PUT", buildletURL+"/writetgz", tarRes.Body)
-	if err != nil {
+	if err := bc.PutTarball(tarRes.Body); err != nil {
 		tarRes.Body.Close()
-		return err
+		return fmt.Errorf("writing tarball from Gerrit: %v", err)
 	}
-	putRes, err := http.DefaultClient.Do(putReq)
 	st.logEventTime("end_write_tar")
-	tarRes.Body.Close()
-	if !goodRes(putRes, err, "writing tarball to buildlet") {
-		return
-	}
 
+	// TODO(bradfitz): add an Exec method to buildlet.Client and update this code.
 	// Run the builder
 	cmd := "all.bash"
 	if strings.HasPrefix(st.name, "windows-") {
@@ -910,7 +779,7 @@
 	}
 	execStartTime := time.Now()
 	st.logEventTime("start_exec")
-	res, err := http.PostForm(buildletURL+"/exec", url.Values{"cmd": {"src/" + cmd}})
+	res, err := http.PostForm(bc.URL()+"/exec", url.Values{"cmd": {"src/" + cmd}})
 	if !goodRes(res, err, "running "+cmd) {
 		return
 	}
@@ -958,7 +827,6 @@
 	container string // container ID for docker, else it's a VM
 
 	// Immutable, used by VM only:
-	createOp string // Instances.Insert operation name
 	instName string
 
 	mu        sync.Mutex   // guards following