cmd/coordinator, cmd/gomote: show periodic status updates to stderr

Also modernizes some code in the coordinator.

Updates golang/go#35354 (or fixes. But we could return more info.)

Change-Id: Ifc1aa85ca217a0932e388ec5d36ef0737b90c63d
Reviewed-on: https://go-review.googlesource.com/c/build/+/207841
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Bryan C. Mills <bcmills@google.com>
diff --git a/buildlet/remote.go b/buildlet/remote.go
index ecd51c3..179844f 100644
--- a/buildlet/remote.go
+++ b/buildlet/remote.go
@@ -5,12 +5,14 @@
 package buildlet
 
 import (
+	"bufio"
 	"bytes"
 	"encoding/json"
 	"errors"
 	"flag"
 	"fmt"
 	"io/ioutil"
+	"log"
 	"net/http"
 	"net/url"
 	"os"
@@ -22,6 +24,7 @@
 
 	"golang.org/x/build"
 	"golang.org/x/build/buildenv"
+	"golang.org/x/build/types"
 )
 
 type UserPass struct {
@@ -73,15 +76,28 @@
 // act as either linux-amd64 or linux-386-387.
 //
 // It may expire at any time.
-// To release it, call Client.Destroy.
+// To release it, call Client.Close.
 func (cc *CoordinatorClient) CreateBuildlet(builderType string) (*Client, error) {
+	return cc.CreateBuildletWithStatus(builderType, nil)
+}
+
+const (
+	// GomoteCreateStreamVersion is the gomote protocol version at which JSON streamed responses started.
+	GomoteCreateStreamVersion = "20191119"
+
+	// GomoteCreateMinVersion is the oldest "gomote create" protocol version that's still supported.
+	GomoteCreateMinVersion = "20160922"
+)
+
+// CreateBuildletWithStatus is like CreateBuildlet but accepts an optional status callback.
+func (cc *CoordinatorClient) CreateBuildletWithStatus(builderType string, status func(types.BuildletWaitStatus)) (*Client, error) {
 	hc, err := cc.client()
 	if err != nil {
 		return nil, err
 	}
 	ipPort, _ := cc.instance().TLSHostPort() // must succeed if client did
 	form := url.Values{
-		"version":     {"20160922"}, // checked by cmd/coordinator/remote.go
+		"version":     {GomoteCreateStreamVersion}, // checked by cmd/coordinator/remote.go
 		"builderType": {builderType},
 	}
 	req, _ := http.NewRequest("POST",
@@ -99,18 +115,52 @@
 		slurp, _ := ioutil.ReadAll(res.Body)
 		return nil, fmt.Errorf("%s: %s", res.Status, slurp)
 	}
-	var rb RemoteBuildlet
-	if err := json.NewDecoder(res.Body).Decode(&rb); err != nil {
-		return nil, err
+
+	// TODO: delete this once the server's been deployed with it.
+	// This code only exists for compatibility for a day or two at most.
+	if res.Header.Get("X-Supported-Version") < GomoteCreateStreamVersion {
+		var rb RemoteBuildlet
+		if err := json.NewDecoder(res.Body).Decode(&rb); err != nil {
+			return nil, err
+		}
+		return cc.NamedBuildlet(rb.Name)
 	}
-	if rb.Name == "" {
-		return nil, errors.New("buildlet: failed to create remote buildlet; unexpected missing name in response")
+
+	type msg struct {
+		Error    string                    `json:"error"`
+		Buildlet *RemoteBuildlet           `json:"buildlet"`
+		Status   *types.BuildletWaitStatus `json:"status"`
 	}
-	c, err := cc.NamedBuildlet(rb.Name)
-	if err != nil {
-		return nil, err
+	bs := bufio.NewScanner(res.Body)
+	for bs.Scan() {
+		line := bs.Bytes()
+		var m msg
+		if err := json.Unmarshal(line, &m); err != nil {
+			return nil, err
+		}
+		if m.Error != "" {
+			return nil, errors.New(m.Error)
+		}
+		if m.Buildlet != nil {
+			if m.Buildlet.Name == "" {
+				return nil, fmt.Errorf("buildlet: coordinator's /buildlet/create returned an unnamed buildlet")
+			}
+			return cc.NamedBuildlet(m.Buildlet.Name)
+		}
+		if m.Status != nil {
+			if status != nil {
+				status(*m.Status)
+			}
+			continue
+		}
+		log.Printf("buildlet: unknown message type from coordinator's /buildlet/create endpoint: %q", line)
+		continue
 	}
-	return c, nil
+	err = bs.Err()
+	if err == nil {
+		err = errors.New("buildlet: coordinator's /buildlet/create ended its response stream without a terminal message")
+	}
+	return nil, err
 }
 
 type RemoteBuildlet struct {
diff --git a/cmd/coordinator/remote.go b/cmd/coordinator/remote.go
index ec461ae..e1302df 100644
--- a/cmd/coordinator/remote.go
+++ b/cmd/coordinator/remote.go
@@ -41,6 +41,7 @@
 	"golang.org/x/build/buildlet"
 	"golang.org/x/build/dashboard"
 	"golang.org/x/build/internal/gophers"
+	"golang.org/x/build/types"
 	gossh "golang.org/x/crypto/ssh"
 )
 
@@ -123,11 +124,12 @@
 		http.Error(w, "POST required", 400)
 		return
 	}
-	const serverVersion = "20160922" // sent by cmd/gomote via buildlet/remote.go
-	if version := r.FormValue("version"); version < serverVersion {
-		http.Error(w, fmt.Sprintf("gomote client version %q is too old; predates server version %q", version, serverVersion), 400)
+	clientVersion := r.FormValue("version")
+	if clientVersion < buildlet.GomoteCreateMinVersion {
+		http.Error(w, fmt.Sprintf("gomote client version %q is too old; predates server minimum version %q", clientVersion, buildlet.GomoteCreateMinVersion), 400)
 		return
 	}
+
 	builderType := r.FormValue("builderType")
 	if builderType == "" {
 		http.Error(w, "missing 'builderType' parameter", 400)
@@ -140,32 +142,60 @@
 	}
 	user, _, _ := r.BasicAuth()
 
-	var closeNotify <-chan bool
-	if cn, ok := w.(http.CloseNotifier); ok {
-		closeNotify = cn.CloseNotify()
+	w.Header().Set("X-Supported-Version", buildlet.GomoteCreateStreamVersion)
+
+	wantStream := false // streaming JSON updates, one JSON message (type msg) per line
+	if clientVersion >= buildlet.GomoteCreateStreamVersion {
+		wantStream = true
+		w.Header().Set("Content-Type", "application/json; charset=utf-8")
+		w.(http.Flusher).Flush()
 	}
 
-	ctx := context.WithValue(context.Background(), buildletTimeoutOpt{}, time.Duration(0))
-	ctx, cancel := context.WithCancel(ctx)
-	// NOTE: don't defer close this cancel. If the context is
-	// closed, the pod is destroyed.
-	// TODO: clean this up.
+	si := &SchedItem{
+		HostType: bconf.HostType,
+		IsGomote: true,
+	}
+
+	ctx := r.Context()
+
+	// ticker for sending status updates to client
+	var ticker <-chan time.Time
+	if wantStream {
+		t := time.NewTicker(5 * time.Second)
+		defer t.Stop()
+		ticker = t.C
+	}
 
 	resc := make(chan *buildlet.Client)
 	errc := make(chan error)
+
 	go func() {
-		bc, err := sched.GetBuildlet(ctx, &SchedItem{
-			HostType: bconf.HostType,
-			IsGomote: true,
-		})
+		bc, err := sched.GetBuildlet(ctx, si)
 		if bc != nil {
 			resc <- bc
-			return
+		} else {
+			errc <- err
 		}
-		errc <- err
 	}()
+	// One of these fields is set:
+	type msg struct {
+		Error    string                   `json:"error"`
+		Buildlet *remoteBuildlet          `json:"buildlet"`
+		Status   types.BuildletWaitStatus `json:"status"`
+	}
+	sendJSONLine := func(v interface{}) {
+		jenc, err := json.Marshal(v)
+		if err != nil {
+			log.Fatalf("remote: error marshalling JSON of type %T: %v", v, v)
+		}
+		jenc = append(jenc, '\n')
+		w.Write(jenc)
+		w.(http.Flusher).Flush()
+	}
 	for {
 		select {
+		case <-ticker:
+			sendJSONLine(msg{Status: sched.waiterState(si)})
 		case bc := <-resc:
 			rb := &remoteBuildlet{
 				User:        user,
@@ -177,25 +207,23 @@
 			}
 			rb.Name = addRemoteBuildlet(rb)
 			bc.SetName(rb.Name)
-			jenc, err := json.MarshalIndent(rb, "", "  ")
-			if err != nil {
-				http.Error(w, err.Error(), 500)
-				log.Print(err)
-				return
-			}
 			log.Printf("created buildlet %v for %v (%s)", rb.Name, rb.User, bc.String())
-			w.Header().Set("Content-Type", "application/json; charset=utf-8")
-			jenc = append(jenc, '\n')
-			w.Write(jenc)
+			if wantStream {
+				// We already set a content-type above before we flushed, so don't
+				// set it again.
+			} else {
+				w.Header().Set("Content-Type", "application/json; charset=utf-8")
+			}
+			sendJSONLine(msg{Buildlet: rb})
 			return
 		case err := <-errc:
-			log.Printf("error creating buildlet: %v", err)
-			http.Error(w, err.Error(), 500)
+			log.Printf("error creating gomote buildlet: %v", err)
+			if wantStream {
+				sendJSONLine(msg{Error: err.Error()})
+			} else {
+				http.Error(w, err.Error(), 500)
+			}
 			return
-		case <-closeNotify:
-			log.Printf("client went away during buildlet create request")
-			cancel()
-			closeNotify = nil // unnecessary, but habit.
 		}
 	}
 }
diff --git a/cmd/coordinator/sched.go b/cmd/coordinator/sched.go
index 771aa3b..332053d 100644
--- a/cmd/coordinator/sched.go
+++ b/cmd/coordinator/sched.go
@@ -19,6 +19,7 @@
 	"golang.org/x/build/cmd/coordinator/spanlog"
 	"golang.org/x/build/dashboard"
 	"golang.org/x/build/internal/buildgo"
+	"golang.org/x/build/types"
 )
 
 // The Scheduler prioritizes access to buidlets. It accepts requests
@@ -267,6 +268,22 @@
 	return st
 }
 
+// waiterState returns tells waiter how many callers are on the line
+// in front of them.
+func (s *Scheduler) waiterState(waiter *SchedItem) (ws types.BuildletWaitStatus) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	m := s.waiting[waiter.HostType]
+	for si := range m {
+		if schedLess(si, waiter) {
+			ws.Ahead++
+		}
+	}
+
+	return ws
+}
+
 // schedLess reports whether scheduled item ia is "less" (more
 // important) than scheduled item ib.
 func schedLess(ia, ib *SchedItem) bool {
diff --git a/cmd/gomote/create.go b/cmd/gomote/create.go
index 2ec66db..5bf0ac6 100644
--- a/cmd/gomote/create.go
+++ b/cmd/gomote/create.go
@@ -13,8 +13,10 @@
 	"os"
 	"sort"
 	"strings"
+	"time"
 
 	"golang.org/x/build/buildlet"
+	"golang.org/x/build/types"
 )
 
 type builderType struct {
@@ -92,6 +94,9 @@
 		}
 		os.Exit(1)
 	}
+	var status bool
+	fs.BoolVar(&status, "status", true, "print regular status updates while waiting")
+
 	// TODO(bradfitz): restore this option, and send it to the coordinator:
 	// For now, comment it out so it's not misleading.
 	// var timeout time.Duration
@@ -103,11 +108,16 @@
 	}
 	builderType := fs.Arg(0)
 
+	t := time.Now()
 	cc, err := buildlet.NewCoordinatorClientFromFlags()
 	if err != nil {
 		return fmt.Errorf("failed to create coordinator client: %v", err)
 	}
-	client, err := cc.CreateBuildlet(builderType)
+	client, err := cc.CreateBuildletWithStatus(builderType, func(st types.BuildletWaitStatus) {
+		if status {
+			fmt.Fprintf(os.Stderr, "# still creating %s after %v; %d requests ahead of you\n", builderType, time.Since(t).Round(time.Second), st.Ahead)
+		}
+	})
 	if err != nil {
 		return fmt.Errorf("failed to create buildlet: %v", err)
 	}
diff --git a/types/types.go b/types/types.go
index 1261de4..081f049 100644
--- a/types/types.go
+++ b/types/types.go
@@ -168,3 +168,18 @@
 	}
 	return a.Minor < b.Minor
 }
+
+// BuildletWaitStatus is the periodic messages we send to "gomote create"
+// clients or show on trybot status pages to tell the user who long
+// they're expected to wait.
+type BuildletWaitStatus struct {
+	Ahead int `json:"ahead"` // number of waiters ahead of this buildlet request
+
+	// TODO: add number of active builds, and number of builds
+	// creating. And for how long. And maybe an estimate of how
+	// long those builds typically take? But recognize which are
+	// dynamic vs static (reverse) builder types and don't say
+	// that "1 is creating" on a reverse buildlet that can't
+	// actually "create" any. (It can just wait for one register
+	// itself)
+}