cmd/coordinator, cmd/gomote: show periodic status updates to stderr
Also modernizes some code in the coordinator.
Updates golang/go#35354 (or fixes. But we could return more info.)
Change-Id: Ifc1aa85ca217a0932e388ec5d36ef0737b90c63d
Reviewed-on: https://go-review.googlesource.com/c/build/+/207841
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Bryan C. Mills <bcmills@google.com>
diff --git a/buildlet/remote.go b/buildlet/remote.go
index ecd51c3..179844f 100644
--- a/buildlet/remote.go
+++ b/buildlet/remote.go
@@ -5,12 +5,14 @@
package buildlet
import (
+ "bufio"
"bytes"
"encoding/json"
"errors"
"flag"
"fmt"
"io/ioutil"
+ "log"
"net/http"
"net/url"
"os"
@@ -22,6 +24,7 @@
"golang.org/x/build"
"golang.org/x/build/buildenv"
+ "golang.org/x/build/types"
)
type UserPass struct {
@@ -73,15 +76,28 @@
// act as either linux-amd64 or linux-386-387.
//
// It may expire at any time.
-// To release it, call Client.Destroy.
+// To release it, call Client.Close.
func (cc *CoordinatorClient) CreateBuildlet(builderType string) (*Client, error) {
+ return cc.CreateBuildletWithStatus(builderType, nil)
+}
+
+const (
+ // GomoteCreateStreamVersion is the gomote protocol version at which JSON streamed responses started.
+ GomoteCreateStreamVersion = "20191119"
+
+ // GomoteCreateMinVersion is the oldest "gomote create" protocol version that's still supported.
+ GomoteCreateMinVersion = "20160922"
+)
+
+// CreateBuildletWithStatus is like CreateBuildlet but accepts an optional status callback.
+func (cc *CoordinatorClient) CreateBuildletWithStatus(builderType string, status func(types.BuildletWaitStatus)) (*Client, error) {
hc, err := cc.client()
if err != nil {
return nil, err
}
ipPort, _ := cc.instance().TLSHostPort() // must succeed if client did
form := url.Values{
- "version": {"20160922"}, // checked by cmd/coordinator/remote.go
+ "version": {GomoteCreateStreamVersion}, // checked by cmd/coordinator/remote.go
"builderType": {builderType},
}
req, _ := http.NewRequest("POST",
@@ -99,18 +115,52 @@
slurp, _ := ioutil.ReadAll(res.Body)
return nil, fmt.Errorf("%s: %s", res.Status, slurp)
}
- var rb RemoteBuildlet
- if err := json.NewDecoder(res.Body).Decode(&rb); err != nil {
- return nil, err
+
+ // TODO: delete this once the server's been deployed with it.
+ // This code only exists for compatibility for a day or two at most.
+ if res.Header.Get("X-Supported-Version") < GomoteCreateStreamVersion {
+ var rb RemoteBuildlet
+ if err := json.NewDecoder(res.Body).Decode(&rb); err != nil {
+ return nil, err
+ }
+ return cc.NamedBuildlet(rb.Name)
}
- if rb.Name == "" {
- return nil, errors.New("buildlet: failed to create remote buildlet; unexpected missing name in response")
+
+ type msg struct {
+ Error string `json:"error"`
+ Buildlet *RemoteBuildlet `json:"buildlet"`
+ Status *types.BuildletWaitStatus `json:"status"`
}
- c, err := cc.NamedBuildlet(rb.Name)
- if err != nil {
- return nil, err
+ bs := bufio.NewScanner(res.Body)
+ for bs.Scan() {
+ line := bs.Bytes()
+ var m msg
+ if err := json.Unmarshal(line, &m); err != nil {
+ return nil, err
+ }
+ if m.Error != "" {
+ return nil, errors.New(m.Error)
+ }
+ if m.Buildlet != nil {
+ if m.Buildlet.Name == "" {
+ return nil, fmt.Errorf("buildlet: coordinator's /buildlet/create returned an unnamed buildlet")
+ }
+ return cc.NamedBuildlet(m.Buildlet.Name)
+ }
+ if m.Status != nil {
+ if status != nil {
+ status(*m.Status)
+ }
+ continue
+ }
+ log.Printf("buildlet: unknown message type from coordinator's /buildlet/create endpoint: %q", line)
+ continue
}
- return c, nil
+ err = bs.Err()
+ if err == nil {
+ err = errors.New("buildlet: coordinator's /buildlet/create ended its response stream without a terminal message")
+ }
+ return nil, err
}
type RemoteBuildlet struct {
diff --git a/cmd/coordinator/remote.go b/cmd/coordinator/remote.go
index ec461ae..e1302df 100644
--- a/cmd/coordinator/remote.go
+++ b/cmd/coordinator/remote.go
@@ -41,6 +41,7 @@
"golang.org/x/build/buildlet"
"golang.org/x/build/dashboard"
"golang.org/x/build/internal/gophers"
+ "golang.org/x/build/types"
gossh "golang.org/x/crypto/ssh"
)
@@ -123,11 +124,12 @@
http.Error(w, "POST required", 400)
return
}
- const serverVersion = "20160922" // sent by cmd/gomote via buildlet/remote.go
- if version := r.FormValue("version"); version < serverVersion {
- http.Error(w, fmt.Sprintf("gomote client version %q is too old; predates server version %q", version, serverVersion), 400)
+ clientVersion := r.FormValue("version")
+ if clientVersion < buildlet.GomoteCreateMinVersion {
+ http.Error(w, fmt.Sprintf("gomote client version %q is too old; predates server minimum version %q", clientVersion, buildlet.GomoteCreateMinVersion), 400)
return
}
+
builderType := r.FormValue("builderType")
if builderType == "" {
http.Error(w, "missing 'builderType' parameter", 400)
@@ -140,32 +142,60 @@
}
user, _, _ := r.BasicAuth()
- var closeNotify <-chan bool
- if cn, ok := w.(http.CloseNotifier); ok {
- closeNotify = cn.CloseNotify()
+ w.Header().Set("X-Supported-Version", buildlet.GomoteCreateStreamVersion)
+
+ wantStream := false // streaming JSON updates, one JSON message (type msg) per line
+ if clientVersion >= buildlet.GomoteCreateStreamVersion {
+ wantStream = true
+ w.Header().Set("Content-Type", "application/json; charset=utf-8")
+ w.(http.Flusher).Flush()
}
- ctx := context.WithValue(context.Background(), buildletTimeoutOpt{}, time.Duration(0))
- ctx, cancel := context.WithCancel(ctx)
- // NOTE: don't defer close this cancel. If the context is
- // closed, the pod is destroyed.
- // TODO: clean this up.
+ si := &SchedItem{
+ HostType: bconf.HostType,
+ IsGomote: true,
+ }
+
+ ctx := r.Context()
+
+ // ticker for sending status updates to client
+ var ticker <-chan time.Time
+ if wantStream {
+ t := time.NewTicker(5 * time.Second)
+ defer t.Stop()
+ ticker = t.C
+ }
resc := make(chan *buildlet.Client)
errc := make(chan error)
+
go func() {
- bc, err := sched.GetBuildlet(ctx, &SchedItem{
- HostType: bconf.HostType,
- IsGomote: true,
- })
+ bc, err := sched.GetBuildlet(ctx, si)
if bc != nil {
resc <- bc
- return
+ } else {
+ errc <- err
}
- errc <- err
}()
+ // One of these fields is set:
+ type msg struct {
+ Error string `json:"error"`
+ Buildlet *remoteBuildlet `json:"buildlet"`
+ Status types.BuildletWaitStatus `json:"status"`
+ }
+ sendJSONLine := func(v interface{}) {
+ jenc, err := json.Marshal(v)
+ if err != nil {
+ log.Fatalf("remote: error marshalling JSON of type %T: %v", v, v)
+ }
+ jenc = append(jenc, '\n')
+ w.Write(jenc)
+ w.(http.Flusher).Flush()
+ }
for {
select {
+ case <-ticker:
+ sendJSONLine(msg{Status: sched.waiterState(si)})
case bc := <-resc:
rb := &remoteBuildlet{
User: user,
@@ -177,25 +207,23 @@
}
rb.Name = addRemoteBuildlet(rb)
bc.SetName(rb.Name)
- jenc, err := json.MarshalIndent(rb, "", " ")
- if err != nil {
- http.Error(w, err.Error(), 500)
- log.Print(err)
- return
- }
log.Printf("created buildlet %v for %v (%s)", rb.Name, rb.User, bc.String())
- w.Header().Set("Content-Type", "application/json; charset=utf-8")
- jenc = append(jenc, '\n')
- w.Write(jenc)
+ if wantStream {
+ // We already set a content-type above before we flushed, so don't
+ // set it again.
+ } else {
+ w.Header().Set("Content-Type", "application/json; charset=utf-8")
+ }
+ sendJSONLine(msg{Buildlet: rb})
return
case err := <-errc:
- log.Printf("error creating buildlet: %v", err)
- http.Error(w, err.Error(), 500)
+ log.Printf("error creating gomote buildlet: %v", err)
+ if wantStream {
+ sendJSONLine(msg{Error: err.Error()})
+ } else {
+ http.Error(w, err.Error(), 500)
+ }
return
- case <-closeNotify:
- log.Printf("client went away during buildlet create request")
- cancel()
- closeNotify = nil // unnecessary, but habit.
}
}
}
diff --git a/cmd/coordinator/sched.go b/cmd/coordinator/sched.go
index 771aa3b..332053d 100644
--- a/cmd/coordinator/sched.go
+++ b/cmd/coordinator/sched.go
@@ -19,6 +19,7 @@
"golang.org/x/build/cmd/coordinator/spanlog"
"golang.org/x/build/dashboard"
"golang.org/x/build/internal/buildgo"
+ "golang.org/x/build/types"
)
// The Scheduler prioritizes access to buidlets. It accepts requests
@@ -267,6 +268,22 @@
return st
}
+// waiterState returns tells waiter how many callers are on the line
+// in front of them.
+func (s *Scheduler) waiterState(waiter *SchedItem) (ws types.BuildletWaitStatus) {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+
+ m := s.waiting[waiter.HostType]
+ for si := range m {
+ if schedLess(si, waiter) {
+ ws.Ahead++
+ }
+ }
+
+ return ws
+}
+
// schedLess reports whether scheduled item ia is "less" (more
// important) than scheduled item ib.
func schedLess(ia, ib *SchedItem) bool {
diff --git a/cmd/gomote/create.go b/cmd/gomote/create.go
index 2ec66db..5bf0ac6 100644
--- a/cmd/gomote/create.go
+++ b/cmd/gomote/create.go
@@ -13,8 +13,10 @@
"os"
"sort"
"strings"
+ "time"
"golang.org/x/build/buildlet"
+ "golang.org/x/build/types"
)
type builderType struct {
@@ -92,6 +94,9 @@
}
os.Exit(1)
}
+ var status bool
+ fs.BoolVar(&status, "status", true, "print regular status updates while waiting")
+
// TODO(bradfitz): restore this option, and send it to the coordinator:
// For now, comment it out so it's not misleading.
// var timeout time.Duration
@@ -103,11 +108,16 @@
}
builderType := fs.Arg(0)
+ t := time.Now()
cc, err := buildlet.NewCoordinatorClientFromFlags()
if err != nil {
return fmt.Errorf("failed to create coordinator client: %v", err)
}
- client, err := cc.CreateBuildlet(builderType)
+ client, err := cc.CreateBuildletWithStatus(builderType, func(st types.BuildletWaitStatus) {
+ if status {
+ fmt.Fprintf(os.Stderr, "# still creating %s after %v; %d requests ahead of you\n", builderType, time.Since(t).Round(time.Second), st.Ahead)
+ }
+ })
if err != nil {
return fmt.Errorf("failed to create buildlet: %v", err)
}
diff --git a/types/types.go b/types/types.go
index 1261de4..081f049 100644
--- a/types/types.go
+++ b/types/types.go
@@ -168,3 +168,18 @@
}
return a.Minor < b.Minor
}
+
+// BuildletWaitStatus is the periodic messages we send to "gomote create"
+// clients or show on trybot status pages to tell the user who long
+// they're expected to wait.
+type BuildletWaitStatus struct {
+ Ahead int `json:"ahead"` // number of waiters ahead of this buildlet request
+
+ // TODO: add number of active builds, and number of builds
+ // creating. And for how long. And maybe an estimate of how
+ // long those builds typically take? But recognize which are
+ // dynamic vs static (reverse) builder types and don't say
+ // that "1 is creating" on a reverse buildlet that can't
+ // actually "create" any. (It can just wait for one register
+ // itself)
+}