cmd/coordinator: make contended gomote create request cancel post-submit build
If somebody wants to "gomote create" a reverse buildlet that's busy
doing a post-submit build, the coordinator tries to find an active
non-trybot build and call (*buildStatus).cancelBuild() on it, to make
it clean up and return itself to the coordinator, so the scheduler
will get it soon and give it to the gomote user.
Fixes golang/go#35714
Change-Id: I417aead083cf3520058b3fd4d626fc385f25984e
Reviewed-on: https://go-review.googlesource.com/c/build/+/209757
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Alexander Rakoczy <alex@golang.org>
diff --git a/cmd/coordinator/coordinator.go b/cmd/coordinator/coordinator.go
index 9a54578..cad470a 100644
--- a/cmd/coordinator/coordinator.go
+++ b/cmd/coordinator/coordinator.go
@@ -562,6 +562,30 @@
return nil
}
+// cancelOnePostSubmitBuildWithHostType looks for one running
+// post-submit (non-trybot) build whose host type is hostType, starts
+// canceling it in a new goroutine, and reports whether one was found.
+//
+// Of the matching builds it currently picks the one with the latest
+// startTime (running the least amount of time); don't rely on that.
+func cancelOnePostSubmitBuildWithHostType(hostType string) bool {
+ statusMu.Lock() // held until return, i.e. for the whole scan of status
+ defer statusMu.Unlock()
+ var best *buildStatus
+ for _, st := range status {
+ if st.isTry() || st.conf.HostType != hostType { // not a candidate: trybot build or different host type
+ continue
+ }
+ if best == nil || st.startTime.After(best.startTime) { // keep the most recently started candidate
+ best = st
+ }
+ }
+ if best != nil {
+ go best.cancelBuild() // asynchronous: the cancel is not awaited before returning
+ }
+ return best != nil
+}
+
type byAge []*buildStatus
func (s byAge) Len() int { return len(s) }
diff --git a/cmd/coordinator/remote.go b/cmd/coordinator/remote.go
index effa66c..19fb411 100644
--- a/cmd/coordinator/remote.go
+++ b/cmd/coordinator/remote.go
@@ -171,6 +171,8 @@
resc := make(chan *buildlet.Client)
errc := make(chan error)
+ hconf := bconf.HostConfig()
+
go func() {
bc, err := sched.GetBuildlet(ctx, si)
if bc != nil {
@@ -194,6 +196,30 @@
w.Write(jenc)
w.(http.Flusher).Flush()
}
+ sendText := func(s string) {
+ sendJSONLine(msg{Status: &types.BuildletWaitStatus{Message: s}})
+ }
+
+ // If the gomote builder type requested is a reverse buildlet
+ // and all instances are busy, try canceling a post-submit
+ // build so it'll reconnect and the scheduler will give it to
+ // the higher priority gomote user.
+ isReverse := hconf.IsReverse
+ if isReverse {
+ if hs := reversePool.buildReverseStatusJSON().HostTypes[hconf.HostType]; hs == nil {
+ sendText(fmt.Sprintf("host type %q is not elastic; no machines are connected", hconf.HostType))
+ } else {
+ sendText(fmt.Sprintf("host type %q is not elastic; %d of %d machines connected, %d busy",
+ hconf.HostType, hs.Connected, hs.Expect, hs.Busy))
+ if hs.Connected > 0 && hs.Idle == 0 {
+ // Try to cancel one.
+ if cancelOnePostSubmitBuildWithHostType(hconf.HostType) {
+ sendText(fmt.Sprintf("canceled a post-submit build on a machine of type %q; it should reconnect and get assigned to you", hconf.HostType))
+ }
+ }
+ }
+ }
+
for {
select {
case <-ticker:
diff --git a/cmd/gomote/create.go b/cmd/gomote/create.go
index 5bf0ac6..18d0d2c 100644
--- a/cmd/gomote/create.go
+++ b/cmd/gomote/create.go
@@ -115,6 +115,10 @@
}
client, err := cc.CreateBuildletWithStatus(builderType, func(st types.BuildletWaitStatus) {
if status {
+ if st.Message != "" {
+ fmt.Fprintf(os.Stderr, "# %s\n", st.Message)
+ return
+ }
fmt.Fprintf(os.Stderr, "# still creating %s after %v; %d requests ahead of you\n", builderType, time.Since(t).Round(time.Second), st.Ahead)
}
})
diff --git a/types/types.go b/types/types.go
index 081f049..a068ae6 100644
--- a/types/types.go
+++ b/types/types.go
@@ -173,7 +173,12 @@
// clients or show on trybot status pages to tell the user how long
// they're expected to wait.
type BuildletWaitStatus struct {
- Ahead int `json:"ahead"` // number of waiters ahead of this buildlet request
+ // Message is a free-form message to send to the user's gomote binary.
+ // If present, all other fields are ignored.
+ Message string `json:"message"`
+
+ // Ahead is the number of waiters ahead of this buildlet request.
+ Ahead int `json:"ahead"`
// TODO: add number of active builds, and number of builds
// creating. And for how long. And maybe an estimate of how