cmd/coordinator: add SlowBots support, opt-in to different slow trybots

This parses TRY= comments to opt-in to slower/difference trybots.

This needs some docs/UI work yet.

Updates golang/go#34501

Change-Id: I13a835520d7ac341bc86139b0a2118235bc83e60
Reviewed-on: https://go-review.googlesource.com/c/build/+/201338
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Bryan C. Mills <bcmills@google.com>
diff --git a/cmd/coordinator/coordinator.go b/cmd/coordinator/coordinator.go
index bb18dbc..ee356b6 100644
--- a/cmd/coordinator/coordinator.go
+++ b/cmd/coordinator/coordinator.go
@@ -42,6 +42,7 @@
 	"sync"
 	"sync/atomic"
 	"time"
+	"unicode"
 
 	"go4.org/syncutil"
 	grpc "grpc.go4.org"
@@ -1054,7 +1055,8 @@
 type trySet struct {
 	// immutable
 	tryKey
-	tryID string // "T" + 9 random hex
+	tryID    string                   // "T" + 9 random hex
+	slowBots []*dashboard.BuildConfig // any opt-in slower builders to run in a trybot run
 
 	// wantedAsOf is guarded by statusMu and is used by
 	// findTryWork. It records the last time this tryKey was still
@@ -1105,7 +1107,11 @@
 func newTrySet(work *apipb.GerritTryWorkItem) *trySet {
 	key := tryWorkItemKey(work)
 	goBranch := key.Branch // assume same as repo's branch for now
+
 	builders := dashboard.TryBuildersForProject(key.Project, key.Branch, goBranch)
+	slowBots := slowBotsFromComments(work, builders)
+	builders = append(builders, slowBots...)
+
 	log.Printf("Starting new trybot set for %v", key)
 	ts := &trySet{
 		tryKey: key,
@@ -1113,6 +1119,7 @@
 		trySetState: trySetState{
 			builds: make([]*buildStatus, 0, len(builders)),
 		},
+		slowBots: slowBots,
 	}
 
 	// Defensive check that the input is well-formed.
@@ -1237,7 +1244,11 @@
 // notifyStarting runs in its own goroutine and posts to Gerrit that
 // the trybots have started on the user's CL with a link of where to watch.
 func (ts *trySet) notifyStarting() {
-	msg := "TryBots beginning. Status page: https://farmer.golang.org/try?commit=" + ts.Commit[:8]
+	name := "TryBots"
+	if len(ts.slowBots) > 0 {
+		name = "SlowBots"
+	}
+	msg := name + " beginning. Status page: https://farmer.golang.org/try?commit=" + ts.Commit[:8]
 
 	ctx := context.Background()
 	if ci, err := gerritClient.GetChangeDetail(ctx, ts.ChangeTriple()); err == nil {
@@ -1382,20 +1393,38 @@
 	}
 
 	if remain == 0 {
-		score, msg := 1, "TryBots are happy."
-		if numFail > 0 {
+		name := "TryBots"
+		if len(ts.slowBots) > 0 {
+			name = "SlowBots"
+		}
+
+		var buf bytes.Buffer
+		var score int
+		if numFail == 0 {
+			score = 1
+			fmt.Fprintf(&buf, "%s are happy.\n", name)
+		} else if numFail > 0 {
+			score = -1
 			ts.mu.Lock()
 			errMsg := ts.errMsg.String()
 			ts.mu.Unlock()
-			score, msg = -1, fmt.Sprintf("%d of %d TryBots failed:\n%s\n"+failureFooter,
-				numFail, len(ts.builds), errMsg)
+			fmt.Fprintf(&buf, "%d of %d %s failed:\n%s\n"+failureFooter,
+				numFail, len(ts.builds), name, errMsg)
 		}
+		if len(ts.slowBots) > 0 {
+			fmt.Fprintf(&buf, "Extra slowbot builds that ran:\n")
+			for _, c := range ts.slowBots {
+				fmt.Fprintf(&buf, "* %s\n", c.Name)
+			}
+		}
+		// TODO: provide a link in the final report that links to a permanent summary page
+		// of which builds ran, and for how long.
 		if len(benchResults) > 0 {
 			// TODO: restore this functionality
 			// msg += fmt.Sprintf("\nBenchmark results are available at:\nhttps://perf.golang.org/search?q=cl:%d+try:%s", ts.ci.ChangeNumber, ts.tryID)
 		}
 		if err := gerritClient.SetReview(context.Background(), ts.ChangeTriple(), ts.Commit, gerrit.ReviewInput{
-			Message: msg,
+			Message: buf.String(),
 			Labels: map[string]int{
 				"TryBot-Result": score,
 			},
@@ -1857,12 +1886,25 @@
 
 func (st *buildStatus) isTry() bool { return st.trySet != nil }
 
+func (st *buildStatus) isSlowBot() bool {
+	if st.trySet == nil {
+		return false
+	}
+	for _, conf := range st.trySet.slowBots {
+		if st.conf == conf {
+			return true
+		}
+	}
+	return false
+}
+
 func (st *buildStatus) buildRecord() *types.BuildRecord {
 	rec := &types.BuildRecord{
 		ID:        st.buildID,
 		ProcessID: processID,
 		StartTime: st.startTime,
 		IsTry:     st.isTry(),
+		IsExtra:   st.isSlowBot(),
 		GoRev:     st.Rev,
 		Rev:       st.SubRevOrGoRev(),
 		Repo:      st.RepoOrGo(),
@@ -3634,3 +3676,59 @@
 	}
 	return "golang.org/x/" + repo
 }
+
+// slowBotsFromComments looks at the TRY= comments from Gerrit (in
+// work) and returns any addition slow trybot build configuration that
+// should be tested in addition to the ones provided in the existing
+// slice.
+func slowBotsFromComments(work *apipb.GerritTryWorkItem, existing []*dashboard.BuildConfig) (extraBuilders []*dashboard.BuildConfig) {
+	tryMsg := latestTryMessage(work) // "aix, darwin, linux-386-387, arm64"
+	if tryMsg == "" {
+		return nil
+	}
+	if len(tryMsg) > 1<<10 { // arbitrary sanity
+		return nil
+	}
+
+	have := map[string]bool{}
+	for _, bc := range existing {
+		have[bc.Name] = true
+	}
+
+	tryTerms := strings.FieldsFunc(tryMsg, func(c rune) bool {
+		return !unicode.IsLetter(c) && !unicode.IsNumber(c) && c != '-' && c != '_'
+	})
+	for _, bc := range dashboard.Builders {
+		if have[bc.Name] {
+			continue
+		}
+		for _, term := range tryTerms {
+			if bc.MatchesSlowBotTerm(term) {
+				extraBuilders = append(extraBuilders, bc)
+				break
+			}
+		}
+	}
+	sort.Slice(extraBuilders, func(i, j int) bool {
+		return extraBuilders[i].Name < extraBuilders[j].Name
+	})
+	return
+}
+
+func latestTryMessage(work *apipb.GerritTryWorkItem) string {
+	// Prioritize exact version matches first
+	for i := len(work.TryMessage) - 1; i >= 0; i-- {
+		m := work.TryMessage[i]
+		if m.Version == work.Version {
+			return m.Message
+		}
+	}
+	// Otherwise the latest message at all
+	for i := len(work.TryMessage) - 1; i >= 0; i-- {
+		m := work.TryMessage[i]
+		if m.Message != "" {
+			return m.Message
+		}
+	}
+	return ""
+}
diff --git a/cmd/coordinator/coordinator_test.go b/cmd/coordinator/coordinator_test.go
index 7c3c02f..2ad4442 100644
--- a/cmd/coordinator/coordinator_test.go
+++ b/cmd/coordinator/coordinator_test.go
@@ -19,6 +19,7 @@
 	"time"
 
 	"golang.org/x/build/buildenv"
+	"golang.org/x/build/dashboard"
 	"golang.org/x/build/internal/buildgo"
 	"golang.org/x/build/maintner/maintnerd/apipb"
 )
@@ -211,3 +212,41 @@
 		t.Error(buf.String())
 	}
 }
+
+func mustConf(t *testing.T, name string) *dashboard.BuildConfig {
+	conf, ok := dashboard.Builders[name]
+	if !ok {
+		t.Fatalf("unknown builder %q", name)
+	}
+	return conf
+}
+
+func TestSlowBotsFromComments(t *testing.T) {
+	existing := []*dashboard.BuildConfig{mustConf(t, "linux-amd64")}
+	work := &apipb.GerritTryWorkItem{
+		Version: 2,
+		TryMessage: []*apipb.TryVoteMessage{
+			{
+				Version: 1,
+				Message: "ios",
+			},
+			{
+				Version: 2,
+				Message: "arm64, mac aix ",
+			},
+			{
+				Version: 1,
+				Message: "aix",
+			},
+		},
+	}
+	extra := slowBotsFromComments(work, existing)
+	var got []string
+	for _, bc := range extra {
+		got = append(got, bc.Name)
+	}
+	want := []string{"aix-ppc64", "darwin-amd64-10_14", "linux-arm64-packet"}
+	if !reflect.DeepEqual(got, want) {
+		t.Errorf("mismatch:\n got: %q\nwant: %q\n", got, want)
+	}
+}
diff --git a/dashboard/builders.go b/dashboard/builders.go
index c12ab1d..ebc2d51 100644
--- a/dashboard/builders.go
+++ b/dashboard/builders.go
@@ -19,6 +19,81 @@
 	"golang.org/x/build/types"
 )
 
+// slowBotAliases maps short names from TRY= comments to which builder to run.
+//
+// TODO: we'll likely expand this, or move it, or change the matching
+// syntax entirely. This is a first draft.
+var slowBotAliases = map[string]string{
+	// Known missing builders:
+	"linux-mips":    "",
+	"linux-mips64":  "",
+	"linux-mipsle":  "",
+	"mips":          "",
+	"mips64":        "",
+	"mipsle":        "",
+	"netbsd-arm64":  "",
+	"openbsd-arm":   "",
+	"openbsd-arm64": "",
+	"nacl-arm":      "",
+
+	"386":            "linux-386",
+	"aix":            "aix-ppc64",
+	"amd64":          "linux-amd64",
+	"amd64p32":       "nacl-amd64p32",
+	"android":        "android-amd64-emu",
+	"android-386":    "android-386-emu",
+	"android-amd64":  "android-amd64-emu",
+	"android-arm":    "android-arm-corellium",
+	"android-arm64":  "android-arm64-corellium",
+	"arm":            "linux-arm",
+	"arm64":          "linux-arm64-packet",
+	"arm64p32":       "nacl-amd64p32",
+	"darwin":         "darwin-amd64-10_14",
+	"darwin-386":     "darwin-386-10_14",
+	"darwin-amd64":   "darwin-amd64-10_14",
+	"darwin-arm":     "darwin-arm-mg912baios",
+	"darwin-arm64":   "darwin-arm64-corellium",
+	"dragonfly":      "dragonfly-amd64",
+	"freebsd":        "freebsd-amd64-12_0",
+	"freebsd-386":    "freebsd-386-12_0",
+	"freebsd-amd64":  "freebsd-amd64-12_0",
+	"freebsd-arm":    "freebsd-arm-paulzhol",
+	"illumos":        "illumos-amd64",
+	"ios":            "darwin-arm64-corellium",
+	"js":             "js-wasm",
+	"linux":          "linux-amd64",
+	"linux-arm64":    "linux-arm64-packet",
+	"linux-mips64le": "linux-mips64le-mengzhuo",
+	"linux-ppc64":    "linux-ppc64-buildlet",
+	"linux-ppc64le":  "linux-ppc64le-buildlet",
+	"linux-s390x":    "linux-s390x-ibm",
+	"mac":            "darwin-amd64-10_14",
+	"macos":          "darwin-amd64-10_14",
+	"mips64le":       "linux-mips64le-mengzhuo",
+	"nacl":           "nacl-amd64p32",
+	"nacl-387":       "nacl-386",
+	"nacl-arm64p32":  "nacl-amd64p32",
+	"netbsd":         "netbsd-amd64-8_0",
+	"netbsd-386":     "netbsd-386-8_0",
+	"netbsd-amd64":   "netbsd-amd64-8_0",
+	"netbsd-arm":     "netbsd-arm-bsiegert",
+	"openbsd":        "openbsd-amd64-64",
+	"openbsd-386":    "openbsd-386-64",
+	"openbsd-amd64":  "openbsd-amd64-64",
+	"plan9":          "plan9-386-0intro",
+	"plan9-386":      "plan9-386-0intro",
+	"plan9-amd64":    "plan9-amd64-9front",
+	"ppc64":          "linux-ppc64-buildlet",
+	"ppc64le":        "linux-ppc64le-buildlet",
+	"s390x":          "linux-s390x-ibm",
+	"solaris":        "solaris-amd64-oraclerel",
+	"solaris-amd64":  "solaris-amd64-oraclerel",
+	"wasm":           "js-wasm",
+	"windows":        "windows-amd64-2016",
+	"windows-386":    "windows-386-2008",
+	"windows-amd64":  "windows-amd64-2016",
+}
+
 // Builders are the different build configurations.
 // The keys are like "darwin-amd64" or "linux-386-387".
 // This map should not be modified by other packages.
@@ -787,6 +862,13 @@
 	return arch[:i]
 }
 
+// MatchesSlowBotTerm reports whether some provided term from a
+// TRY=... comment on a Run-TryBot+1 vote on Gerrit should match this
+// build config.
+func (c *BuildConfig) MatchesSlowBotTerm(term string) bool {
+	return term != "" && (term == c.Name || slowBotAliases[term] == c.Name)
+}
+
 // FilePathJoin is mostly like filepath.Join (without the cleaning) except
 // it uses the path separator of c.GOOS instead of the host system's.
 func (c *BuildConfig) FilePathJoin(x ...string) string {
diff --git a/dashboard/builders_test.go b/dashboard/builders_test.go
index 52d0178..26611a8 100644
--- a/dashboard/builders_test.go
+++ b/dashboard/builders_test.go
@@ -5,7 +5,12 @@
 package dashboard
 
 import (
+	"bytes"
 	"fmt"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"sort"
 	"strings"
 	"testing"
 	"time"
@@ -645,3 +650,66 @@
 		}
 	}
 }
+
+func TestSlowBotAliases(t *testing.T) {
+	for term, name := range slowBotAliases {
+		if name == "" {
+			// Empty string means known missing builder.
+			continue
+		}
+		if _, ok := Builders[name]; !ok {
+			t.Errorf("slowbot term %q references unknown builder %q", term, name)
+		}
+	}
+
+	out, err := exec.Command(filepath.Join(runtime.GOROOT(), "bin", "go"), "tool", "dist", "list").Output()
+	if err != nil {
+		t.Errorf("dist list: %v", err)
+	}
+	ports := strings.Fields(string(out))
+
+	done := map[string]bool{}
+	var add bytes.Buffer
+	check := func(term string, isArch bool) {
+		if done[term] {
+			return
+		}
+		done[term] = true
+		_, isBuilderName := Builders[term]
+		_, hasAlias := slowBotAliases[term]
+		if !isBuilderName && !hasAlias {
+			prefix := term
+			if isArch {
+				prefix = "linux-" + term
+			}
+			var matches []string
+			for name := range Builders {
+				if strings.HasPrefix(name, prefix) {
+					matches = append(matches, name)
+				}
+			}
+			sort.Strings(matches)
+			t.Errorf("term %q has no match in slowBotAliases", term)
+			if len(matches) == 1 {
+				fmt.Fprintf(&add, "%q: %q,\n", term, matches[0])
+			} else if len(matches) > 1 {
+				t.Errorf("maybe add:  %q: %q,    (matches=%q)", term, matches[len(matches)-1], matches)
+			}
+		}
+	}
+
+	for _, port := range ports {
+		slash := strings.IndexByte(port, '/')
+		if slash == -1 {
+			t.Fatalf("unexpected port %q", port)
+		}
+		goos, goarch := port[:slash], port[slash+1:]
+		check(goos+"-"+goarch, false)
+		check(goos, false)
+		check(goarch, true)
+	}
+
+	if add.Len() > 0 {
+		t.Errorf("Missing items from slowBotAliases:\n%s", add.String())
+	}
+}
diff --git a/types/types.go b/types/types.go
index 71a8320..1261de4 100644
--- a/types/types.go
+++ b/types/types.go
@@ -91,6 +91,7 @@
 	ProcessID     string
 	StartTime     time.Time
 	IsTry         bool // is trybot run
+	IsExtra       bool // is an extra opt-in "slowbot" builder, not on by default
 	GoRev         string
 	Rev           string // same as GoRev for repo "go"
 	Repo          string // "go", "net", etc.