cmd/coordinator: handle new dist test names with current adjust policies

The coordinator has some custom logic for "go_test:*" dist test names
involved in partitioning, merging, and sorting them using historical
test timing data. We also have many builder-specific dist test adjust
policies, and some parts of them have become out of date (e.g.,
macTestPolicy skips "wiki", a dist test that's long gone).

Go 1.21 is simplifying the naming pattern for dist test names, and
we want to do that without having to manually update the (many!)
existing adjust policies, as well as to avoid breaking or creating
a need to re-engineer the existing mechanisms that make "go_test:*"
dist test names special.

Make that possible by remapping the 'go tool dist test -list' output
from the new "pkg[:variant]" format to the previous format (or pass it
through unmodified if it's already in the old format) for the purpose
of dist test adjust policies only. Also keep the raw unmodified dist
test name around since it's needed for invoking said dist tests.

For golang/go#37486.
For golang/go#59990.

Change-Id: Ie958609de2a810fa0053a00091a0c54c3f8bfd83
Reviewed-on: https://go-review.googlesource.com/c/build/+/496190
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Austin Clements <austin@google.com>
Auto-Submit: Dmitri Shuralyov <dmitshur@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Dmitri Shuralyov <dmitshur@golang.org>
diff --git a/cmd/coordinator/buildstatus.go b/cmd/coordinator/buildstatus.go
index 7685a6e..aff0f93 100644
--- a/cmd/coordinator/buildstatus.go
+++ b/cmd/coordinator/buildstatus.go
@@ -867,7 +867,13 @@
 	gceErrsClient.ReportSync(ctx, errorreporting.Entry{Error: err})
 }
 
-func (st *buildStatus) distTestList() (names []string, remoteErr, err error) {
+// distTestList uses 'go tool dist test -list' to get a list of dist test names.
+//
+// As of Go 1.21, the dist test naming pattern has changed to always be in the
+// form of "<pkg>[:<variant>]", where "<pkg>" means what used to be previously
+// named "go_test:<pkg>". distTestList maps those new dist test names back to
+// that previous format, a combination of "go_test[_bench]:<pkg>" and others.
+func (st *buildStatus) distTestList() (names []distTestName, remoteErr, err error) {
 	workDir, err := st.bc.WorkDir(st.ctx)
 	if err != nil {
 		err = fmt.Errorf("distTestList, WorkDir: %v", err)
@@ -899,9 +905,12 @@
 		err = fmt.Errorf("Exec error: %v, %s", err, buf.Bytes())
 		return
 	}
-	for _, test := range strings.Fields(buf.String()) {
+	// To avoid needing to update all the existing dist test adjust policies,
+	// it's easier to remap new dist test names in "<pkg>[:<variant>]" format
+	// to ones used in Go 1.20 and prior. Do that for now.
+	for _, test := range go120DistTestNames(strings.Fields(buf.String())) {
 		isNormalTry := st.isTry() && !st.isSlowBot()
-		if !st.conf.ShouldRunDistTest(test, isNormalTry) {
+		if !st.conf.ShouldRunDistTest(test.Old, isNormalTry) {
 			continue
 		}
 		names = append(names, test)
@@ -909,20 +918,81 @@
 	return names, nil, nil
 }
 
+// go120DistTestNames converts a list of dist test names from
+// an arbitrary Go distribution to the format used in Go 1.20
+// and prior versions. (Go 1.21 introduces a simpler format.)
+//
+// This exists only to avoid rewriting current dist adjust policies.
+// We wish to avoid new dist adjust policies, but if they're truly needed,
+// they can choose to start using new dist test names instead.
+func go120DistTestNames(names []string) []distTestName {
+	if len(names) == 0 {
+		// Only happens if there's a problem, but no need to panic.
+		return nil
+	} else if strings.HasPrefix(names[0], "go_test:") {
+		// In Go 1.21 and newer no dist tests have a "go_test:" prefix.
+		// In Go 1.20 and older, go tool dist test -list always returns
+		// at least one "go_test:*" test first.
+		// So if we see it, the list is already in Go 1.20 format.
+		var s []distTestName
+		for _, old := range names {
+			s = append(s, distTestName{old, old})
+		}
+		return s
+	}
+	// Remap the new Go 1.21+ dist test names to old ones.
+	var s []distTestName
+	for _, new := range names {
+		var old string
+		switch pkg, variant, _ := strings.Cut(new, ":"); {
+		// Special cases. Enough to cover what's used by old dist
+		// adjust policies. Not much use in going far beyond that.
+		case variant == "nolibgcc":
+			old = "nolibgcc:" + pkg
+		case variant == "race":
+			old = "race"
+		case variant == "moved_goroot":
+			old = "moved_goroot"
+		case pkg == "cmd/internal/testdir":
+			if variant == "" {
+				// Handle this too for when we stop doing special-case sharding only for testdir inside dist.
+				variant = "0_1"
+			}
+			old = "test:" + variant
+		case pkg == "cmd/api" && variant == "check":
+			old = "api"
+		case pkg == "cmd/internal/bootstrap_test":
+			old = "reboot"
+
+		// Easy regular cases.
+		case variant == "":
+			old = "go_test:" + pkg
+		case variant == "racebench":
+			old = "go_test_bench:" + pkg
+
+		// Neither a known special case nor a regular case.
+		default:
+			old = new // Less bad than leaving it empty.
+		}
+		s = append(s, distTestName{Old: old, Raw: new})
+	}
+	return s
+}
+
 type token struct{}
 
-// newTestSet returns a new testSet given the dist test names (strings from "go tool dist test -list")
+// newTestSet returns a new testSet given the dist test names (from "go tool dist test -list")
 // and benchmark items.
-func (st *buildStatus) newTestSet(testStats *buildstats.TestStats, distTestNames []string) (*testSet, error) {
+func (st *buildStatus) newTestSet(testStats *buildstats.TestStats, names []distTestName) (*testSet, error) {
 	set := &testSet{
 		st:        st,
 		testStats: testStats,
 	}
-	for _, name := range distTestNames {
+	for _, name := range names {
 		set.items = append(set.items, &testItem{
 			set:      set,
 			name:     name,
-			duration: testStats.Duration(st.BuilderRev.Name, name),
+			duration: testStats.Duration(st.BuilderRev.Name, name.Old),
 			take:     make(chan token, 1),
 			done:     make(chan token),
 		})
@@ -1593,7 +1663,7 @@
 				timer.Stop()
 				break AwaitDone
 			case <-timer.C:
-				st.LogEventTime("still_waiting_on_test", ti.name)
+				st.LogEventTime("still_waiting_on_test", ti.name.Old)
 			case <-buildletsGone:
 				set.cancelAll()
 				return nil, errBuildletsGone
@@ -1720,10 +1790,10 @@
 
 // runTestsOnBuildlet runs tis on bc, using the optional goroot & gopath environment variables.
 func (st *buildStatus) runTestsOnBuildlet(bc buildlet.Client, tis []*testItem, goroot, gopath string) {
-	names := make([]string, len(tis))
+	names, rawNames := make([]string, len(tis)), make([]string, len(tis))
 	for i, ti := range tis {
-		names[i] = ti.name
-		if i > 0 && (!strings.HasPrefix(ti.name, "go_test:") || !strings.HasPrefix(names[0], "go_test:")) {
+		names[i], rawNames[i] = ti.name.Old, ti.name.Raw
+		if i > 0 && (!strings.HasPrefix(ti.name.Old, "go_test:") || !strings.HasPrefix(names[0], "go_test:")) {
 			panic("only go_test:* tests may be merged")
 		}
 	}
@@ -1748,7 +1818,7 @@
 	if st.useKeepGoingFlag() {
 		args = append(args, "-k")
 	}
-	args = append(args, names...)
+	args = append(args, rawNames...)
 	var buf bytes.Buffer
 	t0 := time.Now()
 	timeout := st.conf.DistTestsExecTimeout(names)
diff --git a/cmd/coordinator/buildstatus_test.go b/cmd/coordinator/buildstatus_test.go
index 91b1dd3..d4a7cfc 100644
--- a/cmd/coordinator/buildstatus_test.go
+++ b/cmd/coordinator/buildstatus_test.go
@@ -9,6 +9,7 @@
 package main
 
 import (
+	"strings"
 	"testing"
 
 	"github.com/google/go-cmp/cmp"
@@ -328,3 +329,53 @@
 		})
 	}
 }
+
+// Test that go120DistTestNames remaps both 1.20 (old) and 1.21 (new)
+// dist test names to old, and doesn't forget the original name (raw).
+func TestGo120DistTestNames(t *testing.T) {
+	for _, tc := range [...]struct {
+		name string
+		in   string
+		want string
+	}{
+		{
+			name: "empty",
+			in:   "",
+			want: "",
+		},
+		{
+			name: "old to old",
+			in:   "go_test:archive/tar go_test:cmd/go api reboot test:0_2 test:1_2",
+			want: "go_test:archive/tar go_test:cmd/go api reboot test:0_2 test:1_2",
+		},
+		{
+			name: "new to old",
+			in:   "        archive/tar         cmd/go cmd/api:check cmd/internal/bootstrap_test cmd/internal/testdir:0_2 cmd/internal/testdir:1_2",
+			want: "go_test:archive/tar go_test:cmd/go     api                  reboot                        test:0_2                 test:1_2",
+		},
+		{
+			name: "more special cases",
+			in:   "crypto/x509:nolibgcc fmt:moved_goroot flag:race net:race os:race",
+			want: "nolibgcc:crypto/x509     moved_goroot      race     race    race",
+		},
+		{
+			name: "unhandled special case",
+			in:   "something:something",
+			want: "something:something",
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			got := go120DistTestNames(strings.Fields(tc.in))
+			var want []distTestName
+			for _, old := range strings.Fields(tc.want) {
+				want = append(want, distTestName{Old: old})
+			}
+			for i, raw := range strings.Fields(tc.in) {
+				want[i].Raw = raw
+			}
+			if diff := cmp.Diff(want, got); diff != "" {
+				t.Errorf("go120DistTestNames mismatch (-want +got):\n%s", diff)
+			}
+		})
+	}
+}
diff --git a/cmd/coordinator/coordinator.go b/cmd/coordinator/coordinator.go
index 07f7ffc..82c54f1 100644
--- a/cmd/coordinator/coordinator.go
+++ b/cmd/coordinator/coordinator.go
@@ -1842,8 +1842,8 @@
 	names := make([]string, len(s.items))
 	namedItem := map[string]*testItem{}
 	for i, ti := range s.items {
-		names[i] = ti.name
-		namedItem[ti.name] = ti
+		names[i] = ti.name.Old
+		namedItem[ti.name.Old] = ti
 	}
 
 	// First do the go_test:* ones. partitionGoTests
@@ -1860,7 +1860,7 @@
 	// Then do the misc tests, which are always by themselves.
 	// (No benefit to merging them)
 	for _, ti := range s.items {
-		if !strings.HasPrefix(ti.name, "go_test:") {
+		if !strings.HasPrefix(ti.name.Old, "go_test:") {
 			s.inOrder = append(s.inOrder, []*testItem{ti})
 		}
 	}
@@ -1915,7 +1915,7 @@
 
 type testItem struct {
 	set      *testSet
-	name     string        // "go_test:sort"
+	name     distTestName
 	duration time.Duration // optional approximate size
 
 	take chan token // buffered size 1: sending takes ownership of rest of fields:
@@ -1959,6 +1959,12 @@
 	close(ti.done)
 }
 
+// distTestName is the name of a dist test as discovered from 'go tool dist test -list'.
+type distTestName struct {
+	Old string // Old is dist test name converted to Go 1.20 format, like "go_test:sort" or "reboot".
+	Raw string // Raw is unmodified name from dist, suitable as an argument back to 'go tool dist test'.
+}
+
 type byTestDuration []*testItem
 
 func (s byTestDuration) Len() int           { return len(s) }
diff --git a/dashboard/builders.go b/dashboard/builders.go
index 28f054b..cd7e483 100644
--- a/dashboard/builders.go
+++ b/dashboard/builders.go
@@ -889,6 +889,9 @@
 	// 		return run
 	// 	}
 	//
+	// distTestAdjust works with dist test names expressed in the Go 1.20 format.
+	// See ShouldRunDistTest for details.
+	//
 	distTestAdjust func(run bool, distTest string, isNormalTry bool) bool
 
 	// numTestHelpers is the number of _additional_ buildlets
@@ -976,6 +979,9 @@
 
 // DistTestsExecTimeout returns how long the coordinator should wait
 // for a cmd/dist test execution to run the provided dist test names.
+//
+// The dist test names are expressed in the Go 1.20 format, even for
+// newer Go versions. See go120DistTestNames.
 func (c *BuildConfig) DistTestsExecTimeout(distTests []string) time.Duration {
 	// TODO: consider using distTests? We never did before, but
 	// now we have the TestStats in the coordinator. Pass in a
@@ -1177,13 +1183,20 @@
 }
 
 // ShouldRunDistTest reports whether the named cmd/dist test should be
-// run for this build config. The isNormalTry parameter is whether this
-// is for a normal TryBot (non-SlowBot) run.
+// run for this build config. The dist test name is expressed in the
+// Go 1.20 format, even for newer Go versions. See go120DistTestNames.
+//
+// The isNormalTry parameter specifies whether this is for a normal
+// TryBot build. It's false for SlowBot and post-submit builds.
 //
 // In general, this returns true. When in normal trybot mode,
 // some slow portable tests are only run on the fastest builder.
 //
-// Individual builders can adjust this policy to fit their needs.
+// It's possible for individual builders to adjust this policy for their needs,
+// though it is preferable to handle that by adjusting test skips in the tests
+// instead of here. That has the advantage of being easier to maintain over time
+// since both the test and its skips would be in one repository rather than two,
+// and having effect when tests are run locally by developers.
 func (c *BuildConfig) ShouldRunDistTest(distTest string, isNormalTry bool) bool {
 	run := true
 
@@ -1198,7 +1211,8 @@
 		}
 	}
 
-	// Let individual builders adjust the cmd/dist test policy.
+	// Individual builders have historically sometimes adjusted the cmd/dist test policy.
+	// Over time these can migrate to better ways of doing platform-based or speed-based test skips.
 	if c.distTestAdjust != nil {
 		run = c.distTestAdjust(run, distTest, isNormalTry)
 	}