cmd/coordinator: support testing subrepos that use modules

Only the "oauth2" and "build" repos for now, as a test. We'll lock
down policy more later and decide when to do this automatically.

Also, this currently only runs buildlets which run in our GCP project,
because we're not yet proxying a localhost:3000 port from the
reverse buildlets to an authenticated TLS connection back to our
module proxy service on GKE.

Updates golang/go#14594
Fixes golang/go#29637

Change-Id: I6f05da2186b38dc8056081252563a82c50f0ce05
Reviewed-on: https://go-review.googlesource.com/c/157438
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Bryan C. Mills <bcmills@google.com>
Reviewed-by: Andrew Bonventre <andybons@golang.org>
diff --git a/cmd/coordinator/athens-prod.yaml b/cmd/coordinator/athens-prod.yaml
new file mode 100644
index 0000000..549fd40
--- /dev/null
+++ b/cmd/coordinator/athens-prod.yaml
@@ -0,0 +1,37 @@
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+  name: athens-deployment
+spec:
+  template:
+    metadata:
+      labels:
+        app: athens
+      annotations:
+        container.seccomp.security.alpha.kubernetes.io/athens: docker/default
+        container.apparmor.security.beta.kubernetes.io/athens: runtime/default
+    spec:
+      volumes:
+      - name: cache-volume
+        emptyDir: {}
+      containers:
+      - name: athens
+        image: gomods/athens:v0.2.0
+        imagePullPolicy: Always
+        command: ["/bin/athens-proxy", "-config_file=/config/config.toml"]
+        volumeMounts:
+        - mountPath: "/athens-cache"
+          name: cache-volume
+        env:
+        - name: GO_ENV
+          value: "production"
+        - name: ATHENS_STORAGE_TYPE
+          value: "disk"
+        - name: ATHENS_DISK_STORAGE_ROOT
+          value: "/athens-cache"
+        ports:
+        - containerPort: 3000
+        resources:
+          requests:
+            cpu: "1"
+            memory: "2Gi"
diff --git a/cmd/coordinator/coordinator.go b/cmd/coordinator/coordinator.go
index 94da768..fa65636 100644
--- a/cmd/coordinator/coordinator.go
+++ b/cmd/coordinator/coordinator.go
@@ -47,6 +47,7 @@
 	"go4.org/syncutil"
 	grpc "grpc.go4.org"
 
+	"cloud.google.com/go/compute/metadata"
 	"cloud.google.com/go/errorreporting"
 	"cloud.google.com/go/storage"
 	"golang.org/x/build"
@@ -985,9 +986,8 @@
 			log.Printf("Warning: skipping incomplete %#v", work)
 			continue
 		}
-		if work.Project == "build" || work.Project == "grpc-review" {
-			// Skip trybot request in build repo.
-			// Also skip grpc-review, which is only for reviews for now.
+		if work.Project == "grpc-review" {
+			// Skip grpc-review, which is only for reviews for now.
 			continue
 		}
 		key := tryWorkItemKey(work)
@@ -1073,6 +1073,8 @@
 	}
 }
 
+var testingKnobSkipBuilds bool
+
 // newTrySet creates a new trySet group of builders for a given
 // work item, the (Project, Branch, Change-ID, Commit) tuple.
 // It also starts goroutines for each build.
@@ -1108,11 +1110,16 @@
 		idx := len(ts.builds)
 		ts.builds = append(ts.builds, bs)
 		ts.remain++
+		if testingKnobSkipBuilds {
+			return
+		}
 		go bs.start() // acquires statusMu itself, so in a goroutine
 		go ts.awaitTryBuild(idx, bs, brev)
 	}
 
-	go ts.notifyStarting()
+	if !testingKnobSkipBuilds {
+		go ts.notifyStarting()
+	}
 	for _, bconf := range builders {
 		brev := tryKeyToBuilderRev(bconf.Name, key, goRev)
 		bs, err := newBuild(brev)
@@ -1130,6 +1137,10 @@
 		work.GoCommit = work.GoCommit[:len(work.GoBranch)]
 	}
 
+	// linuxBuilder is the standard builder we run when testing x/* repos against
+	// the past two Go releases.
+	linuxBuilder := dashboard.Builders["linux-amd64"]
+
 	// If there's more than one GoCommit, that means this is an x/* repo
 	// and we're testing against previous releases of Go.
 	for i, goRev := range work.GoCommit {
@@ -1138,7 +1149,10 @@
 			continue
 		}
 		branch := work.GoBranch[i]
-		brev := tryKeyToBuilderRev("linux-amd64", key, goRev)
+		if !linuxBuilder.BuildBranch(key.Project, "master", branch) {
+			continue
+		}
+		brev := tryKeyToBuilderRev(linuxBuilder.Name, key, goRev)
 		bs, err := newBuild(brev)
 		if err != nil {
 			log.Printf("can't create build for %q: %v", brev, err)
@@ -1344,6 +1358,9 @@
 	}
 }
 
+// skipBuild reports whether the br build should be skipped.
+//
+// TODO(bradfitz): move this policy func into dashboard/builders.go in its own CL sometime.
 func skipBuild(br buildgo.BuilderRev) bool {
 	if br.Name == "freebsd-arm-paulzhol" {
 		// This was a fragile little machine with limited memory.
@@ -1356,11 +1373,20 @@
 		return true
 	}
 	switch br.SubName {
-	case "build", // has external deps
-		"exp",    // always broken, depends on mobile which is broken
+	case "oauth2", "build":
+		// The oauth2 and build repos are our guinea pigs for
+		// testing using modules. But they currently only work
+		// inside the GCP network.
+		// TODO: once we proxy through the module proxy from
+		// reverse buildlets' localhost:3000 to
+		// authenticated+TLS farmer.golang.org to the GKE
+		// service, then we can use modules less
+		// conditionally.
+		bc, ok := dashboard.Builders[br.Name]
+		return !ok || bc.IsReverse()
+	case "exp", // always broken, depends on mobile which is broken
 		"mobile", // always broken (gl, etc). doesn't compile.
-		"term",   // no code yet in repo: "warning: "golang.org/x/term/..." matched no packages"
-		"oauth2": // has external deps
+		"term":   // no code yet in repo,
 		return true
 	case "perf":
 		if br.Name == "linux-amd64-nocgo" {
@@ -2690,10 +2716,11 @@
 		return nil, nil
 	}
 
-	// Recursively fetch the repo and its dependencies.
-	// Dependencies are always fetched at master, which isn't
-	// great but the dashboard data model doesn't track
-	// sub-repo dependencies. TODO(adg): fix this somehow??
+	// Recursively fetch the repo and its golang.org/x/*
+	// dependencies. Dependencies are always fetched at master,
+	// which isn't great but the dashboard data model doesn't
+	// track non-golang.org/x/* dependencies. For those, we
+	// rely on the code under test using Go modules.
 	for i := 0; i < len(toFetch); i++ {
 		repo := toFetch[i]
 		if fetched[repo] {
@@ -2725,17 +2752,70 @@
 
 	sp := st.CreateSpan("running_subrepo_tests", st.SubName)
 	defer func() { sp.Done(err) }()
+
+	goProxy, err := st.moduleProxy()
+	if err != nil {
+		return nil, err
+	}
+	var go111Module, dir string
+	if goProxy != "" {
+		go111Module = "on"
+		dir = "gopath/src/golang.org/x/" + st.SubName
+	}
+
 	return st.bc.Exec(path.Join("go", "bin", "go"), buildlet.ExecOpts{
 		Output: st,
+		Dir:    dir,
 		ExtraEnv: append(st.conf.Env(),
 			"GOROOT="+goroot,
 			"GOPATH="+gopath,
+			"GO111MODULE="+go111Module,
+			"GOPROXY="+goProxy,
 		),
 		Path: []string{"$WORKDIR/go/bin", "$PATH"},
 		Args: []string{"test", "-short", subrepoPrefix + st.SubName + "/..."},
 	})
 }
 
+// moduleProxy returns the GOPROXY environment value to use for this
+// build's tests. If non-empty, GO111MODULE=on is included in the
+// environment as well. Returning an empty string with a nil error
+// means to not configure the environment values.
+//
+// We go through a GCP-project-internal module proxy ("GOPROXY") to
+// eliminate load on the origin servers. Our builder VMs are ephemeral
+// and only run for the duration of one build. They also often don't
+// have all the VCS tools installed (or even available: there is no
+// git for plan9).
+func (bs *buildStatus) moduleProxy() (string, error) {
+	switch bs.SubName {
+	case "oauth2", "build":
+		// The two repos we're starting with for testing.
+	default:
+		return "", nil
+	}
+	// If we're running on localhost, just use the current environment's value.
+	if buildEnv == nil || !buildEnv.IsProd {
+		return os.Getenv("GOPROXY"), nil
+	}
+
+	// We run a NodePort service on each GKE node
+	// (cmd/coordinator/module-proxy-service.yaml) on port 30156
+	// that maps to the Athens service. We could round robin over
+	// all the GKE nodes' IPs if we wanted, but the coordinator is
+	// running on GKE so our node by definition is up, so just use it.
+	// It won't be much traffic.
+	// TODO: migrate to a GKE internal load balancer with an internal static IP
+	// once we migrate symbolic-datum-552 off a Legacy VPC network to the modern
+	// scheme that supports internal static IPs.
+	gkeNodeIP, err := metadata.Get("instance/network-interfaces/0/ip")
+	if err != nil || gkeNodeIP == "" {
+		log.Printf("WARNING: failed to discover local GCE node's IP: %v; disabling GOPROXY", err)
+		return "", nil
+	}
+	return "http://" + gkeNodeIP + ":30156", nil
+}
+
 // affectedPkgs returns the name of every package affected by this commit.
 // The returned list may contain duplicates and is unsorted.
 // It is safe to call this on a nil trySet.
diff --git a/cmd/coordinator/coordinator_test.go b/cmd/coordinator/coordinator_test.go
index bd14218..583407e 100644
--- a/cmd/coordinator/coordinator_test.go
+++ b/cmd/coordinator/coordinator_test.go
@@ -15,6 +15,7 @@
 	"time"
 
 	"golang.org/x/build/internal/buildgo"
+	"golang.org/x/build/maintner/maintnerd/apipb"
 )
 
 func TestPartitionGoTests(t *testing.T) {
@@ -111,3 +112,25 @@
 	// Just test that it doesn't panic:
 	stagingClusterBuilders()
 }
+
+// tests that we don't test Go 1.10 for the build repo
+func TestNewTrySetBuildRepoGo110(t *testing.T) {
+	testingKnobSkipBuilds = true
+
+	work := &apipb.GerritTryWorkItem{
+		Project:  "build",
+		Branch:   "master",
+		ChangeId: "I6f05da2186b38dc8056081252563a82c50f0ce05",
+		Commit:   "a62e6a3ab11cc9cc2d9e22a50025dd33fc35d22f",
+		GoCommit: []string{"a2e79571a9d3dbe3cf10dcaeb1f9c01732219869", "e39e43d7349555501080133bb426f1ead4b3ef97", "f5ff72d62301c4e9d0a78167fab5914ca12919bd"},
+		GoBranch: []string{"master", "release-branch.go1.11", "release-branch.go1.10"},
+	}
+	ts := newTrySet(work)
+	for i, bs := range ts.builds {
+		v := bs.NameAndBranch()
+		if strings.Contains(v, "Go 1.10.x") {
+			t.Errorf("unexpected builder: %v", v)
+		}
+		t.Logf("build[%d]: %s", i, v)
+	}
+}
diff --git a/cmd/coordinator/module-proxy-service.yaml b/cmd/coordinator/module-proxy-service.yaml
new file mode 100644
index 0000000..56491f1
--- /dev/null
+++ b/cmd/coordinator/module-proxy-service.yaml
@@ -0,0 +1,30 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: go-module-proxy
+  annotations:
+    cloud.google.com/load-balancer-type: "Internal"
+spec:
+  type: NodePort
+  ports:
+    - name: module-proxy
+      port: 3000
+      targetPort: 3000
+      nodePort: 30156
+      protocol: TCP
+  selector:
+    app: athens
+
+# TODO(bradfitz): migrate (destroy & recreate) symbolic-datum-552 to get it off legacy networking
+# so we can use an internal LoadBalancer with a static internal IP instead, and then:
+#
+#spec:
+#  type: LoadBalancer
+#  loadBalancerIP: "10.240.0.we-cant-do-this-because-symbolic-datum-552-is-using-legacy-networking"
+#  loadBalancerSourceRanges:
+#  - "10.0.0.0/8"
+#  ports:
+#    - port: 3000
+#      targetPort: 3000
+#  selector:
+#    app: athens
diff --git a/dashboard/builders.go b/dashboard/builders.go
index 3e144d6..71fb8d6 100644
--- a/dashboard/builders.go
+++ b/dashboard/builders.go
@@ -841,6 +841,15 @@
 // branch is the branch of the repo (usually "master").
 // goBranch is non-empty for a non-"go" repo, and is the branch of Go the subrepo is being tested at.
 func (c *BuildConfig) BuildBranch(repo, branch, goBranch string) bool {
+	// Don't try to build oauth2 or build before Go 1.11. These
+	// repos require modules.
+	switch repo {
+	case "oauth2", "build":
+		if branch == "release-branch.go1.10" || goBranch == "release-branch.go1.10" {
+			return false
+		}
+	}
+
 	if strings.HasPrefix(c.Name, "darwin-") {
 		switch c.Name {
 		case "darwin-amd64-10_8", "darwin-amd64-10_10", "darwin-amd64-10_11",
@@ -1023,8 +1032,9 @@
 				return true
 			}
 		}
+		// TODO: remove items from this set once these repos have go.mod files:
 		switch proj {
-		case "grpc-review", "build", "exp", "mobile", "term", "oauth2":
+		case "grpc-review", "exp", "mobile", "term":
 			return false
 		}
 		return true