cmd/go: add support for GOPROXY list

Following discussion on golang.org/issue/26334, this CL changes
the GOPROXY environment setting to be a list of proxies, tried in
sequence. The first successful or non-404/410 error is taken as
authoritative. Otherwise the next proxy is tried, and so on.

As in earlier releases, GOPROXY=direct means "connect directly",
but now it can appear in a longer list as well.

This will let companies run a proxy holding only their private modules
and let users set GOPROXY=thatproxy,publicproxy or GOPROXY=thatproxy,direct
to fall back to an alternate mechanism for fetching public modules.

Fixes #26334.

Change-Id: I642f0ae655ec307d9cdcad0830c0baac8670eb9c
Reviewed-on: https://go-review.googlesource.com/c/go/+/173441
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Jay Conrod <jayconrod@google.com>
diff --git a/src/cmd/go/alldocs.go b/src/cmd/go/alldocs.go
index 650a81d..8be5df4 100644
--- a/src/cmd/go/alldocs.go
+++ b/src/cmd/go/alldocs.go
@@ -1990,8 +1990,13 @@
 // further control over the download source. If GOPROXY is unset, is the empty string,
 // or is the string "direct", downloads use the default direct connection to version
 // control systems. Setting GOPROXY to "off" disallows downloading modules from
-// any source. Otherwise, GOPROXY is expected to be the URL of a module proxy,
-// in which case the go command will fetch all modules from that proxy.
+// any source. Otherwise, GOPROXY is expected to be a comma-separated list of
+// the URLs of module proxies, in which case the go command will fetch modules
+// from those proxies. For each request, the go command tries each proxy in sequence,
+// only moving to the next if the current proxy returns a 404 or 410 HTTP response.
+// The string "direct" may appear in the proxy list, to cause a direct connection to
+// be attempted at that point in the search.
+//
 // No matter the source of the modules, downloaded modules must match existing
 // entries in go.sum (see 'go help modules' for discussion of verification).
 //
diff --git a/src/cmd/go/internal/modfetch/proxy.go b/src/cmd/go/internal/modfetch/proxy.go
index 97064cd..aa17782 100644
--- a/src/cmd/go/internal/modfetch/proxy.go
+++ b/src/cmd/go/internal/modfetch/proxy.go
@@ -6,14 +6,16 @@
 
 import (
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"io/ioutil"
-	urlpkg "net/url"
+	url "net/url"
 	"os"
 	pathpkg "path"
 	"path/filepath"
 	"strings"
+	"sync"
 	"time"
 
 	"cmd/go/internal/base"
@@ -33,8 +35,13 @@
 further control over the download source. If GOPROXY is unset, is the empty string,
 or is the string "direct", downloads use the default direct connection to version
 control systems. Setting GOPROXY to "off" disallows downloading modules from
-any source. Otherwise, GOPROXY is expected to be the URL of a module proxy,
-in which case the go command will fetch all modules from that proxy.
+any source. Otherwise, GOPROXY is expected to be a comma-separated list of
+the URLs of module proxies, in which case the go command will fetch modules
+from those proxies. For each request, the go command tries each proxy in sequence,
+only moving to the next if the current proxy returns a 404 or 410 HTTP response.
+The string "direct" may appear in the proxy list, to cause a direct connection to
+be attempted at that point in the search.
+
 No matter the source of the modules, downloaded modules must match existing
 entries in go.sum (see 'go help modules' for discussion of verification).
 
@@ -100,37 +107,86 @@
 	proxyURL = url
 }
 
+var proxyOnce struct {
+	sync.Once
+	list []string
+	err  error
+}
+
+func proxyURLs() ([]string, error) {
+	proxyOnce.Do(func() {
+		for _, proxyURL := range strings.Split(proxyURL, ",") {
+			if proxyURL == "" {
+				continue
+			}
+			if proxyURL == "direct" {
+				proxyOnce.list = append(proxyOnce.list, "direct")
+				continue
+			}
+
+			// Check that newProxyRepo accepts the URL.
+			// It won't do anything with the path.
+			_, err := newProxyRepo(proxyURL, "golang.org/x/text")
+			if err != nil {
+				proxyOnce.err = err
+				return
+			}
+			proxyOnce.list = append(proxyOnce.list, proxyURL)
+		}
+	})
+
+	return proxyOnce.list, proxyOnce.err
+}
+
 func lookupProxy(path string) (Repo, error) {
-	if strings.Contains(proxyURL, ",") {
-		return nil, fmt.Errorf("invalid $GOPROXY setting: cannot have comma")
-	}
-	r, err := newProxyRepo(proxyURL, path)
+	list, err := proxyURLs()
 	if err != nil {
 		return nil, err
 	}
-	return r, nil
+
+	var repos listRepo
+	for _, u := range list {
+		var r Repo
+		if u == "direct" {
+			// lookupDirect does actual network traffic.
+			// Especially if GOPROXY="http://mainproxy,direct",
+			// avoid the network until we need it by using a lazyRepo wrapper.
+			r = &lazyRepo{setup: lookupDirect, path: path}
+		} else {
+			// The URL itself was checked in proxyURLs.
+			// The only possible error here is a bad path,
+			// so we can return it unconditionally.
+			r, err = newProxyRepo(u, path)
+			if err != nil {
+				return nil, err
+			}
+		}
+		repos = append(repos, r)
+	}
+	return repos, nil
 }
 
 type proxyRepo struct {
-	url  *urlpkg.URL
+	url  *url.URL
 	path string
 }
 
 func newProxyRepo(baseURL, path string) (Repo, error) {
-	url, err := urlpkg.Parse(baseURL)
+	base, err := url.Parse(baseURL)
 	if err != nil {
 		return nil, err
 	}
-	switch url.Scheme {
-	case "file":
-		if *url != (urlpkg.URL{Scheme: url.Scheme, Path: url.Path, RawPath: url.RawPath}) {
-			return nil, fmt.Errorf("proxy URL %q uses file scheme with non-path elements", web.Redacted(url))
-		}
+	switch base.Scheme {
 	case "http", "https":
+		// ok
+	case "file":
+		if *base != (url.URL{Scheme: base.Scheme, Path: base.Path, RawPath: base.RawPath}) {
+			return nil, fmt.Errorf("invalid file:// proxy URL with non-path elements: %s", web.Redacted(base))
+		}
 	case "":
-		return nil, fmt.Errorf("proxy URL %q missing scheme", web.Redacted(url))
+		return nil, fmt.Errorf("invalid proxy URL missing scheme: %s", web.Redacted(base))
 	default:
-		return nil, fmt.Errorf("unsupported proxy scheme %q", url.Scheme)
+		return nil, fmt.Errorf("invalid proxy URL scheme (must be https, http, file): %s", web.Redacted(base))
 	}
 
 	enc, err := module.EncodePath(path)
@@ -138,9 +194,9 @@
 		return nil, err
 	}
 
-	url.Path = strings.TrimSuffix(url.Path, "/") + "/" + enc
-	url.RawPath = strings.TrimSuffix(url.RawPath, "/") + "/" + pathEscape(enc)
-	return &proxyRepo{url, path}, nil
+	base.Path = strings.TrimSuffix(base.Path, "/") + "/" + enc
+	base.RawPath = strings.TrimSuffix(base.RawPath, "/") + "/" + pathEscape(enc)
+	return &proxyRepo{base, path}, nil
 }
 
 func (p *proxyRepo) ModulePath() string {
@@ -159,24 +215,24 @@
 func (p *proxyRepo) getBody(path string) (io.ReadCloser, error) {
 	fullPath := pathpkg.Join(p.url.Path, path)
 	if p.url.Scheme == "file" {
-		rawPath, err := urlpkg.PathUnescape(fullPath)
+		rawPath, err := url.PathUnescape(fullPath)
 		if err != nil {
 			return nil, err
 		}
 		return os.Open(filepath.FromSlash(rawPath))
 	}
 
-	url := new(urlpkg.URL)
-	*url = *p.url
-	url.Path = fullPath
-	url.RawPath = pathpkg.Join(url.RawPath, pathEscape(path))
+	target := *p.url
+	target.Path = fullPath
+	target.RawPath = pathpkg.Join(target.RawPath, pathEscape(path))
 
-	resp, err := web.Get(web.DefaultSecurity, url)
+	resp, err := web.Get(web.DefaultSecurity, &target)
 	if err != nil {
 		return nil, err
 	}
-	if resp.StatusCode != 200 {
-		return nil, fmt.Errorf("unexpected status (%s): %v", web.Redacted(url), resp.Status)
+	if err := resp.Err(); err != nil {
+		resp.Body.Close()
+		return nil, err
 	}
 	return resp.Body, nil
 }
@@ -292,5 +348,119 @@
 // That is, it escapes things like ? and # (which really shouldn't appear anyway).
 // It does not escape / to %2F: our REST API is designed so that / can be left as is.
 func pathEscape(s string) string {
-	return strings.ReplaceAll(urlpkg.PathEscape(s), "%2F", "/")
+	return strings.ReplaceAll(url.PathEscape(s), "%2F", "/")
+}
+
+// A lazyRepo is a lazily-initialized Repo,
+// constructed on demand by calling setup.
+type lazyRepo struct {
+	path  string
+	setup func(string) (Repo, error)
+	once  sync.Once
+	repo  Repo
+	err   error
+}
+
+func (r *lazyRepo) init() {
+	r.repo, r.err = r.setup(r.path)
+}
+
+func (r *lazyRepo) ModulePath() string {
+	return r.path
+}
+
+func (r *lazyRepo) Versions(prefix string) ([]string, error) {
+	if r.once.Do(r.init); r.err != nil {
+		return nil, r.err
+	}
+	return r.repo.Versions(prefix)
+}
+
+func (r *lazyRepo) Stat(rev string) (*RevInfo, error) {
+	if r.once.Do(r.init); r.err != nil {
+		return nil, r.err
+	}
+	return r.repo.Stat(rev)
+}
+
+func (r *lazyRepo) Latest() (*RevInfo, error) {
+	if r.once.Do(r.init); r.err != nil {
+		return nil, r.err
+	}
+	return r.repo.Latest()
+}
+
+func (r *lazyRepo) GoMod(version string) ([]byte, error) {
+	if r.once.Do(r.init); r.err != nil {
+		return nil, r.err
+	}
+	return r.repo.GoMod(version)
+}
+
+func (r *lazyRepo) Zip(dst io.Writer, version string) error {
+	if r.once.Do(r.init); r.err != nil {
+		return r.err
+	}
+	return r.repo.Zip(dst, version)
+}
+
+// A listRepo is a preference list of Repos.
+// The list must be non-empty and all Repos
+// must return the same result from ModulePath.
+// For each method, the repos are tried in order
+// until one succeeds or returns a non-ErrNotExist (non-404) error.
+type listRepo []Repo
+
+func (l listRepo) ModulePath() string {
+	return l[0].ModulePath()
+}
+
+func (l listRepo) Versions(prefix string) ([]string, error) {
+	for i, r := range l {
+		v, err := r.Versions(prefix)
+		if i == len(l)-1 || !errors.Is(err, os.ErrNotExist) {
+			return v, err
+		}
+	}
+	panic("no repos")
+}
+
+func (l listRepo) Stat(rev string) (*RevInfo, error) {
+	for i, r := range l {
+		info, err := r.Stat(rev)
+		if i == len(l)-1 || !errors.Is(err, os.ErrNotExist) {
+			return info, err
+		}
+	}
+	panic("no repos")
+}
+
+func (l listRepo) Latest() (*RevInfo, error) {
+	for i, r := range l {
+		info, err := r.Latest()
+		if i == len(l)-1 || !errors.Is(err, os.ErrNotExist) {
+			return info, err
+		}
+	}
+	panic("no repos")
+}
+
+func (l listRepo) GoMod(version string) ([]byte, error) {
+	for i, r := range l {
+		data, err := r.GoMod(version)
+		if i == len(l)-1 || !errors.Is(err, os.ErrNotExist) {
+			return data, err
+		}
+	}
+	panic("no repos")
+}
+
+func (l listRepo) Zip(dst io.Writer, version string) error {
+	for i, r := range l {
+		err := r.Zip(dst, version)
+		if i == len(l)-1 || !errors.Is(err, os.ErrNotExist) {
+			return err
+		}
+	}
+	panic("no repos")
 }
diff --git a/src/cmd/go/internal/modfetch/repo.go b/src/cmd/go/internal/modfetch/repo.go
index c3c4ade..c66c8a8 100644
--- a/src/cmd/go/internal/modfetch/repo.go
+++ b/src/cmd/go/internal/modfetch/repo.go
@@ -209,6 +209,10 @@
 		return lookupProxy(path)
 	}
 
+	return lookupDirect(path)
+}
+
+func lookupDirect(path string) (Repo, error) {
 	security := web.SecureOnly
 	if get.Insecure {
 		security = web.Insecure
diff --git a/src/cmd/go/proxy_test.go b/src/cmd/go/proxy_test.go
index 830cea0..abca248 100644
--- a/src/cmd/go/proxy_test.go
+++ b/src/cmd/go/proxy_test.go
@@ -16,6 +16,7 @@
 	"net/http"
 	"os"
 	"path/filepath"
+	"strconv"
 	"strings"
 	"sync"
 	"testing"
@@ -104,6 +105,16 @@
 		return
 	}
 	path := strings.TrimPrefix(r.URL.Path, "/mod/")
+
+	// If asked for 404/abc, serve a 404.
+	if j := strings.Index(path, "/"); j >= 0 {
+		n, err := strconv.Atoi(path[:j])
+		if err == nil && n >= 200 {
+			w.WriteHeader(n)
+			return
+		}
+	}
+
 	i := strings.Index(path, "/@v/")
 	if i < 0 {
 		http.NotFound(w, r)
diff --git a/src/cmd/go/testdata/script/mod_proxy_list.txt b/src/cmd/go/testdata/script/mod_proxy_list.txt
new file mode 100644
index 0000000..a486228
--- /dev/null
+++ b/src/cmd/go/testdata/script/mod_proxy_list.txt
@@ -0,0 +1,29 @@
+env GO111MODULE=on
+env proxy=$GOPROXY
+
+# Proxy that can't serve should fail.
+env GOPROXY=$proxy/404
+! go get rsc.io/quote@v1.0.0
+stderr '404 Not Found'
+
+# get should walk down the proxy list past 404 and 410 responses.
+env GOPROXY=$proxy/404,$proxy/410,$proxy
+go get rsc.io/quote@v1.1.0
+
+# get should not walk past other 4xx errors.
+env GOPROXY=$proxy/403,$proxy
+! go get rsc.io/quote@v1.2.0
+stderr 'reading.*/403/rsc.io/.*: 403 Forbidden'
+
+# get should not walk past non-4xx errors.
+env GOPROXY=$proxy/500,$proxy
+! go get rsc.io/quote@v1.3.0
+stderr 'reading.*/500/rsc.io/.*: 500 Internal Server Error'
+
+# get should return the final 404/410 if that's all we have.
+env GOPROXY=$proxy/404,$proxy/410
+! go get rsc.io/quote@v1.4.0
+stderr 'reading.*/410/rsc.io/.*: 410 Gone'
+
+-- go.mod --
+module x