cmd/go/internal/web: support file:// URLs

Fixes #27698
Fixes #32227

Change-Id: I2416408b3de2f9f1ae1af2911cc327a65d2c0170
Reviewed-on: https://go-review.googlesource.com/c/go/+/181037
Reviewed-by: Jay Conrod <jayconrod@google.com>
diff --git a/src/cmd/go/internal/get/vcs.go b/src/cmd/go/internal/get/vcs.go
index c6516c8..29d58e6 100644
--- a/src/cmd/go/internal/get/vcs.go
+++ b/src/cmd/go/internal/get/vcs.go
@@ -856,6 +856,9 @@
 	if url.Scheme == "" {
 		return errors.New("no scheme")
 	}
+	if url.Scheme == "file" {
+		return errors.New("file scheme disallowed")
+	}
 	return nil
 }
 
diff --git a/src/cmd/go/internal/modfetch/proxy.go b/src/cmd/go/internal/modfetch/proxy.go
index c1bc277..50e2662 100644
--- a/src/cmd/go/internal/modfetch/proxy.go
+++ b/src/cmd/go/internal/modfetch/proxy.go
@@ -13,12 +13,9 @@
 	"net/url"
 	"os"
 	pathpkg "path"
-	"path/filepath"
-	"runtime"
 	"strings"
 	"sync"
 	"time"
-	"unicode"
 
 	"cmd/go/internal/base"
 	"cmd/go/internal/cfg"
@@ -201,18 +198,6 @@
 
 func (p *proxyRepo) getBody(path string) (io.ReadCloser, error) {
 	fullPath := pathpkg.Join(p.url.Path, path)
-	if p.url.Scheme == "file" {
-		rawPath, err := url.PathUnescape(fullPath)
-		if err != nil {
-			return nil, err
-		}
-		if runtime.GOOS == "windows" && len(rawPath) >= 4 && rawPath[0] == '/' && unicode.IsLetter(rune(rawPath[1])) && rawPath[2] == ':' {
-			// On Windows, file URLs look like "file:///C:/foo/bar". url.Path will
-			// start with a slash which must be removed. See golang.org/issue/6027.
-			rawPath = rawPath[1:]
-		}
-		return os.Open(filepath.FromSlash(rawPath))
-	}
 
 	target := *p.url
 	target.Path = fullPath
diff --git a/src/cmd/go/internal/web/file_test.go b/src/cmd/go/internal/web/file_test.go
new file mode 100644
index 0000000..e31ad71
--- /dev/null
+++ b/src/cmd/go/internal/web/file_test.go
@@ -0,0 +1,58 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package web
+
+import (
+	"errors"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+func TestGetFileURL(t *testing.T) {
+	const content = "Hello, file!\n"
+
+	f, err := ioutil.TempFile("", "web-TestGetFileURL")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if _, err := f.WriteString(content); err != nil {
+		t.Error(err)
+	}
+	if err := f.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	u, err := urlFromFilePath(f.Name())
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	b, err := GetBytes(u)
+	if err != nil {
+		t.Fatalf("GetBytes(%v) = _, %v", u, err)
+	}
+	if string(b) != content {
+		t.Fatalf("after writing %q to %s, GetBytes(%v) read %q", content, f.Name(), u, b)
+	}
+}
+
+func TestGetNonexistentFile(t *testing.T) {
+	path, err := filepath.Abs("nonexistent")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	u, err := urlFromFilePath(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	b, err := GetBytes(u)
+	if !errors.Is(err, os.ErrNotExist) {
+		t.Fatalf("GetBytes(%v) = %q, %v; want _, os.ErrNotExist", u, b, err)
+	}
+}
diff --git a/src/cmd/go/internal/web/http.go b/src/cmd/go/internal/web/http.go
index 4e2b1c3..b790fe9 100644
--- a/src/cmd/go/internal/web/http.go
+++ b/src/cmd/go/internal/web/http.go
@@ -53,6 +53,11 @@
 
 func get(security SecurityMode, url *urlpkg.URL) (*Response, error) {
 	start := time.Now()
+
+	if url.Scheme == "file" {
+		return getFile(url)
+	}
+
 	if os.Getenv("TESTGOPROXY404") == "1" && url.Host == "proxy.golang.org" {
 		res := &Response{
 			URL:        Redacted(url),
@@ -172,4 +177,41 @@
 	return r, nil
 }
 
+func getFile(u *urlpkg.URL) (*Response, error) {
+	path, err := urlToFilePath(u)
+	if err != nil {
+		return nil, err
+	}
+	f, err := os.Open(path)
+
+	if os.IsNotExist(err) {
+		return &Response{
+			URL:        Redacted(u),
+			Status:     http.StatusText(http.StatusNotFound),
+			StatusCode: http.StatusNotFound,
+			Body:       http.NoBody,
+		}, nil
+	}
+
+	if os.IsPermission(err) {
+		return &Response{
+			URL:        Redacted(u),
+			Status:     http.StatusText(http.StatusForbidden),
+			StatusCode: http.StatusForbidden,
+			Body:       http.NoBody,
+		}, nil
+	}
+
+	if err != nil {
+		return nil, err
+	}
+
+	return &Response{
+		URL:        Redacted(u),
+		Status:     http.StatusText(http.StatusOK),
+		StatusCode: http.StatusOK,
+		Body:       f,
+	}, nil
+}
+
 func openBrowser(url string) bool { return browser.Open(url) }
diff --git a/src/cmd/go/internal/web/url.go b/src/cmd/go/internal/web/url.go
new file mode 100644
index 0000000..146c51f
--- /dev/null
+++ b/src/cmd/go/internal/web/url.go
@@ -0,0 +1,95 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package web
+
+import (
+	"errors"
+	"net/url"
+	"path/filepath"
+	"strings"
+)
+
+// TODO(golang.org/issue/32456): If accepted, move these functions into the
+// net/url package.
+
+var errNotAbsolute = errors.New("path is not absolute")
+
+func urlToFilePath(u *url.URL) (string, error) {
+	if u.Scheme != "file" {
+		return "", errors.New("non-file URL")
+	}
+
+	checkAbs := func(path string) (string, error) {
+		if !filepath.IsAbs(path) {
+			return "", errNotAbsolute
+		}
+		return path, nil
+	}
+
+	if u.Path == "" {
+		if u.Host != "" || u.Opaque == "" {
+			return "", errors.New("file URL missing path")
+		}
+		return checkAbs(filepath.FromSlash(u.Opaque))
+	}
+
+	path, err := convertFileURLPath(u.Host, u.Path)
+	if err != nil {
+		return path, err
+	}
+	return checkAbs(path)
+}
+
+func urlFromFilePath(path string) (*url.URL, error) {
+	if !filepath.IsAbs(path) {
+		return nil, errNotAbsolute
+	}
+
+	// If path has a Windows volume name, convert the volume to a host and prefix
+	// per https://blogs.msdn.microsoft.com/ie/2006/12/06/file-uris-in-windows/.
+	if vol := filepath.VolumeName(path); vol != "" {
+		if strings.HasPrefix(vol, `\\`) {
+			path = filepath.ToSlash(path[2:])
+			i := strings.IndexByte(path, '/')
+
+			if i < 0 {
+				// A degenerate case.
+				// \\host.example.com (without a share name)
+				// becomes
+				// file://host.example.com/
+				return &url.URL{
+					Scheme: "file",
+					Host:   path,
+					Path:   "/",
+				}, nil
+			}
+
+			// \\host.example.com\Share\path\to\file
+			// becomes
+			// file://host.example.com/Share/path/to/file
+			return &url.URL{
+				Scheme: "file",
+				Host:   path[:i],
+				Path:   filepath.ToSlash(path[i:]),
+			}, nil
+		}
+
+		// C:\path\to\file
+		// becomes
+		// file:///C:/path/to/file
+		return &url.URL{
+			Scheme: "file",
+			Path:   "/" + filepath.ToSlash(path),
+		}, nil
+	}
+
+	// /path/to/file
+	// becomes
+	// file:///path/to/file
+	return &url.URL{
+		Scheme: "file",
+		Path:   filepath.ToSlash(path),
+	}, nil
+}
diff --git a/src/cmd/go/internal/web/url_other.go b/src/cmd/go/internal/web/url_other.go
new file mode 100644
index 0000000..bd243e5
--- /dev/null
+++ b/src/cmd/go/internal/web/url_other.go
@@ -0,0 +1,21 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !windows
+
+package web
+
+import (
+	"errors"
+	"path/filepath"
+)
+
+func convertFileURLPath(host, path string) (string, error) {
+	switch host {
+	case "", "localhost":
+	default:
+		return "", errors.New("file URL specifies non-local host")
+	}
+	return filepath.FromSlash(path), nil
+}
diff --git a/src/cmd/go/internal/web/url_other_test.go b/src/cmd/go/internal/web/url_other_test.go
new file mode 100644
index 0000000..b4a74d9
--- /dev/null
+++ b/src/cmd/go/internal/web/url_other_test.go
@@ -0,0 +1,36 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//+build !windows
+
+package web
+
+var urlTests = []struct {
+	url          string
+	filePath     string
+	canonicalURL string // If empty, assume equal to url.
+	wantErr      string
+}{
+	// Examples from RFC 8089:
+	{
+		url:      `file:///path/to/file`,
+		filePath: `/path/to/file`,
+	},
+	{
+		url:          `file:/path/to/file`,
+		filePath:     `/path/to/file`,
+		canonicalURL: `file:///path/to/file`,
+	},
+	{
+		url:          `file://localhost/path/to/file`,
+		filePath:     `/path/to/file`,
+		canonicalURL: `file:///path/to/file`,
+	},
+
+	// We reject non-local files.
+	{
+		url:     `file://host.example.com/path/to/file`,
+		wantErr: "file URL specifies non-local host",
+	},
+}
diff --git a/src/cmd/go/internal/web/url_test.go b/src/cmd/go/internal/web/url_test.go
new file mode 100644
index 0000000..8f462f5
--- /dev/null
+++ b/src/cmd/go/internal/web/url_test.go
@@ -0,0 +1,77 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package web
+
+import (
+	"net/url"
+	"testing"
+)
+
+func TestURLToFilePath(t *testing.T) {
+	for _, tc := range urlTests {
+		if tc.url == "" {
+			continue
+		}
+		tc := tc
+
+		t.Run(tc.url, func(t *testing.T) {
+			u, err := url.Parse(tc.url)
+			if err != nil {
+				t.Fatalf("url.Parse(%q): %v", tc.url, err)
+			}
+
+			path, err := urlToFilePath(u)
+			if err != nil {
+				if err.Error() == tc.wantErr {
+					return
+				}
+				if tc.wantErr == "" {
+					t.Fatalf("urlToFilePath(%v): %v; want <nil>", u, err)
+				} else {
+					t.Fatalf("urlToFilePath(%v): %v; want %s", u, err, tc.wantErr)
+				}
+			}
+
+			if path != tc.filePath || tc.wantErr != "" {
+				t.Fatalf("urlToFilePath(%v) = %q, <nil>; want %q, %s", u, path, tc.filePath, tc.wantErr)
+			}
+		})
+	}
+}
+
+func TestURLFromFilePath(t *testing.T) {
+	for _, tc := range urlTests {
+		if tc.filePath == "" {
+			continue
+		}
+		tc := tc
+
+		t.Run(tc.filePath, func(t *testing.T) {
+			u, err := urlFromFilePath(tc.filePath)
+			if err != nil {
+				if err.Error() == tc.wantErr {
+					return
+				}
+				if tc.wantErr == "" {
+					t.Fatalf("urlFromFilePath(%v): %v; want <nil>", tc.filePath, err)
+				} else {
+					t.Fatalf("urlFromFilePath(%v): %v; want %s", tc.filePath, err, tc.wantErr)
+				}
+			}
+
+			if tc.wantErr != "" {
+				t.Fatalf("urlFromFilePath(%v) = <nil>; want error: %s", tc.filePath, tc.wantErr)
+			}
+
+			wantURL := tc.url
+			if tc.canonicalURL != "" {
+				wantURL = tc.canonicalURL
+			}
+			if u.String() != wantURL {
+				t.Errorf("urlFromFilePath(%v) = %v; want %s", tc.filePath, u, wantURL)
+			}
+		})
+	}
+}
diff --git a/src/cmd/go/internal/web/url_windows.go b/src/cmd/go/internal/web/url_windows.go
new file mode 100644
index 0000000..2a65ec8
--- /dev/null
+++ b/src/cmd/go/internal/web/url_windows.go
@@ -0,0 +1,43 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package web
+
+import (
+	"errors"
+	"path/filepath"
+	"strings"
+)
+
+func convertFileURLPath(host, path string) (string, error) {
+	if len(path) == 0 || path[0] != '/' {
+		return "", errNotAbsolute
+	}
+
+	path = filepath.FromSlash(path)
+
+	// We interpret Windows file URLs per the description in
+	// https://blogs.msdn.microsoft.com/ie/2006/12/06/file-uris-in-windows/.
+
+	// The host part of a file URL (if any) is the UNC volume name,
+	// but RFC 8089 reserves the authority "localhost" for the local machine.
+	if host != "" && host != "localhost" {
+		// A common "legacy" format omits the leading slash before a drive letter,
+		// encoding the drive letter as the host instead of part of the path.
+		// (See https://blogs.msdn.microsoft.com/freeassociations/2005/05/19/the-bizarre-and-unhappy-story-of-file-urls/.)
+		// We do not support that format, but we should at least emit a more
+		// helpful error message for it.
+		if filepath.VolumeName(host) != "" {
+			return "", errors.New("file URL encodes volume in host field: too few slashes?")
+		}
+		return `\\` + host + path, nil
+	}
+
+	// If host is empty, path must contain an initial slash followed by a
+	// drive letter and path. Remove the slash and verify that the path is valid.
+	if vol := filepath.VolumeName(path[1:]); vol == "" || strings.HasPrefix(vol, `\\`) {
+		return "", errors.New("file URL missing drive letter")
+	}
+	return path[1:], nil
+}
diff --git a/src/cmd/go/internal/web/url_windows_test.go b/src/cmd/go/internal/web/url_windows_test.go
new file mode 100644
index 0000000..06386a0
--- /dev/null
+++ b/src/cmd/go/internal/web/url_windows_test.go
@@ -0,0 +1,94 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package web
+
+var urlTests = []struct {
+	url          string
+	filePath     string
+	canonicalURL string // If empty, assume equal to url.
+	wantErr      string
+}{
+	// Examples from https://blogs.msdn.microsoft.com/ie/2006/12/06/file-uris-in-windows/:
+
+	{
+		url:      `file://laptop/My%20Documents/FileSchemeURIs.doc`,
+		filePath: `\\laptop\My Documents\FileSchemeURIs.doc`,
+	},
+	{
+		url:      `file:///C:/Documents%20and%20Settings/davris/FileSchemeURIs.doc`,
+		filePath: `C:\Documents and Settings\davris\FileSchemeURIs.doc`,
+	},
+	{
+		url:      `file:///D:/Program%20Files/Viewer/startup.htm`,
+		filePath: `D:\Program Files\Viewer\startup.htm`,
+	},
+	{
+		url:          `file:///C:/Program%20Files/Music/Web%20Sys/main.html?REQUEST=RADIO`,
+		filePath:     `C:\Program Files\Music\Web Sys\main.html`,
+		canonicalURL: `file:///C:/Program%20Files/Music/Web%20Sys/main.html`,
+	},
+	{
+		url:      `file://applib/products/a-b/abc_9/4148.920a/media/start.swf`,
+		filePath: `\\applib\products\a-b\abc_9\4148.920a\media\start.swf`,
+	},
+	{
+		url:     `file:////applib/products/a%2Db/abc%5F9/4148.920a/media/start.swf`,
+		wantErr: "file URL missing drive letter",
+	},
+	{
+		url:     `C:\Program Files\Music\Web Sys\main.html?REQUEST=RADIO`,
+		wantErr: "non-file URL",
+	},
+
+	// The example "file://D:\Program Files\Viewer\startup.htm" errors out in
+	// url.Parse, so we substitute a slash-based path for testing instead.
+	{
+		url:     `file://D:/Program Files/Viewer/startup.htm`,
+		wantErr: "file URL encodes volume in host field: too few slashes?",
+	},
+
+	// The blog post discourages the use of non-ASCII characters because they
+	// depend on the user's current codepage. However, when we are working with Go
+	// strings we assume UTF-8 encoding, and our url package refuses to encode
+	// URLs to non-ASCII strings.
+	{
+		url:          `file:///C:/exampleㄓ.txt`,
+		filePath:     `C:\exampleㄓ.txt`,
+		canonicalURL: `file:///C:/example%E3%84%93.txt`,
+	},
+	{
+		url:      `file:///C:/example%E3%84%93.txt`,
+		filePath: `C:\exampleㄓ.txt`,
+	},
+
+	// Examples from RFC 8089:
+
+	// We allow the drive-letter variation from section E.2, because it is
+	// simpler to support than not to. However, we do not generate the shorter
+	// form in the reverse direction.
+	{
+		url:          `file:c:/path/to/file`,
+		filePath:     `c:\path\to\file`,
+		canonicalURL: `file:///c:/path/to/file`,
+	},
+
+	// We encode the UNC share name as the authority following section E.3.1,
+	// because that is what the Microsoft blog post explicitly recommends.
+	{
+		url:      `file://host.example.com/Share/path/to/file.txt`,
+		filePath: `\\host.example.com\Share\path\to\file.txt`,
+	},
+
+	// We decline the four- and five-slash variations from section E.3.2.
+	// The paths in these URLs would change meaning under path.Clean.
+	{
+		url:     `file:////host.example.com/path/to/file`,
+		wantErr: "file URL missing drive letter",
+	},
+	{
+		url:     `file://///host.example.com/path/to/file`,
+		wantErr: "file URL missing drive letter",
+	},
+}
diff --git a/src/cmd/go/testdata/script/mod_file_proxy.txt b/src/cmd/go/testdata/script/mod_file_proxy.txt
index cf097f8..38d9fd2 100644
--- a/src/cmd/go/testdata/script/mod_file_proxy.txt
+++ b/src/cmd/go/testdata/script/mod_file_proxy.txt
@@ -11,11 +11,12 @@
 grep v1.5.1 $GOPATH/pkg/mod/cache/download/rsc.io/quote/@v/list
 
 # Use download cache as file:/// proxy.
-[windows] stop # TODO: file://$WORK puts backslashes in the URL
 env GOPATH=$WORK/gopath2
-env GOPROXY=file:///nonexist
+[windows] env GOPROXY=file:///C:/nonexist
+[!windows] env GOPROXY=file:///nonexist
 ! go list
-env GOPROXY=file://$WORK/gopath1/pkg/mod/cache/download
+[windows] env GOPROXY=file:///$WORK/gopath1/pkg/mod/cache/download
+[!windows] env GOPROXY=file://$WORK/gopath1/pkg/mod/cache/download
 go list
 grep v1.5.1 $GOPATH/pkg/mod/cache/download/rsc.io/quote/@v/list
 
diff --git a/src/cmd/go/testdata/script/mod_sumdb_file_path.txt b/src/cmd/go/testdata/script/mod_sumdb_file_path.txt
new file mode 100644
index 0000000..744632e
--- /dev/null
+++ b/src/cmd/go/testdata/script/mod_sumdb_file_path.txt
@@ -0,0 +1,41 @@
+[!net] skip
+
+env GO111MODULE=on
+env GOSUMDB=
+
+# With a file-based proxy with an empty checksum directory,
+# downloading a new module should fail, even if a subsequent
+# proxy contains a more complete mirror of the sum database.
+[windows] env GOPROXY=file:///$WORK/sumproxy,https://proxy.golang.org
+[!windows] env GOPROXY=file://$WORK/sumproxy,https://proxy.golang.org
+! go get -d golang.org/x/text@v0.3.2
+stderr '^verifying golang.org/x/text.*: Not Found'
+
+# If the proxy does not claim to support the database,
+# checksum verification should fall through to the next proxy,
+# and downloading should succeed.
+[windows] env GOPROXY=file:///$WORK/emptyproxy,https://proxy.golang.org
+[!windows] env GOPROXY=file://$WORK/emptyproxy,https://proxy.golang.org
+go get -d golang.org/x/text@v0.3.2
+
+# Once the checksum is present in the go.sum file,
+# an empty file-based sumdb can be used in conjunction with
+# a fallback module mirror.
+grep golang.org/x/text go.sum
+go clean -modcache
+[windows] env GOPROXY=file:///$WORK/sumproxy
+[!windows] env GOPROXY=file://$WORK/sumproxy
+! go get -d golang.org/x/text@v0.3.2
+[windows] env GOPROXY=file:///$WORK/sumproxy,https://proxy.golang.org
+[!windows] env GOPROXY=file://$WORK/sumproxy,https://proxy.golang.org
+go get -d golang.org/x/text@v0.3.2
+
+-- go.mod --
+module example.com
+go 1.13
+-- $WORK/emptyproxy/README.md --
+This proxy contains no modules.
+-- $WORK/sumproxy/README.md --
+This proxy contains no modules.
+-- $WORK/sumproxy/sumdb/sum.golang.org/supported --
+This proxy blocks checksum downloads from sum.golang.org.