client: handle file URI scheme on windows

A file URI takes the form of
  file://host/path

https://en.wikipedia.org/wiki/File_URI_scheme

On windows, for example, vulndb located in c:\dir\vulndb will be

  file:///c:/dir/vulndb

Previously, the code took `file://` prefix and attempted to use the
remaining as a directory of local vulndb. On windows, that caused
to os.Stat on /c:/dir/vulndb when a correctly encoded URI was passed in.

Turned out file-uri parsing is a known, complex issue.

See golang/go#32456 for context.

This CL includes the code copied from the Go project.

https://github.com/golang/go/blob/2ebe77a2fda1ee9ff6fd9a3e08933ad1ebaea039/src/cmd/go/internal/web/url.go

Updated the tests to exercise the windows-like file path correctly
when testing on windows. Previously, tests depended on relative paths
or assumed an incorrect form of windows file uri (e.g. file://C:\workdir\gopath\src\golang.org\x\vuln\cmd\govulncheck/testdata/vulndb)

Change-Id: I5fc451e5ca44649b9623daee28ee3210a7b2b96c
Reviewed-on: https://go-review.googlesource.com/c/vuln/+/438175
Run-TryBot: Hyang-Ah Hana Kim <hyangah@gmail.com>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
Reviewed-by: Zvonimir Pavlinovic <zpavlinovic@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Bryan Mills <bcmills@google.com>
diff --git a/client/client.go b/client/client.go
index 97a5d30..053e2a5 100644
--- a/client/client.go
+++ b/client/client.go
@@ -45,6 +45,7 @@
 	"golang.org/x/mod/module"
 	"golang.org/x/vuln/internal"
 	"golang.org/x/vuln/internal/derrors"
+	"golang.org/x/vuln/internal/web"
 	"golang.org/x/vuln/osv"
 )
 
@@ -436,17 +437,16 @@
 func NewClient(sources []string, opts Options) (_ Client, err error) {
 	defer derrors.Wrap(&err, "NewClient(%v, opts)", sources)
 	c := &client{}
-	for _, uri := range sources {
-		uri = strings.TrimRight(uri, "/")
-		// should parse the URI out here instead of in there
-		switch {
-		case strings.HasPrefix(uri, "http://") || strings.HasPrefix(uri, "https://"):
-			hs := &httpSource{url: uri}
-			url, err := url.Parse(uri)
-			if err != nil {
-				return nil, err
-			}
-			hs.dbName = url.Hostname()
+	for _, source := range sources {
+		source = strings.TrimRight(source, "/") // TODO: why?
+		uri, err := url.Parse(source)
+		if err != nil {
+			return nil, err
+		}
+		switch uri.Scheme {
+		case "http", "https":
+			hs := &httpSource{url: uri.String()}
+			hs.dbName = uri.Hostname()
 			if opts.HTTPCache != nil {
 				hs.cache = opts.HTTPCache
 			}
@@ -456,8 +456,11 @@
 				hs.c = new(http.Client)
 			}
 			c.sources = append(c.sources, hs)
-		case strings.HasPrefix(uri, "file://"):
-			dir := strings.TrimPrefix(uri, "file://")
+		case "file":
+			dir, err := web.URLToFilePath(uri)
+			if err != nil {
+				return nil, err
+			}
 			fi, err := os.Stat(dir)
 			if err != nil {
 				return nil, err
diff --git a/client/client_test.go b/client/client_test.go
index 1d7d1ec..b11f8e2 100644
--- a/client/client_test.go
+++ b/client/client_test.go
@@ -7,6 +7,7 @@
 import (
 	"context"
 	"encoding/json"
+	"fmt"
 	"net/http"
 	"net/http/httptest"
 	"net/url"
@@ -20,6 +21,7 @@
 
 	"github.com/google/go-cmp/cmp"
 	"golang.org/x/vuln/internal"
+	"golang.org/x/vuln/internal/web"
 	"golang.org/x/vuln/osv"
 )
 
@@ -85,7 +87,17 @@
 	return httptest.NewServer(mux)
 }
 
-const localURL = "file://testdata/vulndb"
+var localURL = func() string {
+	absDir, err := filepath.Abs("testdata/vulndb")
+	if err != nil {
+		panic(fmt.Sprintf("failed to read testdata/vulndb: %v", err))
+	}
+	u, err := web.URLFromFilePath(absDir)
+	if err != nil {
+		panic(fmt.Sprintf("failed to read testdata/vulndb: %v", err))
+	}
+	return u.String()
+}()
 
 func TestByModule(t *testing.T) {
 	if runtime.GOOS == "js" {
diff --git a/cmd/govulncheck/main_command_118_test.go b/cmd/govulncheck/main_command_118_test.go
index ac55915..e962696 100644
--- a/cmd/govulncheck/main_command_118_test.go
+++ b/cmd/govulncheck/main_command_118_test.go
@@ -22,6 +22,7 @@
 
 	"github.com/google/go-cmdtest"
 	"golang.org/x/vuln/internal/buildtest"
+	"golang.org/x/vuln/internal/web"
 )
 
 var update = flag.Bool("update", false, "update test files with results")
@@ -50,8 +51,17 @@
 		if inputFile != "" {
 			return nil, errors.New("input redirection makes no sense")
 		}
+		// We set GOVERSION to always get the same results regardless of the underlying Go build system.
+		vulndbDir, err := filepath.Abs(filepath.Join(testDir, "testdata", "vulndb"))
+		if err != nil {
+			return nil, err
+		}
+		govulndbURI, err := web.URLFromFilePath(vulndbDir)
+		if err != nil {
+			return nil, fmt.Errorf("failed to create GOVULNDB env var: %v", err)
+		}
 		// We set TEST_GOVERSION to always get the same results regardless of the underlying Go build system.
-		cmd.Env = append(os.Environ(), "GOVULNDB=file://"+testDir+"/testdata/vulndb", "TEST_GOVERSION=go1.18")
+		cmd.Env = append(os.Environ(), "GOVULNDB="+govulndbURI.String(), "TEST_GOVERSION=go1.18")
 		out, err := cmd.CombinedOutput()
 		out = filterGoFilePaths(out)
 		out = filterStdlibVersions(out)
diff --git a/internal/web/url.go b/internal/web/url.go
new file mode 100644
index 0000000..a0552d8
--- /dev/null
+++ b/internal/web/url.go
@@ -0,0 +1,143 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Code copied from
+// https://github.com/golang/go/blob/2ebe77a2fda1ee9ff6fd9a3e08933ad1ebaea039/src/cmd/go/internal/web/url.go
+// TODO(go.dev/issue/32456): if accepted, use the new API.
+
+package web
+
+import (
+	"errors"
+	"net/url"
+	"path/filepath"
+	"runtime"
+	"strings"
+)
+
+var errNotAbsolute = errors.New("path is not absolute")
+
+// URLToFilePath converts a file-scheme url to a file path.
+func URLToFilePath(u *url.URL) (string, error) {
+	if u.Scheme != "file" {
+		return "", errors.New("non-file URL")
+	}
+
+	checkAbs := func(path string) (string, error) {
+		if !filepath.IsAbs(path) {
+			return "", errNotAbsolute
+		}
+		return path, nil
+	}
+
+	if u.Path == "" {
+		if u.Host != "" || u.Opaque == "" {
+			return "", errors.New("file URL missing path")
+		}
+		return checkAbs(filepath.FromSlash(u.Opaque))
+	}
+
+	path, err := convertFileURLPath(u.Host, u.Path)
+	if err != nil {
+		return path, err
+	}
+	return checkAbs(path)
+}
+
+// URLFromFilePath converts the given absolute path to a URL.
+func URLFromFilePath(path string) (*url.URL, error) {
+	if !filepath.IsAbs(path) {
+		return nil, errNotAbsolute
+	}
+
+	// If path has a Windows volume name, convert the volume to a host and prefix
+	// per https://blogs.msdn.microsoft.com/ie/2006/12/06/file-uris-in-windows/.
+	if vol := filepath.VolumeName(path); vol != "" {
+		if strings.HasPrefix(vol, `\\`) {
+			path = filepath.ToSlash(path[2:])
+			i := strings.IndexByte(path, '/')
+
+			if i < 0 {
+				// A degenerate case.
+				// \\host.example.com (without a share name)
+				// becomes
+				// file://host.example.com/
+				return &url.URL{
+					Scheme: "file",
+					Host:   path,
+					Path:   "/",
+				}, nil
+			}
+
+			// \\host.example.com\Share\path\to\file
+			// becomes
+			// file://host.example.com/Share/path/to/file
+			return &url.URL{
+				Scheme: "file",
+				Host:   path[:i],
+				Path:   filepath.ToSlash(path[i:]),
+			}, nil
+		}
+
+		// C:\path\to\file
+		// becomes
+		// file:///C:/path/to/file
+		return &url.URL{
+			Scheme: "file",
+			Path:   "/" + filepath.ToSlash(path),
+		}, nil
+	}
+
+	// /path/to/file
+	// becomes
+	// file:///path/to/file
+	return &url.URL{
+		Scheme: "file",
+		Path:   filepath.ToSlash(path),
+	}, nil
+}
+
+func convertFileURLPath(host, path string) (string, error) {
+	if runtime.GOOS == "windows" {
+		return convertFileURLPathWindows(host, path)
+	}
+	switch host {
+	case "", "localhost":
+	default:
+		return "", errors.New("file URL specifies non-local host")
+	}
+	return filepath.FromSlash(path), nil
+}
+
+func convertFileURLPathWindows(host, path string) (string, error) {
+	if len(path) == 0 || path[0] != '/' {
+		return "", errNotAbsolute
+	}
+
+	path = filepath.FromSlash(path)
+
+	// We interpret Windows file URLs per the description in
+	// https://blogs.msdn.microsoft.com/ie/2006/12/06/file-uris-in-windows/.
+
+	// The host part of a file URL (if any) is the UNC volume name,
+	// but RFC 8089 reserves the authority "localhost" for the local machine.
+	if host != "" && host != "localhost" {
+		// A common "legacy" format omits the leading slash before a drive letter,
+		// encoding the drive letter as the host instead of part of the path.
+		// (See https://blogs.msdn.microsoft.com/freeassociations/2005/05/19/the-bizarre-and-unhappy-story-of-file-urls/.)
+		// We do not support that format, but we should at least emit a more
+		// helpful error message for it.
+		if filepath.VolumeName(host) != "" {
+			return "", errors.New("file URL encodes volume in host field: too few slashes?")
+		}
+		return `\\` + host + path, nil
+	}
+
+	// If host is empty, path must contain an initial slash followed by a
+	// drive letter and path. Remove the slash and verify that the path is valid.
+	if vol := filepath.VolumeName(path[1:]); vol == "" || strings.HasPrefix(vol, `\\`) {
+		return "", errors.New("file URL missing drive letter")
+	}
+	return path[1:], nil
+}
diff --git a/internal/web/url_test.go b/internal/web/url_test.go
new file mode 100644
index 0000000..717bb30
--- /dev/null
+++ b/internal/web/url_test.go
@@ -0,0 +1,200 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package web
+
+import (
+	"net/url"
+	"runtime"
+	"testing"
+)
+
+func TestURLToFilePath(t *testing.T) {
+	for _, tc := range urlTests() {
+		if tc.url == "" {
+			continue
+		}
+		tc := tc
+
+		t.Run(tc.url, func(t *testing.T) {
+			u, err := url.Parse(tc.url)
+			if err != nil {
+				t.Fatalf("url.Parse(%q): %v", tc.url, err)
+			}
+
+			path, err := URLToFilePath(u)
+			if err != nil {
+				if err.Error() == tc.wantErr {
+					return
+				}
+				if tc.wantErr == "" {
+					t.Fatalf("urlToFilePath(%v): %v; want <nil>", u, err)
+				} else {
+					t.Fatalf("urlToFilePath(%v): %v; want %s", u, err, tc.wantErr)
+				}
+			}
+
+			if path != tc.filePath || tc.wantErr != "" {
+				t.Fatalf("urlToFilePath(%v) = %q, <nil>; want %q, %s", u, path, tc.filePath, tc.wantErr)
+			}
+		})
+	}
+}
+
+func TestURLFromFilePath(t *testing.T) {
+	for _, tc := range urlTests() {
+		if tc.filePath == "" {
+			continue
+		}
+		tc := tc
+
+		t.Run(tc.filePath, func(t *testing.T) {
+			u, err := URLFromFilePath(tc.filePath)
+			if err != nil {
+				if err.Error() == tc.wantErr {
+					return
+				}
+				if tc.wantErr == "" {
+					t.Fatalf("urlFromFilePath(%v): %v; want <nil>", tc.filePath, err)
+				} else {
+					t.Fatalf("urlFromFilePath(%v): %v; want %s", tc.filePath, err, tc.wantErr)
+				}
+			}
+
+			if tc.wantErr != "" {
+				t.Fatalf("urlFromFilePath(%v) = <nil>; want error: %s", tc.filePath, tc.wantErr)
+			}
+
+			wantURL := tc.url
+			if tc.canonicalURL != "" {
+				wantURL = tc.canonicalURL
+			}
+			if u.String() != wantURL {
+				t.Errorf("urlFromFilePath(%v) = %v; want %s", tc.filePath, u, wantURL)
+			}
+		})
+	}
+}
+
+func urlTests() []urlTest {
+	if runtime.GOOS == "windows" {
+		return urlTestsWindows
+	}
+	return urlTestsOthers
+}
+
+type urlTest struct {
+	url          string
+	filePath     string
+	canonicalURL string // If empty, assume equal to url.
+	wantErr      string
+}
+
+var urlTestsOthers = []urlTest{
+	// Examples from RFC 8089:
+	{
+		url:      `file:///path/to/file`,
+		filePath: `/path/to/file`,
+	},
+	{
+		url:          `file:/path/to/file`,
+		filePath:     `/path/to/file`,
+		canonicalURL: `file:///path/to/file`,
+	},
+	{
+		url:          `file://localhost/path/to/file`,
+		filePath:     `/path/to/file`,
+		canonicalURL: `file:///path/to/file`,
+	},
+
+	// We reject non-local files.
+	{
+		url:     `file://host.example.com/path/to/file`,
+		wantErr: "file URL specifies non-local host",
+	},
+}
+
+var urlTestsWindows = []urlTest{
+	// Examples from https://blogs.msdn.microsoft.com/ie/2006/12/06/file-uris-in-windows/:
+
+	{
+		url:      `file://laptop/My%20Documents/FileSchemeURIs.doc`,
+		filePath: `\\laptop\My Documents\FileSchemeURIs.doc`,
+	},
+	{
+		url:      `file:///C:/Documents%20and%20Settings/davris/FileSchemeURIs.doc`,
+		filePath: `C:\Documents and Settings\davris\FileSchemeURIs.doc`,
+	},
+	{
+		url:      `file:///D:/Program%20Files/Viewer/startup.htm`,
+		filePath: `D:\Program Files\Viewer\startup.htm`,
+	},
+	{
+		url:          `file:///C:/Program%20Files/Music/Web%20Sys/main.html?REQUEST=RADIO`,
+		filePath:     `C:\Program Files\Music\Web Sys\main.html`,
+		canonicalURL: `file:///C:/Program%20Files/Music/Web%20Sys/main.html`,
+	},
+	{
+		url:      `file://applib/products/a-b/abc_9/4148.920a/media/start.swf`,
+		filePath: `\\applib\products\a-b\abc_9\4148.920a\media\start.swf`,
+	},
+	{
+		url:     `file:////applib/products/a%2Db/abc%5F9/4148.920a/media/start.swf`,
+		wantErr: "file URL missing drive letter",
+	},
+	{
+		url:     `C:\Program Files\Music\Web Sys\main.html?REQUEST=RADIO`,
+		wantErr: "non-file URL",
+	},
+
+	// The example "file://D:\Program Files\Viewer\startup.htm" errors out in
+	// url.Parse, so we substitute a slash-based path for testing instead.
+	{
+		url:     `file://D:/Program Files/Viewer/startup.htm`,
+		wantErr: "file URL encodes volume in host field: too few slashes?",
+	},
+
+	// The blog post discourages the use of non-ASCII characters because they
+	// depend on the user's current codepage. However, when we are working with Go
+	// strings we assume UTF-8 encoding, and our url package refuses to encode
+	// URLs to non-ASCII strings.
+	{
+		url:          `file:///C:/exampleㄓ.txt`,
+		filePath:     `C:\exampleㄓ.txt`,
+		canonicalURL: `file:///C:/example%E3%84%93.txt`,
+	},
+	{
+		url:      `file:///C:/example%E3%84%93.txt`,
+		filePath: `C:\exampleㄓ.txt`,
+	},
+
+	// Examples from RFC 8089:
+
+	// We allow the drive-letter variation from section E.2, because it is
+	// simpler to support than not to. However, we do not generate the shorter
+	// form in the reverse direction.
+	{
+		url:          `file:c:/path/to/file`,
+		filePath:     `c:\path\to\file`,
+		canonicalURL: `file:///c:/path/to/file`,
+	},
+
+	// We encode the UNC share name as the authority following section E.3.1,
+	// because that is what the Microsoft blog post explicitly recommends.
+	{
+		url:      `file://host.example.com/Share/path/to/file.txt`,
+		filePath: `\\host.example.com\Share\path\to\file.txt`,
+	},
+
+	// We decline the four- and five-slash variations from section E.3.2.
+	// The paths in these URLs would change meaning under path.Clean.
+	{
+		url:     `file:////host.example.com/path/to/file`,
+		wantErr: "file URL missing drive letter",
+	},
+	{
+		url:     `file://///host.example.com/path/to/file`,
+		wantErr: "file URL missing drive letter",
+	},
+}