client: handle file URI scheme on windows
A file URI takes the form of
file://host/path
https://en.wikipedia.org/wiki/File_URI_scheme
On windows, for example, vulndb located in c:\dir\vulndb will be
file:///c:/dir/vulndb
Previously, the code took `file://` prefix and attempted to use the
remaining as a directory of local vulndb. On windows, that caused
to os.Stat on /c:/dir/vulndb when a correctly encoded URI was passed in.
Turned out file-uri parsing is a known, complex issue.
See golang/go#32456 for context.
This CL includes the code copied from the Go project.
https://github.com/golang/go/blob/2ebe77a2fda1ee9ff6fd9a3e08933ad1ebaea039/src/cmd/go/internal/web/url.go
Updated the tests to exercise the windows-like file path correctly
when testing on windows. Previously, tests depended on relative paths
or assumed an incorrect form of windows file uri (e.g. file://C:\workdir\gopath\src\golang.org\x\vuln\cmd\govulncheck/testdata/vulndb)
Change-Id: I5fc451e5ca44649b9623daee28ee3210a7b2b96c
Reviewed-on: https://go-review.googlesource.com/c/vuln/+/438175
Run-TryBot: Hyang-Ah Hana Kim <hyangah@gmail.com>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
Reviewed-by: Zvonimir Pavlinovic <zpavlinovic@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Bryan Mills <bcmills@google.com>
diff --git a/client/client.go b/client/client.go
index 97a5d30..053e2a5 100644
--- a/client/client.go
+++ b/client/client.go
@@ -45,6 +45,7 @@
"golang.org/x/mod/module"
"golang.org/x/vuln/internal"
"golang.org/x/vuln/internal/derrors"
+ "golang.org/x/vuln/internal/web"
"golang.org/x/vuln/osv"
)
@@ -436,17 +437,16 @@
func NewClient(sources []string, opts Options) (_ Client, err error) {
defer derrors.Wrap(&err, "NewClient(%v, opts)", sources)
c := &client{}
- for _, uri := range sources {
- uri = strings.TrimRight(uri, "/")
- // should parse the URI out here instead of in there
- switch {
- case strings.HasPrefix(uri, "http://") || strings.HasPrefix(uri, "https://"):
- hs := &httpSource{url: uri}
- url, err := url.Parse(uri)
- if err != nil {
- return nil, err
- }
- hs.dbName = url.Hostname()
+ for _, source := range sources {
+ source = strings.TrimRight(source, "/") // TODO: why?
+ uri, err := url.Parse(source)
+ if err != nil {
+ return nil, err
+ }
+ switch uri.Scheme {
+ case "http", "https":
+ hs := &httpSource{url: uri.String()}
+ hs.dbName = uri.Hostname()
if opts.HTTPCache != nil {
hs.cache = opts.HTTPCache
}
@@ -456,8 +456,11 @@
hs.c = new(http.Client)
}
c.sources = append(c.sources, hs)
- case strings.HasPrefix(uri, "file://"):
- dir := strings.TrimPrefix(uri, "file://")
+ case "file":
+ dir, err := web.URLToFilePath(uri)
+ if err != nil {
+ return nil, err
+ }
fi, err := os.Stat(dir)
if err != nil {
return nil, err
diff --git a/client/client_test.go b/client/client_test.go
index 1d7d1ec..b11f8e2 100644
--- a/client/client_test.go
+++ b/client/client_test.go
@@ -7,6 +7,7 @@
import (
"context"
"encoding/json"
+ "fmt"
"net/http"
"net/http/httptest"
"net/url"
@@ -20,6 +21,7 @@
"github.com/google/go-cmp/cmp"
"golang.org/x/vuln/internal"
+ "golang.org/x/vuln/internal/web"
"golang.org/x/vuln/osv"
)
@@ -85,7 +87,17 @@
return httptest.NewServer(mux)
}
-const localURL = "file://testdata/vulndb"
+var localURL = func() string {
+ absDir, err := filepath.Abs("testdata/vulndb")
+ if err != nil {
+ panic(fmt.Sprintf("failed to read testdata/vulndb: %v", err))
+ }
+ u, err := web.URLFromFilePath(absDir)
+ if err != nil {
+ panic(fmt.Sprintf("failed to read testdata/vulndb: %v", err))
+ }
+ return u.String()
+}()
func TestByModule(t *testing.T) {
if runtime.GOOS == "js" {
diff --git a/cmd/govulncheck/main_command_118_test.go b/cmd/govulncheck/main_command_118_test.go
index ac55915..e962696 100644
--- a/cmd/govulncheck/main_command_118_test.go
+++ b/cmd/govulncheck/main_command_118_test.go
@@ -22,6 +22,7 @@
"github.com/google/go-cmdtest"
"golang.org/x/vuln/internal/buildtest"
+ "golang.org/x/vuln/internal/web"
)
var update = flag.Bool("update", false, "update test files with results")
@@ -50,8 +51,17 @@
if inputFile != "" {
return nil, errors.New("input redirection makes no sense")
}
+ // We set GOVERSION to always get the same results regardless of the underlying Go build system.
+ vulndbDir, err := filepath.Abs(filepath.Join(testDir, "testdata", "vulndb"))
+ if err != nil {
+ return nil, err
+ }
+ govulndbURI, err := web.URLFromFilePath(vulndbDir)
+ if err != nil {
+ return nil, fmt.Errorf("failed to create GOVULNDB env var: %v", err)
+ }
// We set TEST_GOVERSION to always get the same results regardless of the underlying Go build system.
- cmd.Env = append(os.Environ(), "GOVULNDB=file://"+testDir+"/testdata/vulndb", "TEST_GOVERSION=go1.18")
+ cmd.Env = append(os.Environ(), "GOVULNDB="+govulndbURI.String(), "TEST_GOVERSION=go1.18")
out, err := cmd.CombinedOutput()
out = filterGoFilePaths(out)
out = filterStdlibVersions(out)
diff --git a/internal/web/url.go b/internal/web/url.go
new file mode 100644
index 0000000..a0552d8
--- /dev/null
+++ b/internal/web/url.go
@@ -0,0 +1,143 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Code copied from
+// https://github.com/golang/go/blob/2ebe77a2fda1ee9ff6fd9a3e08933ad1ebaea039/src/cmd/go/internal/web/url.go
+// TODO(go.dev/issue/32456): if accepted, use the new API.
+
+package web
+
+import (
+ "errors"
+ "net/url"
+ "path/filepath"
+ "runtime"
+ "strings"
+)
+
+var errNotAbsolute = errors.New("path is not absolute")
+
+// URLToFilePath converts a file-scheme url to a file path.
+func URLToFilePath(u *url.URL) (string, error) {
+ if u.Scheme != "file" {
+ return "", errors.New("non-file URL")
+ }
+
+ checkAbs := func(path string) (string, error) {
+ if !filepath.IsAbs(path) {
+ return "", errNotAbsolute
+ }
+ return path, nil
+ }
+
+ if u.Path == "" {
+ if u.Host != "" || u.Opaque == "" {
+ return "", errors.New("file URL missing path")
+ }
+ return checkAbs(filepath.FromSlash(u.Opaque))
+ }
+
+ path, err := convertFileURLPath(u.Host, u.Path)
+ if err != nil {
+ return path, err
+ }
+ return checkAbs(path)
+}
+
+// URLFromFilePath converts the given absolute path to a URL.
+func URLFromFilePath(path string) (*url.URL, error) {
+ if !filepath.IsAbs(path) {
+ return nil, errNotAbsolute
+ }
+
+ // If path has a Windows volume name, convert the volume to a host and prefix
+ // per https://blogs.msdn.microsoft.com/ie/2006/12/06/file-uris-in-windows/.
+ if vol := filepath.VolumeName(path); vol != "" {
+ if strings.HasPrefix(vol, `\\`) {
+ path = filepath.ToSlash(path[2:])
+ i := strings.IndexByte(path, '/')
+
+ if i < 0 {
+ // A degenerate case.
+ // \\host.example.com (without a share name)
+ // becomes
+ // file://host.example.com/
+ return &url.URL{
+ Scheme: "file",
+ Host: path,
+ Path: "/",
+ }, nil
+ }
+
+ // \\host.example.com\Share\path\to\file
+ // becomes
+ // file://host.example.com/Share/path/to/file
+ return &url.URL{
+ Scheme: "file",
+ Host: path[:i],
+ Path: filepath.ToSlash(path[i:]),
+ }, nil
+ }
+
+ // C:\path\to\file
+ // becomes
+ // file:///C:/path/to/file
+ return &url.URL{
+ Scheme: "file",
+ Path: "/" + filepath.ToSlash(path),
+ }, nil
+ }
+
+ // /path/to/file
+ // becomes
+ // file:///path/to/file
+ return &url.URL{
+ Scheme: "file",
+ Path: filepath.ToSlash(path),
+ }, nil
+}
+
+func convertFileURLPath(host, path string) (string, error) {
+ if runtime.GOOS == "windows" {
+ return convertFileURLPathWindows(host, path)
+ }
+ switch host {
+ case "", "localhost":
+ default:
+ return "", errors.New("file URL specifies non-local host")
+ }
+ return filepath.FromSlash(path), nil
+}
+
+func convertFileURLPathWindows(host, path string) (string, error) {
+ if len(path) == 0 || path[0] != '/' {
+ return "", errNotAbsolute
+ }
+
+ path = filepath.FromSlash(path)
+
+ // We interpret Windows file URLs per the description in
+ // https://blogs.msdn.microsoft.com/ie/2006/12/06/file-uris-in-windows/.
+
+ // The host part of a file URL (if any) is the UNC volume name,
+ // but RFC 8089 reserves the authority "localhost" for the local machine.
+ if host != "" && host != "localhost" {
+ // A common "legacy" format omits the leading slash before a drive letter,
+ // encoding the drive letter as the host instead of part of the path.
+ // (See https://blogs.msdn.microsoft.com/freeassociations/2005/05/19/the-bizarre-and-unhappy-story-of-file-urls/.)
+ // We do not support that format, but we should at least emit a more
+ // helpful error message for it.
+ if filepath.VolumeName(host) != "" {
+ return "", errors.New("file URL encodes volume in host field: too few slashes?")
+ }
+ return `\\` + host + path, nil
+ }
+
+ // If host is empty, path must contain an initial slash followed by a
+ // drive letter and path. Remove the slash and verify that the path is valid.
+ if vol := filepath.VolumeName(path[1:]); vol == "" || strings.HasPrefix(vol, `\\`) {
+ return "", errors.New("file URL missing drive letter")
+ }
+ return path[1:], nil
+}
diff --git a/internal/web/url_test.go b/internal/web/url_test.go
new file mode 100644
index 0000000..717bb30
--- /dev/null
+++ b/internal/web/url_test.go
@@ -0,0 +1,200 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package web
+
+import (
+ "net/url"
+ "runtime"
+ "testing"
+)
+
+func TestURLToFilePath(t *testing.T) {
+ for _, tc := range urlTests() {
+ if tc.url == "" {
+ continue
+ }
+ tc := tc
+
+ t.Run(tc.url, func(t *testing.T) {
+ u, err := url.Parse(tc.url)
+ if err != nil {
+ t.Fatalf("url.Parse(%q): %v", tc.url, err)
+ }
+
+ path, err := URLToFilePath(u)
+ if err != nil {
+ if err.Error() == tc.wantErr {
+ return
+ }
+ if tc.wantErr == "" {
+ t.Fatalf("urlToFilePath(%v): %v; want <nil>", u, err)
+ } else {
+ t.Fatalf("urlToFilePath(%v): %v; want %s", u, err, tc.wantErr)
+ }
+ }
+
+ if path != tc.filePath || tc.wantErr != "" {
+ t.Fatalf("urlToFilePath(%v) = %q, <nil>; want %q, %s", u, path, tc.filePath, tc.wantErr)
+ }
+ })
+ }
+}
+
+func TestURLFromFilePath(t *testing.T) {
+ for _, tc := range urlTests() {
+ if tc.filePath == "" {
+ continue
+ }
+ tc := tc
+
+ t.Run(tc.filePath, func(t *testing.T) {
+ u, err := URLFromFilePath(tc.filePath)
+ if err != nil {
+ if err.Error() == tc.wantErr {
+ return
+ }
+ if tc.wantErr == "" {
+ t.Fatalf("urlFromFilePath(%v): %v; want <nil>", tc.filePath, err)
+ } else {
+ t.Fatalf("urlFromFilePath(%v): %v; want %s", tc.filePath, err, tc.wantErr)
+ }
+ }
+
+ if tc.wantErr != "" {
+ t.Fatalf("urlFromFilePath(%v) = <nil>; want error: %s", tc.filePath, tc.wantErr)
+ }
+
+ wantURL := tc.url
+ if tc.canonicalURL != "" {
+ wantURL = tc.canonicalURL
+ }
+ if u.String() != wantURL {
+ t.Errorf("urlFromFilePath(%v) = %v; want %s", tc.filePath, u, wantURL)
+ }
+ })
+ }
+}
+
+func urlTests() []urlTest {
+ if runtime.GOOS == "windows" {
+ return urlTestsWindows
+ }
+ return urlTestsOthers
+}
+
+type urlTest struct {
+ url string
+ filePath string
+ canonicalURL string // If empty, assume equal to url.
+ wantErr string
+}
+
+var urlTestsOthers = []urlTest{
+ // Examples from RFC 8089:
+ {
+ url: `file:///path/to/file`,
+ filePath: `/path/to/file`,
+ },
+ {
+ url: `file:/path/to/file`,
+ filePath: `/path/to/file`,
+ canonicalURL: `file:///path/to/file`,
+ },
+ {
+ url: `file://localhost/path/to/file`,
+ filePath: `/path/to/file`,
+ canonicalURL: `file:///path/to/file`,
+ },
+
+ // We reject non-local files.
+ {
+ url: `file://host.example.com/path/to/file`,
+ wantErr: "file URL specifies non-local host",
+ },
+}
+
+var urlTestsWindows = []urlTest{
+ // Examples from https://blogs.msdn.microsoft.com/ie/2006/12/06/file-uris-in-windows/:
+
+ {
+ url: `file://laptop/My%20Documents/FileSchemeURIs.doc`,
+ filePath: `\\laptop\My Documents\FileSchemeURIs.doc`,
+ },
+ {
+ url: `file:///C:/Documents%20and%20Settings/davris/FileSchemeURIs.doc`,
+ filePath: `C:\Documents and Settings\davris\FileSchemeURIs.doc`,
+ },
+ {
+ url: `file:///D:/Program%20Files/Viewer/startup.htm`,
+ filePath: `D:\Program Files\Viewer\startup.htm`,
+ },
+ {
+ url: `file:///C:/Program%20Files/Music/Web%20Sys/main.html?REQUEST=RADIO`,
+ filePath: `C:\Program Files\Music\Web Sys\main.html`,
+ canonicalURL: `file:///C:/Program%20Files/Music/Web%20Sys/main.html`,
+ },
+ {
+ url: `file://applib/products/a-b/abc_9/4148.920a/media/start.swf`,
+ filePath: `\\applib\products\a-b\abc_9\4148.920a\media\start.swf`,
+ },
+ {
+ url: `file:////applib/products/a%2Db/abc%5F9/4148.920a/media/start.swf`,
+ wantErr: "file URL missing drive letter",
+ },
+ {
+ url: `C:\Program Files\Music\Web Sys\main.html?REQUEST=RADIO`,
+ wantErr: "non-file URL",
+ },
+
+ // The example "file://D:\Program Files\Viewer\startup.htm" errors out in
+ // url.Parse, so we substitute a slash-based path for testing instead.
+ {
+ url: `file://D:/Program Files/Viewer/startup.htm`,
+ wantErr: "file URL encodes volume in host field: too few slashes?",
+ },
+
+ // The blog post discourages the use of non-ASCII characters because they
+ // depend on the user's current codepage. However, when we are working with Go
+ // strings we assume UTF-8 encoding, and our url package refuses to encode
+ // URLs to non-ASCII strings.
+ {
+ url: `file:///C:/exampleㄓ.txt`,
+ filePath: `C:\exampleㄓ.txt`,
+ canonicalURL: `file:///C:/example%E3%84%93.txt`,
+ },
+ {
+ url: `file:///C:/example%E3%84%93.txt`,
+ filePath: `C:\exampleㄓ.txt`,
+ },
+
+ // Examples from RFC 8089:
+
+ // We allow the drive-letter variation from section E.2, because it is
+ // simpler to support than not to. However, we do not generate the shorter
+ // form in the reverse direction.
+ {
+ url: `file:c:/path/to/file`,
+ filePath: `c:\path\to\file`,
+ canonicalURL: `file:///c:/path/to/file`,
+ },
+
+ // We encode the UNC share name as the authority following section E.3.1,
+ // because that is what the Microsoft blog post explicitly recommends.
+ {
+ url: `file://host.example.com/Share/path/to/file.txt`,
+ filePath: `\\host.example.com\Share\path\to\file.txt`,
+ },
+
+ // We decline the four- and five-slash variations from section E.3.2.
+ // The paths in these URLs would change meaning under path.Clean.
+ {
+ url: `file:////host.example.com/path/to/file`,
+ wantErr: "file URL missing drive letter",
+ },
+ {
+ url: `file://///host.example.com/path/to/file`,
+ wantErr: "file URL missing drive letter",
+ },
+}