internal/{cveutils,worker}: use pkgsite client

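Use the new pkgsite client (internal/pkgsite) instead of making direct
calls to pkgsite from internal/cveutils. TriageCVE and
UpdateCVEsAtCommit now take a *pkgsite.Client rather than a pkgsite URL
string, and internal/cveutils/pkgsite.go is deleted. Tests obtain their
client from pkgsite.TestClient, and per-test JSON files are added under
testdata/pkgsite.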

Change-Id: I8d7f107740679ea1e2475f44b666824b8548b8b0
Reviewed-on: https://go-review.googlesource.com/c/vulndb/+/554356
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Damien Neil <dneil@google.com>
diff --git a/cmd/worker/main.go b/cmd/worker/main.go
index db173aa..1a46827 100644
--- a/cmd/worker/main.go
+++ b/cmd/worker/main.go
@@ -21,10 +21,10 @@
 	"time"
 
 	"golang.org/x/vulndb/internal/cvelistrepo"
-	"golang.org/x/vulndb/internal/cveutils"
 	"golang.org/x/vulndb/internal/ghsa"
 	"golang.org/x/vulndb/internal/gitrepo"
 	"golang.org/x/vulndb/internal/issues"
+	"golang.org/x/vulndb/internal/pkgsite"
 	"golang.org/x/vulndb/internal/proxy"
 	"golang.org/x/vulndb/internal/report"
 	"golang.org/x/vulndb/internal/worker"
@@ -195,12 +195,15 @@
 	if *localRepoPath != "" {
 		repoPath = *localRepoPath
 	}
+	pc := pkgsite.Default()
 	if *knownModuleFile != "" {
-		if err := populateKnownModules(*knownModuleFile); err != nil {
+		known, err := readKnownModules(*knownModuleFile)
+		if err != nil {
 			return err
 		}
+		pc.SetKnownModules(known)
 	}
-	err := worker.UpdateCVEsAtCommit(ctx, repoPath, commitHash, cfg.Store, pkgsiteURL, *force)
+	err := worker.UpdateCVEsAtCommit(ctx, repoPath, commitHash, cfg.Store, pc, *force)
 	if cerr := new(worker.CheckUpdateError); errors.As(err, &cerr) {
 		return fmt.Errorf("%w; use -force to override", cerr)
 	}
@@ -219,10 +222,10 @@
 	return err
 }
 
-func populateKnownModules(filename string) error {
+func readKnownModules(filename string) ([]string, error) {
 	f, err := os.Open(filename)
 	if err != nil {
-		return err
+		return nil, err
 	}
 	defer f.Close()
 
@@ -236,11 +239,10 @@
 		mods = append(mods, line)
 	}
 	if err := scan.Err(); err != nil {
-		return err
+		return nil, err
 	}
-	cveutils.SetKnownModules(mods)
-	fmt.Printf("set %d known modules\n", len(mods))
-	return nil
+	fmt.Printf("%d known modules\n", len(mods))
+	return mods, nil
 }
 
 func createIssuesCommand(ctx context.Context) error {
diff --git a/internal/cveutils/pkgsite.go b/internal/cveutils/pkgsite.go
deleted file mode 100644
index 993d78f..0000000
--- a/internal/cveutils/pkgsite.go
+++ /dev/null
@@ -1,99 +0,0 @@
-// Copyright 2023 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package cveutils
-
-import (
-	"context"
-	"net/http"
-	"net/http/httptest"
-	"strconv"
-	"strings"
-	"testing"
-	"time"
-
-	"golang.org/x/time/rate"
-	"golang.org/x/vulndb/internal/worker/log"
-)
-
-// Limit pkgsite requests to this many per second.
-const pkgsiteQPS = 5
-
-var (
-	// The limiter used to throttle pkgsite requests.
-	// The second argument to rate.NewLimiter is the burst, which
-	// basically lets you exceed the rate briefly.
-	pkgsiteRateLimiter = rate.NewLimiter(rate.Every(time.Duration(1000/float64(pkgsiteQPS))*time.Millisecond), 3)
-
-	// Cache of module paths already seen.
-	seenModulePath = map[string]bool{}
-	// Does seenModulePath contain all known modules?
-	cacheComplete = false
-)
-
-// SetKnownModules provides a list of all known modules,
-// so that no requests need to be made to pkg.go.dev.
-func SetKnownModules(mods []string) {
-	for _, m := range mods {
-		seenModulePath[m] = true
-	}
-	cacheComplete = true
-}
-
-var pkgsiteURL = "https://pkg.go.dev"
-
-// knownToPkgsite reports whether pkgsite knows that modulePath actually refers
-// to a module.
-func knownToPkgsite(ctx context.Context, baseURL, modulePath string) (bool, error) {
-	// If we've seen it before, no need to call.
-	if b, ok := seenModulePath[modulePath]; ok {
-		return b, nil
-	}
-	if cacheComplete {
-		return false, nil
-	}
-	// Pause to maintain a max QPS.
-	if err := pkgsiteRateLimiter.Wait(ctx); err != nil {
-		return false, err
-	}
-	start := time.Now()
-
-	url := baseURL + "/mod/" + modulePath
-	res, err := http.Head(url)
-	var status string
-	if err == nil {
-		status = strconv.Quote(res.Status)
-	}
-	log.With(
-		"latency", time.Since(start),
-		"status", status,
-		"error", err,
-	).Debugf(ctx, "checked if %s is known to pkgsite at HEAD", url)
-	if err != nil {
-		return false, err
-	}
-	known := res.StatusCode == http.StatusOK
-	seenModulePath[modulePath] = known
-	return known, nil
-}
-
-// GetPkgsiteURL returns a URL to either a fake server or the real pkg.go.dev,
-// depending on the useRealPkgsite value.
-//
-// For testing.
-func GetPkgsiteURL(t *testing.T, useRealPkgsite bool) string {
-	if useRealPkgsite {
-		return pkgsiteURL
-	}
-	// Start a test server that recognizes anything from golang.org and bitbucket.org/foo/bar/baz.
-	s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		modulePath := strings.TrimPrefix(r.URL.Path, "/mod/")
-		if !strings.HasPrefix(modulePath, "golang.org/") &&
-			!strings.HasPrefix(modulePath, "bitbucket.org/foo/bar/baz") {
-			http.Error(w, "unknown", http.StatusNotFound)
-		}
-	}))
-	t.Cleanup(s.Close)
-	return s.URL
-}
diff --git a/internal/cveutils/testdata/pkgsite/TestTriageV4CVE.json b/internal/cveutils/testdata/pkgsite/TestTriageV4CVE.json
new file mode 100644
index 0000000..16ed1f1
--- /dev/null
+++ b/internal/cveutils/testdata/pkgsite/TestTriageV4CVE.json
@@ -0,0 +1,10 @@
+{
+   "bitbucket.org/foo/bar": false,
+   "github.com/something/something": false,
+   "github.com/something/something/404": false,
+   "golang.org/x/exp/event": true,
+   "golang.org/x/mod": true,
+   "snyk.io": false,
+   "snyk.io/vuln": false,
+   "snyk.io/vuln/SNYK-GOLANG-12345": false
+}
\ No newline at end of file
diff --git a/internal/cveutils/triage.go b/internal/cveutils/triage.go
index a47f782..d3f9917 100644
--- a/internal/cveutils/triage.go
+++ b/internal/cveutils/triage.go
@@ -15,6 +15,7 @@
 	"golang.org/x/vulndb/internal/cveschema"
 	"golang.org/x/vulndb/internal/derrors"
 	"golang.org/x/vulndb/internal/ghsa"
+	"golang.org/x/vulndb/internal/pkgsite"
 	"golang.org/x/vulndb/internal/stdlib"
 	"golang.org/x/vulndb/internal/worker/log"
 )
@@ -36,11 +37,11 @@
 const unknownPath = "Path is unknown"
 
 // TriageCVE reports whether the CVE refers to a Go module.
-func TriageCVE(ctx context.Context, c *cveschema.CVE, pkgsiteURL string) (_ *TriageResult, err error) {
+func TriageCVE(ctx context.Context, c *cveschema.CVE, pc *pkgsite.Client) (_ *TriageResult, err error) {
 	defer derrors.Wrap(&err, "triageCVE(%q)", c.ID)
 	switch c.DataVersion {
 	case "4.0":
-		return triageV4CVE(ctx, c, pkgsiteURL)
+		return triageV4CVE(ctx, c, pc)
 	default:
 		// TODO(https://golang.org/issue/49289): Add support for v5.0.
 		return nil, fmt.Errorf("CVE %q has DataVersion %q: %w", c.ID, c.DataVersion, errCVEVersionUnsupported)
@@ -79,8 +80,8 @@
 }
 
 // triageV4CVE triages a CVE following schema v4.0 and returns the result.
-func triageV4CVE(ctx context.Context, c *cveschema.CVE, pkgsiteURL string) (result *TriageResult, err error) {
-	defer derrors.Wrap(&err, "triageV4CVE(ctx, %q, %q)", c.ID, pkgsiteURL)
+func triageV4CVE(ctx context.Context, c *cveschema.CVE, pc *pkgsite.Client) (result *TriageResult, err error) {
+	defer derrors.Wrap(&err, "triageV4CVE(ctx, %q, %q)", c.ID, pc.URL())
 	defer func() {
 		if err != nil {
 			return
@@ -127,12 +128,12 @@
 			if notGoModules[mp] {
 				continue
 			}
-			known, err := knownToPkgsite(ctx, pkgsiteURL, mp)
+			known, err := pc.Known(ctx, mp)
 			if err != nil {
 				return nil, err
 			}
 			if known {
-				u := pkgsiteURL + "/" + mp
+				u := pc.URL() + "/" + mp
 				return &TriageResult{
 					ModulePath: mp,
 					Reason:     fmt.Sprintf("Reference data URL %q contains path %q; %q returned a status 200", r.URL, mp, u),
diff --git a/internal/cveutils/triage_test.go b/internal/cveutils/triage_test.go
index d8da5c5..5213128 100644
--- a/internal/cveutils/triage_test.go
+++ b/internal/cveutils/triage_test.go
@@ -15,6 +15,7 @@
 	"github.com/google/go-cmp/cmp"
 	"github.com/google/go-cmp/cmp/cmpopts"
 	"golang.org/x/vulndb/internal/cveschema"
+	"golang.org/x/vulndb/internal/pkgsite"
 	"golang.org/x/vulndb/internal/stdlib"
 )
 
@@ -22,43 +23,54 @@
 
 func TestTriageV4CVE(t *testing.T) {
 	ctx := context.Background()
-	url := GetPkgsiteURL(t, *usePkgsite)
+	cf, err := pkgsite.CacheFile(t)
+	if err != nil {
+		t.Fatal(err)
+	}
+	pc, err := pkgsite.TestClient(t, *usePkgsite, cf)
+	if err != nil {
+		t.Fatal(err)
+	}
 
 	for _, test := range []struct {
 		name string
+		desc string
 		in   *cveschema.CVE
 		want *TriageResult
 	}{
 		{
-			"repo path is unknown Go standard library",
-			&cveschema.CVE{
+			name: "unknown_std",
+			desc: "repo path is unknown Go standard library",
+			in: &cveschema.CVE{
 				References: cveschema.References{
 					Data: []cveschema.Reference{
 						{URL: "https://groups.google.com/forum/#!topic/golang-nuts/1234"},
 					},
 				},
 			},
-			&TriageResult{
+			want: &TriageResult{
 				ModulePath: stdlib.ModulePath,
 			},
 		},
 		{
-			"pkg.go.dev URL is Go standard library package",
-			&cveschema.CVE{
+			name: "std",
+			desc: "pkg.go.dev URL is Go standard library package",
+			in: &cveschema.CVE{
 				References: cveschema.References{
 					Data: []cveschema.Reference{
 						{URL: "https://pkg.go.dev/net/http"},
 					},
 				},
 			},
-			&TriageResult{
+			want: &TriageResult{
 				ModulePath:  stdlib.ModulePath,
 				PackagePath: "net/http",
 			},
 		},
 		{
-			"repo path is is valid golang.org module path",
-			&cveschema.CVE{
+			name: "repo_golang_org",
+			desc: "repo path is is valid golang.org module path",
+			in: &cveschema.CVE{
 				References: cveschema.References{
 					Data: []cveschema.Reference{
 						{URL: "https://groups.google.com/forum/#!topic/golang-nuts/1234"},
@@ -66,89 +78,95 @@
 					},
 				},
 			},
-			&TriageResult{
+			want: &TriageResult{
 				ModulePath: "golang.org/x/mod",
 			},
 		},
 		{
-			"pkg.go.dev URL is is valid golang.org module path",
-			&cveschema.CVE{
+			name: "pkg_golang_org",
+			desc: "pkg.go.dev URL is is valid golang.org module path",
+			in: &cveschema.CVE{
 				References: cveschema.References{
 					Data: []cveschema.Reference{
 						{URL: "https://pkg.go.dev/golang.org/x/mod"},
 					},
 				},
 			},
-			&TriageResult{
+			want: &TriageResult{
 				ModulePath: "golang.org/x/mod",
 			},
 		},
 		{
-			"contains golang.org/pkg URL",
-			&cveschema.CVE{
+			name: "golang_org_pkg",
+			desc: "contains golang.org/pkg URL",
+			in: &cveschema.CVE{
 				References: cveschema.References{
 					Data: []cveschema.Reference{
 						{URL: "https://golang.org/pkg/net/http"},
 					},
 				},
 			},
-			&TriageResult{
+			want: &TriageResult{
 				ModulePath:  stdlib.ModulePath,
 				PackagePath: "net/http",
 			},
 		},
 		{
-			"contains github.com but not on pkg.go.dev",
-			&cveschema.CVE{
+			name: "github_only",
+			desc: "contains github.com but not on pkg.go.dev",
+			in: &cveschema.CVE{
 				References: cveschema.References{
 					Data: []cveschema.Reference{
 						{URL: "https://github.com/something/something/404"},
 					},
 				},
 			},
-			nil,
+			want: nil,
 		},
 		{
-			"contains longer module path",
-			&cveschema.CVE{
+			name: "long_path",
+			desc: "contains longer module path",
+			in: &cveschema.CVE{
 				References: cveschema.References{
 					Data: []cveschema.Reference{
 						{URL: "https://golang.org/x/exp/event"},
 					},
 				},
 			},
-			&TriageResult{
+			want: &TriageResult{
 				ModulePath: "golang.org/x/exp/event",
 			},
 		},
 		{
-			"repo path is not a module",
-			&cveschema.CVE{
+			name: "not_module",
+			desc: "repo path is not a module",
+			in: &cveschema.CVE{
 				References: cveschema.References{
 					Data: []cveschema.Reference{
 						{URL: "https://bitbucket.org/foo/bar"},
 					},
 				},
 			},
-			nil,
+			want: nil,
 		},
 		{
-			"contains snyk.io URL containing GOLANG",
-			&cveschema.CVE{
+			name: "golang_snyk",
+			desc: "contains snyk.io URL containing GOLANG",
+			in: &cveschema.CVE{
 				References: cveschema.References{
 					Data: []cveschema.Reference{
 						{URL: "https://snyk.io/vuln/SNYK-GOLANG-12345"},
 					},
 				},
 			},
-			&TriageResult{
+			want: &TriageResult{
 				ModulePath: unknownPath,
 			},
 		},
 	} {
 		t.Run(test.name, func(t *testing.T) {
 			test.in.DataVersion = "4.0"
-			got, err := TriageCVE(ctx, test.in, url)
+			got, err := TriageCVE(ctx, test.in, pc)
 			if err != nil {
 				t.Fatal(err)
 			}
@@ -161,31 +179,6 @@
 	}
 }
 
-func TestKnownToPkgsite(t *testing.T) {
-	ctx := context.Background()
-
-	const validModule = "golang.org/x/mod"
-	url := GetPkgsiteURL(t, *usePkgsite)
-
-	for _, test := range []struct {
-		in   string
-		want bool
-	}{
-		{validModule, true},
-		{"github.com/something/something", false},
-	} {
-		t.Run(test.in, func(t *testing.T) {
-			got, err := knownToPkgsite(ctx, url, test.in)
-			if err != nil {
-				t.Fatal(err)
-			}
-			if got != test.want {
-				t.Errorf("%s: got %t, want %t", test.in, got, test.want)
-			}
-		})
-	}
-}
-
 func TestGetAliasGHSAs(t *testing.T) {
 	cve := &cveschema.CVE{
 		References: cveschema.References{
diff --git a/internal/worker/server.go b/internal/worker/server.go
index ca62ee0..feaa19d 100644
--- a/internal/worker/server.go
+++ b/internal/worker/server.go
@@ -29,6 +29,7 @@
 	"golang.org/x/vulndb/internal/gitrepo"
 	"golang.org/x/vulndb/internal/issues"
 	"golang.org/x/vulndb/internal/observe"
+	"golang.org/x/vulndb/internal/pkgsite"
 	"golang.org/x/vulndb/internal/proxy"
 	"golang.org/x/vulndb/internal/report"
 	"golang.org/x/vulndb/internal/worker/log"
@@ -307,7 +308,7 @@
 		}
 	}
 	force := (r.FormValue("force") == "true")
-	err = UpdateCVEsAtCommit(r.Context(), cvelistrepo.URLv4, "HEAD", s.cfg.Store, pkgsiteURL, force)
+	err = UpdateCVEsAtCommit(r.Context(), cvelistrepo.URLv4, "HEAD", s.cfg.Store, pkgsite.Default(), force)
 	if cerr := new(CheckUpdateError); errors.As(err, &cerr) {
 		return &serverError{
 			status: http.StatusPreconditionFailed,
diff --git a/internal/worker/testdata/pkgsite/TestDoUpdate.json b/internal/worker/testdata/pkgsite/TestDoUpdate.json
new file mode 100644
index 0000000..d4304e6
--- /dev/null
+++ b/internal/worker/testdata/pkgsite/TestDoUpdate.json
@@ -0,0 +1,15 @@
+{
+   "github.com/pandatix/go-cvss": true,
+   "github.com/pandatix/go-cvss/security": false,
+   "github.com/pandatix/go-cvss/security/advisories": false,
+   "github.com/pandatix/go-cvss/security/advisories/GHSA-xhmf-mmv2-4hhx": false,
+   "golang.org/x/mod": true,
+   "www.intel.com": false,
+   "www.intel.com/content": false,
+   "www.intel.com/content/www": false,
+   "www.intel.com/content/www/us": false,
+   "www.intel.com/content/www/us/en": false,
+   "www.intel.com/content/www/us/en/security-center": false,
+   "www.intel.com/content/www/us/en/security-center/advisory": false,
+   "www.intel.com/content/www/us/en/security-center/advisory/intel-sa-00477.html": false
+}
\ No newline at end of file
diff --git a/internal/worker/update_test.go b/internal/worker/update_test.go
index 63d0f4b..8feff3a 100644
--- a/internal/worker/update_test.go
+++ b/internal/worker/update_test.go
@@ -22,6 +22,7 @@
 	"golang.org/x/vulndb/internal/cveutils"
 	"golang.org/x/vulndb/internal/ghsa"
 	"golang.org/x/vulndb/internal/gitrepo"
+	"golang.org/x/vulndb/internal/pkgsite"
 	"golang.org/x/vulndb/internal/worker/store"
 )
 
@@ -94,9 +95,16 @@
 		t.Fatal(err)
 	}
 	commit := headCommit(t, repo)
-	purl := cveutils.GetPkgsiteURL(t, *usePkgsite)
+	cf, err := pkgsite.CacheFile(t)
+	if err != nil {
+		t.Fatal(err)
+	}
+	pc, err := pkgsite.TestClient(t, *usePkgsite, cf)
+	if err != nil {
+		t.Fatal(err)
+	}
 	needsIssue := func(cve *cveschema.CVE) (*cveutils.TriageResult, error) {
-		return cveutils.TriageCVE(ctx, cve, purl)
+		return cveutils.TriageCVE(ctx, cve, pc)
 	}
 
 	commitHash := commit.Hash.String()
@@ -134,10 +142,7 @@
 	rs[3].TriageState = store.TriageStateHasVuln
 
 	rs[4].TriageState = store.TriageStateNeedsIssue
-	rs[4].Module = "bitbucket.org/foo/bar/baz"
-	if *usePkgsite {
-		rs[4].Module = "github.com/pandatix/go-cvss"
-	}
+	rs[4].Module = "github.com/pandatix/go-cvss"
 	rs[4].CVE = cves[4]
 
 	for _, test := range []struct {
diff --git a/internal/worker/worker.go b/internal/worker/worker.go
index 202d5d0..4ea0c41 100644
--- a/internal/worker/worker.go
+++ b/internal/worker/worker.go
@@ -28,6 +28,7 @@
 	"golang.org/x/vulndb/internal/gitrepo"
 	"golang.org/x/vulndb/internal/issues"
 	"golang.org/x/vulndb/internal/observe"
+	"golang.org/x/vulndb/internal/pkgsite"
 	"golang.org/x/vulndb/internal/proxy"
 	"golang.org/x/vulndb/internal/report"
 	"golang.org/x/vulndb/internal/worker/log"
@@ -36,7 +37,7 @@
 
 // UpdateCVEsAtCommit performs an update on the store using the given commit.
 // Unless force is true, it checks that the update makes sense before doing it.
-func UpdateCVEsAtCommit(ctx context.Context, repoPath, commitHashString string, st store.Store, pkgsiteURL string, force bool) (err error) {
+func UpdateCVEsAtCommit(ctx context.Context, repoPath, commitHashString string, st store.Store, pc *pkgsite.Client, force bool) (err error) {
 	defer derrors.Wrap(&err, "RunCommitUpdate(%q, %q, force=%t)", repoPath, commitHashString, force)
 
 	log.Infof(ctx, "updating false positives")
@@ -76,7 +77,7 @@
 		return err
 	}
 	u := newCVEUpdater(repo, commit, st, knownVulnIDs, func(cve *cveschema.CVE) (*cveutils.TriageResult, error) {
-		return cveutils.TriageCVE(ctx, cve, pkgsiteURL)
+		return cveutils.TriageCVE(ctx, cve, pc)
 	})
 	_, err = u.update(ctx)
 	return err