internal/worker: use HEAD requests to pkgsite

When we test for the existence of a module, we just need the status
code; there's no need to read the entire page.

Add an opt-in test against the real pkgsite, enabled with the -pkgsite
test flag, to confirm that HEAD requests work.

The test revealed that although it seemed we were able to use the
internal pkgsite staging environment, in fact we were always getting
200s because we were fetching a login page. We may add auth in the
future, but for now we drop the ability to access anything other than
the public pkgsite.

Change-Id: I8934e17c76aad6aa36bd974a813f300b02549491
Reviewed-on: https://go-review.googlesource.com/c/vuln/+/368856
Trust: Jonathan Amsterdam <jba@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Julie Qiu <julie@golang.org>
diff --git a/cmd/worker/main.go b/cmd/worker/main.go
index 050649f..6e37299 100644
--- a/cmd/worker/main.go
+++ b/cmd/worker/main.go
@@ -28,12 +28,14 @@
 	project        = flag.String("project", os.Getenv("GOOGLE_CLOUD_PROJECT"), "project ID (required)")
 	namespace      = flag.String("namespace", os.Getenv("VULN_WORKER_NAMESPACE"), "Firestore namespace (required)")
 	errorReporting = flag.Bool("reporterrors", os.Getenv("VULN_WORKER_REPORT_ERRORS") == "true", "use the error reporting API")
-	pkgsiteURL     = flag.String("pkgsite", "https://pkg.go.dev", "URL to pkgsite")
 	localRepoPath  = flag.String("repo", "", "path to local repo, instead of cloning remote")
 	force          = flag.Bool("force", false, "force an update to happen")
 )
 
-const serviceID = "vuln-worker"
+const (
+	pkgsiteURL = "https://pkg.go.dev"
+	serviceID  = "vuln-worker"
+)
 
 func main() {
 	flag.Usage = func() {
@@ -145,7 +147,7 @@
 	if *localRepoPath != "" {
 		repoPath = *localRepoPath
 	}
-	err := worker.UpdateCommit(ctx, repoPath, commitHash, st, *pkgsiteURL, *force)
+	err := worker.UpdateCommit(ctx, repoPath, commitHash, st, pkgsiteURL, *force)
 	if cerr := new(worker.CheckUpdateError); errors.As(err, &cerr) {
 		return fmt.Errorf("%w; use -force to override", cerr)
 	}
diff --git a/internal/worker/triage.go b/internal/worker/triage.go
index ec4681d..3343bd0 100644
--- a/internal/worker/triage.go
+++ b/internal/worker/triage.go
@@ -10,6 +10,7 @@
 	"fmt"
 	"net/http"
 	"net/url"
+	"strconv"
 	"strings"
 	"time"
 
@@ -107,7 +108,7 @@
 
 // knownToPkgsite reports whether pkgsite knows that modulePath actually refers
 // to a module.
-func knownToPkgsite(ctx context.Context, url, modulePath string) (bool, error) {
+func knownToPkgsite(ctx context.Context, baseURL, modulePath string) (bool, error) {
 	// If we've seen it before, no need to call.
 	if b, ok := seenModulePath[modulePath]; ok {
 		return b, nil
@@ -116,13 +117,17 @@
 	if err := pkgsiteRateLimiter.Wait(ctx); err != nil {
 		return false, err
 	}
-	msg := fmt.Sprintf("call to %s", url)
-	log.Info(ctx, msg+" starting", event.String("path", modulePath))
 	start := time.Now()
-	res, err := http.Get(url + "/mod/" + modulePath)
-	log.Info(ctx, msg+" ended",
-		event.String("path", modulePath),
+
+	url := baseURL + "/mod/" + modulePath
+	res, err := http.Head(url)
+	var status string
+	if err == nil {
+		status = strconv.Quote(res.Status)
+	}
+	log.Info(ctx, "HEAD "+url,
 		event.Value("latency", time.Since(start)),
+		event.String("status", status),
 		event.Value("error", err))
 	if err != nil {
 		return false, err
diff --git a/internal/worker/triage_test.go b/internal/worker/triage_test.go
index 6c70c20..8e3e032 100644
--- a/internal/worker/triage_test.go
+++ b/internal/worker/triage_test.go
@@ -6,6 +6,7 @@
 
 import (
 	"context"
+	"flag"
 	"net/http"
 	"net/http/httptest"
 	"strings"
@@ -14,17 +15,27 @@
 	"golang.org/x/vuln/internal/worker/log"
 )
 
+var usePkgsite = flag.Bool("pkgsite", false, "use pkg.go.dev for tests")
+
 func TestKnownToPkgsite(t *testing.T) {
 	ctx := log.WithLineLogger(context.Background())
 
 	const validModule = "golang.org/x/mod"
-	s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		modulePath := strings.TrimPrefix(r.URL.Path, "/mod/")
-		if modulePath != validModule {
-			http.Error(w, "unknown", http.StatusNotFound)
-		}
-	}))
-	defer s.Close()
+
+	var url string
+
+	if *usePkgsite {
+		url = "https://pkg.go.dev"
+	} else {
+		s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			modulePath := strings.TrimPrefix(r.URL.Path, "/mod/")
+			if modulePath != validModule {
+				http.Error(w, "unknown", http.StatusNotFound)
+			}
+		}))
+		defer s.Close()
+		url = s.URL
+	}
 
 	for _, test := range []struct {
 		in   string
@@ -34,12 +45,12 @@
 		{"github.com/something/something", false},
 	} {
 		t.Run(test.in, func(t *testing.T) {
-			got, err := knownToPkgsite(ctx, s.URL, test.in)
+			got, err := knownToPkgsite(ctx, url, test.in)
 			if err != nil {
 				t.Fatal(err)
 			}
 			if got != test.want {
-				t.Errorf("got %t, want %t", got, test.want)
+				t.Errorf("%s: got %t, want %t", test.in, got, test.want)
 			}
 		})
 	}