internal/worker: identify modules by known package hosts

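When the reference data contains a godoc.org, pkg.go.dev, or
golang.org/pkg URL, derive the module path (or, for the standard
library, the package path) directly from the URL path and return that
result without checking candidate paths against pkgsite.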

Change-Id: Icbdadd9a35d3e7979d817e8652cf0b344ec1758b
Reviewed-on: https://go-review.googlesource.com/c/vuln/+/370875
Trust: Julie Qiu <julie@golang.org>
Run-TryBot: Julie Qiu <julie@golang.org>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
diff --git a/internal/worker/triage.go b/internal/worker/triage.go
index 3773380..7352725 100644
--- a/internal/worker/triage.go
+++ b/internal/worker/triage.go
@@ -35,7 +35,7 @@
 	"golang-nuts",
 }
 
-const stdlibPath = "Go Standard Library"
+const stdlibPath = "Go Standard Library (package not identified)"
 
 // TriageCVE reports whether the CVE refers to a Go module.
 func TriageCVE(ctx context.Context, c *cveschema.CVE, pkgsiteURL string) (_ *triageResult, err error) {
@@ -55,6 +55,12 @@
 	reason     string
 }
 
+// gopkgHosts is the set of hostnames of popular Go package websites whose URL paths are treated as package paths.
+var gopkgHosts = map[string]bool{
+	"godoc.org":  true,
+	"pkg.go.dev": true,
+}
+
 // triageV4CVE triages a CVE following schema v4.0 and returns the result.
 func triageV4CVE(ctx context.Context, c *cveschema.CVE, pkgsiteURL string) (_ *triageResult, err error) {
 	defer derrors.Wrap(&err, "triageV4CVE(ctx, %q, %q)", c.ID, pkgsiteURL)
@@ -66,6 +72,22 @@
 		if err != nil {
 			return nil, fmt.Errorf("url.Parse(%q): %v", r.URL, err)
 		}
+		if strings.Contains(r.URL, "golang.org/pkg") {
+			mp := strings.TrimPrefix(refURL.Path, "/pkg/")
+			return &triageResult{
+				modulePath: mp,
+				stdlib:     true,
+				reason:     fmt.Sprintf("Reference data URL %q contains path %q", r.URL, mp),
+			}, nil
+		}
+		if gopkgHosts[refURL.Host] {
+			mp := strings.TrimPrefix(refURL.Path, "/")
+			return &triageResult{
+				modulePath: mp,
+				stdlib:     stdlibContains(mp),
+				reason:     fmt.Sprintf("Reference data URL %q contains path %q", r.URL, mp),
+			}, nil
+		}
 		modpaths := candidateModulePaths(refURL.Host + refURL.Path)
 		for _, mp := range modpaths {
 			known, err := knownToPkgsite(ctx, pkgsiteURL, mp)
diff --git a/internal/worker/triage_test.go b/internal/worker/triage_test.go
index 07a8e2f..b253d06 100644
--- a/internal/worker/triage_test.go
+++ b/internal/worker/triage_test.go
@@ -15,6 +15,8 @@
 	"strings"
 	"testing"
 
+	"github.com/google/go-cmp/cmp"
+	"github.com/google/go-cmp/cmp/cmpopts"
 	"golang.org/x/vuln/internal/cveschema"
 )
 
@@ -27,19 +29,35 @@
 	for _, test := range []struct {
 		name string
 		in   *cveschema.CVE
-		want string
+		want *triageResult
 	}{
 		{
-			"repo path is Go standard library",
+			"repo path is unknown Go standard library",
 			&cveschema.CVE{
 				References: cveschema.References{
 					Data: []cveschema.Reference{
-						{URL: "https://pkg.go.dev/net/http"},
 						{URL: "https://groups.google.com/forum/#!topic/golang-nuts/1234"},
 					},
 				},
 			},
-			stdlibPath,
+			&triageResult{
+				modulePath: stdlibPath,
+				stdlib:     true,
+			},
+		},
+		{
+			"pkg.go.dev URL is Go standard library package",
+			&cveschema.CVE{
+				References: cveschema.References{
+					Data: []cveschema.Reference{
+						{URL: "https://pkg.go.dev/net/http"},
+					},
+				},
+			},
+			&triageResult{
+				modulePath: "net/http",
+				stdlib:     true,
+			},
 		},
 		{
 			"repo path is is valid golang.org module path",
@@ -51,7 +69,36 @@
 					},
 				},
 			},
-			"golang.org/x/mod",
+			&triageResult{
+				modulePath: "golang.org/x/mod",
+			},
+		},
+		{
+			"pkg.go.dev URL is is valid golang.org module path",
+			&cveschema.CVE{
+				References: cveschema.References{
+					Data: []cveschema.Reference{
+						{URL: "https://pkg.go.dev/golang.org/x/mod"},
+					},
+				},
+			},
+			&triageResult{
+				modulePath: "golang.org/x/mod",
+			},
+		},
+		{
+			"contains golang.org/pkg URL",
+			&cveschema.CVE{
+				References: cveschema.References{
+					Data: []cveschema.Reference{
+						{URL: "https://golang.org/pkg/net/http"},
+					},
+				},
+			},
+			&triageResult{
+				modulePath: "net/http",
+				stdlib:     true,
+			},
 		},
 		{
 			"contains github.com but not on pkg.go.dev",
@@ -62,7 +109,7 @@
 					},
 				},
 			},
-			"",
+			nil,
 		},
 		{
 			"contains longer module path",
@@ -73,7 +120,9 @@
 					},
 				},
 			},
-			"bitbucket.org/foo/bar/baz/v2",
+			&triageResult{
+				modulePath: "bitbucket.org/foo/bar/baz/v2",
+			},
 		},
 		{
 			"repo path is not a module",
@@ -84,7 +133,7 @@
 					},
 				},
 			},
-			"",
+			nil,
 		},
 	} {
 		t.Run(test.name, func(t *testing.T) {
@@ -93,14 +142,10 @@
 			if err != nil {
 				t.Fatal(err)
 			}
-			if got == nil {
-				if test.want != "" {
-					t.Fatalf("got empty string, want %q", test.want)
-				}
-				return
-			}
-			if got.modulePath != test.want {
-				t.Errorf("got %q, want %q", got.modulePath, test.want)
+			if diff := cmp.Diff(test.want, got,
+				cmp.AllowUnexported(triageResult{}),
+				cmpopts.IgnoreFields(triageResult{}, "reason")); diff != "" {
+				t.Errorf("mismatch (-want, +got):\n%s", diff)
 			}
 		})
 	}