internal/worker: use candidate module path

cveModulePath now calls candidateModulePaths, so that module paths longer
than the repository path (e.g. github.com/pulumi/pulumi/sdk/v2) are caught.

For golang/go#49733

Change-Id: I96e0462f115afa4cdd85a8c6dd770f96dafdb4f1
Reviewed-on: https://go-review.googlesource.com/c/vuln/+/369750
Reviewed-by: Jonathan Amsterdam <jba@google.com>
Trust: Julie Qiu <julie@golang.org>
Run-TryBot: Julie Qiu <julie@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
diff --git a/internal/worker/triage.go b/internal/worker/triage.go
index 8947547..1d9a0d0 100644
--- a/internal/worker/triage.go
+++ b/internal/worker/triage.go
@@ -23,15 +23,6 @@
 
 var errCVEVersionUnsupported = errors.New("unsupported CVE version")
 
-var vcsHostsWithThreeElementRepoName = map[string]bool{
-	"bitbucket.org": true,
-	"gitea.com":     true,
-	"gitee.com":     true,
-	"github.com":    true,
-	"gitlab.com":    true,
-	"golang.org":    true,
-}
-
 var stdlibKeywords = map[string]bool{
 	"github.com/golang": true,
 	"golang-announce":   true,
@@ -57,8 +48,6 @@
 
 // cveModulePath returns a Go module path for a CVE, if we can determine what
 // it is.
-// TODO(golang/go#49733) Use the CandidateModulePaths function from pkgsite to catch
-// longer module paths, e.g. github.com/pulumi/pulumi/sdk/v2.
 func cveModulePath(ctx context.Context, c *cveschema.CVE, pkgsiteURL string) (_ string, err error) {
 	defer derrors.Wrap(&err, "cveModulePath(%q)", c.ID)
 	for _, r := range c.References.Data {
@@ -70,26 +59,18 @@
 				return "Go Standard Library", nil
 			}
 		}
-		for host := range vcsHostsWithThreeElementRepoName {
-			if !strings.Contains(r.URL, host) {
-				continue
-			}
-			refURL, err := url.Parse(r.URL)
-			if err != nil {
-				return "", fmt.Errorf("url.Parse(%q): %v", r.URL, err)
-			}
-			u := refURL.Host + refURL.Path
-			parts := strings.Split(u, "/")
-			if len(parts) < 3 {
-				continue
-			}
-			mod := strings.Join(parts[0:3], "/")
-			known, err := knownToPkgsite(ctx, pkgsiteURL, mod)
+		refURL, err := url.Parse(r.URL)
+		if err != nil {
+			return "", fmt.Errorf("url.Parse(%q): %v", r.URL, err)
+		}
+		modpaths := candidateModulePaths(refURL.Host + refURL.Path)
+		for _, mp := range modpaths {
+			known, err := knownToPkgsite(ctx, pkgsiteURL, mp)
 			if err != nil {
 				return "", err
 			}
 			if known {
-				return mod, nil
+				return mp, nil
 			}
 		}
 	}
diff --git a/internal/worker/triage_test.go b/internal/worker/triage_test.go
index 3f4d7b4..8d5a367 100644
--- a/internal/worker/triage_test.go
+++ b/internal/worker/triage_test.go
@@ -12,11 +12,89 @@
 	"strings"
 	"testing"
 
+	"golang.org/x/vuln/internal/cveschema"
 	"golang.org/x/vuln/internal/worker/log"
 )
 
 var usePkgsite = flag.Bool("pkgsite", false, "use pkg.go.dev for tests")
 
+func TestCVEModulePath(t *testing.T) {
+	ctx := log.WithLineLogger(context.Background())
+	url := pkgsiteURL(t)
+
+	for _, test := range []struct {
+		name string
+		in   *cveschema.CVE
+		want string
+	}{
+		{
+			"contains golang-nuts",
+			&cveschema.CVE{
+				References: cveschema.References{
+					Data: []cveschema.Reference{
+						{URL: "https://groups.google.com/forum/#!topic/golang-nuts/1234"},
+					},
+				},
+			},
+			"Go Standard Library",
+		},
+		{
+			"contains golang.org and on pkg.go.dev",
+			&cveschema.CVE{
+				References: cveschema.References{
+					Data: []cveschema.Reference{
+						{URL: "https://golang.org/x/mod"},
+					},
+				},
+			},
+			"golang.org/x/mod",
+		},
+		{
+			"contains github.com but not on pkg.go.dev",
+			&cveschema.CVE{
+				References: cveschema.References{
+					Data: []cveschema.Reference{
+						{URL: "https://github.com/something/something/404"},
+					},
+				},
+			},
+			"",
+		},
+		{
+			"contains longer module path",
+			&cveschema.CVE{
+				References: cveschema.References{
+					Data: []cveschema.Reference{
+						{URL: "https://bitbucket.org/foo/bar/baz/v2"},
+					},
+				},
+			},
+			"bitbucket.org/foo/bar/baz/v2",
+		},
+		{
+			"repo path is not a module",
+			&cveschema.CVE{
+				References: cveschema.References{
+					Data: []cveschema.Reference{
+						{URL: "https://bitbucket.org/foo/bar"},
+					},
+				},
+			},
+			"",
+		},
+	} {
+		t.Run(test.name, func(t *testing.T) {
+			got, err := cveModulePath(ctx, test.in, url)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if got != test.want {
+				t.Errorf("got %q, want %q", got, test.want)
+			}
+		})
+	}
+}
+
 func TestKnownToPkgsite(t *testing.T) {
 	ctx := log.WithLineLogger(context.Background())
 
@@ -49,10 +127,11 @@
 	if *usePkgsite {
 		return "https://pkg.go.dev"
 	}
-	// Start a test server that recognizes anything from golang.org.
+	// Start a test server that recognizes anything from golang.org and bitbucket.org/foo/bar/baz.
 	s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		modulePath := strings.TrimPrefix(r.URL.Path, "/mod/")
-		if !strings.HasPrefix(modulePath, "golang.org/") {
+		if !strings.HasPrefix(modulePath, "golang.org/") &&
+			!strings.HasPrefix(modulePath, "bitbucket.org/foo/bar/baz") {
 			http.Error(w, "unknown", http.StatusNotFound)
 		}
 	}))