internal/worker: add candidateModulePaths

The candidateModulePaths function is copied from x/pkgsite with minor
modifications. It will be used in the next CL to catch longer module
paths as part of the auto-triage process.

For golang/go#49733

Change-Id: If02c558a3308b597e2b2ecdb03a0ed8afb3b7270
Reviewed-on: https://go-review.googlesource.com/c/vuln/+/369749
Trust: Julie Qiu <julie@golang.org>
Run-TryBot: Julie Qiu <julie@golang.org>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
diff --git a/internal/worker/paths.go b/internal/worker/paths.go
new file mode 100644
index 0000000..0672a21
--- /dev/null
+++ b/internal/worker/paths.go
@@ -0,0 +1,70 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package worker
+
+import (
+	"path"
+	"strings"
+
+	"golang.org/x/mod/module"
+)
+
+// vcsHostWithThreeElementRepoName reports whether hostname is one of the known
+// VCS hosts whose repo names have three elements, like hostname/account/project.
+func vcsHostWithThreeElementRepoName(hostname string) bool {
+	switch hostname {
+	case
+		"git.sr.ht",
+		"gitea.com",
+		"gitee.com",
+		"gitlab.com",
+		"hg.sr.ht",
+		"bitbucket.org",
+		"github.com",
+		"golang.org",
+		"launchpad.net":
+		return true
+	default:
+		return false
+	}
+}
+
+// candidateModulePaths returns the potential module paths that could contain
+// fullPath, from longest to shortest, or nil if no valid module path can be
+// constructed. A valid import path that passes stdlibContains yields only "std".
+func candidateModulePaths(fullPath string) []string {
+	if stdlibContains(fullPath) {
+		if err := module.CheckImportPath(fullPath); err != nil {
+			return nil
+		}
+		return []string{"std"}
+	}
+	var r []string
+	for p := fullPath; p != "." && p != "/"; p = path.Dir(p) { // walk up one path element at a time
+		if err := module.CheckPath(p); err != nil {
+			continue // p is not a valid module path, but a shorter prefix may be
+		}
+		r = append(r, p)
+	}
+	if len(r) == 0 {
+		return nil
+	}
+	if !vcsHostWithThreeElementRepoName(r[len(r)-1]) { // r[len(r)-1] is the shortest candidate
+		return r
+	}
+	if len(r) < 3 {
+		return nil // fewer than three candidates: treated as having no repo-level path
+	}
+	return r[:len(r)-2] // drop the two shortest candidates
+}
+
+// stdlibContains reports whether path could be part of the Go standard library. It is
+// a heuristic: it only checks that the first slash-separated element lacks a '.'.
+func stdlibContains(path string) bool {
+	if i := strings.IndexByte(path, '/'); i != -1 {
+		path = path[:i] // keep only the first path element
+	}
+	return !strings.Contains(path, ".")
+}
diff --git a/internal/worker/paths_test.go b/internal/worker/paths_test.go
new file mode 100644
index 0000000..eb9f18f
--- /dev/null
+++ b/internal/worker/paths_test.go
@@ -0,0 +1,53 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package worker
+
+import (
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+)
+
+func TestCandidateModulePaths(t *testing.T) {
+	for _, test := range []struct {
+		in   string
+		want []string
+	}{
+		{"", nil},
+		{".", nil},
+		{"///foo", nil},
+		{"github.com/google", nil}, // shorter than hostname/account/project
+		{"std", []string{"std"}},
+		{"encoding/json", []string{"std"}},
+		{
+			"example.com/green/eggs/and/ham",
+			[]string{
+				"example.com/green/eggs/and/ham",
+				"example.com/green/eggs/and",
+				"example.com/green/eggs",
+				"example.com/green",
+				"example.com",
+			},
+		},
+		{
+			"github.com/google/go-cmp/cmp",
+			[]string{"github.com/google/go-cmp/cmp", "github.com/google/go-cmp"},
+		},
+		{
+			"bitbucket.org/ok/sure/no$dollars/allowed", // paths containing "no$dollars" are not expected back
+			[]string{"bitbucket.org/ok/sure"},
+		},
+		{
+			// A module path cannot end in "v1", so the input itself is not a candidate.
+			"k8s.io/klog/v1",
+			[]string{"k8s.io/klog", "k8s.io"},
+		},
+	} {
+		got := candidateModulePaths(test.in)
+		if !cmp.Equal(got, test.want) {
+			t.Errorf("%q: got %v, want %v", test.in, got, test.want)
+		}
+	}
+}