internal/worker: don't call pkgsite for known non-modules
Requests to pkg.go.dev are rate-limited (throttled to a fixed QPS), so they are expensive.
Many of the calls are unnecessary because the paths are known not
to be Go modules.
Keep a list of patterns that match such paths and check them before
calling pkgsite.
Change-Id: I5e80d0c494510824939964b72466605ecc5cacbe
Reviewed-on: https://go-review.googlesource.com/c/vuln/+/370394
Trust: Jonathan Amsterdam <jba@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
Reviewed-by: Julie Qiu <julie@golang.org>
diff --git a/internal/worker/paths.go b/internal/worker/paths.go
index 0672a21..dc5f4cd 100644
--- a/internal/worker/paths.go
+++ b/internal/worker/paths.go
@@ -6,6 +6,7 @@
import (
"path"
+ "regexp"
"strings"
"golang.org/x/mod/module"
@@ -31,10 +32,138 @@
}
}
+// negativePrefixPatterns lists glob patterns for prefixes of potential module
+// paths that are known not to be modules. "*" matches any run of non-"/"
+// characters and "." is literal; a prefix must end at "/" or the end of the path.
+// init compiles these to regexps, checked before calling throttled pkgsite.
+var negativePrefixPatterns = []string{
+ "*.blogspot.com",
+ "*.blogspot.dk",
+ "*.readthedocs.org",
+ "archives.neohapsis.com",
+ "archives.neohapsis.com/archives/bugtraq", // NOTE(review): redundant; already matched by the entry above
+ "blog.*",
+ "blog.python.org", // NOTE(review): redundant; already matched by "blog.*"
+ "blogs.oracle.com",
+ "blogs.technet.com",
+ "bugs.*",
+ "bugzilla.*",
+ "cr.yp.to/talks",
+ "crbug.com",
+ "developer.mozilla.org/docs",
+ "developer.mozilla.org/en-US/docs",
+ "docs.google.com",
+ "docs.microsoft.com",
+ "drupal.org/node",
+ "erpscan.com/advisories",
+ "exchange.xforce.ibmcloud.com",
+ "github.com/*/*/blob", // each "*" stands for exactly one path element (owner, repo)
+ "github.com/*/*/commit",
+ "github.com/*/*/issues",
+ "github.com/torvalds/linux/commit", // NOTE(review): redundant; already matched by "github.com/*/*/commit"
+ "groups.google.com",
+ "helpx.adobe.com/security",
+ "hg.openjdk.java.net",
+ "ics-cert.us-cert.gov",
+ "issues.apache.org",
+ "jira.*",
+ "jvn.jp",
+ "jvndb.jvn.jp",
+ "krebsonsecurity.com",
+ "labs.mwrinfosecurity.com/advisories",
+ "lists.*/archive",
+ "lists.*/archives",
+ "lists.*/pipermail",
+ "lists.opensuse.org",
+ "lists.ubuntu.com",
+ "mail-archives.*",
+ "mail.*.org/archives",
+ "mail.*/pipermail",
+ "mailman.*.org/archives",
+ "mailman.*.org/pipermail",
+ "nodesecurity.io/advisories",
+ "openwall.com/lists",
+ "oss.oracle.com/pipermail",
+ "osvdb.org",
+ "owncloud.org/about/security",
+ "packetstormsecurity.com/files",
+ "plus.google.com",
+ "puppetlabs.com/security",
+ "raw.github.com",
+ "rhn.redhat.com/errata",
+ "seclists.org",
+ "secunia.com/advisories",
+ "secunia.com/secunia_research",
+ "security.gentoo.org/glsa",
+ "service.sap.com",
+ "subversion.apache.org/security",
+ "support.*",
+ "technet.microsoft.com/en-us/security",
+ "technet.microsoft.com/security",
+ "tools.cisco.com/security/center",
+ "twitter.com",
+ "ubuntu.com/usn",
+ "weblog.*",
+ "www.adobe.com/support/security",
+ "www.bugzilla.org/security",
+ "www.coresecurity.com/advisories",
+ "www.debian.org/security",
+ "www.drupal.org/node",
+ "www.exploit-db.com",
+ "www.htbridge.com/advisory",
+ "www.ibm.com/developerworks/java",
+ "www.kb.cert.org",
+ "www.kernel.org/pub/linux/kernel/v3*/ChangeLog*", // "*" may also complete a partial path element
+ "www.mozilla.org/security",
+ "www.openwall.com/lists",
+ "www.oracle.com/technetwork",
+ "www.osvdb.org",
+ "www.phpmyadmin.net/home_page/security",
+ "www.portcullis-security.com/security-research-and-downloads",
+ "www.postgresql.org/docs",
+ "www.redhat.com/archives",
+ "www.samba.org/samba/security",
+ "www.security-assessment.com/files",
+ "www.securityfocus.com",
+ "www.securitytracker.com",
+ "www.sophos.com/en-us/support",
+ "www.suse.com/support",
+ "www.ubuntu.com/usn",
+ "www.us-cert.gov/cas",
+ "www.us-cert.gov/ncas",
+ "www.vmware.com/security/advisories",
+ "www.wireshark.org/security",
+ "www.zerodayinitiative.com/advisories",
+ "zerodayinitiative.com/advisories",
+}
+
+// negativeRegexps holds the compiled form of negativePrefixPatterns, in the
+// same order. It is filled in by init.
+var negativeRegexps []*regexp.Regexp
+
+// init compiles every glob in negativePrefixPatterns into an anchored regexp
+// and appends it to negativeRegexps.
+//
+// Each pattern is first escaped with regexp.QuoteMeta so that every regexp
+// metacharacter it contains (not only ".") is matched literally. The escaped
+// star is then expanded to `[^/]*`, so a "*" never crosses a "/". The result
+// is anchored at the start of the path and must be followed by "/" or the end
+// of the path, so a pattern only ever matches whole path elements.
+//
+// A pattern that fails to compile panics during package initialization.
+func init() {
+	for _, pat := range negativePrefixPatterns {
+		// QuoteMeta turns "*" into `\*`; swap that for the single-element wildcard.
+		body := strings.ReplaceAll(regexp.QuoteMeta(pat), `\*`, `[^/]*`)
+		negativeRegexps = append(negativeRegexps, regexp.MustCompile("^"+body+"($|/)"))
+	}
+}
+
+// matchesNegativeRegexp reports whether at least one regexp in
+// negativeRegexps matches s. It stops at the first match.
+func matchesNegativeRegexp(s string) bool {
+	matched := false
+	for i := 0; i < len(negativeRegexps) && !matched; i++ {
+		matched = negativeRegexps[i].MatchString(s)
+	}
+	return matched
+}
+
// candidateModulePaths returns the potential module paths that could contain
// the fullPath, from longest to shortest. It returns nil if no valid module
// paths can be constructed.
func candidateModulePaths(fullPath string) []string {
+ if matchesNegativeRegexp(fullPath) {
+ return nil
+ }
if stdlibContains(fullPath) {
if err := module.CheckImportPath(fullPath); err != nil {
return nil
diff --git a/internal/worker/paths_test.go b/internal/worker/paths_test.go
index 5a4045f..ce027e8 100644
--- a/internal/worker/paths_test.go
+++ b/internal/worker/paths_test.go
@@ -54,3 +54,26 @@
}
}
}
+
+func TestMatchesNegativeRegexp(t *testing.T) {
+ for _, test := range []struct {
+ in string
+ want bool
+ }{
+ {"groups.google.com", true},
+ {"groupsgooglecom", false},
+ {"groups.google.com/foo", true},
+ {"groups.google.comics.org", false},
+ {"some/groups.google.com", false},
+ {"lists.ubuntu.com", true},
+ {"lists.ubuntu.com/pipermail", true},
+ {"bugzilla.anything.org", true},
+ {"github.com/evacchi/flatpress/issues/14", true},
+ {"github.com/evacchi/issues/14", false},
+ } {
+ got := matchesNegativeRegexp(test.in)
+ if got != test.want {
+ t.Errorf("%s: got %t, want %t", test.in, got, test.want)
+ }
+ }
+}