blob: 08400534d1cad13d4f4fe81705a877c70586ecfd [file] [log] [blame]
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5package worker
6
7import (
8 "path"
9 "regexp"
10 "strings"
11
12 "golang.org/x/mod/module"
Julie Qiu669af832022-01-04 15:57:59 -050013 "golang.org/x/vulndb/internal/stdlib"
Julie Qiu5b3cf6b2021-12-20 16:26:47 -050014)
15
// vcsHostWithThreeElementRepoName reports whether hostname is one of the
// known code-hosting sites whose repository paths have three elements,
// of the form hostname/account/project.
//
// The comparison is exact: hostname must be the bare host, with no path
// and no difference in letter case.
func vcsHostWithThreeElementRepoName(hostname string) bool {
	switch hostname {
	case "bitbucket.org", "git.sr.ht", "gitea.com", "gitee.com":
		return true
	case "github.com", "gitlab.com", "golang.org":
		return true
	case "hg.sr.ht", "launchpad.net":
		return true
	}
	return false
}
35
// negativePrefixPatterns is a list of glob patterns that describe prefixes of
// potential module paths that are known not to be modules. These are turned
// into regexps in init below and checked against each module path before
// calling pkgsite. This can speed up triage because pkgsite requests are
// throttled.
//
// In a pattern, "*" matches any run of characters other than "/" (possibly
// empty), and "." matches only a literal dot. A pattern matches a path when
// it matches the entire path, or a leading portion of the path that is
// immediately followed by "/".
var negativePrefixPatterns = []string{
	"*.blogspot.com",
	"*.blogspot.dk",
	"*.readthedocs.org",
	"*.slashdot.org",
	"advisories.mageia.org",
	"archives.neohapsis.com",
	"arstechnica.com/security",
	"blog.python.org",
	"blogs.oracle.com",
	"blogs.technet.com",
	"bugs.*",
	"bugzilla.*",
	"cert.uni-stuttgart.de/archive",
	"community.rapid7.com/community/*/blog",
	"cr.yp.to/talks",
	"crbug.com",
	"dev2dev.bea.com/pub/advisory",
	"developer.mozilla.org/docs",
	"developer.mozilla.org/en-US/docs",
	"docs.google.com",
	"docs.microsoft.com",
	"downloads.securityfocus.com/vulnerabilities",
	"drupal.org/node",
	"erpscan.com/advisories",
	"exchange.xforce.ibmcloud.com",
	"fedoranews.org",
	"ftp.caldera.com/pub/security",
	"ftp.netbsd.org/pub",
	"ftp.sco.com/pub",
	"github.com/*/*/blob",
	"github.com/*/*/commit",
	"github.com/*/*/issues",
	"groups.google.com",
	"helpx.adobe.com/security",
	"hg.openjdk.java.net",
	"ics-cert.us-cert.gov",
	"issues.apache.org",
	"issues.rpath.com",
	"java.net",
	"jira.*",
	"jvn.jp",
	"jvndb.jvn.jp",
	"krebsonsecurity.com",
	"labs.mwrinfosecurity.com/advisories",
	"lists.*/archive",
	"lists.*/archives",
	"lists.*/pipermail",
	"lists.apache.org",
	"lists.apple.com",
	"lists.debian.org",
	"lists.mysql.com",
	"lists.opensuse.org",
	"lists.ubuntu.com",
	"mail-archives.*",
	"mail.*.org/archive",
	"mail.*.org/archives",
	"mail.*/pipermail",
	"mailman.*.org/archives",
	"mailman.*.org/pipermail",
	"nodesecurity.io/advisories",
	"online.securityfocus.com/advisories",
	"openwall.com/lists",
	"oss.oracle.com/pipermail",
	"osvdb.org",
	"owncloud.org/about/security",
	"packetstormsecurity.com/files",
	"patches.sgi.com/support/free/security/advisories",
	"plus.google.com",
	"puppetlabs.com/security",
	"raw.github.com",
	"rhn.redhat.com/errata",
	"seclists.org",
	"secunia.com/advisories",
	"secunia.com/secunia_research",
	"security.e-matters.de/advisories",
	"security.gentoo.org/glsa",
	"securityreason.com/securityalert",
	"securityreason.com/securityalert/",
	"securityresponse.symantec.com",
	"securitytracker.com/alerts",
	"service.sap.com",
	"subversion.apache.org/security",
	"technet.microsoft.com/en-us/security",
	"technet.microsoft.com/security",
	"tools.cisco.com/security/center",
	"twitter.com",
	"ubuntu.com/usn",
	"usn.ubuntu.com",
	"www.adobe.com/support",
	"www.adobe.com/support/security",
	"www.atstake.com/research/advisories",
	"www.bugzilla.org/security",
	"www.cert.org/advisories",
	"www.ciac.org/ciac/bulletins",
	"www.cisco.com/warp/public/707",
	"www.coresecurity.com/advisories",
	"www.debian.org/security",
	"www.derkeiler.com/Mailing-Lists",
	"www.drupal.org/node",
	"www.exploit-db.com",
	"www.gentoo.org/security",
	"www.htbridge.com/advisory",
	"www.ibm.com/developerworks/java",
	"www.iss.net/security_center",
	"www.kb.cert.org",
	"www.kde.org/info/security",
	"www.kernel.org/pub",
	"www.kernel.org/pub/linux/kernel/v3*/ChangeLog*",
	"www.linux-mandrake.com/en/security",
	"www.linuxsecurity.com/advisories",
	"www.microsoft.com/technet/security",
	"www.mozilla.org/security",
	"www.netvigilance.com/advisory*",
	"www.novell.com/linux/security",
	"www.openwall.com/lists",
	"www.oracle.com/technetwork",
	"www.osvdb.org",
	"www.phpmyadmin.net/home_page/security",
	"www.portcullis-security.com/security-research-and-downloads",
	"www.postgresql.org/docs",
	"www.red-database-security.com/advisory",
	"www.redhat.com/archives",
	"www.redhat.com/support/errata",
	"www.samba.org/samba/security",
	"www.secunia.com/advisories",
	"www.securiteam.com/exploits",
	"www.securiteam.com/securitynews",
	"www.securiteam.com/unixfocus",
	"www.securiteam.com/windowsntfocus",
	"www.security-assessment.com/files",
	"www.securityfocus.com",
	"www.securitytracker.com",
	"www.sophos.com/en-us/support",
	"www.suse.com/support",
	"www.symantec.com/avcenter/security",
	"www.trustix.org/errata",
	"www.ubuntu.com/usn",
	"www.us-cert.gov/cas",
	"www.us-cert.gov/ncas",
	"www.us.debian.org/security",
	"www.vmware.com/security/advisories",
	"www.vupen.com/english/advisories",
	"www.wireshark.org/security",
	"www.zerodayinitiative.com/advisories",
	"xforce.iss.net/alerts",
	"zerodayinitiative.com/advisories",
}
188
189var negativeRegexps []*regexp.Regexp
190
191func init() {
192 rep := strings.NewReplacer(".", `\.`, "*", `[^/]*`)
193 for _, pat := range negativePrefixPatterns {
194 r := "^" + rep.Replace(pat) + "($|/)"
195 negativeRegexps = append(negativeRegexps, regexp.MustCompile(r))
196 }
197}
198
199// matchesNegativeRegexp reports whether s matches any element of negativeRegexps.
200func matchesNegativeRegexp(s string) bool {
201 for _, nr := range negativeRegexps {
202 if nr.MatchString(s) {
203 return true
204 }
205 }
206 return false
207}
208
209// candidateModulePaths returns the potential module paths that could contain
210// the fullPath, from longest to shortest. It returns nil if no valid module
211// paths can be constructed.
212func candidateModulePaths(fullPath string) []string {
213 if matchesNegativeRegexp(fullPath) {
214 return nil
215 }
Julie Qiu669af832022-01-04 15:57:59 -0500216 if stdlib.Contains(fullPath) {
Julie Qiu5b3cf6b2021-12-20 16:26:47 -0500217 if err := module.CheckImportPath(fullPath); err != nil {
218 return nil
219 }
220 return []string{"std"}
221 }
222 var r []string
223 for p := fullPath; p != "." && p != "/"; p = path.Dir(p) {
224 if err := module.CheckPath(p); err != nil {
225 continue
226 }
227 r = append(r, p)
228 }
229 if len(r) == 0 {
230 return nil
231 }
232 if !vcsHostWithThreeElementRepoName(r[len(r)-1]) {
233 return r
234 }
235 if len(r) < 3 {
236 return nil
237 }
238 return r[:len(r)-2]
239}