|  | // Copyright 2021 The Go Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style | 
|  | // license that can be found in the LICENSE file. | 
|  |  | 
|  | package worker | 
|  |  | 
|  | import ( | 
|  | "context" | 
|  | "errors" | 
|  | "fmt" | 
|  | "net/http" | 
|  | "net/url" | 
|  | "regexp" | 
|  | "strconv" | 
|  | "strings" | 
|  | "time" | 
|  |  | 
|  | "golang.org/x/time/rate" | 
|  | "golang.org/x/vulndb/internal/cveschema" | 
|  | "golang.org/x/vulndb/internal/derrors" | 
|  | "golang.org/x/vulndb/internal/stdlib" | 
|  | "golang.org/x/vulndb/internal/worker/log" | 
|  | ) | 
|  |  | 
// errCVEVersionUnsupported is the sentinel error wrapped by TriageCVE when a
// CVE's DataVersion is not one that can be triaged.
var errCVEVersionUnsupported = errors.New("unsupported CVE version")
|  |  | 
// stdlibReferenceDataKeywords are substrings of a reference data URL that
// indicate the CVE is about the standard library or a Go x-repo owned by the
// Go team. They are matched with strings.Contains, so they may appear
// anywhere in the URL.
var stdlibReferenceDataKeywords = []string{
	"github.com/golang",
	"golang.org",
	// From https://groups.google.com/g/golang-announce.
	"golang-announce",
	// From https://groups.google.com/g/golang-nuts.
	"golang-nuts",
}
|  |  | 
// unknownPath is the placeholder module path reported when a CVE looks
// Go-related but no concrete module path could be determined.
const unknownPath = "Path is unknown"
|  |  | 
|  | // TriageCVE reports whether the CVE refers to a Go module. | 
|  | func TriageCVE(ctx context.Context, c *cveschema.CVE, pkgsiteURL string) (_ *triageResult, err error) { | 
|  | defer derrors.Wrap(&err, "triageCVE(%q)", c.ID) | 
|  | switch c.DataVersion { | 
|  | case "4.0": | 
|  | return triageV4CVE(ctx, c, pkgsiteURL) | 
|  | default: | 
|  | // TODO(https://golang.org/issue/49289): Add support for v5.0. | 
|  | return nil, fmt.Errorf("CVE %q has DataVersion %q: %w", c.ID, c.DataVersion, errCVEVersionUnsupported) | 
|  | } | 
|  | } | 
|  |  | 
// triageResult describes a CVE that triage judged to be a Go vulnerability.
type triageResult struct {
	modulePath  string // module the CVE is attributed to; may be stdlib.ModulePath or unknownPath
	packagePath string // package within the module; only set when it was taken from the reference URL
	reason      string // human-readable explanation of why the CVE was matched
}
|  |  | 
// gopkgHosts are hostnames for popular Go package websites. The path of a
// reference URL on one of these hosts is treated as a Go package or module
// path.
var gopkgHosts = map[string]bool{
	"godoc.org":  true,
	"pkg.go.dev": true,
}
|  |  | 
// snykIdentifier is a substring of snyk.io vulnerability URLs. A reference
// URL containing it is treated as evidence that the CVE is a Go
// vulnerability, even though no module path can be derived from it.
const snykIdentifier = "snyk.io/vuln/SNYK-GOLANG"
|  |  | 
// notGoModules are paths that return a 200 on pkg.go.dev, but do not contain
// Go code. However, these libraries often have CVEs that are false positives
// for a Go vuln, so candidate module paths found in this set are skipped.
var notGoModules = map[string]bool{
	"github.com/channelcat/sanic":            true, // python library
	"github.com/rapid7/metasploit-framework": true, // ruby library
	"github.com/tensorflow/tensorflow":       true, // python library
	"gitweb.gentoo.org/repo/gentoo.git":      true, // ebuild
	"qpid.apache.org":                        true, // C, python, & Java library

	// vulnerability in tool, not importable package
	"github.com/grafana/grafana":          true,
	"github.com/sourcegraph/sourcegraph":  true,
	"gitlab.com/gitlab-org/gitlab-runner": true,
	"github.com/gravitational/teleport":   true,
}
|  |  | 
|  | // triageV4CVE triages a CVE following schema v4.0 and returns the result. | 
|  | func triageV4CVE(ctx context.Context, c *cveschema.CVE, pkgsiteURL string) (result *triageResult, err error) { | 
|  | defer derrors.Wrap(&err, "triageV4CVE(ctx, %q, %q)", c.ID, pkgsiteURL) | 
|  | defer func() { | 
|  | if err != nil { | 
|  | return | 
|  | } | 
|  | msg := fmt.Sprintf("Triage result for %s", c.ID) | 
|  | if result == nil { | 
|  | log.Debugf(ctx, "%s: not Go vuln", msg) | 
|  | return | 
|  | } | 
|  | log.Debugf(ctx, "%s: is Go vuln:\n%s", msg, result.reason) | 
|  | }() | 
|  | for _, r := range c.References.Data { | 
|  | if r.URL == "" { | 
|  | continue | 
|  | } | 
|  | refURL, err := url.Parse(r.URL) | 
|  | if err != nil { | 
|  | return nil, fmt.Errorf("url.Parse(%q): %v", r.URL, err) | 
|  | } | 
|  | if strings.Contains(r.URL, "golang.org/pkg") { | 
|  | mp := strings.TrimPrefix(refURL.Path, "/pkg/") | 
|  | return &triageResult{ | 
|  | packagePath: mp, | 
|  | modulePath:  stdlib.ModulePath, | 
|  | reason:      fmt.Sprintf("Reference data URL %q contains path %q", r.URL, mp), | 
|  | }, nil | 
|  | } | 
|  | if gopkgHosts[refURL.Host] { | 
|  | mp := strings.TrimPrefix(refURL.Path, "/") | 
|  | if stdlib.Contains(mp) { | 
|  | return &triageResult{ | 
|  | packagePath: mp, | 
|  | modulePath:  stdlib.ModulePath, | 
|  | reason:      fmt.Sprintf("Reference data URL %q contains path %q", r.URL, mp), | 
|  | }, nil | 
|  | } | 
|  | return &triageResult{ | 
|  | modulePath: mp, | 
|  | reason:     fmt.Sprintf("Reference data URL %q contains path %q", r.URL, mp), | 
|  | }, nil | 
|  | } | 
|  | modpaths := candidateModulePaths(refURL.Host + refURL.Path) | 
|  | for _, mp := range modpaths { | 
|  | if notGoModules[mp] { | 
|  | continue | 
|  | } | 
|  | known, err := knownToPkgsite(ctx, pkgsiteURL, mp) | 
|  | if err != nil { | 
|  | return nil, err | 
|  | } | 
|  | if known { | 
|  | u := pkgsiteURL + "/" + mp | 
|  | return &triageResult{ | 
|  | modulePath: mp, | 
|  | reason:     fmt.Sprintf("Reference data URL %q contains path %q; %q returned a status 200", r.URL, mp, u), | 
|  | }, nil | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // We didn't find a Go package or module path in the reference data. Check | 
|  | // secondary heuristics to see if this is a Go related CVE. | 
|  | for _, r := range c.References.Data { | 
|  | // Example CVE containing snyk.io URL: | 
|  | // https://github.com/CVEProject/cvelist/blob/899bba20d62eb73e04d1841a5ff04cd6225e1618/2020/7xxx/CVE-2020-7668.json#L52. | 
|  | if strings.Contains(r.URL, snykIdentifier) { | 
|  | return &triageResult{ | 
|  | modulePath: unknownPath, | 
|  | reason:     fmt.Sprintf("Reference data URL %q contains %q", r.URL, snykIdentifier), | 
|  | }, nil | 
|  | } | 
|  |  | 
|  | // Check for reference data indicating that this is related to the Go | 
|  | // project. | 
|  | for _, k := range stdlibReferenceDataKeywords { | 
|  | if strings.Contains(r.URL, k) { | 
|  | return &triageResult{ | 
|  | modulePath: stdlib.ModulePath, | 
|  | reason:     fmt.Sprintf("Reference data URL %q contains %q", r.URL, k), | 
|  | }, nil | 
|  | } | 
|  | } | 
|  | } | 
|  | return nil, nil | 
|  | } | 
|  |  | 
// ghsaRegex matches identifiers of the form GHSA-xxxx-xxxx-xxxx, where each
// x is any character other than '-'.
var ghsaRegex = regexp.MustCompile(`GHSA-[^-]{4}-[^-]{4}-[^-]{4}`)
|  |  | 
|  | func getAliasGHSAs(c *cveschema.CVE) []string { | 
|  | var ghsas []string | 
|  | for _, r := range c.References.Data { | 
|  | ghsas = append(ghsas, ghsaRegex.FindAllString(r.URL, 1)...) | 
|  | } | 
|  | return ghsas | 
|  | } | 
|  |  | 
// pkgsiteQPS is the maximum sustained number of pkgsite requests per second.
// It determines the interval used by pkgsiteRateLimiter.
const pkgsiteQPS = 5
|  |  | 
var (
	// The limiter used to throttle pkgsite requests: one request every
	// 1000/pkgsiteQPS milliseconds.
	// The second argument to rate.NewLimiter is the burst, which
	// basically lets you exceed the rate briefly.
	pkgsiteRateLimiter = rate.NewLimiter(rate.Every(time.Duration(1000/float64(pkgsiteQPS))*time.Millisecond), 3)

	// Cache of module paths already seen. The value records whether the
	// path is known to pkgsite.
	// NOTE(review): no synchronization is visible here; confirm that callers
	// do not access this map from multiple goroutines.
	seenModulePath = map[string]bool{}
	// Does seenModulePath contain all known modules? When true, a path
	// missing from the cache is treated as unknown without asking pkgsite.
	cacheComplete = false
)
|  |  | 
|  | // SetKnownModules provides a list of all known modules, | 
|  | // so that no requests need to be made to pkg.go.dev. | 
|  | func SetKnownModules(mods []string) { | 
|  | for _, m := range mods { | 
|  | seenModulePath[m] = true | 
|  | } | 
|  | cacheComplete = true | 
|  | } | 
|  |  | 
|  | // knownToPkgsite reports whether pkgsite knows that modulePath actually refers | 
|  | // to a module. | 
|  | func knownToPkgsite(ctx context.Context, baseURL, modulePath string) (bool, error) { | 
|  | // If we've seen it before, no need to call. | 
|  | if b, ok := seenModulePath[modulePath]; ok { | 
|  | return b, nil | 
|  | } | 
|  | if cacheComplete { | 
|  | return false, nil | 
|  | } | 
|  | // Pause to maintain a max QPS. | 
|  | if err := pkgsiteRateLimiter.Wait(ctx); err != nil { | 
|  | return false, err | 
|  | } | 
|  | start := time.Now() | 
|  |  | 
|  | url := baseURL + "/mod/" + modulePath | 
|  | res, err := http.Head(url) | 
|  | var status string | 
|  | if err == nil { | 
|  | status = strconv.Quote(res.Status) | 
|  | } | 
|  | log.With( | 
|  | "latency", time.Since(start), | 
|  | "status", status, | 
|  | "error", err, | 
|  | ).Debugf(ctx, "checked if %s is known to pkgsite at HEAD", url) | 
|  | if err != nil { | 
|  | return false, err | 
|  | } | 
|  | known := res.StatusCode == http.StatusOK | 
|  | seenModulePath[modulePath] = known | 
|  | return known, nil | 
|  | } |