blob: d61c2f3a4fb00075299060fc920fd7c43cb493b4 [file] [log] [blame]
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package worker
import (
"context"
"errors"
"fmt"
"net/http"
"net/url"
"regexp"
"strconv"
"strings"
"time"
"golang.org/x/time/rate"
"golang.org/x/vulndb/internal/cveschema"
"golang.org/x/vulndb/internal/derrors"
"golang.org/x/vulndb/internal/stdlib"
"golang.org/x/vulndb/internal/worker/log"
)
// errCVEVersionUnsupported is the sentinel error that TriageCVE wraps when a
// CVE's DataVersion is not one it knows how to triage.
var errCVEVersionUnsupported = errors.New("unsupported CVE version")
// stdlibReferenceDataKeywords are substrings of a reference data URL that
// indicate the CVE is about the standard library or a Go x-repo owned by the
// Go team. triageV4CVE matches them with strings.Contains as a fallback, once
// no module path could be extracted from any reference URL.
var stdlibReferenceDataKeywords = []string{
"github.com/golang",
"golang.org",
// from https://groups.google.com/g/golang-announce.
"golang-announce",
// from https://groups.google.com/g/golang-nuts.
"golang-nuts",
}
// unknownPath is the placeholder stored in triageResult.modulePath when a
// reference URL carries the snyk Go identifier but no module path could be
// determined from the reference data.
const unknownPath = "Path is unknown"
// TriageCVE decides whether the CVE c refers to a Go module.
//
// Only CVEs whose DataVersion is "4.0" are supported; those are handed to
// triageV4CVE, whose result and error are returned unchanged (apart from the
// derrors wrapping applied to any error). Every other DataVersion yields a nil
// result and an error wrapping errCVEVersionUnsupported.
func TriageCVE(ctx context.Context, c *cveschema.CVE, pkgsiteURL string) (_ *triageResult, err error) {
	defer derrors.Wrap(&err, "triageCVE(%q)", c.ID)

	if c.DataVersion == "4.0" {
		return triageV4CVE(ctx, c, pkgsiteURL)
	}
	// TODO(https://golang.org/issue/49289): Add support for v5.0.
	return nil, fmt.Errorf("CVE %q has DataVersion %q: %w", c.ID, c.DataVersion, errCVEVersionUnsupported)
}
// triageResult describes why a CVE was judged to be a Go vulnerability.
type triageResult struct {
// modulePath is the module the CVE is attributed to. It may be
// stdlib.ModulePath, a path taken from a reference URL, or unknownPath.
modulePath string
// packagePath is set only when a reference URL points at a standard
// library package; it is empty otherwise.
packagePath string
// reason is a human-readable explanation of which reference URL and
// heuristic produced this result.
reason string
}
// gopkgHosts are hostnames for popular Go package websites. When a reference
// URL has one of these hosts, triageV4CVE treats the URL path (minus the
// leading slash) as a package or module path.
var gopkgHosts = map[string]bool{
"godoc.org": true,
"pkg.go.dev": true,
}
// snykIdentifier is the substring of a reference URL that marks a snyk.io
// advisory for a Go package. triageV4CVE uses it as a secondary heuristic.
const snykIdentifier = "snyk.io/vuln/SNYK-GOLANG"
// notGoModules are paths that return a 200 on pkg.go.dev, but do not contain
// Go code. These libraries often have CVEs that would otherwise be false
// positives for a Go vuln, so triageV4CVE skips any candidate module path
// listed here.
var notGoModules = map[string]bool{
"github.com/channelcat/sanic": true, // python library
"github.com/rapid7/metasploit-framework": true, // ruby library
"github.com/tensorflow/tensorflow": true, // python library
"gitweb.gentoo.org/repo/gentoo.git": true, // ebuild
"qpid.apache.org": true, // C, python, & Java library
// vulnerability in tool, not importable package
"github.com/grafana/grafana": true,
"github.com/sourcegraph/sourcegraph": true,
"gitlab.com/gitlab-org/gitlab-runner": true,
"github.com/gravitational/teleport": true,
}
// triageV4CVE triages a CVE following schema v4.0 and returns the result.
//
// A non-nil result means the CVE looks like a Go vulnerability; a nil result
// with a nil error means it does not. The outcome is logged at debug level
// whenever no error occurred.
//
// Each non-empty reference URL is examined in order, and the first match wins:
//  1. A URL containing "golang.org/pkg" is attributed to the standard library,
//     with the URL path (minus the "/pkg/" prefix) as the package path.
//  2. A URL whose host is in gopkgHosts is attributed to the path in the URL:
//     the standard library if stdlib.Contains reports it, otherwise a module.
//  3. Otherwise, every candidate module path derived from host+path that is
//     not in notGoModules is checked against pkgsite; the first known one is
//     the result.
//
// If no reference matched, a second pass looks for the snyk Go identifier or
// one of stdlibReferenceDataKeywords anywhere in each URL.
//
// An error is returned if a reference URL cannot be parsed (the url.Parse
// error is wrapped, so errors.Is/As can see it) or if the pkgsite lookup fails.
func triageV4CVE(ctx context.Context, c *cveschema.CVE, pkgsiteURL string) (result *triageResult, err error) {
	defer derrors.Wrap(&err, "triageV4CVE(ctx, %q, %q)", c.ID, pkgsiteURL)
	defer func() {
		if err != nil {
			return
		}
		msg := fmt.Sprintf("Triage result for %s", c.ID)
		if result == nil {
			log.Debugf(ctx, "%s: not Go vuln", msg)
			return
		}
		log.Debugf(ctx, "%s: is Go vuln:\n%s", msg, result.reason)
	}()

	for _, r := range c.References.Data {
		if r.URL == "" {
			continue
		}
		// Use distinct error names inside the loop so the named result err,
		// which the deferred functions above inspect, is never shadowed.
		refURL, parseErr := url.Parse(r.URL)
		if parseErr != nil {
			return nil, fmt.Errorf("url.Parse(%q): %w", r.URL, parseErr)
		}

		// Links into the standard library documentation on golang.org.
		if strings.Contains(r.URL, "golang.org/pkg") {
			mp := strings.TrimPrefix(refURL.Path, "/pkg/")
			return &triageResult{
				packagePath: mp,
				modulePath:  stdlib.ModulePath,
				reason:      fmt.Sprintf("Reference data URL %q contains path %q", r.URL, mp),
			}, nil
		}

		// Links to a Go package documentation site: the URL path is the
		// package or module path.
		if gopkgHosts[refURL.Host] {
			mp := strings.TrimPrefix(refURL.Path, "/")
			if stdlib.Contains(mp) {
				return &triageResult{
					packagePath: mp,
					modulePath:  stdlib.ModulePath,
					reason:      fmt.Sprintf("Reference data URL %q contains path %q", r.URL, mp),
				}, nil
			}
			return &triageResult{
				modulePath: mp,
				reason:     fmt.Sprintf("Reference data URL %q contains path %q", r.URL, mp),
			}, nil
		}

		// Any other URL: ask pkgsite whether some candidate path derived from
		// the URL is a known module.
		for _, mp := range candidateModulePaths(refURL.Host + refURL.Path) {
			if notGoModules[mp] {
				continue
			}
			known, lookupErr := knownToPkgsite(ctx, pkgsiteURL, mp)
			if lookupErr != nil {
				return nil, lookupErr
			}
			if known {
				u := pkgsiteURL + "/" + mp
				return &triageResult{
					modulePath: mp,
					reason:     fmt.Sprintf("Reference data URL %q contains path %q; %q returned a status 200", r.URL, mp, u),
				}, nil
			}
		}
	}

	// We didn't find a Go package or module path in the reference data. Check
	// secondary heuristics to see if this is a Go related CVE.
	for _, r := range c.References.Data {
		// Example CVE containing snyk.io URL:
		// https://github.com/CVEProject/cvelist/blob/899bba20d62eb73e04d1841a5ff04cd6225e1618/2020/7xxx/CVE-2020-7668.json#L52.
		if strings.Contains(r.URL, snykIdentifier) {
			return &triageResult{
				modulePath: unknownPath,
				reason:     fmt.Sprintf("Reference data URL %q contains %q", r.URL, snykIdentifier),
			}, nil
		}

		// Check for reference data indicating that this is related to the Go
		// project.
		for _, k := range stdlibReferenceDataKeywords {
			if strings.Contains(r.URL, k) {
				return &triageResult{
					modulePath: stdlib.ModulePath,
					reason:     fmt.Sprintf("Reference data URL %q contains %q", r.URL, k),
				}, nil
			}
		}
	}
	return nil, nil
}
// ghsaRegex matches identifiers of the form GHSA-xxxx-xxxx-xxxx, where each
// four-character group may contain any character other than '-'.
var ghsaRegex = regexp.MustCompile(`GHSA-[^-]{4}-[^-]{4}-[^-]{4}`)
// getAliasGHSAs returns the GHSA identifiers found in the reference URLs of c,
// in reference order. At most one identifier (the leftmost match) is taken
// from each URL, and duplicates across references are kept. The result is nil
// when no URL contains an identifier.
func getAliasGHSAs(c *cveschema.CVE) []string {
	var ids []string
	for _, ref := range c.References.Data {
		// ghsaRegex cannot match the empty string, so "" means "no match".
		if id := ghsaRegex.FindString(ref.URL); id != "" {
			ids = append(ids, id)
		}
	}
	return ids
}
// pkgsiteQPS is the maximum sustained number of requests per second sent to
// pkgsite; it determines the interval used by pkgsiteRateLimiter.
const pkgsiteQPS = 5
var (
// The limiter used to throttle pkgsite requests. It allows one request
// every 1000/pkgsiteQPS milliseconds.
// The second argument to rate.NewLimiter is the burst, which
// basically lets you exceed the rate briefly (here, by up to 3 requests).
pkgsiteRateLimiter = rate.NewLimiter(rate.Every(time.Duration(1000/float64(pkgsiteQPS))*time.Millisecond), 3)
// Cache of module paths already seen. The value records whether the path
// is a known module; knownToPkgsite stores both positive and negative
// answers here, and SetKnownModules stores positive ones.
// NOTE(review): this map and cacheComplete are read and written without
// any synchronization visible in this file — confirm that callers never
// triage concurrently.
seenModulePath = map[string]bool{}
// Does seenModulePath contain all known modules? When true, a path that
// is missing from the cache is treated as unknown without a request.
cacheComplete = false
)
// SetKnownModules records every path in mods as a known module and marks the
// cache as complete, so that later lookups are answered from the cache alone
// and no requests need to be made to pkg.go.dev. Entries already in the cache
// are kept, except that any path listed in mods is (re)marked as known.
func SetKnownModules(mods []string) {
	for i := range mods {
		seenModulePath[mods[i]] = true
	}
	cacheComplete = true
}
// knownToPkgsite reports whether pkgsite knows that modulePath actually refers
// to a module.
//
// Answers come from the seenModulePath cache when possible. If the cache has
// been declared complete, a path missing from it is reported as unknown
// without a request. Otherwise a rate-limited HEAD request is sent to
// baseURL + "/mod/" + modulePath; the module is known exactly when the
// response status is 200 OK, and that answer is cached.
//
// An error is returned if waiting on the rate limiter fails (for example
// because ctx is done) or if the HTTP request fails; failures are not cached.
func knownToPkgsite(ctx context.Context, baseURL, modulePath string) (bool, error) {
	// If we've seen it before, no need to call.
	if known, ok := seenModulePath[modulePath]; ok {
		return known, nil
	}
	if cacheComplete {
		return false, nil
	}
	// Pause to maintain a max QPS.
	if err := pkgsiteRateLimiter.Wait(ctx); err != nil {
		return false, err
	}

	start := time.Now()
	target := baseURL + "/mod/" + modulePath

	// Bind the request to ctx so that cancellation aborts the HTTP call too,
	// not just the rate-limiter wait.
	var res *http.Response
	req, err := http.NewRequestWithContext(ctx, http.MethodHead, target, nil)
	if err == nil {
		res, err = http.DefaultClient.Do(req)
	}
	var status string
	if err == nil {
		// A HEAD response has no content, but the body must still be closed
		// so the transport can release the connection.
		defer res.Body.Close()
		status = strconv.Quote(res.Status)
	}
	log.With(
		"latency", time.Since(start),
		"status", status,
		"error", err,
	).Debugf(ctx, "checked if %s is known to pkgsite at HEAD", target)
	if err != nil {
		return false, err
	}

	known := res.StatusCode == http.StatusOK
	seenModulePath[modulePath] = known
	return known, nil
}