internal/worker: add modules mode to govulncheck pipeline
This is accomplished by using the newest version of govulncheck. The
tool now produces streaming JSON where it emits findings at every level
of precision (module, package, symbol) as it does work.
We thus collect all findings produced by govulncheck and convert them to
Vuln structure right before we save it rows. This simplifies matters. The
ecosystem metrics handler for govulncheck JSON is now trivial. The code
operates on govulncheck.Findings and lets vulnsForMode do the conversion
to Vuln in a single (last) step. Vuln also does not need Called field.
Change-Id: I73651a91b2707d9afd1e667ea4cedb371e763c73
Reviewed-on: https://go-review.googlesource.com/c/pkgsite-metrics/+/562695
Reviewed-by: Maceo Thompson <maceothompson@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Zvonimir Pavlinovic <zpavlinovic@google.com>
diff --git a/go.mod b/go.mod
index 59d4b93..006428b 100644
--- a/go.mod
+++ b/go.mod
@@ -26,9 +26,9 @@
golang.org/x/exp/event v0.0.0-20220218215828-6cf2b201936e
golang.org/x/mod v0.15.0
golang.org/x/net v0.21.0
- golang.org/x/oauth2 v0.17.0
+ golang.org/x/oauth2 v0.10.0
golang.org/x/tools v0.18.0
- golang.org/x/vuln v1.0.1-0.20230810155601-91424c7c0e1c
+ golang.org/x/vuln v1.0.5-0.20240213143941-dcac2d7cab09
google.golang.org/api v0.132.0
google.golang.org/genproto/googleapis/api v0.0.0-20230706204954-ccb25ca9f130
google.golang.org/grpc v1.56.2
diff --git a/go.sum b/go.sum
index fda6bf4..ef9661b 100644
--- a/go.sum
+++ b/go.sum
@@ -448,8 +448,8 @@
golang.org/x/oauth2 v0.0.0-20210805134026-6f1e6394065a/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
-golang.org/x/oauth2 v0.17.0 h1:6m3ZPmLEFdVxKKWnKq4VqZ60gutO35zm+zrAHVmHyDQ=
-golang.org/x/oauth2 v0.17.0/go.mod h1:OzPDGQiuQMguemayvdylqddI7qcD9lnSDb+1FiwQ5HA=
+golang.org/x/oauth2 v0.10.0 h1:zHCpF2Khkwy4mMB4bv0U37YtJdTGW8jI0glAApi0Kh8=
+golang.org/x/oauth2 v0.10.0/go.mod h1:kTpgurOux7LqtuxjuyZa4Gj2gdezIt/jQtGnNFfypQI=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -595,8 +595,8 @@
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.18.0 h1:k8NLag8AGHnn+PHbl7g43CtqZAwG60vZkLqgyZgIHgQ=
golang.org/x/tools v0.18.0/go.mod h1:GL7B4CwcLLeo59yx/9UWWuNOW1n3VZ4f5axWfML7Lcg=
-golang.org/x/vuln v1.0.1-0.20230810155601-91424c7c0e1c h1:Tpbe1bL1Wq4yGGhPZhs60B2JPTAOnBAoVBFaAm25PNM=
-golang.org/x/vuln v1.0.1-0.20230810155601-91424c7c0e1c/go.mod h1:V0eyhHwaAaHrt42J9bgrN6rd12f6GU4T0Lu0ex2wDg4=
+golang.org/x/vuln v1.0.5-0.20240213143941-dcac2d7cab09 h1:A64fP6pOn7HymW2o6mact9ciWbn8bAVYuEmoxjCd43Y=
+golang.org/x/vuln v1.0.5-0.20240213143941-dcac2d7cab09/go.mod h1:17esbGXVHp03iSShz13iJUumhcDknaO/tzYo0Oau1TQ=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
diff --git a/internal/govulncheck/govulncheck.go b/internal/govulncheck/govulncheck.go
index fdb346a..fbc3747 100644
--- a/internal/govulncheck/govulncheck.go
+++ b/internal/govulncheck/govulncheck.go
@@ -107,18 +107,12 @@
// a bigquery vuln.
func ConvertGovulncheckFinding(f *govulncheckapi.Finding) *Vuln {
vulnerableFrame := f.Trace[0]
- vuln := &Vuln{
+ return &Vuln{
ID: f.OSV,
PackagePath: vulnerableFrame.Package,
ModulePath: vulnerableFrame.Module,
Version: vulnerableFrame.Version,
- Called: false,
}
- if vulnerableFrame.Function != "" {
- vuln.Called = true
- }
-
- return vuln
}
const TableName = "govulncheck"
@@ -201,12 +195,6 @@
PackagePath string `bigquery:"package_path"`
ModulePath string `bigquery:"module_path"`
Version string `bigquery:"version"`
- // Called is currently used to differentiate between
- // called and imported vulnerabilities. We need it
- // because we don't conduct an imports analysis yet
- // use the full results of govulncheck source analysis.
- // It is not part of the bigquery schema.
- Called bool `bigquery:"-"`
}
// SchemaVersion changes whenever the govulncheck schema changes.
diff --git a/internal/govulncheck/govulncheck_test.go b/internal/govulncheck/govulncheck_test.go
index 6c76fda..5f69f15 100644
--- a/internal/govulncheck/govulncheck_test.go
+++ b/internal/govulncheck/govulncheck_test.go
@@ -61,7 +61,6 @@
PackagePath: "example.com/repo/module/package",
ModulePath: "example.com/repo/module",
Version: "v0.0.1",
- Called: true,
},
},
{
@@ -72,7 +71,6 @@
PackagePath: "example.com/repo/module/package",
ModulePath: "example.com/repo/module",
Version: "v1.0.0",
- Called: false,
},
},
}
diff --git a/internal/govulncheck/handler.go b/internal/govulncheck/handler.go
index 44bf307..0b0a9c0 100644
--- a/internal/govulncheck/handler.go
+++ b/internal/govulncheck/handler.go
@@ -5,22 +5,18 @@
package govulncheck
import (
- "golang.org/x/exp/maps"
"golang.org/x/pkgsite-metrics/internal/govulncheckapi"
"golang.org/x/pkgsite-metrics/internal/osv"
)
-// NewMetricsHandler returns a handler that returns a set of all findings.
+// NewMetricsHandler returns a handler that returns all findings.
// For use in the ecosystem metrics pipeline.
func NewMetricsHandler() *MetricsHandler {
- m := make(map[string]*govulncheckapi.Finding)
- return &MetricsHandler{
- byOSV: m,
- }
+ return &MetricsHandler{}
}
type MetricsHandler struct {
- byOSV map[string]*govulncheckapi.Finding
+ findings []*govulncheckapi.Finding
}
func (h *MetricsHandler) Config(c *govulncheckapi.Config) error {
@@ -36,16 +32,10 @@
}
func (h *MetricsHandler) Finding(finding *govulncheckapi.Finding) error {
- f, found := h.byOSV[finding.OSV]
- if !found || f.Trace[0].Function == "" {
- // If the vuln wasn't called in the first trace, replace it with
- // the new finding (that way if the vuln is called at any point
- // it's trace will reflect that, which is needed when converting to bq)
- h.byOSV[finding.OSV] = finding
- }
+ h.findings = append(h.findings, finding)
return nil
}
func (h *MetricsHandler) Findings() []*govulncheckapi.Finding {
- return maps.Values(h.byOSV)
+ return h.findings
}
diff --git a/internal/govulncheck/handler_test.go b/internal/govulncheck/handler_test.go
deleted file mode 100644
index cfba8ca..0000000
--- a/internal/govulncheck/handler_test.go
+++ /dev/null
@@ -1,50 +0,0 @@
-// Copyright 2023 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package govulncheck
-
-import (
- "testing"
-
- "golang.org/x/pkgsite-metrics/internal/govulncheckapi"
-)
-
-func TestMetricsHandler(t *testing.T) {
- osvID := "GO-YYYY-XXXX"
- calledFinding := &govulncheckapi.Finding{
- OSV: osvID,
- Trace: []*govulncheckapi.Frame{
- {
- Module: "example.com/repo/module",
- Version: "v0.0.1",
- Package: "example.com/repo/module/package",
- Function: "func",
- Position: &govulncheckapi.Position{},
- },
- },
- }
-
- uncalledFinding := &govulncheckapi.Finding{
- OSV: osvID,
- FixedVersion: "",
- Trace: []*govulncheckapi.Frame{
- {
- Module: "example.com/repo/module",
- Version: "v0.0.1",
- Package: "example.com/repo/module/package",
- Position: nil,
- },
- },
- }
-
- t.Run("Called finding overwrites uncalled w/ same ID", func(t *testing.T) {
- h := NewMetricsHandler()
- h.Finding(uncalledFinding)
- h.Finding(calledFinding)
- findings := h.Findings()
- if len(findings) != 1 || findings[0] != calledFinding {
- t.Errorf("MetricsHandler.Finding() error: expected %v, got %v", calledFinding, findings[0])
- }
- })
-}
diff --git a/internal/govulncheckapi/result.go b/internal/govulncheckapi/result.go
index 8853792..d3307cf 100644
--- a/internal/govulncheckapi/result.go
+++ b/internal/govulncheckapi/result.go
@@ -21,12 +21,12 @@
Finding *Finding `json:"finding,omitempty"`
}
-// ProtocolVersion is the current protocol version this file implements
-const ProtocolVersion = "v0.1.0"
-
+// Config must occur as the first message of a stream and informs the client
+// about the information used to generate the findings.
+// The only required field is the protocol version.
type Config struct {
// ProtocolVersion specifies the version of the JSON protocol.
- ProtocolVersion string `json:"protocol_version,omitempty"`
+ ProtocolVersion string `json:"protocol_version"`
// ScannerName is the name of the tool, for example, govulncheck.
//
@@ -48,17 +48,15 @@
// vulnerabilities.
GoVersion string `json:"go_version,omitempty"`
- // Consider only vulnerabilities that apply to this OS.
- GOOS string `json:"goos,omitempty"`
-
- // Consider only vulnerabilities that apply to this architecture.
- GOARCH string `json:"goarch,omitempty"`
-
- // ImportsOnly instructs vulncheck to analyze import chains only.
- // Otherwise, call chains are analyzed too.
- ImportsOnly bool `json:"imports_only,omitempty"`
+ // ScanLevel instructs govulncheck to analyze at a specific level of detail.
+ // Valid values include module, package and symbol.
+ ScanLevel ScanLevel `json:"scan_level,omitempty"`
}
+// Progress messages are informational only, intended to allow users to monitor
+// the progress of a long running scan.
+// A stream must remain fully valid and able to be interpreted with all progress
+// messages removed.
type Progress struct {
// A time stamp for the message.
Timestamp *time.Time `json:"time,omitempty"`
@@ -67,7 +65,14 @@
Message string `json:"message,omitempty"`
}
-// Vuln represents a single OSV entry.
+// Finding contains information on a discovered vulnerability. Each vulnerability
+// will likely have multiple findings in JSON mode. This is because govulncheck
+// emits findings as it does work, and therefore could emit one module level,
+// one package level, and potentially multiple symbol level findings depending
+// on scan level.
+// Multiple symbol level findings can be emitted when multiple symbols of the
+// same vuln are called or govulncheck decides to show multiple traces for the
+// same symbol.
type Finding struct {
// OSV is the id of the detected vulnerability.
OSV string `json:"osv,omitempty"`
@@ -97,8 +102,10 @@
// In binary mode, trace will contain a single-frame with no position
// information.
//
- // When a package is imported but no vulnerable symbol is called, the trace
- // will contain a single-frame with no symbol or position information.
+ // For module level source findings, the trace will contain a single-frame
+ // with no symbol, position, or package information. For package level source
+ // findings, the trace will contain a single-frame with no symbol or position
+ // information.
Trace []*Frame `json:"trace,omitempty"`
}
@@ -130,11 +137,31 @@
Position *Position `json:"position,omitempty"`
}
-// Position is a copy of token.Position used to marshal/unmarshal
-// JSON correctly.
+// Position represents arbitrary source position.
type Position struct {
Filename string `json:"filename,omitempty"` // filename, if any
- Offset int `json:"offset"` // offset, starting at 0
+ Offset int `json:"offset"` // byte offset, starting at 0
Line int `json:"line"` // line number, starting at 1
Column int `json:"column"` // column number, starting at 1 (byte count)
}
+
+// ScanLevel represents the detail level at which a scan occurred.
+// This can be necessary to correctly interpret the findings, for instance if
+// a scan is at symbol level and a finding does not have a symbol it means the
+// vulnerability was imported but not called. If the scan however was at
+// "package" level, that determination cannot be made.
+type ScanLevel string
+
+const (
+ ScanLevelModule = "module"
+ ScanLevelPackage = "package"
+ ScanLevelSymbol = "symbol"
+)
+
+// WantSymbols can be used to check whether the scan level is one that is able
+// to generate symbol-level findings.
+func (l ScanLevel) WantSymbols() bool { return l == ScanLevelSymbol }
+
+// WantPackages can be used to check whether the scan level is one that is able
+// to generate package-level findings.
+func (l ScanLevel) WantPackages() bool { return l == ScanLevelPackage || l == ScanLevelSymbol }
diff --git a/internal/worker/govulncheck_scan.go b/internal/worker/govulncheck_scan.go
index e7ef7d7..43da0dc 100644
--- a/internal/worker/govulncheck_scan.go
+++ b/internal/worker/govulncheck_scan.go
@@ -27,22 +27,47 @@
)
const (
- // modeImports is used to report results of vulnerability detection at
- // imports level precision. It cannot be directly triggered by scan
- // endpoints. Instead, ModeGovulncheck mode reports its results to show
- // difference in precision of vulnerability detection.
- modeImports string = "IMPORTS"
-
- // ModeGovulncheck runs the govulncheck binary in default (source) mode.
+ // ModeGovulncheck is an ecosystem metrics mode that runs the govulncheck
+ // binary in default (source) mode.
ModeGovulncheck = "GOVULNCHECK"
- // ModeCompare finds compilable binaries and runs govulncheck in both source
- // and binary mode.
+ // ModeCompare is an ecosystem metrics mode that finds compilable binaries
+ // and runs govulncheck in both source and binary mode and reports results.
ModeCompare = "COMPARE"
+)
- // modeBinary is only used by ModeCompare for reporting results. It cannot
- // be directly triggered by scan endpoints.
- modeBinary string = "BINARY"
+// modes is a set of supported govulncheck ecosystem metrics modes.
+var modes = map[string]bool{
+ ModeGovulncheck: true,
+ ModeCompare: true,
+}
+
+const (
+ // scanModeSourceSymbol is used to designate results at govulncheck source
+ // '-scan symbol' level of precision.
+ //
+ // Note that this is not an ecosystem metrics mode. Its value is "GOVULNCHECK"
+ // for historical reasons.
+ scanModeSourceSymbol = "GOVULNCHECK"
+
+ // scanModeSourcePackage is used to designate results at govulncheck source
+ // '-scan package' level of precision.
+ //
+ // Note that this is not an ecosystem metrics mode.
+ scanModeSourcePackage string = "IMPORTS"
+
+ // scanModeSourceModule is used to designate results at govulncheck source
+ // '-scan module' level of precision.
+ //
+ // Note that this is not an ecosystem metrics mode.
+ scanModeSourceModule string = "REQUIRES"
+
+ // scanModeBinarySymbol is used to designate results at govulncheck binary
+ // '-scan symbol' level of precision.
+ //
+ // Note that this is not an ecosystem metrics mode. Its value is "BINARY"
+ // for historical reasons.
+ scanModeBinarySymbol string = "BINARY"
// sandboxGoCache is the location of the Go cache inside the sandbox. The
// user is root and their $HOME directory is /root. The Go cache resides
@@ -50,15 +75,9 @@
sandboxGoCache = "root/.cache/go-build"
)
-// modes is a set of govulncheck modes externally visible.
-var modes = map[string]bool{
- ModeGovulncheck: true,
- ModeCompare: true,
-}
-
func modeToGovulncheckFlag(mode string) string {
switch mode {
- case modeBinary: // for sanity
+ case scanModeBinarySymbol:
return govulncheck.FlagBinary
default:
return govulncheck.FlagSource
@@ -217,9 +236,9 @@
// CompareModule gets results of govulncheck source and binary mode on each binary defined in a module.
//
-// It discards all results where there is a failure that is not specific to the comparison, i.e., failures
-// that appear in GOVULNCHECK or IMPORTS mode. Examples are situations where the module is malformed,
-// govulncheck fails, or it is not possible to build a found binary within the module.
+// It discards all results where there is a failure that is not specific to the comparison. Examples are
+// situations where the module is malformed, govulncheck fails, or it is not possible to build a found
+// binary within the module.
func (s *scanner) CompareModule(ctx context.Context, w http.ResponseWriter, sreq *govulncheck.Request, info *proxy.VersionInfo, baseRow *govulncheck.Result) (err error) {
defer derrors.Wrap(&err, "CompareModule")
err = doScan(ctx, baseRow.ModulePath, info.Version, s.insecure, func() (err error) {
@@ -250,8 +269,8 @@
continue
}
- binRow := createComparisonRow(pkg, &results.BinaryResults, baseRow, modeBinary)
- srcRow := createComparisonRow(pkg, &results.SourceResults, baseRow, ModeGovulncheck)
+ binRow := createComparisonRow(pkg, &results.BinaryResults, baseRow, scanModeBinarySymbol)
+ srcRow := createComparisonRow(pkg, &results.SourceResults, baseRow, scanModeSourceSymbol)
log.Infof(ctx, "found %d vulns in binary mode and %d vulns in source mode for package %s (module: %s)", len(binRow.Vulns), len(srcRow.Vulns), pkg, sreq.Path())
rows = append(rows, binRow, srcRow)
}
@@ -268,7 +287,7 @@
return nil
}
-func createComparisonRow(pkg string, result *govulncheck.SandboxResponse, baseRow *govulncheck.Result, mode string) (row *govulncheck.Result) {
+func createComparisonRow(pkg string, result *govulncheck.SandboxResponse, baseRow *govulncheck.Result, scanMode string) (row *govulncheck.Result) {
row = &govulncheck.Result{
CreatedAt: baseRow.CreatedAt,
Suffix: pkg,
@@ -279,22 +298,16 @@
CommitTime: baseRow.CommitTime,
WorkVersion: baseRow.WorkVersion,
}
- if mode == modeBinary {
+ if scanMode == scanModeBinarySymbol {
row.ScanMode = "COMPARE - BINARY"
row.BinaryBuildSeconds = bigquery.NullFloat(result.Stats.BuildTime.Seconds())
} else {
row.ScanMode = "COMPARE - SOURCE"
}
- vulns := []*govulncheck.Vuln{}
- for _, finding := range result.Findings {
- vulns = append(vulns, govulncheck.ConvertGovulncheckFinding(finding))
- }
- row.Vulns = vulnsForMode(vulns, mode)
-
+ row.Vulns = vulnsForScanMode(result.Findings, scanMode)
row.ScanMemory = int64(result.Stats.ScanMemory)
row.ScanSeconds = result.Stats.ScanSeconds
-
return row
}
@@ -303,42 +316,46 @@
if sreq.Module == "std" {
return nil, nil // ignore the standard library
}
- row := &govulncheck.Result{
+ // baseRow is used to return on a premature error and
+ // to create actual bq rows otherwise.
+ baseRow := &govulncheck.Result{
ModulePath: sreq.Module,
Suffix: sreq.Suffix,
WorkVersion: *s.workVersion,
- ScanMode: sreq.Mode,
ImportedBy: sreq.ImportedBy,
}
- row.VulnDBLastModified = s.workVersion.VulnDBLastModified
+ baseRow.VulnDBLastModified = s.workVersion.VulnDBLastModified
// Scan the version.
log.Debugf(ctx, "fetching proxy info: %s@%s", sreq.Path(), sreq.Version)
info, err := s.proxyClient.Info(ctx, sreq.Module, sreq.Version)
if err != nil {
log.Infof(ctx, "proxy error: %s@%s %v", sreq.Path(), sreq.Version, err)
- row.AddError(fmt.Errorf("%v: %w", err, derrors.ProxyError))
- // TODO: should we also make a copy for imports mode?
- if err := writeResult(ctx, sreq.Serve, w, s.bqClient, govulncheck.TableName, row); err != nil {
+ baseRow.AddError(fmt.Errorf("%v: %w", err, derrors.ProxyError))
+ // If proxy failed, put the scan mode as the incoming ecosystem mode
+ // for now.
+ // TODO: is there a better way of doing this?
+ baseRow.ScanMode = sreq.Mode
+ if err := writeResult(ctx, sreq.Serve, w, s.bqClient, govulncheck.TableName, baseRow); err != nil {
return nil, err
}
- return row.WorkState(), nil
+ return baseRow.WorkState(), nil
}
- row.Version = info.Version
- row.SortVersion = version.ForSorting(row.Version)
- row.CommitTime = info.Time
+ baseRow.Version = info.Version
+ baseRow.SortVersion = version.ForSorting(baseRow.Version)
+ baseRow.CommitTime = info.Time
if sreq.Mode == ModeCompare {
- err := s.CompareModule(ctx, w, sreq, info, row)
+ err := s.CompareModule(ctx, w, sreq, info, baseRow)
// TODO: WorkState for CompareModule requests?
return nil, err
}
log.Infof(ctx, "running scanner.runScanModule: %s@%s", sreq.Path(), sreq.Version)
stats := &govulncheck.ScanStats{}
- vulns, err := s.runScanModule(ctx, sreq.Module, info.Version, sreq.Mode, stats)
- row.ScanSeconds = stats.ScanSeconds
- row.ScanMemory = int64(stats.ScanMemory)
+ findings, err := s.runScanModule(ctx, sreq.Module, info.Version, sreq.Mode, stats)
+ baseRow.ScanSeconds = stats.ScanSeconds
+ baseRow.ScanMemory = int64(stats.ScanMemory)
if err != nil {
switch {
case isGovulncheckLoadError(err) || isBuildIssue(err):
@@ -372,69 +389,71 @@
default:
err = fmt.Errorf("%v: %w", err, derrors.ScanModuleGovulncheckError)
}
- row.AddError(err)
- } else {
- row.Vulns = vulnsForMode(vulns, sreq.Mode)
+ baseRow.AddError(err)
}
- log.Infof(ctx, "scanner.runScanModule returned %d vulns for %s: row.Vulns=%d err=%v", len(vulns), sreq.Path(), len(row.Vulns), err)
- rows := []bigquery.Row{row}
- if sreq.Mode == ModeGovulncheck {
- // For ModeGovulncheck, add the copy of row and report
- // each vulnerability as imported. We set the performance
- // numbers to 0 since we don't actually perform a scan
- // at the level of import chains. Also makes a copy if
- // the original row has an error and no vulns.
- impRow := *row
- impRow.ScanMode = modeImports
- impRow.ScanSeconds = 0
- impRow.ScanMemory = 0
- impRow.Vulns = vulnsForMode(vulns, modeImports)
- log.Infof(ctx, "scanner.runScanModule also storing imports vulns for %s: row.Vulns=%d", sreq.Path(), len(impRow.Vulns))
- rows = append(rows, &impRow)
+ // create a row for each precision level
+ var rows []bigquery.Row
+ for _, mode := range []string{scanModeSourceSymbol, scanModeSourcePackage, scanModeSourceModule} {
+ row := *baseRow
+ row.ScanMode = mode
+ // We use govulncheck command execution time as the approx. time for symbol level analysis.
+ // We currently don't have a way of approximating time for measuring time for module and
+ // package level scans. We could run govulncheck with -scan package and -scan module, but
+ // that would put more pressure on the pipeline and use more resources.
+ // TODO: could we instrument handler to measure this for us?
+ if mode != ModeGovulncheck {
+ row.ScanSeconds = 0
+ row.ScanMemory = 0
+ }
+ row.Vulns = vulnsForScanMode(findings, mode)
+ log.Infof(ctx, "scanner.runScanModule returned %d findings and err=%v for %s with row.Vulns=%d in scan mode=%s", len(findings), err, sreq.Path(), len(row.Vulns), mode)
+ rows = append(rows, &row)
}
if err := writeResults(ctx, sreq.Serve, w, s.bqClient, govulncheck.TableName, rows); err != nil {
return nil, err
}
- return row.WorkState(), nil
+ return baseRow.WorkState(), nil
}
-// vulnsForMode returns vulns that make sense to report for
-// a particular mode.
-//
-// For ModeGovulncheck, these are all vulns that are actually
-// called. For modeImports, these are all vulns, called or just
-// imported. For modeBinary, these are exactly all the vulns
-// since binary analysis does not distinguish between called
-// and imported vulnerabilities.
-func vulnsForMode(vulns []*govulncheck.Vuln, mode string) []*govulncheck.Vuln {
- if mode == modeBinary {
- return vulns
- }
-
- var vs []*govulncheck.Vuln
- for _, v := range vulns {
- if mode == ModeGovulncheck {
- // Return only the called vulns for ModeGovulncheck.
- if v.Called {
- vs = append(vs, v)
+// vulnsForScanMode produces Vulns from findings at the specified
+// govulncheck scan mode.
+func vulnsForScanMode(findings []*govulncheckapi.Finding, mode string) []*govulncheck.Vuln {
+ var modeFindings []*govulncheckapi.Finding
+ for _, f := range findings {
+ fr := f.Trace[0]
+ switch mode {
+ case scanModeSourceSymbol, scanModeBinarySymbol:
+ if fr.Function != "" {
+ modeFindings = append(modeFindings, f)
}
- } else if mode == modeImports {
- // For imports mode, return the vulnerability as it
- // is imported, but not called.
- nv := *v
- nv.Called = false
- vs = append(vs, &nv)
- } else {
- panic(fmt.Sprintf("vulnsForMode unsupported mode %s", mode))
+ case scanModeSourcePackage:
+ if fr.Package != "" && fr.Function == "" {
+ modeFindings = append(modeFindings, f)
+ }
+ case scanModeSourceModule:
+ if fr.Package == "" && fr.Function == "" { // fr.Module is always set
+ modeFindings = append(modeFindings, f)
+ }
}
}
- return vs
+
+ var vulns []*govulncheck.Vuln
+ seen := make(map[govulncheck.Vuln]bool) // avoid duplicates
+ for _, f := range modeFindings {
+ v := govulncheck.ConvertGovulncheckFinding(f)
+ if seen[*v] {
+ continue
+ }
+ seen[*v] = true
+ vulns = append(vulns, v)
+ }
+ return vulns
}
// runScanModule fetches the module version from the proxy, and analyzes its source
// code for vulnerabilities. The analysis of binaries is done in CompareModules.
-func (s *scanner) runScanModule(ctx context.Context, modulePath, version, mode string, stats *govulncheck.ScanStats) (bvulns []*govulncheck.Vuln, err error) {
+func (s *scanner) runScanModule(ctx context.Context, modulePath, version, mode string, stats *govulncheck.ScanStats) (findings []*govulncheckapi.Finding, err error) {
err = doScan(ctx, modulePath, version, s.insecure, func() (err error) {
// Download the module first.
inputPath := moduleDir(modulePath, version)
@@ -444,23 +463,15 @@
return err
}
- var findings []*govulncheckapi.Finding
if s.insecure {
findings, err = s.runGovulncheckScanInsecure(inputPath, mode, stats)
} else {
findings, err = s.runGovulncheckScanSandbox(ctx, inputPath, mode, stats)
}
- if err != nil {
- return err
- }
log.Debugf(ctx, "govulncheck stats: %dkb | %vs", stats.ScanMemory, stats.ScanSeconds)
-
- for _, v := range findings {
- bvulns = append(bvulns, govulncheck.ConvertGovulncheckFinding(v))
- }
- return nil
+ return err
})
- return bvulns, err
+ return findings, err
}
func (s *scanner) runGovulncheckScanSandbox(ctx context.Context, inputPath, mode string, stats *govulncheck.ScanStats) (_ []*govulncheckapi.Finding, err error) {
@@ -468,7 +479,7 @@
err = s.sbox.Validate()
log.Debugf(ctx, "sandbox Validate returned %v", err)
- response, err := s.runGovulncheckSandbox(ctx, modeToGovulncheckFlag(mode), smdir)
+ response, err := s.runGovulncheckSandbox(ctx, mode, smdir)
if err != nil {
return nil, err
}
diff --git a/internal/worker/govulncheck_scan_test.go b/internal/worker/govulncheck_scan_test.go
index 2c4dfa9..796a81c 100644
--- a/internal/worker/govulncheck_scan_test.go
+++ b/internal/worker/govulncheck_scan_test.go
@@ -15,6 +15,7 @@
"golang.org/x/pkgsite-metrics/internal/buildtest"
"golang.org/x/pkgsite-metrics/internal/govulncheck"
+ "golang.org/x/pkgsite-metrics/internal/govulncheckapi"
)
func TestAsScanError(t *testing.T) {
@@ -28,16 +29,17 @@
}
func TestVulnsForMode(t *testing.T) {
- vulns := []*govulncheck.Vuln{
- &govulncheck.Vuln{ID: "A"},
- &govulncheck.Vuln{ID: "B", Called: false},
- &govulncheck.Vuln{ID: "C", Called: true},
+ findings := []*govulncheckapi.Finding{
+ {Trace: []*govulncheckapi.Frame{{Module: "M1", Package: "P1", Function: "F1"}}},
+ {Trace: []*govulncheckapi.Frame{{Module: "M1", Package: "P1"}}},
+ {Trace: []*govulncheckapi.Frame{{Module: "M1"}}},
+ {Trace: []*govulncheckapi.Frame{{Module: "M2"}}},
}
vulnsStr := func(vulns []*govulncheck.Vuln) string {
var vs []string
for _, v := range vulns {
- vs = append(vs, fmt.Sprintf("%s:%t", v.ID, v.Called))
+ vs = append(vs, fmt.Sprintf("%s:%s", v.ModulePath, v.PackagePath))
}
return strings.Join(vs, ", ")
}
@@ -46,14 +48,15 @@
mode string
want string
}{
- {modeImports, "A:false, B:false, C:false"},
- {ModeGovulncheck, "C:true"},
- {modeBinary, "A:false, B:false, C:true"},
+ {scanModeSourceSymbol, "M1:P1"},
+ {scanModeBinarySymbol, "M1:P1"},
+ {scanModeSourcePackage, "M1:P1"},
+ {scanModeSourceModule, "M1:, M2:"},
} {
tc := tc
t.Run(tc.mode, func(t *testing.T) {
- modeVulns := vulnsForMode(vulns, tc.mode)
- if got := vulnsStr(modeVulns); got != tc.want {
+ vs := vulnsForScanMode(findings, tc.mode)
+ if got := vulnsStr(vs); got != tc.want {
t.Errorf("got %s; want %s", got, tc.want)
}
})