all: add new schema for the govulncheck table
Also rename the table to "govulncheck".
Change-Id: I6302a4ceb283b8fed520cb8c4d07ffbef77185e9
Reviewed-on: https://go-review.googlesource.com/c/pkgsite-metrics/+/476775
Reviewed-by: Jonathan Amsterdam <jba@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Zvonimir Pavlinovic <zpavlinovic@google.com>
diff --git a/devtools/bqerrors.sh b/devtools/bqerrors.sh
index d06d083..7b6fca6 100755
--- a/devtools/bqerrors.sh
+++ b/devtools/bqerrors.sh
@@ -34,7 +34,7 @@
die "missing TF_VAR_prod_project"
fi
- bq_error_query $project.$dataset.vulncheck
+ bq_error_query $project.$dataset.govulncheck
}
diff --git a/internal/goenv.go b/internal/goenv.go
new file mode 100644
index 0000000..7a10b76
--- /dev/null
+++ b/internal/goenv.go
@@ -0,0 +1,23 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package internal
+
+import (
+ "encoding/json"
+ "os/exec"
+)
+
+// GoEnv returns the key-value map of `go env`.
+func GoEnv() (map[string]string, error) {
+ out, err := exec.Command("go", "env", "-json").Output()
+ if err != nil {
+ return nil, err
+ }
+ env := make(map[string]string)
+ if err := json.Unmarshal(out, &env); err != nil {
+ return nil, err
+ }
+ return env, nil
+}
diff --git a/internal/goenv_test.go b/internal/goenv_test.go
new file mode 100644
index 0000000..b25a5b7
--- /dev/null
+++ b/internal/goenv_test.go
@@ -0,0 +1,34 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package internal
+
+import (
+ "testing"
+
+ test "golang.org/x/pkgsite-metrics/internal/testing"
+)
+
+func TestGoEnv(t *testing.T) {
+ test.NeedsGoEnv(t)
+
+ for _, key := range []string{"GOVERSION", "GOROOT", "GOPATH", "GOMODCACHE"} {
+ if m, err := GoEnv(); m[key] == "" {
+ t.Errorf("want something for go env %s; got nothing", key)
+ } else if err != nil {
+ t.Errorf("unexpected error for go env %s: %v", key, err)
+ }
+ }
+}
+
+func TestGoEnvNonVariable(t *testing.T) {
+ test.NeedsGoEnv(t)
+
+ key := "NOT_A_GO_ENV_VARIABLE"
+ if m, err := GoEnv(); m[key] != "" {
+ t.Errorf("expected nothing for go env %s; got %s", key, m[key])
+ } else if err != nil {
+ t.Errorf("unexpected error for go env %s: %v", key, err)
+ }
+}
diff --git a/internal/govulncheck/govulncheck.go b/internal/govulncheck/govulncheck.go
index 0883e68..aa2dd35 100644
--- a/internal/govulncheck/govulncheck.go
+++ b/internal/govulncheck/govulncheck.go
@@ -14,7 +14,6 @@
"strings"
"time"
- bq "cloud.google.com/go/bigquery"
"cloud.google.com/go/civil"
"golang.org/x/exp/maps"
"golang.org/x/pkgsite-metrics/internal/bigquery"
@@ -87,49 +86,23 @@
func ConvertGovulncheckOutput(v *govulncheck.Vuln) (vulns []*Vuln) {
for _, module := range v.Modules {
- for pkgNum, pkg := range module.Packages {
- addedSymbols := make(map[string]bool)
- baseVuln := &Vuln{
+ for _, pkg := range module.Packages {
+ vuln := &Vuln{
ID: v.OSV.ID,
ModulePath: module.Path,
PackagePath: pkg.Path,
- CallSink: bigquery.NullInt(0),
- ImportSink: bigquery.NullInt(pkgNum + 1),
- RequireSink: bigquery.NullInt(pkgNum + 1),
+ Version: module.FoundVersion,
}
-
- // For each called symbol, reconstruct sinks and create the corresponding bigquery vuln
- for symbolNum, cs := range pkg.CallStacks {
- addedSymbols[cs.Symbol] = true
- toAdd := *baseVuln
- toAdd.Symbol = cs.Symbol
- toAdd.CallSink = bigquery.NullInt(symbolNum + 1)
- vulns = append(vulns, &toAdd)
+ if len(pkg.CallStacks) > 0 {
+ vuln.Called = true
}
-
- // Find the rest of the vulnerable imported symbols that haven't been called
- // and create corresponding bigquery vulns
- for _, affected := range v.OSV.Affected {
- if affected.Package.Name == module.Path {
- for _, imp := range affected.EcosystemSpecific.Imports {
- if imp.Path == pkg.Path {
- for _, symbol := range imp.Symbols {
- if !addedSymbols[symbol] {
- toAdd := *baseVuln
- toAdd.Symbol = symbol
- vulns = append(vulns, &toAdd)
- }
- }
- }
- }
- }
- }
+ vulns = append(vulns, vuln)
}
}
return vulns
}
-const TableName = "vulncheck"
+const TableName = "govulncheck"
// Note: before modifying Result or Vuln, make sure the change
// is a valid schema modification.
@@ -152,21 +125,18 @@
CommitTime time.Time `bigquery:"commit_time"`
ScanSeconds float64 `bigquery:"scan_seconds"`
ScanMemory int64 `bigquery:"scan_memory"`
- PkgsMemory int64 `bigquery:"pkgs_memory"`
ScanMode string `bigquery:"scan_mode"`
- // Workers is the concurrency limit under which a module is
- // analyzed. Useful for interpreting memory measurements when
- // there are multiple modules analyzed in the same process.
- // 0 if no limit is specified, -1 for potential errors.
- Workers int `bigquery:"workers"`
- WorkVersion // InferSchema flattens embedded fields
- Vulns []*Vuln `bigquery:"vulns"`
+ WorkVersion // InferSchema flattens embedded fields
+ Vulns []*Vuln `bigquery:"vulns"`
}
// WorkVersion contains information that can be used to avoid duplicate work.
// Given two WorkVersion values v1 and v2 for the same module path and version,
// if v1.Equal(v2) then it is not necessary to scan the module.
type WorkVersion struct {
+ // GoVersion used at path. Allows precise interpretation
+ // of detected stdlib vulnerabilities.
+ GoVersion string `bigquery:"go_version"`
// The version of the currently running code. This tracks changes in the
// logic of module scanning and processing.
WorkerVersion string `bigquery:"worker_version"`
@@ -182,7 +152,8 @@
if v1 == nil || v2 == nil {
return v1 == v2
}
- return v1.WorkerVersion == v2.WorkerVersion &&
+ return v1.GoVersion == v2.GoVersion &&
+ v1.WorkerVersion == v2.WorkerVersion &&
v1.SchemaVersion == v2.SchemaVersion &&
v1.VulnVersion == v2.VulnVersion &&
v1.VulnDBLastModified.Equal(v2.VulnDBLastModified)
@@ -200,13 +171,16 @@
// Vuln is a record in Result.
type Vuln struct {
- ID string `bigquery:"id"`
- Symbol string `bigquery:"symbol"`
- PackagePath string `bigquery:"package_path"`
- ModulePath string `bigquery:"module_path"`
- CallSink bq.NullInt64 `bigquery:"call_sink"`
- ImportSink bq.NullInt64 `bigquery:"import_sink"`
- RequireSink bq.NullInt64 `bigquery:"require_sink"`
+ ID string `bigquery:"id"`
+ PackagePath string `bigquery:"package_path"`
+ ModulePath string `bigquery:"module_path"`
+ Version string `bigquery:"version"`
+ // Called is currently used to differentiate between
+ // called and imported vulnerabilities. We need it
+ // because we don't conduct an imports analysis yet
+ // use the full results of govulncheck source analysis.
+ // It is not part of the bigquery schema.
+ Called bool `bigquery:"-"`
}
// SchemaVersion changes whenever the govulncheck schema changes.
diff --git a/internal/govulncheck/govulncheck_test.go b/internal/govulncheck/govulncheck_test.go
index e4da8f7..58481a5 100644
--- a/internal/govulncheck/govulncheck_test.go
+++ b/internal/govulncheck/govulncheck_test.go
@@ -52,7 +52,8 @@
OSV: osvEntry,
Modules: []*govulncheck.Module{
{
- Path: "example.com/repo/module",
+ FoundVersion: "v0.0.1",
+ Path: "example.com/repo/module",
Packages: []*govulncheck.Package{
{
Path: "example.com/repo/module/package",
@@ -73,7 +74,8 @@
OSV: osvEntry,
Modules: []*govulncheck.Module{
{
- Path: "example.com/repo/module",
+ FoundVersion: "v1.0.0",
+ Path: "example.com/repo/module",
Packages: []*govulncheck.Package{
{
Path: "example.com/repo/module/package",
@@ -89,57 +91,35 @@
wantVulns []*Vuln
}{
{
- name: "Call One Symbol",
+ name: "call one symbol but not all",
vuln: vuln1,
wantVulns: []*Vuln{
{
ID: "GO-YYYY-1234",
- Symbol: "Symbol",
PackagePath: "example.com/repo/module/package",
ModulePath: "example.com/repo/module",
- CallSink: bigquery.NullInt(1),
- ImportSink: bigquery.NullInt(1),
- RequireSink: bigquery.NullInt(1),
- },
- {
- ID: "GO-YYYY-1234",
- Symbol: "Another",
- PackagePath: "example.com/repo/module/package",
- ModulePath: "example.com/repo/module",
- CallSink: bigquery.NullInt(0),
- ImportSink: bigquery.NullInt(1),
- RequireSink: bigquery.NullInt(1),
+ Version: "v0.0.1",
+ Called: true,
},
},
},
{
- name: "Call no symbols",
+ name: "call no symbols",
vuln: vuln2,
wantVulns: []*Vuln{
{
ID: "GO-YYYY-1234",
PackagePath: "example.com/repo/module/package",
ModulePath: "example.com/repo/module",
- Symbol: "Symbol",
- CallSink: bigquery.NullInt(0),
- ImportSink: bigquery.NullInt(1),
- RequireSink: bigquery.NullInt(1),
- },
- {
- ID: "GO-YYYY-1234",
- PackagePath: "example.com/repo/module/package",
- ModulePath: "example.com/repo/module",
- Symbol: "Another",
- CallSink: bigquery.NullInt(0),
- ImportSink: bigquery.NullInt(1),
- RequireSink: bigquery.NullInt(1),
+ Version: "v1.0.0",
+ Called: false,
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- if diff := cmp.Diff(ConvertGovulncheckOutput(tt.vuln), tt.wantVulns, cmpopts.EquateEmpty()); diff != "" {
+ if diff := cmp.Diff(ConvertGovulncheckOutput(tt.vuln), tt.wantVulns, cmpopts.EquateEmpty(), cmp.AllowUnexported(Vuln{})); diff != "" {
t.Errorf("mismatch (-got, +want): %s", diff)
}
})
@@ -208,6 +188,7 @@
SortVersion: "sv",
ImportedBy: 10,
WorkVersion: WorkVersion{
+ GoVersion: "go1.19.6",
WorkerVersion: "1",
SchemaVersion: "s",
VulnVersion: "2",
diff --git a/internal/testing/testenv.go b/internal/testing/testenv.go
new file mode 100644
index 0000000..cc34a02
--- /dev/null
+++ b/internal/testing/testenv.go
@@ -0,0 +1,20 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testing
+
+import (
+ "os/exec"
+ "testing"
+)
+
+// NeedsGoEnv skips t if the current system can't get the environment with
+// “go env” in a subprocess.
+func NeedsGoEnv(t testing.TB) {
+ t.Helper()
+
+ if _, err := exec.LookPath("go"); err != nil {
+ t.Skip("skipping test: can't run go env")
+ }
+}
diff --git a/internal/worker/govulncheck.go b/internal/worker/govulncheck.go
index 6f48380..aa5d06e 100644
--- a/internal/worker/govulncheck.go
+++ b/internal/worker/govulncheck.go
@@ -10,6 +10,7 @@
"fmt"
"runtime/debug"
+ "golang.org/x/pkgsite-metrics/internal"
"golang.org/x/pkgsite-metrics/internal/derrors"
"golang.org/x/pkgsite-metrics/internal/govulncheck"
"golang.org/x/pkgsite-metrics/internal/log"
@@ -53,7 +54,12 @@
if err != nil {
return nil, err
}
+ goEnv, err := internal.GoEnv()
+ if err != nil {
+ return nil, err
+ }
h.workVersion = &govulncheck.WorkVersion{
+ GoVersion: goEnv["GOVERSION"],
VulnDBLastModified: lmt,
WorkerVersion: h.cfg.VersionID,
SchemaVersion: govulncheck.SchemaVersion,
diff --git a/internal/worker/govulncheck_scan.go b/internal/worker/govulncheck_scan.go
index 5086f36..93f5156 100644
--- a/internal/worker/govulncheck_scan.go
+++ b/internal/worker/govulncheck_scan.go
@@ -19,7 +19,6 @@
"cloud.google.com/go/storage"
"golang.org/x/exp/event"
"golang.org/x/pkgsite-metrics/internal/bigquery"
- "golang.org/x/pkgsite-metrics/internal/config"
"golang.org/x/pkgsite-metrics/internal/derrors"
"golang.org/x/pkgsite-metrics/internal/govulncheck"
"golang.org/x/pkgsite-metrics/internal/log"
@@ -208,7 +207,6 @@
vulns, err := s.runScanModule(ctx, sreq.Module, info.Version, sreq.Suffix, sreq.Mode, stats)
row.ScanSeconds = stats.scanSeconds
row.ScanMemory = int64(stats.scanMemory)
- row.Workers = config.GetEnvInt("CLOUD_RUN_CONCURRENCY", "0", -1)
if err != nil {
switch {
case errors.Is(err, derrors.LoadPackagesNoGoModError) ||
@@ -272,14 +270,14 @@
for _, v := range vulns {
if mode == ModeGovulncheck {
// Return only the called vulns for ModeGovulncheck.
- if v.CallSink.Valid && v.CallSink.Int64 != 0 {
+ if v.Called {
vs = append(vs, v)
}
} else if mode == modeImports {
// For imports mode, return the vulnerability as it
// is imported, but not called.
nv := *v
- nv.CallSink = bigquery.NullInt(0)
+ nv.Called = false
vs = append(vs, &nv)
} else {
panic(fmt.Sprintf("vulnsForMode unsupported mode %s", mode))
diff --git a/internal/worker/govulncheck_scan_test.go b/internal/worker/govulncheck_scan_test.go
index 0f02bdd..0039731 100644
--- a/internal/worker/govulncheck_scan_test.go
+++ b/internal/worker/govulncheck_scan_test.go
@@ -15,7 +15,6 @@
"testing"
"cloud.google.com/go/storage"
- "golang.org/x/pkgsite-metrics/internal/bigquery"
"golang.org/x/pkgsite-metrics/internal/buildtest"
"golang.org/x/pkgsite-metrics/internal/govulncheck"
)
@@ -34,15 +33,15 @@
func TestVulnsForMode(t *testing.T) {
vulns := []*govulncheck.Vuln{
- &govulncheck.Vuln{Symbol: "A", CallSink: bigquery.NullInt(0)},
- &govulncheck.Vuln{Symbol: "B"},
- &govulncheck.Vuln{Symbol: "C", CallSink: bigquery.NullInt(9)},
+ &govulncheck.Vuln{ID: "A"},
+ &govulncheck.Vuln{ID: "B", Called: false},
+ &govulncheck.Vuln{ID: "C", Called: true},
}
vulnsStr := func(vulns []*govulncheck.Vuln) string {
var vs []string
for _, v := range vulns {
- vs = append(vs, fmt.Sprintf("%s:%d", v.Symbol, v.CallSink.Int64))
+ vs = append(vs, fmt.Sprintf("%s:%t", v.ID, v.Called))
}
return strings.Join(vs, ", ")
}
@@ -51,9 +50,9 @@
mode string
want string
}{
- {modeImports, "A:0, B:0, C:0"},
- {ModeGovulncheck, "C:9"},
- {ModeBinary, "A:0, B:0, C:9"},
+ {modeImports, "A:false, B:false, C:false"},
+ {ModeGovulncheck, "C:true"},
+ {ModeBinary, "A:false, B:false, C:true"},
} {
tc := tc
t.Run(tc.mode, func(t *testing.T) {