all: add new schema for the govulncheck table

Also rename the table to "govulncheck".

Change-Id: I6302a4ceb283b8fed520cb8c4d07ffbef77185e9
Reviewed-on: https://go-review.googlesource.com/c/pkgsite-metrics/+/476775
Reviewed-by: Jonathan Amsterdam <jba@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Zvonimir Pavlinovic <zpavlinovic@google.com>
diff --git a/devtools/bqerrors.sh b/devtools/bqerrors.sh
index d06d083..7b6fca6 100755
--- a/devtools/bqerrors.sh
+++ b/devtools/bqerrors.sh
@@ -34,7 +34,7 @@
     die "missing TF_VAR_prod_project"
   fi
 
-  bq_error_query $project.$dataset.vulncheck
+  bq_error_query $project.$dataset.govulncheck
 }
 
 
diff --git a/internal/goenv.go b/internal/goenv.go
new file mode 100644
index 0000000..7a10b76
--- /dev/null
+++ b/internal/goenv.go
@@ -0,0 +1,23 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package internal
+
+import (
+	"encoding/json"
+	"os/exec"
+)
+
+// GoEnv returns the key-value map of `go env`.
+func GoEnv() (map[string]string, error) {
+	out, err := exec.Command("go", "env", "-json").Output()
+	if err != nil {
+		return nil, err
+	}
+	env := make(map[string]string)
+	if err := json.Unmarshal(out, &env); err != nil {
+		return nil, err
+	}
+	return env, nil
+}
diff --git a/internal/goenv_test.go b/internal/goenv_test.go
new file mode 100644
index 0000000..b25a5b7
--- /dev/null
+++ b/internal/goenv_test.go
@@ -0,0 +1,34 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package internal
+
+import (
+	"testing"
+
+	test "golang.org/x/pkgsite-metrics/internal/testing"
+)
+
+func TestGoEnv(t *testing.T) {
+	test.NeedsGoEnv(t)
+
+	for _, key := range []string{"GOVERSION", "GOROOT", "GOPATH", "GOMODCACHE"} {
+		if m, err := GoEnv(); m[key] == "" {
+			t.Errorf("want something for go env %s; got nothing", key)
+		} else if err != nil {
+			t.Errorf("unexpected error for go env %s: %v", key, err)
+		}
+	}
+}
+
+func TestGoEnvNonVariable(t *testing.T) {
+	test.NeedsGoEnv(t)
+
+	key := "NOT_A_GO_ENV_VARIABLE"
+	if m, err := GoEnv(); m[key] != "" {
+		t.Errorf("expected nothing for go env %s; got %s", key, m[key])
+	} else if err != nil {
+		t.Errorf("unexpected error for go env %s: %v", key, err)
+	}
+}
diff --git a/internal/govulncheck/govulncheck.go b/internal/govulncheck/govulncheck.go
index 0883e68..aa2dd35 100644
--- a/internal/govulncheck/govulncheck.go
+++ b/internal/govulncheck/govulncheck.go
@@ -14,7 +14,6 @@
 	"strings"
 	"time"
 
-	bq "cloud.google.com/go/bigquery"
 	"cloud.google.com/go/civil"
 	"golang.org/x/exp/maps"
 	"golang.org/x/pkgsite-metrics/internal/bigquery"
@@ -87,49 +86,23 @@
 
 func ConvertGovulncheckOutput(v *govulncheck.Vuln) (vulns []*Vuln) {
 	for _, module := range v.Modules {
-		for pkgNum, pkg := range module.Packages {
-			addedSymbols := make(map[string]bool)
-			baseVuln := &Vuln{
+		for _, pkg := range module.Packages {
+			vuln := &Vuln{
 				ID:          v.OSV.ID,
 				ModulePath:  module.Path,
 				PackagePath: pkg.Path,
-				CallSink:    bigquery.NullInt(0),
-				ImportSink:  bigquery.NullInt(pkgNum + 1),
-				RequireSink: bigquery.NullInt(pkgNum + 1),
+				Version:     module.FoundVersion,
 			}
-
-			// For each called symbol, reconstruct sinks and create the corresponding bigquery vuln
-			for symbolNum, cs := range pkg.CallStacks {
-				addedSymbols[cs.Symbol] = true
-				toAdd := *baseVuln
-				toAdd.Symbol = cs.Symbol
-				toAdd.CallSink = bigquery.NullInt(symbolNum + 1)
-				vulns = append(vulns, &toAdd)
+			if len(pkg.CallStacks) > 0 {
+				vuln.Called = true
 			}
-
-			// Find the rest of the vulnerable imported symbols that haven't been called
-			// and create corresponding bigquery vulns
-			for _, affected := range v.OSV.Affected {
-				if affected.Package.Name == module.Path {
-					for _, imp := range affected.EcosystemSpecific.Imports {
-						if imp.Path == pkg.Path {
-							for _, symbol := range imp.Symbols {
-								if !addedSymbols[symbol] {
-									toAdd := *baseVuln
-									toAdd.Symbol = symbol
-									vulns = append(vulns, &toAdd)
-								}
-							}
-						}
-					}
-				}
-			}
+			vulns = append(vulns, vuln)
 		}
 	}
 	return vulns
 }
 
-const TableName = "vulncheck"
+const TableName = "govulncheck"
 
 // Note: before modifying Result or Vuln, make sure the change
 // is a valid schema modification.
@@ -152,21 +125,18 @@
 	CommitTime    time.Time `bigquery:"commit_time"`
 	ScanSeconds   float64   `bigquery:"scan_seconds"`
 	ScanMemory    int64     `bigquery:"scan_memory"`
-	PkgsMemory    int64     `bigquery:"pkgs_memory"`
 	ScanMode      string    `bigquery:"scan_mode"`
-	// Workers is the concurrency limit under which a module is
-	// analyzed. Useful for interpreting memory measurements when
-	// there are multiple modules analyzed in the same process.
-	// 0 if no limit is specified, -1 for potential errors.
-	Workers     int     `bigquery:"workers"`
-	WorkVersion         // InferSchema flattens embedded fields
-	Vulns       []*Vuln `bigquery:"vulns"`
+	WorkVersion             // InferSchema flattens embedded fields
+	Vulns         []*Vuln   `bigquery:"vulns"`
 }
 
 // WorkVersion contains information that can be used to avoid duplicate work.
 // Given two WorkVersion values v1 and v2 for the same module path and version,
 // if v1.Equal(v2) then it is not necessary to scan the module.
 type WorkVersion struct {
+	// GoVersion used at path. Allows precise interpretation
+	// of detected stdlib vulnerabilities.
+	GoVersion string `bigquery:"go_version"`
 	// The version of the currently running code. This tracks changes in the
 	// logic of module scanning and processing.
 	WorkerVersion string `bigquery:"worker_version"`
@@ -182,7 +152,8 @@
 	if v1 == nil || v2 == nil {
 		return v1 == v2
 	}
-	return v1.WorkerVersion == v2.WorkerVersion &&
+	return v1.GoVersion == v2.GoVersion &&
+		v1.WorkerVersion == v2.WorkerVersion &&
 		v1.SchemaVersion == v2.SchemaVersion &&
 		v1.VulnVersion == v2.VulnVersion &&
 		v1.VulnDBLastModified.Equal(v2.VulnDBLastModified)
@@ -200,13 +171,16 @@
 
 // Vuln is a record in Result.
 type Vuln struct {
-	ID          string       `bigquery:"id"`
-	Symbol      string       `bigquery:"symbol"`
-	PackagePath string       `bigquery:"package_path"`
-	ModulePath  string       `bigquery:"module_path"`
-	CallSink    bq.NullInt64 `bigquery:"call_sink"`
-	ImportSink  bq.NullInt64 `bigquery:"import_sink"`
-	RequireSink bq.NullInt64 `bigquery:"require_sink"`
+	ID          string `bigquery:"id"`
+	PackagePath string `bigquery:"package_path"`
+	ModulePath  string `bigquery:"module_path"`
+	Version     string `bigquery:"version"`
+	// Called is currently used to differentiate between
+	// called and imported vulnerabilities. We need it
+	// because we don't conduct an imports analysis yet
+	// use the full results of govulncheck source analysis.
+	// It is not part of the bigquery schema.
+	Called bool `bigquery:"-"`
 }
 
 // SchemaVersion changes whenever the govulncheck schema changes.
diff --git a/internal/govulncheck/govulncheck_test.go b/internal/govulncheck/govulncheck_test.go
index e4da8f7..58481a5 100644
--- a/internal/govulncheck/govulncheck_test.go
+++ b/internal/govulncheck/govulncheck_test.go
@@ -52,7 +52,8 @@
 			OSV: osvEntry,
 			Modules: []*govulncheck.Module{
 				{
-					Path: "example.com/repo/module",
+					FoundVersion: "v0.0.1",
+					Path:         "example.com/repo/module",
 					Packages: []*govulncheck.Package{
 						{
 							Path: "example.com/repo/module/package",
@@ -73,7 +74,8 @@
 			OSV: osvEntry,
 			Modules: []*govulncheck.Module{
 				{
-					Path: "example.com/repo/module",
+					FoundVersion: "v1.0.0",
+					Path:         "example.com/repo/module",
 					Packages: []*govulncheck.Package{
 						{
 							Path: "example.com/repo/module/package",
@@ -89,57 +91,35 @@
 		wantVulns []*Vuln
 	}{
 		{
-			name: "Call One Symbol",
+			name: "call one symbol but not all",
 			vuln: vuln1,
 			wantVulns: []*Vuln{
 				{
 					ID:          "GO-YYYY-1234",
-					Symbol:      "Symbol",
 					PackagePath: "example.com/repo/module/package",
 					ModulePath:  "example.com/repo/module",
-					CallSink:    bigquery.NullInt(1),
-					ImportSink:  bigquery.NullInt(1),
-					RequireSink: bigquery.NullInt(1),
-				},
-				{
-					ID:          "GO-YYYY-1234",
-					Symbol:      "Another",
-					PackagePath: "example.com/repo/module/package",
-					ModulePath:  "example.com/repo/module",
-					CallSink:    bigquery.NullInt(0),
-					ImportSink:  bigquery.NullInt(1),
-					RequireSink: bigquery.NullInt(1),
+					Version:     "v0.0.1",
+					Called:      true,
 				},
 			},
 		},
 		{
-			name: "Call no symbols",
+			name: "call no symbols",
 			vuln: vuln2,
 			wantVulns: []*Vuln{
 				{
 					ID:          "GO-YYYY-1234",
 					PackagePath: "example.com/repo/module/package",
 					ModulePath:  "example.com/repo/module",
-					Symbol:      "Symbol",
-					CallSink:    bigquery.NullInt(0),
-					ImportSink:  bigquery.NullInt(1),
-					RequireSink: bigquery.NullInt(1),
-				},
-				{
-					ID:          "GO-YYYY-1234",
-					PackagePath: "example.com/repo/module/package",
-					ModulePath:  "example.com/repo/module",
-					Symbol:      "Another",
-					CallSink:    bigquery.NullInt(0),
-					ImportSink:  bigquery.NullInt(1),
-					RequireSink: bigquery.NullInt(1),
+					Version:     "v1.0.0",
+					Called:      false,
 				},
 			},
 		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			if diff := cmp.Diff(ConvertGovulncheckOutput(tt.vuln), tt.wantVulns, cmpopts.EquateEmpty()); diff != "" {
+			if diff := cmp.Diff(ConvertGovulncheckOutput(tt.vuln), tt.wantVulns, cmpopts.EquateEmpty(), cmp.AllowUnexported(Vuln{})); diff != "" {
 				t.Errorf("mismatch (-got, +want): %s", diff)
 			}
 		})
@@ -208,6 +188,7 @@
 		SortVersion: "sv",
 		ImportedBy:  10,
 		WorkVersion: WorkVersion{
+			GoVersion:          "go1.19.6",
 			WorkerVersion:      "1",
 			SchemaVersion:      "s",
 			VulnVersion:        "2",
diff --git a/internal/testing/testenv.go b/internal/testing/testenv.go
new file mode 100644
index 0000000..cc34a02
--- /dev/null
+++ b/internal/testing/testenv.go
@@ -0,0 +1,20 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package testing
+
+import (
+	"os/exec"
+	"testing"
+)
+
+// NeedsGoEnv skips t if the current system can't get the environment with
+// “go env” in a subprocess.
+func NeedsGoEnv(t testing.TB) {
+	t.Helper()
+
+	if _, err := exec.LookPath("go"); err != nil {
+		t.Skip("skipping test: can't run go env")
+	}
+}
diff --git a/internal/worker/govulncheck.go b/internal/worker/govulncheck.go
index 6f48380..aa5d06e 100644
--- a/internal/worker/govulncheck.go
+++ b/internal/worker/govulncheck.go
@@ -10,6 +10,7 @@
 	"fmt"
 	"runtime/debug"
 
+	"golang.org/x/pkgsite-metrics/internal"
 	"golang.org/x/pkgsite-metrics/internal/derrors"
 	"golang.org/x/pkgsite-metrics/internal/govulncheck"
 	"golang.org/x/pkgsite-metrics/internal/log"
@@ -53,7 +54,12 @@
 		if err != nil {
 			return nil, err
 		}
+		goEnv, err := internal.GoEnv()
+		if err != nil {
+			return nil, err
+		}
 		h.workVersion = &govulncheck.WorkVersion{
+			GoVersion:          goEnv["GOVERSION"],
 			VulnDBLastModified: lmt,
 			WorkerVersion:      h.cfg.VersionID,
 			SchemaVersion:      govulncheck.SchemaVersion,
diff --git a/internal/worker/govulncheck_scan.go b/internal/worker/govulncheck_scan.go
index 5086f36..93f5156 100644
--- a/internal/worker/govulncheck_scan.go
+++ b/internal/worker/govulncheck_scan.go
@@ -19,7 +19,6 @@
 	"cloud.google.com/go/storage"
 	"golang.org/x/exp/event"
 	"golang.org/x/pkgsite-metrics/internal/bigquery"
-	"golang.org/x/pkgsite-metrics/internal/config"
 	"golang.org/x/pkgsite-metrics/internal/derrors"
 	"golang.org/x/pkgsite-metrics/internal/govulncheck"
 	"golang.org/x/pkgsite-metrics/internal/log"
@@ -208,7 +207,6 @@
 	vulns, err := s.runScanModule(ctx, sreq.Module, info.Version, sreq.Suffix, sreq.Mode, stats)
 	row.ScanSeconds = stats.scanSeconds
 	row.ScanMemory = int64(stats.scanMemory)
-	row.Workers = config.GetEnvInt("CLOUD_RUN_CONCURRENCY", "0", -1)
 	if err != nil {
 		switch {
 		case errors.Is(err, derrors.LoadPackagesNoGoModError) ||
@@ -272,14 +270,14 @@
 	for _, v := range vulns {
 		if mode == ModeGovulncheck {
 			// Return only the called vulns for ModeGovulncheck.
-			if v.CallSink.Valid && v.CallSink.Int64 != 0 {
+			if v.Called {
 				vs = append(vs, v)
 			}
 		} else if mode == modeImports {
 			// For imports mode, return the vulnerability as it
 			// is imported, but not called.
 			nv := *v
-			nv.CallSink = bigquery.NullInt(0)
+			nv.Called = false
 			vs = append(vs, &nv)
 		} else {
 			panic(fmt.Sprintf("vulnsForMode unsupported mode %s", mode))
diff --git a/internal/worker/govulncheck_scan_test.go b/internal/worker/govulncheck_scan_test.go
index 0f02bdd..0039731 100644
--- a/internal/worker/govulncheck_scan_test.go
+++ b/internal/worker/govulncheck_scan_test.go
@@ -15,7 +15,6 @@
 	"testing"
 
 	"cloud.google.com/go/storage"
-	"golang.org/x/pkgsite-metrics/internal/bigquery"
 	"golang.org/x/pkgsite-metrics/internal/buildtest"
 	"golang.org/x/pkgsite-metrics/internal/govulncheck"
 )
@@ -34,15 +33,15 @@
 
 func TestVulnsForMode(t *testing.T) {
 	vulns := []*govulncheck.Vuln{
-		&govulncheck.Vuln{Symbol: "A", CallSink: bigquery.NullInt(0)},
-		&govulncheck.Vuln{Symbol: "B"},
-		&govulncheck.Vuln{Symbol: "C", CallSink: bigquery.NullInt(9)},
+		&govulncheck.Vuln{ID: "A"},
+		&govulncheck.Vuln{ID: "B", Called: false},
+		&govulncheck.Vuln{ID: "C", Called: true},
 	}
 
 	vulnsStr := func(vulns []*govulncheck.Vuln) string {
 		var vs []string
 		for _, v := range vulns {
-			vs = append(vs, fmt.Sprintf("%s:%d", v.Symbol, v.CallSink.Int64))
+			vs = append(vs, fmt.Sprintf("%s:%t", v.ID, v.Called))
 		}
 		return strings.Join(vs, ", ")
 	}
@@ -51,9 +50,9 @@
 		mode string
 		want string
 	}{
-		{modeImports, "A:0, B:0, C:0"},
-		{ModeGovulncheck, "C:9"},
-		{ModeBinary, "A:0, B:0, C:9"},
+		{modeImports, "A:false, B:false, C:false"},
+		{ModeGovulncheck, "C:true"},
+		{ModeBinary, "A:false, B:false, C:true"},
 	} {
 		tc := tc
 		t.Run(tc.mode, func(t *testing.T) {