all: enable integration testing via environment variable

When set, the variable also runs all tests that need to talk to the
cloud infrastructure. It is expected that it will be set by default, so
running the tests will be seamless. The tests will be skipped on
Trybots. In addition, this CL also:

- runs the make tests in internal/sandbox as integration test
- prints a message whenever possible if the flag is not set, for
  increased visibility.
- fixes some issues found by running the integration tests

Change-Id: I44989ce4f200d8a0740aafaf105ac0a9880a3d55
Reviewed-on: https://go-review.googlesource.com/c/pkgsite-metrics/+/478335
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
Run-TryBot: Zvonimir Pavlinovic <zpavlinovic@google.com>
diff --git a/all_test.go b/all_test.go
index 8e24c5b..7da86ec 100644
--- a/all_test.go
+++ b/all_test.go
@@ -14,6 +14,10 @@
 )
 
 func Test(t *testing.T) {
+	if os.Getenv("GO_ECOSYSTEM_INTEGRATION_TESTING") != "1" {
+		t.Log("warning: running go test ./... will skip checking integration tests")
+	}
+
 	bash, err := exec.LookPath("bash")
 	if err != nil {
 		t.Skipf("skipping: %v", err)
diff --git a/checks.bash b/checks.bash
index ee962ca..5875972 100755
--- a/checks.bash
+++ b/checks.bash
@@ -90,6 +90,14 @@
     -o -type f -not -name modules.txt -not -name '*.svg' -not -name '*.ts.snap' -not -name '*.json')
 }
 
+# check_integration prints a warning if the environment
+# variable for integration testing is not set.
+check_integration() {
+  if [[ "${GO_ECOSYSTEM_INTEGRATION_TESTING}" != "1" ]]; then
+    warn "Running go test ./... will skip integration tests (GO_ECOSYSTEM_INTEGRATION_TESTING != 1)"
+  fi
+}
+
 go_linters() {
   check_vet
   check_staticcheck
@@ -102,6 +110,7 @@
 }
 
 runchecks() {
+  check_integration
   check_headers
   go_linters
   go_modtidy
diff --git a/internal/bigquery/bigquery_test.go b/internal/bigquery/bigquery_test.go
index 559b45e..1e16550 100644
--- a/internal/bigquery/bigquery_test.go
+++ b/internal/bigquery/bigquery_test.go
@@ -6,7 +6,6 @@
 
 import (
 	"context"
-	"flag"
 	"fmt"
 	"testing"
 	"time"
@@ -16,11 +15,12 @@
 	"github.com/google/go-cmp/cmp"
 	"github.com/google/go-cmp/cmp/cmpopts"
 	"golang.org/x/exp/slices"
+	test "golang.org/x/pkgsite-metrics/internal/testing"
 )
 
-var integration = flag.Bool("integration", false, "test against actual service")
-
 func TestIntegration(t *testing.T) {
+	test.NeedsIntegrationEnv(t)
+
 	must := func(err error) {
 		t.Helper()
 		if err != nil {
@@ -28,9 +28,6 @@
 		}
 	}
 
-	if !*integration {
-		t.Skip("missing -integration")
-	}
 	ctx := context.Background()
 	const projectID = "go-ecosystem"
 
@@ -81,9 +78,8 @@
 }
 
 func TestIsNotFoundError(t *testing.T) {
-	if !*integration {
-		t.Skip("missing -integration")
-	}
+	test.NeedsIntegrationEnv(t)
+
 	client, err := bq.NewClient(context.Background(), "go-ecosystem")
 	if err != nil {
 		t.Fatal(err)
diff --git a/internal/govulncheck/govulncheck.go b/internal/govulncheck/govulncheck.go
index 394a8b4..df4625e 100644
--- a/internal/govulncheck/govulncheck.go
+++ b/internal/govulncheck/govulncheck.go
@@ -195,7 +195,7 @@
 	m := map[[2]string]*WorkVersion{}
 	query := bigquery.PartitionQuery{
 		Table:       c.FullTableName(TableName),
-		Columns:     "module_path, version, worker_version, schema_version, x_vuln_version, vulndb_last_modified",
+		Columns:     "module_path, version, go_version, worker_version, schema_version, x_vuln_version, vulndb_last_modified",
 		PartitionOn: "module_path, sort_version",
 		OrderBy:     "created_at DESC",
 	}.String()
diff --git a/internal/govulncheck/govulncheck_test.go b/internal/govulncheck/govulncheck_test.go
index 2af221a..b50014f 100644
--- a/internal/govulncheck/govulncheck_test.go
+++ b/internal/govulncheck/govulncheck_test.go
@@ -6,7 +6,6 @@
 
 import (
 	"context"
-	"flag"
 	"fmt"
 	"testing"
 	"time"
@@ -15,6 +14,7 @@
 	"github.com/google/go-cmp/cmp"
 	"github.com/google/go-cmp/cmp/cmpopts"
 	"golang.org/x/pkgsite-metrics/internal/bigquery"
+	test "golang.org/x/pkgsite-metrics/internal/testing"
 	"golang.org/x/vuln/exp/govulncheck"
 	"golang.org/x/vuln/osv"
 	"google.golang.org/api/iterator"
@@ -146,9 +146,9 @@
 	}
 }
 
-var integration = flag.Bool("integration", false, "test against actual service")
-
 func TestIntegration(t *testing.T) {
+	test.NeedsIntegrationEnv(t)
+
 	must := func(err error) {
 		t.Helper()
 		if err != nil {
@@ -156,9 +156,6 @@
 		}
 	}
 
-	if !*integration {
-		t.Skip("missing -integration")
-	}
 	ctx := context.Background()
 	const projectID = "go-ecosystem"
 
diff --git a/internal/sandbox/sandbox_test.go b/internal/sandbox/sandbox_test.go
index b24dfa4..3da43ca 100644
--- a/internal/sandbox/sandbox_test.go
+++ b/internal/sandbox/sandbox_test.go
@@ -12,13 +12,24 @@
 	"testing"
 
 	"golang.org/x/pkgsite-metrics/internal/derrors"
+	test "golang.org/x/pkgsite-metrics/internal/testing"
 )
 
-// These tests require a minimal bundle, in testdata/bundle.
-// The Makefile in this directory will build and install
-// the binaries needed for the test.
+func TestIntegration(t *testing.T) {
+	test.NeedsIntegrationEnv(t)
 
-func Test(t *testing.T) {
+	cmd := exec.Command("make")
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	if err := cmd.Run(); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// TestSandbox tests require a minimal bundle, in testdata/bundle.
+// The Makefile in this directory will build and install the binaries
+// needed for the test. See TestIntegration above.
+func TestSandbox(t *testing.T) {
 	if os.Getenv("RUN_FROM_MAKE") != "1" {
 		t.Skip("skipping; must run with 'make'.")
 	}
diff --git a/internal/testing/testenv.go b/internal/testing/testenv.go
index cc34a02..d353e5a 100644
--- a/internal/testing/testenv.go
+++ b/internal/testing/testenv.go
@@ -5,6 +5,7 @@
 package testing
 
 import (
+	"os"
 	"os/exec"
 	"testing"
 )
@@ -18,3 +19,17 @@
 		t.Skip("skipping test: can't run go env")
 	}
 }
+
+// NeedsIntegrationEnv skips t if the underlying test satisfies integration
+// requirements. It must be executed in the non-short test mode with an
+// appropriate integration environment.
+func NeedsIntegrationEnv(t testing.TB) {
+	t.Helper()
+
+	if os.Getenv("GO_ECOSYSTEM_INTEGRATION_TESTING") != "1" {
+		t.Skip("skipping; need local test environment with GCS permissions")
+	}
+	if testing.Short() {
+		t.Skip("skipping; integration tests must be run in non-short mode")
+	}
+}
diff --git a/internal/worker/govulncheck_scan.go b/internal/worker/govulncheck_scan.go
index 4e44f97..ba52eb8 100644
--- a/internal/worker/govulncheck_scan.go
+++ b/internal/worker/govulncheck_scan.go
@@ -422,7 +422,6 @@
 	if err := copyToLocalFile(localPathname, false, gcsPathname, gcsOpenFileFunc(ctx, s.gcsBucket)); err != nil {
 		return nil, err
 	}
-
 	vulns, err := s.runGovulncheckCmd(localPathname, "", stats)
 	if err != nil {
 		return nil, err
@@ -435,10 +434,10 @@
 	govulncheckCmd := exec.Command(s.govulncheckPath, "-json", pattern)
 	govulncheckCmd.Env = append(govulncheckCmd.Environ(), "GOVULNDB=file://"+s.vulnDBDir)
 	govulncheckCmd.Dir = tempDir
-	output, err := govulncheckCmd.Output()
+	output, err := govulncheckCmd.CombinedOutput()
 	if err != nil {
 		if e := (&exec.ExitError{}); !errors.As(err, &e) || e.ProcessState.ExitCode() != 3 {
-			return nil, err
+			return nil, fmt.Errorf("govulncheck error: err=%v out=%s", err, output)
 		}
 	}
 	stats.scanSeconds = time.Since(start).Seconds()
diff --git a/internal/worker/govulncheck_scan_test.go b/internal/worker/govulncheck_scan_test.go
index 0039731..c111fc0 100644
--- a/internal/worker/govulncheck_scan_test.go
+++ b/internal/worker/govulncheck_scan_test.go
@@ -7,7 +7,6 @@
 import (
 	"context"
 	"errors"
-	"flag"
 	"fmt"
 	"io"
 	"path/filepath"
@@ -17,10 +16,9 @@
 	"cloud.google.com/go/storage"
 	"golang.org/x/pkgsite-metrics/internal/buildtest"
 	"golang.org/x/pkgsite-metrics/internal/govulncheck"
+	test "golang.org/x/pkgsite-metrics/internal/testing"
 )
 
-var integration = flag.Bool("integration", false, "test against actual service")
-
 func TestAsScanError(t *testing.T) {
 	check := func(err error, want bool) {
 		if got := errors.As(err, new(scanError)); got != want {
@@ -80,52 +78,53 @@
 	}
 
 	ctx := context.Background()
-	t.Run("govulncheck", func(t *testing.T) {
-		s := &scanner{insecure: true, govulncheckPath: govulncheckPath, vulnDBDir: vulndb}
-		stats := &scanStats{}
-		vulns, err := s.runGovulncheckScanInsecure(ctx,
-			"golang.org/vuln", "v0.0.0",
-			"../testdata/module", ModeGovulncheck, stats)
-		if err != nil {
-			t.Fatal(err)
-		}
-		wantID := "GO-2021-0113"
-		found := false
-		for _, v := range vulns {
-			if v.OSV.ID == wantID {
-				found = true
-				break
+	for _, tc := range []struct {
+		name  string
+		input string
+		mode  string
+	}{
+		{"govulncheck", "../testdata/module", ModeGovulncheck},
+		// test_vuln binary on gcs is built from ../testdata/module.
+		{"govulncheck", "test_vuln", ModeBinary},
+	} {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			s := &scanner{insecure: true, govulncheckPath: govulncheckPath, vulnDBDir: vulndb}
+
+			if tc.mode == ModeBinary {
+				test.NeedsIntegrationEnv(t)
+
+				gcsClient, err := storage.NewClient(ctx)
+				if err != nil {
+					t.Fatal(err)
+				}
+				s.gcsBucket = gcsClient.Bucket("go-ecosystem")
 			}
-		}
-		if !found {
-			t.Errorf("want %s, did not find it in %d vulns", wantID, len(vulns))
-		}
-		if got := stats.scanSeconds; got <= 0 {
-			t.Errorf("scan time not collected or negative: %v", got)
-		}
-		if got := stats.scanMemory; got <= 0 {
-			t.Errorf("scan memory not collected or negative: %v", got)
-		}
-	})
-	t.Run("binary", func(t *testing.T) {
-		if !*integration { // needs GCS read permission, not available on kokoro
-			t.Skip("missing -integration")
-		}
-		s := &scanner{govulncheckPath: govulncheckPath, vulnDBDir: vulndb}
-		gcsClient, err := storage.NewClient(context.Background())
-		if err != nil {
-			t.Fatal(err)
-		}
-		s.gcsBucket = gcsClient.Bucket("go-ecosystem")
-		stats := &scanStats{}
-		vulns, err := s.runGovulncheckScanInsecure(ctx,
-			"golang.org/vuln", "v0.0.0",
-			"cmd/worker", ModeBinary, stats)
-		if err != nil {
-			t.Fatal(err)
-		}
-		if g, w := len(vulns), 14; g != w {
-			t.Errorf("got %d vulns, want %d", g, w)
-		}
-	})
+
+			stats := &scanStats{}
+			vulns, err := s.runGovulncheckScanInsecure(ctx,
+				"golang.org/vuln", "v0.0.0",
+				tc.input, tc.mode, stats)
+			if err != nil {
+				t.Fatal(err)
+			}
+			wantID := "GO-2021-0113"
+			found := false
+			for _, v := range vulns {
+				if v.OSV.ID == wantID {
+					found = true
+					break
+				}
+			}
+			if !found {
+				t.Errorf("want %s, did not find it in %d vulns", wantID, len(vulns))
+			}
+			if got := stats.scanSeconds; got <= 0 {
+				t.Errorf("scan time not collected or negative: %v", got)
+			}
+			if got := stats.scanMemory; got <= 0 {
+				t.Errorf("scan memory not collected or negative: %v", got)
+			}
+		})
+	}
 }