internal/config: add BinaryDir

Make the directory in which we keep binaries a configuration parameter.

We decided to do this because it was hard to come up with a directory
for binaries that would work on both Cloud Run and locally.

Using /tmp is fine locally, but Cloud Run mounts something on /tmp,
so a docker image that writes files to /tmp will see those files
when executed directly with `docker run`, but not on Cloud Run.

In the Dockerfile, we use /app/binaries, and the sandbox bind-mounts
that to the same path. But that won't work locally because normal
users can't create top-level directories. Locally, a user can set
GO_ECOSYSTEM_BINARY_DIR, or let the binary directory default to
/tmp/binaries.

Change-Id: I128c5cece34ee0b4612dd879bd103d57e68d5484
Reviewed-on: https://go-review.googlesource.com/c/pkgsite-metrics/+/477475
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Zvonimir Pavlinovic <zpavlinovic@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
diff --git a/cmd/worker/Dockerfile b/cmd/worker/Dockerfile
index 98ff116..775d554 100644
--- a/cmd/worker/Dockerfile
+++ b/cmd/worker/Dockerfile
@@ -8,6 +8,11 @@
 #   make test
 # from the repo root.
 
+# NOTE: don't put anything in /tmp here. It will work locally,
+# but Cloud Run mounts something else to /tmp, so anything
+# installed here will be shadowed.
+
+
 FROM golang:1.19.4
 
 LABEL maintainer="Go Ecosystem Team <go-ecosystem-team@google.com>"
@@ -19,17 +24,17 @@
 # Create some directories.
 
 # The worker binary and related files live here.
-RUN mkdir app
-# The module being analyzed is unzipped here.
-# The sandbox mounts this directory.
-RUN mkdir module
+RUN mkdir /app
 
-# Where binaries and modules live.
-# The sandbox config.json file maps these to the same paths
-# inside the sandbox.
-RUN mkdir /tmp/binaries
-RUN mkdir /tmp/modules
+# Where binaries live.
+# Mapped by the sandbox config to the same place inside the sandbox.
+# If you change this, you must also edit the bind mount in config.json.commented.
+#
+# We use an ARG command here to make a variable, but this is not intended to be
+# provided as a command-line argument to `docker build`.
+ARG BINARY_DIR=/app/binaries
 
+RUN mkdir $BINARY_DIR
 
 #### Sandbox setup
 
@@ -78,12 +83,11 @@
 # Build the worker binary and put it in /app.
 RUN go build -mod=readonly -o /app/worker ./cmd/worker
 
-# Install the latest version of govulncheck in the binaries directory.
-RUN GOBIN=/tmp/binaries go install golang.org/x/vuln/cmd/govulncheck@latest
+# Install the latest version of govulncheck in app/binaries.
+RUN GOBIN=$BINARY_DIR go install golang.org/x/vuln/cmd/govulncheck@latest
 
-# Build the program that runs govulncheck inside the sandbox and install it in the sandbox's
-# binaries directory.
-RUN go build -mod=readonly -o /tmp/binaries/govulncheck_sandbox ./cmd/govulncheck_sandbox
+# Build the program that runs govulncheck inside the sandbox.
+RUN go build -mod=readonly -o $BINARY_DIR/govulncheck_sandbox ./cmd/govulncheck_sandbox
 
 # Build the sandbox runner program and put it in the bundle root.
 RUN go build -mod=readonly -o /bundle/rootfs/runner ./internal/sandbox/runner.go
@@ -99,4 +103,6 @@
 ARG BQ_DATASET
 ENV GO_ECOSYSTEM_BIGQUERY_DATASET=$BQ_DATASET
 
+ENV GO_ECOSYSTEM_BINARY_DIR=$BINARY_DIR
+
 CMD ["./worker"]
diff --git a/config.json.commented b/config.json.commented
index cb210de..1d87669 100644
--- a/config.json.commented
+++ b/config.json.commented
@@ -99,11 +99,11 @@
         # files in that directory will be hidden to code running inside the
         # sandbox.
         {
-            # Mount /tmp/binaries inside the sandbox to
+            # Mount /app/binaries inside the sandbox to
             # the same directory outside.
-            "destination": "/tmp/binaries",
+            "destination": "/app/binaries",
             "type": "none",
-            "source": "/tmp/binaries",
+            "source": "/app/binaries",
             "options": ["bind"]
         },
         {
diff --git a/internal/config/config.go b/internal/config/config.go
index c414f77..0e5bbde 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -83,6 +83,9 @@
 	// BinaryBucket holds binaries for govulncheck scanning.
 	BinaryBucket string
 
+	// BinaryDir is the local directory for binaries.
+	BinaryDir string
+
 	// The host, port and user of the pkgsite database used to find
 	// modules to scan.
 	PkgsiteDBHost string
@@ -124,6 +127,7 @@
 		QueueURL:              os.Getenv("GO_ECOSYSTEM_QUEUE_URL"),
 		VulnDBBucketProjectID: os.Getenv("GO_ECOSYSTEM_VULNDB_BUCKET_PROJECT"),
 		BinaryBucket:          os.Getenv("GO_ECOSYSTEM_BINARY_BUCKET"),
+		BinaryDir:             GetEnv("GO_ECOSYSTEM_BINARY_DIR", "/tmp/binaries"),
 		PkgsiteDBHost:         GetEnv("GO_ECOSYSTEM_PKGSITE_DB_HOST", "localhost"),
 		PkgsiteDBPort:         GetEnv("GO_ECOSYSTEM_PKGSITE_DB_PORT", "5432"),
 		PkgsiteDBName:         GetEnv("GO_ECOSYSTEM_PKGSITE_DB_NAME", "discovery-db"),
diff --git a/internal/worker/analysis.go b/internal/worker/analysis.go
index df25c77..6c6ba64 100644
--- a/internal/worker/analysis.go
+++ b/internal/worker/analysis.go
@@ -77,7 +77,7 @@
 	if req.Binary != path.Base(req.Binary) {
 		return fmt.Errorf("%w: analysis: binary name contains slashes (must be a basename)", derrors.InvalidArgument)
 	}
-	localBinaryPath := path.Join(binaryDir, req.Binary)
+	localBinaryPath := path.Join(s.cfg.BinaryDir, req.Binary)
 	srcPath := path.Join(analysisBinariesBucketDir, req.Binary)
 	const executable = true
 	if err := copyToLocalFile(localBinaryPath, executable, srcPath, s.openFile); err != nil {
diff --git a/internal/worker/analysis_test.go b/internal/worker/analysis_test.go
index 4388abc..fc31cce 100644
--- a/internal/worker/analysis_test.go
+++ b/internal/worker/analysis_test.go
@@ -6,7 +6,6 @@
 
 import (
 	"context"
-	"os"
 	"path/filepath"
 	"strings"
 	"testing"
@@ -22,7 +21,6 @@
 )
 
 func TestRunAnalysisBinary(t *testing.T) {
-	mustMakeBinaryDir(t)
 	binPath, cleanup := buildtest.GoBuild(t, "testdata/analyzer", "")
 	defer cleanup()
 
@@ -97,7 +95,6 @@
 		modulePath = "a.com/m"
 		version    = "v1.2.3"
 	)
-	mustMakeBinaryDir(t)
 	binaryPath, cleanup := buildtest.GoBuild(t, "testdata/analyzer", "")
 	defer cleanup()
 	proxyClient, cleanup2 := proxytest.SetupTestClient(t, []*proxytest.Module{
@@ -129,6 +126,7 @@
 			proxyClient: proxyClient,
 			cfg: &config.Config{
 				BinaryBucket: "unused",
+				BinaryDir:    t.TempDir(),
 			},
 		},
 	}
@@ -179,9 +177,3 @@
 	}
 	diff(want, got)
 }
-
-func mustMakeBinaryDir(t *testing.T) {
-	if err := os.Mkdir(binaryDir, 0777); err != nil && !os.IsExist(err) {
-		t.Fatal(err)
-	}
-}
diff --git a/internal/worker/govulncheck_scan.go b/internal/worker/govulncheck_scan.go
index 0ebdab9..af19532 100644
--- a/internal/worker/govulncheck_scan.go
+++ b/internal/worker/govulncheck_scan.go
@@ -128,6 +128,7 @@
 	gcsBucket   *storage.BucketHandle
 	insecure    bool
 	sbox        *sandbox.Sandbox
+	binaryDir   string
 }
 
 func newScanner(ctx context.Context, h *GovulncheckServer) (*scanner, error) {
@@ -153,6 +154,7 @@
 		gcsBucket:   bucket,
 		insecure:    h.cfg.Insecure,
 		sbox:        sbox,
+		binaryDir:   h.cfg.BinaryDir,
 	}, nil
 }
 
@@ -283,22 +285,22 @@
 }
 
 // Inside the sandbox, the user is root and their $HOME directory is /root.
-const (
-	// The Go cache resides in its default location, $HOME/.cache/go-build.
-	sandboxGoCache = "root/.cache/go-build"
-	// Where the govulncheck binary lives.
-	govulncheckPath = binaryDir + "/govulncheck"
-)
+
+// The Go cache resides in its default location, $HOME/.cache/go-build.
+const sandboxGoCache = "root/.cache/go-build"
+
+// Where the govulncheck binary lives.
+//	govulncheckPath = binaryDir + "/govulncheck"
 
 // runScanModule fetches the module version from the proxy, and analyzes it for
 // vulnerabilities.
-func (s *scanner) runScanModule(ctx context.Context, modulePath, version, binaryDir, mode string, stats *scanStats) (bvulns []*govulncheck.Vuln, err error) {
+func (s *scanner) runScanModule(ctx context.Context, modulePath, version, binDir, mode string, stats *scanStats) (bvulns []*govulncheck.Vuln, err error) {
 	err = doScan(ctx, modulePath, version, s.insecure, func() error {
 		var vulns []*govulncheckapi.Vuln
 		if s.insecure {
-			vulns, err = s.runGovulncheckScanInsecure(ctx, modulePath, version, binaryDir, mode, stats)
+			vulns, err = s.runGovulncheckScanInsecure(ctx, modulePath, version, binDir, mode, stats)
 		} else {
-			vulns, err = s.runGovulncheckScanSandbox(ctx, modulePath, version, binaryDir, mode, stats)
+			vulns, err = s.runGovulncheckScanSandbox(ctx, modulePath, version, binDir, mode, stats)
 		}
 		if err != nil {
 			return err
@@ -312,6 +314,7 @@
 }
 
 func (s *scanner) runGovulncheckScanSandbox(ctx context.Context, modulePath, version, binDir, mode string, stats *scanStats) (_ []*govulncheckapi.Vuln, err error) {
+
 	if mode == ModeBinary {
 		return s.runBinaryScanSandbox(ctx, modulePath, version, binDir, stats)
 	}
@@ -331,13 +334,7 @@
 		return nil, err
 	}
 
-	stdout, err := s.sbox.Command(binaryDir+"/govulncheck_sandbox", govulncheckPath, ModeGovulncheck, smdir).Output()
-	log.Infof(ctx, "done with govulncheck in sandbox: %s@%s err=%v", modulePath, version, err)
-
-	if err != nil {
-		return nil, errors.New(derrors.IncludeStderr(err))
-	}
-	response, err := govulncheck.UnmarshalSandboxResponse(stdout)
+	response, err := s.runGovulncheckSandbox(ctx, ModeGovulncheck, smdir)
 	if err != nil {
 		return nil, err
 	}
@@ -354,7 +351,7 @@
 	// Copy the binary from GCS to the local disk, because vulncheck.Binary
 	// ultimately requires a ReaderAt and GCS doesn't provide that.
 	gcsPathname := fmt.Sprintf("%s/%s@%s/%s", gcsBinaryDir, modulePath, version, binDir)
-	const destDir = binaryDir
+	destDir := s.binaryDir
 	log.Debug(ctx, "copying",
 		"from", gcsPathname,
 		"to", destDir,
@@ -374,14 +371,7 @@
 		return nil, err
 	}
 
-	log.Infof(ctx, "running govulncheck in sandbox on %s: %s@%s/%s", modulePath, version, binDir, destf.Name())
-	stdout, err := s.sbox.Command(binaryDir+"/govulncheck_sandbox", govulncheckPath, ModeBinary, destf.Name()).Output()
-	log.Infof(ctx, "done with govulncheck in sandbox on %s: %s@%s/%s err=%v", modulePath, version, binDir, destf.Name(), err)
-
-	if err != nil {
-		return nil, errors.New(derrors.IncludeStderr(err))
-	}
-	response, err := govulncheck.UnmarshalSandboxResponse(stdout)
+	response, err := s.runGovulncheckSandbox(ctx, ModeBinary, destf.Name())
 	if err != nil {
 		return nil, err
 	}
@@ -391,6 +381,17 @@
 	return response.Res.Vulns, nil
 }
 
+func (s *scanner) runGovulncheckSandbox(ctx context.Context, mode, arg string) (*govulncheck.SandboxResponse, error) {
+	log.Infof(ctx, "running govulncheck in sandbox: mode %s, arg %q", mode, arg)
+	cmd := s.sbox.Command(filepath.Join(s.binaryDir, "govulncheck_sandbox"), filepath.Join(s.binaryDir, "govulncheck"), mode, arg)
+	stdout, err := cmd.Output()
+	log.Infof(ctx, "govulncheck in sandbox finished with err=%v", err)
+	if err != nil {
+		return nil, errors.New(derrors.IncludeStderr(err))
+	}
+	return govulncheck.UnmarshalSandboxResponse(stdout)
+}
+
 func (s *scanner) runGovulncheckScanInsecure(ctx context.Context, modulePath, version, binaryDir, mode string, stats *scanStats) (_ []*govulncheckapi.Vuln, err error) {
 	if mode == ModeBinary {
 		return s.runBinaryScanInsecure(ctx, modulePath, version, binaryDir, os.TempDir(), stats)
@@ -401,7 +402,7 @@
 	if err := prepareModule(ctx, modulePath, version, mdir, s.proxyClient, true); err != nil {
 		return nil, err
 	}
-	vulns, err := runGovulncheckCmd("./...", mdir, stats)
+	vulns, err := s.runGovulncheckCmd("./...", mdir, stats)
 	if err != nil {
 		return nil, err
 	}
@@ -422,21 +423,21 @@
 		return nil, err
 	}
 
-	vulns, err := runGovulncheckCmd(localPathname, "", stats)
+	vulns, err := s.runGovulncheckCmd(localPathname, "", stats)
 	if err != nil {
 		return nil, err
 	}
 	return vulns, nil
 }
 
-func runGovulncheckCmd(pattern, tempDir string, stats *scanStats) ([]*govulncheckapi.Vuln, error) {
-	govulncheckName := govulncheckPath
-	if !fileExists(govulncheckName) {
-		govulncheckName = "govulncheck"
+func (s *scanner) runGovulncheckCmd(pattern, tempDir string, stats *scanStats) ([]*govulncheckapi.Vuln, error) {
+	govulncheckPath := filepath.Join(s.binaryDir, "govulncheck")
+	if !fileExists(govulncheckPath) {
+		govulncheckPath = "govulncheck"
 	}
 
 	start := time.Now()
-	govulncheckCmd := exec.Command(govulncheckName, "-json", pattern)
+	govulncheckCmd := exec.Command(govulncheckPath, "-json", pattern)
 	govulncheckCmd.Dir = tempDir
 	output, err := govulncheckCmd.Output()
 	if e := (&exec.ExitError{}); !errors.As(err, &e) && e.ProcessState.ExitCode() != 3 {
diff --git a/internal/worker/scan.go b/internal/worker/scan.go
index d8087f0..6501c73 100644
--- a/internal/worker/scan.go
+++ b/internal/worker/scan.go
@@ -34,10 +34,9 @@
 	// The Go module cache resides in its default location, $HOME/go/pkg/mod.
 	sandboxGoModCache = "root/go/pkg/mod"
 
-	// The directories where binaries and modules live.
-	// The sandbox mounts this directory to the same path internally, so this
-	// path works for both secure and insecure modes.
-	binaryDir  = "/tmp/binaries"
+	// The directory where modules live.
+	// The sandbox mounts this directory to the same path internally, so
+	// this path works for both secure and insecure modes.
 	modulesDir = "/tmp/modules"
 )