internal/worker: run "go clean" in sandbox directly

We want to run "go clean" in the sandbox to remove cached files, so
we don't run out of disk space (on Cloud Run, the disk is backed by
memory).

Previously, we ran the vulncheck_sandbox binary, and it had
code to run go clean.

But we could just invoke "go" in the sandbox and pass it "clean"
and the other arguments.

This CL does that, and removes the now-dead code from
vulncheck_sandbox. It also logs the sandbox's disk usage before and
after the clean, so we can confirm that go clean is actually freeing
space.

Change-Id: Ieac31588d21e4db5f60933ed20ac88516c920eeb
Reviewed-on: https://go-review.googlesource.com/c/pkgsite-metrics/+/471162
Reviewed-by: Maceo Thompson <maceothompson@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
diff --git a/cmd/vulncheck_sandbox/vulncheck_sandbox.go b/cmd/vulncheck_sandbox/vulncheck_sandbox.go
index 88b27b7..5a5b0a9 100644
--- a/cmd/vulncheck_sandbox/vulncheck_sandbox.go
+++ b/cmd/vulncheck_sandbox/vulncheck_sandbox.go
@@ -19,28 +19,19 @@
 	"io"
 	"log"
 	"os"
-	"os/exec"
 
 	"golang.org/x/pkgsite-metrics/internal/load"
 	"golang.org/x/pkgsite-metrics/internal/worker"
 	"golang.org/x/vuln/vulncheck"
 )
 
-var (
-	// vulnDBDir should contain a local copy of the vuln DB, with a LAST_MODIFIED
-	// file containing a timestamp.
-	vulnDBDir = flag.String("vulndb", "/go-vulndb", "directory of local vuln DB")
-
-	clean = flag.Bool("clean", false, "clean caches instead of running a module")
-)
+// vulnDBDir should contain a local copy of the vuln DB, with a LAST_MODIFIED
+// file containing a timestamp.
+var vulnDBDir = flag.String("vulndb", "/go-vulndb", "directory of local vuln DB")
 
 func main() {
 	flag.Parse()
-	if *clean {
-		cleanGoCaches()
-	} else {
-		run(os.Stdout, flag.Args(), *vulnDBDir)
-	}
+	run(os.Stdout, flag.Args(), *vulnDBDir)
 }
 
 func run(w io.Writer, args []string, vulnDBDir string) {
@@ -117,12 +108,3 @@
 	return res, nil
 
 }
-
-func cleanGoCaches() {
-	_, err := exec.Command("go", "clean", "-cache", "-modcache").CombinedOutput()
-	if err != nil {
-		fmt.Fprintf(os.Stderr, "%s\n", err)
-		os.Exit(1)
-	}
-	fmt.Printf("go clean succeeded in sandbox\n")
-}
diff --git a/internal/worker/vulncheck_scan.go b/internal/worker/vulncheck_scan.go
index 7b664c5..c509b04 100644
--- a/internal/worker/vulncheck_scan.go
+++ b/internal/worker/vulncheck_scan.go
@@ -265,6 +265,7 @@
 		if activeScans.Add(-1) == 0 {
 			logMemory(ctx, fmt.Sprintf("before 'go clean' for %s@%s", modulePath, version))
 			s.cleanGoCaches(ctx)
+			logMemory(ctx, "after 'go clean'")
 		}
 	}()
 
@@ -742,18 +743,29 @@
 }
 
 func (s *scanner) cleanGoCaches(ctx context.Context) error {
-	if !config.OnCloudRun() {
-		log.Infof(ctx, "not on Cloud Run, so not cleaning caches")
-		return nil
-	}
 	var (
 		out []byte
 		err error
 	)
+
+	logDiskUsage := func(msg string) {
+		log.Debugf(ctx, "sandbox disk usage %s clean:\n%s",
+			msg, diskUsage("/bundle/rootfs/root", "/bundle/rootfs/modules"))
+	}
+
 	if s.insecure {
+		if !config.OnCloudRun() {
+			// Avoid cleaning the developer's local caches.
+			log.Infof(ctx, "not on Cloud Run, so not cleaning caches")
+			return nil
+		}
 		out, err = exec.Command("go", "clean", "-cache", "-modcache").CombinedOutput()
 	} else {
-		out, err = s.sbox.Run(ctx, "/binaries/vulncheck_sandbox", "-clean")
+		logDiskUsage("before")
+		out, err = s.sbox.Run(ctx, "go", "clean", "-cache", "-modcache")
+		if err == nil {
+			logDiskUsage("after")
+		}
 	}
 	if err != nil {
 		return fmt.Errorf("cleaning Go caches: %s", derrors.IncludeStderr(err))
@@ -817,3 +829,13 @@
 		vulncheckRequestParams: rp,
 	}, nil
 }
+
+// diskUsage runs the du command to determine how much disk space the given
+// directories occupy.
+func diskUsage(dirs ...string) string {
+	out, err := exec.Command("du", append([]string{"-h", "-s"}, dirs...)...).Output()
+	if err != nil {
+		return fmt.Sprintf("ERROR: %s", derrors.IncludeStderr(err))
+	}
+	return strings.TrimSpace(string(out))
+}