sandbox: add gvisor runsc-based sandbox

This creates a VM (running Container-Optimized OS) with configuration
such that it boots up and downloads/configures the runsc Docker
runtime, reloading the existing Docker daemon on the VM, and then
creates a new privileged Docker container with the host's
/var/run/docker.sock available to the container. From within that
container it's then possible for the new sandbox HTTP server to create
its own Docker containers running under gvisor (using docker run
--runtime=runsc).

This then adds a regional us-central1 load balancer and instance group
manager & instance template to run these VMs automatically across
us-central1. Then the play.golang.org frontend can hit that URL
(http://sandbox.play-sandbox-fwd.il4.us-central1.lb.golang-org.internal).

Fixes golang/go#25224
Updates golang/go#30439 (remove nacl)
Updates golang/go#33629 (this CL makes the playground support 2 versions)

Change-Id: I56c8a86875abcde9d29fa7592b23c0ecd3861458
Reviewed-on: https://go-review.googlesource.com/c/playground/+/195983
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Alexander Rakoczy <alex@golang.org>
Reviewed-by: Emmanuel Odeke <emm.odeke@gmail.com>
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3fa8c86
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.terraform
diff --git a/Dockerfile b/Dockerfile
index 2e84a6c..063f9a5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,12 +2,14 @@
 # Use of this source code is governed by a BSD-style
 # license that can be found in the LICENSE file.
 
+############################################################################
 FROM debian:stretch AS nacl
 
 RUN apt-get update && apt-get install -y --no-install-recommends curl bzip2 ca-certificates
 
 RUN curl -s https://storage.googleapis.com/nativeclient-mirror/nacl/nacl_sdk/trunk.544461/naclsdk_linux.tar.bz2 | tar -xj -C /tmp --strip-components=2 pepper_67/tools/sel_ldr_x86_64
 
+############################################################################
 FROM debian:stretch AS build
 LABEL maintainer="golang-dev@googlegroups.com"
 
@@ -64,12 +66,32 @@
 WORKDIR /go/src/playground
 RUN go install
 
+############################################################################
+# Temporary Docker stage to add a pre-Go1.14 $GOROOT into our
+# container for early linux/amd64 testing.
+FROM golang:1.13 AS temp_pre_go14
+
+ENV BUILD_DEPS 'curl git gcc patch libc6-dev ca-certificates'
+RUN apt-get update && apt-get install -y --no-install-recommends ${BUILD_DEPS}
+
+# go1.14beta1:
+ENV GO_REV a5bfd9da1d1b24f326399b6b75558ded14514f23
+
+RUN cd /usr/local && git clone https://go.googlesource.com/go go1.14 && cd go1.14 && git reset --hard ${GO_REV}
+WORKDIR /usr/local/go1.14/src
+RUN ./make.bash
+ENV GOROOT /usr/local/go1.14
+RUN ../bin/go install --tags=faketime std
+
+############################################################################
+# Final stage.
 FROM debian:stretch
 
 RUN apt-get update && apt-get install -y git ca-certificates --no-install-recommends
 
 COPY --from=build /usr/local/go /usr/local/go
 COPY --from=nacl /tmp/sel_ldr_x86_64 /usr/local/bin
+COPY --from=temp_pre_go14 /usr/local/go1.14 /usr/local/go1.14
 
 ENV GOPATH /go
 ENV PATH /usr/local/go/bin:$GOPATH/bin:$PATH
@@ -101,9 +123,6 @@
 COPY static /app/static
 WORKDIR /app
 
-# Run tests
-RUN /app/playground test
-
 # Whether we allow third-party imports via proxy.golang.org:
 ENV ALLOW_PLAY_MODULE_DOWNLOADS true
 
diff --git a/Makefile b/Makefile
index 2cf568d..bac164f 100644
--- a/Makefile
+++ b/Makefile
@@ -9,12 +9,27 @@
 docker:
 	docker build -t golang/playground .
 
-test:
+runlocal:
+	docker network create sandnet || true
+	docker kill play_dev || true
+	docker run --name=play_dev --rm --network=sandnet -ti -p 127.0.0.1:8081:8080/tcp golang/playground
+
+test_go:
 	# Run fast tests first: (and tests whether, say, things compile)
 	GO111MODULE=on go test -v
-	# Then run the slower tests, which happen as one of the
-	# Dockerfile RUN steps:
-	docker build -t golang/playground .
+
+test_nacl: docker
+	docker kill sandbox_front_test || true
+	docker run --rm --name=sandbox_front_test --network=sandnet -t golang/playground testnacl
+
+test_gvisor: docker
+	docker kill sandbox_front_test || true
+	docker run --rm --name=sandbox_front_test --network=sandnet -t golang/playground test
+
+# Note: test_gvisor is not included in "test" yet, because it requires
+# running a separate server first ("make runlocal" in the sandbox
+# directory)
+test: test_go test_nacl
 
 update-cloudbuild-trigger:
 	# The gcloud CLI doesn't yet support updating a trigger.
diff --git a/go.sum b/go.sum
index cc44352..0c2bd23 100644
--- a/go.sum
+++ b/go.sum
@@ -59,14 +59,12 @@
 golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
-golang.org/x/net v0.0.0-20190311183353-d8887717615a h1:oWX7TPOiFAMXLq8o0ikBYfCJVlRHBcsciT5bXOrH628=
 golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190502183928-7f726cade0ab/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859 h1:R/3boaszxrf1GEUWTVDzSKVwLmSJpwZ1yqXm8j0v2QI=
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
-golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421 h1:Wo7BWFiOk0QRFMLYMqJGFMd9CgUAcGx7V+qEg/h5IBI=
 golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
 golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a h1:tImsplftrFpALCYumobsd0K86vlAs/eXGFms2txfJfA=
 golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@@ -75,16 +73,13 @@
 golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20190423024810-112230192c58 h1:8gQV6CLnAEikrhgkHFbMAEhagSSnXWGV915qUMm9mrU=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20190502175342-a43fa875dd82 h1:vsphBvatvfbhlb4PO1BYSr9dzugGxJ/SQHoNufZJq1w=
 golang.org/x/sys v0.0.0-20190502175342-a43fa875dd82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
-golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2 h1:z99zHgr7hKfrUcX/KsoJk5FJfjTceCKIp96+biqP4To=
 golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
@@ -103,17 +98,14 @@
 google.golang.org/api v0.4.0 h1:KKgc1aqhV8wDPbDzlDtpvyjZFY3vjz85FP7p4wcQUyI=
 google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
 google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
-google.golang.org/appengine v1.4.0 h1:/wp5JvzpHIxhs/dumFmF7BXTf3Z+dd4uXta4kVyO508=
 google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
 google.golang.org/appengine v1.5.0 h1:KxkO13IPW4Lslp2bz+KHP2E3gtFlrIGNThxkZQ3g+4c=
 google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
 google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
 google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
-google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7 h1:ZUjXAXmrAyrmmCPHgCA/vChHcpsX27MZ3yBonD/z1KE=
 google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
 google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873 h1:nfPFGzJkUDX6uBmpN/pSw7MbOAWegH5QDQuoXFHedLg=
 google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
-google.golang.org/grpc v1.19.0 h1:cfg4PD8YEdSFnm7qLV4++93WcmhH2nIUhMjhdCvl3j8=
 google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
 google.golang.org/grpc v1.20.1 h1:Hz2g2wirWK7H0qIIhGIqRGTuMwTE8HEKFnDZZ7lm9NU=
 google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
diff --git a/main.go b/main.go
index 94a96bf..3bba9ab 100644
--- a/main.go
+++ b/main.go
@@ -45,6 +45,10 @@
 		s.test()
 		return
 	}
+	if len(os.Args) > 1 && os.Args[1] == "testnacl" {
+		s.testNacl()
+		return
+	}
 
 	port := os.Getenv("PORT")
 	if port == "" {
diff --git a/sandbox.go b/sandbox.go
index 7b6909f..e559725 100644
--- a/sandbox.go
+++ b/sandbox.go
@@ -12,6 +12,7 @@
 	"context"
 	"crypto/sha256"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"go/ast"
 	"go/doc"
@@ -26,11 +27,12 @@
 	"runtime"
 	"strconv"
 	"strings"
-	"syscall"
 	"text/template"
 	"time"
 
+	"cloud.google.com/go/compute/metadata"
 	"github.com/bradfitz/gomemcache/memcache"
+	"golang.org/x/playground/sandbox/sandboxtypes"
 )
 
 const (
@@ -79,7 +81,7 @@
 // If there is no cached *response for the combination of cachePrefix and request.Body,
 // handler calls cmdFunc and in case of a nil error, stores the value of *response in the cache.
 // The handler returned supports Cross-Origin Resource Sharing (CORS) from any domain.
-func (s *server) commandHandler(cachePrefix string, cmdFunc func(*request) (*response, error)) http.HandlerFunc {
+func (s *server) commandHandler(cachePrefix string, cmdFunc func(context.Context, *request) (*response, error)) http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		cachePrefix := cachePrefix // so we can modify it below
 		w.Header().Set("Access-Control-Allow-Origin", "*")
@@ -110,7 +112,7 @@
 			if err != memcache.ErrCacheMiss {
 				s.log.Errorf("s.cache.Get(%q, &response): %v", key, err)
 			}
-			resp, err = cmdFunc(&req)
+			resp, err = cmdFunc(r.Context(), &req)
 			if err != nil {
 				s.log.Errorf("cmdFunc error: %v", err)
 				http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
@@ -315,7 +317,7 @@
 // The output of successfully ran program is returned in *response.Events.
 // If a program cannot be built or has timed out,
 // *response.Errors contains an explanation for a user.
-func compileAndRun(req *request) (*response, error) {
+func compileAndRun(ctx context.Context, req *request) (*response, error) {
 	// TODO(andybons): Add semaphore to limit number of running programs at once.
 	tmpDir, err := ioutil.TempDir("", "sandbox")
 	if err != nil {
@@ -368,15 +370,34 @@
 		}
 	}
 
+	// TODO: remove all this once Go 1.14 is out. This is a transitional/debug step
+	// to support both nacl & gvisor temporarily.
+	useGvisor := os.Getenv("GO_VERSION") >= "go1.14" ||
+		os.Getenv("DEBUG_FORCE_GVISOR") == "1" ||
+		strings.Contains(req.Body, "//play:gvisor\n")
+
 	exe := filepath.Join(tmpDir, "a.out")
 	goCache := filepath.Join(tmpDir, "gocache")
 
-	ctx, cancel := context.WithTimeout(context.Background(), maxCompileTime)
+	buildCtx, cancel := context.WithTimeout(ctx, maxCompileTime)
 	defer cancel()
-	cmd := exec.CommandContext(ctx, "go", "build", "-o", exe, buildPkgArg)
+	goBin := "go"
+	if useGvisor {
+		goBin = "/usr/local/go1.14/bin/go"
+	}
+	cmd := exec.CommandContext(buildCtx, goBin,
+		"build",
+		"-o", exe,
+		"-tags=faketime", // required for Go 1.14+, no-op before
+		buildPkgArg)
 	cmd.Dir = tmpDir
 	var goPath string
-	cmd.Env = []string{"GOOS=nacl", "GOARCH=amd64p32", "GOCACHE=" + goCache}
+	if useGvisor {
+		cmd.Env = []string{"GOOS=linux", "GOARCH=amd64", "GOROOT=/usr/local/go1.14"}
+	} else {
+		cmd.Env = []string{"GOOS=nacl", "GOARCH=amd64p32"}
+	}
+	cmd.Env = append(cmd.Env, "GOCACHE="+goCache)
 	if useModules {
 		// Create a GOPATH just for modules to be downloaded
 		// into GOPATH/pkg/mod.
@@ -411,28 +432,62 @@
 		}
 		return nil, fmt.Errorf("error building go source: %v", err)
 	}
-	ctx, cancel = context.WithTimeout(context.Background(), maxRunTime)
+	runCtx, cancel := context.WithTimeout(ctx, maxRunTime)
 	defer cancel()
-	cmd = exec.CommandContext(ctx, "sel_ldr_x86_64", "-l", "/dev/null", "-S", "-e", exe, testParam)
 	rec := new(Recorder)
-	cmd.Stdout = rec.Stdout()
-	cmd.Stderr = rec.Stderr()
-	var status int
-	if err := cmd.Run(); err != nil {
-		if ctx.Err() == context.DeadlineExceeded {
-			// Send what was captured before the timeout.
-			events, err := rec.Events()
-			if err != nil {
-				return nil, fmt.Errorf("error decoding events: %v", err)
+	var exitCode int
+	if useGvisor {
+		f, err := os.Open(exe)
+		if err != nil {
+			return nil, err
+		}
+		defer f.Close()
+		req, err := http.NewRequestWithContext(ctx, "POST", sandboxBackendURL(), f)
+		if err != nil {
+			return nil, err
+		}
+		req.Header.Add("Idempotency-Key", "1") // lets Transport do retries with a POST
+		if testParam != "" {
+			req.Header.Add("X-Argument", testParam)
+		}
+		req.GetBody = func() (io.ReadCloser, error) { return os.Open(exe) }
+		res, err := http.DefaultClient.Do(req)
+		if err != nil {
+			return nil, err
+		}
+		defer res.Body.Close()
+		if res.StatusCode != http.StatusOK {
+			return nil, fmt.Errorf("unexpected response from backend: %v", res.Status)
+		}
+		var execRes sandboxtypes.Response
+		if err := json.NewDecoder(res.Body).Decode(&execRes); err != nil {
+			log.Printf("JSON decode error from backend: %v", err)
+			return nil, errors.New("error parsing JSON from backend")
+		}
+		if execRes.Error != "" {
+			return &response{Errors: execRes.Error}, nil
+		}
+		exitCode = execRes.ExitCode
+		rec.Stdout().Write(execRes.Stdout)
+		rec.Stderr().Write(execRes.Stderr)
+	} else {
+		cmd := exec.CommandContext(runCtx, "sel_ldr_x86_64", "-l", "/dev/null", "-S", "-e", exe, testParam)
+		cmd.Stdout = rec.Stdout()
+		cmd.Stderr = rec.Stderr()
+		if err := cmd.Run(); err != nil {
+			if ctx.Err() == context.DeadlineExceeded {
+				// Send what was captured before the timeout.
+				events, err := rec.Events()
+				if err != nil {
+					return nil, fmt.Errorf("error decoding events: %v", err)
+				}
+				return &response{Errors: "process took too long", Events: events}, nil
 			}
-			return &response{Errors: "process took too long", Events: events}, nil
-		}
-		exitErr, ok := err.(*exec.ExitError)
-		if !ok {
-			return nil, fmt.Errorf("error running sandbox: %v", err)
-		}
-		if ws, ok := exitErr.Sys().(syscall.WaitStatus); ok {
-			status = ws.ExitStatus()
+			exitErr, ok := err.(*exec.ExitError)
+			if !ok {
+				return nil, fmt.Errorf("error running sandbox: %v", err)
+			}
+			exitCode = exitErr.ExitCode()
 		}
 	}
 	events, err := rec.Events()
@@ -455,7 +510,7 @@
 	}
 	return &response{
 		Events:      events,
-		Status:      status,
+		Status:      exitCode,
 		IsTest:      testParam != "",
 		TestsFailed: fails,
 		VetErrors:   vetOut,
@@ -490,7 +545,8 @@
 }
 
 func (s *server) healthCheck() error {
-	resp, err := compileAndRun(&request{Body: healthProg})
+	ctx := context.Background() // TODO: cap it to some reasonable timeout
+	resp, err := compileAndRun(ctx, &request{Body: healthProg})
 	if err != nil {
 		return err
 	}
@@ -503,6 +559,18 @@
 	return nil
 }
 
+func sandboxBackendURL() string {
+	if v := os.Getenv("SANDBOX_BACKEND_URL"); v != "" {
+		return v
+	}
+	id, _ := metadata.ProjectID()
+	switch id {
+	case "golang-org":
+		return "http://sandbox.play-sandbox-fwd.il4.us-central1.lb.golang-org.internal/run"
+	}
+	panic(fmt.Sprintf("no SANDBOX_BACKEND_URL environment and no default defined for project %q", id))
+}
+
 const healthProg = `
 package main
 
diff --git a/sandbox/.gitignore b/sandbox/.gitignore
new file mode 100644
index 0000000..0f2bb69
--- /dev/null
+++ b/sandbox/.gitignore
@@ -0,0 +1 @@
+*.yaml.expanded
diff --git a/sandbox/Dockerfile b/sandbox/Dockerfile
new file mode 100644
index 0000000..ceb5401
--- /dev/null
+++ b/sandbox/Dockerfile
@@ -0,0 +1,34 @@
+# This is the sandbox backend server.
+#
+# When it's run, the host maps in /var/run/docker.sock to this
+# environment so the play-sandbox server can connect to the host's
+# docker daemon, which has the gvisor "runsc" runtime available.
+
+FROM golang:1.13 AS build
+
+COPY . /go/src/playground
+WORKDIR /go/src/playground/sandbox
+RUN go install
+
+FROM debian:buster
+
+RUN apt-get update
+
+# Extra stuff for occasional debugging:
+RUN apt-get install --yes strace lsof emacs25-nox net-tools tcpdump procps
+
+# Install Docker CLI:
+RUN apt-get install --yes \
+        apt-transport-https \
+        ca-certificates \
+        curl \
+        gnupg2 \
+        software-properties-common
+RUN bash -c "curl -fsSL https://download.docker.com/linux/debian/gpg | apt-key add -"
+RUN add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/debian buster stable"
+RUN apt-get update
+RUN apt-get install --yes docker-ce-cli
+
+COPY --from=build /go/bin/sandbox /usr/local/bin/play-sandbox
+
+ENTRYPOINT ["/usr/local/bin/play-sandbox"]
diff --git a/sandbox/Dockerfile.gvisor b/sandbox/Dockerfile.gvisor
new file mode 100644
index 0000000..4218887
--- /dev/null
+++ b/sandbox/Dockerfile.gvisor
@@ -0,0 +1,33 @@
+# This is the environment that the untrusted playground programs run within
+# under gvisor.
+
+############################################################################
+# Import the sandbox server's container (which is assumed to be
+# already built, as enforced by the Makefile), just so we can copy its
+# binary out of it. The same binary is used as both the server and the
+# gvisor-contained helper.
+FROM golang/playground-sandbox AS server
+
+############################################################################
+# Temporary nacl compatibility for development & incremental
+# deployment purposes, so we can run the new server architecture in
+# nacl mode for a bit, then opt-in linux/amd64 gvisor mode, and
+# then once Go 1.14 is out for real we remove the nacl option and
+# delete all the nacl code.
+FROM debian:buster AS nacl
+RUN apt-get update && apt-get install -y --no-install-recommends curl bzip2 ca-certificates
+RUN curl -s https://storage.googleapis.com/nativeclient-mirror/nacl/nacl_sdk/trunk.544461/naclsdk_linux.tar.bz2 | tar -xj -C /tmp --strip-components=2 pepper_67/tools/sel_ldr_x86_64
+
+
+############################################################################
+# This is the actual environment things run in: a minimal busybox with glibc
+# binaries so we can use cgo.
+FROM busybox:glibc
+
+COPY --from=server /usr/local/bin/play-sandbox /usr/local/bin/play-sandbox
+
+# And this, temporarily, for being able to test the old nacl binaries
+# with the new sandbox:
+COPY --from=nacl /tmp/sel_ldr_x86_64 /usr/local/bin
+
+ENTRYPOINT ["/usr/local/bin/play-sandbox"]
diff --git a/sandbox/Makefile b/sandbox/Makefile
new file mode 100644
index 0000000..06c7496
--- /dev/null
+++ b/sandbox/Makefile
@@ -0,0 +1,52 @@
+ZONE := us-central1-f
+TEST_VM := gvisor-cos-test-vm
+PROJ := golang-org
+NETWORK := golang
+
+# Docker environment for the sandbox server itself (containing docker CLI, etc), running
+# in a privileged container.
+docker:
+	docker build -f Dockerfile --tag=golang/playground-sandbox ..
+	docker tag golang/playground-sandbox gcr.io/$(PROJ)/playground-sandbox:latest
+
+# dockergvisor builds the golang/playground-sandbox-gvisor docker
+# image, which is the environment that the untrusted programs run in
+# (a busybox:glibc world with this directory's sandbox binary which
+# runs in --mode=contained)
+dockergvisor:
+	docker build -f Dockerfile.gvisor --tag=golang/playground-sandbox-gvisor ..
+	docker tag golang/playground-sandbox-gvisor gcr.io/$(PROJ)/playground-sandbox-gvisor:latest
+
+push: docker dockergvisor
+	docker push gcr.io/$(PROJ)/playground-sandbox:latest
+	docker push gcr.io/$(PROJ)/playground-sandbox-gvisor:latest
+
+# runlocal runs the sandbox server locally, for use with the frontend
+# parent directory's "test_nacl" or "test_gvisor" test targets.
+runlocal: docker dockergvisor
+	docker network create sandnet || true
+	docker kill sandbox_dev || true
+	docker run --name=sandbox_dev --rm --network=sandnet -ti -p 127.0.0.1:8080:80/tcp -v /var/run/docker.sock:/var/run/docker.sock golang/playground-sandbox:latest --dev
+
+konlet.yaml.expanded: konlet.yaml
+	sed "s/PROJECT_NAME/$(PROJ)/" konlet.yaml > konlet.yaml.expanded
+
+# create_test_vm creates a test VM for interactive debugging.
+create_test_vm: konlet.yaml.expanded
+	gcloud --project=$(PROJ) compute instances create $(TEST_VM) \
+	--zone $(ZONE) \
+	--network $(NETWORK) \
+	--no-address \
+	--image-project cos-cloud \
+	--image cos-stable-76-12239-60-0 \
+	--metadata-from-file gce-container-declaration=konlet.yaml.expanded,user-data=cloud-init.yaml
+
+# delete_test_vm deletes the test VM from create_test_vm.
+delete_test_vm:
+	gcloud --project=$(PROJ) compute instances delete $(TEST_VM) --quiet --zone $(ZONE)
+
+# ssh connects to the create_test_vm VM. It must be run from the same network.
+ssh:
+	gcloud --project=$(PROJ) compute ssh $(TEST_VM) --internal-ip --zone $(ZONE)
+
+
diff --git a/sandbox/cloud-init.yaml b/sandbox/cloud-init.yaml
new file mode 100644
index 0000000..41195a4
--- /dev/null
+++ b/sandbox/cloud-init.yaml
@@ -0,0 +1,17 @@
+#cloud-config
+
+write_files:
+- path: /etc/docker/daemon.json
+  permissions: 0644
+  owner: root
+  content: |
+    {
+      "live-restore": true,
+      "storage-driver": "overlay2",
+      "runtimes": { "runsc": { "path": "/var/lib/docker/runsc", "runtimeArgs": [] } }
+    }
+
+runcmd:
+- curl -L -o /var/lib/docker/runsc https://storage.googleapis.com/go-builder-data/runsc-1.0.1
+- chmod +x /var/lib/docker/runsc
+- systemctl reload docker.service
diff --git a/sandbox/konlet.yaml b/sandbox/konlet.yaml
new file mode 100644
index 0000000..2850af3
--- /dev/null
+++ b/sandbox/konlet.yaml
@@ -0,0 +1,16 @@
+spec:
+  containers:
+    - name: playground
+      image: 'gcr.io/PROJECT_NAME/gvisor-playground-sandbox:latest'
+      volumeMounts:
+        - name: dockersock
+          mountPath: /var/run/docker.sock
+      securityContext:
+        privileged: true
+      stdin: false
+      tty: true
+  restartPolicy: Always
+  volumes:
+    - name: dockersock
+      hostPath:
+        path: /var/run/docker.sock
diff --git a/sandbox/sandbox.go b/sandbox/sandbox.go
new file mode 100644
index 0000000..be7ab6d
--- /dev/null
+++ b/sandbox/sandbox.go
@@ -0,0 +1,521 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The sandbox program is an HTTP server that receives untrusted
+// linux/amd64 binaries in a POST request and then executes them in a
+// gvisor sandbox using Docker, returning the output as a response to
+// the POST.
+//
+// It's part of the Go playground (https://play.golang.org/).
+package main
+
+import (
+	"bytes"
+	"context"
+	"crypto/rand"
+	"encoding/json"
+	"errors"
+	"flag"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"log"
+	"net/http"
+	"os"
+	"os/exec"
+	"os/signal"
+	"runtime"
+	"sync"
+	"syscall"
+	"time"
+
+	"golang.org/x/playground/sandbox/sandboxtypes"
+)
+
+var (
+	listenAddr = flag.String("listen", ":80", "HTTP server listen address. Only applicable when --mode=server")
+	mode       = flag.String("mode", "server", "Whether to run in \"server\" mode or \"contained\" mode. The contained mode is used internally by the server mode.")
+	dev        = flag.Bool("dev", false, "run in dev mode (show help messages)")
+	numWorkers = flag.Int("workers", runtime.NumCPU(), "number of parallel gvisor containers to pre-spin up & let run concurrently")
+)
+
+const (
+	maxBinarySize    = 100 << 20
+	runTimeout       = 5 * time.Second
+	maxOutputSize    = 100 << 20
+	memoryLimitBytes = 100 << 20
+)
+
+var errTooMuchOutput = errors.New("Output too large")
+
+// containedStartMessage is the first thing written to stdout by the
+// contained process when it starts up. This lets the parent HTTP
+// server know that a particular container is ready to run a binary.
+const containedStartMessage = "started\n"
+
+var (
+	readyContainer chan *Container
+	runSem         chan struct{}
+)
+
+type Container struct {
+	name   string
+	stdin  io.WriteCloser
+	stdout io.ReadCloser
+	stderr io.ReadCloser
+	cmd    *exec.Cmd
+
+	waitOnce sync.Once
+	waitVal  error
+}
+
+func (c *Container) Close() {
+	setContainerWanted(c.name, false)
+	c.stdin.Close()
+	c.stdout.Close()
+	c.stderr.Close()
+	if c.cmd.Process != nil {
+		c.cmd.Process.Kill()
+		c.Wait() // just in case
+	}
+}
+
+func (c *Container) Wait() error {
+	c.waitOnce.Do(c.wait)
+	return c.waitVal
+}
+
+func (c *Container) wait() {
+	c.waitVal = c.cmd.Wait()
+}
+
+func main() {
+	flag.Parse()
+	if *mode == "contained" {
+		runInGvisor()
+		panic("runInGvisor didn't exit")
+	}
+	if flag.NArg() != 0 {
+		flag.Usage()
+		os.Exit(1)
+	}
+	log.Printf("Go playground sandbox starting.")
+
+	readyContainer = make(chan *Container, *numWorkers)
+	runSem = make(chan struct{}, *numWorkers)
+	go makeWorkers()
+	go handleSignals()
+
+	if out, err := exec.Command("docker", "version").CombinedOutput(); err != nil {
+		log.Fatalf("failed to connect to docker: %v, %s", err, out)
+	}
+	if *dev {
+		log.Printf("Running in dev mode; container published to host at: http://localhost:8080/")
+		// TODO: XXXX FIXME: this is no longer the protocol since the addition of the processMeta JSON header,
+		// so write a client program to do this instead?
+		log.Printf("Run a binary with: curl -v --data-binary @/home/bradfitz/hello http://localhost:8080/run\n")
+	} else {
+		log.Printf("Listening on %s", *listenAddr)
+	}
+
+	http.HandleFunc("/health", healthHandler)
+	http.HandleFunc("/healthz", healthHandler)
+	http.HandleFunc("/", rootHandler)
+	http.HandleFunc("/run", runHandler)
+	log.Fatal(http.ListenAndServe(*listenAddr, nil))
+}
+
+func handleSignals() {
+	c := make(chan os.Signal, 1)
+	signal.Notify(c, syscall.SIGINT)
+	s := <-c
+	log.Fatalf("closing on signal %d: %v", s, s)
+}
+
+func healthHandler(w http.ResponseWriter, r *http.Request) {
+	io.WriteString(w, "OK\n")
+}
+
+func rootHandler(w http.ResponseWriter, r *http.Request) {
+	if r.URL.Path != "/" {
+		http.NotFound(w, r)
+		return
+	}
+	io.WriteString(w, "Hi from sandbox\n")
+}
+
+// processMeta is the JSON sent to the gvisor container before the untrusted binary.
+// It currently contains only the arguments to pass to the binary.
+// It might contain environment or other things later.
+type processMeta struct {
+	Args []string `json:"args"`
+}
+
+// runInGvisor runs once we're inside gvisor. We have no network
+// at this point. We can read our binary in from stdin and then run
+// it.
+func runInGvisor() {
+	const binPath = "/tmpfs/play"
+	if _, err := io.WriteString(os.Stdout, containedStartMessage); err != nil {
+		log.Fatalf("writing to stdout: %v", err)
+	}
+	slurp, err := ioutil.ReadAll(os.Stdin)
+	if err != nil {
+		log.Fatalf("reading stdin in contained mode: %v", err)
+	}
+	nl := bytes.IndexByte(slurp, '\n')
+	if nl == -1 {
+		log.Fatalf("no newline found in input")
+	}
+	metaJSON, bin := slurp[:nl], slurp[nl+1:]
+
+	if err := ioutil.WriteFile(binPath, bin, 0755); err != nil {
+		log.Fatalf("writing contained binary: %v", err)
+	}
+
+	var meta processMeta
+	if err := json.NewDecoder(bytes.NewReader(metaJSON)).Decode(&meta); err != nil {
+		log.Fatalf("error decoding JSON meta: %v", err)
+	}
+
+	// As part of a temporary transition plan, we also support
+	// running nacl binaries in this sandbox. The point isn't to
+	// double sandbox things as much as it is to let us transition
+	// things in steps: first to split the sandbox into two parts
+	// (frontend & backend), and then to change the type of binary
+	// (nacl to linux/amd64). This means we can do step 1 of the
+	// migration during the Go 1.13 dev cycle and have less
+	// risk/rush during the Go 1.14 release, which should just be
+	// a flag flip.
+	// This isn't a perfect heuristic, but it works and it's cheap:
+	isNacl := bytes.Contains(slurp, []byte("_rt0_amd64p32_nacl"))
+
+	cmd := exec.Command(binPath)
+	if isNacl {
+		cmd = exec.Command("/usr/local/bin/sel_ldr_x86_64", "-l", "/dev/null", "-S", "-e", binPath)
+	}
+	cmd.Args = append(cmd.Args, meta.Args...)
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	if err := cmd.Start(); err != nil {
+		log.Fatal(err)
+	}
+	err = cmd.Wait()
+	os.Remove(binPath) // not that it matters much, this container will be nuked
+	os.Exit(errExitCode(err))
+	return
+
+}
+
+func makeWorkers() {
+	for {
+		c, err := startContainer(context.Background())
+		if err != nil {
+			log.Printf("error starting container: %v", err)
+			time.Sleep(5 * time.Second)
+			continue
+		}
+		readyContainer <- c
+	}
+}
+
+func randHex(n int) string {
+	b := make([]byte, n/2)
+	_, err := rand.Read(b)
+	if err != nil {
+		panic(err)
+	}
+	return fmt.Sprintf("%x", b)
+}
+
+var (
+	wantedMu        sync.Mutex
+	containerWanted = map[string]bool{}
+)
+
+// setContainerWanted records whether a named container is wanted or
+// not. Any unwanted containers are cleaned up asynchronously as a
+// sanity check against leaks.
+//
+// TODO(bradfitz): add leak checker (background docker ps loop)
+func setContainerWanted(name string, wanted bool) {
+	wantedMu.Lock()
+	defer wantedMu.Unlock()
+	if wanted {
+		containerWanted[name] = true
+	} else {
+		delete(containerWanted, name)
+	}
+}
+
+func getContainer(ctx context.Context) (*Container, error) {
+	select {
+	case c := <-readyContainer:
+		return c, nil
+	case <-ctx.Done():
+		return nil, ctx.Err()
+	}
+}
+
+// startContainer runs a new sandbox container via "docker run" and waits
+// for it to write containedStartMessage to stdout. If that fails or ctx
+// ends first, the name is unmarked as wanted, the started docker process
+// is killed and reaped, and any pipes already opened are closed.
+func startContainer(ctx context.Context) (c *Container, err error) {
+	name := "play_run_" + randHex(8)
+	setContainerWanted(name, true)
+	cmd := exec.Command("docker", "run",
+		"--name="+name,
+		"--rm",
+		"--tmpfs=/tmpfs",
+		"-i", // read stdin
+		"--runtime=runsc",
+		"--network=none",
+		"--memory="+fmt.Sprint(memoryLimitBytes),
+		"gcr.io/golang-org/playground-sandbox-gvisor:latest",
+		"--mode=contained")
+	var stdin io.WriteCloser
+	var stdout, stderr io.ReadCloser
+	defer func() {
+		if err == nil {
+			return
+		}
+		setContainerWanted(name, false)
+		if cmd.Process != nil {
+			// Started but unusable: kill it, then reap it so no zombie is left.
+			cmd.Process.Kill()
+			go cmd.Wait()
+		}
+		for _, p := range []io.Closer{stdin, stdout, stderr} {
+			if p != nil {
+				p.Close()
+			}
+		}
+	}()
+	stdin, err = cmd.StdinPipe()
+	if err != nil {
+		return nil, err
+	}
+	stdout, err = cmd.StdoutPipe()
+	if err != nil {
+		return nil, err
+	}
+	stderr, err = cmd.StderrPipe()
+	if err != nil {
+		return nil, err
+	}
+	if err := cmd.Start(); err != nil {
+		return nil, err
+	}
+
+	errc := make(chan error, 1)
+	go func() {
+		buf := make([]byte, len(containedStartMessage))
+		if _, err := io.ReadFull(stdout, buf); err != nil {
+			errc <- fmt.Errorf("error reading header from sandbox container: %v", err)
+			return
+		}
+		if string(buf) != containedStartMessage {
+			errc <- fmt.Errorf("sandbox container sent wrong header %q; want %q", buf, containedStartMessage)
+			return
+		}
+		errc <- nil
+	}()
+	select {
+	case <-ctx.Done():
+		log.Printf("timeout starting container")
+		return nil, ctx.Err()
+	case err := <-errc:
+		if err != nil {
+			log.Printf("error starting container: %v", err)
+			return nil, err
+		}
+	}
+	return &Container{
+		name:   name,
+		stdin:  stdin,
+		stdout: stdout,
+		stderr: stderr,
+		cmd:    cmd,
+	}, nil
+}
+
+// runHandler handles a POST whose body is the binary to run: it feeds a JSON
+// metadata line and the binary to a ready container's stdin, then replies
+// with the exit code, stdout and stderr as a JSON sandboxtypes.Response.
+func runHandler(w http.ResponseWriter, r *http.Request) {
+	t0 := time.Now()
+	tlast := t0
+	var logmu sync.Mutex
+	logf := func(format string, args ...interface{}) {
+		if !*dev {
+			return
+		}
+		logmu.Lock()
+		defer logmu.Unlock()
+		now := time.Now()
+		log.Print(fmt.Sprintf("+%10v +%10v ", now.Sub(t0), now.Sub(tlast)) + fmt.Sprintf(format, args...))
+		tlast = now
+	}
+	logf("/run")
+	if r.Method != "POST" {
+		http.Error(w, "expected a POST", http.StatusBadRequest)
+		return
+	}
+
+	// Bound the number of requests being processed at once.
+	// (Before we slurp the binary into memory)
+	select {
+	case runSem <- struct{}{}:
+	case <-r.Context().Done():
+		return
+	}
+	defer func() { <-runSem }()
+
+	bin, err := ioutil.ReadAll(http.MaxBytesReader(w, r.Body, maxBinarySize))
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	logf("read %d bytes", len(bin))
+
+	c, err := getContainer(r.Context())
+	if err != nil {
+		if cerr := r.Context().Err(); cerr != nil {
+			log.Printf("getContainer, client side cancellation: %v", cerr)
+			return
+		}
+		http.Error(w, "failed to get container", http.StatusInternalServerError)
+		log.Printf("failed to get container: %v", err)
+		return
+	}
+	logf("got container %s", c.name)
+	defer c.Close()
+	defer logf("leaving handler; about to close container")
+	runTimer := time.NewTimer(runTimeout)
+	defer runTimer.Stop()
+
+	errc := make(chan error, 2) // user-visible error
+	waitc := make(chan error, 1)
+
+	// copyWG lets the writer goroutine wait until both copyOut calls have finished filling res.
+	var copyWG sync.WaitGroup
+	copyOut := func(which string, dst *[]byte, r io.Reader) {
+		defer copyWG.Done()
+		buf := make([]byte, 4<<10)
+		for {
+			n, err := r.Read(buf)
+			logf("%s: Read = %v, %v", which, n, err)
+			*dst = append(*dst, buf[:n]...)
+			if err == io.EOF {
+				return
+			}
+			if len(*dst) > maxOutputSize {
+				errc <- errTooMuchOutput
+				return
+			}
+			if err != nil {
+				log.Printf("reading %s: %v", which, err)
+				errc <- fmt.Errorf("error reading %v", which)
+				return
+			}
+		}
+	}
+
+	res := &sandboxtypes.Response{}
+	go func() {
+		metaJSON, _ := json.Marshal(&processMeta{Args: r.Header["X-Argument"]})
+		metaJSON = append(metaJSON, '\n')
+		if _, err := c.stdin.Write(metaJSON); err != nil {
+			log.Printf("stdin write meta: %v", err)
+			errc <- errors.New("failed to write meta to child")
+			return
+		}
+		if _, err := c.stdin.Write(bin); err != nil {
+			log.Printf("stdin write: %v", err)
+			errc <- errors.New("failed to write binary to child")
+			return
+		}
+		c.stdin.Close()
+		logf("wrote+closed")
+		copyWG.Add(2)
+		go copyOut("stdout", &res.Stdout, c.stdout)
+		go copyOut("stderr", &res.Stderr, c.stderr)
+		copyWG.Wait() // finish reading the pipes (and writing res) before Wait; see os/exec StdoutPipe
+		waitc <- c.Wait()
+	}()
+	var waitErr error
+	select {
+	case waitErr = <-waitc:
+		logf("waited: %v", waitErr)
+	case err := <-errc:
+		logf("got error: %v", err)
+		if err == errTooMuchOutput {
+			sendError(w, err.Error())
+			return
+		}
+		http.Error(w, "failed to read stdout from docker run", http.StatusInternalServerError)
+		return
+	case <-runTimer.C:
+		logf("timeout")
+		sendError(w, "timeout running program")
+		return
+	}
+
+	res.ExitCode = errExitCode(waitErr)
+	res.Stderr = cleanStderr(res.Stderr)
+	sendResponse(w, res)
+}
+
+func errExitCode(err error) int { // 0 for nil, the process's own code for *exec.ExitError, else 1
+	switch e := err.(type) {
+	case nil:
+		return 0
+	case *exec.ExitError:
+		return e.ExitCode()
+	}
+	return 1
+}
+
+func sendError(w http.ResponseWriter, errMsg string) { // replies with a Response whose only set field is Error
+	sendResponse(w, &sandboxtypes.Response{Error: errMsg})
+}
+
+func sendResponse(w http.ResponseWriter, r *sandboxtypes.Response) { // writes r to w as indented JSON
+	body, encErr := json.MarshalIndent(r, "", "  ")
+	if encErr != nil {
+		http.Error(w, "error encoding JSON", http.StatusInternalServerError)
+		log.Printf("json marshal: %v", encErr)
+		return
+	}
+	w.Header().Set("Content-Type", "application/json")
+	w.Header().Set("Content-Length", fmt.Sprintf("%d", len(body)))
+	w.Write(body)
+}
+
+// cleanStderr strips leading newline-terminated lines that isSpamStderrLine
+// flags as spam and returns the remaining suffix of x (sharing its storage).
+func cleanStderr(x []byte) []byte {
+	for {
+		end := bytes.IndexByte(x, '\n') + 1 // 0 when x holds no complete line
+		if end == 0 || !isSpamStderrLine(x[:end]) {
+			return x
+		}
+		x = x[end:]
+	}
+}
+
+var warningPrefix = []byte("WARNING: ")
+
+// isSpamStderrLine reports whether line is Docker noise rather than
+// program output. For now that means any line beginning with
+// "WARNING: ", for example:
+//     WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap.
+//
+// TODO: remove this and instead just make the child process start by
+// writing a known header to stderr, then have parent skip everything
+// before that unique header.
+func isSpamStderrLine(line []byte) bool {
+	return len(line) >= len(warningPrefix) && bytes.Equal(line[:len(warningPrefix)], warningPrefix)
+}
diff --git a/sandbox/sandbox.tf b/sandbox/sandbox.tf
new file mode 100644
index 0000000..61029c1
--- /dev/null
+++ b/sandbox/sandbox.tf
@@ -0,0 +1,143 @@
+# TODO: move the network configuration into terraform too? It was created by hand with:
+# gcloud compute networks subnets update golang --region=us-central1 --enable-private-ip-google-access
+
+terraform {
+  backend "gcs" { # Terraform state is stored in a Google Cloud Storage bucket
+    bucket = "tf-state-prod-golang-org"
+    prefix = "terraform/state" # object-name prefix for the state within the bucket
+  }
+}
+
+provider "google-beta" { # used by the resources in this file that set provider = "google-beta"
+  project = "golang-org"
+  region  = "us-central1"
+  zone    = "us-central1-f"
+}
+
+provider "google" { # used by the google_* resources in this file that do not name a provider
+  project = "golang-org"
+  region  = "us-central1"
+  zone    = "us-central1-f"
+}
+
+data "local_file" "cloud_init" { # file contents become the template's "user-data" metadata
+  filename = "${path.module}/cloud-init.yaml"
+}
+
+data "local_file" "konlet" { # file contents become the template's "gce-container-declaration" metadata
+  filename = "${path.module}/konlet.yaml.expanded"
+}
+
+data "google_compute_image" "cos" { # resolves the image used as the template's boot disk source
+  family  = "cos-stable"
+  project = "cos-cloud"
+}
+
+resource "google_compute_instance_template" "inst_tmpl" { # VM template referenced by the "rigm" instance group manager
+  name         = "play-sandbox-tmpl"
+  machine_type = "n1-standard-1"
+  metadata = {
+    "ssh-keys"                  = "bradfitz:ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCtL4Di+zypBIRmvohzfC4mOzETaz/DtzANPKir/mUE1QiC8HuL8BDpIu3rZZY1gAnQf3IZMpgQYgx90TZskgWfi7xLN2mDh2iBZB0KUDlpNpwn1SzSUTQU10XV5mOLm1B7L+w3QXA1wz7kMjmztxZNli/tvZI5BtAX6c58c4Rn0pjQTPeEVPMMvEj6zPUy4QInWTgtrbTj3On/e95F3ZmUjEKFfjeFxVPh7fF6mSygzcxEyGYxTbMdUod/dNbZD/7HY5eTNQPSvjp+GaoodbFEdYWnk9vtoX/2VWYo0J/+w6oHprDzj7dPzKeJeCjpnKra7DyCS/RxIIPt5Giwdj2F bradfitz@bradfitz-glinux-desktop.sea.corp.google.com"
+    "gce-container-declaration" = data.local_file.konlet.content
+    "user-data"                 = data.local_file.cloud_init.content
+  }
+  network_interface {
+    network = "golang"
+  }
+  service_account {
+    scopes = ["storage-ro"]
+  }
+  disk {
+    source_image = data.google_compute_image.cos.self_link
+    auto_delete  = true
+    boot         = true
+  }
+  scheduling {
+    automatic_restart   = true
+    on_host_maintenance = "MIGRATE"
+  }
+  lifecycle {
+    create_before_destroy = true # NOTE(review): with a fixed "name" a replacement may collide; consider name_prefix — confirm
+  }
+}
+
+resource "google_compute_region_autoscaler" "default" { # scales the "rigm" instance group on CPU utilization
+  provider = "google-beta"
+
+  name   = "play-sandbox-autoscaler"
+  region = "us-central1"
+  target = "${google_compute_region_instance_group_manager.rigm.self_link}"
+
+  autoscaling_policy {
+    max_replicas    = 10
+    min_replicas    = 2
+    cooldown_period = 60 # seconds
+
+    cpu_utilization {
+      target = 0.5 # fraction of CPU, i.e. 50%
+    }
+  }
+}
+
+resource "google_compute_region_instance_group_manager" "rigm" { # regional managed group of VMs built from inst_tmpl
+  provider = "google-beta"
+  name     = "play-sandbox-rigm"
+
+  base_instance_name = "playsandbox"
+  region             = "us-central1"
+
+  version {
+    name              = "primary"
+    instance_template = "${google_compute_instance_template.inst_tmpl.self_link}"
+  }
+
+  named_port {
+    name = "http"
+    port = 80
+  }
+
+  auto_healing_policies { # uses the health check resource defined in this file
+    health_check      = "${google_compute_health_check.default.self_link}"
+    initial_delay_sec = 30
+  }
+}
+
+data "google_compute_region_instance_group" "rig" { # the group managed by "rigm"; used as the backend service's group
+  provider  = "google-beta"
+  self_link = "${google_compute_region_instance_group_manager.rigm.instance_group}"
+}
+
+resource "google_compute_health_check" "default" { # shared by rigm auto-healing and the backend service
+  name                = "play-sandbox-rigm-health-check"
+  check_interval_sec  = 5
+  timeout_sec         = 5
+  healthy_threshold   = 2
+  unhealthy_threshold = 10 # 50 seconds
+  http_health_check { # HTTP probe of /healthz on port 80
+    request_path = "/healthz"
+    port         = 80
+  }
+}
+
+resource "google_compute_region_backend_service" "default" { # target of the forwarding rule defined in this file
+  name          = "play-sandbox-backend-service"
+  region        = "us-central1"
+  health_checks = ["${google_compute_health_check.default.self_link}"]
+  backend {
+    group = "${data.google_compute_region_instance_group.rig.self_link}" # the "rigm" instance group
+  }
+}
+
+resource "google_compute_forwarding_rule" "default" { # forwards TCP port 80 on the "golang" network to the backend service
+  name                  = "play-sandbox-fwd"
+  region                = "us-central1"
+  network               = "golang"
+  ports                 = ["80"]
+  load_balancing_scheme = "INTERNAL" # internal load balancer
+  ip_protocol           = "TCP"
+  backend_service       = "${google_compute_region_backend_service.default.self_link}"
+
+  # Adding a service label gives us a DNS name:
+  # sandbox.play-sandbox-fwd.il4.us-central1.lb.golang-org.internal
+  service_label = "sandbox"
+}
diff --git a/sandbox/sandboxtypes/types.go b/sandbox/sandboxtypes/types.go
new file mode 100644
index 0000000..3ebfb46
--- /dev/null
+++ b/sandbox/sandboxtypes/types.go
@@ -0,0 +1,22 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package sandboxtypes contains the shared types used
+// to communicate between the different sandbox components.
+package sandboxtypes
+
+// Response is the response from the x/playground/sandbox backend to
+// the x/playground frontend.
+//
+// The stdout/stderr are base64 encoded which isn't ideal but is good
+// enough for now. Maybe we'll move to protobufs later.
+type Response struct {
+	// Error, if non-empty, means we failed to run the binary.
+	// It's meant to be user-visible.
+	Error string `json:"error,omitempty"`
+
+	ExitCode int    `json:"exitCode"` // exit code of the program that was run
+	Stdout   []byte `json:"stdout"`   // raw stdout bytes (encoding/json emits []byte as base64)
+	Stderr   []byte `json:"stderr"`   // raw stderr bytes (encoding/json emits []byte as base64)
+}
diff --git a/server_test.go b/server_test.go
index 773a093..40ba065 100644
--- a/server_test.go
+++ b/server_test.go
@@ -161,7 +161,7 @@
 	if err != nil {
 		t.Fatalf("newServer(testingOptions(t)): %v", err)
 	}
-	testHandler := s.commandHandler("test", func(r *request) (*response, error) {
+	testHandler := s.commandHandler("test", func(_ context.Context, r *request) (*response, error) {
 		if r.Body == "fail" {
 			return nil, fmt.Errorf("non recoverable")
 		}
diff --git a/tests.go b/tests.go
index 18f6ea0..4022b6a 100644
--- a/tests.go
+++ b/tests.go
@@ -8,8 +8,10 @@
 package main
 
 import (
+	"context"
 	"fmt"
 	stdlog "log"
+	"net"
 	"os"
 	"reflect"
 	"strings"
@@ -19,12 +21,27 @@
 type compileTest struct {
 	name               string // test name
 	prog, want, errors string
+	wantFunc           func(got string) error // alternative to want
 	withVet            bool
 	wantEvents         []Event
 	wantVetErrors      string
 }
 
+func (s *server) testNacl() { // logs that nacl mode is under test, then runs the shared test suite
+	log.Printf("testing nacl mode")
+	s.runTests()
+}
+
 func (s *server) test() {
+	if _, err := net.ResolveIPAddr("ip", "sandbox_dev.sandnet."); err != nil {
+		log.Fatalf("sandbox_dev.sandnet not available")
+	}
+	os.Setenv("DEBUG_FORCE_GVISOR", "1")
+	os.Setenv("SANDBOX_BACKEND_URL", "http://sandbox_dev.sandnet/run")
+	s.runTests()
+}
+
+func (s *server) runTests() {
 	if err := s.healthCheck(); err != nil {
 		stdlog.Fatal(err)
 	}
@@ -33,44 +50,66 @@
 	defer func(old string) { os.Setenv("ALLOW_PLAY_MODULE_DOWNLOADS", old) }(os.Getenv("ALLOW_PLAY_MODULE_DOWNLOADS"))
 	os.Setenv("ALLOW_PLAY_MODULE_DOWNLOADS", "true")
 
+	failed := false
 	for i, t := range tests {
-		fmt.Printf("testing case %d (%q)...\n", i, t.name)
-		resp, err := compileAndRun(&request{Body: t.prog, WithVet: t.withVet})
+		stdlog.Printf("testing case %d (%q)...\n", i, t.name)
+		resp, err := compileAndRun(context.Background(), &request{Body: t.prog, WithVet: t.withVet})
 		if err != nil {
 			stdlog.Fatal(err)
 		}
 		if t.wantEvents != nil {
 			if !reflect.DeepEqual(resp.Events, t.wantEvents) {
-				stdlog.Fatalf("resp.Events = %q, want %q", resp.Events, t.wantEvents)
+				stdlog.Printf("resp.Events = %q, want %q", resp.Events, t.wantEvents)
+				failed = true
 			}
 			continue
 		}
 		if t.errors != "" {
 			if resp.Errors != t.errors {
-				stdlog.Fatalf("resp.Errors = %q, want %q", resp.Errors, t.errors)
+				stdlog.Printf("resp.Errors = %q, want %q", resp.Errors, t.errors)
+				failed = true
 			}
 			continue
 		}
 		if resp.Errors != "" {
-			stdlog.Fatal(resp.Errors)
+			stdlog.Print(resp.Errors)
+			failed = true
+			continue
 		}
 		if resp.VetErrors != t.wantVetErrors {
-			stdlog.Fatalf("resp.VetErrs = %q, want %q", resp.VetErrors, t.wantVetErrors)
+			stdlog.Printf("resp.VetErrs = %q, want %q", resp.VetErrors, t.wantVetErrors)
+			failed = true
+			continue
 		}
 		if t.withVet && (resp.VetErrors != "") == resp.VetOK {
-			stdlog.Fatalf("resp.VetErrs & VetOK inconsistent; VetErrs = %q; VetOK = %v", resp.VetErrors, resp.VetOK)
+			stdlog.Printf("resp.VetErrs & VetOK inconsistent; VetErrs = %q; VetOK = %v", resp.VetErrors, resp.VetOK)
+			failed = true
+			continue
 		}
 		if len(resp.Events) == 0 {
-			stdlog.Fatalf("unexpected output: %q, want %q", "", t.want)
+			stdlog.Printf("unexpected output: %q, want %q", "", t.want)
+			failed = true
+			continue
 		}
 		var b strings.Builder
 		for _, e := range resp.Events {
 			b.WriteString(e.Message)
 		}
-		if !strings.Contains(b.String(), t.want) {
-			stdlog.Fatalf("unexpected output: %q, want %q", b.String(), t.want)
+		if t.wantFunc != nil {
+			if err := t.wantFunc(b.String()); err != nil {
+				stdlog.Printf("%v\n", err)
+				failed = true
+			}
+		} else {
+			if !strings.Contains(b.String(), t.want) {
+				stdlog.Printf("unexpected output: %q, want %q", b.String(), t.want)
+				failed = true
+			}
 		}
 	}
+	if failed {
+		stdlog.Fatalf("FAILED")
+	}
 	fmt.Println("OK")
 }
 
@@ -190,11 +229,16 @@
 
 func main() {
 	filepath.Walk("/", func(path string, info os.FileInfo, err error) error {
+		if path == "/proc" || path == "/sys" {
+			return filepath.SkipDir
+		}
 		fmt.Println(path)
 		return nil
 	})
 }
-`, want: `/
+`, wantFunc: func(got string) error {
+			// The environment for the old nacl sandbox:
+			if strings.TrimSpace(got) == `/
 /dev
 /dev/null
 /dev/random
@@ -211,8 +255,31 @@
 /usr/local/go
 /usr/local/go/lib
 /usr/local/go/lib/time
-/usr/local/go/lib/time/zoneinfo.zip`},
-
+/usr/local/go/lib/time/zoneinfo.zip` {
+				return nil
+			}
+			have := map[string]bool{}
+			for _, f := range strings.Split(got, "\n") {
+				have[f] = true
+			}
+			for _, expect := range []string{
+				"/.dockerenv",
+				"/__runsc_containers__",
+				"/etc/hostname",
+				"/dev/zero",
+				"/lib/ld-linux-x86-64.so.2",
+				"/lib/libc.so.6",
+				"/etc/nsswitch.conf",
+				"/bin/env",
+				"/tmpfs",
+			} {
+				if !have[expect] {
+					return fmt.Errorf("missing expected sandbox file %q; got:\n%s", expect, got)
+				}
+			}
+			return nil
+		},
+	},
 	{
 		name: "test_passes",
 		prog: `
@@ -404,7 +471,9 @@
 package main
 import ("fmt"; "github.com/bradfitz/iter")
 func main() { for i := range iter.N(5) { fmt.Println(i) } }
-`, want: "0\n1\n2\n3\n4\n"},
+`,
+		want: "0\n1\n2\n3\n4\n",
+	},
 
 	{
 		name:          "compile_with_vet",
diff --git a/vet.go b/vet.go
index a9cae12..032a5aa 100644
--- a/vet.go
+++ b/vet.go
@@ -5,6 +5,7 @@
 package main
 
 import (
+	"context"
 	"fmt"
 	"io/ioutil"
 	"os"
@@ -21,7 +22,7 @@
 // the /compile (compileAndRun) handler instead with the WithVet
 // boolean set. This code path doesn't support modules and only exists
 // as a temporary compatiblity bridge to older javascript clients.
-func vetCheck(req *request) (*response, error) {
+func vetCheck(ctx context.Context, req *request) (*response, error) {
 	tmpDir, err := ioutil.TempDir("", "vet")
 	if err != nil {
 		return nil, fmt.Errorf("error creating temp directory: %v", err)