playground: show a meaningful error on build timeout

Add support for returning a meaningful error message to the user, along
with partial build output if present.

This change also improves graceful command termination throughout the
playground. It introduces internal.WaitOrStop, which will wait for a
command to finish, or gracefully stop the command if it has not finished
by the time the context is cancelled. This also resolves comments from
CL 227351.

Updates golang/go#38052
Updates golang/go#38530
Updates golang/go#25224

Change-Id: I3a36ad2c5f3626c9cd5b3c1139543ccde3e17ba1
Reviewed-on: https://go-review.googlesource.com/c/playground/+/228438
Run-TryBot: Alexander Rakoczy <alex@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Bryan C. Mills <bcmills@google.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
diff --git a/internal/internal.go b/internal/internal.go
new file mode 100644
index 0000000..1540e93
--- /dev/null
+++ b/internal/internal.go
@@ -0,0 +1,66 @@
+package internal
+
+import (
+	"context"
+	"os"
+	"os/exec"
+	"time"
+)
+
+// WaitOrStop waits for the already-started command cmd by calling its Wait method.
+//
+// If cmd does not return before ctx is done, WaitOrStop sends it the given interrupt signal.
+// If killDelay is positive, WaitOrStop waits that additional period for Wait to return before sending os.Kill.
+func WaitOrStop(ctx context.Context, cmd *exec.Cmd, interrupt os.Signal, killDelay time.Duration) error {
+	if cmd.Process == nil {
+		panic("WaitOrStop called with a nil cmd.Process — missing Start call?")
+	}
+	if interrupt == nil {
+		panic("WaitOrStop requires a non-nil interrupt signal")
+	}
+
+	errc := make(chan error)
+	go func() {
+		select {
+		case errc <- nil:
+			return
+		case <-ctx.Done():
+		}
+
+		err := cmd.Process.Signal(interrupt)
+		if err == nil {
+			err = ctx.Err() // Report ctx.Err() as the reason we interrupted.
+		} else if err.Error() == "os: process already finished" {
+			errc <- nil
+			return
+		}
+
+		if killDelay > 0 {
+			timer := time.NewTimer(killDelay)
+			select {
+			// Report ctx.Err() as the reason we interrupted the process...
+			case errc <- ctx.Err():
+				timer.Stop()
+				return
+			// ...but after killDelay has elapsed, fall back to a stronger signal.
+			case <-timer.C:
+			}
+
+			// Wait still hasn't returned.
+			// Kill the process harder to make sure that it exits.
+			//
+			// Ignore any error: if cmd.Process has already terminated, we still
+			// want to send ctx.Err() (or the error from the Interrupt call)
+			// to properly attribute the signal that may have terminated it.
+			_ = cmd.Process.Kill()
+		}
+
+		errc <- err
+	}()
+
+	waitErr := cmd.Wait()
+	if interruptErr := <-errc; interruptErr != nil {
+		return interruptErr
+	}
+	return waitErr
+}
diff --git a/sandbox.go b/sandbox.go
index 7d4c54e..b5d5de9 100644
--- a/sandbox.go
+++ b/sandbox.go
@@ -36,6 +36,7 @@
 
 	"cloud.google.com/go/compute/metadata"
 	"github.com/bradfitz/gomemcache/memcache"
+	"golang.org/x/playground/internal"
 	"golang.org/x/playground/internal/gcpdial"
 	"golang.org/x/playground/sandbox/sandboxtypes"
 )
@@ -123,6 +124,11 @@
 				http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
 				return
 			}
+			if strings.Contains(resp.Errors, goBuildTimeoutError) {
+				// TODO(golang.org/issue/38052) - This should be a http.StatusBadRequest, but the UI requires a 200 to parse the response.
+				s.writeResponse(w, resp, http.StatusOK)
+				return
+			}
 			for _, e := range nonCachingErrors {
 				if strings.Contains(resp.Errors, e) {
 					s.log.Errorf("cmdFunc compilation error: %q", resp.Errors)
@@ -147,16 +153,21 @@
 			}
 		}
 
-		var buf bytes.Buffer
-		if err := json.NewEncoder(&buf).Encode(resp); err != nil {
-			s.log.Errorf("error encoding response: %v", err)
-			http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
-			return
-		}
-		if _, err := io.Copy(w, &buf); err != nil {
-			s.log.Errorf("io.Copy(w, &buf): %v", err)
-			return
-		}
+		s.writeResponse(w, resp, http.StatusOK)
+	}
+}
+
+func (s *server) writeResponse(w http.ResponseWriter, resp *response, status int) {
+	var buf bytes.Buffer
+	if err := json.NewEncoder(&buf).Encode(resp); err != nil {
+		s.log.Errorf("error encoding response: %v", err)
+		http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
+		return
+	}
+	w.WriteHeader(status)
+	if _, err := io.Copy(w, &buf); err != nil {
+		s.log.Errorf("io.Copy(w, &buf): %v", err)
+		return
 	}
 }
 
@@ -450,9 +461,7 @@
 	br.exePath = filepath.Join(tmpDir, "a.out")
 	goCache := filepath.Join(tmpDir, "gocache")
 
-	ctx, cancel := context.WithTimeout(ctx, maxCompileTime)
-	defer cancel()
-	cmd := exec.CommandContext(ctx, "/usr/local/go-faketime/bin/go", "build", "-o", br.exePath, "-tags=faketime")
+	cmd := exec.Command("/usr/local/go-faketime/bin/go", "build", "-o", br.exePath, "-tags=faketime")
 	cmd.Dir = tmpDir
 	cmd.Env = []string{"GOOS=linux", "GOARCH=amd64", "GOROOT=/usr/local/go-faketime"}
 	cmd.Env = append(cmd.Env, "GOCACHE="+goCache)
@@ -472,25 +481,30 @@
 	}
 	cmd.Args = append(cmd.Args, buildPkgArg)
 	cmd.Env = append(cmd.Env, "GOPATH="+br.goPath)
-	t0 := time.Now()
-	if out, err := cmd.CombinedOutput(); err != nil {
-		if ctx.Err() == context.DeadlineExceeded {
-			log.Printf("go build timed out after %v", time.Since(t0))
-			return &buildResult{errorMessage: goBuildTimeoutError}, nil
+	out := &bytes.Buffer{}
+	cmd.Stderr, cmd.Stdout = out, out
+
+	if err := cmd.Start(); err != nil {
+		return nil, fmt.Errorf("error starting go build: %v", err)
+	}
+	ctx, cancel := context.WithTimeout(ctx, maxCompileTime)
+	defer cancel()
+	if err := internal.WaitOrStop(ctx, cmd, os.Interrupt, 250*time.Millisecond); err != nil {
+		if errors.Is(err, context.DeadlineExceeded) {
+			br.errorMessage = fmt.Sprintln(goBuildTimeoutError)
+		} else if ee := (*exec.ExitError)(nil); !errors.As(err, &ee) {
+			log.Printf("error building program: %v", err)
+			return nil, fmt.Errorf("error building go source: %v", err)
 		}
-		if _, ok := err.(*exec.ExitError); ok {
-			// Return compile errors to the user.
+		// Return compile errors to the user.
+		// Rewrite compiler errors to strip the tmpDir name.
+		br.errorMessage = br.errorMessage + strings.Replace(string(out.Bytes()), tmpDir+"/", "", -1)
 
-			// Rewrite compiler errors to strip the tmpDir name.
-			br.errorMessage = strings.Replace(string(out), tmpDir+"/", "", -1)
+		// "go build", invoked with a file name, puts this odd
+		// message before any compile errors; strip it.
+		br.errorMessage = strings.Replace(br.errorMessage, "# command-line-arguments\n", "", 1)
 
-			// "go build", invoked with a file name, puts this odd
-			// message before any compile errors; strip it.
-			br.errorMessage = strings.Replace(br.errorMessage, "# command-line-arguments\n", "", 1)
-
-			return br, nil
-		}
-		return nil, fmt.Errorf("error building go source: %v", err)
+		return br, nil
 	}
 	const maxBinarySize = 100 << 20 // copied from sandbox backend; TODO: unify?
 	if fi, err := os.Stat(br.exePath); err != nil || fi.Size() == 0 || fi.Size() > maxBinarySize {
diff --git a/sandbox/sandbox.go b/sandbox/sandbox.go
index 07de286..f8741b7 100644
--- a/sandbox/sandbox.go
+++ b/sandbox/sandbox.go
@@ -30,6 +30,7 @@
 	"syscall"
 	"time"
 
+	"golang.org/x/playground/internal"
 	"golang.org/x/playground/sandbox/sandboxtypes"
 )
 
@@ -70,34 +71,31 @@
 )
 
 type Container struct {
-	name   string
+	name string
+
 	stdin  io.WriteCloser
 	stdout *limitedWriter
 	stderr *limitedWriter
-	cmd    *exec.Cmd
 
-	waitOnce sync.Once
-	waitVal  error
+	cmd       *exec.Cmd
+	cancelCmd context.CancelFunc
+
+	waitErr chan error // 1-buffered; receives error from WaitOrStop(..., cmd, ...)
 }
 
 func (c *Container) Close() {
 	setContainerWanted(c.name, false)
 
-	if c.cmd.Process != nil {
-		gracefulStop(c.cmd.Process, 250*time.Millisecond)
-		if err := c.Wait(); err != nil {
-			log.Printf("error in c.Wait() for %q: %v", c.name, err)
-		}
+	c.cancelCmd()
+	if err := c.Wait(); err != nil {
+		log.Printf("error in c.Wait() for %q: %v", c.name, err)
 	}
 }
 
 func (c *Container) Wait() error {
-	c.waitOnce.Do(c.wait)
-	return c.waitVal
-}
-
-func (c *Container) wait() {
-	c.waitVal = c.cmd.Wait()
+	err := <-c.waitErr
+	c.waitErr <- err
+	return err
 }
 
 var httpServer *http.Server
@@ -267,34 +265,17 @@
 	if err := cmd.Start(); err != nil {
 		log.Fatalf("cmd.Start(): %v", err)
 	}
-	timer := time.AfterFunc(runTimeout-(500*time.Millisecond), func() {
-		fmt.Fprintln(os.Stderr, "timeout running program")
-		gracefulStop(cmd.Process, 250*time.Millisecond)
-	})
-	defer timer.Stop()
-	err = cmd.Wait()
+	ctx, cancel := context.WithTimeout(context.Background(), runTimeout-500*time.Millisecond)
+	defer cancel()
+	if err = internal.WaitOrStop(ctx, cmd, os.Interrupt, 250*time.Millisecond); err != nil {
+		if errors.Is(err, context.DeadlineExceeded) {
+			fmt.Fprintln(os.Stderr, "timeout running program")
+		}
+	}
 	os.Exit(errExitCode(err))
 	return
 }
 
-// gracefulStop attempts to send a SIGINT before a SIGKILL.
-//
-// The process will be sent a SIGINT immediately. If the context has still not been cancelled,
-// the process will be sent a SIGKILL after delay has passed since sending the SIGINT.
-//
-// TODO(golang.org/issue/38343) - Change SIGINT to SIGQUIT once decision is made.
-func gracefulStop(p *os.Process, delay time.Duration) {
-	// TODO(golang.org/issue/38343) - Change to syscall.SIGQUIT once decision is made.
-	if err := p.Signal(os.Interrupt); err != nil {
-		log.Printf("cmd.Process.Signal(%v): %v", os.Interrupt, err)
-	}
-	time.AfterFunc(delay, func() {
-		if err := p.Kill(); err != nil {
-			log.Printf("cmd.Process.Kill(): %v", err)
-		}
-	})
-}
-
 func makeWorkers() {
 	for {
 		c, err := startContainer(context.Background())
@@ -372,15 +353,25 @@
 	if err := cmd.Start(); err != nil {
 		return nil, err
 	}
+
+	ctx, cancel := context.WithCancel(ctx)
+	c = &Container{
+		name:      name,
+		stdin:     stdin,
+		stdout:    stdout,
+		stderr:    stderr,
+		cmd:       cmd,
+		cancelCmd: cancel,
+		waitErr:   make(chan error, 1),
+	}
+	go func() {
+		c.waitErr <- internal.WaitOrStop(ctx, cmd, os.Interrupt, 250*time.Millisecond)
+	}()
 	defer func() {
 		if err != nil {
-			log.Printf("error starting container %q: %v", name, err)
-			gracefulStop(cmd.Process, 250*time.Millisecond)
-			setContainerWanted(name, false)
+			c.Close()
 		}
 	}()
-	ctx, cancel := context.WithTimeout(ctx, startTimeout)
-	defer cancel()
 
 	startErr := make(chan error, 1)
 	go func() {
@@ -395,25 +386,23 @@
 		}
 	}()
 
+	timer := time.NewTimer(startTimeout)
+	defer timer.Stop()
 	select {
-	case <-ctx.Done():
-		err := fmt.Errorf("timeout starting container %q: %w", name, ctx.Err())
-		pw.Close()
+	case <-timer.C:
+		err := fmt.Errorf("timeout starting container %q", name)
+		cancel()
 		<-startErr
 		return nil, err
-	case err = <-startErr:
+
+	case err := <-startErr:
 		if err != nil {
 			return nil, err
 		}
 	}
+
 	log.Printf("started container %q", name)
-	return &Container{
-		name:   name,
-		stdin:  stdin,
-		stdout: stdout,
-		stderr: stderr,
-		cmd:    cmd,
-	}, nil
+	return c, nil
 }
 
 func runHandler(w http.ResponseWriter, r *http.Request) {