sweet: split benchmark results from log and/or debug output

Currently benchmark output all comes out in one big stream and ends up
in the results/<benchmark>/<toolchain-name>.results file. Recently, we
started dumping a lot more logging information from benchmarks by
default, but this has caused problems with benchmark result parsing,
since they contain lines that look naively like key-value pairs in the
benchmark format.

This change resolves this problem by splitting the benchmark result
stream from the log and/or debug output stream, placing the latter in
results/<benchmark>/toolchain-name>.log. By convention, we make it so
that all benchmark driver binaries emit benchmark results to stdout and
everything else to stderr to easily separate the streams in the harness
and redirect them to different files.

Change-Id: Iea4cc253a72db176378a6cc9f68b7f36b9ff60ae
Reviewed-on: https://go-review.googlesource.com/c/benchmarks/+/602135
Reviewed-by: Michael Pratt <mpratt@google.com>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
diff --git a/sweet/benchmarks/go-build/main.go b/sweet/benchmarks/go-build/main.go
index 01cf6a8..3edc700 100644
--- a/sweet/benchmarks/go-build/main.go
+++ b/sweet/benchmarks/go-build/main.go
@@ -104,7 +104,7 @@
 	baseCmd := exec.Command(cmdArgs[0], cmdArgs[1:]...)
 	baseCmd.Dir = pkgPath
 	baseCmd.Env = common.NewEnvFromEnviron().MustSet("GOROOT=" + filepath.Dir(filepath.Dir(goTool))).Collapse()
-	baseCmd.Stdout = os.Stdout
+	baseCmd.Stdout = os.Stderr // Redirect all tool output to stderr.
 	baseCmd.Stderr = os.Stderr
 	cmd, err := cgroups.WrapCommand(baseCmd, "test.scope")
 	if err != nil {
@@ -133,7 +133,8 @@
 			if err != nil {
 				return err
 			}
-			if _, err := io.Copy(os.Stderr, f); err != nil {
+			// Results canonically go to stdout.
+			if _, err := io.Copy(os.Stdout, f); err != nil {
 				f.Close()
 				return err
 			}
@@ -154,6 +155,9 @@
 		benchmark = true
 	default:
 		cmd := exec.Command(flag.Args()[0], flag.Args()[1:]...)
+		// Passing through stdout is necessary here because the go tool needs
+		// to be able to read stdout from some of these tools. This won't appear
+		// in the final output.
 		cmd.Stdout = os.Stdout
 		cmd.Stderr = os.Stderr
 		return cmd.Run()
@@ -186,6 +190,9 @@
 		}
 	}
 	cmd := exec.Command(flag.Args()[0], append(extraFlags, flag.Args()[1:]...)...)
+	// Passing through stdout is necessary here because the go tool needs
+	// to be able to read stdout from some of these tools. This won't appear
+	// in the final output.
 	cmd.Stdout = os.Stdout
 	cmd.Stderr = os.Stderr
 	if benchmark {
diff --git a/sweet/benchmarks/internal/driver/driver.go b/sweet/benchmarks/internal/driver/driver.go
index 2ae8688..403c8fb 100644
--- a/sweet/benchmarks/internal/driver/driver.go
+++ b/sweet/benchmarks/internal/driver/driver.go
@@ -387,7 +387,7 @@
 	})
 
 	// Write out stats.
-	var out io.Writer = os.Stderr
+	var out io.Writer = os.Stdout
 	if b.resultsWriter != nil {
 		out = b.resultsWriter
 	}
diff --git a/sweet/cmd/sweet/benchmark.go b/sweet/cmd/sweet/benchmark.go
index d4c09f9..8a0e07c 100644
--- a/sweet/cmd/sweet/benchmark.go
+++ b/sweet/cmd/sweet/benchmark.go
@@ -348,17 +348,24 @@
 			args = append(args, dc.DriverArgs()...)
 		}
 
+		// Create log and results file.
 		results, err := os.Create(filepath.Join(resultsDir, fmt.Sprintf("%s.results", cfg.Name)))
 		if err != nil {
 			return fmt.Errorf("create %s results file for %s: %v", b.name, cfg.Name, err)
 		}
 		defer results.Close()
+		log, err := os.Create(filepath.Join(resultsDir, fmt.Sprintf("%s.log", cfg.Name)))
+		if err != nil {
+			return fmt.Errorf("create %s log file for %s: %v", b.name, cfg.Name, err)
+		}
+		defer results.Close()
 		setups = append(setups, common.RunConfig{
 			BinDir:    binDir,
 			TmpDir:    tmpDir,
 			AssetsDir: assetsDir,
 			Args:      args,
 			Results:   results,
+			Log:       log,
 			Short:     r.short,
 		})
 	}
@@ -384,11 +391,11 @@
 			if err := b.harness.Run(cfgs[i], &setup); err != nil {
 				debug.SetGCPercent(gogc)
 				// Useful error messages are often in the log. Grab the end.
-				logTail, tailErr := readFileTail(setup.Results)
+				logTail, tailErr := readFileTail(setup.Log)
 				if tailErr != nil {
 					logTail = fmt.Sprintf("error reading log tail: %s", tailErr)
 				}
-				setup.Results.Close()
+				setup.Log.Close()
 				return fmt.Errorf("run benchmark %s for config %s: %v\nLog tail:\n%s", b.name, cfgs[i].Name, err, logTail)
 			}
 			debug.SetGCPercent(gogc)
diff --git a/sweet/cmd/sweet/integration_test.go b/sweet/cmd/sweet/integration_test.go
index 9d58ea9..e87702e 100644
--- a/sweet/cmd/sweet/integration_test.go
+++ b/sweet/cmd/sweet/integration_test.go
@@ -153,38 +153,44 @@
 		defer outputMu.Unlock()
 
 		// Poke at the results directory.
-		var matches []string
-		addMatches := func(fileName string) {
-			m1, err := filepath.Glob(filepath.Join(resultsDir, "*", fileName))
+		var resultFiles []string
+		addResultFiles := func(fileName string) {
+			matches, err := filepath.Glob(filepath.Join(resultsDir, "*", fileName))
 			if err != nil {
 				t.Errorf("failed to search results directory for %s: %v", fileName, err)
-			} else if len(m1) == 0 {
+			} else if len(matches) == 0 {
 				t.Logf("no %s results", fileName)
 			}
-			matches = append(matches, m1...)
+			resultFiles = append(resultFiles, matches...)
 		}
 		if hasPGO {
-			addMatches("go.profile.results")
+			addResultFiles("go.profile.results")
 		}
-		addMatches("go.results")
+		addResultFiles("go.results")
 
 		// Dump additional information in case of error, and
 		// check for reasonable results in the case of no error.
-		for _, match := range matches {
-			benchmark := filepath.Base(filepath.Dir(match))
+		for _, resultFile := range resultFiles {
+			benchmark := filepath.Base(filepath.Dir(resultFile))
 			if runErr != nil {
 				t.Logf("output for %s:", benchmark)
+				logFile := resultFile[:len(resultFile)-len(filepath.Ext(resultFile))] + ".log"
+				log, err := os.ReadFile(logFile)
+				if err != nil {
+					t.Errorf("failed to read log for %s: %v", benchmark, err)
+					continue
+				}
+				t.Log(string(log))
 			}
-			data, err := os.ReadFile(match)
+			data, err := os.ReadFile(resultFile)
 			if err != nil {
-				t.Errorf("failed to read results for %si: %v", benchmark, err)
+				t.Errorf("failed to read results for %s: %v", benchmark, err)
 				continue
 			}
-			if runErr != nil {
-				t.Log(string(data))
-				continue
+			// TODO(mknyszek): Do some more exhaustive checking with the benchfmt package.
+			if !strings.Contains(string(data), "Benchmark") {
+				t.Errorf("no benchmark data found in result file for %s", benchmark)
 			}
-			// TODO(mknyszek): Check to make sure the results look reasonable.
 		}
 		if runErr != nil {
 			t.Logf("command output:\n%s", string(output))
diff --git a/sweet/common/harness.go b/sweet/common/harness.go
index 01e7724..3df943e 100644
--- a/sweet/common/harness.go
+++ b/sweet/common/harness.go
@@ -69,8 +69,16 @@
 
 	// Results is the file to which benchmark results should be appended
 	// in the Go benchmark format.
+	//
+	// By convention, this is the benchmark binary's stdout.
 	Results *os.File
 
+	// Log contains additional information information from the benchmark,
+	// for example for debugging.
+	//
+	// By convention, this is the benchmark binary's stderr.
+	Log *os.File
+
 	// Short indicates whether or not to run a short version of the benchmarks
 	// for testing. Guaranteed to be the same as GetConfig.Short and
 	// BuildConfig.Short.
diff --git a/sweet/harnesses/cockroachdb.go b/sweet/harnesses/cockroachdb.go
index 04cecb7..3053791 100644
--- a/sweet/harnesses/cockroachdb.go
+++ b/sweet/harnesses/cockroachdb.go
@@ -163,7 +163,7 @@
 		)
 		cmd.Env = cfg.ExecEnv.Collapse()
 		cmd.Stdout = rcfg.Results
-		cmd.Stderr = rcfg.Results
+		cmd.Stderr = rcfg.Log
 		log.TraceCommand(cmd, false)
 		if err := cmd.Start(); err != nil {
 			return err
diff --git a/sweet/harnesses/etcd.go b/sweet/harnesses/etcd.go
index dd15ae8..21cd0dc 100644
--- a/sweet/harnesses/etcd.go
+++ b/sweet/harnesses/etcd.go
@@ -82,7 +82,7 @@
 		)
 		cmd.Env = cfg.ExecEnv.Collapse()
 		cmd.Stdout = rcfg.Results
-		cmd.Stderr = rcfg.Results
+		cmd.Stderr = rcfg.Log
 		log.TraceCommand(cmd, false)
 		if err := cmd.Run(); err != nil {
 			return err
diff --git a/sweet/harnesses/go-build.go b/sweet/harnesses/go-build.go
index cb78cf2..4648a50 100644
--- a/sweet/harnesses/go-build.go
+++ b/sweet/harnesses/go-build.go
@@ -150,7 +150,7 @@
 		)
 		cmd.Env = cfg.ExecEnv.Collapse()
 		cmd.Stdout = rcfg.Results
-		cmd.Stderr = rcfg.Results
+		cmd.Stderr = rcfg.Log
 		log.TraceCommand(cmd, false)
 		if err := cmd.Run(); err != nil {
 			return err
diff --git a/sweet/harnesses/gvisor.go b/sweet/harnesses/gvisor.go
index f45cd07..2a0f692 100644
--- a/sweet/harnesses/gvisor.go
+++ b/sweet/harnesses/gvisor.go
@@ -74,7 +74,7 @@
 	)
 	cmd.Env = cfg.ExecEnv.Collapse()
 	cmd.Stdout = rcfg.Results
-	cmd.Stderr = rcfg.Results
+	cmd.Stderr = rcfg.Log
 	log.TraceCommand(cmd, false)
 	return cmd.Run()
 }
diff --git a/sweet/harnesses/local.go b/sweet/harnesses/local.go
index af1d71c..34fa2ac 100644
--- a/sweet/harnesses/local.go
+++ b/sweet/harnesses/local.go
@@ -16,7 +16,6 @@
 	binName   string
 	genArgs   func(cfg *common.Config, rcfg *common.RunConfig) []string
 	beforeRun func(cfg *common.Config, rcfg *common.RunConfig) error
-	noStdout  bool
 }
 
 func (h *localBenchHarness) CheckPrerequisites() error {
@@ -42,10 +41,8 @@
 		append(rcfg.Args, h.genArgs(cfg, rcfg)...)...,
 	)
 	cmd.Env = cfg.ExecEnv.Collapse()
-	if !h.noStdout {
-		cmd.Stdout = rcfg.Results
-	}
-	cmd.Stderr = rcfg.Results
+	cmd.Stdout = rcfg.Results
+	cmd.Stderr = rcfg.Log
 	log.TraceCommand(cmd, false)
 	return cmd.Run()
 }
diff --git a/sweet/harnesses/tile38.go b/sweet/harnesses/tile38.go
index db626b2..62643a0 100644
--- a/sweet/harnesses/tile38.go
+++ b/sweet/harnesses/tile38.go
@@ -85,7 +85,7 @@
 	)
 	cmd.Env = cfg.ExecEnv.Collapse()
 	cmd.Stdout = rcfg.Results
-	cmd.Stderr = rcfg.Results
+	cmd.Stderr = rcfg.Log
 	log.TraceCommand(cmd, false)
 	return cmd.Run()
 }