sweet: ping cockroachdb cluster workload for readiness instead of sleeping

Currently there's a time.Sleep to wait until the cluster workload is
ready for the CockroachDB benchmark, but I suspect this is flaky. Run
the benchmarking tool with a really small duration instead to try and
identify whether the cluster is ready.

Change-Id: Ie79c930442e0a7d8c5b1d9951974472f50178fd9
Reviewed-on: https://go-review.googlesource.com/c/benchmarks/+/615418
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
diff --git a/sweet/benchmarks/cockroachdb/main.go b/sweet/benchmarks/cockroachdb/main.go
index ad31946..4393450 100644
--- a/sweet/benchmarks/cockroachdb/main.go
+++ b/sweet/benchmarks/cockroachdb/main.go
@@ -337,6 +337,7 @@
 	args        []string
 	longArgs    []string // if !config.short
 	shortArgs   []string // if config.short
+	pingArgs    []string
 	metricTypes []string
 	timeout     time.Duration
 }
@@ -367,7 +368,7 @@
 			"--max-block-bytes=1024",
 			"--concurrency=10000",
 			"--max-rate=30000",
-			//Pre-splitting and scattering the ranges should help stabilize results.
+			// Pre-splitting and scattering the ranges should help stabilize results.
 			"--scatter",
 			"--splits=5",
 		},
@@ -381,6 +382,11 @@
 			"--ramp=5s",
 			"--duration=30s",
 		},
+		// Extra arguments for a very short run, used only to check whether the workload is ready.
+		pingArgs: []string{
+			"--ramp=0s",
+			"--duration=500ms",
+		},
 	}
 }
 
@@ -407,17 +413,28 @@
 	var stdout, stderr bytes.Buffer
 	initCmd.Stdout = &stdout
 	initCmd.Stderr = &stderr
-	if err = initCmd.Run(); err != nil {
+	if err := initCmd.Run(); err != nil {
 		return err
 	}
 
-	log.Println("sleeping")
-
-	// If we try and start the workload right after loading in the schema
-	// it will spam us with database does not exist errors. We could repeatedly
-	// retry until the database exists by parsing the output, or we can just
-	// wait 5 seconds.
-	time.Sleep(5 * time.Second)
+	// Wait until the server accepts work: retry a very short benchmark run for up
+	// to 30 seconds, treating any failed run as "server not ready yet".
+	log.Println("pinging server with benchmark tool")
+	pingArgs := cfg.bench.args
+	pingArgs = append(pingArgs, cfg.bench.pingArgs...)
+	pingArgs = append(pingArgs, pgurls...)
+	pingStart := time.Now()
+	var pingOutput []byte
+	var pingErr error
+	for time.Since(pingStart) < 30*time.Second {
+		// An exec.Cmd cannot be reused once it has run, so build a fresh one per attempt.
+		if pingOutput, pingErr = exec.Command(cfg.cockroachdbBin, pingArgs...).CombinedOutput(); pingErr == nil {
+			break
+		}
+	}
+	if pingErr != nil {
+		return fmt.Errorf("workload failed to become available within timeout: error: %v: output:\n%s", pingErr, pingOutput)
+	}
 
 	args := cfg.bench.args
 	if cfg.short {
@@ -427,7 +444,7 @@
 	}
 	args = append(args, pgurls...)
 
-	log.Println("running benchmark timeout")
+	log.Println("running benchmark tool")
 	cmd := exec.Command(cfg.cockroachdbBin, args...)
 	fmt.Fprintln(os.Stderr, cmd.String())
 
@@ -446,7 +463,7 @@
 	var benchmarkErr error
 	go func() {
 		b.ResetTimer()
-		if err = cmd.Run(); err != nil {
+		if err := cmd.Run(); err != nil {
 			benchmarkErr = err
 		}
 		b.StopTimer()