perf: improve regressions page on performance dashboard

This change makes a few improvements to the regressions page:
1. It binds regressions to benchmark+unit, so now the regressions page
   might break up the groupings compared to before (which is probably
   OK).
2. This binding lets us avoid putting regression information into the
   benchmark name, which previously broke the title's link, so this
   change also moves that information to just above each graph.
3. This change adds a link to the regression information which jumps to
   the offending commit on the per-benchmark+unit page.
4. It adds a file-an-issue link to the regressions page which
   streamlines some of the overhead of triaging.

Note that deltaScore isn't exported to the regressions page anymore
because JSON can't tolerate negative infinities, and the new code is
trying to be as simple as possible. The next step is probably to filter
out non-regressions entirely, rather than try to make the deltaScore
work. We can also always bring it back as a string if we feel it's very
useful.

Change-Id: Ib7cf16ef92534fd1d9b9c23798e9474e20f4e8c2
Reviewed-on: https://go-review.googlesource.com/c/build/+/459518
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Michael Pratt <mpratt@google.com>
diff --git a/perf/app/dashboard.go b/perf/app/dashboard.go
index 6ba8a20..01bab8c 100644
--- a/perf/app/dashboard.go
+++ b/perf/app/dashboard.go
@@ -45,11 +45,14 @@
 // We could try to shoehorn this into benchfmt.Result, but that isn't really
 // the best fit for a graph.
 type BenchmarkJSON struct {
-	Name string
-	Unit string
+	Name           string
+	Unit           string
+	HigherIsBetter bool
 
 	// These will be sorted by CommitDate.
 	Values []ValueJSON
+
+	Regression *RegressionJSON
 }
 
 type ValueJSON struct {
@@ -316,13 +319,13 @@
 	return groupBenchmarkResults(res, regressions)
 }
 
-type regression struct {
-	change         float64 // endpoint regression, if any
-	deltaIndex     int     // index at which largest increase of regression occurs
-	deltaScore     float64 // score of that change (in 95%ile boxes)
-	delta          float64 // size of that changes
-	unit           string
-	ignoredBecause string
+type RegressionJSON struct {
+	Change         float64 // endpoint regression, if any
+	DeltaIndex     int     // index at which largest increase of regression occurs
+	Delta          float64 // size of that changes
+	IgnoredBecause string
+
+	deltaScore float64 // score of that change (in 95%ile boxes)
 }
 
 // queryToJson process a QueryTableResult into a slice of BenchmarkJSON,
@@ -354,8 +357,9 @@
 		b, ok := m[k]
 		if !ok {
 			b = &BenchmarkJSON{
-				Name: name,
-				Unit: unit,
+				Name:           name,
+				Unit:           unit,
+				HigherIsBetter: isHigherBetter(unit),
 			}
 			m[k] = b
 		}
@@ -380,69 +384,35 @@
 	return s, nil
 }
 
-// reorderRegressionsFirst sorts the benchmarks in s so that those with the
-// largest detectable regressions come first, and returns a map from benchmark name
-// to the worst regression for that name (across all units)
-func reorderRegressionsFirst(s []*BenchmarkJSON) map[string]regression {
-	r := make(map[string]regression) // worst regression for each benchmark name, across all units for that benchmark
-
+// filterAndSortRegressions filters out benchmarks that didn't regress and sorts the
+// benchmarks in s so that those with the largest detectable regressions come first.
+func filterAndSortRegressions(s []*BenchmarkJSON) []*BenchmarkJSON {
 	// Compute per-benchmark estimates of point where the most interesting regression happened.
-	// TODO This information might be worth adding to the graphs in general, once we do that for the regressions view.
 	for _, b := range s {
-		if worst := worstRegression(b); worst.deltaScore > 1 && worst.delta > r[b.Name].delta {
-			r[b.Name] = worst
-		} else if _, ok := r[b.Name]; !ok { // don't overwrite success for one unit w/ failure explanation for another
-			r[b.Name] = worst // This is for regression ordering debugging and might end up removed.
-		}
+		b.Regression = worstRegression(b)
+		// TODO(mknyszek, drchase, mpratt): Filter out benchmarks once we're confident this
+		// algorithm works OK.
 	}
 
 	// Sort benchmarks with detectable regressions first, ordered by
 	// size of regression at end of sample.  Also sort the remaining
-	// benchmarks into end-of-sample regression order.  Keep benchmarks
-	// with the same name grouped together, which is assumed by the
-	// graph presentation server.
+	// benchmarks into end-of-sample regression order.
 	sort.Slice(s, func(i, j int) bool {
+		ri, rj := s[i].Regression, s[j].Regression
+		// regressions w/ a delta index come first
+		if (ri.DeltaIndex < 0) != (rj.DeltaIndex < 0) {
+			return rj.DeltaIndex < 0
+		}
+		if ri.Change != rj.Change {
+			// put larger regression first.
+			return ri.Change > rj.Change
+		}
 		if s[i].Name == s[j].Name {
 			return s[i].Unit < s[j].Unit
 		}
-		ri, iok := r[s[i].Name]
-		rj, jok := r[s[j].Name]
-		if iok != jok {
-			// a name with regression information attached comes first.
-			return iok
-		}
-		// regressions w/ a delta index come first
-		if (ri.deltaIndex < 0) != (rj.deltaIndex < 0) {
-			return rj.deltaIndex < 0
-		}
-		if ri.change != rj.change {
-			// put larger regression first.
-			return ri.change > rj.change
-		}
 		return s[i].Name < s[j].Name
 	})
-
-	return r
-}
-
-// renameBenchmarksWithRegressions changes the names of benchmarks to include information about regressions,
-// and returns the number of regression points that were detected.
-// TODO(drchase, mpratt) better if this can be done in the graph or surrounding text.
-func renameBenchmarksWithRegressions(s []*BenchmarkJSON, r map[string]regression) {
-	detected := 0
-	// This injects regression information into the titles.
-	for i, b := range s {
-		if change, ok := r[b.Name]; ok {
-			if change.deltaIndex >= 0 {
-				detected++
-				v := b.Values[change.deltaIndex]
-				commit, date := v.CommitHash[:7], v.CommitDate.Format(time.RFC3339)
-				s[i].Name = fmt.Sprintf("%s %s %3.1f%% regression, %3.1f%%point change at %s on %s (score %3.1f)", b.Name, change.unit, 100*change.change, 100*change.delta, commit, date, change.deltaScore)
-			} else {
-				s[i].Name = fmt.Sprintf("%s %s not ranked because %s", b.Name, change.unit, change.ignoredBecause)
-			}
-		}
-	}
+	return s
 }
 
 // groupBenchmarkResults groups all benchmark results from the passed query.
@@ -453,22 +423,17 @@
 	if err != nil {
 		return nil, err
 	}
-
-	if !byRegression {
-		// Keep benchmarks with the same name grouped together, which is
-		// assumed by the JS.
-		sort.Slice(s, func(i, j int) bool {
-			if s[i].Name == s[j].Name {
-				return s[i].Unit < s[j].Unit
-			}
-			return s[i].Name < s[j].Name
-		})
-		return s, nil
+	if byRegression {
+		return filterAndSortRegressions(s), nil
 	}
-
-	regressions := reorderRegressionsFirst(s)
-	renameBenchmarksWithRegressions(s, regressions)
-
+	// Keep benchmarks with the same name grouped together, which is
+	// assumed by the JS.
+	sort.Slice(s, func(i, j int) bool {
+		if s[i].Name == s[j].Name {
+			return s[i].Unit < s[j].Unit
+		}
+		return s[i].Name < s[j].Name
+	})
 	return s, nil
 }
 
@@ -508,23 +473,33 @@
 	}
 }
 
-func worstRegression(b *BenchmarkJSON) regression {
+func isHigherBetter(unit string) bool {
+	switch unit {
+	case "B/s", "ops/s":
+		return true
+	}
+	return false
+}
+
+func worstRegression(b *BenchmarkJSON) *RegressionJSON {
 	values := b.Values
 	l := len(values)
 	ninf := math.Inf(-1)
 
 	sign := 1.0
-	// TODO unit "good direction" information must be available from somewhere else, but for now, do this.
-	switch b.Unit {
-	case "B/s", "ops/s":
-		sign = -1
+	if b.HigherIsBetter {
+		sign = -1.0
 	}
 
 	min := sign * values[l-1].Center
-	worst := regression{deltaScore: ninf, deltaIndex: -1, change: min, unit: b.Unit}
+	worst := &RegressionJSON{
+		DeltaIndex: -1,
+		Change:     min,
+		deltaScore: ninf,
+	}
 
 	if len(values) < 4 {
-		worst.ignoredBecause = "too few values"
+		worst.IgnoredBecause = "too few values"
 		return worst
 	}
 
@@ -541,12 +516,12 @@
 
 	// MAGIC NUMBER "1".  Removing this added 25% to the "detected regressions", but they were all junk.
 	if median > 1 {
-		worst.ignoredBecause = "median change score > 1"
+		worst.IgnoredBecause = "median change score > 1"
 		return worst
 	}
 
 	if math.IsNaN(median) {
-		worst.ignoredBecause = "median is NaN"
+		worst.IgnoredBecause = "median is NaN"
 		return worst
 	}
 
@@ -559,16 +534,16 @@
 		score := sign * changeScore(v1.Low, v1.Center, v1.High, v0.Low, v0.Center, v0.High)
 
 		if score > magicScoreThreshold && sign*v1.Center < min && score > worst.deltaScore {
-			worst.deltaIndex = i
+			worst.DeltaIndex = i
 			worst.deltaScore = score
-			worst.delta = sign * (v0.Center - v1.Center)
+			worst.Delta = sign * (v0.Center - v1.Center)
 		}
 
 		min = math.Min(sign*v0.Center, min)
 	}
 
-	if worst.deltaIndex == -1 {
-		worst.ignoredBecause = "didn't detect outlier regression"
+	if worst.DeltaIndex == -1 {
+		worst.IgnoredBecause = "didn't detect outlier regression"
 	}
 
 	return worst
@@ -692,7 +667,8 @@
 		w = &gzipResponseWriter{w: gz, ResponseWriter: w}
 	}
 
-	w.WriteHeader(http.StatusOK)
-	e := json.NewEncoder(w)
-	e.Encode(benchmarks)
+	if err := json.NewEncoder(w).Encode(benchmarks); err != nil {
+		log.Printf("Error encoding results: %v", err)
+		http.Error(w, "Internal error, see logs", 500)
+	}
 }
diff --git a/perf/app/dashboard/index.html b/perf/app/dashboard/index.html
index 88779e0..a6f53fe 100644
--- a/perf/app/dashboard/index.html
+++ b/perf/app/dashboard/index.html
@@ -155,17 +155,44 @@
 
 		let item = document.createElement("div");
 		item.classList.add("Dashboard-grid-item");
+		if (bench.Regression) {
+			const p = document.createElement("p");
+			p.classList.add("Dashboard-regression-description");
+			const r = bench.Regression;
+			if (r.DeltaIndex >= 0) {
+				// Generate some text indicating the regression.
+				const rd = bench.Values[r.DeltaIndex];
+				const regression = (Math.abs(r.Change)*100).toFixed(2);
+				const shortCommit = rd.CommitHash.slice(0, 7);
+				let diffText = "regression";
+				if ((bench.HigherIsBetter && r.Change < 0) || (bench.HigherIsBetter && r.Change > 0)) {
+					diffText = "improvement";
+				}
+				p.innerHTML = `${regression}% ${diffText}, ${(r.Delta*100).toFixed(2)}%-point change at <a href="?benchmark=${bench.Name}&unit=${bench.Unit}#${rd.CommitHash}">${shortCommit}</a>.`;
+
+				// Add a link to file a bug.
+				const title = `affected/package: ${regression}% regression in ${bench.Name} ${bench.Unit} at ${shortCommit}`;
+				const body = `Discovered a regression in ${bench.Unit} of ${regression}% for benchmark ${bench.Name} at ${shortCommit}.\n\n<ADD MORE DETAILS>.`
+				let query = `?title=${encodeURIComponent(title)}&body=${encodeURIComponent(body)}&labels=Performance`;
+				p.innerHTML += ` <a href="https://github.com/golang/go/issues/new${query}">File an issue</a>.`;
+			} else {
+				p.textContext = `Not ranked because ${r.IgnoredBecause}.`;
+			}
+			item.appendChild(p);
+		}
 		item.appendChild(BandChart(bench.Values, {
 			benchmark: bench.Name,
 			unit: bench.Unit,
 			repository: repository,
 			minViewDeltaPercent: minViewDeltaPercent,
+			higherIsBetter: bench.HigherIsBetter,
 		}));
 		grid.appendChild(item);
 	}
 }
 
 function addTable(bench, unit, repository) {
+	const commitSelected = window.location.hash;
 	let dashboard = document.getElementById("dashboard");
 
 	removeLoadingMessage();
@@ -236,6 +263,9 @@
 
 		// Create a row per value.
 		const row = document.createElement("tr");
+		if (commitSelected && commitSelected.slice(1) === v.CommitHash) {
+			row.classList.add("selected");
+		}
 
 		// Commit date.
 		row.appendChild(createCell(Intl.DateTimeFormat([], {
@@ -247,6 +277,7 @@
 		const commitHash = createCell("", false);
 		const commitLink = document.createElement("a");
 		commitLink.href = "https://go.googlesource.com/" + repository + "/+show/" + v.CommitHash;
+		commitLink.id = v.CommitHash;
 		commitLink.textContent = v.CommitHash.slice(0, 7);
 		commitHash.appendChild(commitLink);
 		commitHash.classList.add("Dashboard-table-commit")
@@ -254,12 +285,22 @@
 
 		// Range visualization.
 		const range = createCell("", false);
-		range.appendChild(Range(v.Low, v.Center, v.High, min, max, 640, 32, bench.Unit));
+		range.appendChild(Range(v.Low, v.Center, v.High, min, max, 640, 32, bench.Unit, bench.HigherIsBetter));
 		range.classList.add("Dashboard-table-range")
 		row.appendChild(range);
 
 		table.appendChild(row);
 	}
+
+	if (commitSelected) {
+		// Now that we've generated anchors for every commit, let's scroll to the
+		// right one. The browser won't do this automatically because the anchors
+		// don't exist when the page is loaded.
+		const anchor = document.querySelector(commitSelected);
+		window.scrollTo({
+			top: anchor.getBoundingClientRect().top + window.pageYOffset,
+		})
+	}
 }
 
 function failure(name, response) {
diff --git a/perf/app/dashboard/static/range.js b/perf/app/dashboard/static/range.js
index dcff4c9..3312042 100644
--- a/perf/app/dashboard/static/range.js
+++ b/perf/app/dashboard/static/range.js
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-function Range(low, center, high, min, max, width, height, unit) {
+function Range(low, center, high, min, max, width, height, unit, higherIsBetter) {
 	const margin = 40;
 	const svg = d3.create("svg")
 		.attr("width", width)
@@ -13,11 +13,7 @@
 	const goodColor = "#005AB5";
 	const badColor = "#DC3220";
 	const pickColor = function(n) {
-		const higherIsBetter = {
-			"B/s": true,
-			"ops/s": true
-		};
-		if (unit in higherIsBetter) {
+		if (higherIsBetter) {
 			if (n > 0) {
 				return goodColor;
 			}
diff --git a/perf/app/dashboard/static/style.css b/perf/app/dashboard/static/style.css
index ba9f65c..eb14846 100644
--- a/perf/app/dashboard/static/style.css
+++ b/perf/app/dashboard/static/style.css
@@ -163,6 +163,8 @@
   justify-content: left;
 }
 .Dashboard-grid-item {
+  display: flex;
+  flex-direction: column;
 }
 .Dashboard-table {
   padding: 16px;
@@ -171,6 +173,12 @@
 .Dashboard-table tr:nth-child(2n+1) {
   background: #f4f4f4;
 }
+.Dashboard-table tr:hover {
+  background: #ffff99;
+}
+.Dashboard-table tr.selected {
+  background: #ffff99;
+}
 .Dashboard-table th {
   background: #e0ebf5;
   color: #375eab;
@@ -185,3 +193,9 @@
 }
 .Dashboard-table-range {
 }
+.Dashboard-regression-description {
+  display: inline-block;
+  width: 480px;
+  font-style: oblique;
+  text-align: center;
+}
diff --git a/third_party/bandchart/bandchart.js b/third_party/bandchart/bandchart.js
index 279d3fc..91068f5 100644
--- a/third_party/bandchart/bandchart.js
+++ b/third_party/bandchart/bandchart.js
@@ -14,6 +14,7 @@
 	unit,
 	repository,
 	minViewDeltaPercent,
+	higherIsBetter,
 } = {}) {
 	// Compute values.
 	const C = d3.map(data, d => d.CommitHash);
@@ -123,11 +124,7 @@
 	// By default, lower is better.
 	var bottomColor = goodColor;
 	var topColor = badColor;
-	const higherIsBetter = {
-		"B/s": true,
-		"ops/s": true
-	};
-	if (unit in higherIsBetter) {
+	if (higherIsBetter) {
 		bottomColor = badColor;
 		topColor = goodColor;
 	}