// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package app

import (
"compress/gzip"
"context"
"embed"
"encoding/json"
"errors"
"fmt"
"log"
"math"
"net/http"
"regexp"
"sort"
"strconv"
"strings"
"time"
"github.com/influxdata/influxdb-client-go/v2/api"
"github.com/influxdata/influxdb-client-go/v2/api/query"
"golang.org/x/build/internal/influx"
"golang.org/x/build/third_party/bandchart"
)
// /dashboard/ displays a dashboard of benchmark results over time for
// performance monitoring.
//go:embed dashboard/*
var dashboardFS embed.FS
// dashboardRegisterOnMux registers the dashboard URLs on mux.
func (a *App) dashboardRegisterOnMux(mux *http.ServeMux) {
mux.Handle("/dashboard/", http.FileServer(http.FS(dashboardFS)))
mux.Handle("/dashboard/third_party/bandchart/", http.StripPrefix("/dashboard/third_party/bandchart/", http.FileServer(http.FS(bandchart.FS))))
mux.HandleFunc("/dashboard/data.json", a.dashboardData)
}
// BenchmarkJSON contains the timeseries values for a single benchmark name +
// unit.
//
// We could try to shoehorn this into benchfmt.Result, but that isn't really
// the best fit for a graph.
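//
// One element of the data.json response marshals as (illustrative values,
// not real data):
//
//	{"Name": "BleveQuery", "Unit": "sec/op",
//	 "Values": [{"CommitHash": "abc123def456", "CommitDate": "2022-01-01T00:00:00Z",
//	  "Low": -0.01, "Center": 0.02, "High": 0.05}]}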
type BenchmarkJSON struct {
Name string
Unit string
// These will be sorted by CommitDate.
Values []ValueJSON
}
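// ValueJSON is one timeseries point: the confidence interval of the change
// at a single commit.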
type ValueJSON struct {
CommitHash string
CommitDate time.Time
// These are pre-formatted as percent change.
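// For example, a Center of 0.02 represents a +2% change: the raw value
// from Influx is a ratio (1.02) and fluxRecordToValue subtracts 1.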
Low float64
Center float64
High float64
}
func fluxRecordToValue(rec *query.FluxRecord) (ValueJSON, error) {
low, ok := rec.ValueByKey("low").(float64)
if !ok {
return ValueJSON{}, fmt.Errorf("record %s low value got type %T want float64", rec, rec.ValueByKey("low"))
}
center, ok := rec.ValueByKey("center").(float64)
if !ok {
return ValueJSON{}, fmt.Errorf("record %s center value got type %T want float64", rec, rec.ValueByKey("center"))
}
high, ok := rec.ValueByKey("high").(float64)
if !ok {
return ValueJSON{}, fmt.Errorf("record %s high value got type %T want float64", rec, rec.ValueByKey("high"))
}
commit, ok := rec.ValueByKey("experiment-commit").(string)
if !ok {
return ValueJSON{}, fmt.Errorf("record %s experiment-commit value got type %T want string", rec, rec.ValueByKey("experiment-commit"))
}
return ValueJSON{
CommitDate: rec.Time(),
CommitHash: commit,
Low: low - 1,
Center: center - 1,
High: high - 1,
}, nil
}
// validateRe is an allowlist of characters for a Flux string literal for
// benchmark names. The string will be quoted, so we must not allow ending the
// quote sequence. The pattern is anchored so that the entire string must
// consist of allowed characters.
var validateRe = regexp.MustCompile(`^[a-zA-Z0-9/_:-]+$`)
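// validateFluxString returns an error if s is not safe to interpolate into a
// quoted Flux string literal. For example, "Tile38WithinCircle100kmRequest"
// and "sec/op" pass, while a string containing '"' or '|' is rejected.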
func validateFluxString(s string) error {
if !validateRe.MatchString(s) {
return fmt.Errorf("malformed value %q", s)
}
return nil
}
var errBenchmarkNotFound = errors.New("benchmark not found")
// fetchNamedUnitBenchmark queries Influx for a specific name + unit benchmark.
func fetchNamedUnitBenchmark(ctx context.Context, qc api.QueryAPI, start, end time.Time, name, unit string) (*BenchmarkJSON, error) {
if err := validateFluxString(name); err != nil {
return nil, fmt.Errorf("invalid benchmark name: %w", err)
}
if err := validateFluxString(unit); err != nil {
return nil, fmt.Errorf("invalid benchmark unit: %w", err)
}
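// Note: the pivot turns the separate low/center/high field rows into a
// single record per _time with one column per field, which is the shape
// fluxRecordToValue expects.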
query := fmt.Sprintf(`
from(bucket: "perf")
|> range(start: %s, stop: %s)
|> filter(fn: (r) => r["_measurement"] == "benchmark-result")
|> filter(fn: (r) => r["name"] == "%s")
|> filter(fn: (r) => r["unit"] == "%s")
|> filter(fn: (r) => r["branch"] == "master")
|> filter(fn: (r) => r["goos"] == "linux")
|> filter(fn: (r) => r["goarch"] == "amd64")
|> pivot(columnKey: ["_field"], rowKey: ["_time"], valueColumn: "_value")
|> yield(name: "last")
`, start.Format(time.RFC3339), end.Format(time.RFC3339), name, unit)
res, err := qc.Query(ctx, query)
if err != nil {
return nil, fmt.Errorf("error performing query: %w", err)
}
b, err := groupBenchmarkResults(res, false)
if err != nil {
return nil, err
}
if len(b) == 0 {
return nil, errBenchmarkNotFound
}
if len(b) > 1 {
return nil, fmt.Errorf("query returned too many benchmarks: %+v", b)
}
return b[0], nil
}
// fetchDefaultBenchmarks queries Influx for the default benchmark set.
func fetchDefaultBenchmarks(ctx context.Context, qc api.QueryAPI, start, end time.Time) ([]*BenchmarkJSON, error) {
// Keep benchmarks with the same name grouped together, which is
// assumed by the JS.
benchmarks := []struct{ name, unit string }{
{
name: "Tile38WithinCircle100kmRequest",
unit: "sec/op",
},
{
name: "Tile38WithinCircle100kmRequest",
unit: "p50-latency-sec",
},
{
name: "Tile38WithinCircle100kmRequest",
unit: "p90-latency-sec",
},
{
name: "Tile38WithinCircle100kmRequest",
unit: "average-RSS-bytes",
},
{
name: "Tile38WithinCircle100kmRequest",
unit: "peak-RSS-bytes",
},
{
name: "BleveQuery",
unit: "sec/op",
},
{
name: "BleveQuery",
unit: "average-RSS-bytes",
},
{
name: "BleveQuery",
unit: "peak-RSS-bytes",
},
{
name: "GoBuildKubelet",
unit: "sec/op",
},
{
name: "GoBuildKubeletLink",
unit: "sec/op",
},
{
name: "RegexMatch-8",
unit: "sec/op",
},
{
name: "BuildJSON-8",
unit: "sec/op",
},
{
name: "ZapJSON-8",
unit: "sec/op",
},
}
ret := make([]*BenchmarkJSON, 0, len(benchmarks))
for _, bench := range benchmarks {
b, err := fetchNamedUnitBenchmark(ctx, qc, start, end, bench.name, bench.unit)
if err != nil {
return nil, fmt.Errorf("error fetching benchmark %s/%s: %w", bench.name, bench.unit, err)
}
ret = append(ret, b)
}
return ret, nil
}
// fetchNamedBenchmark queries Influx for all benchmark results with the passed
// name (for all units).
func fetchNamedBenchmark(ctx context.Context, qc api.QueryAPI, start, end time.Time, name string) ([]*BenchmarkJSON, error) {
if err := validateFluxString(name); err != nil {
return nil, fmt.Errorf("invalid benchmark name: %w", err)
}
query := fmt.Sprintf(`
from(bucket: "perf")
|> range(start: %s, stop: %s)
|> filter(fn: (r) => r["_measurement"] == "benchmark-result")
|> filter(fn: (r) => r["name"] == "%s")
|> filter(fn: (r) => r["branch"] == "master")
|> filter(fn: (r) => r["goos"] == "linux")
|> filter(fn: (r) => r["goarch"] == "amd64")
|> pivot(columnKey: ["_field"], rowKey: ["_time"], valueColumn: "_value")
|> yield(name: "last")
`, start.Format(time.RFC3339), end.Format(time.RFC3339), name)
res, err := qc.Query(ctx, query)
if err != nil {
return nil, fmt.Errorf("error performing query: %w", err)
}
b, err := groupBenchmarkResults(res, false)
if err != nil {
return nil, err
}
if len(b) == 0 {
return nil, errBenchmarkNotFound
}
return b, nil
}
// fetchAllBenchmarks queries Influx for all benchmark results.
func fetchAllBenchmarks(ctx context.Context, qc api.QueryAPI, regressions bool, start, end time.Time) ([]*BenchmarkJSON, error) {
query := fmt.Sprintf(`
from(bucket: "perf")
|> range(start: %s, stop: %s)
|> filter(fn: (r) => r["_measurement"] == "benchmark-result")
|> filter(fn: (r) => r["branch"] == "master")
|> filter(fn: (r) => r["goos"] == "linux")
|> filter(fn: (r) => r["goarch"] == "amd64")
|> pivot(columnKey: ["_field"], rowKey: ["_time"], valueColumn: "_value")
|> yield(name: "last")
`, start.Format(time.RFC3339), end.Format(time.RFC3339))
res, err := qc.Query(ctx, query)
if err != nil {
return nil, fmt.Errorf("error performing query: %w", err)
}
return groupBenchmarkResults(res, regressions)
}
type regression struct {
change float64 // endpoint regression, if any
deltaIndex int // index at which largest increase of regression occurs
deltaScore float64 // score of that change (in 95%ile boxes)
delta float64 // size of that change
unit string
ignoredBecause string
}
// queryToJSON processes a QueryTableResult into a slice of BenchmarkJSON.
// The slice is in no particular order (i.e., it must be sorted or
// run-to-run results will vary). Within each benchmark, however, the values
// are sorted into commit-date order.
func queryToJSON(res *api.QueryTableResult) ([]*BenchmarkJSON, error) {
type key struct {
name string
unit string
}
m := make(map[key]*BenchmarkJSON)
for res.Next() {
rec := res.Record()
name, ok := rec.ValueByKey("name").(string)
if !ok {
return nil, fmt.Errorf("record %s name value got type %T want string", rec, rec.ValueByKey("name"))
}
unit, ok := rec.ValueByKey("unit").(string)
if !ok {
return nil, fmt.Errorf("record %s unit value got type %T want string", rec, rec.ValueByKey("unit"))
}
k := key{name, unit}
b, ok := m[k]
if !ok {
b = &BenchmarkJSON{
Name: name,
Unit: unit,
}
m[k] = b
}
v, err := fluxRecordToValue(rec)
if err != nil {
return nil, err
}
b.Values = append(b.Values, v)
}
s := make([]*BenchmarkJSON, 0, len(m))
for _, b := range m {
// Ensure that the benchmarks are commit-date ordered.
sort.Slice(b.Values, func(i, j int) bool {
return b.Values[i].CommitDate.Before(b.Values[j].CommitDate)
})
s = append(s, b)
}
return s, nil
}
// reorderRegressionsFirst sorts the benchmarks in s so that those with the
// largest detectable regressions come first, and returns a map from benchmark
// name to the worst regression for that name (across all units).
func reorderRegressionsFirst(s []*BenchmarkJSON) map[string]regression {
r := make(map[string]regression) // worst regression for each benchmark name, across all units for that benchmark
// Compute per-benchmark estimates of point where the most interesting regression happened.
// TODO This information might be worth adding to the graphs in general, once we do that for the regressions view.
for _, b := range s {
if worst := worstRegression(b); worst.deltaScore > 1 && worst.delta > r[b.Name].delta {
r[b.Name] = worst
} else if _, ok := r[b.Name]; !ok { // don't overwrite success for one unit w/ failure explanation for another
r[b.Name] = worst // This is for regression ordering debugging and might end up removed.
}
}
// Sort benchmarks with detectable regressions first, ordered by
// size of regression at end of sample. Also sort the remaining
// benchmarks into end-of-sample regression order. Keep benchmarks
// with the same name grouped together, which is assumed by the
// graph presentation server.
sort.Slice(s, func(i, j int) bool {
if s[i].Name == s[j].Name {
return s[i].Unit < s[j].Unit
}
ri, iok := r[s[i].Name]
rj, jok := r[s[j].Name]
if iok != jok {
// a name with regression information attached comes first.
return iok
}
// regressions w/ a delta index come first
if (ri.deltaIndex < 0) != (rj.deltaIndex < 0) {
return rj.deltaIndex < 0
}
if ri.change != rj.change {
// put larger regression first.
return ri.change > rj.change
}
return s[i].Name < s[j].Name
})
return r
}
// renameBenchmarksWithRegressions changes the names of benchmarks to include information about regressions,
// and returns the number of regression points that were detected.
// TODO(drchase, mpratt) better if this can be done in the graph or surrounding text.
func renameBenchmarksWithRegressions(s []*BenchmarkJSON, r map[string]regression) {
detected := 0
// This injects regression information into the titles.
for i, b := range s {
if change, ok := r[b.Name]; ok {
if change.deltaIndex >= 0 {
detected++
v := b.Values[change.deltaIndex]
commit, date := v.CommitHash[:7], v.CommitDate.Format(time.RFC3339)
s[i].Name = fmt.Sprintf("%s %s %3.1f%% regression, %3.1f percentage-point change at %s on %s (score %3.1f)", b.Name, change.unit, 100*change.change, 100*change.delta, commit, date, change.deltaScore)
} else {
s[i].Name = fmt.Sprintf("%s %s not ranked because %s", b.Name, change.unit, change.ignoredBecause)
}
}
}
}
// groupBenchmarkResults groups all benchmark results from the passed query.
// If byRegression is true, the benchmarks with the largest current
// regressions with detectable points are ordered first.
func groupBenchmarkResults(res *api.QueryTableResult, byRegression bool) ([]*BenchmarkJSON, error) {
s, err := queryToJSON(res)
if err != nil {
return nil, err
}
if !byRegression {
// Keep benchmarks with the same name grouped together, which is
// assumed by the JS.
sort.Slice(s, func(i, j int) bool {
if s[i].Name == s[j].Name {
return s[i].Unit < s[j].Unit
}
return s[i].Name < s[j].Name
})
return s, nil
}
regressions := reorderRegressionsFirst(s)
renameBenchmarksWithRegressions(s, regressions)
return s, nil
}
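// gzipResponseWriter routes response bodies through a gzip.Writer while
// leaving header writes on the underlying http.ResponseWriter.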
type gzipResponseWriter struct {
http.ResponseWriter
w *gzip.Writer
}
func (w *gzipResponseWriter) Write(b []byte) (int, error) {
return w.w.Write(b)
}
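// defaultDays is the query window used when no days parameter is supplied;
// maxDays caps the parameter at just over one year.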
const (
defaultDays = 30
maxDays = 366
)
// changeScore returns an indicator of the change and direction.
// This is a heuristic measure of the lack of overlap between
// two confidence intervals; minimum lack of overlap (i.e., same
// confidence intervals) is zero. Exact non-overlap, meaning
// the high end of one interval is equal to the low end of the
// other, is one. A gap of size G between the two intervals
// yields a score of 1 + G/M where M is the size of the larger
// interval (this suppresses change scores adjacent to noise).
// A partial overlap of size G yields a score of
// 1 - G/M.
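//
// For example (illustrative numbers): intervals [1, 3] and [5, 9] do not
// overlap; the gap is G = 5-3 = 2 and the larger interval has size M = 4,
// so the score is 1 + 2/4 = 1.5. Intervals [1, 5] and [4, 8] overlap by
// G = 1 with M = 4, giving 1 - 1/4 = 0.75.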
//
// Empty confidence intervals are problematic and produce infinities
// or NaNs.
func changeScore(l1, c1, h1, l2, c2, h2 float64) float64 {
sign := 1.0
if c1 > c2 {
l1, c1, h1, l2, c2, h2 = l2, c2, h2, l1, c1, h1
sign = -sign
}
r := math.Max(h1-l1, h2-l2)
// we know l1 < c1 < h1, c1 < c2, l2 < c2 < h2
// therefore l1 < c1 < c2 < h2
if h1 > l2 { // overlap
overlapHigh, overlapLow := h1, l2
if overlapHigh > h2 {
overlapHigh = h2
}
if overlapLow < l1 {
overlapLow = l1
}
return sign * (1 - (overlapHigh-overlapLow)/r) // perfect overlap == 0
} else { // no overlap
return sign * (1 + (l2-h1)/r) // just touching, l2 == h1, magnitude == 1, and then increases w/ the gap between intervals.
}
}
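// worstRegression scans b.Values for the most recent outlier regression,
// scored by changeScore between adjacent points. The result has
// deltaIndex == -1 (with ignoredBecause set) when the series is too short,
// too noisy, or contains no detectable outlier.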
func worstRegression(b *BenchmarkJSON) regression {
values := b.Values
l := len(values)
ninf := math.Inf(-1)
sign := 1.0
// TODO: a unit's "good direction" should come from benchmark metadata; for now, hardcode the known cases.
switch b.Unit {
case "B/s", "ops/s":
sign = -1
}
min := sign * values[l-1].Center
worst := regression{deltaScore: ninf, deltaIndex: -1, change: min, unit: b.Unit}
if len(values) < 4 {
worst.ignoredBecause = "too few values"
return worst
}
scores := []float64{}
// First classify benchmarks that are too darn noisy, and get a feel for noisiness.
for i := l - 1; i > 0; i-- {
v1, v0 := values[i-1], values[i]
scores = append(scores, math.Abs(changeScore(v1.Low, v1.Center, v1.High, v0.Low, v0.Center, v0.High)))
}
sort.Float64s(scores)
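// Median of the sorted scores; averaging the two middle elements handles
// both even and odd counts.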
median := (scores[len(scores)/2] + scores[(len(scores)-1)/2]) / 2
// MAGIC NUMBER "1". Removing this added 25% to the "detected regressions", but they were all junk.
if median > 1 {
worst.ignoredBecause = "median change score > 1"
return worst
}
if math.IsNaN(median) {
worst.ignoredBecause = "median is NaN"
return worst
}
// MAGIC NUMBER "1.2". Smaller than that tends to admit junky benchmarks.
magicScoreThreshold := math.Max(2*median, 1.2)
// Scan backwards, looking for the most recent outlier regression.
for i := l - 1; i > 0; i-- {
v1, v0 := values[i-1], values[i]
score := sign * changeScore(v1.Low, v1.Center, v1.High, v0.Low, v0.Center, v0.High)
if score > magicScoreThreshold && sign*v1.Center < min && score > worst.deltaScore {
worst.deltaIndex = i
worst.deltaScore = score
worst.delta = sign * (v0.Center - v1.Center)
}
min = math.Min(sign*v0.Center, min)
}
if worst.deltaIndex == -1 {
worst.ignoredBecause = "didn't detect outlier regression"
}
return worst
}
// dashboardData handles /dashboard/data.json.
//
// TODO(prattmic): Consider caching Influx results in-memory for a few minutes
// to reduce load on Influx.
func (a *App) dashboardData(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
days := uint64(defaultDays)
dayParam := r.FormValue("days")
if dayParam != "" {
var err error
days, err = strconv.ParseUint(dayParam, 10, 32)
if err != nil {
log.Printf("Error parsing days %q: %v", dayParam, err)
http.Error(w, fmt.Sprintf("days parameter must be a positive integer less than or equal to %d", maxDays), http.StatusBadRequest)
return
}
if days == 0 || days > maxDays {
log.Printf("days %d out of range [1, %d]", days, maxDays)
http.Error(w, fmt.Sprintf("days parameter must be a positive integer less than or equal to %d", maxDays), http.StatusBadRequest)
return
}
}
end := time.Now()
endParam := r.FormValue("end")
if endParam != "" {
var err error
// Quirk: Browsers don't have an easy built-in way to deal with
// timezone in input boxes. The datetime input type yields a
// string in this form, with no timezone (either local or UTC).
// Thus, we just treat this as UTC.
end, err = time.Parse("2006-01-02T15:04", endParam)
if err != nil {
log.Printf("Error parsing end %q: %v", endParam, err)
http.Error(w, "end parameter must be a timestamp similar to RFC3339 without a time zone, like 2000-12-31T15:00", http.StatusBadRequest)
return
}
}
start := end.Add(-24 * time.Hour * time.Duration(days))
methStart := time.Now()
defer func() {
log.Printf("Dashboard total query time: %s", time.Since(methStart))
}()
ifxc, err := a.influxClient(ctx)
if err != nil {
log.Printf("Error getting Influx client: %v", err)
http.Error(w, "Error connecting to Influx", http.StatusInternalServerError)
return
}
defer ifxc.Close()
qc := ifxc.QueryAPI(influx.Org)
benchmark := r.FormValue("benchmark")
var benchmarks []*BenchmarkJSON
if benchmark == "" {
benchmarks, err = fetchDefaultBenchmarks(ctx, qc, start, end)
} else if benchmark == "all" {
benchmarks, err = fetchAllBenchmarks(ctx, qc, false, start, end)
} else if benchmark == "regressions" {
benchmarks, err = fetchAllBenchmarks(ctx, qc, true, start, end)
} else {
benchmarks, err = fetchNamedBenchmark(ctx, qc, start, end, benchmark)
}
if errors.Is(err, errBenchmarkNotFound) {
log.Printf("Benchmark not found: %q", benchmark)
http.Error(w, "Benchmark not found", http.StatusNotFound)
return
}
if err != nil {
log.Printf("Error fetching benchmarks: %v", err)
http.Error(w, "Error fetching benchmarks", http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if strings.Contains(r.Header.Get("Accept-Encoding"), "gzip") {
w.Header().Set("Content-Encoding", "gzip")
gz := gzip.NewWriter(w)
defer gz.Close()
w = &gzipResponseWriter{w: gz, ResponseWriter: w}
}
w.WriteHeader(http.StatusOK)
e := json.NewEncoder(w)
if err := e.Encode(benchmarks); err != nil {
log.Printf("Error encoding response: %v", err)
}
}