benchstat: add support for sorting benchstat output.
Change-Id: If63789fdbe901ca6894a2d00860bedab3160d757
Reviewed-on: https://go-review.googlesource.com/88915
Reviewed-by: Russ Cox <rsc@golang.org>
diff --git a/benchstat/data.go b/benchstat/data.go
index 1123532..50777e8 100644
--- a/benchstat/data.go
+++ b/benchstat/data.go
@@ -44,6 +44,12 @@
// SplitBy specifies the labels to split results by.
// By default, results will only be split by full name.
SplitBy []string
+
+ // SortBy specifies the function by which tables in this collection
+ // should be sorted.
+ // By default, tables will not be specifically sorted, and will appear
+ // in the order they were read in
+ SortBy SortFunc
}
// A Key identifies one metric (e.g., "ns/op", "B/op") from one
diff --git a/benchstat/sort.go b/benchstat/sort.go
new file mode 100644
index 0000000..ef68821
--- /dev/null
+++ b/benchstat/sort.go
@@ -0,0 +1,43 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package benchstat
+
+import (
+ "math"
+ "sort"
+)
+
+// A SortFunc abstracts the sorting interface to compare two rows of a Table
+type SortFunc func(*Table, int, int) bool
+
+// ByName sorts tables by the Benchmark name column
+func ByName(t *Table, i, j int) bool {
+ return t.Rows[i].Benchmark < t.Rows[j].Benchmark
+}
+
+// ByDelta sorts tables by the Delta column (comparing the numerical value
+// rather than the lexical value)
+// The sort takes into account the Change value as well, which indicates
+// whether a given delta is "good" or "bad"
+func ByDelta(t *Table, i, j int) bool {
+ return math.Abs(t.Rows[i].PctDelta)*float64(t.Rows[i].Change) <
+ math.Abs(t.Rows[j].PctDelta)*float64(t.Rows[j].Change)
+}
+
+// ByChange sorts tables by the unprinted Change column which indicates
+// whether a delta is negative, zero, or positive
+func ByChange(t *Table, i, j int) bool {
+ return t.Rows[i].Change < t.Rows[j].Change
+}
+
+// SortReverse returns a SortFunc that is the reverse of the input SortFunc
+func SortReverse(sortFunc SortFunc) SortFunc {
+ return func(t *Table, i, j int) bool { return !sortFunc(t, i, j) }
+}
+
+// SortTable sorts a Table t (in place) by the given SortFunc
+func SortTable(t *Table, sortFunc SortFunc) {
+ sort.Slice(t.Rows, func(i, j int) bool { return sortFunc(t, i, j) })
+}
diff --git a/benchstat/sort_test.go b/benchstat/sort_test.go
new file mode 100644
index 0000000..618490c
--- /dev/null
+++ b/benchstat/sort_test.go
@@ -0,0 +1,132 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package benchstat
+
+import (
+ "io/ioutil"
+ "log"
+ "sort"
+ "testing"
+)
+
+var file1 = "../cmd/benchstat/testdata/old.txt"
+var file2 = "../cmd/benchstat/testdata/new.txt"
+
+func extractRowBenchmark(row *Row) string {
+ return row.Benchmark
+}
+func extractRowDelta(row *Row) float64 {
+ return row.PctDelta
+}
+func extractRowChange(row *Row) int {
+ return row.Change
+}
+
+func benchmarkSortTest(t *testing.T, sampleTable *Table) {
+ numRows := len(sampleTable.Rows)
+ benchmarks := make([]string, numRows)
+ SortTable(sampleTable, ByName)
+ for idx, row := range sampleTable.Rows {
+ benchmarks[idx] = extractRowBenchmark(row)
+ }
+ t.Run("BenchSorted", func(t *testing.T) {
+ if !sort.StringsAreSorted(benchmarks) {
+ t.Error("Table not sorted by benchmarks")
+ }
+ })
+ SortTable(sampleTable, SortReverse(ByName))
+ for idx, row := range sampleTable.Rows {
+ benchmarks[numRows-idx-1] = extractRowBenchmark(row)
+ }
+ t.Run("BenchSortReversed", func(t *testing.T) {
+ if !sort.StringsAreSorted(benchmarks) {
+ t.Error("Table not reverse sorted by benchmarks")
+ }
+ })
+}
+
+func deltaSortTest(t *testing.T, sampleTable *Table) {
+ numRows := len(sampleTable.Rows)
+ deltas := make([]float64, numRows)
+ SortTable(sampleTable, ByDelta)
+ for idx, row := range sampleTable.Rows {
+ deltas[idx] = extractRowDelta(row)
+ }
+ t.Run("DeltaSorted", func(t *testing.T) {
+ if !sort.Float64sAreSorted(deltas) {
+ t.Error("Table not sorted by deltas")
+ }
+ })
+ SortTable(sampleTable, SortReverse(ByDelta))
+ for idx, row := range sampleTable.Rows {
+ deltas[numRows-idx-1] = extractRowDelta(row)
+ }
+ t.Run("DeltaSortReversed", func(t *testing.T) {
+ if !sort.Float64sAreSorted(deltas) {
+ t.Error("Table not reverse sorted by deltas")
+ }
+ })
+}
+
+func changeSortTest(t *testing.T, sampleTable *Table) {
+ numRows := len(sampleTable.Rows)
+ changes := make([]int, numRows)
+ SortTable(sampleTable, ByChange)
+ for idx, row := range sampleTable.Rows {
+ changes[idx] = extractRowChange(row)
+ }
+ t.Run("ChangeSorted", func(t *testing.T) {
+ if !sort.IntsAreSorted(changes) {
+ t.Error("Table not sorted by changes")
+ }
+ })
+ SortTable(sampleTable, SortReverse(ByChange))
+ for idx, row := range sampleTable.Rows {
+ changes[numRows-idx-1] = extractRowChange(row)
+ }
+ t.Run("ChangeSortReversed", func(t *testing.T) {
+ if !sort.IntsAreSorted(changes) {
+ t.Error("Table not reverse sorted by changes")
+ }
+ })
+}
+
+func TestCompareCollection(t *testing.T) {
+ sampleCompareCollection := Collection{Alpha: 0.05, AddGeoMean: false, DeltaTest: UTest}
+ file1Data, err := ioutil.ReadFile(file1)
+ if err != nil {
+ log.Fatal(err)
+ }
+ file2Data, err := ioutil.ReadFile(file2)
+ if err != nil {
+ log.Fatal(err)
+ }
+ sampleCompareCollection.AddConfig(file1, file1Data)
+ sampleCompareCollection.AddConfig(file2, file2Data)
+ // data has both time and speed tables, test only the speed table
+ sampleTable := sampleCompareCollection.Tables()[0]
+ t.Run("BenchmarkSort", func(t *testing.T) {
+ benchmarkSortTest(t, sampleTable)
+ })
+ t.Run("DeltaSort", func(t *testing.T) {
+ deltaSortTest(t, sampleTable)
+ })
+ t.Run("ChangeSort", func(t *testing.T) {
+ changeSortTest(t, sampleTable)
+ })
+}
+
+func TestSingleCollection(t *testing.T) {
+ sampleCollection := Collection{Alpha: 0.05, AddGeoMean: false, DeltaTest: UTest}
+ file1Data, err1 := ioutil.ReadFile(file1)
+ if err1 != nil {
+ log.Fatal(err1)
+ }
+ sampleCollection.AddConfig(file1, file1Data)
+ sampleTable := sampleCollection.Tables()[0]
+ t.Run("BenchmarkSort", func(t *testing.T) {
+ benchmarkSortTest(t, sampleTable)
+ })
+}
diff --git a/benchstat/table.go b/benchstat/table.go
index cd8a027..b90cfd7 100644
--- a/benchstat/table.go
+++ b/benchstat/table.go
@@ -26,6 +26,7 @@
Group string // group name
Scaler Scaler // formatter for stats means
Metrics []*Metrics // columns of statistics
+ PctDelta float64 // unformatted percent change
Delta string // formatted percent change
Note string // additional information
Change int // +1 better, -1 worse, 0 unchanged
@@ -89,6 +90,7 @@
continue
}
pval, testerr := deltaTest(old, new)
+ row.PctDelta = 0.00
row.Delta = "~"
if testerr == stats.ErrZeroVariance {
row.Note = "(zero variance)"
@@ -103,6 +105,7 @@
row.Delta = "0.00%"
} else {
pct := ((new.Mean / old.Mean) - 1.0) * 100.0
+ row.PctDelta = pct
row.Delta = fmt.Sprintf("%+.2f%%", pct)
if pct < 0 == (table.Metric != "speed") { // smaller is better, except speeds
row.Change = +1
@@ -121,6 +124,9 @@
}
if len(table.Rows) > 0 {
+ if c.SortBy != nil {
+ SortTable(table, c.SortBy)
+ }
if c.AddGeoMean {
addGeomean(c, table, key.Unit, table.OldNewDelta)
}
@@ -200,7 +206,9 @@
return
}
if delta {
- row.Delta = fmt.Sprintf("%+.2f%%", ((geomeans[1]/geomeans[0])-1.0)*100.0)
+ pct := ((geomeans[1] / geomeans[0]) - 1.0) * 100.0
+ row.PctDelta = pct
+ row.Delta = fmt.Sprintf("%+.2f%%", pct)
}
t.Rows = append(t.Rows, row)
}
diff --git a/cmd/benchstat/main.go b/cmd/benchstat/main.go
index 1ec3808..43830ec 100644
--- a/cmd/benchstat/main.go
+++ b/cmd/benchstat/main.go
@@ -110,11 +110,13 @@
}
var (
- flagDeltaTest = flag.String("delta-test", "utest", "significance `test` to apply to delta: utest, ttest, or none")
- flagAlpha = flag.Float64("alpha", 0.05, "consider change significant if p < `α`")
- flagGeomean = flag.Bool("geomean", false, "print the geometric mean of each file")
- flagHTML = flag.Bool("html", false, "print results as an HTML table")
- flagSplit = flag.String("split", "pkg,goos,goarch", "split benchmarks by `labels`")
+ flagDeltaTest = flag.String("delta-test", "utest", "significance `test` to apply to delta: utest, ttest, or none")
+ flagAlpha = flag.Float64("alpha", 0.05, "consider change significant if p < `α`")
+ flagGeomean = flag.Bool("geomean", false, "print the geometric mean of each file")
+ flagHTML = flag.Bool("html", false, "print results as an HTML table")
+ flagSplit = flag.String("split", "pkg,goos,goarch", "split benchmarks by `labels`")
+ flagSort = flag.String("sort", "none", "sort by this `header`: benchmark, delta, change")
+ flagReverseSort = flag.Bool("reverse", false, "reverse the sort order")
)
var deltaTestNames = map[string]benchstat.DeltaTest{
@@ -127,13 +129,21 @@
"ttest": benchstat.TTest,
}
+var sortNames = map[string]benchstat.SortFunc{
+ "none": nil,
+ "benchmark": benchstat.ByName,
+ "delta": benchstat.ByDelta,
+ "change": benchstat.ByChange,
+}
+
func main() {
log.SetPrefix("benchstat: ")
log.SetFlags(0)
flag.Usage = usage
flag.Parse()
deltaTest := deltaTestNames[strings.ToLower(*flagDeltaTest)]
- if flag.NArg() < 1 || deltaTest == nil {
+ sortType, ok := sortNames[strings.ToLower(*flagSort)]
+ if flag.NArg() < 1 || deltaTest == nil || !ok {
flag.Usage()
}
@@ -145,6 +155,12 @@
if *flagSplit != "" {
c.SplitBy = strings.Split(*flagSplit, ",")
}
+ if sortType != nil {
+ if *flagReverseSort {
+ sortType = benchstat.SortReverse(sortType)
+ }
+ c.SortBy = sortType
+ }
for _, file := range flag.Args() {
data, err := ioutil.ReadFile(file)
if err != nil {
@@ -154,7 +170,6 @@
}
tables := c.Tables()
-
var buf bytes.Buffer
if *flagHTML {
buf.WriteString(htmlHeader)
diff --git a/cmd/benchstat/main_test.go b/cmd/benchstat/main_test.go
index 4a50020..4cc56eb 100644
--- a/cmd/benchstat/main_test.go
+++ b/cmd/benchstat/main_test.go
@@ -40,6 +40,9 @@
check(t, "packages", "packagesold.txt", "packagesnew.txt")
check(t, "units", "units-old.txt", "units-new.txt")
check(t, "zero", "-delta-test=none", "zero-old.txt", "zero-new.txt")
+ check(t, "benchsort", "-sort=benchmark", "old.txt", "new.txt")
+ check(t, "deltasort", "-sort=delta", "old.txt", "new.txt")
+ check(t, "changesort", "-sort=change", "old.txt", "new.txt")
}
func check(t *testing.T, name string, files ...string) {
diff --git a/cmd/benchstat/testdata/benchsort.golden b/cmd/benchstat/testdata/benchsort.golden
new file mode 100644
index 0000000..2dea70b
--- /dev/null
+++ b/cmd/benchstat/testdata/benchsort.golden
@@ -0,0 +1,75 @@
+name old time/op new time/op delta
+CRC32/poly=Castagnoli/size=15/align=0-8 16.4ns ± 3% 16.3ns ± 2% ~ (p=0.615 n=9+9)
+CRC32/poly=Castagnoli/size=15/align=1-8 17.2ns ± 2% 17.3ns ± 2% ~ (p=0.650 n=9+10)
+CRC32/poly=Castagnoli/size=1kB/align=0-8 65.5ns ± 1% 66.2ns ± 1% +1.01% (p=0.003 n=9+8)
+CRC32/poly=Castagnoli/size=1kB/align=1-8 70.1ns ± 6% 68.5ns ± 2% ~ (p=0.190 n=10+9)
+CRC32/poly=Castagnoli/size=32kB/align=0-8 1.22µs ± 4% 1.21µs ± 3% ~ (p=0.882 n=9+9)
+CRC32/poly=Castagnoli/size=32kB/align=1-8 1.26µs ± 3% 1.22µs ± 4% -3.48% (p=0.002 n=9+10)
+CRC32/poly=Castagnoli/size=40/align=0-8 17.4ns ± 2% 17.5ns ± 4% ~ (p=0.694 n=10+10)
+CRC32/poly=Castagnoli/size=40/align=1-8 19.7ns ± 3% 19.4ns ± 2% -1.62% (p=0.036 n=10+10)
+CRC32/poly=Castagnoli/size=4kB/align=0-8 163ns ± 5% 159ns ± 3% -2.46% (p=0.032 n=10+10)
+CRC32/poly=Castagnoli/size=4kB/align=1-8 169ns ± 6% 162ns ± 3% -4.60% (p=0.005 n=10+10)
+CRC32/poly=Castagnoli/size=512/align=0-8 40.2ns ± 2% 40.1ns ± 4% ~ (p=0.614 n=10+10)
+CRC32/poly=Castagnoli/size=512/align=1-8 42.1ns ± 3% 41.9ns ± 2% ~ (p=0.952 n=10+9)
+CRC32/poly=IEEE/size=15/align=0-8 46.9ns ± 8% 44.5ns ± 3% -5.01% (p=0.008 n=10+10)
+CRC32/poly=IEEE/size=15/align=1-8 44.7ns ± 5% 44.5ns ± 4% ~ (p=0.539 n=10+10)
+CRC32/poly=IEEE/size=1kB/align=0-8 452ns ± 4% 94ns ± 2% -79.20% (p=0.000 n=10+8)
+CRC32/poly=IEEE/size=1kB/align=1-8 444ns ± 2% 93ns ± 2% -78.97% (p=0.000 n=10+8)
+CRC32/poly=IEEE/size=32kB/align=0-8 15.0µs ± 7% 2.2µs ± 3% -85.57% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=32kB/align=1-8 14.2µs ± 7% 2.2µs ± 3% -84.65% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=40/align=0-8 41.0ns ± 1% 42.5ns ± 6% +3.56% (p=0.000 n=8+10)
+CRC32/poly=IEEE/size=40/align=1-8 41.1ns ± 1% 42.0ns ± 3% +2.34% (p=0.000 n=9+10)
+CRC32/poly=IEEE/size=4kB/align=0-8 1.74µs ± 8% 0.30µs ± 1% -82.87% (p=0.000 n=10+9)
+CRC32/poly=IEEE/size=4kB/align=1-8 1.76µs ± 6% 0.30µs ± 3% -83.05% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=512/align=0-8 238ns ± 5% 57ns ± 3% -76.00% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=512/align=1-8 236ns ± 3% 57ns ± 3% -75.72% (p=0.000 n=10+10)
+CRC32/poly=Koopman/size=15/align=0-8 36.5ns ±11% 35.6ns ± 3% ~ (p=0.216 n=10+10)
+CRC32/poly=Koopman/size=15/align=1-8 35.1ns ± 5% 35.5ns ± 1% ~ (p=0.508 n=10+9)
+CRC32/poly=Koopman/size=1kB/align=0-8 2.24µs ± 6% 2.34µs ± 4% +4.34% (p=0.010 n=9+10)
+CRC32/poly=Koopman/size=1kB/align=1-8 2.15µs ± 2% 2.36µs ± 5% +9.84% (p=0.000 n=9+10)
+CRC32/poly=Koopman/size=32kB/align=0-8 72.4µs ± 9% 72.9µs ± 4% ~ (p=0.684 n=10+10)
+CRC32/poly=Koopman/size=32kB/align=1-8 69.6µs ± 3% 74.3µs ± 3% +6.70% (p=0.000 n=8+10)
+CRC32/poly=Koopman/size=40/align=0-8 91.6ns ± 9% 87.6ns ± 2% -4.35% (p=0.002 n=10+10)
+CRC32/poly=Koopman/size=40/align=1-8 91.1ns ± 6% 88.0ns ± 3% ~ (p=0.055 n=10+10)
+CRC32/poly=Koopman/size=4kB/align=0-8 9.03µs ± 6% 9.00µs ± 6% ~ (p=0.971 n=10+10)
+CRC32/poly=Koopman/size=4kB/align=1-8 8.94µs ±10% 9.05µs ±12% ~ (p=0.754 n=10+10)
+CRC32/poly=Koopman/size=512/align=0-8 1.13µs ± 5% 1.08µs ± 3% -4.93% (p=0.000 n=10+10)
+CRC32/poly=Koopman/size=512/align=1-8 1.13µs ± 6% 1.17µs ± 8% ~ (p=0.143 n=10+10)
+
+name old speed new speed delta
+CRC32/poly=Castagnoli/size=15/align=0-8 916MB/s ± 2% 920MB/s ± 2% ~ (p=0.489 n=9+9)
+CRC32/poly=Castagnoli/size=15/align=1-8 870MB/s ± 2% 867MB/s ± 2% ~ (p=0.661 n=9+10)
+CRC32/poly=Castagnoli/size=1kB/align=0-8 15.6GB/s ± 1% 15.5GB/s ± 1% -1.02% (p=0.002 n=9+8)
+CRC32/poly=Castagnoli/size=1kB/align=1-8 14.6GB/s ± 6% 15.0GB/s ± 2% ~ (p=0.211 n=10+9)
+CRC32/poly=Castagnoli/size=32kB/align=0-8 26.9GB/s ± 4% 26.8GB/s ± 5% ~ (p=0.842 n=9+10)
+CRC32/poly=Castagnoli/size=32kB/align=1-8 25.9GB/s ± 3% 26.8GB/s ± 4% +3.62% (p=0.002 n=9+10)
+CRC32/poly=Castagnoli/size=40/align=0-8 2.30GB/s ± 2% 2.28GB/s ± 4% ~ (p=0.684 n=10+10)
+CRC32/poly=Castagnoli/size=40/align=1-8 2.03GB/s ± 3% 2.06GB/s ± 2% ~ (p=0.063 n=10+10)
+CRC32/poly=Castagnoli/size=4kB/align=0-8 25.1GB/s ± 5% 25.7GB/s ± 3% ~ (p=0.052 n=10+10)
+CRC32/poly=Castagnoli/size=4kB/align=1-8 24.1GB/s ± 6% 25.3GB/s ± 3% +4.71% (p=0.005 n=10+10)
+CRC32/poly=Castagnoli/size=512/align=0-8 12.7GB/s ± 2% 12.8GB/s ± 4% ~ (p=0.529 n=10+10)
+CRC32/poly=Castagnoli/size=512/align=1-8 12.1GB/s ± 3% 12.2GB/s ± 1% ~ (p=0.780 n=10+9)
+CRC32/poly=IEEE/size=15/align=0-8 321MB/s ± 8% 337MB/s ± 3% +5.06% (p=0.009 n=10+10)
+CRC32/poly=IEEE/size=15/align=1-8 336MB/s ± 4% 337MB/s ± 4% ~ (p=0.579 n=10+10)
+CRC32/poly=IEEE/size=1kB/align=0-8 2.26GB/s ± 4% 10.88GB/s ± 2% +381.12% (p=0.000 n=10+8)
+CRC32/poly=IEEE/size=1kB/align=1-8 2.31GB/s ± 2% 10.98GB/s ± 2% +375.97% (p=0.000 n=10+8)
+CRC32/poly=IEEE/size=32kB/align=0-8 2.19GB/s ± 7% 15.19GB/s ± 3% +591.99% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=32kB/align=1-8 2.31GB/s ± 8% 15.04GB/s ± 3% +550.07% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=40/align=0-8 975MB/s ± 1% 942MB/s ± 5% -3.37% (p=0.001 n=8+10)
+CRC32/poly=IEEE/size=40/align=1-8 974MB/s ± 1% 952MB/s ± 3% -2.25% (p=0.000 n=9+10)
+CRC32/poly=IEEE/size=4kB/align=0-8 2.36GB/s ± 7% 13.73GB/s ± 1% +482.26% (p=0.000 n=10+9)
+CRC32/poly=IEEE/size=4kB/align=1-8 2.33GB/s ± 6% 13.68GB/s ± 3% +488.23% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=512/align=0-8 2.15GB/s ± 4% 8.97GB/s ± 3% +317.65% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=512/align=1-8 2.17GB/s ± 3% 8.96GB/s ± 3% +312.89% (p=0.000 n=10+10)
+CRC32/poly=Koopman/size=15/align=0-8 412MB/s ±10% 421MB/s ± 3% ~ (p=0.218 n=10+10)
+CRC32/poly=Koopman/size=15/align=1-8 427MB/s ± 5% 422MB/s ± 1% ~ (p=0.497 n=10+9)
+CRC32/poly=Koopman/size=1kB/align=0-8 452MB/s ± 9% 438MB/s ± 4% ~ (p=0.052 n=10+10)
+CRC32/poly=Koopman/size=1kB/align=1-8 477MB/s ± 2% 434MB/s ± 5% -8.92% (p=0.000 n=9+10)
+CRC32/poly=Koopman/size=32kB/align=0-8 453MB/s ± 8% 450MB/s ± 4% ~ (p=0.684 n=10+10)
+CRC32/poly=Koopman/size=32kB/align=1-8 471MB/s ± 3% 441MB/s ± 3% -6.25% (p=0.000 n=8+10)
+CRC32/poly=Koopman/size=40/align=0-8 437MB/s ± 9% 456MB/s ± 2% +4.50% (p=0.002 n=10+10)
+CRC32/poly=Koopman/size=40/align=1-8 440MB/s ± 6% 455MB/s ± 3% ~ (p=0.052 n=10+10)
+CRC32/poly=Koopman/size=4kB/align=0-8 454MB/s ± 5% 455MB/s ± 6% ~ (p=0.971 n=10+10)
+CRC32/poly=Koopman/size=4kB/align=1-8 459MB/s ± 9% 455MB/s ±11% ~ (p=0.739 n=10+10)
+CRC32/poly=Koopman/size=512/align=0-8 453MB/s ± 5% 476MB/s ± 3% +5.09% (p=0.000 n=10+10)
+CRC32/poly=Koopman/size=512/align=1-8 455MB/s ± 6% 440MB/s ± 8% ~ (p=0.143 n=10+10)
diff --git a/cmd/benchstat/testdata/changesort.golden b/cmd/benchstat/testdata/changesort.golden
new file mode 100644
index 0000000..83bfd24
--- /dev/null
+++ b/cmd/benchstat/testdata/changesort.golden
@@ -0,0 +1,75 @@
+name old time/op new time/op delta
+CRC32/poly=Castagnoli/size=1kB/align=0-8 65.5ns ± 1% 66.2ns ± 1% +1.01% (p=0.003 n=9+8)
+CRC32/poly=Koopman/size=1kB/align=1-8 2.15µs ± 2% 2.36µs ± 5% +9.84% (p=0.000 n=9+10)
+CRC32/poly=IEEE/size=40/align=0-8 41.0ns ± 1% 42.5ns ± 6% +3.56% (p=0.000 n=8+10)
+CRC32/poly=IEEE/size=40/align=1-8 41.1ns ± 1% 42.0ns ± 3% +2.34% (p=0.000 n=9+10)
+CRC32/poly=Koopman/size=1kB/align=0-8 2.24µs ± 6% 2.34µs ± 4% +4.34% (p=0.010 n=9+10)
+CRC32/poly=Koopman/size=32kB/align=1-8 69.6µs ± 3% 74.3µs ± 3% +6.70% (p=0.000 n=8+10)
+CRC32/poly=Castagnoli/size=32kB/align=0-8 1.22µs ± 4% 1.21µs ± 3% ~ (p=0.882 n=9+9)
+CRC32/poly=Koopman/size=32kB/align=0-8 72.4µs ± 9% 72.9µs ± 4% ~ (p=0.684 n=10+10)
+CRC32/poly=Koopman/size=4kB/align=1-8 8.94µs ±10% 9.05µs ±12% ~ (p=0.754 n=10+10)
+CRC32/poly=Koopman/size=4kB/align=0-8 9.03µs ± 6% 9.00µs ± 6% ~ (p=0.971 n=10+10)
+CRC32/poly=IEEE/size=15/align=1-8 44.7ns ± 5% 44.5ns ± 4% ~ (p=0.539 n=10+10)
+CRC32/poly=Koopman/size=512/align=1-8 1.13µs ± 6% 1.17µs ± 8% ~ (p=0.143 n=10+10)
+CRC32/poly=Castagnoli/size=15/align=0-8 16.4ns ± 3% 16.3ns ± 2% ~ (p=0.615 n=9+9)
+CRC32/poly=Castagnoli/size=15/align=1-8 17.2ns ± 2% 17.3ns ± 2% ~ (p=0.650 n=9+10)
+CRC32/poly=Castagnoli/size=40/align=0-8 17.4ns ± 2% 17.5ns ± 4% ~ (p=0.694 n=10+10)
+CRC32/poly=Koopman/size=40/align=1-8 91.1ns ± 6% 88.0ns ± 3% ~ (p=0.055 n=10+10)
+CRC32/poly=Castagnoli/size=512/align=0-8 40.2ns ± 2% 40.1ns ± 4% ~ (p=0.614 n=10+10)
+CRC32/poly=Castagnoli/size=512/align=1-8 42.1ns ± 3% 41.9ns ± 2% ~ (p=0.952 n=10+9)
+CRC32/poly=Koopman/size=15/align=1-8 35.1ns ± 5% 35.5ns ± 1% ~ (p=0.508 n=10+9)
+CRC32/poly=Castagnoli/size=1kB/align=1-8 70.1ns ± 6% 68.5ns ± 2% ~ (p=0.190 n=10+9)
+CRC32/poly=Koopman/size=15/align=0-8 36.5ns ±11% 35.6ns ± 3% ~ (p=0.216 n=10+10)
+CRC32/poly=IEEE/size=1kB/align=0-8 452ns ± 4% 94ns ± 2% -79.20% (p=0.000 n=10+8)
+CRC32/poly=Castagnoli/size=4kB/align=1-8 169ns ± 6% 162ns ± 3% -4.60% (p=0.005 n=10+10)
+CRC32/poly=Castagnoli/size=32kB/align=1-8 1.26µs ± 3% 1.22µs ± 4% -3.48% (p=0.002 n=9+10)
+CRC32/poly=Castagnoli/size=4kB/align=0-8 163ns ± 5% 159ns ± 3% -2.46% (p=0.032 n=10+10)
+CRC32/poly=IEEE/size=512/align=1-8 236ns ± 3% 57ns ± 3% -75.72% (p=0.000 n=10+10)
+CRC32/poly=Koopman/size=40/align=0-8 91.6ns ± 9% 87.6ns ± 2% -4.35% (p=0.002 n=10+10)
+CRC32/poly=Castagnoli/size=40/align=1-8 19.7ns ± 3% 19.4ns ± 2% -1.62% (p=0.036 n=10+10)
+CRC32/poly=Koopman/size=512/align=0-8 1.13µs ± 5% 1.08µs ± 3% -4.93% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=32kB/align=1-8 14.2µs ± 7% 2.2µs ± 3% -84.65% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=512/align=0-8 238ns ± 5% 57ns ± 3% -76.00% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=32kB/align=0-8 15.0µs ± 7% 2.2µs ± 3% -85.57% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=4kB/align=1-8 1.76µs ± 6% 0.30µs ± 3% -83.05% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=4kB/align=0-8 1.74µs ± 8% 0.30µs ± 1% -82.87% (p=0.000 n=10+9)
+CRC32/poly=IEEE/size=1kB/align=1-8 444ns ± 2% 93ns ± 2% -78.97% (p=0.000 n=10+8)
+CRC32/poly=IEEE/size=15/align=0-8 46.9ns ± 8% 44.5ns ± 3% -5.01% (p=0.008 n=10+10)
+
+name old speed new speed delta
+CRC32/poly=Castagnoli/size=1kB/align=0-8 15.6GB/s ± 1% 15.5GB/s ± 1% -1.02% (p=0.002 n=9+8)
+CRC32/poly=Koopman/size=1kB/align=1-8 477MB/s ± 2% 434MB/s ± 5% -8.92% (p=0.000 n=9+10)
+CRC32/poly=IEEE/size=40/align=0-8 975MB/s ± 1% 942MB/s ± 5% -3.37% (p=0.001 n=8+10)
+CRC32/poly=IEEE/size=40/align=1-8 974MB/s ± 1% 952MB/s ± 3% -2.25% (p=0.000 n=9+10)
+CRC32/poly=Koopman/size=32kB/align=1-8 471MB/s ± 3% 441MB/s ± 3% -6.25% (p=0.000 n=8+10)
+CRC32/poly=Koopman/size=15/align=0-8 412MB/s ±10% 421MB/s ± 3% ~ (p=0.218 n=10+10)
+CRC32/poly=Koopman/size=32kB/align=0-8 453MB/s ± 8% 450MB/s ± 4% ~ (p=0.684 n=10+10)
+CRC32/poly=Koopman/size=4kB/align=1-8 459MB/s ± 9% 455MB/s ±11% ~ (p=0.739 n=10+10)
+CRC32/poly=Koopman/size=4kB/align=0-8 454MB/s ± 5% 455MB/s ± 6% ~ (p=0.971 n=10+10)
+CRC32/poly=IEEE/size=15/align=1-8 336MB/s ± 4% 337MB/s ± 4% ~ (p=0.579 n=10+10)
+CRC32/poly=Koopman/size=1kB/align=0-8 452MB/s ± 9% 438MB/s ± 4% ~ (p=0.052 n=10+10)
+CRC32/poly=Koopman/size=512/align=1-8 455MB/s ± 6% 440MB/s ± 8% ~ (p=0.143 n=10+10)
+CRC32/poly=Castagnoli/size=15/align=0-8 916MB/s ± 2% 920MB/s ± 2% ~ (p=0.489 n=9+9)
+CRC32/poly=Castagnoli/size=15/align=1-8 870MB/s ± 2% 867MB/s ± 2% ~ (p=0.661 n=9+10)
+CRC32/poly=Castagnoli/size=40/align=0-8 2.30GB/s ± 2% 2.28GB/s ± 4% ~ (p=0.684 n=10+10)
+CRC32/poly=Castagnoli/size=40/align=1-8 2.03GB/s ± 3% 2.06GB/s ± 2% ~ (p=0.063 n=10+10)
+CRC32/poly=Castagnoli/size=512/align=0-8 12.7GB/s ± 2% 12.8GB/s ± 4% ~ (p=0.529 n=10+10)
+CRC32/poly=Castagnoli/size=512/align=1-8 12.1GB/s ± 3% 12.2GB/s ± 1% ~ (p=0.780 n=10+9)
+CRC32/poly=Koopman/size=40/align=1-8 440MB/s ± 6% 455MB/s ± 3% ~ (p=0.052 n=10+10)
+CRC32/poly=Castagnoli/size=1kB/align=1-8 14.6GB/s ± 6% 15.0GB/s ± 2% ~ (p=0.211 n=10+9)
+CRC32/poly=Castagnoli/size=4kB/align=0-8 25.1GB/s ± 5% 25.7GB/s ± 3% ~ (p=0.052 n=10+10)
+CRC32/poly=Koopman/size=15/align=1-8 427MB/s ± 5% 422MB/s ± 1% ~ (p=0.497 n=10+9)
+CRC32/poly=Castagnoli/size=32kB/align=0-8 26.9GB/s ± 4% 26.8GB/s ± 5% ~ (p=0.842 n=9+10)
+CRC32/poly=IEEE/size=512/align=1-8 2.17GB/s ± 3% 8.96GB/s ± 3% +312.89% (p=0.000 n=10+10)
+CRC32/poly=Castagnoli/size=32kB/align=1-8 25.9GB/s ± 3% 26.8GB/s ± 4% +3.62% (p=0.002 n=9+10)
+CRC32/poly=Castagnoli/size=4kB/align=1-8 24.1GB/s ± 6% 25.3GB/s ± 3% +4.71% (p=0.005 n=10+10)
+CRC32/poly=Koopman/size=40/align=0-8 437MB/s ± 9% 456MB/s ± 2% +4.50% (p=0.002 n=10+10)
+CRC32/poly=IEEE/size=512/align=0-8 2.15GB/s ± 4% 8.97GB/s ± 3% +317.65% (p=0.000 n=10+10)
+CRC32/poly=Koopman/size=512/align=0-8 453MB/s ± 5% 476MB/s ± 3% +5.09% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=32kB/align=1-8 2.31GB/s ± 8% 15.04GB/s ± 3% +550.07% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=32kB/align=0-8 2.19GB/s ± 7% 15.19GB/s ± 3% +591.99% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=4kB/align=1-8 2.33GB/s ± 6% 13.68GB/s ± 3% +488.23% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=4kB/align=0-8 2.36GB/s ± 7% 13.73GB/s ± 1% +482.26% (p=0.000 n=10+9)
+CRC32/poly=IEEE/size=1kB/align=1-8 2.31GB/s ± 2% 10.98GB/s ± 2% +375.97% (p=0.000 n=10+8)
+CRC32/poly=IEEE/size=1kB/align=0-8 2.26GB/s ± 4% 10.88GB/s ± 2% +381.12% (p=0.000 n=10+8)
+CRC32/poly=IEEE/size=15/align=0-8 321MB/s ± 8% 337MB/s ± 3% +5.06% (p=0.009 n=10+10)
diff --git a/cmd/benchstat/testdata/deltasort.golden b/cmd/benchstat/testdata/deltasort.golden
new file mode 100644
index 0000000..367d10c
--- /dev/null
+++ b/cmd/benchstat/testdata/deltasort.golden
@@ -0,0 +1,75 @@
+name old time/op new time/op delta
+CRC32/poly=Koopman/size=1kB/align=1-8 2.15µs ± 2% 2.36µs ± 5% +9.84% (p=0.000 n=9+10)
+CRC32/poly=Koopman/size=32kB/align=1-8 69.6µs ± 3% 74.3µs ± 3% +6.70% (p=0.000 n=8+10)
+CRC32/poly=Koopman/size=1kB/align=0-8 2.24µs ± 6% 2.34µs ± 4% +4.34% (p=0.010 n=9+10)
+CRC32/poly=IEEE/size=40/align=0-8 41.0ns ± 1% 42.5ns ± 6% +3.56% (p=0.000 n=8+10)
+CRC32/poly=IEEE/size=40/align=1-8 41.1ns ± 1% 42.0ns ± 3% +2.34% (p=0.000 n=9+10)
+CRC32/poly=Castagnoli/size=1kB/align=0-8 65.5ns ± 1% 66.2ns ± 1% +1.01% (p=0.003 n=9+8)
+CRC32/poly=Castagnoli/size=32kB/align=0-8 1.22µs ± 4% 1.21µs ± 3% ~ (p=0.882 n=9+9)
+CRC32/poly=Koopman/size=32kB/align=0-8 72.4µs ± 9% 72.9µs ± 4% ~ (p=0.684 n=10+10)
+CRC32/poly=Koopman/size=4kB/align=1-8 8.94µs ±10% 9.05µs ±12% ~ (p=0.754 n=10+10)
+CRC32/poly=Koopman/size=4kB/align=0-8 9.03µs ± 6% 9.00µs ± 6% ~ (p=0.971 n=10+10)
+CRC32/poly=IEEE/size=15/align=1-8 44.7ns ± 5% 44.5ns ± 4% ~ (p=0.539 n=10+10)
+CRC32/poly=Koopman/size=512/align=1-8 1.13µs ± 6% 1.17µs ± 8% ~ (p=0.143 n=10+10)
+CRC32/poly=Castagnoli/size=15/align=0-8 16.4ns ± 3% 16.3ns ± 2% ~ (p=0.615 n=9+9)
+CRC32/poly=Castagnoli/size=15/align=1-8 17.2ns ± 2% 17.3ns ± 2% ~ (p=0.650 n=9+10)
+CRC32/poly=Castagnoli/size=40/align=0-8 17.4ns ± 2% 17.5ns ± 4% ~ (p=0.694 n=10+10)
+CRC32/poly=Koopman/size=15/align=1-8 35.1ns ± 5% 35.5ns ± 1% ~ (p=0.508 n=10+9)
+CRC32/poly=Castagnoli/size=512/align=0-8 40.2ns ± 2% 40.1ns ± 4% ~ (p=0.614 n=10+10)
+CRC32/poly=Castagnoli/size=512/align=1-8 42.1ns ± 3% 41.9ns ± 2% ~ (p=0.952 n=10+9)
+CRC32/poly=Koopman/size=15/align=0-8 36.5ns ±11% 35.6ns ± 3% ~ (p=0.216 n=10+10)
+CRC32/poly=Castagnoli/size=1kB/align=1-8 70.1ns ± 6% 68.5ns ± 2% ~ (p=0.190 n=10+9)
+CRC32/poly=Koopman/size=40/align=1-8 91.1ns ± 6% 88.0ns ± 3% ~ (p=0.055 n=10+10)
+CRC32/poly=Castagnoli/size=40/align=1-8 19.7ns ± 3% 19.4ns ± 2% -1.62% (p=0.036 n=10+10)
+CRC32/poly=Castagnoli/size=4kB/align=0-8 163ns ± 5% 159ns ± 3% -2.46% (p=0.032 n=10+10)
+CRC32/poly=Castagnoli/size=32kB/align=1-8 1.26µs ± 3% 1.22µs ± 4% -3.48% (p=0.002 n=9+10)
+CRC32/poly=Koopman/size=40/align=0-8 91.6ns ± 9% 87.6ns ± 2% -4.35% (p=0.002 n=10+10)
+CRC32/poly=Castagnoli/size=4kB/align=1-8 169ns ± 6% 162ns ± 3% -4.60% (p=0.005 n=10+10)
+CRC32/poly=Koopman/size=512/align=0-8 1.13µs ± 5% 1.08µs ± 3% -4.93% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=15/align=0-8 46.9ns ± 8% 44.5ns ± 3% -5.01% (p=0.008 n=10+10)
+CRC32/poly=IEEE/size=512/align=1-8 236ns ± 3% 57ns ± 3% -75.72% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=512/align=0-8 238ns ± 5% 57ns ± 3% -76.00% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=1kB/align=1-8 444ns ± 2% 93ns ± 2% -78.97% (p=0.000 n=10+8)
+CRC32/poly=IEEE/size=1kB/align=0-8 452ns ± 4% 94ns ± 2% -79.20% (p=0.000 n=10+8)
+CRC32/poly=IEEE/size=4kB/align=0-8 1.74µs ± 8% 0.30µs ± 1% -82.87% (p=0.000 n=10+9)
+CRC32/poly=IEEE/size=4kB/align=1-8 1.76µs ± 6% 0.30µs ± 3% -83.05% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=32kB/align=1-8 14.2µs ± 7% 2.2µs ± 3% -84.65% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=32kB/align=0-8 15.0µs ± 7% 2.2µs ± 3% -85.57% (p=0.000 n=10+10)
+
+name old speed new speed delta
+CRC32/poly=Koopman/size=1kB/align=1-8 477MB/s ± 2% 434MB/s ± 5% -8.92% (p=0.000 n=9+10)
+CRC32/poly=Koopman/size=32kB/align=1-8 471MB/s ± 3% 441MB/s ± 3% -6.25% (p=0.000 n=8+10)
+CRC32/poly=IEEE/size=40/align=0-8 975MB/s ± 1% 942MB/s ± 5% -3.37% (p=0.001 n=8+10)
+CRC32/poly=IEEE/size=40/align=1-8 974MB/s ± 1% 952MB/s ± 3% -2.25% (p=0.000 n=9+10)
+CRC32/poly=Castagnoli/size=1kB/align=0-8 15.6GB/s ± 1% 15.5GB/s ± 1% -1.02% (p=0.002 n=9+8)
+CRC32/poly=Koopman/size=15/align=1-8 427MB/s ± 5% 422MB/s ± 1% ~ (p=0.497 n=10+9)
+CRC32/poly=Koopman/size=32kB/align=0-8 453MB/s ± 8% 450MB/s ± 4% ~ (p=0.684 n=10+10)
+CRC32/poly=Koopman/size=4kB/align=1-8 459MB/s ± 9% 455MB/s ±11% ~ (p=0.739 n=10+10)
+CRC32/poly=Koopman/size=4kB/align=0-8 454MB/s ± 5% 455MB/s ± 6% ~ (p=0.971 n=10+10)
+CRC32/poly=IEEE/size=15/align=1-8 336MB/s ± 4% 337MB/s ± 4% ~ (p=0.579 n=10+10)
+CRC32/poly=Koopman/size=1kB/align=0-8 452MB/s ± 9% 438MB/s ± 4% ~ (p=0.052 n=10+10)
+CRC32/poly=Koopman/size=512/align=1-8 455MB/s ± 6% 440MB/s ± 8% ~ (p=0.143 n=10+10)
+CRC32/poly=Castagnoli/size=15/align=0-8 916MB/s ± 2% 920MB/s ± 2% ~ (p=0.489 n=9+9)
+CRC32/poly=Castagnoli/size=15/align=1-8 870MB/s ± 2% 867MB/s ± 2% ~ (p=0.661 n=9+10)
+CRC32/poly=Castagnoli/size=40/align=0-8 2.30GB/s ± 2% 2.28GB/s ± 4% ~ (p=0.684 n=10+10)
+CRC32/poly=Castagnoli/size=40/align=1-8 2.03GB/s ± 3% 2.06GB/s ± 2% ~ (p=0.063 n=10+10)
+CRC32/poly=Castagnoli/size=512/align=0-8 12.7GB/s ± 2% 12.8GB/s ± 4% ~ (p=0.529 n=10+10)
+CRC32/poly=Castagnoli/size=512/align=1-8 12.1GB/s ± 3% 12.2GB/s ± 1% ~ (p=0.780 n=10+9)
+CRC32/poly=Koopman/size=40/align=1-8 440MB/s ± 6% 455MB/s ± 3% ~ (p=0.052 n=10+10)
+CRC32/poly=Castagnoli/size=1kB/align=1-8 14.6GB/s ± 6% 15.0GB/s ± 2% ~ (p=0.211 n=10+9)
+CRC32/poly=Castagnoli/size=4kB/align=0-8 25.1GB/s ± 5% 25.7GB/s ± 3% ~ (p=0.052 n=10+10)
+CRC32/poly=Koopman/size=15/align=0-8 412MB/s ±10% 421MB/s ± 3% ~ (p=0.218 n=10+10)
+CRC32/poly=Castagnoli/size=32kB/align=0-8 26.9GB/s ± 4% 26.8GB/s ± 5% ~ (p=0.842 n=9+10)
+CRC32/poly=Castagnoli/size=32kB/align=1-8 25.9GB/s ± 3% 26.8GB/s ± 4% +3.62% (p=0.002 n=9+10)
+CRC32/poly=Koopman/size=40/align=0-8 437MB/s ± 9% 456MB/s ± 2% +4.50% (p=0.002 n=10+10)
+CRC32/poly=Castagnoli/size=4kB/align=1-8 24.1GB/s ± 6% 25.3GB/s ± 3% +4.71% (p=0.005 n=10+10)
+CRC32/poly=IEEE/size=15/align=0-8 321MB/s ± 8% 337MB/s ± 3% +5.06% (p=0.009 n=10+10)
+CRC32/poly=Koopman/size=512/align=0-8 453MB/s ± 5% 476MB/s ± 3% +5.09% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=512/align=1-8 2.17GB/s ± 3% 8.96GB/s ± 3% +312.89% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=512/align=0-8 2.15GB/s ± 4% 8.97GB/s ± 3% +317.65% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=1kB/align=1-8 2.31GB/s ± 2% 10.98GB/s ± 2% +375.97% (p=0.000 n=10+8)
+CRC32/poly=IEEE/size=1kB/align=0-8 2.26GB/s ± 4% 10.88GB/s ± 2% +381.12% (p=0.000 n=10+8)
+CRC32/poly=IEEE/size=4kB/align=0-8 2.36GB/s ± 7% 13.73GB/s ± 1% +482.26% (p=0.000 n=10+9)
+CRC32/poly=IEEE/size=4kB/align=1-8 2.33GB/s ± 6% 13.68GB/s ± 3% +488.23% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=32kB/align=1-8 2.31GB/s ± 8% 15.04GB/s ± 3% +550.07% (p=0.000 n=10+10)
+CRC32/poly=IEEE/size=32kB/align=0-8 2.19GB/s ± 7% 15.19GB/s ± 3% +591.99% (p=0.000 n=10+10)