benchstat, cmd/benchstat: group benchmark results
Go 1.9 and up write "pkg", "goos", and "goarch" keys in benchmark
output. benchstat now understands benchmark labels, and uses them to
separate incomparable benchmark results. cmd/benchstat gains a
command-line flag called "-split" to control this, defaulting to
"pkg,goos,goarch".
Change-Id: I00413ab348bbff31743b59e81d88c4faab1a8dca
Reviewed-on: https://go-review.googlesource.com/38584
Run-TryBot: Quentin Smith <quentin@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
diff --git a/analysis/app/compare.go b/analysis/app/compare.go
index c60cefd..03c3416 100644
--- a/analysis/app/compare.go
+++ b/analysis/app/compare.go
@@ -288,6 +288,7 @@
// Compute benchstat
c := &benchstat.Collection{
AddGeoMean: true,
+ SplitBy: []string{"pkg", "goos", "goarch"},
}
for _, g := range groups {
c.AddResults(g.Q, g.results)
diff --git a/benchstat/data.go b/benchstat/data.go
index 756e2b0..1123532 100644
--- a/benchstat/data.go
+++ b/benchstat/data.go
@@ -5,6 +5,7 @@
package benchstat
import (
+ "bytes"
"fmt"
"strconv"
"strings"
@@ -15,10 +16,15 @@
// A Collection is a collection of benchmark results.
type Collection struct {
- // Configs, Benchmarks, and Units give the set of configs,
- // benchmarks, and units from the keys in Stats in an order
+ // Configs, Groups, and Units give the set of configs,
+ // groups, and units from the keys in Metrics in an order
// meant to match the order the benchmarks were read in.
- Configs, Benchmarks, Units []string
+ Configs, Groups, Units []string
+
+ // Benchmarks gives, for each group, the set of benchmarks
+ // from the keys in Metrics, in an order meant to match the
+ // order the benchmarks were read in.
+ Benchmarks map[string][]string
// Metrics holds the accumulated metrics for each key.
Metrics map[Key]*Metrics
@@ -34,13 +40,17 @@
// AddGeoMean specifies whether to add a line to the table
// showing the geometric mean of all the benchmark results.
AddGeoMean bool
+
+ // SplitBy specifies the labels to split results by.
+ // By default, results will only be split by full name.
+ SplitBy []string
}
// A Key identifies one metric (e.g., "ns/op", "B/op") from one
-// benchmark (function name sans "Benchmark" prefix) in one
-// configuration (input file name).
+// benchmark (function name sans "Benchmark" prefix) and optional
+// group in one configuration (input file name).
type Key struct {
- Config, Benchmark, Unit string
+ Config, Group, Benchmark, Unit string
}
// A Metrics holds the measurements of a single metric
@@ -129,7 +139,13 @@
*strings = append(*strings, add)
}
addString(&c.Configs, key.Config)
- addString(&c.Benchmarks, key.Benchmark)
+ addString(&c.Groups, key.Group)
+ if c.Benchmarks == nil {
+ c.Benchmarks = make(map[string][]string)
+ }
+ benchmarks := c.Benchmarks[key.Group]
+ addString(&benchmarks, key.Benchmark)
+ c.Benchmarks[key.Group] = benchmarks
addString(&c.Units, key.Unit)
m := &Metrics{Unit: key.Unit}
c.Metrics[key] = m
@@ -141,7 +157,14 @@
func (c *Collection) AddConfig(config string, data []byte) {
c.Configs = append(c.Configs, config)
key := Key{Config: config}
- c.addText(key, string(data))
+ br := benchfmt.NewReader(bytes.NewReader(data))
+ for br.Next() {
+ c.addResult(key, br.Result())
+ }
+ if err := br.Err(); err != nil {
+ // bytes.Reader never returns errors
+ panic(err)
+ }
}
// AddResults adds the benchmark results to the named configuration.
@@ -149,35 +172,50 @@
c.Configs = append(c.Configs, config)
key := Key{Config: config}
for _, r := range results {
- c.addText(key, r.Content)
+ c.addResult(key, r)
}
}
-func (c *Collection) addText(key Key, data string) {
- for _, line := range strings.Split(string(data), "\n") {
- f := strings.Fields(line)
- if len(f) < 4 {
+func (c *Collection) addResult(key Key, r *benchfmt.Result) {
+ f := strings.Fields(r.Content)
+ if len(f) < 4 {
+ return
+ }
+ name := f[0]
+ if !strings.HasPrefix(name, "Benchmark") {
+ return
+ }
+ name = strings.TrimPrefix(name, "Benchmark")
+ n, _ := strconv.Atoi(f[1])
+ if n == 0 {
+ return
+ }
+ key.Group = c.makeGroup(r)
+ key.Benchmark = name
+ for i := 2; i+2 <= len(f); i += 2 {
+ val, err := strconv.ParseFloat(f[i], 64)
+ if err != nil {
continue
}
- name := f[0]
- if !strings.HasPrefix(name, "Benchmark") {
- continue
- }
- name = strings.TrimPrefix(name, "Benchmark")
- n, _ := strconv.Atoi(f[1])
- if n == 0 {
- continue
- }
+ key.Unit = f[i+1]
+ m := c.addMetrics(key)
+ m.Values = append(m.Values, val)
+ }
+}
- key.Benchmark = name
- for i := 2; i+2 <= len(f); i += 2 {
- val, err := strconv.ParseFloat(f[i], 64)
- if err != nil {
- continue
+func (c *Collection) makeGroup(r *benchfmt.Result) string {
+ var out string
+ for _, s := range c.SplitBy {
+ v := r.NameLabels[s]
+ if v == "" {
+ v = r.Labels[s]
+ }
+ if v != "" {
+ if out != "" {
+ out = out + " "
}
- key.Unit = f[i+1]
- m := c.addMetrics(key)
- m.Values = append(m.Values, val)
+ out += fmt.Sprintf("%s:%s", s, v)
}
}
+ return out
}
diff --git a/benchstat/html.go b/benchstat/html.go
index 3817e77..eea80df 100644
--- a/benchstat/html.go
+++ b/benchstat/html.go
@@ -25,7 +25,9 @@
<tr><th><th>{{.Metric}}
{{else -}}
<tr><th><th colspan='{{len .Configs}}' class='metric'>{{.Metric}}{{if .OldNewDelta}}<th>delta{{end}}
-{{end}}{{range $row := $table.Rows -}}
+{{end}}{{range $group := group $table.Rows -}}
+{{if and (gt (len $table.Groups) 1) (len (index . 0).Group)}}<tr class='group'><th colspan='{{colspan (len $table.Configs) $table.OldNewDelta}}'>{{(index . 0).Group}}{{end}}
+{{- range $row := . -}}
{{if $table.OldNewDelta -}}
<tr class='{{if eq .Change 1}}better{{else if eq .Change -1}}worse{{else}}unchanged{{end}}'>
{{- else -}}
@@ -33,6 +35,7 @@
{{- end -}}
<td>{{.Benchmark}}{{range .Metrics}}<td>{{.Format $row.Scaler}}{{end}}{{if $table.OldNewDelta}}<td class='{{if eq .Delta "~"}}nodelta{{else}}delta{{end}}'>{{replace .Delta "-" "−" -1}}<td class='note'>{{.Note}}{{end}}
{{end -}}
+{{- end -}}
<tr><td>
</tbody>
{{end}}
@@ -42,6 +45,34 @@
var htmlFuncs = template.FuncMap{
"replace": strings.Replace,
+ "group": htmlGroup,
+ "colspan": htmlColspan,
+}
+
+func htmlColspan(configs int, delta bool) int {
+ if delta {
+ configs++
+ }
+ return configs + 1
+}
+
+func htmlGroup(rows []*Row) (out [][]*Row) {
+ var group string
+ var cur []*Row
+ for _, r := range rows {
+ if r.Group != group {
+ group = r.Group
+ if len(cur) > 0 {
+ out = append(out, cur)
+ cur = nil
+ }
+ }
+ cur = append(cur, r)
+ }
+ if len(cur) > 0 {
+ out = append(out, cur)
+ }
+ return
}
// FormatHTML appends an HTML formatting of the tables to buf.
diff --git a/benchstat/table.go b/benchstat/table.go
index 190f8ec..9693389 100644
--- a/benchstat/table.go
+++ b/benchstat/table.go
@@ -15,12 +15,14 @@
Metric string
OldNewDelta bool // is this an old-new-delta table?
Configs []string
+ Groups []string
Rows []*Row
}
// A Row is a table row for display in the benchstat output.
type Row struct {
Benchmark string // benchmark name
+ Group string // group name
Scaler Scaler // formatter for stats means
Metrics []*Metrics // columns of statistics
Delta string // formatted percent change
@@ -49,61 +51,68 @@
for _, key.Unit = range c.Units {
table := new(Table)
table.Configs = c.Configs
+ table.Groups = c.Groups
table.Metric = metricOf(key.Unit)
table.OldNewDelta = len(c.Configs) == 2
- for _, key.Benchmark = range c.Benchmarks {
- row := &Row{Benchmark: key.Benchmark}
+ for _, key.Group = range c.Groups {
+ for _, key.Benchmark = range c.Benchmarks[key.Group] {
+ row := &Row{Benchmark: key.Benchmark}
+ if len(c.Groups) > 1 {
+ // Show group headers if there is more than one group.
+ row.Group = key.Group
+ }
- for _, key.Config = range c.Configs {
- m := c.Metrics[key]
- if m == nil {
- row.Metrics = append(row.Metrics, new(Metrics))
- continue
- }
- row.Metrics = append(row.Metrics, m)
- if row.Scaler == nil {
- row.Scaler = NewScaler(m.Mean, m.Unit)
- }
- }
-
- // If there are only two configs being compared, add stats.
- if table.OldNewDelta {
- k0 := key
- k0.Config = c.Configs[0]
- k1 := key
- k1.Config = c.Configs[1]
- old := c.Metrics[k0]
- new := c.Metrics[k1]
- // If one is missing, omit row entirely.
- // TODO: Control this better.
- if old == nil || new == nil {
- continue
- }
- pval, testerr := deltaTest(old, new)
- row.Delta = "~"
- if testerr == stats.ErrZeroVariance {
- row.Note = "(zero variance)"
- } else if testerr == stats.ErrSampleSize {
- row.Note = "(too few samples)"
- } else if testerr == stats.ErrSamplesEqual {
- row.Note = "(all equal)"
- } else if testerr != nil {
- row.Note = fmt.Sprintf("(%s)", testerr)
- } else if pval < alpha {
- pct := ((new.Mean / old.Mean) - 1.0) * 100.0
- row.Delta = fmt.Sprintf("%+.2f%%", pct)
- if pct < 0 == (table.Metric != "speed") { // smaller is better, except speeds
- row.Change = +1
- } else {
- row.Change = -1
+ for _, key.Config = range c.Configs {
+ m := c.Metrics[key]
+ if m == nil {
+ row.Metrics = append(row.Metrics, new(Metrics))
+ continue
+ }
+ row.Metrics = append(row.Metrics, m)
+ if row.Scaler == nil {
+ row.Scaler = NewScaler(m.Mean, m.Unit)
}
}
- if row.Note == "" && pval != -1 {
- row.Note = fmt.Sprintf("(p=%0.3f n=%d+%d)", pval, len(old.RValues), len(new.RValues))
- }
- }
- table.Rows = append(table.Rows, row)
+ // If there are only two configs being compared, add stats.
+ if table.OldNewDelta {
+ k0 := key
+ k0.Config = c.Configs[0]
+ k1 := key
+ k1.Config = c.Configs[1]
+ old := c.Metrics[k0]
+ new := c.Metrics[k1]
+ // If one is missing, omit row entirely.
+ // TODO: Control this better.
+ if old == nil || new == nil {
+ continue
+ }
+ pval, testerr := deltaTest(old, new)
+ row.Delta = "~"
+ if testerr == stats.ErrZeroVariance {
+ row.Note = "(zero variance)"
+ } else if testerr == stats.ErrSampleSize {
+ row.Note = "(too few samples)"
+ } else if testerr == stats.ErrSamplesEqual {
+ row.Note = "(all equal)"
+ } else if testerr != nil {
+ row.Note = fmt.Sprintf("(%s)", testerr)
+ } else if pval < alpha {
+ pct := ((new.Mean / old.Mean) - 1.0) * 100.0
+ row.Delta = fmt.Sprintf("%+.2f%%", pct)
+ if pct < 0 == (table.Metric != "speed") { // smaller is better, except speeds
+ row.Change = +1
+ } else {
+ row.Change = -1
+ }
+ }
+ if row.Note == "" && pval != -1 {
+ row.Note = fmt.Sprintf("(p=%0.3f n=%d+%d)", pval, len(old.RValues), len(new.RValues))
+ }
+ }
+
+ table.Rows = append(table.Rows, row)
+ }
}
if len(table.Rows) > 0 {
@@ -140,16 +149,18 @@
maxCount := 0
for _, key.Config = range c.Configs {
var means []float64
- for _, key.Benchmark = range c.Benchmarks {
- m := c.Metrics[key]
- // Omit 0 values from the geomean calculation,
- // as these either make the geomean undefined
- // or zero (depending on who you ask). This
- // typically comes up with things like
- // allocation counts, where it's fine to just
- // ignore the benchmark.
- if m != nil && m.Mean != 0 {
- means = append(means, m.Mean)
+ for _, key.Group = range c.Groups {
+ for _, key.Benchmark = range c.Benchmarks[key.Group] {
+ m := c.Metrics[key]
+ // Omit 0 values from the geomean calculation,
+ // as these either make the geomean undefined
+ // or zero (depending on who you ask). This
+ // typically comes up with things like
+ // allocation counts, where it's fine to just
+ // ignore the benchmark.
+ if m != nil && m.Mean != 0 {
+ means = append(means, m.Mean)
+ }
}
}
if len(means) > maxCount {
diff --git a/benchstat/text.go b/benchstat/text.go
index b8189f8..99ec736 100644
--- a/benchstat/text.go
+++ b/benchstat/text.go
@@ -20,6 +20,10 @@
var max []int
for _, table := range textTables {
for _, row := range table {
+ if len(row.cols) == 1 {
+ // Header row
+ continue
+ }
for len(max) < len(row.cols) {
max = append(max, 0)
}
@@ -53,8 +57,11 @@
// data
for _, row := range table[1:] {
for i, s := range row.cols {
- switch i {
- case 0:
+ switch {
+ case len(row.cols) == 1:
+ // Header row
+ fmt.Fprint(w, s)
+ case i == 0:
fmt.Fprintf(w, "%-*s", max[i], s)
default:
if i == len(row.cols)-1 && len(s) > 0 && s[0] == '(' {
@@ -104,7 +111,13 @@
textRows = append(textRows, row)
}
+ var group string
+
for _, row := range t.Rows {
+ if row.Group != group {
+ group = row.Group
+ textRows = append(textRows, newTextRow(group))
+ }
text := newTextRow(row.Benchmark)
for _, m := range row.Metrics {
text.cols = append(text.cols, m.Format(row.Scaler))
diff --git a/cmd/benchstat/main.go b/cmd/benchstat/main.go
index 570b186..b7fb691 100644
--- a/cmd/benchstat/main.go
+++ b/cmd/benchstat/main.go
@@ -114,6 +114,7 @@
flagAlpha = flag.Float64("alpha", 0.05, "consider change significant if p < `α`")
flagGeomean = flag.Bool("geomean", false, "print the geometric mean of each file")
flagHTML = flag.Bool("html", false, "print results as an HTML table")
+ flagSplit = flag.String("split", "pkg,goos,goarch", "split benchmarks by `labels`")
)
var deltaTestNames = map[string]benchstat.DeltaTest{
@@ -141,6 +142,9 @@
AddGeoMean: *flagGeomean,
DeltaTest: deltaTest,
}
+ if *flagSplit != "" {
+ c.SplitBy = strings.Split(*flagSplit, ",")
+ }
for _, file := range flag.Args() {
data, err := ioutil.ReadFile(file)
if err != nil {
diff --git a/cmd/benchstat/main_test.go b/cmd/benchstat/main_test.go
index 8bdd58b..26f5433 100644
--- a/cmd/benchstat/main_test.go
+++ b/cmd/benchstat/main_test.go
@@ -6,6 +6,7 @@
import (
"bytes"
+ "flag"
"io/ioutil"
"os"
"os/exec"
@@ -28,12 +29,15 @@
t.Fatal("skipping other tests")
}
check(t, "all", "new.txt", "old.txt", "slashslash4.txt", "x386.txt")
+ check(t, "allnosplit", "-split", "", "new.txt", "old.txt", "slashslash4.txt", "x386.txt")
check(t, "oldnew", "old.txt", "new.txt")
check(t, "oldnewgeo", "-geomean", "old.txt", "new.txt")
check(t, "new4", "new.txt", "slashslash4.txt")
check(t, "oldnewhtml", "-html", "old.txt", "new.txt")
check(t, "oldnew4html", "-html", "old.txt", "new.txt", "slashslash4.txt")
check(t, "oldnewttest", "-delta-test=ttest", "old.txt", "new.txt")
+ check(t, "packagesold", "packagesold.txt")
+ check(t, "packages", "packagesold.txt", "packagesnew.txt")
}
func check(t *testing.T, name string, files ...string) {
@@ -59,6 +63,7 @@
*flagGeomean = false
*flagHTML = false
*flagDeltaTest = "utest"
+ *flagSplit = flag.Lookup("split").DefValue
main()
diff --git a/cmd/benchstat/testdata/all.golden b/cmd/benchstat/testdata/all.golden
index eff91db..2ce36ba 100644
--- a/cmd/benchstat/testdata/all.golden
+++ b/cmd/benchstat/testdata/all.golden
@@ -1,75 +1,151 @@
name \ time/op new.txt old.txt slashslash4.txt x386.txt
-CRC32/poly=IEEE/size=15/align=0-8 44.5ns ± 3% 46.9ns ± 8% 62.4ns ± 9%
-CRC32/poly=IEEE/size=15/align=1-8 44.5ns ± 4% 44.7ns ± 5% 63.5ns ± 8%
-CRC32/poly=IEEE/size=40/align=0-8 42.5ns ± 6% 41.0ns ± 1% 42.1ns ± 3% 57.4ns ± 3%
-CRC32/poly=IEEE/size=40/align=1-8 42.0ns ± 3% 41.1ns ± 1% 41.7ns ± 5% 57.3ns ± 3%
-CRC32/poly=IEEE/size=512/align=0-8 57.1ns ± 3% 238.0ns ± 5% 332.3ns ± 2%
-CRC32/poly=IEEE/size=512/align=1-8 57.2ns ± 3% 235.5ns ± 3% 335.3ns ± 3%
-CRC32/poly=IEEE/size=1kB/align=0-8 94.1ns ± 2% 452.5ns ± 4% 626.3ns ± 2%
-CRC32/poly=IEEE/size=1kB/align=1-8 93.3ns ± 2% 443.6ns ± 2% 635.3ns ± 6%
-CRC32/poly=IEEE/size=4kB/align=0-8 298ns ± 1% 1740ns ± 8% 1682ns ± 2% 2457ns ± 7%
-CRC32/poly=IEEE/size=4kB/align=1-8 299ns ± 3% 1764ns ± 6% 1690ns ± 4% 2434ns ± 5%
-CRC32/poly=IEEE/size=32kB/align=0-8 2.16µs ± 3% 14.95µs ± 7% 19.15µs ± 4%
-CRC32/poly=IEEE/size=32kB/align=1-8 2.18µs ± 3% 14.19µs ± 7% 19.42µs ± 5%
-CRC32/poly=Castagnoli/size=15/align=0-8 16.3ns ± 2% 16.4ns ± 3% 59.4ns ± 1%
-CRC32/poly=Castagnoli/size=15/align=1-8 17.3ns ± 2% 17.2ns ± 2% 59.4ns ± 2%
-CRC32/poly=Castagnoli/size=40/align=0-8 17.5ns ± 4% 17.4ns ± 2% 18.6ns ±11% 59.7ns ± 6%
-CRC32/poly=Castagnoli/size=40/align=1-8 19.4ns ± 2% 19.7ns ± 3% 19.6ns ± 2% 58.1ns ± 7%
-CRC32/poly=Castagnoli/size=512/align=0-8 40.1ns ± 4% 40.2ns ± 2% 350.8ns ± 5%
-CRC32/poly=Castagnoli/size=512/align=1-8 41.9ns ± 2% 42.1ns ± 3% 349.8ns ± 7%
-CRC32/poly=Castagnoli/size=1kB/align=0-8 66.2ns ± 1% 65.5ns ± 1% 656.7ns ± 4%
-CRC32/poly=Castagnoli/size=1kB/align=1-8 68.5ns ± 2% 70.1ns ± 6% 656.8ns ± 6%
-CRC32/poly=Castagnoli/size=4kB/align=0-8 159ns ± 3% 163ns ± 5% 161ns ± 8% 2452ns ± 4%
-CRC32/poly=Castagnoli/size=4kB/align=1-8 162ns ± 3% 169ns ± 6% 170ns ± 8% 2448ns ± 5%
-CRC32/poly=Castagnoli/size=32kB/align=0-8 1.21µs ± 3% 1.22µs ± 4% 20.53µs ± 5%
-CRC32/poly=Castagnoli/size=32kB/align=1-8 1.22µs ± 4% 1.26µs ± 3% 20.18µs ± 9%
-CRC32/poly=Koopman/size=15/align=0-8 35.6ns ± 3% 36.5ns ±11% 58.2ns ± 4%
-CRC32/poly=Koopman/size=15/align=1-8 35.5ns ± 1% 35.1ns ± 5% 56.4ns ± 7%
-CRC32/poly=Koopman/size=40/align=0-8 87.6ns ± 2% 91.6ns ± 9% 93.8ns ±13% 142.3ns ± 8%
-CRC32/poly=Koopman/size=40/align=1-8 88.0ns ± 3% 91.1ns ± 6% 86.9ns ± 3% 136.2ns ± 5%
-CRC32/poly=Koopman/size=512/align=0-8 1.08µs ± 3% 1.13µs ± 5% 1.68µs ± 5%
-CRC32/poly=Koopman/size=512/align=1-8 1.17µs ± 8% 1.13µs ± 6% 1.64µs ± 4%
-CRC32/poly=Koopman/size=1kB/align=0-8 2.34µs ± 4% 2.24µs ± 6% 3.31µs ± 4%
-CRC32/poly=Koopman/size=1kB/align=1-8 2.36µs ± 5% 2.15µs ± 2% 3.28µs ± 3%
-CRC32/poly=Koopman/size=4kB/align=0-8 9.00µs ± 6% 9.03µs ± 6% 9.08µs ± 8% 13.32µs ± 3%
-CRC32/poly=Koopman/size=4kB/align=1-8 9.05µs ±12% 8.94µs ±10% 9.46µs ± 8% 13.16µs ± 3%
-CRC32/poly=Koopman/size=32kB/align=0-8 72.9µs ± 4% 72.4µs ± 9% 106.9µs ± 6%
-CRC32/poly=Koopman/size=32kB/align=1-8 74.3µs ± 3% 69.6µs ± 3% 106.1µs ± 4%
+pkg:hash/crc32 goos:darwin goarch:amd64
+CRC32/poly=IEEE/size=15/align=0-8 44.5ns ± 3% 46.9ns ± 8%
+CRC32/poly=IEEE/size=15/align=1-8 44.5ns ± 4% 44.7ns ± 5%
+CRC32/poly=IEEE/size=40/align=0-8 42.5ns ± 6% 41.0ns ± 1% 42.1ns ± 3%
+CRC32/poly=IEEE/size=40/align=1-8 42.0ns ± 3% 41.1ns ± 1% 41.7ns ± 5%
+CRC32/poly=IEEE/size=512/align=0-8 57.1ns ± 3% 238.0ns ± 5%
+CRC32/poly=IEEE/size=512/align=1-8 57.2ns ± 3% 235.5ns ± 3%
+CRC32/poly=IEEE/size=1kB/align=0-8 94.1ns ± 2% 452.5ns ± 4%
+CRC32/poly=IEEE/size=1kB/align=1-8 93.3ns ± 2% 443.6ns ± 2%
+CRC32/poly=IEEE/size=4kB/align=0-8 298ns ± 1% 1740ns ± 8% 1682ns ± 2%
+CRC32/poly=IEEE/size=4kB/align=1-8 299ns ± 3% 1764ns ± 6% 1690ns ± 4%
+CRC32/poly=IEEE/size=32kB/align=0-8 2.16µs ± 3% 14.95µs ± 7%
+CRC32/poly=IEEE/size=32kB/align=1-8 2.18µs ± 3% 14.19µs ± 7%
+CRC32/poly=Castagnoli/size=15/align=0-8 16.3ns ± 2% 16.4ns ± 3%
+CRC32/poly=Castagnoli/size=15/align=1-8 17.3ns ± 2% 17.2ns ± 2%
+CRC32/poly=Castagnoli/size=40/align=0-8 17.5ns ± 4% 17.4ns ± 2% 18.6ns ±11%
+CRC32/poly=Castagnoli/size=40/align=1-8 19.4ns ± 2% 19.7ns ± 3% 19.6ns ± 2%
+CRC32/poly=Castagnoli/size=512/align=0-8 40.1ns ± 4% 40.2ns ± 2%
+CRC32/poly=Castagnoli/size=512/align=1-8 41.9ns ± 2% 42.1ns ± 3%
+CRC32/poly=Castagnoli/size=1kB/align=0-8 66.2ns ± 1% 65.5ns ± 1%
+CRC32/poly=Castagnoli/size=1kB/align=1-8 68.5ns ± 2% 70.1ns ± 6%
+CRC32/poly=Castagnoli/size=4kB/align=0-8 159ns ± 3% 163ns ± 5% 161ns ± 8%
+CRC32/poly=Castagnoli/size=4kB/align=1-8 162ns ± 3% 169ns ± 6% 170ns ± 8%
+CRC32/poly=Castagnoli/size=32kB/align=0-8 1.21µs ± 3% 1.22µs ± 4%
+CRC32/poly=Castagnoli/size=32kB/align=1-8 1.22µs ± 4% 1.26µs ± 3%
+CRC32/poly=Koopman/size=15/align=0-8 35.6ns ± 3% 36.5ns ±11%
+CRC32/poly=Koopman/size=15/align=1-8 35.5ns ± 1% 35.1ns ± 5%
+CRC32/poly=Koopman/size=40/align=0-8 87.6ns ± 2% 91.6ns ± 9% 93.8ns ±13%
+CRC32/poly=Koopman/size=40/align=1-8 88.0ns ± 3% 91.1ns ± 6% 86.9ns ± 3%
+CRC32/poly=Koopman/size=512/align=0-8 1.08µs ± 3% 1.13µs ± 5%
+CRC32/poly=Koopman/size=512/align=1-8 1.17µs ± 8% 1.13µs ± 6%
+CRC32/poly=Koopman/size=1kB/align=0-8 2.34µs ± 4% 2.24µs ± 6%
+CRC32/poly=Koopman/size=1kB/align=1-8 2.36µs ± 5% 2.15µs ± 2%
+CRC32/poly=Koopman/size=4kB/align=0-8 9.00µs ± 6% 9.03µs ± 6% 9.08µs ± 8%
+CRC32/poly=Koopman/size=4kB/align=1-8 9.05µs ±12% 8.94µs ±10% 9.46µs ± 8%
+CRC32/poly=Koopman/size=32kB/align=0-8 72.9µs ± 4% 72.4µs ± 9%
+CRC32/poly=Koopman/size=32kB/align=1-8 74.3µs ± 3% 69.6µs ± 3%
+pkg:hash/crc32 goos:darwin goarch:386
+CRC32/poly=IEEE/size=15/align=0-8 62.4ns ± 9%
+CRC32/poly=IEEE/size=15/align=1-8 63.5ns ± 8%
+CRC32/poly=IEEE/size=40/align=0-8 57.4ns ± 3%
+CRC32/poly=IEEE/size=40/align=1-8 57.3ns ± 3%
+CRC32/poly=IEEE/size=512/align=0-8 332ns ± 2%
+CRC32/poly=IEEE/size=512/align=1-8 335ns ± 3%
+CRC32/poly=IEEE/size=1kB/align=0-8 626ns ± 2%
+CRC32/poly=IEEE/size=1kB/align=1-8 635ns ± 6%
+CRC32/poly=IEEE/size=4kB/align=0-8 2.46µs ± 7%
+CRC32/poly=IEEE/size=4kB/align=1-8 2.43µs ± 5%
+CRC32/poly=IEEE/size=32kB/align=0-8 19.1µs ± 4%
+CRC32/poly=IEEE/size=32kB/align=1-8 19.4µs ± 5%
+CRC32/poly=Castagnoli/size=15/align=0-8 59.4ns ± 1%
+CRC32/poly=Castagnoli/size=15/align=1-8 59.4ns ± 2%
+CRC32/poly=Castagnoli/size=40/align=0-8 59.7ns ± 6%
+CRC32/poly=Castagnoli/size=40/align=1-8 58.1ns ± 7%
+CRC32/poly=Castagnoli/size=512/align=0-8 351ns ± 5%
+CRC32/poly=Castagnoli/size=512/align=1-8 350ns ± 7%
+CRC32/poly=Castagnoli/size=1kB/align=0-8 657ns ± 4%
+CRC32/poly=Castagnoli/size=1kB/align=1-8 657ns ± 6%
+CRC32/poly=Castagnoli/size=4kB/align=0-8 2.45µs ± 4%
+CRC32/poly=Castagnoli/size=4kB/align=1-8 2.45µs ± 5%
+CRC32/poly=Castagnoli/size=32kB/align=0-8 20.5µs ± 5%
+CRC32/poly=Castagnoli/size=32kB/align=1-8 20.2µs ± 9%
+CRC32/poly=Koopman/size=15/align=0-8 58.2ns ± 4%
+CRC32/poly=Koopman/size=15/align=1-8 56.4ns ± 7%
+CRC32/poly=Koopman/size=40/align=0-8 142ns ± 8%
+CRC32/poly=Koopman/size=40/align=1-8 136ns ± 5%
+CRC32/poly=Koopman/size=512/align=0-8 1.68µs ± 5%
+CRC32/poly=Koopman/size=512/align=1-8 1.64µs ± 4%
+CRC32/poly=Koopman/size=1kB/align=0-8 3.31µs ± 4%
+CRC32/poly=Koopman/size=1kB/align=1-8 3.28µs ± 3%
+CRC32/poly=Koopman/size=4kB/align=0-8 13.3µs ± 3%
+CRC32/poly=Koopman/size=4kB/align=1-8 13.2µs ± 3%
+CRC32/poly=Koopman/size=32kB/align=0-8 107µs ± 6%
+CRC32/poly=Koopman/size=32kB/align=1-8 106µs ± 4%
name \ speed new.txt old.txt slashslash4.txt x386.txt
-CRC32/poly=IEEE/size=15/align=0-8 337MB/s ± 3% 321MB/s ± 8% 241MB/s ± 8%
-CRC32/poly=IEEE/size=15/align=1-8 337MB/s ± 4% 336MB/s ± 4% 237MB/s ± 8%
-CRC32/poly=IEEE/size=40/align=0-8 942MB/s ± 5% 975MB/s ± 1% 951MB/s ± 3% 696MB/s ± 3%
-CRC32/poly=IEEE/size=40/align=1-8 952MB/s ± 3% 974MB/s ± 1% 960MB/s ± 4% 698MB/s ± 3%
-CRC32/poly=IEEE/size=512/align=0-8 8.97GB/s ± 3% 2.15GB/s ± 4% 1.54GB/s ± 2%
-CRC32/poly=IEEE/size=512/align=1-8 8.96GB/s ± 3% 2.17GB/s ± 3% 1.52GB/s ± 3%
-CRC32/poly=IEEE/size=1kB/align=0-8 10.9GB/s ± 2% 2.3GB/s ± 4% 1.6GB/s ± 2%
-CRC32/poly=IEEE/size=1kB/align=1-8 11.0GB/s ± 2% 2.3GB/s ± 2% 1.6GB/s ± 6%
-CRC32/poly=IEEE/size=4kB/align=0-8 13.7GB/s ± 1% 2.4GB/s ± 7% 2.4GB/s ± 2% 1.7GB/s ± 7%
-CRC32/poly=IEEE/size=4kB/align=1-8 13.7GB/s ± 3% 2.3GB/s ± 6% 2.4GB/s ± 4% 1.7GB/s ± 5%
-CRC32/poly=IEEE/size=32kB/align=0-8 15.2GB/s ± 3% 2.2GB/s ± 7% 1.7GB/s ± 4%
-CRC32/poly=IEEE/size=32kB/align=1-8 15.0GB/s ± 3% 2.3GB/s ± 8% 1.7GB/s ± 5%
-CRC32/poly=Castagnoli/size=15/align=0-8 920MB/s ± 2% 916MB/s ± 2% 253MB/s ± 1%
-CRC32/poly=Castagnoli/size=15/align=1-8 867MB/s ± 2% 870MB/s ± 2% 253MB/s ± 2%
-CRC32/poly=Castagnoli/size=40/align=0-8 2.28GB/s ± 4% 2.30GB/s ± 2% 2.16GB/s ±11% 0.67GB/s ± 6%
-CRC32/poly=Castagnoli/size=40/align=1-8 2.06GB/s ± 2% 2.03GB/s ± 3% 2.04GB/s ± 2% 0.69GB/s ± 6%
-CRC32/poly=Castagnoli/size=512/align=0-8 12.8GB/s ± 4% 12.7GB/s ± 2% 1.5GB/s ± 5%
-CRC32/poly=Castagnoli/size=512/align=1-8 12.2GB/s ± 1% 12.1GB/s ± 3% 1.5GB/s ± 7%
-CRC32/poly=Castagnoli/size=1kB/align=0-8 15.5GB/s ± 1% 15.6GB/s ± 1% 1.6GB/s ± 4%
-CRC32/poly=Castagnoli/size=1kB/align=1-8 15.0GB/s ± 2% 14.6GB/s ± 6% 1.6GB/s ± 5%
-CRC32/poly=Castagnoli/size=4kB/align=0-8 25.7GB/s ± 3% 25.1GB/s ± 5% 25.4GB/s ± 7% 1.7GB/s ± 4%
-CRC32/poly=Castagnoli/size=4kB/align=1-8 25.3GB/s ± 3% 24.1GB/s ± 6% 24.1GB/s ± 8% 1.7GB/s ± 5%
-CRC32/poly=Castagnoli/size=32kB/align=0-8 26.8GB/s ± 5% 26.9GB/s ± 4% 1.6GB/s ± 5%
-CRC32/poly=Castagnoli/size=32kB/align=1-8 26.8GB/s ± 4% 25.9GB/s ± 3% 1.6GB/s ± 8%
-CRC32/poly=Koopman/size=15/align=0-8 421MB/s ± 3% 412MB/s ±10% 258MB/s ± 5%
-CRC32/poly=Koopman/size=15/align=1-8 422MB/s ± 1% 427MB/s ± 5% 266MB/s ± 6%
-CRC32/poly=Koopman/size=40/align=0-8 456MB/s ± 2% 437MB/s ± 9% 428MB/s ±12% 281MB/s ± 7%
-CRC32/poly=Koopman/size=40/align=1-8 455MB/s ± 3% 440MB/s ± 6% 461MB/s ± 3% 290MB/s ± 8%
-CRC32/poly=Koopman/size=512/align=0-8 476MB/s ± 3% 453MB/s ± 5% 304MB/s ± 5%
-CRC32/poly=Koopman/size=512/align=1-8 440MB/s ± 8% 455MB/s ± 6% 312MB/s ± 3%
-CRC32/poly=Koopman/size=1kB/align=0-8 438MB/s ± 4% 452MB/s ± 9% 310MB/s ± 4%
-CRC32/poly=Koopman/size=1kB/align=1-8 434MB/s ± 5% 477MB/s ± 2% 312MB/s ± 3%
-CRC32/poly=Koopman/size=4kB/align=0-8 455MB/s ± 6% 454MB/s ± 5% 452MB/s ± 8% 308MB/s ± 3%
-CRC32/poly=Koopman/size=4kB/align=1-8 455MB/s ±11% 459MB/s ± 9% 434MB/s ± 9% 311MB/s ± 3%
-CRC32/poly=Koopman/size=32kB/align=0-8 450MB/s ± 4% 453MB/s ± 8% 307MB/s ± 6%
-CRC32/poly=Koopman/size=32kB/align=1-8 441MB/s ± 3% 471MB/s ± 3% 309MB/s ± 4%
+pkg:hash/crc32 goos:darwin goarch:amd64
+CRC32/poly=IEEE/size=15/align=0-8 337MB/s ± 3% 321MB/s ± 8%
+CRC32/poly=IEEE/size=15/align=1-8 337MB/s ± 4% 336MB/s ± 4%
+CRC32/poly=IEEE/size=40/align=0-8 942MB/s ± 5% 975MB/s ± 1% 951MB/s ± 3%
+CRC32/poly=IEEE/size=40/align=1-8 952MB/s ± 3% 974MB/s ± 1% 960MB/s ± 4%
+CRC32/poly=IEEE/size=512/align=0-8 8.97GB/s ± 3% 2.15GB/s ± 4%
+CRC32/poly=IEEE/size=512/align=1-8 8.96GB/s ± 3% 2.17GB/s ± 3%
+CRC32/poly=IEEE/size=1kB/align=0-8 10.9GB/s ± 2% 2.3GB/s ± 4%
+CRC32/poly=IEEE/size=1kB/align=1-8 11.0GB/s ± 2% 2.3GB/s ± 2%
+CRC32/poly=IEEE/size=4kB/align=0-8 13.7GB/s ± 1% 2.4GB/s ± 7% 2.4GB/s ± 2%
+CRC32/poly=IEEE/size=4kB/align=1-8 13.7GB/s ± 3% 2.3GB/s ± 6% 2.4GB/s ± 4%
+CRC32/poly=IEEE/size=32kB/align=0-8 15.2GB/s ± 3% 2.2GB/s ± 7%
+CRC32/poly=IEEE/size=32kB/align=1-8 15.0GB/s ± 3% 2.3GB/s ± 8%
+CRC32/poly=Castagnoli/size=15/align=0-8 920MB/s ± 2% 916MB/s ± 2%
+CRC32/poly=Castagnoli/size=15/align=1-8 867MB/s ± 2% 870MB/s ± 2%
+CRC32/poly=Castagnoli/size=40/align=0-8 2.28GB/s ± 4% 2.30GB/s ± 2% 2.16GB/s ±11%
+CRC32/poly=Castagnoli/size=40/align=1-8 2.06GB/s ± 2% 2.03GB/s ± 3% 2.04GB/s ± 2%
+CRC32/poly=Castagnoli/size=512/align=0-8 12.8GB/s ± 4% 12.7GB/s ± 2%
+CRC32/poly=Castagnoli/size=512/align=1-8 12.2GB/s ± 1% 12.1GB/s ± 3%
+CRC32/poly=Castagnoli/size=1kB/align=0-8 15.5GB/s ± 1% 15.6GB/s ± 1%
+CRC32/poly=Castagnoli/size=1kB/align=1-8 15.0GB/s ± 2% 14.6GB/s ± 6%
+CRC32/poly=Castagnoli/size=4kB/align=0-8 25.7GB/s ± 3% 25.1GB/s ± 5% 25.4GB/s ± 7%
+CRC32/poly=Castagnoli/size=4kB/align=1-8 25.3GB/s ± 3% 24.1GB/s ± 6% 24.1GB/s ± 8%
+CRC32/poly=Castagnoli/size=32kB/align=0-8 26.8GB/s ± 5% 26.9GB/s ± 4%
+CRC32/poly=Castagnoli/size=32kB/align=1-8 26.8GB/s ± 4% 25.9GB/s ± 3%
+CRC32/poly=Koopman/size=15/align=0-8 421MB/s ± 3% 412MB/s ±10%
+CRC32/poly=Koopman/size=15/align=1-8 422MB/s ± 1% 427MB/s ± 5%
+CRC32/poly=Koopman/size=40/align=0-8 456MB/s ± 2% 437MB/s ± 9% 428MB/s ±12%
+CRC32/poly=Koopman/size=40/align=1-8 455MB/s ± 3% 440MB/s ± 6% 461MB/s ± 3%
+CRC32/poly=Koopman/size=512/align=0-8 476MB/s ± 3% 453MB/s ± 5%
+CRC32/poly=Koopman/size=512/align=1-8 440MB/s ± 8% 455MB/s ± 6%
+CRC32/poly=Koopman/size=1kB/align=0-8 438MB/s ± 4% 452MB/s ± 9%
+CRC32/poly=Koopman/size=1kB/align=1-8 434MB/s ± 5% 477MB/s ± 2%
+CRC32/poly=Koopman/size=4kB/align=0-8 455MB/s ± 6% 454MB/s ± 5% 452MB/s ± 8%
+CRC32/poly=Koopman/size=4kB/align=1-8 455MB/s ±11% 459MB/s ± 9% 434MB/s ± 9%
+CRC32/poly=Koopman/size=32kB/align=0-8 450MB/s ± 4% 453MB/s ± 8%
+CRC32/poly=Koopman/size=32kB/align=1-8 441MB/s ± 3% 471MB/s ± 3%
+pkg:hash/crc32 goos:darwin goarch:386
+CRC32/poly=IEEE/size=15/align=0-8 241MB/s ± 8%
+CRC32/poly=IEEE/size=15/align=1-8 237MB/s ± 8%
+CRC32/poly=IEEE/size=40/align=0-8 696MB/s ± 3%
+CRC32/poly=IEEE/size=40/align=1-8 698MB/s ± 3%
+CRC32/poly=IEEE/size=512/align=0-8 1.54GB/s ± 2%
+CRC32/poly=IEEE/size=512/align=1-8 1.52GB/s ± 3%
+CRC32/poly=IEEE/size=1kB/align=0-8 1.63GB/s ± 2%
+CRC32/poly=IEEE/size=1kB/align=1-8 1.61GB/s ± 6%
+CRC32/poly=IEEE/size=4kB/align=0-8 1.67GB/s ± 7%
+CRC32/poly=IEEE/size=4kB/align=1-8 1.68GB/s ± 5%
+CRC32/poly=IEEE/size=32kB/align=0-8 1.71GB/s ± 4%
+CRC32/poly=IEEE/size=32kB/align=1-8 1.69GB/s ± 5%
+CRC32/poly=Castagnoli/size=15/align=0-8 253MB/s ± 1%
+CRC32/poly=Castagnoli/size=15/align=1-8 253MB/s ± 2%
+CRC32/poly=Castagnoli/size=40/align=0-8 671MB/s ± 6%
+CRC32/poly=Castagnoli/size=40/align=1-8 689MB/s ± 6%
+CRC32/poly=Castagnoli/size=512/align=0-8 1.46GB/s ± 5%
+CRC32/poly=Castagnoli/size=512/align=1-8 1.46GB/s ± 7%
+CRC32/poly=Castagnoli/size=1kB/align=0-8 1.56GB/s ± 4%
+CRC32/poly=Castagnoli/size=1kB/align=1-8 1.56GB/s ± 5%
+CRC32/poly=Castagnoli/size=4kB/align=0-8 1.67GB/s ± 4%
+CRC32/poly=Castagnoli/size=4kB/align=1-8 1.67GB/s ± 5%
+CRC32/poly=Castagnoli/size=32kB/align=0-8 1.60GB/s ± 5%
+CRC32/poly=Castagnoli/size=32kB/align=1-8 1.63GB/s ± 8%
+CRC32/poly=Koopman/size=15/align=0-8 258MB/s ± 5%
+CRC32/poly=Koopman/size=15/align=1-8 266MB/s ± 6%
+CRC32/poly=Koopman/size=40/align=0-8 281MB/s ± 7%
+CRC32/poly=Koopman/size=40/align=1-8 290MB/s ± 8%
+CRC32/poly=Koopman/size=512/align=0-8 304MB/s ± 5%
+CRC32/poly=Koopman/size=512/align=1-8 312MB/s ± 3%
+CRC32/poly=Koopman/size=1kB/align=0-8 310MB/s ± 4%
+CRC32/poly=Koopman/size=1kB/align=1-8 312MB/s ± 3%
+CRC32/poly=Koopman/size=4kB/align=0-8 308MB/s ± 3%
+CRC32/poly=Koopman/size=4kB/align=1-8 311MB/s ± 3%
+CRC32/poly=Koopman/size=32kB/align=0-8 307MB/s ± 6%
+CRC32/poly=Koopman/size=32kB/align=1-8 309MB/s ± 4%
diff --git a/cmd/benchstat/testdata/allnosplit.golden b/cmd/benchstat/testdata/allnosplit.golden
new file mode 100644
index 0000000..eff91db
--- /dev/null
+++ b/cmd/benchstat/testdata/allnosplit.golden
@@ -0,0 +1,75 @@
+name \ time/op new.txt old.txt slashslash4.txt x386.txt
+CRC32/poly=IEEE/size=15/align=0-8 44.5ns ± 3% 46.9ns ± 8% 62.4ns ± 9%
+CRC32/poly=IEEE/size=15/align=1-8 44.5ns ± 4% 44.7ns ± 5% 63.5ns ± 8%
+CRC32/poly=IEEE/size=40/align=0-8 42.5ns ± 6% 41.0ns ± 1% 42.1ns ± 3% 57.4ns ± 3%
+CRC32/poly=IEEE/size=40/align=1-8 42.0ns ± 3% 41.1ns ± 1% 41.7ns ± 5% 57.3ns ± 3%
+CRC32/poly=IEEE/size=512/align=0-8 57.1ns ± 3% 238.0ns ± 5% 332.3ns ± 2%
+CRC32/poly=IEEE/size=512/align=1-8 57.2ns ± 3% 235.5ns ± 3% 335.3ns ± 3%
+CRC32/poly=IEEE/size=1kB/align=0-8 94.1ns ± 2% 452.5ns ± 4% 626.3ns ± 2%
+CRC32/poly=IEEE/size=1kB/align=1-8 93.3ns ± 2% 443.6ns ± 2% 635.3ns ± 6%
+CRC32/poly=IEEE/size=4kB/align=0-8 298ns ± 1% 1740ns ± 8% 1682ns ± 2% 2457ns ± 7%
+CRC32/poly=IEEE/size=4kB/align=1-8 299ns ± 3% 1764ns ± 6% 1690ns ± 4% 2434ns ± 5%
+CRC32/poly=IEEE/size=32kB/align=0-8 2.16µs ± 3% 14.95µs ± 7% 19.15µs ± 4%
+CRC32/poly=IEEE/size=32kB/align=1-8 2.18µs ± 3% 14.19µs ± 7% 19.42µs ± 5%
+CRC32/poly=Castagnoli/size=15/align=0-8 16.3ns ± 2% 16.4ns ± 3% 59.4ns ± 1%
+CRC32/poly=Castagnoli/size=15/align=1-8 17.3ns ± 2% 17.2ns ± 2% 59.4ns ± 2%
+CRC32/poly=Castagnoli/size=40/align=0-8 17.5ns ± 4% 17.4ns ± 2% 18.6ns ±11% 59.7ns ± 6%
+CRC32/poly=Castagnoli/size=40/align=1-8 19.4ns ± 2% 19.7ns ± 3% 19.6ns ± 2% 58.1ns ± 7%
+CRC32/poly=Castagnoli/size=512/align=0-8 40.1ns ± 4% 40.2ns ± 2% 350.8ns ± 5%
+CRC32/poly=Castagnoli/size=512/align=1-8 41.9ns ± 2% 42.1ns ± 3% 349.8ns ± 7%
+CRC32/poly=Castagnoli/size=1kB/align=0-8 66.2ns ± 1% 65.5ns ± 1% 656.7ns ± 4%
+CRC32/poly=Castagnoli/size=1kB/align=1-8 68.5ns ± 2% 70.1ns ± 6% 656.8ns ± 6%
+CRC32/poly=Castagnoli/size=4kB/align=0-8 159ns ± 3% 163ns ± 5% 161ns ± 8% 2452ns ± 4%
+CRC32/poly=Castagnoli/size=4kB/align=1-8 162ns ± 3% 169ns ± 6% 170ns ± 8% 2448ns ± 5%
+CRC32/poly=Castagnoli/size=32kB/align=0-8 1.21µs ± 3% 1.22µs ± 4% 20.53µs ± 5%
+CRC32/poly=Castagnoli/size=32kB/align=1-8 1.22µs ± 4% 1.26µs ± 3% 20.18µs ± 9%
+CRC32/poly=Koopman/size=15/align=0-8 35.6ns ± 3% 36.5ns ±11% 58.2ns ± 4%
+CRC32/poly=Koopman/size=15/align=1-8 35.5ns ± 1% 35.1ns ± 5% 56.4ns ± 7%
+CRC32/poly=Koopman/size=40/align=0-8 87.6ns ± 2% 91.6ns ± 9% 93.8ns ±13% 142.3ns ± 8%
+CRC32/poly=Koopman/size=40/align=1-8 88.0ns ± 3% 91.1ns ± 6% 86.9ns ± 3% 136.2ns ± 5%
+CRC32/poly=Koopman/size=512/align=0-8 1.08µs ± 3% 1.13µs ± 5% 1.68µs ± 5%
+CRC32/poly=Koopman/size=512/align=1-8 1.17µs ± 8% 1.13µs ± 6% 1.64µs ± 4%
+CRC32/poly=Koopman/size=1kB/align=0-8 2.34µs ± 4% 2.24µs ± 6% 3.31µs ± 4%
+CRC32/poly=Koopman/size=1kB/align=1-8 2.36µs ± 5% 2.15µs ± 2% 3.28µs ± 3%
+CRC32/poly=Koopman/size=4kB/align=0-8 9.00µs ± 6% 9.03µs ± 6% 9.08µs ± 8% 13.32µs ± 3%
+CRC32/poly=Koopman/size=4kB/align=1-8 9.05µs ±12% 8.94µs ±10% 9.46µs ± 8% 13.16µs ± 3%
+CRC32/poly=Koopman/size=32kB/align=0-8 72.9µs ± 4% 72.4µs ± 9% 106.9µs ± 6%
+CRC32/poly=Koopman/size=32kB/align=1-8 74.3µs ± 3% 69.6µs ± 3% 106.1µs ± 4%
+
+name \ speed new.txt old.txt slashslash4.txt x386.txt
+CRC32/poly=IEEE/size=15/align=0-8 337MB/s ± 3% 321MB/s ± 8% 241MB/s ± 8%
+CRC32/poly=IEEE/size=15/align=1-8 337MB/s ± 4% 336MB/s ± 4% 237MB/s ± 8%
+CRC32/poly=IEEE/size=40/align=0-8 942MB/s ± 5% 975MB/s ± 1% 951MB/s ± 3% 696MB/s ± 3%
+CRC32/poly=IEEE/size=40/align=1-8 952MB/s ± 3% 974MB/s ± 1% 960MB/s ± 4% 698MB/s ± 3%
+CRC32/poly=IEEE/size=512/align=0-8 8.97GB/s ± 3% 2.15GB/s ± 4% 1.54GB/s ± 2%
+CRC32/poly=IEEE/size=512/align=1-8 8.96GB/s ± 3% 2.17GB/s ± 3% 1.52GB/s ± 3%
+CRC32/poly=IEEE/size=1kB/align=0-8 10.9GB/s ± 2% 2.3GB/s ± 4% 1.6GB/s ± 2%
+CRC32/poly=IEEE/size=1kB/align=1-8 11.0GB/s ± 2% 2.3GB/s ± 2% 1.6GB/s ± 6%
+CRC32/poly=IEEE/size=4kB/align=0-8 13.7GB/s ± 1% 2.4GB/s ± 7% 2.4GB/s ± 2% 1.7GB/s ± 7%
+CRC32/poly=IEEE/size=4kB/align=1-8 13.7GB/s ± 3% 2.3GB/s ± 6% 2.4GB/s ± 4% 1.7GB/s ± 5%
+CRC32/poly=IEEE/size=32kB/align=0-8 15.2GB/s ± 3% 2.2GB/s ± 7% 1.7GB/s ± 4%
+CRC32/poly=IEEE/size=32kB/align=1-8 15.0GB/s ± 3% 2.3GB/s ± 8% 1.7GB/s ± 5%
+CRC32/poly=Castagnoli/size=15/align=0-8 920MB/s ± 2% 916MB/s ± 2% 253MB/s ± 1%
+CRC32/poly=Castagnoli/size=15/align=1-8 867MB/s ± 2% 870MB/s ± 2% 253MB/s ± 2%
+CRC32/poly=Castagnoli/size=40/align=0-8 2.28GB/s ± 4% 2.30GB/s ± 2% 2.16GB/s ±11% 0.67GB/s ± 6%
+CRC32/poly=Castagnoli/size=40/align=1-8 2.06GB/s ± 2% 2.03GB/s ± 3% 2.04GB/s ± 2% 0.69GB/s ± 6%
+CRC32/poly=Castagnoli/size=512/align=0-8 12.8GB/s ± 4% 12.7GB/s ± 2% 1.5GB/s ± 5%
+CRC32/poly=Castagnoli/size=512/align=1-8 12.2GB/s ± 1% 12.1GB/s ± 3% 1.5GB/s ± 7%
+CRC32/poly=Castagnoli/size=1kB/align=0-8 15.5GB/s ± 1% 15.6GB/s ± 1% 1.6GB/s ± 4%
+CRC32/poly=Castagnoli/size=1kB/align=1-8 15.0GB/s ± 2% 14.6GB/s ± 6% 1.6GB/s ± 5%
+CRC32/poly=Castagnoli/size=4kB/align=0-8 25.7GB/s ± 3% 25.1GB/s ± 5% 25.4GB/s ± 7% 1.7GB/s ± 4%
+CRC32/poly=Castagnoli/size=4kB/align=1-8 25.3GB/s ± 3% 24.1GB/s ± 6% 24.1GB/s ± 8% 1.7GB/s ± 5%
+CRC32/poly=Castagnoli/size=32kB/align=0-8 26.8GB/s ± 5% 26.9GB/s ± 4% 1.6GB/s ± 5%
+CRC32/poly=Castagnoli/size=32kB/align=1-8 26.8GB/s ± 4% 25.9GB/s ± 3% 1.6GB/s ± 8%
+CRC32/poly=Koopman/size=15/align=0-8 421MB/s ± 3% 412MB/s ±10% 258MB/s ± 5%
+CRC32/poly=Koopman/size=15/align=1-8 422MB/s ± 1% 427MB/s ± 5% 266MB/s ± 6%
+CRC32/poly=Koopman/size=40/align=0-8 456MB/s ± 2% 437MB/s ± 9% 428MB/s ±12% 281MB/s ± 7%
+CRC32/poly=Koopman/size=40/align=1-8 455MB/s ± 3% 440MB/s ± 6% 461MB/s ± 3% 290MB/s ± 8%
+CRC32/poly=Koopman/size=512/align=0-8 476MB/s ± 3% 453MB/s ± 5% 304MB/s ± 5%
+CRC32/poly=Koopman/size=512/align=1-8 440MB/s ± 8% 455MB/s ± 6% 312MB/s ± 3%
+CRC32/poly=Koopman/size=1kB/align=0-8 438MB/s ± 4% 452MB/s ± 9% 310MB/s ± 4%
+CRC32/poly=Koopman/size=1kB/align=1-8 434MB/s ± 5% 477MB/s ± 2% 312MB/s ± 3%
+CRC32/poly=Koopman/size=4kB/align=0-8 455MB/s ± 6% 454MB/s ± 5% 452MB/s ± 8% 308MB/s ± 3%
+CRC32/poly=Koopman/size=4kB/align=1-8 455MB/s ±11% 459MB/s ± 9% 434MB/s ± 9% 311MB/s ± 3%
+CRC32/poly=Koopman/size=32kB/align=0-8 450MB/s ± 4% 453MB/s ± 8% 307MB/s ± 6%
+CRC32/poly=Koopman/size=32kB/align=1-8 441MB/s ± 3% 471MB/s ± 3% 309MB/s ± 4%
diff --git a/cmd/benchstat/testdata/packages.golden b/cmd/benchstat/testdata/packages.golden
new file mode 100644
index 0000000..05cf0c7
--- /dev/null
+++ b/cmd/benchstat/testdata/packages.golden
@@ -0,0 +1,11 @@
+name old time/op new time/op delta
+pkg:encoding/gob
+GobEncode 13.6ms ± 1% 11.8ms ± 1% -13.31% (p=0.016 n=4+5)
+pkg:encoding/json
+JSONEncode 32.1ms ± 1% 31.8ms ± 1% ~ (p=0.286 n=4+5)
+
+name old speed new speed delta
+pkg:encoding/gob
+GobEncode 56.4MB/s ± 1% 65.1MB/s ± 1% +15.36% (p=0.016 n=4+5)
+pkg:encoding/json
+JSONEncode 60.4MB/s ± 1% 61.1MB/s ± 2% ~ (p=0.286 n=4+5)
diff --git a/cmd/benchstat/testdata/packagesnew.txt b/cmd/benchstat/testdata/packagesnew.txt
new file mode 100644
index 0000000..7732820
--- /dev/null
+++ b/cmd/benchstat/testdata/packagesnew.txt
@@ -0,0 +1,12 @@
+pkg: encoding/gob
+BenchmarkGobEncode 100 11773189 ns/op 65.19 MB/s
+BenchmarkGobEncode 100 11942588 ns/op 64.27 MB/s
+BenchmarkGobEncode 100 11786159 ns/op 65.12 MB/s
+BenchmarkGobEncode 100 11628583 ns/op 66.00 MB/s
+BenchmarkGobEncode 100 11815924 ns/op 64.96 MB/s
+pkg: encoding/json
+BenchmarkJSONEncode 50 32036529 ns/op 60.57 MB/s
+BenchmarkJSONEncode 50 32156552 ns/op 60.34 MB/s
+BenchmarkJSONEncode 50 31288355 ns/op 62.02 MB/s
+BenchmarkJSONEncode 50 31559706 ns/op 61.49 MB/s
+BenchmarkJSONEncode 50 31765634 ns/op 61.09 MB/s
diff --git a/cmd/benchstat/testdata/packagesold.golden b/cmd/benchstat/testdata/packagesold.golden
new file mode 100644
index 0000000..4c20063
--- /dev/null
+++ b/cmd/benchstat/testdata/packagesold.golden
@@ -0,0 +1,11 @@
+name time/op
+pkg:encoding/gob
+GobEncode 13.6ms ± 1%
+pkg:encoding/json
+JSONEncode 32.1ms ± 1%
+
+name speed
+pkg:encoding/gob
+GobEncode 56.4MB/s ± 1%
+pkg:encoding/json
+JSONEncode 60.4MB/s ± 1%
diff --git a/cmd/benchstat/testdata/packagesold.txt b/cmd/benchstat/testdata/packagesold.txt
new file mode 100644
index 0000000..add4b7a
--- /dev/null
+++ b/cmd/benchstat/testdata/packagesold.txt
@@ -0,0 +1,10 @@
+pkg: encoding/gob
+BenchmarkGobEncode 100 13552735 ns/op 56.63 MB/s
+BenchmarkGobEncode 100 13553943 ns/op 56.63 MB/s
+BenchmarkGobEncode 100 13606356 ns/op 56.41 MB/s
+BenchmarkGobEncode 100 13683198 ns/op 56.09 MB/s
+pkg: encoding/json
+BenchmarkJSONEncode 50 32395067 ns/op 59.90 MB/s
+BenchmarkJSONEncode 50 32334214 ns/op 60.01 MB/s
+BenchmarkJSONEncode 50 31992891 ns/op 60.65 MB/s
+BenchmarkJSONEncode 50 31735022 ns/op 61.15 MB/s