diff --git a/benchstat/doc.go b/benchstat/doc.go
new file mode 100644
index 0000000..0ca6010
--- /dev/null
+++ b/benchstat/doc.go
@@ -0,0 +1,13 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package benchstat is deprecated.
+//
+// This package contains the underlying implementation of an old
+// version of the benchstat command.
+//
+// Deprecated: The latest version of benchstat can be found at
+// golang.org/x/perf/cmd/benchstat. To work with benchmark data, see
+// golang.org/x/perf/benchproc and golang.org/x/perf/benchmath.
+package benchstat
diff --git a/cmd/benchstat/README.md b/cmd/benchstat/README.md
deleted file mode 100644
index db5ec61..0000000
--- a/cmd/benchstat/README.md
+++ /dev/null
@@ -1,91 +0,0 @@
-# Benchstat
-
-Benchstat computes and compares statistics about benchmarks.
-
-Usage:
-
-    benchstat [options] old.txt [new.txt] [more.txt ...]
-
-Run `benchstat -h` for the list of supported options.
-
-Each input file should contain the concatenated output of a number of runs
-of `go test -bench`. For each different benchmark listed in an input file,
-benchstat computes the mean, minimum, and maximum run time, after removing
-outliers using the interquartile range rule.
-
-If invoked on a single input file, benchstat prints the per-benchmark
-statistics for that file.
-
-If invoked on a pair of input files, benchstat adds to the output a column
-showing the statistics from the second file and a column showing the percent
-change in mean from the first to the second file. Next to the percent
-change, benchstat shows the p-value and sample sizes from a test of the two
-distributions of benchmark times. Small p-values indicate that the two
-distributions are significantly different. If the test indicates that there
-was no significant change between the two benchmarks (defined as p > 0.05),
-benchstat displays a single ~ instead of the percent change.
-
-The -delta-test option controls which significance test is applied: utest
-(Mann-Whitney U-test), ttest (two-sample Welch t-test), or none. The default
-is the U-test, sometimes also referred to as the Wilcoxon rank sum test.
-
-If invoked on more than two input files, benchstat prints the per-benchmark
-statistics for all the files, showing one column of statistics for each
-file, with no column for percent change or statistical significance.
-
-The -html option causes benchstat to print the results as an HTML table.
-
-## Example
-
-Suppose we collect results from running a set of benchmarks twenty times
-before a particular change:
-
-    go test -run='^$' -bench=. -count=20 > old.txt
-
-And the same benchmarks twenty times after:
-
-    go test -run='^$' -bench=. -count=20 > new.txt
-
-The file old.txt contains:
-
-    BenchmarkGobEncode   	100	  13552735 ns/op	  56.63 MB/s
-    BenchmarkJSONEncode  	 50	  32395067 ns/op	  59.90 MB/s
-    BenchmarkGobEncode   	100	  13553943 ns/op	  56.63 MB/s
-    BenchmarkJSONEncode  	 50	  32334214 ns/op	  60.01 MB/s
-    BenchmarkGobEncode   	100	  13606356 ns/op	  56.41 MB/s
-    BenchmarkJSONEncode  	 50	  31992891 ns/op	  60.65 MB/s
-    BenchmarkGobEncode   	100	  13683198 ns/op	  56.09 MB/s
-    BenchmarkJSONEncode  	 50	  31735022 ns/op	  61.15 MB/s
-
-The file new.txt contains:
-
-    BenchmarkGobEncode   	 100	  11773189 ns/op	  65.19 MB/s
-    BenchmarkJSONEncode  	  50	  32036529 ns/op	  60.57 MB/s
-    BenchmarkGobEncode   	 100	  11942588 ns/op	  64.27 MB/s
-    BenchmarkJSONEncode  	  50	  32156552 ns/op	  60.34 MB/s
-    BenchmarkGobEncode   	 100	  11786159 ns/op	  65.12 MB/s
-    BenchmarkJSONEncode  	  50	  31288355 ns/op	  62.02 MB/s
-    BenchmarkGobEncode   	 100	  11628583 ns/op	  66.00 MB/s
-    BenchmarkJSONEncode  	  50	  31559706 ns/op	  61.49 MB/s
-    BenchmarkGobEncode   	 100	  11815924 ns/op	  64.96 MB/s
-    BenchmarkJSONEncode  	  50	  31765634 ns/op	  61.09 MB/s
-
-The order of the lines in the file does not matter, except that the output
-lists benchmarks in order of appearance.
-
-If run with just one input file, benchstat summarizes that file:
-
-    $ benchstat old.txt
-    name        time/op
-    GobEncode   13.6ms ± 1%
-    JSONEncode  32.1ms ± 1%
-
-If run with two input files, benchstat summarizes and compares:
-
-    $ benchstat old.txt new.txt
-    name        old time/op  new time/op  delta
-    GobEncode   13.6ms ± 1%  11.8ms ± 1%  -13.31% (p=0.016 n=4+5)
-    JSONEncode  32.1ms ± 1%  31.8ms ± 1%     ~    (p=0.286 n=4+5)
-
-Note that the JSONEncode result is reported as statistically insignificant
-instead of a -0.93% delta.
diff --git a/cmd/benchstat/doc_test.go b/cmd/benchstat/doc_test.go
new file mode 100644
index 0000000..e736361
--- /dev/null
+++ b/cmd/benchstat/doc_test.go
@@ -0,0 +1,106 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+	"bytes"
+	"go/ast"
+	"go/doc"
+	"go/parser"
+	"go/token"
+	"os"
+	"regexp"
+	"strings"
+	"testing"
+)
+
+// TestDoc checks that every "$ benchstat ..." example in the
+// command's package documentation produces exactly the output the
+// documentation shows for it.
+func TestDoc(t *testing.T) {
+	// Pull the package doc comment out of main.go.
+	fset := token.NewFileSet()
+	file, err := parser.ParseFile(fset, "main.go", nil, parser.ParseComments)
+	if err != nil {
+		t.Fatal(err)
+	}
+	pkg, err := doc.NewFromFiles(fset, []*ast.File{file}, "p")
+	if err != nil {
+		t.Fatal(err)
+	}
+	examples := parseDocTests(pkg.Doc)
+	if len(examples) == 0 {
+		t.Fatal("failed to parse doc tests: found 0 tests")
+	}
+
+	// The examples are run from inside the testdata directory.
+	if err := os.Chdir("testdata"); err != nil {
+		t.Fatal(err)
+	}
+	defer os.Chdir("..")
+
+	for _, ex := range examples {
+		t.Logf("benchstat %s", strings.Join(ex.args, " "))
+
+		var stdout, stderr bytes.Buffer
+		if err := benchstat(&stdout, &stderr, ex.args); err != nil {
+			t.Fatalf("unexpected error: %s", err)
+		}
+
+		// A documented example must not write anything to stderr.
+		if stderr.Len() != 0 {
+			t.Errorf("unexpected stderr output:\n%s", stderr.String())
+			continue
+		}
+
+		// Check stdout against the documented output.
+		diff(t, []byte(ex.want), stdout.Bytes())
+	}
+}
+
+// docTest is one example extracted from the package documentation:
+// a benchstat command line together with the output the
+// documentation shows for it.
+type docTest struct {
+	// args is the parsed argument list that followed "benchstat".
+	args []string
+	// want is the documented output with the leading tab of each
+	// line removed.
+	want string
+}
+
+// docTestRe matches one documentation example: a line indented with
+// spaces or tabs of the form "$ benchstat ARGS" (ARGS is capture
+// group 1), followed by one or more lines that are each either empty
+// or begin with a tab (together, capture group 2).
+var docTestRe = regexp.MustCompile(`(?m)^[ \t]+\$ benchstat (.*)\n((?:\t.*\n|\n)+)`)
+
+// parseDocTests extracts every example matched by docTestRe from
+// doc. For each one it parses the command line into arguments and
+// normalizes the expected output: trailing blank lines collapse to a
+// single newline and the leading tab of each line is removed.
+func parseDocTests(doc string) []*docTest {
+	var tests []*docTest
+	for _, match := range docTestRe.FindAllStringSubmatch(doc, -1) {
+		cmdLine, body := match[1], match[2]
+		// Keep exactly one trailing newline.
+		body = strings.TrimRight(body, "\n") + "\n"
+		// Drop the first character (the first line's tab), then
+		// the tab that starts each following line.
+		body = strings.ReplaceAll(body[1:], "\n\t", "\n")
+		tests = append(tests, &docTest{args: parseArgs(cmdLine), want: body})
+	}
+	return tests
+}
+
+// parseArgs is a very basic parser for shell-like word lists.
+//
+// Words are separated by runs of spaces. A word that starts with a
+// double quote extends to the next double quote (there is no escape
+// syntax) and may contain spaces. parseArgs panics if a quoted word
+// is not terminated.
+func parseArgs(x string) []string {
+	// TODO: Use strings.Cut
+	var words []string
+	rest := strings.Trim(x, " ")
+	for rest != "" {
+		var word string
+		if rest[0] == '"' {
+			// Quoted word: find the closing quote after the opening one.
+			end := strings.Index(rest[1:], "\"")
+			if end < 0 {
+				panic("missing \"")
+			}
+			word, rest = rest[1:1+end], rest[end+2:]
+		} else if sp := strings.Index(rest, " "); sp >= 0 {
+			word, rest = rest[:sp], rest[sp+1:]
+		} else {
+			// Last word on the line.
+			word, rest = rest, ""
+		}
+		words = append(words, word)
+		rest = strings.TrimLeft(rest, " ")
+	}
+	return words
+}
diff --git a/cmd/benchstat/internal/benchtab/builder.go b/cmd/benchstat/internal/benchtab/builder.go
new file mode 100644
index 0000000..f5849fa
--- /dev/null
+++ b/cmd/benchstat/internal/benchtab/builder.go
@@ -0,0 +1,422 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package benchtab presents benchmark results as comparison tables.
+package benchtab
+
+import (
+	"encoding/csv"
+	"errors"
+	"fmt"
+	"io"
+	"math"
+	"runtime"
+	"strings"
+	"sync"
+
+	"github.com/aclements/go-moremath/stats"
+	"golang.org/x/perf/benchfmt"
+	"golang.org/x/perf/benchmath"
+	"golang.org/x/perf/benchproc"
+)
+
+// TODO: Color by good/bad (or nothing for unknown units)
+
+// A Builder collects benchmark results into a Tables set.
+//
+// Create a Builder with NewBuilder, feed it results with Add, and
+// call ToTables to compute the final tables.
+type Builder struct {
+	// tableBy, rowBy, and colBy project a result to its table, row,
+	// and column keys. residue projects a result to the key used to
+	// detect results that share a cell but differ in other fields.
+	tableBy, rowBy, colBy *benchproc.Projection
+	residue               *benchproc.Projection
+
+	// unitField is the ".unit" field of tableBy. ToTables uses it
+	// to read the unit back out of each table key.
+	unitField *benchproc.Field
+
+	// tables maps from tableBy to table.
+	tables map[benchproc.Key]*builderTable
+}
+
+// builderTable accumulates the raw data for one table (one tableBy
+// key) while results are being added to a Builder.
+type builderTable struct {
+	// Observed row and col Keys within this group. Within the
+	// group, we show only the row and col labels for the data in
+	// the group, but we sort them according to the global
+	// observation order for consistency across groups.
+	rows map[benchproc.Key]struct{}
+	cols map[benchproc.Key]struct{}
+
+	// cells maps from (row, col) to each cell.
+	cells map[TableKey]*builderCell
+}
+
+// builderCell accumulates the raw data for one (row, col) cell of a
+// builderTable.
+type builderCell struct {
+	// values is the observed values in this cell.
+	values []float64
+	// residue is the set of residue keys mapped to this cell.
+	// It is used to check for non-unique keys.
+	residue map[benchproc.Key]struct{}
+}
+
+// NewBuilder creates a new Builder for collecting benchmark results
+// into tables. Each result will be mapped to a Table by tableBy.
+// Within each table, the results are mapped to cells by rowBy and
+// colBy. Any results within a single cell that vary by residue will
+// be reported as warnings.
+//
+// The last field of tableBy must be the ".unit" field. NewBuilder
+// panics if tableBy has no fields or its last field is not ".unit".
+func NewBuilder(tableBy, rowBy, colBy, residue *benchproc.Projection) *Builder {
+	tableFields := tableBy.Fields()
+	// Check the length first so that an empty projection produces
+	// the descriptive panic below rather than an index-out-of-range
+	// panic.
+	if len(tableFields) == 0 || tableFields[len(tableFields)-1].Name != ".unit" {
+		panic("tableBy projection missing .unit field")
+	}
+	unitField := tableFields[len(tableFields)-1]
+	return &Builder{
+		tableBy: tableBy, rowBy: rowBy, colBy: colBy, residue: residue,
+		unitField: unitField,
+		tables:    make(map[benchproc.Key]*builderTable),
+	}
+}
+
+// Add records every value of result in the Builder. The table keys
+// produced by the tableBy projection are indexed in parallel with
+// result.Values; each value is appended to the cell selected by the
+// result's row and column keys within the corresponding table.
+func (b *Builder) Add(result *benchfmt.Result) {
+	// The row, column, and residue keys are the same for every
+	// value of the result; only the table key differs.
+	tableKeys := b.tableBy.ProjectValues(result)
+	rowKey, colKey := b.rowBy.Project(result), b.colBy.Project(result)
+	residueKey := b.residue.Project(result)
+	cellKey := TableKey{rowKey, colKey}
+
+	for i, tableKey := range tableKeys {
+		// Find or create the table.
+		tbl := b.tables[tableKey]
+		if tbl == nil {
+			tbl = b.newTable()
+			b.tables[tableKey] = tbl
+		}
+
+		// Find or create the cell, noting its row and column the
+		// first time the cell is seen.
+		cell := tbl.cells[cellKey]
+		if cell == nil {
+			cell = &builderCell{residue: make(map[benchproc.Key]struct{})}
+			tbl.cells[cellKey] = cell
+			tbl.rows[rowKey] = struct{}{}
+			tbl.cols[colKey] = struct{}{}
+		}
+
+		// Record the value and the residue it came with.
+		cell.values = append(cell.values, result.Values[i].Value)
+		cell.residue[residueKey] = struct{}{}
+	}
+}
+
+// newTable returns an empty builderTable with all of its maps
+// allocated.
+func (b *Builder) newTable() *builderTable {
+	t := new(builderTable)
+	t.rows = make(map[benchproc.Key]struct{})
+	t.cols = make(map[benchproc.Key]struct{})
+	t.cells = make(map[TableKey]*builderCell)
+	return t
+}
+
+// TableOpts provides options for constructing the final analysis
+// tables from a Builder.
+//
+// A copy of the TableOpts passed to Builder.ToTables is stored in
+// the Opts field of every resulting Table.
+type TableOpts struct {
+	// Confidence is the desired confidence level in summary
+	// intervals; e.g., 0.95 for 95%. It is passed to the unit's
+	// Assumption.Summary when each cell is summarized.
+	Confidence float64
+
+	// Thresholds is the thresholds to use for statistical tests.
+	// It is passed to benchmath.NewSample for every cell.
+	Thresholds *benchmath.Thresholds
+
+	// Units is the unit metadata. This gives distributional
+	// assumptions for units, among other properties. ToTables
+	// calls Units.GetAssumption with each table's unit.
+	Units benchfmt.UnitMetadataMap
+}
+
+// Tables is a sequence of benchmark statistic tables.
+//
+// Builder.ToTables constructs this struct with an unkeyed literal,
+// so the order of its fields is significant.
+type Tables struct {
+	// Tables is a slice of statistic tables. Within a Table, all
+	// results have the same table Key (including unit).
+	// Builder.ToTables produces them in the order that
+	// benchproc.SortKeys gives to Keys.
+	Tables []*Table
+	// Keys is a slice of table keys, corresponding 1:1 to
+	// the Tables slice. These always end with a ".unit"
+	// field giving the unit.
+	Keys []benchproc.Key
+}
+
+// ToTables finalizes a Builder into a sequence of statistic tables.
+//
+// Tables are ordered by benchproc.SortKeys over their table keys, and
+// each table's rows and columns are sorted the same way. The first
+// column of each table is the baseline: every other cell in a row is
+// compared against the cell in that column, if there is one. Each
+// table also gets a "geomean" summary entry per column.
+//
+// The work is done in two concurrent phases separated by a barrier:
+// first every cell is summarized, then every column is summarized
+// from the finished cells.
+func (b *Builder) ToTables(opts TableOpts) *Tables {
+	// Sort tables.
+	var keys []benchproc.Key
+	for k := range b.tables {
+		keys = append(keys, k)
+	}
+	benchproc.SortKeys(keys)
+
+	// We're going to compute table cells in parallel because the
+	// statistics are somewhat expensive. This is entirely
+	// CPU-bound, so we put a simple concurrency limit on it.
+	//
+	// limit is a counting semaphore: a slot is taken before each
+	// goroutine is started and released when it finishes.
+	// GOMAXPROCS with an argument below 1 only queries the setting.
+	limit := make(chan struct{}, 2*runtime.GOMAXPROCS(-1))
+	var wg sync.WaitGroup
+
+	// Process each table.
+	var tables []*Table
+	for _, k := range keys {
+		cTable := b.tables[k]
+
+		// Get the configured assumption for this unit.
+		unit := k.Get(b.unitField)
+		assumption := opts.Units.GetAssumption(unit)
+
+		// Sort the rows and columns.
+		rowKeys, colKeys := mapKeys(cTable.rows), mapKeys(cTable.cols)
+		table := &Table{
+			Unit:       unit,
+			Opts:       opts,
+			Assumption: assumption,
+			Rows:       rowKeys,
+			Cols:       colKeys,
+			Cells:      make(map[TableKey]*TableCell),
+		}
+		tables = append(tables, table)
+
+		// Create all TableCells and fill their Samples. This
+		// is fast enough it's not worth parallelizing. This
+		// enables the second pass to look up baselines and
+		// their samples.
+		for k, cCell := range cTable.cells {
+			table.Cells[k] = &TableCell{
+				Sample: benchmath.NewSample(cCell.values, opts.Thresholds),
+			}
+		}
+
+		// Populate cells.
+		//
+		// The first column in sorted order is the baseline. A
+		// builderTable is only created when a result is added to
+		// it, so colKeys is never empty here.
+		baselineCfg := colKeys[0]
+		wg.Add(len(cTable.cells))
+		for k, cCell := range cTable.cells {
+			cell := table.Cells[k]
+
+			// Look up the baseline.
+			if k.Col != baselineCfg {
+				base, ok := table.Cells[TableKey{k.Row, baselineCfg}]
+				if ok {
+					cell.Baseline = base
+				}
+			}
+
+			// Take a semaphore slot before starting the goroutine so
+			// the number of running goroutines stays bounded.
+			limit <- struct{}{}
+			// Give the goroutine its own copy of the loop variable.
+			cCell := cCell
+			go func() {
+				summarizeCell(cCell, cell, assumption, opts.Confidence)
+				<-limit
+				wg.Done()
+			}()
+		}
+	}
+	// Barrier: the column summaries below read every cell's Summary.
+	wg.Wait()
+
+	// Add summary rows to each table.
+	for _, table := range tables {
+		table.SummaryLabel = "geomean"
+		table.Summary = make(map[benchproc.Key]*TableSummary)
+
+		// Count the number of baseline benchmarks. If later
+		// columns don't have the same number of baseline
+		// pairings, we know the benchmark sets don't match.
+		nBase := 0
+		baseCol := table.Cols[0]
+		for _, row := range table.Rows {
+			if _, ok := table.Cells[TableKey{row, baseCol}]; ok {
+				nBase++
+			}
+		}
+
+		for i, col := range table.Cols {
+			// s is declared inside the loop, so each column's
+			// goroutine fills in its own TableSummary. The map entry
+			// is stored here, on this goroutine, so the map itself is
+			// never written concurrently.
+			var s TableSummary
+			table.Summary[col] = &s
+			isBase := i == 0
+
+			limit <- struct{}{}
+			// Give the goroutine its own copies of the loop variables.
+			table, col := table, col
+			wg.Add(1)
+			go func() {
+				summarizeCol(table, col, &s, nBase, isBase)
+				<-limit
+				wg.Done()
+			}()
+		}
+	}
+	wg.Wait()
+
+	return &Tables{tables, keys}
+}
+
+// mapKeys returns the keys of the set m, ordered by
+// benchproc.SortKeys. The result is nil when m is empty.
+func mapKeys(m map[benchproc.Key]struct{}) []benchproc.Key {
+	var sorted []benchproc.Key
+	for key := range m {
+		sorted = append(sorted, key)
+	}
+	benchproc.SortKeys(sorted)
+	return sorted
+}
+
+// summarizeCell fills in cell.Summary and, when cell has a baseline,
+// cell.Comparison, using the given distributional assumption. If
+// benchproc.NonSingularFields reports any fields for the residue keys
+// recorded in cCell, it appends one warning naming those fields to
+// cell.Sample.Warnings.
+func summarizeCell(cCell *builderCell, cell *TableCell, assumption benchmath.Assumption, confidence float64) {
+	cell.Summary = assumption.Summary(cell.Sample, confidence)
+
+	// Compare against the baseline cell, if this cell has one.
+	if base := cell.Baseline; base != nil {
+		cell.Comparison = assumption.Compare(base.Sample, cell.Sample)
+	}
+
+	// Warn for non-singular keys in this cell.
+	varying := benchproc.NonSingularFields(mapKeys(cCell.residue))
+	if len(varying) == 0 {
+		return
+	}
+	names := make([]string, 0, len(varying))
+	for _, field := range varying {
+		names = append(names, field.Name)
+	}
+	msg := "benchmarks vary in " + strings.Join(names, ", ")
+	cell.Sample.Warnings = append(cell.Sample.Warnings, errors.New(msg))
+}
+
+// summarizeCol computes the summary entry s for column col of table.
+//
+// It sets s.Summary to the geometric mean of the column's cell
+// centers and, for non-baseline columns, s.Ratio to the geometric
+// mean of each cell's center divided by its baseline's center.
+// nBase is the number of rows that have a baseline cell and isBase
+// reports whether col is the baseline column. Problems are recorded
+// in s.Warnings rather than returned.
+func summarizeCol(table *Table, col benchproc.Key, s *TableSummary, nBase int, isBase bool) {
+	// This computes the geomean of the summary ratios rather than
+	// ratio of the summary geomeans. These are identical *if* the
+	// benchmark sets are the same. But if the benchmark sets
+	// differ, this leads to more sensible ratios because it's
+	// still the geomean of the column, rather than being a
+	// comparison of two incomparable numbers. It's still easy to
+	// misinterpret, but at least it's not meaningless.
+	var centers, ratios []float64
+	undefinedRatio := false
+	for _, row := range table.Rows {
+		cell, ok := table.Cells[TableKey{row, col}]
+		if !ok {
+			continue
+		}
+		centers = append(centers, cell.Summary.Center)
+		if cell.Baseline == nil {
+			continue
+		}
+		cur, base := cell.Summary.Center, cell.Baseline.Summary.Center
+		switch {
+		case cur == base:
+			// Equal centers give a ratio of 1; this also treats 0/0 as 1.
+			ratios = append(ratios, 1)
+		case base == 0:
+			// x/0 has no usable ratio. Still add a placeholder so
+			// the nBase check below keeps working.
+			undefinedRatio = true
+			ratios = append(ratios, 0)
+		default:
+			ratios = append(ratios, cur/base)
+		}
+	}
+
+	// If the number of cells in this column that had a baseline
+	// is the same as the total number of baselines, then we know
+	// the benchmark sets match. Otherwise, they don't and these
+	// numbers are probably misleading.
+	if !isBase && len(ratios) != nBase {
+		s.Warnings = append(s.Warnings, errors.New("benchmark set differs from baseline; geomeans may not be comparable"))
+	}
+
+	// Summarize centers.
+	if gm := stats.GeoMean(centers); math.IsNaN(gm) {
+		s.Warnings = append(s.Warnings, errors.New("summaries must be >0 to compute geomean"))
+	} else {
+		s.HasSummary = true
+		s.Summary = gm
+	}
+
+	// Summarize ratios. The baseline column has none, and a column
+	// with an undefined ratio gets no ratio summary.
+	if isBase || undefinedRatio {
+		return
+	}
+	if gm := stats.GeoMean(ratios); math.IsNaN(gm) {
+		s.Warnings = append(s.Warnings, errors.New("ratios must be >0 to compute geomean"))
+	} else {
+		s.HasRatio = true
+		s.Ratio = gm
+	}
+}
+
+// ToText renders t to a textual representation, assuming a
+// fixed-width font. Each header line is written to w followed by a
+// newline, and each table is rendered to w with Table.ToText.
+func (t *Tables) ToText(w io.Writer, color bool) error {
+	printHeader := func(line string) error {
+		_, err := fmt.Fprintf(w, "%s\n", line)
+		return err
+	}
+	printTable := func(tab *Table) error {
+		return tab.ToText(w, color)
+	}
+	return t.printTables(printHeader, printTable)
+}
+
+// ToCSV renders t to CSV (comma-separated values) format on w.
+//
+// Warnings are written to a separate stream so as not to interrupt
+// the regular format of the CSV table. Each warning is prefixed
+// with a cell reference; rows are numbered from 1 across all of the
+// tables and the header lines between them.
+func (t *Tables) ToCSV(w, warnings io.Writer) error {
+	out := csv.NewWriter(w)
+	nextRow := 1
+
+	writeHeader := func(hdr string) error {
+		// Each header line is a CSV row of its own.
+		out.Write([]string{hdr})
+		nextRow++
+		return nil
+	}
+	writeTable := func(table *Table) error {
+		nextRow += table.ToCSV(out, nextRow, warnings)
+		return nil
+	}
+	if err := t.printTables(writeHeader, writeTable); err != nil {
+		return err
+	}
+
+	out.Flush()
+	return out.Error()
+}
+
+// printTables drives the output of all tables in t. Before each
+// table it calls hdr once for every table-key field (other than
+// ".unit") whose value differs from the previous table's, or for
+// all such fields before the first table. Between tables it calls
+// hdr with an empty string. It calls cb to print each table and
+// stops at the first error returned by hdr or cb.
+func (t *Tables) printTables(hdr func(string) error, cb func(*Table) error) error {
+	if len(t.Tables) == 0 {
+		return nil
+	}
+
+	fields := t.Keys[0].Projection().FlattenedFields()
+	// last is the key of the previously printed table; it stays the
+	// zero Key until the first table has been printed.
+	var last benchproc.Key
+
+	for i, table := range t.Tables {
+		if i != 0 {
+			// Blank line between tables.
+			if err := hdr(""); err != nil {
+				return err
+			}
+		}
+
+		// Print table key changes.
+		cur := t.Keys[i]
+		for _, field := range fields {
+			if field.Name == ".unit" {
+				// Skip .unit because it's shown in the table itself.
+				continue
+			}
+			val := cur.Get(field)
+			if !last.IsZero() && val == last.Get(field) {
+				// Unchanged since the previous table.
+				continue
+			}
+			if err := hdr(fmt.Sprintf("%s: %s", field.Name, val)); err != nil {
+				return err
+			}
+		}
+		last = cur
+
+		// Print table.
+		if err := cb(table); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
diff --git a/cmd/benchstat/internal/benchtab/table.go b/cmd/benchstat/internal/benchtab/table.go
new file mode 100644
index 0000000..5409a7d
--- /dev/null
+++ b/cmd/benchstat/internal/benchtab/table.go
@@ -0,0 +1,430 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package benchtab
+
+import (
+	"encoding/csv"
+	"fmt"
+	"io"
+	"strings"
+
+	"golang.org/x/perf/benchmath"
+	"golang.org/x/perf/benchproc"
+	"golang.org/x/perf/benchunit"
+	"golang.org/x/perf/cmd/benchstat/internal/texttab"
+)
+
+// A Table summarizes and compares benchmark results in a 2D grid.
+// Each cell summarizes a Sample of results with identical row and
+// column Keys. Comparisons are done within each row between the
+// Sample in the first column and the Samples in any remaining
+// columns.
+type Table struct {
+	// Opts is the configuration options for this table.
+	Opts TableOpts
+
+	// Unit is the benchmark unit of all samples in this Table.
+	Unit string
+
+	// Assumption is the distributional assumption used for all
+	// samples in this table.
+	Assumption benchmath.Assumption
+
+	// Rows and Cols give the sequence of row and column Keys
+	// in this table. All row Keys have the same Projection and all
+	// col Keys have the same Projection. Cols[0] is the baseline
+	// column that the other columns are compared against.
+	Rows, Cols []benchproc.Key
+
+	// Cells is the cells in the body of this table. Each key in
+	// this map is a pair of some Key from Rows and some Key
+	// from Cols. However, not all pairs may be present in the
+	// map.
+	Cells map[TableKey]*TableCell
+
+	// Summary is the final row of this table, which gives summary
+	// information across all benchmarks in this table. It is
+	// keyed by Cols.
+	Summary map[benchproc.Key]*TableSummary
+
+	// SummaryLabel is the label for the summary row.
+	// Builder.ToTables sets it to "geomean".
+	SummaryLabel string
+}
+
+// TableKey is a map key used to index a single cell in a Table.
+//
+// Code in this package constructs TableKeys with unkeyed literals
+// in (Row, Col) order, so the order of the fields is significant.
+type TableKey struct {
+	Row, Col benchproc.Key
+}
+
+// TableCell is a single cell in a Table. It represents a sample of
+// benchmark results with the same row and column Key.
+type TableCell struct {
+	// Sample is the set of benchmark results in this cell.
+	// Warnings that apply to the cell as a whole, such as residue
+	// fields that vary within it, are appended to Sample.Warnings.
+	Sample *benchmath.Sample
+
+	// Summary is the summary of Sample, as computed by the
+	// Table's distributional assumption.
+	Summary benchmath.Summary
+
+	// Baseline is the baseline cell used for comparisons with
+	// this cell, or nil if there is no comparison. This is the
+	// cell in the first column of this cell's row, if any.
+	// Cells in the first column have a nil Baseline.
+	Baseline *TableCell
+
+	// Comparison is the comparison with the Baseline cell, as
+	// computed by the Table's distributional assumption. If
+	// Baseline is nil, this value is meaningless.
+	Comparison benchmath.Comparison
+}
+
+// TableSummary is a cell that summarizes a column of a Table.
+// It appears in the last row of a table.
+type TableSummary struct {
+	// HasSummary indicates that Summary is valid.
+	HasSummary bool
+	// Summary summarizes all of the TableCell.Summary values in
+	// this column. It is the geometric mean of their Center
+	// values.
+	Summary float64
+
+	// HasRatio indicates that Ratio is valid. It is never set for
+	// the baseline column.
+	HasRatio bool
+	// Ratio summarizes how this column compares to the baseline
+	// column. It is the geometric mean, over the cells that have
+	// a baseline, of the cell's Summary.Center divided by the
+	// baseline's Summary.Center, so 1 means no change.
+	Ratio float64
+
+	// Warnings is a list of warnings for this summary cell.
+	Warnings []error
+}
+
+// RowScaler returns a common scaler for the values in row: the
+// result of benchunit.CommonScale applied to the Summary.Center of
+// every cell present in that row, in column order.
+func (t *Table) RowScaler(row benchproc.Key, unitClass benchunit.Class) benchunit.Scaler {
+	var centers []float64
+	for _, col := range t.Cols {
+		if cell, ok := t.Cells[TableKey{row, col}]; ok {
+			centers = append(centers, cell.Summary.Center)
+		}
+	}
+	return benchunit.CommonScale(centers, unitClass)
+}
+
+// ToText renders t to a textual representation, assuming a
+// fixed-width font.
+//
+// The output is a header built from the column keys, a row of unit
+// and "vs base" labels, one row per benchmark, and, when the table
+// has more than one row, a summary row. Warnings appear in the table
+// as superscript footnote numbers, and the numbered warning messages
+// are written after the table.
+//
+// The color parameter is accepted but not used by this function.
+func (t *Table) ToText(w io.Writer, color bool) error {
+	var o texttab.Table
+
+	// Each logical column expands to centerCols columns, plus
+	// deltaCols columns if there's a baseline.
+	const labelCols = 1
+	const centerCols = 3 // <center ±> <CI> <warnings>
+	const deltaCols = 3  // <P%> <(p=0.PPP n=N)> <warnings>
+
+	// startCol returns the index of the first centerCol of
+	// logical column exp.
+	startCol := func(exp int) int {
+		if exp == 0 {
+			return labelCols
+		}
+		// The width of experiment 0 is just centerCols. All
+		// later experiments are centerCols+deltaCols.
+		return labelCols + centerCols + (exp-1)*(centerCols+deltaCols)
+	}
+
+	// warningList holds the distinct warning messages in order of
+	// first appearance, and warningSet maps each message to its
+	// index in warningList.
+	var warningList []string
+	warningSet := make(map[string]int)
+	// warn emits one cell containing the footnote numbers for all of
+	// msgs. Identical messages share a footnote number. It emits a
+	// cell even when there are no warnings.
+	warn := func(msgs ...[]error) {
+		var footnotes []string
+		for _, msgs1 := range msgs {
+			for _, msg := range msgs1 {
+				s := msg.Error()
+				i, ok := warningSet[s]
+				if !ok {
+					i = len(warningList)
+					warningSet[s] = i
+					warningList = append(warningList, s)
+				}
+				// Footnotes are numbered from 1.
+				footnotes = append(footnotes, superscript(i+1))
+			}
+		}
+		s := strings.Join(footnotes, " ")
+		o.Cell(s)
+	}
+
+	// Construct the header.
+	kt := benchproc.NewKeyHeader(t.Cols)
+	// NOTE(review): rEdge is startCol(len(t.Cols)+1), which is one
+	// logical column past startCol(len(t.Cols)) — confirm this
+	// offset is intended.
+	rEdge := startCol(len(t.Cols) + 1)
+	// Emit one header row per level of the key header, starting
+	// with kt.Top and descending through each node's Children.
+	nodes := kt.Top
+	for len(nodes) > 0 {
+		// Process this level.
+		var nextNodes []*benchproc.KeyHeaderNode
+		o.Row()
+		for _, node := range nodes {
+			l := startCol(node.Start)
+			r := startCol(node.Start + node.Len)
+			// Configuration headers can span a lot of
+			// columns, so we add a vertical rule to more
+			// clearly delineate the columns they span. We
+			// also add some space so that each logical
+			// column in the rest of the table is better
+			// separated.
+			o.Col(l).Span(r-l, node.Value, texttab.Center, texttab.LeftMargin(" │ "))
+			nextNodes = append(nextNodes, node.Children...)
+		}
+		// Add a vertical bar down the right side to match the other
+		// separators.
+		o.Col(rEdge).Cell("", texttab.LeftMargin(" │"))
+		nodes = nextNodes
+	}
+
+	// Add the column labels row, set margins, and create stretch
+	// columns.
+	o.Row()
+	for i := range t.Cols {
+		l := startCol(i)
+		o.Col(l)
+
+		// Show the unit over the center column group, since
+		// these are values in that unit.
+		o.Span(centerCols, t.Unit, texttab.Center, texttab.LeftMargin(" │ "))
+
+		if i > 0 {
+			// All but the first column will have A/B
+			// comparisons.
+			//
+			// Separate center and delta column groups by
+			// 2 spaces.
+			o.Span(deltaCols, "vs base", texttab.Left, texttab.LeftMargin("  "))
+		}
+
+		// Make all of the interior columns in this column
+		// group shrink columns, leaving on the leftmost and
+		// rightmost to stretch.
+		for j := l + 1; j < o.CurCol(); j++ {
+			o.SetShrink(j, true)
+		}
+	}
+	o.Col(rEdge).Cell("", texttab.LeftMargin(" │"))
+
+	// Emit measurements.
+	unitClass := benchunit.ClassOf(t.Unit)
+	for _, row := range t.Rows {
+		o.Row()
+
+		// TODO: Should I put each row key value in a
+		// column? With the keys as headers?
+		o.Cell(row.StringValues())
+
+		// Get a common scalar across this row.
+		scalar := t.RowScaler(row, unitClass)
+
+		for exp, col := range t.Cols {
+			cell, ok := t.Cells[TableKey{row, col}]
+			if !ok {
+				// No data for this (row, col); leave the columns empty.
+				continue
+			}
+
+			o.Col(startCol(exp))
+			o.Cell(scalar.Format(cell.Summary.Center), texttab.Right)
+			// Put ± in the margin so 1) the ±s line up,
+			// 2) the geomean value (which doesn't have ±)
+			// aligns with the summary column, 3) we can
+			// right align the range column.
+			o.Cell(cell.Summary.PctRangeString(), texttab.Right, texttab.LeftMargin(" ± "))
+			warn(cell.Sample.Warnings, cell.Summary.Warnings)
+			if exp > 0 && cell.Baseline != nil {
+				d := cell.Comparison.FormatDelta(cell.Baseline.Summary.Center, cell.Summary.Center)
+				// TODO: Color the delta for whether
+				// it's good or bad.
+				o.Cell(d, texttab.Right)
+				o.Cell("(" + cell.Comparison.String() + ")")
+				warn(cell.Comparison.Warnings)
+			}
+		}
+	}
+
+	// Emit summary row.
+	//
+	// The summary row is omitted for tables with at most one row.
+	if len(t.Rows) > 1 {
+		o.Row()
+		o.Cell(t.SummaryLabel)
+		for exp, col := range t.Cols {
+			tsum, ok := t.Summary[col]
+			if !ok {
+				continue
+			}
+
+			if tsum.HasSummary {
+				o.Col(startCol(exp))
+				o.Cell(benchunit.Scale(tsum.Summary, unitClass), texttab.Right)
+			}
+			if exp > 0 {
+				o.Col(startCol(exp) + centerCols)
+				if tsum.HasRatio {
+					// Ratio is new/base, so print it as a signed
+					// percent change.
+					o.Cell(fmt.Sprintf("%+.2f%%", (tsum.Ratio-1)*100), texttab.Right)
+				} else {
+					o.Cell("?")
+				}
+			}
+
+			// Put the summary's footnotes in the last column of this
+			// logical column.
+			o.Col(startCol(exp+1) - 1)
+			warn(tsum.Warnings)
+		}
+	}
+
+	// Emit table.
+	if err := o.Format(w); err != nil {
+		return err
+	}
+
+	// Emit warnings.
+	if len(warningList) > 0 {
+		for i, msg := range warningList {
+			if _, err := fmt.Fprintf(w, "%s %s\n", superscript(i+1), msg); err != nil {
+				return err
+			}
+		}
+	}
+
+	return nil
+}
+
+// superDigits holds the Unicode superscript forms of the decimal
+// digits, indexed by digit value.
+var superDigits = []rune("⁰¹²³⁴⁵⁶⁷⁸⁹")
+
+// superscript returns the decimal representation of i written with
+// superscript digits. It returns the empty string if i is negative.
+func superscript(i int) string {
+	if i == 0 {
+		return string(superDigits[:1])
+	}
+
+	// Collect the digits least-significant first, then reverse them.
+	var out []rune
+	for n := i; n > 0; n /= 10 {
+		out = append(out, superDigits[n%10])
+	}
+	for l, r := 0, len(out)-1; l < r; l, r = l+1, r-1 {
+		out[l], out[r] = out[r], out[l]
+	}
+	return string(out)
+}
+
+// csvColName returns the spreadsheet-style name ("A", "B", ..., "Z",
+// "AA", "AB", ...) of the column with 0-based index col.
+func csvColName(col int) string {
+	// Column names are bijective base 26: there is no zero digit,
+	// so one is subtracted before each letter is extracted.
+	var buf [16]byte
+	pos := len(buf)
+	for n := col + 1; n > 0 && pos > 0; n /= 26 {
+		n--
+		pos--
+		buf[pos] = 'A' + byte(n%26)
+	}
+	return string(buf[pos:])
+}
+
+// ToCSV renders t to CSV format. Warnings are written in text format
+// to the "warnings" Writer, and prefixed with spreadsheet-style cell
+// references. These references assume the table begins on row
+// "startRow".
+//
+// The rows written are: one row per column-key field, one row of
+// column headers, one row per benchmark, and one summary row. ToCSV
+// returns the number of rows written. Write errors are not reported
+// here; the caller is expected to check o.Error after flushing.
+func (t *Table) ToCSV(o *csv.Writer, startRow int, warnings io.Writer) (rowCount int) {
+	const labelCols = 1
+	const centerCols = 2 // <center> <CI>
+	const deltaCols = 2  // <P%> <(p=0.PPP n=N)>
+	// startCol returns the index of the first CSV column of logical
+	// column exp.
+	startCol := func(exp int) int {
+		if exp == 0 {
+			// Baseline, so no delta.
+			return labelCols
+		}
+		// Center and delta columns.
+		return labelCols + centerCols + (exp-1)*(centerCols+deltaCols)
+	}
+	// row is the row being built. It is reused for every row.
+	row := make([]string, 0, startCol(len(t.Cols)))
+	// clearTo pads row with empty cells so that the next cell
+	// appended lands at index col.
+	clearTo := func(col int) {
+		for len(row) < col {
+			row = append(row, "")
+		}
+	}
+	// emit writes the current row and starts a new one.
+	emit := func() {
+		o.Write(row)
+		row = row[:0]
+		rowCount++
+	}
+	// warn reports msgs against the cell that is about to be
+	// appended to row.
+	warn := func(msgs []error) {
+		if len(msgs) == 0 {
+			return
+		}
+		// Construct a spreadsheet-style cell label. The next cell
+		// appended to row lands at index len(row).
+		colName := csvColName(len(row))
+		// Print warnings.
+		for _, msg := range msgs {
+			fmt.Fprintf(warnings, "%s%d: %s\n", colName, startRow+rowCount, msg)
+		}
+	}
+
+	// Emit column configurations header.
+	colFields := t.Cols[0].Projection().FlattenedFields()
+	for _, field := range colFields {
+		for exp, key := range t.Cols {
+			clearTo(startCol(exp))
+			row = append(row, key.Get(field))
+		}
+		emit()
+	}
+
+	// Emit column headers.
+	for exp := range t.Cols {
+		clearTo(startCol(exp))
+		row = append(row, t.Unit, "CI")
+		if exp > 0 {
+			row = append(row, "vs base", "P")
+		}
+	}
+	emit()
+
+	// Emit table.
+	for _, rowKey := range t.Rows {
+		row = append(row, rowKey.StringValues())
+		for exp, colKey := range t.Cols {
+			cell, ok := t.Cells[TableKey{rowKey, colKey}]
+			if !ok {
+				continue
+			}
+
+			clearTo(startCol(exp))
+			// Sample and summary warnings refer to the center cell.
+			warn(cell.Sample.Warnings)
+			warn(cell.Summary.Warnings)
+			row = append(row,
+				fmt.Sprint(cell.Summary.Center),
+				cell.Summary.PctRangeString(),
+			)
+			if exp > 0 && cell.Baseline != nil {
+				// Comparison warnings refer to the delta cell.
+				warn(cell.Comparison.Warnings)
+				row = append(row,
+					cell.Comparison.FormatDelta(cell.Baseline.Summary.Center, cell.Summary.Center),
+					cell.Comparison.String(),
+				)
+			}
+		}
+		emit()
+	}
+
+	// Emit summary row.
+	row = append(row, t.SummaryLabel)
+	for exp, key := range t.Cols {
+		tsum, ok := t.Summary[key]
+		if !ok {
+			continue
+		}
+
+		clearTo(startCol(exp))
+		warn(tsum.Warnings)
+		if tsum.HasSummary {
+			row = append(row, fmt.Sprint(tsum.Summary))
+		}
+		if exp > 0 {
+			clearTo(startCol(exp) + centerCols)
+			if tsum.HasRatio {
+				row = append(row, fmt.Sprintf("%+.2f%%", (tsum.Ratio-1)*100))
+			} else {
+				row = append(row, "?")
+			}
+		}
+	}
+	emit()
+
+	return rowCount
+}
diff --git a/cmd/benchstat/internal/texttab/table.go b/cmd/benchstat/internal/texttab/table.go
new file mode 100644
index 0000000..a703d7b
--- /dev/null
+++ b/cmd/benchstat/internal/texttab/table.go
@@ -0,0 +1,295 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package texttab
+
+import (
+	"fmt"
+	"io"
+	"sort"
+	"strings"
+	"unicode/utf8"
+)
+
+// Table does layout of text-based tables.
+//
+// Many of its methods return the Table so callers can easily
+// chain them to build up many cells at once.
+type Table struct {
+	cells []textCell
+	cols  int
+
+	shrink []bool
+
+	curRow, curCol int
+}
+
+type textCell struct {
+	row, col, span int
+	value          string
+	leftMargin     string
+	alignment      align
+}
+
+type CellOption func(c *textCell)
+
+func LeftMargin(x string) CellOption {
+	return func(c *textCell) {
+		c.leftMargin = x
+	}
+}
+
+var (
+	Left   CellOption = func(c *textCell) { c.alignment = alignLeft }
+	Center            = func(c *textCell) { c.alignment = alignCenter }
+	Right             = func(c *textCell) { c.alignment = alignRight }
+)
+
+type align int
+
+const (
+	alignLeft align = iota
+	alignCenter
+	alignRight
+)
+
+func (a align) lpad(s string, w int) string {
+	switch a {
+	default:
+		return s
+	case alignCenter:
+		l := (w - utf8.RuneCountInString(s)) / 2
+		return fmt.Sprintf("%*s%s", l, "", s)
+	case alignRight:
+		return fmt.Sprintf("%*s", w, s)
+	}
+}
+
+// Row starts a new row in table t.
+func (t *Table) Row() *Table {
+	if len(t.cells) > 0 {
+		t.curRow++
+	}
+	t.curCol = 0
+	return t
+}
+
+// Col skips to column "col" in table t. Columns are numbered starting
+// at 0.
+func (t *Table) Col(col int) *Table {
+	if col < t.curCol {
+		panic(fmt.Sprintf("cannot move from column %d to earlier column %d", t.curCol, col))
+	}
+	t.curCol = col
+	return t
+}
+
+// CurCol returns the current column index.
+func (t *Table) CurCol() int {
+	return t.curCol
+}
+
+// Cell adds a single-column cell at the current row and column.
+func (t *Table) Cell(value string, opts ...CellOption) *Table {
+	return t.Span(1, value, opts...)
+}
+
+// Span adds a multi-column cell at the current row and column.
+func (t *Table) Span(cols int, value string, opts ...CellOption) *Table {
+	lMargin := " "
+	if t.curCol == 0 || len(value) == 0 {
+		// For the left-most column or empty cells, we default
+		// to no left margin.
+		lMargin = ""
+	}
+	t.cells = append(t.cells, textCell{t.curRow, t.curCol, cols, value, lMargin, alignLeft})
+	for _, o := range opts {
+		o(&t.cells[len(t.cells)-1])
+	}
+
+	t.curCol += cols
+	if t.curCol > t.cols {
+		t.cols = t.curCol
+	}
+
+	return t
+}
+
+// SetShrink marks a column as a "shrink" column, which will have
+// minimum width.
+func (t *Table) SetShrink(col int, shrink bool) {
+	for len(t.shrink) < col+1 {
+		t.shrink = append(t.shrink, false)
+	}
+	t.shrink[col] = shrink
+}
+
+func max(a, b int) int {
+	if a > b {
+		return a
+	}
+	return b
+}
+
+// Format lays out table t and writes it to w.
+func (t *Table) Format(w io.Writer) error {
+	shrink := func(col int) bool {
+		if col < len(t.shrink) {
+			return t.shrink[col]
+		}
+		return false
+	}
+
+	// Collect max length margin for each column.
+	lmargin := make([]int, t.cols)
+	for _, cell := range t.cells {
+		lmargin[cell.col] = max(utf8.RuneCountInString(cell.leftMargin), lmargin[cell.col])
+	}
+
+	// Compute column widths, including their left margins.
+	ws := make([]int, t.cols)
+	// Consider cells in increasing span width.
+	sort.Slice(t.cells, func(i, j int) bool {
+		return t.cells[i].span < t.cells[j].span
+	})
+	var spanCols []int
+	for _, cell := range t.cells {
+		w := utf8.RuneCountInString(cell.value) + lmargin[cell.col]
+
+		if cell.span == 1 {
+			// Easy case.
+			ws[cell.col] = max(ws[cell.col], w)
+			continue
+		}
+
+		// This cell spans multiple columns. Is the total
+		// width of those columns already sufficient?
+		tw := 0
+		for col := cell.col; col < cell.col+cell.span; col++ {
+			tw += ws[col]
+		}
+		if tw >= w {
+			continue
+		}
+
+		// We need to expand columns. The goal is to expand
+		// them all to the necessary average, but some may
+		// already be wider than this average. Hence, we
+		// process columns from widest to narrowest,
+		// subtracting out the columns that are already wider
+		// than the target average (which in turn changes the
+		// target), and then distributing the remaining space
+		// among the narrower ones.
+		spanCols = spanCols[:0]
+		for col := cell.col; col < cell.col+cell.span; col++ {
+			if shrink(col) {
+				// We can't grow a shrink column, so
+				// account for its space, but don't
+				// add it to the columns to adjust.
+				w -= ws[col]
+			} else {
+				spanCols = append(spanCols, col)
+			}
+		}
+		// Process the wider columns first.
+		sort.Slice(spanCols, func(i, j int) bool {
+			return ws[spanCols[i]] > ws[spanCols[j]]
+		})
+		span := len(spanCols)
+		for _, col := range spanCols {
+			// What's the target average width at this
+			// point? Round up w/span.
+			avg := (w + span - 1) / span
+			// Expand column if it isn't wide enough.
+			ws[col] = max(ws[col], avg)
+			// Subtract this column from the space needed.
+			// If the column was already wide enough, this
+			// will redistribute its excess across the
+			// smaller columns. We also do this if we
+			// expanded the column as a convenient way to
+			// spread out the integer rounding of avg.
+			w -= ws[col]
+			span--
+		}
+	}
+
+	// Convert column widths into starting offsets. The offset of
+	// column i is where i's left margin begins. The slice
+	// includes a final offset for the width of the table.
+	offs := make([]int, t.cols+1)
+	off := 0
+	for i, w := range ws {
+		offs[i] = off
+		off += w
+	}
+	offs[len(ws)] = off
+
+	const debugPrintColumns = false
+	if debugPrintColumns {
+		fmt.Println(ws)
+		pos := 0
+		for col, off := range offs {
+			fmt.Fprintf(w, "%*s", off-pos, "")
+			pos = off
+			for i := 0; i < lmargin[col]; i++ {
+				fmt.Fprintf(w, "|")
+				pos++
+			}
+		}
+		fmt.Fprint(w, "\n")
+	}
+
+	// Format the table. Put the cells back into top-to-bottom
+	// left-to-right order.
+	sort.Slice(t.cells, func(i, j int) bool {
+		if t.cells[i].row != t.cells[j].row {
+			return t.cells[i].row < t.cells[j].row
+		}
+		return t.cells[i].col < t.cells[j].col
+	})
+	row, off := 0, 0
+	for _, cell := range t.cells {
+		if strings.TrimSpace(cell.value) == "" && strings.TrimSpace(cell.leftMargin) == "" {
+			// Skip empty cells. This avoids printing
+			// unnecessary trailing spaces if cells appear
+			// at the end of a row.
+			continue
+		}
+
+		// Get to cell's row.
+		for cell.row > row {
+			if _, err := fmt.Fprintf(w, "\n"); err != nil {
+				return err
+			}
+			row++
+			off = 0
+		}
+
+		// Space to the cell's starting offset and print its
+		// left margin.
+		spaces := offs[cell.col] - off
+		if _, err := fmt.Fprintf(w, "%*s%*s", spaces, "", lmargin[cell.col], cell.leftMargin); err != nil {
+			return err
+		}
+		off += spaces + lmargin[cell.col]
+
+		// Compute total cell width, excluding the margin we
+		// just printed.
+		tw := offs[cell.col+cell.span] - offs[cell.col] - lmargin[cell.col]
+
+		// Print cell contents.
+		s := cell.alignment.lpad(cell.value, tw)
+		if _, err := fmt.Fprintf(w, "%s", s); err != nil {
+			return err
+		}
+		off += utf8.RuneCountInString(s)
+	}
+	if len(t.cells) > 0 {
+		if _, err := fmt.Fprintf(w, "\n"); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
diff --git a/cmd/benchstat/internal/texttab/table_test.go b/cmd/benchstat/internal/texttab/table_test.go
new file mode 100644
index 0000000..59fdf4e
--- /dev/null
+++ b/cmd/benchstat/internal/texttab/table_test.go
@@ -0,0 +1,118 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package texttab
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestAlign(t *testing.T) {
+	check := func(s string, a align, w int, want string) {
+		t.Helper()
+		got := a.lpad(s, w)
+		if got != want {
+			t.Errorf("want %q, got %q", want, got)
+		}
+	}
+
+	check("abc", alignLeft, 10, "abc")
+	check("abc", alignCenter, 10, "   abc")
+	check("abc", alignCenter, 11, "    abc")
+	check("abc", alignRight, 10, "       abc")
+	check("☃", alignRight, 4, "   ☃")
+}
+
+func TestTable(t *testing.T) {
+	var tab Table
+	check := func(want string) {
+		t.Helper()
+		var gotBuf strings.Builder
+		tab.Format(&gotBuf)
+		got := gotBuf.String()
+		if want != got {
+			t.Errorf("want:\n%sgot:\n%s", want, got)
+		}
+		// Reset tab.
+		tab = Table{}
+	}
+
+	// Basic test.
+	tab.Row().Cell("a").Cell("b").Cell("c")
+	tab.Row().Cell("d").Cell("e").Cell("f")
+	check("a b c\nd e f\n")
+
+	// Basic cell padding. Also checks that we don't print
+	// unnecessary spaces at the ends of lines.
+	tab.Row().Cell("a").Cell("b").Cell("c")
+	tab.Row().Cell("long").Cell("e").Cell("long")
+	check("a    b c\nlong e long\n")
+
+	// Cell alignment.
+	tab.Row().Cell("a", Left).Cell("b", Center).Cell("c", Right)
+	tab.Row().Cell("xxx").Cell("xxx").Cell("xxx")
+	check("a    b    c\nxxx xxx xxx\n")
+
+	// Margins.
+	tab.Row().Cell("a").Cell("b", LeftMargin("  "))
+	tab.Row().Cell("c").Cell("d")
+	tab.Row().Cell("e").Cell("f", LeftMargin("|"))
+	check("a  b\nc  d\ne |f\n")
+
+	// Missing cell in the middle.
+	tab.Row().Cell("a").Col(2).Cell("c")
+	tab.Row().Cell("d").Cell("e").Cell("f")
+	check("a   c\nd e f\n")
+
+	// Missing cells at the end.
+	tab.Row().Cell("a")
+	tab.Row().Cell("d").Cell("e").Cell("f")
+	check("a\nd e f\n")
+
+	// Blank rows.
+	tab.Row().Cell("a")
+	tab.Row()
+	tab.Row()
+	tab.Row().Cell("b")
+	check("a\n\n\nb\n")
+
+	// Basic spans.
+	tab.Row().Cell("a").Cell("b")
+	tab.Row().Span(2, "abc")
+	check("a b\nabc\n")
+
+	// Spans expanding other cells.
+	tab.Row().Cell("a").Cell("b")
+	tab.Row().Span(2, "abcdefg")
+	check("a   b\nabcdefg\n")
+
+	// Other cells expanding spans.
+	tab.Row().Cell("abc").Cell("def")
+	tab.Row().Span(2, "a", Right)
+	check("abc def\n      a\n")
+
+	// Some cells are already large enough to complete the span.
+	tab.Row().Cell("a").Cell("def")
+	tab.Row().Span(2, "abdef", Right)
+	check("a def\nabdef\n")
+
+	// Larger cells are sufficient, but smaller cells need to be
+	// expanded.
+	tab.Row().Cell("a").Cell("def")
+	tab.Row().Span(2, "abcdef", Right)
+	check("a  def\nabcdef\n")
+
+	// Spans with margins.
+	tab.Row().Cell("a").Cell("b", LeftMargin("  ")).Cell("x")
+	tab.Row().Span(2, "a__b").Cell("x")
+	check("a  b x\na__b x\n")
+
+	// Shrink columns.
+	tab.Row().Span(2, "abcdef")
+	tab.Row().Cell("a").Cell("bc")
+	tab.Row().Cell("x").Cell("y")
+	tab.SetShrink(1, true)
+	check("abcdef\na   bc\nx   y\n")
+}
diff --git a/cmd/benchstat/main.go b/cmd/benchstat/main.go
index 071d6b4..9fe399e 100644
--- a/cmd/benchstat/main.go
+++ b/cmd/benchstat/main.go
@@ -1,224 +1,538 @@
-// Copyright 2015 The Go Authors.  All rights reserved.
+// Copyright 2021 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// Benchstat computes and compares statistics about benchmarks.
+// Note: Blocks that begin with "$ benchstat" below will be tested by
+// doc_test.go.
+
+// Benchstat computes statistical summaries and A/B comparisons of Go
+// benchmarks.
 //
 // Usage:
 //
-//	benchstat [-delta-test name] [-geomean] [-html] [-sort order] old.txt [new.txt] [more.txt ...]
+//	benchstat [flags] inputs...
 //
-// Each input file should contain the concatenated output of a number
-// of runs of “go test -bench.” For each different benchmark listed in an input file,
-// benchstat computes the mean, minimum, and maximum run time,
-// after removing outliers using the interquartile range rule.
-//
-// If invoked on a single input file, benchstat prints the per-benchmark statistics
-// for that file.
-//
-// If invoked on a pair of input files, benchstat adds to the output a column
-// showing the statistics from the second file and a column showing the
-// percent change in mean from the first to the second file.
-// Next to the percent change, benchstat shows the p-value and sample
-// sizes from a test of the two distributions of benchmark times.
-// Small p-values indicate that the two distributions are significantly different.
-// If the test indicates that there was no significant change between the two
-// benchmarks (defined as p > 0.05), benchstat displays a single ~ instead of
-// the percent change.
-//
-// The -delta-test option controls which significance test is applied:
-// utest (Mann-Whitney U-test), ttest (two-sample Welch t-test), or none.
-// The default is the U-test, sometimes also referred to as the Wilcoxon rank
-// sum test.
-//
-// If invoked on more than two input files, benchstat prints the per-benchmark
-// statistics for all the files, showing one column of statistics for each file,
-// with no column for percent change or statistical significance.
-//
-// The -html option causes benchstat to print the results as an HTML table.
-//
-// The -sort option specifies an order in which to list the results:
-// none (input order), delta (percent improvement), or name (benchmark name).
-// A leading “-” prefix, as in “-delta”, reverses the order.
+// Each input file should be in the Go benchmark format
+// (https://golang.org/design/14313-benchmark-format), such as the
+// output of “go test -bench .”. Typically, there should be two (or
+// more) input files for before and after some change (or series of
+// changes) to be measured. Each benchmark should be run at least 10
+// times to gather a statistically significant sample of results. For
+// each benchmark, benchstat computes the median and the confidence
+// interval for the median. By default, if there are two or more
+// input files, it compares each benchmark in the first file to the
+// same benchmark in each subsequent file and reports whether there
+// was a statistically significant difference, though it can be
+// configured to compare on other dimensions.
 //
 // # Example
 //
-// Suppose we collect results from running a set of benchmarks twenty times
+// Suppose we collect results from running a set of benchmarks 10 times
 // before a particular change:
 //
-//	go test -run='^$' -bench=. -count=20 > old.txt
+//	go test -run='^$' -bench=. -count=10 > old.txt
 //
-// And the same benchmarks twenty times after:
+// And the same benchmarks 10 times after:
 //
-//	go test -run='^$' -bench=. -count=20 > new.txt
+//	go test -run='^$' -bench=. -count=10 > new.txt
 //
 // The file old.txt contains:
 //
-//	BenchmarkGobEncode   	100	  13552735 ns/op	  56.63 MB/s
-//	BenchmarkJSONEncode  	 50	  32395067 ns/op	  59.90 MB/s
-//	BenchmarkGobEncode   	100	  13553943 ns/op	  56.63 MB/s
-//	BenchmarkJSONEncode  	 50	  32334214 ns/op	  60.01 MB/s
-//	BenchmarkGobEncode   	100	  13606356 ns/op	  56.41 MB/s
-//	BenchmarkJSONEncode  	 50	  31992891 ns/op	  60.65 MB/s
-//	BenchmarkGobEncode   	100	  13683198 ns/op	  56.09 MB/s
-//	BenchmarkJSONEncode  	 50	  31735022 ns/op	  61.15 MB/s
+//	goos: linux
+//	goarch: amd64
+//	pkg: golang.org/x/perf/cmd/benchstat/testdata
+//	BenchmarkEncode/format=json-48         	  690848	      1726 ns/op
+//	BenchmarkEncode/format=json-48         	  684861	      1723 ns/op
+//	BenchmarkEncode/format=json-48         	  693285	      1707 ns/op
+//	BenchmarkEncode/format=json-48         	  677692	      1707 ns/op
+//	BenchmarkEncode/format=json-48         	  692130	      1713 ns/op
+//	BenchmarkEncode/format=json-48         	  684164	      1729 ns/op
+//	BenchmarkEncode/format=json-48         	  682500	      1736 ns/op
+//	BenchmarkEncode/format=json-48         	  677509	      1707 ns/op
+//	BenchmarkEncode/format=json-48         	  687295	      1705 ns/op
+//	BenchmarkEncode/format=json-48         	  695533	      1774 ns/op
+//	BenchmarkEncode/format=gob-48          	  372699	      3069 ns/op
+//	BenchmarkEncode/format=gob-48          	  394740	      3075 ns/op
+//	BenchmarkEncode/format=gob-48          	  391335	      3069 ns/op
+//	BenchmarkEncode/format=gob-48          	  383588	      3067 ns/op
+//	BenchmarkEncode/format=gob-48          	  385885	      3207 ns/op
+//	BenchmarkEncode/format=gob-48          	  389970	      3064 ns/op
+//	BenchmarkEncode/format=gob-48          	  393361	      3064 ns/op
+//	BenchmarkEncode/format=gob-48          	  393882	      3058 ns/op
+//	BenchmarkEncode/format=gob-48          	  396171	      3059 ns/op
+//	BenchmarkEncode/format=gob-48          	  397812	      3062 ns/op
 //
 // The file new.txt contains:
 //
-//	BenchmarkGobEncode   	 100	  11773189 ns/op	  65.19 MB/s
-//	BenchmarkJSONEncode  	  50	  32036529 ns/op	  60.57 MB/s
-//	BenchmarkGobEncode   	 100	  11942588 ns/op	  64.27 MB/s
-//	BenchmarkJSONEncode  	  50	  32156552 ns/op	  60.34 MB/s
-//	BenchmarkGobEncode   	 100	  11786159 ns/op	  65.12 MB/s
-//	BenchmarkJSONEncode  	  50	  31288355 ns/op	  62.02 MB/s
-//	BenchmarkGobEncode   	 100	  11628583 ns/op	  66.00 MB/s
-//	BenchmarkJSONEncode  	  50	  31559706 ns/op	  61.49 MB/s
-//	BenchmarkGobEncode   	 100	  11815924 ns/op	  64.96 MB/s
-//	BenchmarkJSONEncode  	  50	  31765634 ns/op	  61.09 MB/s
+//	goos: linux
+//	goarch: amd64
+//	pkg: golang.org/x/perf/cmd/benchstat/testdata
+//	BenchmarkEncode/format=json-48         	  714387	      1423 ns/op
+//	BenchmarkEncode/format=json-48         	  845445	      1416 ns/op
+//	BenchmarkEncode/format=json-48         	  815714	      1411 ns/op
+//	BenchmarkEncode/format=json-48         	  828824	      1413 ns/op
+//	BenchmarkEncode/format=json-48         	  834070	      1412 ns/op
+//	BenchmarkEncode/format=json-48         	  828123	      1426 ns/op
+//	BenchmarkEncode/format=json-48         	  834493	      1422 ns/op
+//	BenchmarkEncode/format=json-48         	  838406	      1424 ns/op
+//	BenchmarkEncode/format=json-48         	  836227	      1447 ns/op
+//	BenchmarkEncode/format=json-48         	  830835	      1425 ns/op
+//	BenchmarkEncode/format=gob-48          	  394441	      3075 ns/op
+//	BenchmarkEncode/format=gob-48          	  393207	      3065 ns/op
+//	BenchmarkEncode/format=gob-48          	  392374	      3059 ns/op
+//	BenchmarkEncode/format=gob-48          	  396037	      3065 ns/op
+//	BenchmarkEncode/format=gob-48          	  393255	      3060 ns/op
+//	BenchmarkEncode/format=gob-48          	  382629	      3081 ns/op
+//	BenchmarkEncode/format=gob-48          	  389558	      3186 ns/op
+//	BenchmarkEncode/format=gob-48          	  392668	      3135 ns/op
+//	BenchmarkEncode/format=gob-48          	  392313	      3087 ns/op
+//	BenchmarkEncode/format=gob-48          	  394274	      3062 ns/op
 //
 // The order of the lines in the file does not matter, except that the
 // output lists benchmarks in order of appearance.
 //
-// If run with just one input file, benchstat summarizes that file:
-//
-//	$ benchstat old.txt
-//	name        time/op
-//	GobEncode   13.6ms ± 1%
-//	JSONEncode  32.1ms ± 1%
-//	$
-//
-// If run with two input files, benchstat summarizes and compares:
+// If we run “benchstat old.txt new.txt”, it will summarize the
+// benchmarks and compare the before and after results:
 //
 //	$ benchstat old.txt new.txt
-//	name        old time/op  new time/op  delta
-//	GobEncode   13.6ms ± 1%  11.8ms ± 1%  -13.31% (p=0.016 n=4+5)
-//	JSONEncode  32.1ms ± 1%  31.8ms ± 1%     ~    (p=0.286 n=4+5)
-//	$
+//	goos: linux
+//	goarch: amd64
+//	pkg: golang.org/x/perf/cmd/benchstat/testdata
+//	                      │   old.txt   │               new.txt               │
+//	                      │   sec/op    │   sec/op     vs base                │
+//	Encode/format=json-48   1.718µ ± 1%   1.423µ ± 1%  -17.20% (p=0.000 n=10)
+//	Encode/format=gob-48    3.066µ ± 0%   3.070µ ± 2%        ~ (p=0.446 n=10)
+//	geomean                 2.295µ        2.090µ        -8.94%
 //
-// Note that the JSONEncode result is reported as
-// statistically insignificant instead of a -0.93% delta.
+// Before the comparison table, we see common file-level
+// configuration. If there are benchmarks with different configuration
+// (for example, from different packages), benchstat will print
+// separate tables for each configuration.
+//
+// The table then compares the two input files for each benchmark. It
+// shows the median and 95% confidence interval summaries for each
+// benchmark before and after the change, and an A/B comparison under
+// "vs base". The comparison shows that Encode/format=json got 17.20%
+// faster with a p-value of 0.000 and 10 samples from each input file.
+// The p-value measures how likely it is that any differences were due
+// to random chance (i.e., noise). In this case, it's extremely
+// unlikely the difference between the medians was due to chance. For
+// Encode/format=gob, the "~" means benchstat did not detect a
+// statistically significant difference between the two inputs. In
+// this case, we see a p-value of 0.446, meaning it's very likely the
+// differences for this benchmark are simply due to random chance.
+//
+// Note that "statistically significant" is not the same as "large":
+// with enough low-noise data, even very small changes can be
+// distinguished from noise and considered statistically significant.
+// It is, of course, generally easier to distinguish large changes
+// from noise.
+//
+// Finally, the last row of the table shows the geometric mean of each
+// column, giving an overall picture of how the benchmarks changed.
+// Proportional changes in the geomean reflect proportional changes in
+// the benchmarks. For example, given n benchmarks, if sec/op for one
+// of them increases by a factor of 2, then the sec/op geomean will
+// increase by a factor of ⁿ√2.
+//
+// # Filtering
+//
+// benchstat has a very flexible system of configuring exactly which
+// benchmarks are summarized and compared. First, all inputs are
+// filtered according to an expression provided as the -filter flag.
+//
+// Filters are built from key-value terms:
+//
+//	key:value     - Match if key equals value.
+//	key:"value"   - Same, but value is a double-quoted Go string that
+//	                may contain spaces or other special characters.
+//	"key":value   - Keys may also be double-quoted.
+//	key:/regexp/  - Match if key matches a regular expression.
+//	key:(val1 OR val2 OR ...)
+//	              - Short-hand for key:val1 OR key:val2. Values may be
+//	                double-quoted strings or regexps.
+//	*             - Match everything.
+//
+// These terms can be combined into larger expressions as follows:
+//
+//	x y ...       - Match if x, y, etc. all match.
+//	x AND y       - Same as x y.
+//	x OR y        - Match if x or y match.
+//	-x            - Match if x does not match.
+//	(...)         - Subexpression.
+//
+// Each key is one of the following:
+//
+//	.name         - The base name of a benchmark
+//	.fullname     - The full name of a benchmark (including configuration)
+//	.file         - The name of the input file or user-provided file label
+//	/{name-key}   - Per-benchmark sub-name configuration key
+//	{file-key}    - File-level configuration key
+//	.unit         - The name of a unit for a particular metric
+//
+// For example, the following matches benchmarks with "/format=json"
+// in the sub-name keys with file-level configuration "goos" equal to
+// "linux" and extracts the "ns/op" and "B/op" measurements:
+//
+//	$ benchstat -filter "/format:json goos:linux .unit:(ns/op OR B/op)" old.txt new.txt
+//	goos: linux
+//	goarch: amd64
+//	pkg: golang.org/x/perf/cmd/benchstat/testdata
+//	                      │   old.txt   │               new.txt               │
+//	                      │   sec/op    │   sec/op     vs base                │
+//	Encode/format=json-48   1.718µ ± 1%   1.423µ ± 1%  -17.20% (p=0.000 n=10)
+//
+// # Configuring comparisons
+//
+// The way benchstat groups and compares results is configurable using
+// a similar set of keys as used for filtering. By default, benchstat
+// groups results into tables using all file-level configuration keys,
+// then within each table, it groups results into rows by .fullname
+// (the benchmark's full name) and compares across columns by .file
+// (the name of each input file). This can be changed via the
+// following flags:
+//
+//	-table KEYS   - Group results into tables by KEYS
+//	-row KEYS     - Group results into table rows by KEYS
+//	-col KEYS     - Compare across results with different values of KEYS
+//
+// Using these flags, benchstat "projects" each result into a
+// particular table cell. Each KEYS argument is a comma- or
+// space-separated list of keys, each of which can optionally also
+// specify a sort order (described below).
+//
+// Each key is one of the following:
+//
+//	.name         - The base name of a benchmark
+//	.fullname     - The full name of a benchmark (including configuration)
+//	.file         - The name of the input file or user-provided file label
+//	/{name-key}   - Per-benchmark sub-name configuration key
+//	{file-key}    - File-level configuration key
+//	.config       - All file-level configuration keys
+//
+// Some of these keys can overlap. For example, ".config" includes the
+// file-level key "goos", and ".fullname" includes the sub-name key
+// "/format". When keys overlap like this, benchstat omits the more
+// specific key from the general key. For example, if -table is the
+// full file-level configuration ".config", and -col is the specific
+// file key "goos", benchstat will omit "goos" from ".config".
+//
+// Finally, the -ignore flag can list keys that benchstat should
+// ignore when grouping results. Continuing the previous example, if
+// -table is ".config" and -ignore is "goos", benchstat will omit
+// "goos" from ".config", but also not use it for any grouping.
+//
+// For precise details of the filter syntax and supported keys, see
+// https://pkg.go.dev/golang.org/x/perf/benchproc/syntax.
+//
+// # Projection examples
+//
+// Returning to our first example, we can now see how the default
+// projection flags produce this output:
+//
+//	$ benchstat -table .config -row .fullname -col .file old.txt new.txt
+//	goos: linux
+//	goarch: amd64
+//	pkg: golang.org/x/perf/cmd/benchstat/testdata
+//	                      │   old.txt   │               new.txt               │
+//	                      │   sec/op    │   sec/op     vs base                │
+//	Encode/format=json-48   1.718µ ± 1%   1.423µ ± 1%  -17.20% (p=0.000 n=10)
+//	Encode/format=gob-48    3.066µ ± 0%   3.070µ ± 2%        ~ (p=0.446 n=10)
+//	geomean                 2.295µ        2.090µ        -8.94%
+//
+// In this example, all benchmarks have the same file-level
+// configuration, consisting of "goos", "goarch", and "pkg", so
+// ".config" groups them into just one table. Within this table,
+// results are grouped into rows by their full name, including
+// configuration, and grouped into columns by the name of each input
+// file.
+//
+// Suppose we instead want to compare json encoding to gob encoding
+// from new.txt.
+//
+//	$ benchstat -col /format new.txt
+//	goos: linux
+//	goarch: amd64
+//	pkg: golang.org/x/perf/cmd/benchstat/testdata
+//	          │    json     │                 gob                  │
+//	          │   sec/op    │   sec/op     vs base                 │
+//	Encode-48   1.423µ ± 1%   3.070µ ± 2%  +115.82% (p=0.000 n=10)
+//
+// The columns are now labeled by the "/format" configuration from the
+// benchmark name. benchstat still compares columns even though we've
+// only provided a single input file. We also see that /format has
+// been removed from the benchmark name to make a single row.
+//
+// We can simplify the output by grouping rows by just the benchmark name,
+// rather than the full name:
+//
+//	$ benchstat -col /format -row .name new.txt
+//	goos: linux
+//	goarch: amd64
+//	pkg: golang.org/x/perf/cmd/benchstat/testdata
+//	       │    json     │                 gob                  │
+//	       │   sec/op    │   sec/op     vs base                 │
+//	Encode   1.423µ ± 1%   3.070µ ± 2%  +115.82% (p=0.000 n=10)
+//
+// benchstat will attempt to detect and warn if projections strip away
+// too much information. For example, here we group together json and
+// gob results into a single row:
+//
+//	$ benchstat  -row .name new.txt
+//	goos: linux
+//	goarch: amd64
+//	pkg: golang.org/x/perf/cmd/benchstat/testdata
+//	       │    new.txt     │
+//	       │     sec/op     │
+//	Encode   2.253µ ± 37% ¹
+//	¹ benchmarks vary in .fullname
+//
+// Since this is probably not a meaningful comparison, benchstat warns
+// that the benchmarks it grouped together vary in a hidden dimension.
+// If this really were our intent, we could -ignore .fullname.
+//
+// # Sorting
+//
+// By default, benchstat sorts each dimension according to the order
+// in which it first observes each value of that dimension. This can
+// be overridden in each projection using the following syntax:
+//
+// {key}@{order} - specifies one of the built-in named sort orders.
+// This can be "alpha" or "num" for alphabetic or numeric sorting.
+// "num" understands basic use of metric and IEC prefixes like "2k"
+// and "1Mi".
+//
+// {key}@({value} {value} ...) - specifies a fixed value order for
+// key. It also specifies a filter: if key has a value that isn't any
+// of the specified values, the result is filtered out.
+//
+// For example, we can use a fixed order to compare the improvement of
+// json over gob rather than the other way around:
+//
+//	$ benchstat -col "/format@(gob json)" -row .name -ignore .file new.txt
+//	goos: linux
+//	goarch: amd64
+//	pkg: golang.org/x/perf/cmd/benchstat/testdata
+//	       │     gob     │                json                 │
+//	       │   sec/op    │   sec/op     vs base                │
+//	Encode   3.070µ ± 2%   1.423µ ± 1%  -53.66% (p=0.000 n=10)
+//
+// # Overriding .file
+//
+// Often, you want to compare results from different files, but want
+// to provide more meaningful (or perhaps shorter) column labels than
+// raw file names. File name labels can be overridden by specifying an
+// input argument of the form "label=path" instead of just "path".
+// This provides a custom value for the .file key.
+//
+// For example, the following will perform the default comparison, but
+// label the columns O and N instead of old.txt and new.txt:
+//
+//	$ benchstat O=old.txt N=new.txt
+//	goos: linux
+//	goarch: amd64
+//	pkg: golang.org/x/perf/cmd/benchstat/testdata
+//	                      │      O      │                  N                  │
+//	                      │   sec/op    │   sec/op     vs base                │
+//	Encode/format=json-48   1.718µ ± 1%   1.423µ ± 1%  -17.20% (p=0.000 n=10)
+//	Encode/format=gob-48    3.066µ ± 0%   3.070µ ± 2%        ~ (p=0.446 n=10)
+//	geomean                 2.295µ        2.090µ        -8.94%
+//
+// # Units
+//
+// benchstat normalizes the units "ns" to "sec" and "MB" to "B" to
+// avoid creating nonsense units like "µns/op". These appear in the
+// testing package's default metrics and are also common in custom
+// metrics.
+//
+// benchstat supports custom unit metadata (see
+// https://golang.org/design/14313-benchmark-format). In particular,
+// "assume" metadata is useful for controlling the statistics used by
+// benchstat. By default, units use "assume=nothing", so benchstat
+// uses non-parametric statistics: median for summaries, and the
+// Mann-Whitney U-test for A/B comparisons.
+//
+// Some benchmarks measure things that have no noise, such as the size
+// of a binary produced by a compiler. These do not benefit from
+// repeated measurements or non-parametric statistics. For these
+// units, it's useful to set "assume=exact". This will cause benchstat
+// to warn if there's any variation in the measured values, and to
+// show A/B comparisons even if there's only one before and after
+// measurement.
+//
+// # Tips
+//
+// Reducing noise and/or increasing the number of benchmark runs will
+// enable benchstat to discern smaller changes as "statistically
+// significant". To reduce noise, make sure you run benchmarks on an
+// otherwise idle machine, ideally one that isn't running on battery
+// and isn't likely to be affected by thermal throttling.
+// https://llvm.org/docs/Benchmarking.html has many good tips on
+// reducing noise in benchmarks.
+//
+// It's also important that noise is evenly distributed across
+// benchmark runs. The best way to do this is to interleave before and
+// after runs, rather than running, say, 10 iterations of the before
+// benchmark, and then 10 iterations of the after benchmark. For Go
+// benchmarks, you can often speed up this process by using "go test
+// -c" to pre-compile the benchmark binary.
+//
+// Pick a number of benchmark runs (at least 10, ideally 20) and stick
+// to it. If benchstat reports no statistically significant change,
+// avoid simply rerunning your benchmarks until it reports a
+// significant change. This is known as "multiple testing" and is a
+// common statistical error. By default, benchstat uses an α threshold
+// of 0.05, which means it is *expected* to show a difference 5% of
+// the time even if there is no difference. Hence, if you rerun
+// benchmarks looking for a change, benchstat will probably eventually
+// say there is a change, even if there isn't, which creates a
+// statistical bias.
+//
+// As an extension of this, if you compare a large number of
+// benchmarks, you should expect that about 5% of them will report a
+// statistically significant change even if there is no difference
+// between the before and after.
 package main
 
 import (
-	"bytes"
 	"flag"
 	"fmt"
-	"log"
+	"io"
 	"os"
-	"strings"
 
-	"golang.org/x/perf/benchstat"
+	"golang.org/x/perf/benchfmt"
+	"golang.org/x/perf/benchmath"
+	"golang.org/x/perf/benchproc"
+	"golang.org/x/perf/cmd/benchstat/internal/benchtab"
 )
 
-var exit = os.Exit // replaced during testing
+// TODO: Add a flag to perform Holm–Bonferroni correction for
+// family-wise error rates. This can be done after-the-fact on a
+// collection of benchmath.Comparison values.
 
-func usage() {
-	fmt.Fprintf(os.Stderr, "usage: benchstat [options] old.txt [new.txt] [more.txt ...]\n")
-	fmt.Fprintf(os.Stderr, "options:\n")
-	flag.PrintDefaults()
-	exit(2)
-}
+// TODO: -unit flag.
 
-var (
-	flagDeltaTest = flag.String("delta-test", "utest", "significance `test` to apply to delta: utest, ttest, or none")
-	flagAlpha     = flag.Float64("alpha", 0.05, "consider change significant if p < `α`")
-	flagGeomean   = flag.Bool("geomean", false, "print the geometric mean of each file")
-	flagHTML      = flag.Bool("html", false, "print results as an HTML table")
-	flagCSV       = flag.Bool("csv", false, "print results in CSV form")
-	flagNoRange   = flag.Bool("norange", false, "suppress range columns (CSV only)")
-	flagSplit     = flag.String("split", "pkg,goos,goarch", "split benchmarks by `labels`")
-	flagSort      = flag.String("sort", "none", "sort by `order`: [-]delta, [-]name, none")
-)
+// TODO: Support sorting by commit order.
 
-var deltaTestNames = map[string]benchstat.DeltaTest{
-	"none":   benchstat.NoDeltaTest,
-	"u":      benchstat.UTest,
-	"u-test": benchstat.UTest,
-	"utest":  benchstat.UTest,
-	"t":      benchstat.TTest,
-	"t-test": benchstat.TTest,
-	"ttest":  benchstat.TTest,
-}
+// TODO: Add some quick usage examples to the -h output?
 
-var sortNames = map[string]benchstat.Order{
-	"none":  nil,
-	"name":  benchstat.ByName,
-	"delta": benchstat.ByDelta,
-}
+// TODO: If the projection results in a very sparse table, that's
+// usually the result of correlated keys. Can we detect that and
+// suggest fixes?
 
 func main() {
-	log.SetPrefix("benchstat: ")
-	log.SetFlags(0)
-	flag.Usage = usage
-	flag.Parse()
-	deltaTest := deltaTestNames[strings.ToLower(*flagDeltaTest)]
-	sortName := *flagSort
-	reverse := false
-	if strings.HasPrefix(sortName, "-") {
-		reverse = true
-		sortName = sortName[1:]
+	if err := benchstat(os.Stdout, os.Stderr, os.Args[1:]); err != nil {
+		fmt.Fprintf(os.Stderr, "benchstat: %s\n", err)
 	}
-	order, ok := sortNames[sortName]
-	if flag.NArg() < 1 || deltaTest == nil || !ok {
-		flag.Usage()
-	}
-
-	c := &benchstat.Collection{
-		Alpha:      *flagAlpha,
-		AddGeoMean: *flagGeomean,
-		DeltaTest:  deltaTest,
-	}
-	if *flagSplit != "" {
-		c.SplitBy = strings.Split(*flagSplit, ",")
-	}
-	if order != nil {
-		if reverse {
-			order = benchstat.Reverse(order)
-		}
-		c.Order = order
-	}
-	for _, file := range flag.Args() {
-		f, err := os.Open(file)
-		if err != nil {
-			log.Fatal(err)
-		}
-		if err := c.AddFile(file, f); err != nil {
-			log.Fatal(err)
-		}
-		f.Close()
-	}
-
-	tables := c.Tables()
-	var buf bytes.Buffer
-	if *flagHTML {
-		buf.WriteString(htmlHeader)
-		benchstat.FormatHTML(&buf, tables)
-		buf.WriteString(htmlFooter)
-	} else if *flagCSV {
-		benchstat.FormatCSV(&buf, tables, *flagNoRange)
-	} else {
-		benchstat.FormatText(&buf, tables)
-	}
-	os.Stdout.Write(buf.Bytes())
 }
 
-var htmlHeader = `<!doctype html>
-<html>
-<head>
-<meta charset="utf-8">
-<title>Performance Result Comparison</title>
-<style>
-.benchstat { border-collapse: collapse; }
-.benchstat th:nth-child(1) { text-align: left; }
-.benchstat tbody td:nth-child(1n+2):not(.note) { text-align: right; padding: 0em 1em; }
-.benchstat tr:not(.configs) th { border-top: 1px solid #666; border-bottom: 1px solid #ccc; }
-.benchstat .nodelta { text-align: center !important; }
-.benchstat .better td.delta { font-weight: bold; }
-.benchstat .worse td.delta { font-weight: bold; color: #c00; }
-</style>
-</head>
-<body>
-`
-var htmlFooter = `</body>
-</html>
-`
+func benchstat(w, wErr io.Writer, args []string) error {
+	flags := flag.NewFlagSet("", flag.ExitOnError)
+	flags.Usage = func() {
+		fmt.Fprintf(flags.Output(), `Usage: benchstat [flags] inputs...
+
+benchstat computes statistical summaries and A/B comparisons of Go
+benchmarks. It shows benchmark medians in a table with a row for each
+benchmark and a column for each input file. If there is more than one
+input file, it also shows A/B comparisons between the files. If a
+difference is likely to be noise, it shows "~".
+
+For details, see https://pkg.go.dev/golang.org/x/perf/cmd/benchstat.
+`)
+		flags.PrintDefaults()
+	}
+
+	thresholds := benchmath.DefaultThresholds
+	flagTable := flags.String("table", ".config", "split results into tables by distinct values of `projection`")
+	flagRow := flags.String("row", ".fullname", "split results into rows by distinct values of `projection`")
+	flagCol := flags.String("col", ".file", "split results into columns by distinct values of `projection`")
+	flagIgnore := flags.String("ignore", "", "ignore variations in `keys`")
+	flagFilter := flags.String("filter", "*", "use only benchmarks matching benchfilter `query`")
+	flags.Float64Var(&thresholds.CompareAlpha, "alpha", thresholds.CompareAlpha, "consider change significant if p < `α`")
+	// TODO: Support -confidence none to disable CI column? This
+	// would be equivalent to benchstat v1's -norange for CSV.
+	flagConfidence := flags.Float64("confidence", 0.95, "confidence `level` for ranges")
+	flagFormat := flags.String("format", "text", "print results in `format`:\n  text - plain text\n  csv  - comma-separated values (warnings will be written to stderr)\n")
+	flags.Parse(args)
+
+	if flags.NArg() == 0 {
+		flags.Usage()
+		os.Exit(2)
+	}
+
+	filter, err := benchproc.NewFilter(*flagFilter)
+	if err != nil {
+		return fmt.Errorf("parsing -filter: %s", err)
+	}
+
+	var parser benchproc.ProjectionParser
+	var parseErr error
+	mustParse := func(name, val string, unit bool) *benchproc.Projection {
+		var proj *benchproc.Projection
+		var err error
+		if unit {
+			proj, _, err = parser.ParseWithUnit(val, filter)
+		} else {
+			proj, err = parser.Parse(val, filter)
+		}
+		if err != nil && parseErr == nil {
+			parseErr = fmt.Errorf("parsing %s: %s", name, err)
+		}
+		return proj
+	}
+	tableBy := mustParse("-table", *flagTable, true)
+	rowBy := mustParse("-row", *flagRow, false)
+	colBy := mustParse("-col", *flagCol, false)
+	mustParse("-ignore", *flagIgnore, false)
+	residue := parser.Residue()
+	if parseErr != nil {
+		return parseErr
+	}
+
+	if thresholds.CompareAlpha < 0 || thresholds.CompareAlpha > 1 {
+		return fmt.Errorf("-alpha must be in range [0, 1]")
+	}
+	if *flagConfidence < 0 || *flagConfidence > 1 {
+		return fmt.Errorf("-confidence must be in range [0, 1]")
+	}
+	var format func(t *benchtab.Tables) error
+	switch *flagFormat {
+	default:
+		return fmt.Errorf("-format must be text or csv")
+	case "text":
+		format = func(t *benchtab.Tables) error { return t.ToText(w, false) }
+	case "csv":
+		format = func(t *benchtab.Tables) error { return t.ToCSV(w, wErr) }
+	}
+
+	stat := benchtab.NewBuilder(tableBy, rowBy, colBy, residue)
+	files := benchfmt.Files{Paths: flags.Args(), AllowStdin: true, AllowLabels: true}
+	for files.Scan() {
+		switch rec := files.Result(); rec := rec.(type) {
+		case *benchfmt.SyntaxError:
+			// Non-fatal result parse error. Warn
+			// but keep going.
+			fmt.Fprintln(wErr, rec)
+		case *benchfmt.Result:
+			if ok, err := filter.Apply(rec); !ok {
+				if err != nil {
+					// Print the reason we rejected this result.
+					fmt.Fprintln(wErr, err)
+				}
+				continue
+			}
+
+			stat.Add(rec)
+		}
+	}
+	if err := files.Err(); err != nil {
+		return err
+	}
+
+	tables := stat.ToTables(benchtab.TableOpts{
+		Confidence: *flagConfidence,
+		Thresholds: &thresholds,
+		Units:      files.Units(),
+	})
+	return format(tables)
+}
diff --git a/cmd/benchstat/main_test.go b/cmd/benchstat/main_test.go
index 50f54dc..d7da403 100644
--- a/cmd/benchstat/main_test.go
+++ b/cmd/benchstat/main_test.go
@@ -1,4 +1,4 @@
-// Copyright 2017 The Go Authors. All rights reserved.
+// Copyright 2021 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
@@ -6,121 +6,128 @@
 
 import (
 	"bytes"
-	"flag"
-	"io"
 	"os"
 	"os/exec"
 	"path/filepath"
+	"strings"
 	"testing"
 )
 
-func dotSlash(s string) string {
-	return "." + string(filepath.Separator) + s
+func TestCSV(t *testing.T) {
+	golden(t, "csvOldNew", "-format", "csv", "old.txt", "new.txt")
+	golden(t, "csvErrors", "-format", "csv", "-row", ".name", "new.txt")
 }
 
-func TestGolden(t *testing.T) {
+func TestCRC(t *testing.T) {
+	// These have a "note" that "unexpectedly" splits the tables,
+	// and also two units.
+	golden(t, "crcOldNew", "crc-old.txt", "crc-new.txt")
+	// "Fix" the split by note.
+	golden(t, "crcIgnore", "-ignore", "note", "crc-old.txt", "crc-new.txt")
+
+	// Filter to aligned, put size in the rows and poly in the columns.
+	golden(t, "crcSizeVsPoly", "-filter", "/align:0", "-row", "/size", "-col", "/poly", "crc-new.txt")
+}
+
+func TestUnits(t *testing.T) {
+	// Test unit metadata. This tests exact assumptions and
+	// warnings for inexact distributions.
+	golden(t, "units", "-col", "note", "units.txt")
+}
+
+func TestZero(t *testing.T) {
+	// Test printing of near-zero deltas.
+	golden(t, "zero", "-col", "note", "zero.txt")
+}
+
+func TestSmallSample(t *testing.T) {
+	// These benchmarks don't have enough samples to compute a CI
+	// or delta.
+	golden(t, "smallSample", "-col", "note", "smallSample.txt")
+}
+
+func TestIssue19565(t *testing.T) {
+	// Benchmark sets are inconsistent between columns. We show
+	// all results, but warn that the geomeans may not be
+	// comparable. To further stress things, the columns have the
+	// same *number* of benchmarks, but different sets.
+	golden(t, "issue19565", "-col", "note", "issue19565.txt")
+}
+
+func TestIssue19634(t *testing.T) {
+	golden(t, "issue19634", "-col", "note", "issue19634.txt")
+}
+
+func golden(t *testing.T, name string, args ...string) {
+	t.Helper()
+	// TODO: If benchfmt.Files supported fs.FS, we wouldn't need this.
 	if err := os.Chdir("testdata"); err != nil {
 		t.Fatal(err)
 	}
 	defer os.Chdir("..")
-	check(t, "exampleold", "exampleold.txt")
-	check(t, "example", "exampleold.txt", "examplenew.txt")
-	if t.Failed() {
-		t.Fatal("skipping other tests")
-	}
-	check(t, "exampleoldhtml", "-html", "exampleold.txt")
-	check(t, "examplehtml", "-html", "exampleold.txt", "examplenew.txt")
-	if t.Failed() {
-		t.Fatal("skipping other tests")
-	}
-	check(t, "all", "new.txt", "old.txt", "slashslash4.txt", "x386.txt")
-	check(t, "allnosplit", "-split", "", "new.txt", "old.txt", "slashslash4.txt", "x386.txt")
-	check(t, "oldnew", "old.txt", "new.txt")
-	check(t, "oldnewgeo", "-geomean", "old.txt", "new.txt")
-	check(t, "new4", "new.txt", "slashslash4.txt")
-	check(t, "oldnewhtml", "-html", "old.txt", "new.txt")
-	check(t, "oldnew4html", "-html", "old.txt", "new.txt", "slashslash4.txt")
-	check(t, "oldnewttest", "-delta-test=ttest", "old.txt", "new.txt")
-	check(t, "packagesold", "packagesold.txt")
-	check(t, "packages", "packagesold.txt", "packagesnew.txt")
-	check(t, "units", "units-old.txt", "units-new.txt")
-	check(t, "zero", "-delta-test=none", "zero-old.txt", "zero-new.txt")
-	check(t, "namesort", "-sort=name", "old.txt", "new.txt")
-	check(t, "deltasort", "-sort=delta", "old.txt", "new.txt")
-	check(t, "rdeltasort", "-sort=-delta", "old.txt", "new.txt")
 
-	check(t, "oldcsv", "-geomean", "-csv", "old.txt")
-	check(t, "allcsv", "-geomean", "-csv", "old.txt", "new.txt", "slashslash4.txt", "x386.txt")
-	check(t, "allnosplitcsv", "-geomean", "-csv", "-split", "", dotSlash("new.txt"), dotSlash("old.txt"), dotSlash("slashslash4.txt"), dotSlash("x386.txt")) // note order: new old slashslash4 x386; dotSlash tests common prefix removal
-
-	if dotSlash("x") == "./x" { // Golden files have hardcoded "/" path separators in them
-		check(t, "allnorangecsv", "-geomean", "-csv", "-norange", dotSlash("old.txt"), dotSlash("new.txt"), "slashslash4.txt", "x386.txt") // Mixed ./ tests common prefix non-removal
+	// Get the benchstat output.
+	var got, gotErr bytes.Buffer
+	t.Logf("benchstat %s", strings.Join(args, " "))
+	if err := benchstat(&got, &gotErr, args); err != nil {
+		t.Fatalf("unexpected error: %s", err)
 	}
 
+	// Compare to the golden output.
+	compare(t, name, "stdout", got.Bytes())
+	compare(t, name, "stderr", gotErr.Bytes())
 }
 
-func check(t *testing.T, name string, files ...string) {
-	t.Run(name, func(t *testing.T) {
-		os.Args = append([]string{"benchstat"}, files...)
-		t.Logf("running %v", os.Args)
-		r, w, err := os.Pipe()
-		if err != nil {
-			t.Fatal(err)
-		}
-		c := make(chan []byte)
-		go func() {
-			data, err := io.ReadAll(r)
-			if err != nil {
-				t.Error(err)
-			}
-			c <- data
-		}()
-		stdout := os.Stdout
-		stderr := os.Stderr
-		os.Stdout = w
-		os.Stderr = w
-		exit = func(code int) { t.Fatalf("exit %d during main", code) }
-		*flagGeomean = false
-		*flagHTML = false
-		*flagNoRange = false
-		*flagDeltaTest = "utest"
-		*flagSplit = flag.Lookup("split").DefValue
+func compare(t *testing.T, name, sub string, got []byte) {
+	t.Helper()
 
-		main()
-
-		w.Close()
-		os.Stdout = stdout
-		os.Stderr = stderr
-		exit = os.Exit
-
-		data := <-c
-		golden, err := os.ReadFile(name + ".golden")
-		if err != nil {
-			t.Fatal(err)
-		}
-		if !bytes.Equal(data, golden) {
-			t.Errorf("wrong output: diff have want:\n%s", diff(t, data, golden))
-		}
-	})
-}
-
-// diff returns the output of 'diff -u old new'.
-func diff(t *testing.T, old, new []byte) string {
-	data, err := exec.Command("diff", "-u", writeTemp(t, old), writeTemp(t, new)).CombinedOutput()
-	if len(data) > 0 {
-		return string(data)
-	}
-	// Most likely, "diff not found" so print the bad output so there is something.
-	return "ERROR: " + err.Error() + ": test output = \n" + string(old)
-}
-
-func writeTemp(t *testing.T, data []byte) string {
-	f, err := os.CreateTemp("", "benchstat_test")
+	wantPath := name + "." + sub
+	want, err := os.ReadFile(wantPath)
 	if err != nil {
-		t.Fatal(err)
+		if os.IsNotExist(err) {
+			// Treat a missing file as empty.
+			want = nil
+		} else {
+			t.Fatal(err)
+		}
 	}
-	f.Write(data)
-	name := f.Name()
-	f.Close()
-	return name
+
+	if !diff(t, want, got) {
+		return
+	}
+	// diff printed the error.
+
+	// Write a "got" file for reference.
+	gotPath := name + ".got-" + sub
+	if err := os.WriteFile(gotPath, got, 0666); err != nil {
+		t.Fatalf("error writing %s: %s", gotPath, err)
+	}
+}
+
+func diff(t *testing.T, want, got []byte) bool {
+	t.Helper()
+	if bytes.Equal(want, got) {
+		return false
+	}
+
+	d := t.TempDir()
+	wantPath, gotPath := filepath.Join(d, "want"), filepath.Join(d, "got")
+	if err := os.WriteFile(wantPath, want, 0666); err != nil {
+		t.Fatalf("error writing %s: %s", wantPath, err)
+	}
+	if err := os.WriteFile(gotPath, got, 0666); err != nil {
+		t.Fatalf("error writing %s: %s", gotPath, err)
+	}
+
+	cmd := exec.Command("diff", "-Nu", "want", "got")
+	cmd.Dir = d
+	data, _ := cmd.CombinedOutput()
+	if len(data) > 0 {
+		t.Errorf("\n%s", data)
+	} else {
+		// Most likely, "diff not found" so print the bad
+		// output so there is something.
+		t.Errorf("want:\n%sgot:\n%s", want, got)
+	}
+	return true
 }
diff --git a/cmd/benchstat/testdata/.gitignore b/cmd/benchstat/testdata/.gitignore
new file mode 100644
index 0000000..b1093ec
--- /dev/null
+++ b/cmd/benchstat/testdata/.gitignore
@@ -0,0 +1,2 @@
+*.got-stdout
+*.got-stderr
diff --git a/cmd/benchstat/testdata/all.golden b/cmd/benchstat/testdata/all.golden
deleted file mode 100644
index 2ce36ba..0000000
--- a/cmd/benchstat/testdata/all.golden
+++ /dev/null
@@ -1,151 +0,0 @@
-name \ time/op                             new.txt        old.txt        slashslash4.txt  x386.txt
-pkg:hash/crc32 goos:darwin goarch:amd64
-CRC32/poly=IEEE/size=15/align=0-8            44.5ns ± 3%    46.9ns ± 8%
-CRC32/poly=IEEE/size=15/align=1-8            44.5ns ± 4%    44.7ns ± 5%
-CRC32/poly=IEEE/size=40/align=0-8            42.5ns ± 6%    41.0ns ± 1%      42.1ns ± 3%
-CRC32/poly=IEEE/size=40/align=1-8            42.0ns ± 3%    41.1ns ± 1%      41.7ns ± 5%
-CRC32/poly=IEEE/size=512/align=0-8           57.1ns ± 3%   238.0ns ± 5%
-CRC32/poly=IEEE/size=512/align=1-8           57.2ns ± 3%   235.5ns ± 3%
-CRC32/poly=IEEE/size=1kB/align=0-8           94.1ns ± 2%   452.5ns ± 4%
-CRC32/poly=IEEE/size=1kB/align=1-8           93.3ns ± 2%   443.6ns ± 2%
-CRC32/poly=IEEE/size=4kB/align=0-8            298ns ± 1%    1740ns ± 8%      1682ns ± 2%
-CRC32/poly=IEEE/size=4kB/align=1-8            299ns ± 3%    1764ns ± 6%      1690ns ± 4%
-CRC32/poly=IEEE/size=32kB/align=0-8          2.16µs ± 3%   14.95µs ± 7%
-CRC32/poly=IEEE/size=32kB/align=1-8          2.18µs ± 3%   14.19µs ± 7%
-CRC32/poly=Castagnoli/size=15/align=0-8      16.3ns ± 2%    16.4ns ± 3%
-CRC32/poly=Castagnoli/size=15/align=1-8      17.3ns ± 2%    17.2ns ± 2%
-CRC32/poly=Castagnoli/size=40/align=0-8      17.5ns ± 4%    17.4ns ± 2%      18.6ns ±11%
-CRC32/poly=Castagnoli/size=40/align=1-8      19.4ns ± 2%    19.7ns ± 3%      19.6ns ± 2%
-CRC32/poly=Castagnoli/size=512/align=0-8     40.1ns ± 4%    40.2ns ± 2%
-CRC32/poly=Castagnoli/size=512/align=1-8     41.9ns ± 2%    42.1ns ± 3%
-CRC32/poly=Castagnoli/size=1kB/align=0-8     66.2ns ± 1%    65.5ns ± 1%
-CRC32/poly=Castagnoli/size=1kB/align=1-8     68.5ns ± 2%    70.1ns ± 6%
-CRC32/poly=Castagnoli/size=4kB/align=0-8      159ns ± 3%     163ns ± 5%       161ns ± 8%
-CRC32/poly=Castagnoli/size=4kB/align=1-8      162ns ± 3%     169ns ± 6%       170ns ± 8%
-CRC32/poly=Castagnoli/size=32kB/align=0-8    1.21µs ± 3%    1.22µs ± 4%
-CRC32/poly=Castagnoli/size=32kB/align=1-8    1.22µs ± 4%    1.26µs ± 3%
-CRC32/poly=Koopman/size=15/align=0-8         35.6ns ± 3%    36.5ns ±11%
-CRC32/poly=Koopman/size=15/align=1-8         35.5ns ± 1%    35.1ns ± 5%
-CRC32/poly=Koopman/size=40/align=0-8         87.6ns ± 2%    91.6ns ± 9%      93.8ns ±13%
-CRC32/poly=Koopman/size=40/align=1-8         88.0ns ± 3%    91.1ns ± 6%      86.9ns ± 3%
-CRC32/poly=Koopman/size=512/align=0-8        1.08µs ± 3%    1.13µs ± 5%
-CRC32/poly=Koopman/size=512/align=1-8        1.17µs ± 8%    1.13µs ± 6%
-CRC32/poly=Koopman/size=1kB/align=0-8        2.34µs ± 4%    2.24µs ± 6%
-CRC32/poly=Koopman/size=1kB/align=1-8        2.36µs ± 5%    2.15µs ± 2%
-CRC32/poly=Koopman/size=4kB/align=0-8        9.00µs ± 6%    9.03µs ± 6%      9.08µs ± 8%
-CRC32/poly=Koopman/size=4kB/align=1-8        9.05µs ±12%    8.94µs ±10%      9.46µs ± 8%
-CRC32/poly=Koopman/size=32kB/align=0-8       72.9µs ± 4%    72.4µs ± 9%
-CRC32/poly=Koopman/size=32kB/align=1-8       74.3µs ± 3%    69.6µs ± 3%
-pkg:hash/crc32 goos:darwin goarch:386
-CRC32/poly=IEEE/size=15/align=0-8                                                           62.4ns ± 9%
-CRC32/poly=IEEE/size=15/align=1-8                                                           63.5ns ± 8%
-CRC32/poly=IEEE/size=40/align=0-8                                                           57.4ns ± 3%
-CRC32/poly=IEEE/size=40/align=1-8                                                           57.3ns ± 3%
-CRC32/poly=IEEE/size=512/align=0-8                                                           332ns ± 2%
-CRC32/poly=IEEE/size=512/align=1-8                                                           335ns ± 3%
-CRC32/poly=IEEE/size=1kB/align=0-8                                                           626ns ± 2%
-CRC32/poly=IEEE/size=1kB/align=1-8                                                           635ns ± 6%
-CRC32/poly=IEEE/size=4kB/align=0-8                                                          2.46µs ± 7%
-CRC32/poly=IEEE/size=4kB/align=1-8                                                          2.43µs ± 5%
-CRC32/poly=IEEE/size=32kB/align=0-8                                                         19.1µs ± 4%
-CRC32/poly=IEEE/size=32kB/align=1-8                                                         19.4µs ± 5%
-CRC32/poly=Castagnoli/size=15/align=0-8                                                     59.4ns ± 1%
-CRC32/poly=Castagnoli/size=15/align=1-8                                                     59.4ns ± 2%
-CRC32/poly=Castagnoli/size=40/align=0-8                                                     59.7ns ± 6%
-CRC32/poly=Castagnoli/size=40/align=1-8                                                     58.1ns ± 7%
-CRC32/poly=Castagnoli/size=512/align=0-8                                                     351ns ± 5%
-CRC32/poly=Castagnoli/size=512/align=1-8                                                     350ns ± 7%
-CRC32/poly=Castagnoli/size=1kB/align=0-8                                                     657ns ± 4%
-CRC32/poly=Castagnoli/size=1kB/align=1-8                                                     657ns ± 6%
-CRC32/poly=Castagnoli/size=4kB/align=0-8                                                    2.45µs ± 4%
-CRC32/poly=Castagnoli/size=4kB/align=1-8                                                    2.45µs ± 5%
-CRC32/poly=Castagnoli/size=32kB/align=0-8                                                   20.5µs ± 5%
-CRC32/poly=Castagnoli/size=32kB/align=1-8                                                   20.2µs ± 9%
-CRC32/poly=Koopman/size=15/align=0-8                                                        58.2ns ± 4%
-CRC32/poly=Koopman/size=15/align=1-8                                                        56.4ns ± 7%
-CRC32/poly=Koopman/size=40/align=0-8                                                         142ns ± 8%
-CRC32/poly=Koopman/size=40/align=1-8                                                         136ns ± 5%
-CRC32/poly=Koopman/size=512/align=0-8                                                       1.68µs ± 5%
-CRC32/poly=Koopman/size=512/align=1-8                                                       1.64µs ± 4%
-CRC32/poly=Koopman/size=1kB/align=0-8                                                       3.31µs ± 4%
-CRC32/poly=Koopman/size=1kB/align=1-8                                                       3.28µs ± 3%
-CRC32/poly=Koopman/size=4kB/align=0-8                                                       13.3µs ± 3%
-CRC32/poly=Koopman/size=4kB/align=1-8                                                       13.2µs ± 3%
-CRC32/poly=Koopman/size=32kB/align=0-8                                                       107µs ± 6%
-CRC32/poly=Koopman/size=32kB/align=1-8                                                       106µs ± 4%
-
-name \ speed                               new.txt        old.txt        slashslash4.txt  x386.txt
-pkg:hash/crc32 goos:darwin goarch:amd64
-CRC32/poly=IEEE/size=15/align=0-8           337MB/s ± 3%   321MB/s ± 8%
-CRC32/poly=IEEE/size=15/align=1-8           337MB/s ± 4%   336MB/s ± 4%
-CRC32/poly=IEEE/size=40/align=0-8           942MB/s ± 5%   975MB/s ± 1%     951MB/s ± 3%
-CRC32/poly=IEEE/size=40/align=1-8           952MB/s ± 3%   974MB/s ± 1%     960MB/s ± 4%
-CRC32/poly=IEEE/size=512/align=0-8         8.97GB/s ± 3%  2.15GB/s ± 4%
-CRC32/poly=IEEE/size=512/align=1-8         8.96GB/s ± 3%  2.17GB/s ± 3%
-CRC32/poly=IEEE/size=1kB/align=0-8         10.9GB/s ± 2%   2.3GB/s ± 4%
-CRC32/poly=IEEE/size=1kB/align=1-8         11.0GB/s ± 2%   2.3GB/s ± 2%
-CRC32/poly=IEEE/size=4kB/align=0-8         13.7GB/s ± 1%   2.4GB/s ± 7%     2.4GB/s ± 2%
-CRC32/poly=IEEE/size=4kB/align=1-8         13.7GB/s ± 3%   2.3GB/s ± 6%     2.4GB/s ± 4%
-CRC32/poly=IEEE/size=32kB/align=0-8        15.2GB/s ± 3%   2.2GB/s ± 7%
-CRC32/poly=IEEE/size=32kB/align=1-8        15.0GB/s ± 3%   2.3GB/s ± 8%
-CRC32/poly=Castagnoli/size=15/align=0-8     920MB/s ± 2%   916MB/s ± 2%
-CRC32/poly=Castagnoli/size=15/align=1-8     867MB/s ± 2%   870MB/s ± 2%
-CRC32/poly=Castagnoli/size=40/align=0-8    2.28GB/s ± 4%  2.30GB/s ± 2%    2.16GB/s ±11%
-CRC32/poly=Castagnoli/size=40/align=1-8    2.06GB/s ± 2%  2.03GB/s ± 3%    2.04GB/s ± 2%
-CRC32/poly=Castagnoli/size=512/align=0-8   12.8GB/s ± 4%  12.7GB/s ± 2%
-CRC32/poly=Castagnoli/size=512/align=1-8   12.2GB/s ± 1%  12.1GB/s ± 3%
-CRC32/poly=Castagnoli/size=1kB/align=0-8   15.5GB/s ± 1%  15.6GB/s ± 1%
-CRC32/poly=Castagnoli/size=1kB/align=1-8   15.0GB/s ± 2%  14.6GB/s ± 6%
-CRC32/poly=Castagnoli/size=4kB/align=0-8   25.7GB/s ± 3%  25.1GB/s ± 5%    25.4GB/s ± 7%
-CRC32/poly=Castagnoli/size=4kB/align=1-8   25.3GB/s ± 3%  24.1GB/s ± 6%    24.1GB/s ± 8%
-CRC32/poly=Castagnoli/size=32kB/align=0-8  26.8GB/s ± 5%  26.9GB/s ± 4%
-CRC32/poly=Castagnoli/size=32kB/align=1-8  26.8GB/s ± 4%  25.9GB/s ± 3%
-CRC32/poly=Koopman/size=15/align=0-8        421MB/s ± 3%   412MB/s ±10%
-CRC32/poly=Koopman/size=15/align=1-8        422MB/s ± 1%   427MB/s ± 5%
-CRC32/poly=Koopman/size=40/align=0-8        456MB/s ± 2%   437MB/s ± 9%     428MB/s ±12%
-CRC32/poly=Koopman/size=40/align=1-8        455MB/s ± 3%   440MB/s ± 6%     461MB/s ± 3%
-CRC32/poly=Koopman/size=512/align=0-8       476MB/s ± 3%   453MB/s ± 5%
-CRC32/poly=Koopman/size=512/align=1-8       440MB/s ± 8%   455MB/s ± 6%
-CRC32/poly=Koopman/size=1kB/align=0-8       438MB/s ± 4%   452MB/s ± 9%
-CRC32/poly=Koopman/size=1kB/align=1-8       434MB/s ± 5%   477MB/s ± 2%
-CRC32/poly=Koopman/size=4kB/align=0-8       455MB/s ± 6%   454MB/s ± 5%     452MB/s ± 8%
-CRC32/poly=Koopman/size=4kB/align=1-8       455MB/s ±11%   459MB/s ± 9%     434MB/s ± 9%
-CRC32/poly=Koopman/size=32kB/align=0-8      450MB/s ± 4%   453MB/s ± 8%
-CRC32/poly=Koopman/size=32kB/align=1-8      441MB/s ± 3%   471MB/s ± 3%
-pkg:hash/crc32 goos:darwin goarch:386
-CRC32/poly=IEEE/size=15/align=0-8                                                          241MB/s ± 8%
-CRC32/poly=IEEE/size=15/align=1-8                                                          237MB/s ± 8%
-CRC32/poly=IEEE/size=40/align=0-8                                                          696MB/s ± 3%
-CRC32/poly=IEEE/size=40/align=1-8                                                          698MB/s ± 3%
-CRC32/poly=IEEE/size=512/align=0-8                                                        1.54GB/s ± 2%
-CRC32/poly=IEEE/size=512/align=1-8                                                        1.52GB/s ± 3%
-CRC32/poly=IEEE/size=1kB/align=0-8                                                        1.63GB/s ± 2%
-CRC32/poly=IEEE/size=1kB/align=1-8                                                        1.61GB/s ± 6%
-CRC32/poly=IEEE/size=4kB/align=0-8                                                        1.67GB/s ± 7%
-CRC32/poly=IEEE/size=4kB/align=1-8                                                        1.68GB/s ± 5%
-CRC32/poly=IEEE/size=32kB/align=0-8                                                       1.71GB/s ± 4%
-CRC32/poly=IEEE/size=32kB/align=1-8                                                       1.69GB/s ± 5%
-CRC32/poly=Castagnoli/size=15/align=0-8                                                    253MB/s ± 1%
-CRC32/poly=Castagnoli/size=15/align=1-8                                                    253MB/s ± 2%
-CRC32/poly=Castagnoli/size=40/align=0-8                                                    671MB/s ± 6%
-CRC32/poly=Castagnoli/size=40/align=1-8                                                    689MB/s ± 6%
-CRC32/poly=Castagnoli/size=512/align=0-8                                                  1.46GB/s ± 5%
-CRC32/poly=Castagnoli/size=512/align=1-8                                                  1.46GB/s ± 7%
-CRC32/poly=Castagnoli/size=1kB/align=0-8                                                  1.56GB/s ± 4%
-CRC32/poly=Castagnoli/size=1kB/align=1-8                                                  1.56GB/s ± 5%
-CRC32/poly=Castagnoli/size=4kB/align=0-8                                                  1.67GB/s ± 4%
-CRC32/poly=Castagnoli/size=4kB/align=1-8                                                  1.67GB/s ± 5%
-CRC32/poly=Castagnoli/size=32kB/align=0-8                                                 1.60GB/s ± 5%
-CRC32/poly=Castagnoli/size=32kB/align=1-8                                                 1.63GB/s ± 8%
-CRC32/poly=Koopman/size=15/align=0-8                                                       258MB/s ± 5%
-CRC32/poly=Koopman/size=15/align=1-8                                                       266MB/s ± 6%
-CRC32/poly=Koopman/size=40/align=0-8                                                       281MB/s ± 7%
-CRC32/poly=Koopman/size=40/align=1-8                                                       290MB/s ± 8%
-CRC32/poly=Koopman/size=512/align=0-8                                                      304MB/s ± 5%
-CRC32/poly=Koopman/size=512/align=1-8                                                      312MB/s ± 3%
-CRC32/poly=Koopman/size=1kB/align=0-8                                                      310MB/s ± 4%
-CRC32/poly=Koopman/size=1kB/align=1-8                                                      312MB/s ± 3%
-CRC32/poly=Koopman/size=4kB/align=0-8                                                      308MB/s ± 3%
-CRC32/poly=Koopman/size=4kB/align=1-8                                                      311MB/s ± 3%
-CRC32/poly=Koopman/size=32kB/align=0-8                                                     307MB/s ± 6%
-CRC32/poly=Koopman/size=32kB/align=1-8                                                     309MB/s ± 4%
diff --git a/cmd/benchstat/testdata/allcsv.golden b/cmd/benchstat/testdata/allcsv.golden
deleted file mode 100644
index 7480ef2..0000000
--- a/cmd/benchstat/testdata/allcsv.golden
+++ /dev/null
@@ -1,155 +0,0 @@
-name \ time/op (ns/op),old.txt,±,new.txt,±,slashslash4.txt,±,x386.txt,±
-pkg:hash/crc32 goos:darwin goarch:amd64
-CRC32/poly=IEEE/size=15/align=0-8,4.68700E+01,8%,4.45200E+01,3%
-CRC32/poly=IEEE/size=15/align=1-8,4.47100E+01,5%,4.45000E+01,4%
-CRC32/poly=IEEE/size=40/align=0-8,4.10375E+01,1%,4.25000E+01,6%,4.20800E+01,3%
-CRC32/poly=IEEE/size=40/align=1-8,4.10778E+01,1%,4.20400E+01,3%,4.16800E+01,5%
-CRC32/poly=IEEE/size=512/align=0-8,2.38000E+02,5%,5.71200E+01,3%
-CRC32/poly=IEEE/size=512/align=1-8,2.35500E+02,3%,5.71700E+01,3%
-CRC32/poly=IEEE/size=1kB/align=0-8,4.52500E+02,4%,9.41125E+01,2%
-CRC32/poly=IEEE/size=1kB/align=1-8,4.43600E+02,2%,9.32875E+01,2%
-CRC32/poly=IEEE/size=4kB/align=0-8,1.74000E+03,8%,2.98111E+02,1%,1.68200E+03,2%
-CRC32/poly=IEEE/size=4kB/align=1-8,1.76430E+03,6%,2.99100E+02,3%,1.68980E+03,4%
-CRC32/poly=IEEE/size=32kB/align=0-8,1.49529E+04,7%,2.15800E+03,3%
-CRC32/poly=IEEE/size=32kB/align=1-8,1.41888E+04,7%,2.17830E+03,3%
-CRC32/poly=Castagnoli/size=15/align=0-8,1.63778E+01,3%,1.63000E+01,2%
-CRC32/poly=Castagnoli/size=15/align=1-8,1.72222E+01,2%,1.72900E+01,2%
-CRC32/poly=Castagnoli/size=40/align=0-8,1.74300E+01,2%,1.75300E+01,4%,1.85800E+01,11%
-CRC32/poly=Castagnoli/size=40/align=1-8,1.97100E+01,3%,1.93900E+01,2%,1.96375E+01,2%
-CRC32/poly=Castagnoli/size=512/align=0-8,4.01700E+01,2%,4.01300E+01,4%
-CRC32/poly=Castagnoli/size=512/align=1-8,4.21400E+01,3%,4.19444E+01,2%
-CRC32/poly=Castagnoli/size=1kB/align=0-8,6.55000E+01,1%,6.61625E+01,1%
-CRC32/poly=Castagnoli/size=1kB/align=1-8,7.00900E+01,6%,6.84667E+01,2%
-CRC32/poly=Castagnoli/size=4kB/align=0-8,1.62800E+02,5%,1.58800E+02,3%,1.61200E+02,8%
-CRC32/poly=Castagnoli/size=4kB/align=1-8,1.69400E+02,6%,1.61600E+02,3%,1.69600E+02,8%
-CRC32/poly=Castagnoli/size=32kB/align=0-8,1.21822E+03,4%,1.21433E+03,3%
-CRC32/poly=Castagnoli/size=32kB/align=1-8,1.26478E+03,3%,1.22080E+03,4%
-CRC32/poly=Koopman/size=15/align=0-8,3.65100E+01,11%,3.56000E+01,3%
-CRC32/poly=Koopman/size=15/align=1-8,3.51500E+01,5%,3.55111E+01,1%
-CRC32/poly=Koopman/size=40/align=0-8,9.16400E+01,9%,8.76500E+01,2%,9.38100E+01,13%
-CRC32/poly=Koopman/size=40/align=1-8,9.10800E+01,6%,8.80300E+01,3%,8.68600E+01,3%
-CRC32/poly=Koopman/size=512/align=0-8,1.13170E+03,5%,1.07590E+03,3%
-CRC32/poly=Koopman/size=512/align=1-8,1.12680E+03,6%,1.16660E+03,8%
-CRC32/poly=Koopman/size=1kB/align=0-8,2.24333E+03,6%,2.34070E+03,4%
-CRC32/poly=Koopman/size=1kB/align=1-8,2.14867E+03,2%,2.36010E+03,5%
-CRC32/poly=Koopman/size=4kB/align=0-8,9.03150E+03,6%,9.00320E+03,6%,9.08160E+03,8%
-CRC32/poly=Koopman/size=4kB/align=1-8,8.94020E+03,10%,9.04630E+03,12%,9.45520E+03,8%
-CRC32/poly=Koopman/size=32kB/align=0-8,7.24280E+04,9%,7.29005E+04,4%
-CRC32/poly=Koopman/size=32kB/align=1-8,6.96194E+04,3%,7.42809E+04,3%
-pkg:hash/crc32 goos:darwin goarch:386
-CRC32/poly=IEEE/size=15/align=0-8,,,,,,,6.23500E+01,9%
-CRC32/poly=IEEE/size=15/align=1-8,,,,,,,6.34900E+01,8%
-CRC32/poly=IEEE/size=40/align=0-8,,,,,,,5.74400E+01,3%
-CRC32/poly=IEEE/size=40/align=1-8,,,,,,,5.73300E+01,3%
-CRC32/poly=IEEE/size=512/align=0-8,,,,,,,3.32333E+02,2%
-CRC32/poly=IEEE/size=512/align=1-8,,,,,,,3.35300E+02,3%
-CRC32/poly=IEEE/size=1kB/align=0-8,,,,,,,6.26300E+02,2%
-CRC32/poly=IEEE/size=1kB/align=1-8,,,,,,,6.35300E+02,6%
-CRC32/poly=IEEE/size=4kB/align=0-8,,,,,,,2.45670E+03,7%
-CRC32/poly=IEEE/size=4kB/align=1-8,,,,,,,2.43450E+03,5%
-CRC32/poly=IEEE/size=32kB/align=0-8,,,,,,,1.91499E+04,4%
-CRC32/poly=IEEE/size=32kB/align=1-8,,,,,,,1.94203E+04,5%
-CRC32/poly=Castagnoli/size=15/align=0-8,,,,,,,5.93625E+01,1%
-CRC32/poly=Castagnoli/size=15/align=1-8,,,,,,,5.93500E+01,2%
-CRC32/poly=Castagnoli/size=40/align=0-8,,,,,,,5.96600E+01,6%
-CRC32/poly=Castagnoli/size=40/align=1-8,,,,,,,5.81100E+01,7%
-CRC32/poly=Castagnoli/size=512/align=0-8,,,,,,,3.50800E+02,5%
-CRC32/poly=Castagnoli/size=512/align=1-8,,,,,,,3.49800E+02,7%
-CRC32/poly=Castagnoli/size=1kB/align=0-8,,,,,,,6.56700E+02,4%
-CRC32/poly=Castagnoli/size=1kB/align=1-8,,,,,,,6.56800E+02,6%
-CRC32/poly=Castagnoli/size=4kB/align=0-8,,,,,,,2.45180E+03,4%
-CRC32/poly=Castagnoli/size=4kB/align=1-8,,,,,,,2.44800E+03,5%
-CRC32/poly=Castagnoli/size=32kB/align=0-8,,,,,,,2.05261E+04,5%
-CRC32/poly=Castagnoli/size=32kB/align=1-8,,,,,,,2.01794E+04,9%
-CRC32/poly=Koopman/size=15/align=0-8,,,,,,,5.82100E+01,4%
-CRC32/poly=Koopman/size=15/align=1-8,,,,,,,5.64000E+01,7%
-CRC32/poly=Koopman/size=40/align=0-8,,,,,,,1.42300E+02,8%
-CRC32/poly=Koopman/size=40/align=1-8,,,,,,,1.36222E+02,5%
-CRC32/poly=Koopman/size=512/align=0-8,,,,,,,1.68440E+03,5%
-CRC32/poly=Koopman/size=512/align=1-8,,,,,,,1.64290E+03,4%
-CRC32/poly=Koopman/size=1kB/align=0-8,,,,,,,3.30890E+03,4%
-CRC32/poly=Koopman/size=1kB/align=1-8,,,,,,,3.27920E+03,3%
-CRC32/poly=Koopman/size=4kB/align=0-8,,,,,,,1.33243E+04,3%
-CRC32/poly=Koopman/size=4kB/align=1-8,,,,,,,1.31612E+04,3%
-CRC32/poly=Koopman/size=32kB/align=0-8,,,,,,,1.06891E+05,6%
-CRC32/poly=Koopman/size=32kB/align=1-8,,,,,,,1.06069E+05,4%
-
-[Geo mean],3.44668E+02,,2.37855E+02,,2.39033E+02,,8.74025E+02
-
-name \ speed (MB/s),old.txt,±,new.txt,±,slashslash4.txt,±,x386.txt,±
-pkg:hash/crc32 goos:darwin goarch:amd64
-CRC32/poly=IEEE/size=15/align=0-8,3.20711E+02,8%,3.36950E+02,3%
-CRC32/poly=IEEE/size=15/align=1-8,3.35516E+02,4%,3.37066E+02,4%
-CRC32/poly=IEEE/size=40/align=0-8,9.74718E+02,1%,9.41823E+02,5%,9.50988E+02,3%
-CRC32/poly=IEEE/size=40/align=1-8,9.73636E+02,1%,9.51759E+02,3%,9.59978E+02,4%
-CRC32/poly=IEEE/size=512/align=0-8,2.14703E+03,4%,8.96715E+03,3%
-CRC32/poly=IEEE/size=512/align=1-8,2.16913E+03,3%,8.95606E+03,3%
-CRC32/poly=IEEE/size=1kB/align=0-8,2.26152E+03,4%,1.08807E+04,2%
-CRC32/poly=IEEE/size=1kB/align=1-8,2.30619E+03,2%,1.09768E+04,2%
-CRC32/poly=IEEE/size=4kB/align=0-8,2.35732E+03,7%,1.37258E+04,1%,2.43489E+03,2%
-CRC32/poly=IEEE/size=4kB/align=1-8,2.32511E+03,6%,1.36770E+04,3%,2.42393E+03,4%
-CRC32/poly=IEEE/size=32kB/align=0-8,2.19443E+03,7%,1.51852E+04,3%
-CRC32/poly=IEEE/size=32kB/align=1-8,2.31415E+03,8%,1.50437E+04,3%
-CRC32/poly=Castagnoli/size=15/align=0-8,9.15799E+02,2%,9.20433E+02,2%
-CRC32/poly=Castagnoli/size=15/align=1-8,8.70312E+02,2%,8.67298E+02,2%
-CRC32/poly=Castagnoli/size=40/align=0-8,2.29560E+03,2%,2.28265E+03,4%,2.16074E+03,11%
-CRC32/poly=Castagnoli/size=40/align=1-8,2.03023E+03,3%,2.06346E+03,2%,2.03587E+03,2%
-CRC32/poly=Castagnoli/size=512/align=0-8,1.27437E+04,2%,1.27578E+04,4%
-CRC32/poly=Castagnoli/size=512/align=1-8,1.21445E+04,3%,1.22049E+04,1%
-CRC32/poly=Castagnoli/size=1kB/align=0-8,1.56355E+04,1%,1.54766E+04,1%
-CRC32/poly=Castagnoli/size=1kB/align=1-8,1.46273E+04,6%,1.49597E+04,2%
-CRC32/poly=Castagnoli/size=4kB/align=0-8,2.50862E+04,5%,2.56897E+04,3%,2.53717E+04,7%
-CRC32/poly=Castagnoli/size=4kB/align=1-8,2.41378E+04,6%,2.52736E+04,3%,2.41249E+04,8%
-CRC32/poly=Castagnoli/size=32kB/align=0-8,2.68975E+04,4%,2.68232E+04,5%
-CRC32/poly=Castagnoli/size=32kB/align=1-8,2.59038E+04,3%,2.68422E+04,4%
-CRC32/poly=Koopman/size=15/align=0-8,4.11932E+02,10%,4.21452E+02,3%
-CRC32/poly=Koopman/size=15/align=1-8,4.27408E+02,5%,4.22362E+02,1%
-CRC32/poly=Koopman/size=40/align=0-8,4.36831E+02,9%,4.56472E+02,2%,4.28261E+02,12%
-CRC32/poly=Koopman/size=40/align=1-8,4.39731E+02,6%,4.54515E+02,3%,4.60660E+02,3%
-CRC32/poly=Koopman/size=512/align=0-8,4.52693E+02,5%,4.75749E+02,3%
-CRC32/poly=Koopman/size=512/align=1-8,4.54579E+02,6%,4.39685E+02,8%
-CRC32/poly=Koopman/size=1kB/align=0-8,4.52443E+02,9%,4.37629E+02,4%
-CRC32/poly=Koopman/size=1kB/align=1-8,4.76558E+02,2%,4.34042E+02,5%
-CRC32/poly=Koopman/size=4kB/align=0-8,4.54022E+02,5%,4.55492E+02,6%,4.52301E+02,8%
-CRC32/poly=Koopman/size=4kB/align=1-8,4.59394E+02,9%,4.54627E+02,11%,4.33984E+02,9%
-CRC32/poly=Koopman/size=32kB/align=0-8,4.53471E+02,8%,4.49828E+02,4%
-CRC32/poly=Koopman/size=32kB/align=1-8,4.70784E+02,3%,4.41379E+02,3%
-pkg:hash/crc32 goos:darwin goarch:386
-CRC32/poly=IEEE/size=15/align=0-8,,,,,,,2.41180E+02,8%
-CRC32/poly=IEEE/size=15/align=1-8,,,,,,,2.36728E+02,8%
-CRC32/poly=IEEE/size=40/align=0-8,,,,,,,6.96493E+02,3%
-CRC32/poly=IEEE/size=40/align=1-8,,,,,,,6.97741E+02,3%
-CRC32/poly=IEEE/size=512/align=0-8,,,,,,,1.53793E+03,2%
-CRC32/poly=IEEE/size=512/align=1-8,,,,,,,1.52435E+03,3%
-CRC32/poly=IEEE/size=1kB/align=0-8,,,,,,,1.63354E+03,2%
-CRC32/poly=IEEE/size=1kB/align=1-8,,,,,,,1.61191E+03,6%
-CRC32/poly=IEEE/size=4kB/align=0-8,,,,,,,1.66842E+03,7%
-CRC32/poly=IEEE/size=4kB/align=1-8,,,,,,,1.68370E+03,5%
-CRC32/poly=IEEE/size=32kB/align=0-8,,,,,,,1.71179E+03,4%
-CRC32/poly=IEEE/size=32kB/align=1-8,,,,,,,1.68829E+03,5%
-CRC32/poly=Castagnoli/size=15/align=0-8,,,,,,,2.52761E+02,1%
-CRC32/poly=Castagnoli/size=15/align=1-8,,,,,,,2.52790E+02,2%
-CRC32/poly=Castagnoli/size=40/align=0-8,,,,,,,6.71281E+02,6%
-CRC32/poly=Castagnoli/size=40/align=1-8,,,,,,,6.89073E+02,6%
-CRC32/poly=Castagnoli/size=512/align=0-8,,,,,,,1.45840E+03,5%
-CRC32/poly=Castagnoli/size=512/align=1-8,,,,,,,1.46342E+03,7%
-CRC32/poly=Castagnoli/size=1kB/align=0-8,,,,,,,1.55922E+03,4%
-CRC32/poly=Castagnoli/size=1kB/align=1-8,,,,,,,1.55926E+03,5%
-CRC32/poly=Castagnoli/size=4kB/align=0-8,,,,,,,1.67078E+03,4%
-CRC32/poly=Castagnoli/size=4kB/align=1-8,,,,,,,1.67399E+03,5%
-CRC32/poly=Castagnoli/size=32kB/align=0-8,,,,,,,1.59801E+03,5%
-CRC32/poly=Castagnoli/size=32kB/align=1-8,,,,,,,1.62604E+03,8%
-CRC32/poly=Koopman/size=15/align=0-8,,,,,,,2.57893E+02,5%
-CRC32/poly=Koopman/size=15/align=1-8,,,,,,,2.66301E+02,6%
-CRC32/poly=Koopman/size=40/align=0-8,,,,,,,2.80521E+02,7%
-CRC32/poly=Koopman/size=40/align=1-8,,,,,,,2.89866E+02,8%
-CRC32/poly=Koopman/size=512/align=0-8,,,,,,,3.04127E+02,5%
-CRC32/poly=Koopman/size=512/align=1-8,,,,,,,3.11609E+02,3%
-CRC32/poly=Koopman/size=1kB/align=0-8,,,,,,,3.09621E+02,4%
-CRC32/poly=Koopman/size=1kB/align=1-8,,,,,,,3.12318E+02,3%
-CRC32/poly=Koopman/size=4kB/align=0-8,,,,,,,3.07516E+02,3%
-CRC32/poly=Koopman/size=4kB/align=1-8,,,,,,,3.11292E+02,3%
-CRC32/poly=Koopman/size=32kB/align=0-8,,,,,,,3.06816E+02,6%
-CRC32/poly=Koopman/size=32kB/align=1-8,,,,,,,3.09085E+02,4%
-
-[Geo mean],1.71221E+03,,2.48066E+03,,1.69486E+03,,6.75103E+02
diff --git a/cmd/benchstat/testdata/allnorangecsv.golden b/cmd/benchstat/testdata/allnorangecsv.golden
deleted file mode 100644
index 3916a37..0000000
--- a/cmd/benchstat/testdata/allnorangecsv.golden
+++ /dev/null
@@ -1,155 +0,0 @@
-name \ time/op (ns/op),./old.txt,./new.txt,slashslash4.txt,x386.txt
-pkg:hash/crc32 goos:darwin goarch:amd64
-CRC32/poly=IEEE/size=15/align=0-8,4.68700E+01,4.45200E+01
-CRC32/poly=IEEE/size=15/align=1-8,4.47100E+01,4.45000E+01
-CRC32/poly=IEEE/size=40/align=0-8,4.10375E+01,4.25000E+01,4.20800E+01
-CRC32/poly=IEEE/size=40/align=1-8,4.10778E+01,4.20400E+01,4.16800E+01
-CRC32/poly=IEEE/size=512/align=0-8,2.38000E+02,5.71200E+01
-CRC32/poly=IEEE/size=512/align=1-8,2.35500E+02,5.71700E+01
-CRC32/poly=IEEE/size=1kB/align=0-8,4.52500E+02,9.41125E+01
-CRC32/poly=IEEE/size=1kB/align=1-8,4.43600E+02,9.32875E+01
-CRC32/poly=IEEE/size=4kB/align=0-8,1.74000E+03,2.98111E+02,1.68200E+03
-CRC32/poly=IEEE/size=4kB/align=1-8,1.76430E+03,2.99100E+02,1.68980E+03
-CRC32/poly=IEEE/size=32kB/align=0-8,1.49529E+04,2.15800E+03
-CRC32/poly=IEEE/size=32kB/align=1-8,1.41888E+04,2.17830E+03
-CRC32/poly=Castagnoli/size=15/align=0-8,1.63778E+01,1.63000E+01
-CRC32/poly=Castagnoli/size=15/align=1-8,1.72222E+01,1.72900E+01
-CRC32/poly=Castagnoli/size=40/align=0-8,1.74300E+01,1.75300E+01,1.85800E+01
-CRC32/poly=Castagnoli/size=40/align=1-8,1.97100E+01,1.93900E+01,1.96375E+01
-CRC32/poly=Castagnoli/size=512/align=0-8,4.01700E+01,4.01300E+01
-CRC32/poly=Castagnoli/size=512/align=1-8,4.21400E+01,4.19444E+01
-CRC32/poly=Castagnoli/size=1kB/align=0-8,6.55000E+01,6.61625E+01
-CRC32/poly=Castagnoli/size=1kB/align=1-8,7.00900E+01,6.84667E+01
-CRC32/poly=Castagnoli/size=4kB/align=0-8,1.62800E+02,1.58800E+02,1.61200E+02
-CRC32/poly=Castagnoli/size=4kB/align=1-8,1.69400E+02,1.61600E+02,1.69600E+02
-CRC32/poly=Castagnoli/size=32kB/align=0-8,1.21822E+03,1.21433E+03
-CRC32/poly=Castagnoli/size=32kB/align=1-8,1.26478E+03,1.22080E+03
-CRC32/poly=Koopman/size=15/align=0-8,3.65100E+01,3.56000E+01
-CRC32/poly=Koopman/size=15/align=1-8,3.51500E+01,3.55111E+01
-CRC32/poly=Koopman/size=40/align=0-8,9.16400E+01,8.76500E+01,9.38100E+01
-CRC32/poly=Koopman/size=40/align=1-8,9.10800E+01,8.80300E+01,8.68600E+01
-CRC32/poly=Koopman/size=512/align=0-8,1.13170E+03,1.07590E+03
-CRC32/poly=Koopman/size=512/align=1-8,1.12680E+03,1.16660E+03
-CRC32/poly=Koopman/size=1kB/align=0-8,2.24333E+03,2.34070E+03
-CRC32/poly=Koopman/size=1kB/align=1-8,2.14867E+03,2.36010E+03
-CRC32/poly=Koopman/size=4kB/align=0-8,9.03150E+03,9.00320E+03,9.08160E+03
-CRC32/poly=Koopman/size=4kB/align=1-8,8.94020E+03,9.04630E+03,9.45520E+03
-CRC32/poly=Koopman/size=32kB/align=0-8,7.24280E+04,7.29005E+04
-CRC32/poly=Koopman/size=32kB/align=1-8,6.96194E+04,7.42809E+04
-pkg:hash/crc32 goos:darwin goarch:386
-CRC32/poly=IEEE/size=15/align=0-8,,,,6.23500E+01
-CRC32/poly=IEEE/size=15/align=1-8,,,,6.34900E+01
-CRC32/poly=IEEE/size=40/align=0-8,,,,5.74400E+01
-CRC32/poly=IEEE/size=40/align=1-8,,,,5.73300E+01
-CRC32/poly=IEEE/size=512/align=0-8,,,,3.32333E+02
-CRC32/poly=IEEE/size=512/align=1-8,,,,3.35300E+02
-CRC32/poly=IEEE/size=1kB/align=0-8,,,,6.26300E+02
-CRC32/poly=IEEE/size=1kB/align=1-8,,,,6.35300E+02
-CRC32/poly=IEEE/size=4kB/align=0-8,,,,2.45670E+03
-CRC32/poly=IEEE/size=4kB/align=1-8,,,,2.43450E+03
-CRC32/poly=IEEE/size=32kB/align=0-8,,,,1.91499E+04
-CRC32/poly=IEEE/size=32kB/align=1-8,,,,1.94203E+04
-CRC32/poly=Castagnoli/size=15/align=0-8,,,,5.93625E+01
-CRC32/poly=Castagnoli/size=15/align=1-8,,,,5.93500E+01
-CRC32/poly=Castagnoli/size=40/align=0-8,,,,5.96600E+01
-CRC32/poly=Castagnoli/size=40/align=1-8,,,,5.81100E+01
-CRC32/poly=Castagnoli/size=512/align=0-8,,,,3.50800E+02
-CRC32/poly=Castagnoli/size=512/align=1-8,,,,3.49800E+02
-CRC32/poly=Castagnoli/size=1kB/align=0-8,,,,6.56700E+02
-CRC32/poly=Castagnoli/size=1kB/align=1-8,,,,6.56800E+02
-CRC32/poly=Castagnoli/size=4kB/align=0-8,,,,2.45180E+03
-CRC32/poly=Castagnoli/size=4kB/align=1-8,,,,2.44800E+03
-CRC32/poly=Castagnoli/size=32kB/align=0-8,,,,2.05261E+04
-CRC32/poly=Castagnoli/size=32kB/align=1-8,,,,2.01794E+04
-CRC32/poly=Koopman/size=15/align=0-8,,,,5.82100E+01
-CRC32/poly=Koopman/size=15/align=1-8,,,,5.64000E+01
-CRC32/poly=Koopman/size=40/align=0-8,,,,1.42300E+02
-CRC32/poly=Koopman/size=40/align=1-8,,,,1.36222E+02
-CRC32/poly=Koopman/size=512/align=0-8,,,,1.68440E+03
-CRC32/poly=Koopman/size=512/align=1-8,,,,1.64290E+03
-CRC32/poly=Koopman/size=1kB/align=0-8,,,,3.30890E+03
-CRC32/poly=Koopman/size=1kB/align=1-8,,,,3.27920E+03
-CRC32/poly=Koopman/size=4kB/align=0-8,,,,1.33243E+04
-CRC32/poly=Koopman/size=4kB/align=1-8,,,,1.31612E+04
-CRC32/poly=Koopman/size=32kB/align=0-8,,,,1.06891E+05
-CRC32/poly=Koopman/size=32kB/align=1-8,,,,1.06069E+05
-
-[Geo mean],3.44668E+02,2.37855E+02,2.39033E+02,8.74025E+02
-
-name \ speed (MB/s),./old.txt,./new.txt,slashslash4.txt,x386.txt
-pkg:hash/crc32 goos:darwin goarch:amd64
-CRC32/poly=IEEE/size=15/align=0-8,3.20711E+02,3.36950E+02
-CRC32/poly=IEEE/size=15/align=1-8,3.35516E+02,3.37066E+02
-CRC32/poly=IEEE/size=40/align=0-8,9.74718E+02,9.41823E+02,9.50988E+02
-CRC32/poly=IEEE/size=40/align=1-8,9.73636E+02,9.51759E+02,9.59978E+02
-CRC32/poly=IEEE/size=512/align=0-8,2.14703E+03,8.96715E+03
-CRC32/poly=IEEE/size=512/align=1-8,2.16913E+03,8.95606E+03
-CRC32/poly=IEEE/size=1kB/align=0-8,2.26152E+03,1.08807E+04
-CRC32/poly=IEEE/size=1kB/align=1-8,2.30619E+03,1.09768E+04
-CRC32/poly=IEEE/size=4kB/align=0-8,2.35732E+03,1.37258E+04,2.43489E+03
-CRC32/poly=IEEE/size=4kB/align=1-8,2.32511E+03,1.36770E+04,2.42393E+03
-CRC32/poly=IEEE/size=32kB/align=0-8,2.19443E+03,1.51852E+04
-CRC32/poly=IEEE/size=32kB/align=1-8,2.31415E+03,1.50437E+04
-CRC32/poly=Castagnoli/size=15/align=0-8,9.15799E+02,9.20433E+02
-CRC32/poly=Castagnoli/size=15/align=1-8,8.70312E+02,8.67298E+02
-CRC32/poly=Castagnoli/size=40/align=0-8,2.29560E+03,2.28265E+03,2.16074E+03
-CRC32/poly=Castagnoli/size=40/align=1-8,2.03023E+03,2.06346E+03,2.03587E+03
-CRC32/poly=Castagnoli/size=512/align=0-8,1.27437E+04,1.27578E+04
-CRC32/poly=Castagnoli/size=512/align=1-8,1.21445E+04,1.22049E+04
-CRC32/poly=Castagnoli/size=1kB/align=0-8,1.56355E+04,1.54766E+04
-CRC32/poly=Castagnoli/size=1kB/align=1-8,1.46273E+04,1.49597E+04
-CRC32/poly=Castagnoli/size=4kB/align=0-8,2.50862E+04,2.56897E+04,2.53717E+04
-CRC32/poly=Castagnoli/size=4kB/align=1-8,2.41378E+04,2.52736E+04,2.41249E+04
-CRC32/poly=Castagnoli/size=32kB/align=0-8,2.68975E+04,2.68232E+04
-CRC32/poly=Castagnoli/size=32kB/align=1-8,2.59038E+04,2.68422E+04
-CRC32/poly=Koopman/size=15/align=0-8,4.11932E+02,4.21452E+02
-CRC32/poly=Koopman/size=15/align=1-8,4.27408E+02,4.22362E+02
-CRC32/poly=Koopman/size=40/align=0-8,4.36831E+02,4.56472E+02,4.28261E+02
-CRC32/poly=Koopman/size=40/align=1-8,4.39731E+02,4.54515E+02,4.60660E+02
-CRC32/poly=Koopman/size=512/align=0-8,4.52693E+02,4.75749E+02
-CRC32/poly=Koopman/size=512/align=1-8,4.54579E+02,4.39685E+02
-CRC32/poly=Koopman/size=1kB/align=0-8,4.52443E+02,4.37629E+02
-CRC32/poly=Koopman/size=1kB/align=1-8,4.76558E+02,4.34042E+02
-CRC32/poly=Koopman/size=4kB/align=0-8,4.54022E+02,4.55492E+02,4.52301E+02
-CRC32/poly=Koopman/size=4kB/align=1-8,4.59394E+02,4.54627E+02,4.33984E+02
-CRC32/poly=Koopman/size=32kB/align=0-8,4.53471E+02,4.49828E+02
-CRC32/poly=Koopman/size=32kB/align=1-8,4.70784E+02,4.41379E+02
-pkg:hash/crc32 goos:darwin goarch:386
-CRC32/poly=IEEE/size=15/align=0-8,,,,2.41180E+02
-CRC32/poly=IEEE/size=15/align=1-8,,,,2.36728E+02
-CRC32/poly=IEEE/size=40/align=0-8,,,,6.96493E+02
-CRC32/poly=IEEE/size=40/align=1-8,,,,6.97741E+02
-CRC32/poly=IEEE/size=512/align=0-8,,,,1.53793E+03
-CRC32/poly=IEEE/size=512/align=1-8,,,,1.52435E+03
-CRC32/poly=IEEE/size=1kB/align=0-8,,,,1.63354E+03
-CRC32/poly=IEEE/size=1kB/align=1-8,,,,1.61191E+03
-CRC32/poly=IEEE/size=4kB/align=0-8,,,,1.66842E+03
-CRC32/poly=IEEE/size=4kB/align=1-8,,,,1.68370E+03
-CRC32/poly=IEEE/size=32kB/align=0-8,,,,1.71179E+03
-CRC32/poly=IEEE/size=32kB/align=1-8,,,,1.68829E+03
-CRC32/poly=Castagnoli/size=15/align=0-8,,,,2.52761E+02
-CRC32/poly=Castagnoli/size=15/align=1-8,,,,2.52790E+02
-CRC32/poly=Castagnoli/size=40/align=0-8,,,,6.71281E+02
-CRC32/poly=Castagnoli/size=40/align=1-8,,,,6.89073E+02
-CRC32/poly=Castagnoli/size=512/align=0-8,,,,1.45840E+03
-CRC32/poly=Castagnoli/size=512/align=1-8,,,,1.46342E+03
-CRC32/poly=Castagnoli/size=1kB/align=0-8,,,,1.55922E+03
-CRC32/poly=Castagnoli/size=1kB/align=1-8,,,,1.55926E+03
-CRC32/poly=Castagnoli/size=4kB/align=0-8,,,,1.67078E+03
-CRC32/poly=Castagnoli/size=4kB/align=1-8,,,,1.67399E+03
-CRC32/poly=Castagnoli/size=32kB/align=0-8,,,,1.59801E+03
-CRC32/poly=Castagnoli/size=32kB/align=1-8,,,,1.62604E+03
-CRC32/poly=Koopman/size=15/align=0-8,,,,2.57893E+02
-CRC32/poly=Koopman/size=15/align=1-8,,,,2.66301E+02
-CRC32/poly=Koopman/size=40/align=0-8,,,,2.80521E+02
-CRC32/poly=Koopman/size=40/align=1-8,,,,2.89866E+02
-CRC32/poly=Koopman/size=512/align=0-8,,,,3.04127E+02
-CRC32/poly=Koopman/size=512/align=1-8,,,,3.11609E+02
-CRC32/poly=Koopman/size=1kB/align=0-8,,,,3.09621E+02
-CRC32/poly=Koopman/size=1kB/align=1-8,,,,3.12318E+02
-CRC32/poly=Koopman/size=4kB/align=0-8,,,,3.07516E+02
-CRC32/poly=Koopman/size=4kB/align=1-8,,,,3.11292E+02
-CRC32/poly=Koopman/size=32kB/align=0-8,,,,3.06816E+02
-CRC32/poly=Koopman/size=32kB/align=1-8,,,,3.09085E+02
-
-[Geo mean],1.71221E+03,2.48066E+03,1.69486E+03,6.75103E+02
diff --git a/cmd/benchstat/testdata/allnosplit.golden b/cmd/benchstat/testdata/allnosplit.golden
deleted file mode 100644
index eff91db..0000000
--- a/cmd/benchstat/testdata/allnosplit.golden
+++ /dev/null
@@ -1,75 +0,0 @@
-name \ time/op                             new.txt        old.txt        slashslash4.txt  x386.txt
-CRC32/poly=IEEE/size=15/align=0-8            44.5ns ± 3%    46.9ns ± 8%                     62.4ns ± 9%
-CRC32/poly=IEEE/size=15/align=1-8            44.5ns ± 4%    44.7ns ± 5%                     63.5ns ± 8%
-CRC32/poly=IEEE/size=40/align=0-8            42.5ns ± 6%    41.0ns ± 1%      42.1ns ± 3%    57.4ns ± 3%
-CRC32/poly=IEEE/size=40/align=1-8            42.0ns ± 3%    41.1ns ± 1%      41.7ns ± 5%    57.3ns ± 3%
-CRC32/poly=IEEE/size=512/align=0-8           57.1ns ± 3%   238.0ns ± 5%                    332.3ns ± 2%
-CRC32/poly=IEEE/size=512/align=1-8           57.2ns ± 3%   235.5ns ± 3%                    335.3ns ± 3%
-CRC32/poly=IEEE/size=1kB/align=0-8           94.1ns ± 2%   452.5ns ± 4%                    626.3ns ± 2%
-CRC32/poly=IEEE/size=1kB/align=1-8           93.3ns ± 2%   443.6ns ± 2%                    635.3ns ± 6%
-CRC32/poly=IEEE/size=4kB/align=0-8            298ns ± 1%    1740ns ± 8%      1682ns ± 2%    2457ns ± 7%
-CRC32/poly=IEEE/size=4kB/align=1-8            299ns ± 3%    1764ns ± 6%      1690ns ± 4%    2434ns ± 5%
-CRC32/poly=IEEE/size=32kB/align=0-8          2.16µs ± 3%   14.95µs ± 7%                    19.15µs ± 4%
-CRC32/poly=IEEE/size=32kB/align=1-8          2.18µs ± 3%   14.19µs ± 7%                    19.42µs ± 5%
-CRC32/poly=Castagnoli/size=15/align=0-8      16.3ns ± 2%    16.4ns ± 3%                     59.4ns ± 1%
-CRC32/poly=Castagnoli/size=15/align=1-8      17.3ns ± 2%    17.2ns ± 2%                     59.4ns ± 2%
-CRC32/poly=Castagnoli/size=40/align=0-8      17.5ns ± 4%    17.4ns ± 2%      18.6ns ±11%    59.7ns ± 6%
-CRC32/poly=Castagnoli/size=40/align=1-8      19.4ns ± 2%    19.7ns ± 3%      19.6ns ± 2%    58.1ns ± 7%
-CRC32/poly=Castagnoli/size=512/align=0-8     40.1ns ± 4%    40.2ns ± 2%                    350.8ns ± 5%
-CRC32/poly=Castagnoli/size=512/align=1-8     41.9ns ± 2%    42.1ns ± 3%                    349.8ns ± 7%
-CRC32/poly=Castagnoli/size=1kB/align=0-8     66.2ns ± 1%    65.5ns ± 1%                    656.7ns ± 4%
-CRC32/poly=Castagnoli/size=1kB/align=1-8     68.5ns ± 2%    70.1ns ± 6%                    656.8ns ± 6%
-CRC32/poly=Castagnoli/size=4kB/align=0-8      159ns ± 3%     163ns ± 5%       161ns ± 8%    2452ns ± 4%
-CRC32/poly=Castagnoli/size=4kB/align=1-8      162ns ± 3%     169ns ± 6%       170ns ± 8%    2448ns ± 5%
-CRC32/poly=Castagnoli/size=32kB/align=0-8    1.21µs ± 3%    1.22µs ± 4%                    20.53µs ± 5%
-CRC32/poly=Castagnoli/size=32kB/align=1-8    1.22µs ± 4%    1.26µs ± 3%                    20.18µs ± 9%
-CRC32/poly=Koopman/size=15/align=0-8         35.6ns ± 3%    36.5ns ±11%                     58.2ns ± 4%
-CRC32/poly=Koopman/size=15/align=1-8         35.5ns ± 1%    35.1ns ± 5%                     56.4ns ± 7%
-CRC32/poly=Koopman/size=40/align=0-8         87.6ns ± 2%    91.6ns ± 9%      93.8ns ±13%   142.3ns ± 8%
-CRC32/poly=Koopman/size=40/align=1-8         88.0ns ± 3%    91.1ns ± 6%      86.9ns ± 3%   136.2ns ± 5%
-CRC32/poly=Koopman/size=512/align=0-8        1.08µs ± 3%    1.13µs ± 5%                     1.68µs ± 5%
-CRC32/poly=Koopman/size=512/align=1-8        1.17µs ± 8%    1.13µs ± 6%                     1.64µs ± 4%
-CRC32/poly=Koopman/size=1kB/align=0-8        2.34µs ± 4%    2.24µs ± 6%                     3.31µs ± 4%
-CRC32/poly=Koopman/size=1kB/align=1-8        2.36µs ± 5%    2.15µs ± 2%                     3.28µs ± 3%
-CRC32/poly=Koopman/size=4kB/align=0-8        9.00µs ± 6%    9.03µs ± 6%      9.08µs ± 8%   13.32µs ± 3%
-CRC32/poly=Koopman/size=4kB/align=1-8        9.05µs ±12%    8.94µs ±10%      9.46µs ± 8%   13.16µs ± 3%
-CRC32/poly=Koopman/size=32kB/align=0-8       72.9µs ± 4%    72.4µs ± 9%                    106.9µs ± 6%
-CRC32/poly=Koopman/size=32kB/align=1-8       74.3µs ± 3%    69.6µs ± 3%                    106.1µs ± 4%
-
-name \ speed                               new.txt        old.txt        slashslash4.txt  x386.txt
-CRC32/poly=IEEE/size=15/align=0-8           337MB/s ± 3%   321MB/s ± 8%                    241MB/s ± 8%
-CRC32/poly=IEEE/size=15/align=1-8           337MB/s ± 4%   336MB/s ± 4%                    237MB/s ± 8%
-CRC32/poly=IEEE/size=40/align=0-8           942MB/s ± 5%   975MB/s ± 1%     951MB/s ± 3%   696MB/s ± 3%
-CRC32/poly=IEEE/size=40/align=1-8           952MB/s ± 3%   974MB/s ± 1%     960MB/s ± 4%   698MB/s ± 3%
-CRC32/poly=IEEE/size=512/align=0-8         8.97GB/s ± 3%  2.15GB/s ± 4%                   1.54GB/s ± 2%
-CRC32/poly=IEEE/size=512/align=1-8         8.96GB/s ± 3%  2.17GB/s ± 3%                   1.52GB/s ± 3%
-CRC32/poly=IEEE/size=1kB/align=0-8         10.9GB/s ± 2%   2.3GB/s ± 4%                    1.6GB/s ± 2%
-CRC32/poly=IEEE/size=1kB/align=1-8         11.0GB/s ± 2%   2.3GB/s ± 2%                    1.6GB/s ± 6%
-CRC32/poly=IEEE/size=4kB/align=0-8         13.7GB/s ± 1%   2.4GB/s ± 7%     2.4GB/s ± 2%   1.7GB/s ± 7%
-CRC32/poly=IEEE/size=4kB/align=1-8         13.7GB/s ± 3%   2.3GB/s ± 6%     2.4GB/s ± 4%   1.7GB/s ± 5%
-CRC32/poly=IEEE/size=32kB/align=0-8        15.2GB/s ± 3%   2.2GB/s ± 7%                    1.7GB/s ± 4%
-CRC32/poly=IEEE/size=32kB/align=1-8        15.0GB/s ± 3%   2.3GB/s ± 8%                    1.7GB/s ± 5%
-CRC32/poly=Castagnoli/size=15/align=0-8     920MB/s ± 2%   916MB/s ± 2%                    253MB/s ± 1%
-CRC32/poly=Castagnoli/size=15/align=1-8     867MB/s ± 2%   870MB/s ± 2%                    253MB/s ± 2%
-CRC32/poly=Castagnoli/size=40/align=0-8    2.28GB/s ± 4%  2.30GB/s ± 2%    2.16GB/s ±11%  0.67GB/s ± 6%
-CRC32/poly=Castagnoli/size=40/align=1-8    2.06GB/s ± 2%  2.03GB/s ± 3%    2.04GB/s ± 2%  0.69GB/s ± 6%
-CRC32/poly=Castagnoli/size=512/align=0-8   12.8GB/s ± 4%  12.7GB/s ± 2%                    1.5GB/s ± 5%
-CRC32/poly=Castagnoli/size=512/align=1-8   12.2GB/s ± 1%  12.1GB/s ± 3%                    1.5GB/s ± 7%
-CRC32/poly=Castagnoli/size=1kB/align=0-8   15.5GB/s ± 1%  15.6GB/s ± 1%                    1.6GB/s ± 4%
-CRC32/poly=Castagnoli/size=1kB/align=1-8   15.0GB/s ± 2%  14.6GB/s ± 6%                    1.6GB/s ± 5%
-CRC32/poly=Castagnoli/size=4kB/align=0-8   25.7GB/s ± 3%  25.1GB/s ± 5%    25.4GB/s ± 7%   1.7GB/s ± 4%
-CRC32/poly=Castagnoli/size=4kB/align=1-8   25.3GB/s ± 3%  24.1GB/s ± 6%    24.1GB/s ± 8%   1.7GB/s ± 5%
-CRC32/poly=Castagnoli/size=32kB/align=0-8  26.8GB/s ± 5%  26.9GB/s ± 4%                    1.6GB/s ± 5%
-CRC32/poly=Castagnoli/size=32kB/align=1-8  26.8GB/s ± 4%  25.9GB/s ± 3%                    1.6GB/s ± 8%
-CRC32/poly=Koopman/size=15/align=0-8        421MB/s ± 3%   412MB/s ±10%                    258MB/s ± 5%
-CRC32/poly=Koopman/size=15/align=1-8        422MB/s ± 1%   427MB/s ± 5%                    266MB/s ± 6%
-CRC32/poly=Koopman/size=40/align=0-8        456MB/s ± 2%   437MB/s ± 9%     428MB/s ±12%   281MB/s ± 7%
-CRC32/poly=Koopman/size=40/align=1-8        455MB/s ± 3%   440MB/s ± 6%     461MB/s ± 3%   290MB/s ± 8%
-CRC32/poly=Koopman/size=512/align=0-8       476MB/s ± 3%   453MB/s ± 5%                    304MB/s ± 5%
-CRC32/poly=Koopman/size=512/align=1-8       440MB/s ± 8%   455MB/s ± 6%                    312MB/s ± 3%
-CRC32/poly=Koopman/size=1kB/align=0-8       438MB/s ± 4%   452MB/s ± 9%                    310MB/s ± 4%
-CRC32/poly=Koopman/size=1kB/align=1-8       434MB/s ± 5%   477MB/s ± 2%                    312MB/s ± 3%
-CRC32/poly=Koopman/size=4kB/align=0-8       455MB/s ± 6%   454MB/s ± 5%     452MB/s ± 8%   308MB/s ± 3%
-CRC32/poly=Koopman/size=4kB/align=1-8       455MB/s ±11%   459MB/s ± 9%     434MB/s ± 9%   311MB/s ± 3%
-CRC32/poly=Koopman/size=32kB/align=0-8      450MB/s ± 4%   453MB/s ± 8%                    307MB/s ± 6%
-CRC32/poly=Koopman/size=32kB/align=1-8      441MB/s ± 3%   471MB/s ± 3%                    309MB/s ± 4%
diff --git a/cmd/benchstat/testdata/allnosplitcsv.golden b/cmd/benchstat/testdata/allnosplitcsv.golden
deleted file mode 100644
index b3513a4..0000000
--- a/cmd/benchstat/testdata/allnosplitcsv.golden
+++ /dev/null
@@ -1,77 +0,0 @@
-name \ time/op (ns/op),new.txt,±,old.txt,±,slashslash4.txt,±,x386.txt,±
-CRC32/poly=IEEE/size=15/align=0-8,4.45200E+01,3%,4.68700E+01,8%,,,6.23500E+01,9%
-CRC32/poly=IEEE/size=15/align=1-8,4.45000E+01,4%,4.47100E+01,5%,,,6.34900E+01,8%
-CRC32/poly=IEEE/size=40/align=0-8,4.25000E+01,6%,4.10375E+01,1%,4.20800E+01,3%,5.74400E+01,3%
-CRC32/poly=IEEE/size=40/align=1-8,4.20400E+01,3%,4.10778E+01,1%,4.16800E+01,5%,5.73300E+01,3%
-CRC32/poly=IEEE/size=512/align=0-8,5.71200E+01,3%,2.38000E+02,5%,,,3.32333E+02,2%
-CRC32/poly=IEEE/size=512/align=1-8,5.71700E+01,3%,2.35500E+02,3%,,,3.35300E+02,3%
-CRC32/poly=IEEE/size=1kB/align=0-8,9.41125E+01,2%,4.52500E+02,4%,,,6.26300E+02,2%
-CRC32/poly=IEEE/size=1kB/align=1-8,9.32875E+01,2%,4.43600E+02,2%,,,6.35300E+02,6%
-CRC32/poly=IEEE/size=4kB/align=0-8,2.98111E+02,1%,1.74000E+03,8%,1.68200E+03,2%,2.45670E+03,7%
-CRC32/poly=IEEE/size=4kB/align=1-8,2.99100E+02,3%,1.76430E+03,6%,1.68980E+03,4%,2.43450E+03,5%
-CRC32/poly=IEEE/size=32kB/align=0-8,2.15800E+03,3%,1.49529E+04,7%,,,1.91499E+04,4%
-CRC32/poly=IEEE/size=32kB/align=1-8,2.17830E+03,3%,1.41888E+04,7%,,,1.94203E+04,5%
-CRC32/poly=Castagnoli/size=15/align=0-8,1.63000E+01,2%,1.63778E+01,3%,,,5.93625E+01,1%
-CRC32/poly=Castagnoli/size=15/align=1-8,1.72900E+01,2%,1.72222E+01,2%,,,5.93500E+01,2%
-CRC32/poly=Castagnoli/size=40/align=0-8,1.75300E+01,4%,1.74300E+01,2%,1.85800E+01,11%,5.96600E+01,6%
-CRC32/poly=Castagnoli/size=40/align=1-8,1.93900E+01,2%,1.97100E+01,3%,1.96375E+01,2%,5.81100E+01,7%
-CRC32/poly=Castagnoli/size=512/align=0-8,4.01300E+01,4%,4.01700E+01,2%,,,3.50800E+02,5%
-CRC32/poly=Castagnoli/size=512/align=1-8,4.19444E+01,2%,4.21400E+01,3%,,,3.49800E+02,7%
-CRC32/poly=Castagnoli/size=1kB/align=0-8,6.61625E+01,1%,6.55000E+01,1%,,,6.56700E+02,4%
-CRC32/poly=Castagnoli/size=1kB/align=1-8,6.84667E+01,2%,7.00900E+01,6%,,,6.56800E+02,6%
-CRC32/poly=Castagnoli/size=4kB/align=0-8,1.58800E+02,3%,1.62800E+02,5%,1.61200E+02,8%,2.45180E+03,4%
-CRC32/poly=Castagnoli/size=4kB/align=1-8,1.61600E+02,3%,1.69400E+02,6%,1.69600E+02,8%,2.44800E+03,5%
-CRC32/poly=Castagnoli/size=32kB/align=0-8,1.21433E+03,3%,1.21822E+03,4%,,,2.05261E+04,5%
-CRC32/poly=Castagnoli/size=32kB/align=1-8,1.22080E+03,4%,1.26478E+03,3%,,,2.01794E+04,9%
-CRC32/poly=Koopman/size=15/align=0-8,3.56000E+01,3%,3.65100E+01,11%,,,5.82100E+01,4%
-CRC32/poly=Koopman/size=15/align=1-8,3.55111E+01,1%,3.51500E+01,5%,,,5.64000E+01,7%
-CRC32/poly=Koopman/size=40/align=0-8,8.76500E+01,2%,9.16400E+01,9%,9.38100E+01,13%,1.42300E+02,8%
-CRC32/poly=Koopman/size=40/align=1-8,8.80300E+01,3%,9.10800E+01,6%,8.68600E+01,3%,1.36222E+02,5%
-CRC32/poly=Koopman/size=512/align=0-8,1.07590E+03,3%,1.13170E+03,5%,,,1.68440E+03,5%
-CRC32/poly=Koopman/size=512/align=1-8,1.16660E+03,8%,1.12680E+03,6%,,,1.64290E+03,4%
-CRC32/poly=Koopman/size=1kB/align=0-8,2.34070E+03,4%,2.24333E+03,6%,,,3.30890E+03,4%
-CRC32/poly=Koopman/size=1kB/align=1-8,2.36010E+03,5%,2.14867E+03,2%,,,3.27920E+03,3%
-CRC32/poly=Koopman/size=4kB/align=0-8,9.00320E+03,6%,9.03150E+03,6%,9.08160E+03,8%,1.33243E+04,3%
-CRC32/poly=Koopman/size=4kB/align=1-8,9.04630E+03,12%,8.94020E+03,10%,9.45520E+03,8%,1.31612E+04,3%
-CRC32/poly=Koopman/size=32kB/align=0-8,7.29005E+04,4%,7.24280E+04,9%,,,1.06891E+05,6%
-CRC32/poly=Koopman/size=32kB/align=1-8,7.42809E+04,3%,6.96194E+04,3%,,,1.06069E+05,4%
-[Geo mean],2.37855E+02,,3.44668E+02,,2.39033E+02,,8.74025E+02
-
-name \ speed (MB/s),new.txt,±,old.txt,±,slashslash4.txt,±,x386.txt,±
-CRC32/poly=IEEE/size=15/align=0-8,3.36950E+02,3%,3.20711E+02,8%,,,2.41180E+02,8%
-CRC32/poly=IEEE/size=15/align=1-8,3.37066E+02,4%,3.35516E+02,4%,,,2.36728E+02,8%
-CRC32/poly=IEEE/size=40/align=0-8,9.41823E+02,5%,9.74718E+02,1%,9.50988E+02,3%,6.96493E+02,3%
-CRC32/poly=IEEE/size=40/align=1-8,9.51759E+02,3%,9.73636E+02,1%,9.59978E+02,4%,6.97741E+02,3%
-CRC32/poly=IEEE/size=512/align=0-8,8.96715E+03,3%,2.14703E+03,4%,,,1.53793E+03,2%
-CRC32/poly=IEEE/size=512/align=1-8,8.95606E+03,3%,2.16913E+03,3%,,,1.52435E+03,3%
-CRC32/poly=IEEE/size=1kB/align=0-8,1.08807E+04,2%,2.26152E+03,4%,,,1.63354E+03,2%
-CRC32/poly=IEEE/size=1kB/align=1-8,1.09768E+04,2%,2.30619E+03,2%,,,1.61191E+03,6%
-CRC32/poly=IEEE/size=4kB/align=0-8,1.37258E+04,1%,2.35732E+03,7%,2.43489E+03,2%,1.66842E+03,7%
-CRC32/poly=IEEE/size=4kB/align=1-8,1.36770E+04,3%,2.32511E+03,6%,2.42393E+03,4%,1.68370E+03,5%
-CRC32/poly=IEEE/size=32kB/align=0-8,1.51852E+04,3%,2.19443E+03,7%,,,1.71179E+03,4%
-CRC32/poly=IEEE/size=32kB/align=1-8,1.50437E+04,3%,2.31415E+03,8%,,,1.68829E+03,5%
-CRC32/poly=Castagnoli/size=15/align=0-8,9.20433E+02,2%,9.15799E+02,2%,,,2.52761E+02,1%
-CRC32/poly=Castagnoli/size=15/align=1-8,8.67298E+02,2%,8.70312E+02,2%,,,2.52790E+02,2%
-CRC32/poly=Castagnoli/size=40/align=0-8,2.28265E+03,4%,2.29560E+03,2%,2.16074E+03,11%,6.71281E+02,6%
-CRC32/poly=Castagnoli/size=40/align=1-8,2.06346E+03,2%,2.03023E+03,3%,2.03587E+03,2%,6.89073E+02,6%
-CRC32/poly=Castagnoli/size=512/align=0-8,1.27578E+04,4%,1.27437E+04,2%,,,1.45840E+03,5%
-CRC32/poly=Castagnoli/size=512/align=1-8,1.22049E+04,1%,1.21445E+04,3%,,,1.46342E+03,7%
-CRC32/poly=Castagnoli/size=1kB/align=0-8,1.54766E+04,1%,1.56355E+04,1%,,,1.55922E+03,4%
-CRC32/poly=Castagnoli/size=1kB/align=1-8,1.49597E+04,2%,1.46273E+04,6%,,,1.55926E+03,5%
-CRC32/poly=Castagnoli/size=4kB/align=0-8,2.56897E+04,3%,2.50862E+04,5%,2.53717E+04,7%,1.67078E+03,4%
-CRC32/poly=Castagnoli/size=4kB/align=1-8,2.52736E+04,3%,2.41378E+04,6%,2.41249E+04,8%,1.67399E+03,5%
-CRC32/poly=Castagnoli/size=32kB/align=0-8,2.68232E+04,5%,2.68975E+04,4%,,,1.59801E+03,5%
-CRC32/poly=Castagnoli/size=32kB/align=1-8,2.68422E+04,4%,2.59038E+04,3%,,,1.62604E+03,8%
-CRC32/poly=Koopman/size=15/align=0-8,4.21452E+02,3%,4.11932E+02,10%,,,2.57893E+02,5%
-CRC32/poly=Koopman/size=15/align=1-8,4.22362E+02,1%,4.27408E+02,5%,,,2.66301E+02,6%
-CRC32/poly=Koopman/size=40/align=0-8,4.56472E+02,2%,4.36831E+02,9%,4.28261E+02,12%,2.80521E+02,7%
-CRC32/poly=Koopman/size=40/align=1-8,4.54515E+02,3%,4.39731E+02,6%,4.60660E+02,3%,2.89866E+02,8%
-CRC32/poly=Koopman/size=512/align=0-8,4.75749E+02,3%,4.52693E+02,5%,,,3.04127E+02,5%
-CRC32/poly=Koopman/size=512/align=1-8,4.39685E+02,8%,4.54579E+02,6%,,,3.11609E+02,3%
-CRC32/poly=Koopman/size=1kB/align=0-8,4.37629E+02,4%,4.52443E+02,9%,,,3.09621E+02,4%
-CRC32/poly=Koopman/size=1kB/align=1-8,4.34042E+02,5%,4.76558E+02,2%,,,3.12318E+02,3%
-CRC32/poly=Koopman/size=4kB/align=0-8,4.55492E+02,6%,4.54022E+02,5%,4.52301E+02,8%,3.07516E+02,3%
-CRC32/poly=Koopman/size=4kB/align=1-8,4.54627E+02,11%,4.59394E+02,9%,4.33984E+02,9%,3.11292E+02,3%
-CRC32/poly=Koopman/size=32kB/align=0-8,4.49828E+02,4%,4.53471E+02,8%,,,3.06816E+02,6%
-CRC32/poly=Koopman/size=32kB/align=1-8,4.41379E+02,3%,4.70784E+02,3%,,,3.09085E+02,4%
-[Geo mean],2.48066E+03,,1.71221E+03,,1.69486E+03,,6.75103E+02
diff --git a/cmd/benchstat/testdata/bench_test.go b/cmd/benchstat/testdata/bench_test.go
new file mode 100644
index 0000000..c7285de
--- /dev/null
+++ b/cmd/benchstat/testdata/bench_test.go
@@ -0,0 +1,48 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+	"encoding/gob"
+	"encoding/json"
+	"io/ioutil"
+	"testing"
+)
+
+// BenchmarkEncode is the example benchmark used in package documentation; it encodes one small tree as JSON and as gob.
+func BenchmarkEncode(b *testing.B) {
+	data := makeTree(4) // built once, before either sub-benchmark runs: a full 4-level tree (15 nodes)
+
+	b.Run("format=json", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			e := json.NewEncoder(ioutil.Discard) // fresh encoder per iteration, so its construction is inside the measured loop; output is discarded
+			err := e.Encode(data)
+			if err != nil {
+				b.Fatal(err)
+			}
+		}
+	})
+
+	b.Run("format=gob", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			e := gob.NewEncoder(ioutil.Discard) // same loop shape as the JSON case; only the encoder differs
+			err := e.Encode(data)
+			if err != nil {
+				b.Fatal(err)
+			}
+		}
+	})
+}
+
+type tree struct { // tree is a binary tree node that carries no payload, only child links
+	L, R *tree // left and right subtrees; nil where there is no child
+}
+
+func makeTree(depth int) *tree { // makeTree returns a full binary tree with depth levels (2^depth - 1 nodes), or nil if depth <= 0
+	if depth <= 0 { // base case: no levels left to build
+		return nil
+	}
+	return &tree{makeTree(depth - 1), makeTree(depth - 1)} // two separately built subtrees, each one level shallower
+}
diff --git a/cmd/benchstat/testdata/crc-new.txt b/cmd/benchstat/testdata/crc-new.txt
new file mode 100644
index 0000000..b45c622
--- /dev/null
+++ b/cmd/benchstat/testdata/crc-new.txt
@@ -0,0 +1,367 @@
+pkg: hash/crc32
+goarch: amd64
+goos: darwin
+note: hw acceleration enabled
+
+BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        43.6 ns/op	 343.87 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        45.4 ns/op	 330.50 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        43.4 ns/op	 345.50 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        45.3 ns/op	 331.23 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        43.9 ns/op	 341.73 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        46.0 ns/op	 326.03 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        45.1 ns/op	 332.54 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        43.8 ns/op	 342.12 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        43.8 ns/op	 342.14 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        44.9 ns/op	 333.84 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        43.8 ns/op	 342.08 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.0 ns/op	 340.68 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        45.0 ns/op	 333.41 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.1 ns/op	 340.41 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        43.7 ns/op	 343.58 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        43.7 ns/op	 342.96 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.9 ns/op	 333.71 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        46.3 ns/op	 323.68 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.9 ns/op	 333.82 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.6 ns/op	 336.33 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        43.9 ns/op	 912.06 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        42.3 ns/op	 945.28 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        42.7 ns/op	 937.51 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        43.5 ns/op	 919.03 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        44.9 ns/op	 890.35 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        42.6 ns/op	 938.20 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        41.3 ns/op	 968.78 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        41.2 ns/op	 970.69 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        41.3 ns/op	 968.06 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        41.3 ns/op	 968.27 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.3 ns/op	 968.46 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        42.5 ns/op	 941.20 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        43.2 ns/op	 926.72 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        42.1 ns/op	 949.91 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.9 ns/op	 953.96 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.9 ns/op	 954.23 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.5 ns/op	 964.12 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        42.9 ns/op	 932.70 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.7 ns/op	 959.93 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.4 ns/op	 966.36 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	 2000000	        56.1 ns/op	9128.24 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	 2000000	        56.2 ns/op	9107.94 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	 3000000	        56.1 ns/op	9129.45 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	 3000000	        56.5 ns/op	9064.49 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	 3000000	        58.1 ns/op	8819.84 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	 3000000	        59.0 ns/op	8675.61 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	 2000000	        55.9 ns/op	9157.53 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	 3000000	        57.0 ns/op	8976.22 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	 2000000	        58.7 ns/op	8724.66 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	 2000000	        57.6 ns/op	8887.48 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	 2000000	        58.3 ns/op	8782.52 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	 3000000	        57.6 ns/op	8882.44 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	 2000000	        57.0 ns/op	8980.50 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	 3000000	        56.4 ns/op	9071.81 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	 3000000	        56.0 ns/op	9135.04 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	 2000000	        58.8 ns/op	8705.97 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	 2000000	        56.2 ns/op	9115.68 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	 3000000	        57.3 ns/op	8941.28 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	 3000000	        56.3 ns/op	9090.66 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	 3000000	        57.8 ns/op	8854.75 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	 2000000	        92.6 ns/op	11058.32 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	 2000000	        93.8 ns/op	10913.47 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	 2000000	        93.5 ns/op	10953.07 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	 2000000	        95.0 ns/op	10778.58 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	 2000000	        94.8 ns/op	10799.14 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	 2000000	        95.3 ns/op	10741.03 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	 2000000	        92.6 ns/op	11055.29 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	 1000000	       100 ns/op	10189.63 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	 2000000	        95.3 ns/op	10747.01 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	 1000000	       102 ns/op	9978.90 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	 1000000	       103 ns/op	9907.74 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	 2000000	        92.8 ns/op	11036.21 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	 2000000	        92.8 ns/op	11031.01 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	 1000000	       102 ns/op	10006.87 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	 2000000	        93.1 ns/op	11000.89 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	 2000000	        92.8 ns/op	11029.36 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	 2000000	        95.3 ns/op	10743.40 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	 2000000	        93.6 ns/op	10941.47 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	 2000000	        93.3 ns/op	10979.13 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	 2000000	        92.6 ns/op	11053.13 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  500000	       302 ns/op	13561.37 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  500000	       294 ns/op	13906.19 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  500000	       298 ns/op	13734.81 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  500000	       296 ns/op	13826.62 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  500000	       301 ns/op	13598.75 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  500000	       299 ns/op	13676.26 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  500000	       299 ns/op	13686.88 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  500000	       289 ns/op	14143.28 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  500000	       296 ns/op	13824.99 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  500000	       298 ns/op	13716.11 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  500000	       295 ns/op	13878.73 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  500000	       302 ns/op	13531.91 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  500000	       295 ns/op	13868.51 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  500000	       293 ns/op	13944.64 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  500000	       300 ns/op	13620.51 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  500000	       296 ns/op	13820.66 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  500000	       305 ns/op	13418.41 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  500000	       293 ns/op	13960.90 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  500000	       303 ns/op	13506.47 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  500000	       309 ns/op	13218.83 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	  100000	      2149 ns/op	15241.57 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   50000	      2230 ns/op	14693.09 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	  100000	      2183 ns/op	15005.52 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	  100000	      2115 ns/op	15486.09 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	  100000	      2227 ns/op	14708.97 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   50000	      2140 ns/op	15308.69 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	  100000	      2157 ns/op	15188.25 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   50000	      2133 ns/op	15359.48 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	  100000	      2117 ns/op	15475.06 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	  100000	      2129 ns/op	15385.26 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	  100000	      2233 ns/op	14673.10 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	  100000	      2244 ns/op	14596.46 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   50000	      2154 ns/op	15211.73 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	  100000	      2156 ns/op	15196.79 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   50000	      2159 ns/op	15175.66 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	  100000	      2151 ns/op	15228.03 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	  100000	      2169 ns/op	15105.80 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	  100000	      2206 ns/op	14850.25 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	  100000	      2145 ns/op	15271.77 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   50000	      2166 ns/op	15126.92 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.1 ns/op	 932.96 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.4 ns/op	 912.82 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.2 ns/op	 924.87 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.1 ns/op	 934.22 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.2 ns/op	 924.90 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.6 ns/op	 904.66 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.5 ns/op	 910.72 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        17.6 ns/op	 853.00 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.4 ns/op	 915.21 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.2 ns/op	 923.54 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.2 ns/op	 871.49 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.5 ns/op	 854.97 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.3 ns/op	 864.75 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        16.9 ns/op	 885.36 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.4 ns/op	 862.94 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        16.9 ns/op	 887.89 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.0 ns/op	 879.90 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.7 ns/op	 847.32 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.6 ns/op	 854.26 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.4 ns/op	 864.10 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.5 ns/op	2279.37 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.1 ns/op	2334.42 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.9 ns/op	2235.38 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.1 ns/op	2338.42 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.2 ns/op	2325.21 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.8 ns/op	2251.24 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.8 ns/op	2242.41 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        18.2 ns/op	2199.30 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.4 ns/op	2305.18 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.3 ns/op	2315.62 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.5 ns/op	2055.96 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.7 ns/op	2034.99 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.3 ns/op	2068.49 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.8 ns/op	2021.43 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.4 ns/op	2060.79 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.3 ns/op	2076.79 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.1 ns/op	2091.18 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.0 ns/op	2100.75 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.5 ns/op	2049.71 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.3 ns/op	2074.54 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        39.7 ns/op	12891.27 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        40.1 ns/op	12777.24 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        41.7 ns/op	12266.52 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        39.8 ns/op	12866.81 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        39.8 ns/op	12862.14 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        40.8 ns/op	12537.54 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        39.3 ns/op	13022.34 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        39.9 ns/op	12817.64 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        40.8 ns/op	12543.27 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        39.4 ns/op	12993.64 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        41.7 ns/op	12276.08 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        42.0 ns/op	12199.95 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        42.2 ns/op	12121.96 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        41.9 ns/op	12215.42 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        43.2 ns/op	11854.68 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        42.1 ns/op	12165.99 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        41.5 ns/op	12328.95 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        41.9 ns/op	12206.89 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        41.6 ns/op	12306.14 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        42.6 ns/op	12022.39 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        65.4 ns/op	15646.46 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        65.9 ns/op	15531.97 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        66.4 ns/op	15422.12 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        66.7 ns/op	15358.30 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        72.1 ns/op	14207.76 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        66.3 ns/op	15455.53 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        66.2 ns/op	15479.94 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        68.6 ns/op	14926.33 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        66.3 ns/op	15436.78 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        66.1 ns/op	15481.91 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        68.6 ns/op	14928.29 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        67.2 ns/op	15241.88 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        68.3 ns/op	14985.73 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        68.5 ns/op	14958.31 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        69.2 ns/op	14806.15 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        68.4 ns/op	14977.84 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        69.1 ns/op	14822.32 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        67.5 ns/op	15171.65 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        70.7 ns/op	14489.26 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        69.4 ns/op	14744.72 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       157 ns/op	25978.03 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       156 ns/op	26173.33 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       160 ns/op	25471.67 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       156 ns/op	26190.72 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       164 ns/op	24861.89 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       157 ns/op	26018.79 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       156 ns/op	26101.53 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       163 ns/op	24990.34 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       162 ns/op	25138.27 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       157 ns/op	25972.54 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       164 ns/op	24908.40 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       160 ns/op	25529.63 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       167 ns/op	24412.21 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       162 ns/op	25206.52 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       158 ns/op	25827.66 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       160 ns/op	25519.93 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       163 ns/op	25090.11 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       159 ns/op	25657.75 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       160 ns/op	25576.18 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       163 ns/op	25007.68 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1233 ns/op	26561.87 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1223 ns/op	26786.22 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1193 ns/op	27451.16 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1288 ns/op	25436.18 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1212 ns/op	27022.56 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1245 ns/op	26314.42 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1208 ns/op	27105.89 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1200 ns/op	27296.77 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1226 ns/op	26714.57 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  200000	      1189 ns/op	27542.78 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1235 ns/op	26530.39 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1193 ns/op	27444.08 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1272 ns/op	25752.85 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1210 ns/op	27080.94 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1207 ns/op	27142.02 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1245 ns/op	26305.41 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1226 ns/op	26710.70 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1223 ns/op	26787.03 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1217 ns/op	26921.93 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1180 ns/op	27746.71 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        35.6 ns/op	 421.93 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        35.2 ns/op	 425.84 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        36.1 ns/op	 415.35 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        35.2 ns/op	 426.25 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        35.6 ns/op	 420.98 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        36.6 ns/op	 410.25 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        35.1 ns/op	 427.35 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        35.8 ns/op	 419.33 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        35.6 ns/op	 421.31 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        35.2 ns/op	 425.93 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        35.1 ns/op	 427.06 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        35.7 ns/op	 420.07 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 3000000	        35.2 ns/op	 425.76 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        35.4 ns/op	 423.84 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        35.4 ns/op	 423.98 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        37.4 ns/op	 400.57 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        35.7 ns/op	 419.68 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        35.6 ns/op	 421.03 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        35.5 ns/op	 422.80 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        36.0 ns/op	 417.04 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        89.8 ns/op	 445.33 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        89.1 ns/op	 448.86 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        87.6 ns/op	 456.76 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        86.7 ns/op	 461.44 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        86.6 ns/op	 462.14 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        88.0 ns/op	 454.58 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        87.2 ns/op	 458.73 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        87.5 ns/op	 457.33 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        86.4 ns/op	 462.92 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        87.6 ns/op	 456.63 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        87.2 ns/op	 458.58 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        87.0 ns/op	 459.82 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        89.1 ns/op	 449.14 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        86.5 ns/op	 462.62 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        88.8 ns/op	 450.43 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        87.1 ns/op	 459.03 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        87.8 ns/op	 455.54 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        87.5 ns/op	 457.36 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        90.9 ns/op	 440.04 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        88.4 ns/op	 452.59 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1100 ns/op	 465.26 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1072 ns/op	 477.31 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1075 ns/op	 475.90 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1056 ns/op	 484.71 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1113 ns/op	 459.68 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1054 ns/op	 485.69 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1066 ns/op	 480.13 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1074 ns/op	 476.36 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1057 ns/op	 483.93 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1092 ns/op	 468.52 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1074 ns/op	 476.50 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1100 ns/op	 465.08 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1106 ns/op	 462.87 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1226 ns/op	 417.43 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1189 ns/op	 430.31 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  100000	      1176 ns/op	 435.29 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1235 ns/op	 414.39 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1213 ns/op	 422.03 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1210 ns/op	 422.92 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1137 ns/op	 450.03 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2259 ns/op	 453.12 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2322 ns/op	 441.00 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2350 ns/op	 435.62 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2355 ns/op	 434.74 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	   50000	      2291 ns/op	 446.86 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2343 ns/op	 436.93 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2416 ns/op	 423.68 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2410 ns/op	 424.86 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2256 ns/op	 453.73 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2405 ns/op	 425.75 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	   50000	      2472 ns/op	 414.20 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2433 ns/op	 420.76 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	   50000	      2390 ns/op	 428.34 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2368 ns/op	 432.28 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2294 ns/op	 446.37 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2334 ns/op	 438.61 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2298 ns/op	 445.50 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2374 ns/op	 431.17 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2354 ns/op	 434.95 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2284 ns/op	 448.24 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8799 ns/op	 465.49 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9346 ns/op	 438.26 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9129 ns/op	 448.65 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8710 ns/op	 470.25 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8699 ns/op	 470.83 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8704 ns/op	 470.58 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9563 ns/op	 428.28 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9173 ns/op	 446.50 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9286 ns/op	 441.08 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8623 ns/op	 475.00 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      8410 ns/op	 487.02 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      8847 ns/op	 462.96 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      8430 ns/op	 485.83 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      8505 ns/op	 481.58 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      8426 ns/op	 486.11 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	     10107 ns/op	 405.24 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9125 ns/op	 448.83 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9740 ns/op	 420.50 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9564 ns/op	 428.23 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9309 ns/op	 439.97 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     69825 ns/op	 469.28 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     74409 ns/op	 440.37 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     75041 ns/op	 436.67 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     72268 ns/op	 453.42 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     73333 ns/op	 446.84 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     70923 ns/op	 462.02 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     70095 ns/op	 467.48 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     76125 ns/op	 430.45 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     73908 ns/op	 443.36 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     73078 ns/op	 448.39 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     76238 ns/op	 429.81 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     74141 ns/op	 441.97 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     71910 ns/op	 455.67 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     76684 ns/op	 427.31 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     75951 ns/op	 431.43 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     73299 ns/op	 447.04 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     76288 ns/op	 429.53 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     73453 ns/op	 446.10 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     72748 ns/op	 450.43 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     72097 ns/op	 454.50 MB/s
+PASS
+ok  	hash/crc32	72.655s
diff --git a/cmd/benchstat/testdata/crc-old.txt b/cmd/benchstat/testdata/crc-old.txt
new file mode 100644
index 0000000..cd1b57e
--- /dev/null
+++ b/cmd/benchstat/testdata/crc-old.txt
@@ -0,0 +1,367 @@
+pkg: hash/crc32
+goarch: amd64
+goos: darwin
+note: hw acceleration disabled
+
+BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        44.6 ns/op	 336.59 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        47.3 ns/op	 317.29 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        44.6 ns/op	 336.14 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        46.3 ns/op	 323.80 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        50.7 ns/op	 295.90 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        45.8 ns/op	 327.41 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        46.8 ns/op	 320.56 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        44.3 ns/op	 338.48 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        47.7 ns/op	 314.52 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        50.6 ns/op	 296.42 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        46.8 ns/op	 320.44 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.0 ns/op	 340.69 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.6 ns/op	 336.05 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.1 ns/op	 340.42 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.5 ns/op	 336.82 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.0 ns/op	 340.58 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.2 ns/op	 339.16 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        45.1 ns/op	 332.85 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.0 ns/op	 340.71 MB/s
+BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        45.8 ns/op	 327.44 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        41.0 ns/op	 975.84 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        43.0 ns/op	 930.22 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        40.9 ns/op	 978.39 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        42.3 ns/op	 944.76 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        41.0 ns/op	 976.58 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        41.0 ns/op	 975.15 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        41.3 ns/op	 967.88 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        40.8 ns/op	 979.93 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        41.1 ns/op	 974.19 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        41.2 ns/op	 969.78 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        42.9 ns/op	 931.47 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        40.9 ns/op	 977.34 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.4 ns/op	 965.44 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.2 ns/op	 971.79 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.1 ns/op	 972.53 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        40.8 ns/op	 979.69 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        40.9 ns/op	 978.70 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        40.9 ns/op	 978.58 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.5 ns/op	 964.07 MB/s
+BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.0 ns/op	 974.58 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       249 ns/op	2051.08 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       248 ns/op	2058.43 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       232 ns/op	2202.80 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       238 ns/op	2145.40 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       234 ns/op	2181.69 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       237 ns/op	2151.78 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       241 ns/op	2117.97 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       231 ns/op	2213.97 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       231 ns/op	2207.52 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       239 ns/op	2139.64 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       238 ns/op	2143.57 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       232 ns/op	2205.23 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       230 ns/op	2220.72 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       233 ns/op	2192.30 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       231 ns/op	2215.20 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       238 ns/op	2143.03 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       242 ns/op	2108.05 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       241 ns/op	2117.57 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       230 ns/op	2220.54 MB/s
+BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       240 ns/op	2125.08 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  300000	       435 ns/op	2352.05 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  300000	       461 ns/op	2218.18 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  300000	       446 ns/op	2291.09 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  300000	       448 ns/op	2284.64 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  300000	       448 ns/op	2282.03 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  300000	       464 ns/op	2206.86 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  300000	       456 ns/op	2242.96 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  300000	       457 ns/op	2239.06 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  300000	       461 ns/op	2219.82 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  300000	       449 ns/op	2278.55 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  300000	       436 ns/op	2343.28 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  300000	       452 ns/op	2263.28 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  300000	       443 ns/op	2310.35 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  300000	       446 ns/op	2292.48 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  300000	       436 ns/op	2346.76 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  300000	       445 ns/op	2296.07 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  300000	       451 ns/op	2268.08 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  300000	       450 ns/op	2274.49 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  300000	       441 ns/op	2320.41 MB/s
+BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  300000	       436 ns/op	2346.69 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1698 ns/op	2411.93 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1667 ns/op	2456.00 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1694 ns/op	2416.70 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1654 ns/op	2476.16 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1704 ns/op	2402.42 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1876 ns/op	2182.35 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1790 ns/op	2287.20 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1812 ns/op	2259.59 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1828 ns/op	2239.82 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1677 ns/op	2441.05 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1679 ns/op	2438.77 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1705 ns/op	2401.47 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1665 ns/op	2459.27 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1678 ns/op	2440.03 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1819 ns/op	2251.46 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1871 ns/op	2188.25 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1878 ns/op	2180.70 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1782 ns/op	2297.30 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1797 ns/op	2278.59 MB/s
+BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1769 ns/op	2315.22 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     14964 ns/op	2189.65 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     14612 ns/op	2242.49 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     14258 ns/op	2298.15 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     15641 ns/op	2094.94 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     14653 ns/op	2236.19 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     15101 ns/op	2169.79 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     15065 ns/op	2174.97 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     15801 ns/op	2073.78 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     15459 ns/op	2119.55 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     13975 ns/op	2344.74 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     13567 ns/op	2415.11 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     14515 ns/op	2257.37 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     14510 ns/op	2258.21 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     15133 ns/op	2165.26 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     14383 ns/op	2278.22 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     14983 ns/op	2186.90 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     13519 ns/op	2423.74 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     13154 ns/op	2491.10 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     13594 ns/op	2410.43 MB/s
+BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     14530 ns/op	2255.12 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        18.0 ns/op	 834.41 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.4 ns/op	 913.19 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.6 ns/op	 903.85 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.3 ns/op	 920.52 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.0 ns/op	 937.53 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.6 ns/op	 903.12 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.0 ns/op	 934.62 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.6 ns/op	 903.49 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.1 ns/op	 930.77 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.8 ns/op	 895.10 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.0 ns/op	 884.78 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.5 ns/op	 855.00 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.1 ns/op	 874.74 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.5 ns/op	 857.53 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.2 ns/op	 869.69 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.0 ns/op	 883.09 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.1 ns/op	 877.29 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.2 ns/op	 869.59 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        18.6 ns/op	 807.75 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.4 ns/op	 861.10 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.3 ns/op	2317.87 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.6 ns/op	2273.98 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.5 ns/op	2280.49 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.2 ns/op	2322.88 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.7 ns/op	2257.80 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.5 ns/op	2290.51 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.2 ns/op	2319.50 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.2 ns/op	2329.20 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.4 ns/op	2300.87 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.7 ns/op	2262.94 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        20.3 ns/op	1966.20 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.3 ns/op	2077.39 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.1 ns/op	2094.95 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.8 ns/op	2022.06 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.9 ns/op	2009.49 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.6 ns/op	2037.74 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        20.1 ns/op	1987.93 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.5 ns/op	2050.18 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.8 ns/op	2024.89 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.7 ns/op	2031.46 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        40.4 ns/op	12662.42 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        39.7 ns/op	12889.39 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        40.3 ns/op	12706.28 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        40.0 ns/op	12814.70 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        40.1 ns/op	12758.84 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        39.7 ns/op	12894.29 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        40.8 ns/op	12535.17 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        40.8 ns/op	12542.40 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        40.2 ns/op	12750.46 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        39.7 ns/op	12882.94 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        41.9 ns/op	12212.50 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        41.9 ns/op	12206.11 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        43.1 ns/op	11869.61 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        42.8 ns/op	11949.04 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        43.3 ns/op	11823.64 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        42.0 ns/op	12196.38 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        41.5 ns/op	12325.33 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        41.8 ns/op	12236.96 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        41.5 ns/op	12326.79 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        41.6 ns/op	12298.60 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        65.4 ns/op	15659.97 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        65.5 ns/op	15623.63 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        65.2 ns/op	15711.75 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        65.5 ns/op	15640.74 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        65.3 ns/op	15687.32 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        66.0 ns/op	15509.02 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        65.3 ns/op	15684.59 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        70.2 ns/op	14584.88 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        65.7 ns/op	15582.29 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        65.6 ns/op	15619.90 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        67.6 ns/op	15140.87 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        68.1 ns/op	15039.19 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        67.5 ns/op	15180.69 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        67.6 ns/op	15146.80 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        73.1 ns/op	14015.81 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        70.7 ns/op	14493.63 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        74.1 ns/op	13820.54 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        71.8 ns/op	14256.30 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        70.9 ns/op	14451.65 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        69.5 ns/op	14727.15 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       162 ns/op	25174.53 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       162 ns/op	25194.50 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       167 ns/op	24422.65 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       160 ns/op	25541.37 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       161 ns/op	25354.03 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       171 ns/op	23871.09 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       162 ns/op	25224.84 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       158 ns/op	25798.19 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       158 ns/op	25815.54 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       167 ns/op	24465.11 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       168 ns/op	24282.26 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       177 ns/op	23065.76 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       176 ns/op	23245.80 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       170 ns/op	24015.77 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       173 ns/op	23579.34 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       174 ns/op	23531.14 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       169 ns/op	24107.05 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       166 ns/op	24657.46 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       162 ns/op	25273.53 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       159 ns/op	25619.67 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1234 ns/op	26534.55 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1210 ns/op	27063.60 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1241 ns/op	26391.50 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1230 ns/op	26634.33 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1190 ns/op	27515.61 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1183 ns/op	27695.90 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1271 ns/op	25771.02 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1205 ns/op	27170.87 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1337 ns/op	24493.55 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1200 ns/op	27299.92 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1263 ns/op	25937.42 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1274 ns/op	25702.13 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1278 ns/op	25639.64 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1271 ns/op	25763.65 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1235 ns/op	26515.05 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1232 ns/op	26586.52 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1341 ns/op	24430.71 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1257 ns/op	26056.28 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1264 ns/op	25910.24 MB/s
+BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1309 ns/op	25023.35 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        36.5 ns/op	 410.82 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        37.3 ns/op	 402.07 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        36.7 ns/op	 408.83 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        34.8 ns/op	 430.97 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        40.4 ns/op	 371.74 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        38.6 ns/op	 388.62 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        36.3 ns/op	 413.61 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        34.0 ns/op	 441.26 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        34.3 ns/op	 437.06 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        36.2 ns/op	 414.34 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        34.4 ns/op	 435.65 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        37.0 ns/op	 405.81 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        35.8 ns/op	 419.55 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        36.5 ns/op	 410.66 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        35.2 ns/op	 425.85 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        34.4 ns/op	 436.09 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        34.0 ns/op	 441.70 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        33.9 ns/op	 443.08 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        33.9 ns/op	 443.04 MB/s
+BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        36.4 ns/op	 412.65 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        89.7 ns/op	 445.75 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        90.4 ns/op	 442.39 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        94.9 ns/op	 421.61 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        87.4 ns/op	 457.45 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        89.3 ns/op	 448.11 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        90.3 ns/op	 443.04 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        87.9 ns/op	 455.14 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        93.7 ns/op	 426.98 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	       100 ns/op	 396.87 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        92.8 ns/op	 430.97 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        87.9 ns/op	 455.30 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        90.9 ns/op	 439.90 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        93.5 ns/op	 427.91 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        93.7 ns/op	 426.98 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        95.0 ns/op	 421.02 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        90.1 ns/op	 443.95 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        95.3 ns/op	 419.61 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        85.6 ns/op	 467.20 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        86.9 ns/op	 460.19 MB/s
+BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        91.9 ns/op	 435.25 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1118 ns/op	 457.68 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1168 ns/op	 438.12 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1132 ns/op	 452.06 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1079 ns/op	 474.47 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1079 ns/op	 474.46 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1126 ns/op	 454.69 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1132 ns/op	 452.04 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  100000	      1193 ns/op	 429.16 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1124 ns/op	 455.38 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1166 ns/op	 438.87 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1096 ns/op	 466.92 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1117 ns/op	 458.09 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1200 ns/op	 426.33 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1109 ns/op	 461.41 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1155 ns/op	 443.08 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1141 ns/op	 448.59 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1136 ns/op	 450.61 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1145 ns/op	 446.97 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1085 ns/op	 471.86 MB/s
+BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  100000	      1084 ns/op	 471.93 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2308 ns/op	 443.53 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2480 ns/op	 412.88 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2288 ns/op	 447.44 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2371 ns/op	 431.88 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2247 ns/op	 455.55 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2211 ns/op	 462.97 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2264 ns/op	 452.17 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2231 ns/op	 458.82 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2161 ns/op	 473.71 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2109 ns/op	 485.48 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2160 ns/op	 473.90 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2103 ns/op	 486.83 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2189 ns/op	 467.75 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2159 ns/op	 474.21 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2144 ns/op	 477.53 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2169 ns/op	 472.09 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2123 ns/op	 482.20 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	   50000	      2150 ns/op	 476.26 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2141 ns/op	 478.25 MB/s
+BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2238 ns/op	 457.51 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8562 ns/op	 478.35 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8584 ns/op	 477.16 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9176 ns/op	 446.36 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9096 ns/op	 450.27 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9337 ns/op	 438.67 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9545 ns/op	 429.11 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9287 ns/op	 441.04 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8963 ns/op	 456.98 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8970 ns/op	 456.61 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8795 ns/op	 465.67 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9179 ns/op	 446.20 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9026 ns/op	 453.76 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      8505 ns/op	 481.60 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      8396 ns/op	 487.85 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9818 ns/op	 417.17 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      8652 ns/op	 473.38 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      8689 ns/op	 471.36 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      8345 ns/op	 490.83 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9391 ns/op	 436.13 MB/s
+BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9401 ns/op	 435.66 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     74664 ns/op	 438.87 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     72859 ns/op	 449.74 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     73403 ns/op	 446.41 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     73646 ns/op	 444.94 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     76501 ns/op	 428.33 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     78648 ns/op	 416.64 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     69550 ns/op	 471.14 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     67848 ns/op	 482.96 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     67984 ns/op	 482.00 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     69177 ns/op	 473.68 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     70108 ns/op	 467.39 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     67566 ns/op	 484.98 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     71256 ns/op	 459.86 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     69979 ns/op	 468.25 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     69972 ns/op	 468.30 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     68356 ns/op	 479.37 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     69631 ns/op	 470.59 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     70087 ns/op	 467.53 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     75510 ns/op	 433.95 MB/s
+BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     76970 ns/op	 425.72 MB/s
+PASS
+ok  	hash/crc32	68.427s
diff --git a/cmd/benchstat/testdata/crcIgnore.stdout b/cmd/benchstat/testdata/crcIgnore.stdout
new file mode 100644
index 0000000..611b703
--- /dev/null
+++ b/cmd/benchstat/testdata/crcIgnore.stdout
@@ -0,0 +1,82 @@
+pkg: hash/crc32
+goarch: amd64
+goos: darwin
+                                          │ crc-old.txt  │             crc-new.txt             │
+                                          │    sec/op    │   sec/op     vs base                │
+CRC32/poly=IEEE/size=15/align=0-8            46.55n ± 9%   44.40n ± 2%   -4.62% (p=0.008 n=10)
+CRC32/poly=IEEE/size=15/align=1-8            44.35n ± 3%   44.35n ± 1%        ~ (p=0.539 n=10)
+CRC32/poly=IEEE/size=40/align=0-8            41.05n ± 3%   42.45n ± 3%   +3.41% (p=0.006 n=10)
+CRC32/poly=IEEE/size=40/align=1-8            41.05n ± 1%   41.90n ± 2%   +2.07% (p=0.003 n=10)
+CRC32/poly=IEEE/size=512/align=0-8          237.50n ± 4%   56.75n ± 3%  -76.11% (p=0.000 n=10)
+CRC32/poly=IEEE/size=512/align=1-8          235.50n ± 2%   57.15n ± 2%  -75.73% (p=0.000 n=10)
+CRC32/poly=IEEE/size=1kB/align=0-8          452.50n ± 2%   94.90n ± 5%  -79.03% (p=0.000 n=10)
+CRC32/poly=IEEE/size=1kB/align=1-8          444.00n ± 2%   93.20n ± 9%  -79.01% (p=0.000 n=10)
+CRC32/poly=IEEE/size=4kB/align=0-8          1701.0n ± 7%   298.0n ± 1%  -82.48% (p=0.000 n=10)
+CRC32/poly=IEEE/size=4kB/align=1-8          1775.5n ± 5%   298.0n ± 2%  -83.22% (p=0.000 n=10)
+CRC32/poly=IEEE/size=32kB/align=0-8         15.014µ ± 5%   2.145µ ± 4%  -85.72% (p=0.000 n=10)
+CRC32/poly=IEEE/size=32kB/align=1-8         14.447µ ± 6%   2.163µ ± 3%  -85.03% (p=0.000 n=10)
+CRC32/poly=Castagnoli/size=15/align=0-8      16.50n ± 3%   16.30n ± 2%        ~ (p=0.642 n=10)
+CRC32/poly=Castagnoli/size=15/align=1-8      17.20n ± 2%   17.35n ± 3%        ~ (p=0.959 n=10)
+CRC32/poly=Castagnoli/size=40/align=0-8      17.45n ± 1%   17.45n ± 3%        ~ (p=0.694 n=10)
+CRC32/poly=Castagnoli/size=40/align=1-8      19.75n ± 2%   19.35n ± 2%   -2.03% (p=0.036 n=10)
+CRC32/poly=Castagnoli/size=512/align=0-8     40.15n ± 2%   39.85n ± 2%        ~ (p=0.614 n=10)
+CRC32/poly=Castagnoli/size=512/align=1-8     41.90n ± 3%   41.95n ± 2%        ~ (p=0.838 n=10)
+CRC32/poly=Castagnoli/size=1kB/align=0-8     65.50n ± 1%   66.30n ± 3%   +1.22% (p=0.007 n=10)
+CRC32/poly=Castagnoli/size=1kB/align=1-8     70.10n ± 4%   68.55n ± 2%        ~ (p=0.239 n=10)
+CRC32/poly=Castagnoli/size=4kB/align=0-8     162.0n ± 3%   157.0n ± 4%   -3.09% (p=0.032 n=10)
+CRC32/poly=Castagnoli/size=4kB/align=1-8     169.5n ± 4%   161.0n ± 2%   -5.01% (p=0.005 n=10)
+CRC32/poly=Castagnoli/size=32kB/align=0-8    1.220µ ± 4%   1.218µ ± 2%        ~ (p=0.869 n=10)
+CRC32/poly=Castagnoli/size=32kB/align=1-8    1.268µ ± 3%   1.220µ ± 2%   -3.75% (p=0.001 n=10)
+CRC32/poly=Koopman/size=15/align=0-8         36.40n ± 6%   35.60n ± 1%        ~ (p=0.216 n=10)
+CRC32/poly=Koopman/size=15/align=1-8         34.80n ± 5%   35.55n ± 1%        ~ (p=0.323 n=10)
+CRC32/poly=Koopman/size=40/align=0-8         90.35n ± 5%   87.55n ± 2%   -3.10% (p=0.002 n=10)
+CRC32/poly=Koopman/size=40/align=1-8         91.40n ± 5%   87.65n ± 2%        ~ (p=0.055 n=10)
+CRC32/poly=Koopman/size=512/align=0-8        1.129µ ± 4%   1.073µ ± 3%   -4.96% (p=0.000 n=10)
+CRC32/poly=Koopman/size=512/align=1-8        1.127µ ± 4%   1.183µ ± 7%        ~ (p=0.143 n=10)
+CRC32/poly=Koopman/size=1kB/align=0-8        2.256µ ± 5%   2.347µ ± 4%        ~ (p=0.052 n=10)
+CRC32/poly=Koopman/size=1kB/align=1-8        2.155µ ± 2%   2.361µ ± 3%   +9.58% (p=0.000 n=10)
+CRC32/poly=Koopman/size=4kB/align=0-8        9.033µ ± 5%   8.964µ ± 4%        ~ (p=0.971 n=10)
+CRC32/poly=Koopman/size=4kB/align=1-8        8.858µ ± 6%   8.986µ ± 8%        ~ (p=0.754 n=10)
+CRC32/poly=Koopman/size=32kB/align=0-8       73.13µ ± 7%   73.21µ ± 4%        ~ (p=0.684 n=10)
+CRC32/poly=Koopman/size=32kB/align=1-8       70.03µ ± 8%   73.80µ ± 3%   +5.37% (p=0.009 n=10)
+geomean                                      344.5n        237.5n       -31.05%
+
+                                          │ crc-old.txt  │              crc-new.txt               │
+                                          │     B/s      │      B/s       vs base                 │
+CRC32/poly=IEEE/size=15/align=0-8           307.3Mi ± 8%    322.1Mi ± 2%    +4.84% (p=0.009 n=10)
+CRC32/poly=IEEE/size=15/align=1-8           322.3Mi ± 3%    322.7Mi ± 1%         ~ (p=0.579 n=10)
+CRC32/poly=IEEE/size=40/align=0-8           929.5Mi ± 3%    898.1Mi ± 3%    -3.38% (p=0.011 n=10)
+CRC32/poly=IEEE/size=40/align=1-8           928.5Mi ± 1%    909.9Mi ± 2%    -2.00% (p=0.005 n=10)
+CRC32/poly=IEEE/size=512/align=0-8          2.001Gi ± 4%    8.401Gi ± 3%  +319.83% (p=0.000 n=10)
+CRC32/poly=IEEE/size=512/align=1-8          2.019Gi ± 2%    8.345Gi ± 2%  +313.34% (p=0.000 n=10)
+CRC32/poly=IEEE/size=1kB/align=0-8          2.105Gi ± 2%   10.048Gi ± 6%  +377.22% (p=0.000 n=10)
+CRC32/poly=IEEE/size=1kB/align=1-8          2.145Gi ± 2%   10.235Gi ± 9%  +377.16% (p=0.000 n=10)
+CRC32/poly=IEEE/size=4kB/align=0-8          2.242Gi ± 7%   12.783Gi ± 1%  +470.19% (p=0.000 n=10)
+CRC32/poly=IEEE/size=4kB/align=1-8          2.148Gi ± 6%   12.778Gi ± 2%  +494.93% (p=0.000 n=10)
+CRC32/poly=IEEE/size=32kB/align=0-8         2.032Gi ± 5%   14.226Gi ± 4%  +599.95% (p=0.000 n=10)
+CRC32/poly=IEEE/size=32kB/align=1-8         2.112Gi ± 7%   14.111Gi ± 3%  +567.98% (p=0.000 n=10)
+CRC32/poly=Castagnoli/size=15/align=0-8     866.4Mi ± 3%    876.8Mi ± 2%         ~ (p=0.529 n=10)
+CRC32/poly=Castagnoli/size=15/align=1-8     829.4Mi ± 2%    824.4Mi ± 2%         ~ (p=0.971 n=10)
+CRC32/poly=Castagnoli/size=40/align=0-8     2.138Gi ± 1%    2.135Gi ± 2%         ~ (p=0.684 n=10)
+CRC32/poly=Castagnoli/size=40/align=1-8     1.889Gi ± 2%    1.923Gi ± 1%         ~ (p=0.063 n=10)
+CRC32/poly=Castagnoli/size=512/align=0-8    11.88Gi ± 2%    11.96Gi ± 2%         ~ (p=0.529 n=10)
+CRC32/poly=Castagnoli/size=512/align=1-8    11.37Gi ± 3%    11.37Gi ± 1%         ~ (p=1.000 n=10)
+CRC32/poly=Castagnoli/size=1kB/align=0-8    14.56Gi ± 1%    14.39Gi ± 3%    -1.19% (p=0.007 n=10)
+CRC32/poly=Castagnoli/size=1kB/align=1-8    13.61Gi ± 4%    13.92Gi ± 2%         ~ (p=0.280 n=10)
+CRC32/poly=Castagnoli/size=4kB/align=0-8    23.48Gi ± 3%    24.19Gi ± 4%         ~ (p=0.052 n=10)
+CRC32/poly=Castagnoli/size=4kB/align=1-8    22.41Gi ± 5%    23.62Gi ± 2%    +5.41% (p=0.005 n=10)
+CRC32/poly=Castagnoli/size=32kB/align=0-8   25.01Gi ± 4%    25.06Gi ± 2%         ~ (p=0.912 n=10)
+CRC32/poly=Castagnoli/size=32kB/align=1-8   24.06Gi ± 3%    25.01Gi ± 2%    +3.94% (p=0.001 n=10)
+CRC32/poly=Koopman/size=15/align=0-8        393.1Mi ± 6%    402.1Mi ± 1%         ~ (p=0.218 n=10)
+CRC32/poly=Koopman/size=15/align=1-8        410.8Mi ± 5%    402.4Mi ± 1%         ~ (p=0.315 n=10)
+CRC32/poly=Koopman/size=40/align=0-8        422.2Mi ± 5%    435.9Mi ± 2%    +3.24% (p=0.002 n=10)
+CRC32/poly=Koopman/size=40/align=1-8        417.3Mi ± 5%    435.3Mi ± 2%         ~ (p=0.052 n=10)
+CRC32/poly=Koopman/size=512/align=0-8       432.4Mi ± 5%    454.7Mi ± 2%    +5.17% (p=0.000 n=10)
+CRC32/poly=Koopman/size=512/align=1-8       433.3Mi ± 4%    412.8Mi ± 7%         ~ (p=0.143 n=10)
+CRC32/poly=Koopman/size=1kB/align=0-8       432.8Mi ± 5%    416.1Mi ± 4%         ~ (p=0.052 n=10)
+CRC32/poly=Koopman/size=1kB/align=1-8       453.2Mi ± 2%    413.5Mi ± 3%    -8.76% (p=0.000 n=10)
+CRC32/poly=Koopman/size=4kB/align=0-8       432.4Mi ± 5%    435.9Mi ± 4%         ~ (p=0.971 n=10)
+CRC32/poly=Koopman/size=4kB/align=1-8       441.1Mi ± 6%    434.8Mi ± 8%         ~ (p=0.739 n=10)
+CRC32/poly=Koopman/size=32kB/align=0-8      427.3Mi ± 8%    426.9Mi ± 4%         ~ (p=0.684 n=10)
+CRC32/poly=Koopman/size=32kB/align=1-8      446.2Mi ± 7%    423.5Mi ± 3%    -5.10% (p=0.009 n=10)
+geomean                                     1.594Gi         2.313Gi        +45.06%
diff --git a/cmd/benchstat/testdata/crcOldNew.stdout b/cmd/benchstat/testdata/crcOldNew.stdout
new file mode 100644
index 0000000..9c0a2bf
--- /dev/null
+++ b/cmd/benchstat/testdata/crcOldNew.stdout
@@ -0,0 +1,164 @@
+pkg: hash/crc32
+goarch: amd64
+goos: darwin
+note: hw acceleration disabled
+                                          │ crc-old.txt │
+                                          │   sec/op    │
+CRC32/poly=IEEE/size=15/align=0-8           46.55n ± 9%
+CRC32/poly=IEEE/size=15/align=1-8           44.35n ± 3%
+CRC32/poly=IEEE/size=40/align=0-8           41.05n ± 3%
+CRC32/poly=IEEE/size=40/align=1-8           41.05n ± 1%
+CRC32/poly=IEEE/size=512/align=0-8          237.5n ± 4%
+CRC32/poly=IEEE/size=512/align=1-8          235.5n ± 2%
+CRC32/poly=IEEE/size=1kB/align=0-8          452.5n ± 2%
+CRC32/poly=IEEE/size=1kB/align=1-8          444.0n ± 2%
+CRC32/poly=IEEE/size=4kB/align=0-8          1.701µ ± 7%
+CRC32/poly=IEEE/size=4kB/align=1-8          1.776µ ± 5%
+CRC32/poly=IEEE/size=32kB/align=0-8         15.01µ ± 5%
+CRC32/poly=IEEE/size=32kB/align=1-8         14.45µ ± 6%
+CRC32/poly=Castagnoli/size=15/align=0-8     16.50n ± 3%
+CRC32/poly=Castagnoli/size=15/align=1-8     17.20n ± 2%
+CRC32/poly=Castagnoli/size=40/align=0-8     17.45n ± 1%
+CRC32/poly=Castagnoli/size=40/align=1-8     19.75n ± 2%
+CRC32/poly=Castagnoli/size=512/align=0-8    40.15n ± 2%
+CRC32/poly=Castagnoli/size=512/align=1-8    41.90n ± 3%
+CRC32/poly=Castagnoli/size=1kB/align=0-8    65.50n ± 1%
+CRC32/poly=Castagnoli/size=1kB/align=1-8    70.10n ± 4%
+CRC32/poly=Castagnoli/size=4kB/align=0-8    162.0n ± 3%
+CRC32/poly=Castagnoli/size=4kB/align=1-8    169.5n ± 4%
+CRC32/poly=Castagnoli/size=32kB/align=0-8   1.220µ ± 4%
+CRC32/poly=Castagnoli/size=32kB/align=1-8   1.268µ ± 3%
+CRC32/poly=Koopman/size=15/align=0-8        36.40n ± 6%
+CRC32/poly=Koopman/size=15/align=1-8        34.80n ± 5%
+CRC32/poly=Koopman/size=40/align=0-8        90.35n ± 5%
+CRC32/poly=Koopman/size=40/align=1-8        91.40n ± 5%
+CRC32/poly=Koopman/size=512/align=0-8       1.129µ ± 4%
+CRC32/poly=Koopman/size=512/align=1-8       1.127µ ± 4%
+CRC32/poly=Koopman/size=1kB/align=0-8       2.256µ ± 5%
+CRC32/poly=Koopman/size=1kB/align=1-8       2.155µ ± 2%
+CRC32/poly=Koopman/size=4kB/align=0-8       9.033µ ± 5%
+CRC32/poly=Koopman/size=4kB/align=1-8       8.858µ ± 6%
+CRC32/poly=Koopman/size=32kB/align=0-8      73.13µ ± 7%
+CRC32/poly=Koopman/size=32kB/align=1-8      70.03µ ± 8%
+geomean                                     344.5n
+
+                                          │ crc-old.txt  │
+                                          │     B/s      │
+CRC32/poly=IEEE/size=15/align=0-8           307.3Mi ± 8%
+CRC32/poly=IEEE/size=15/align=1-8           322.3Mi ± 3%
+CRC32/poly=IEEE/size=40/align=0-8           929.5Mi ± 3%
+CRC32/poly=IEEE/size=40/align=1-8           928.5Mi ± 1%
+CRC32/poly=IEEE/size=512/align=0-8          2.001Gi ± 4%
+CRC32/poly=IEEE/size=512/align=1-8          2.019Gi ± 2%
+CRC32/poly=IEEE/size=1kB/align=0-8          2.105Gi ± 2%
+CRC32/poly=IEEE/size=1kB/align=1-8          2.145Gi ± 2%
+CRC32/poly=IEEE/size=4kB/align=0-8          2.242Gi ± 7%
+CRC32/poly=IEEE/size=4kB/align=1-8          2.148Gi ± 6%
+CRC32/poly=IEEE/size=32kB/align=0-8         2.032Gi ± 5%
+CRC32/poly=IEEE/size=32kB/align=1-8         2.112Gi ± 7%
+CRC32/poly=Castagnoli/size=15/align=0-8     866.4Mi ± 3%
+CRC32/poly=Castagnoli/size=15/align=1-8     829.4Mi ± 2%
+CRC32/poly=Castagnoli/size=40/align=0-8     2.138Gi ± 1%
+CRC32/poly=Castagnoli/size=40/align=1-8     1.889Gi ± 2%
+CRC32/poly=Castagnoli/size=512/align=0-8    11.88Gi ± 2%
+CRC32/poly=Castagnoli/size=512/align=1-8    11.37Gi ± 3%
+CRC32/poly=Castagnoli/size=1kB/align=0-8    14.56Gi ± 1%
+CRC32/poly=Castagnoli/size=1kB/align=1-8    13.61Gi ± 4%
+CRC32/poly=Castagnoli/size=4kB/align=0-8    23.48Gi ± 3%
+CRC32/poly=Castagnoli/size=4kB/align=1-8    22.41Gi ± 5%
+CRC32/poly=Castagnoli/size=32kB/align=0-8   25.01Gi ± 4%
+CRC32/poly=Castagnoli/size=32kB/align=1-8   24.06Gi ± 3%
+CRC32/poly=Koopman/size=15/align=0-8        393.1Mi ± 6%
+CRC32/poly=Koopman/size=15/align=1-8        410.8Mi ± 5%
+CRC32/poly=Koopman/size=40/align=0-8        422.2Mi ± 5%
+CRC32/poly=Koopman/size=40/align=1-8        417.3Mi ± 5%
+CRC32/poly=Koopman/size=512/align=0-8       432.4Mi ± 5%
+CRC32/poly=Koopman/size=512/align=1-8       433.3Mi ± 4%
+CRC32/poly=Koopman/size=1kB/align=0-8       432.8Mi ± 5%
+CRC32/poly=Koopman/size=1kB/align=1-8       453.2Mi ± 2%
+CRC32/poly=Koopman/size=4kB/align=0-8       432.4Mi ± 5%
+CRC32/poly=Koopman/size=4kB/align=1-8       441.1Mi ± 6%
+CRC32/poly=Koopman/size=32kB/align=0-8      427.3Mi ± 8%
+CRC32/poly=Koopman/size=32kB/align=1-8      446.2Mi ± 7%
+geomean                                     1.594Gi
+
+note: hw acceleration enabled
+                                          │ crc-new.txt │
+                                          │   sec/op    │
+CRC32/poly=IEEE/size=15/align=0-8           44.40n ± 2%
+CRC32/poly=IEEE/size=15/align=1-8           44.35n ± 1%
+CRC32/poly=IEEE/size=40/align=0-8           42.45n ± 3%
+CRC32/poly=IEEE/size=40/align=1-8           41.90n ± 2%
+CRC32/poly=IEEE/size=512/align=0-8          56.75n ± 3%
+CRC32/poly=IEEE/size=512/align=1-8          57.15n ± 2%
+CRC32/poly=IEEE/size=1kB/align=0-8          94.90n ± 5%
+CRC32/poly=IEEE/size=1kB/align=1-8          93.20n ± 9%
+CRC32/poly=IEEE/size=4kB/align=0-8          298.0n ± 1%
+CRC32/poly=IEEE/size=4kB/align=1-8          298.0n ± 2%
+CRC32/poly=IEEE/size=32kB/align=0-8         2.145µ ± 4%
+CRC32/poly=IEEE/size=32kB/align=1-8         2.163µ ± 3%
+CRC32/poly=Castagnoli/size=15/align=0-8     16.30n ± 2%
+CRC32/poly=Castagnoli/size=15/align=1-8     17.35n ± 3%
+CRC32/poly=Castagnoli/size=40/align=0-8     17.45n ± 3%
+CRC32/poly=Castagnoli/size=40/align=1-8     19.35n ± 2%
+CRC32/poly=Castagnoli/size=512/align=0-8    39.85n ± 2%
+CRC32/poly=Castagnoli/size=512/align=1-8    41.95n ± 2%
+CRC32/poly=Castagnoli/size=1kB/align=0-8    66.30n ± 3%
+CRC32/poly=Castagnoli/size=1kB/align=1-8    68.55n ± 2%
+CRC32/poly=Castagnoli/size=4kB/align=0-8    157.0n ± 4%
+CRC32/poly=Castagnoli/size=4kB/align=1-8    161.0n ± 2%
+CRC32/poly=Castagnoli/size=32kB/align=0-8   1.218µ ± 2%
+CRC32/poly=Castagnoli/size=32kB/align=1-8   1.220µ ± 2%
+CRC32/poly=Koopman/size=15/align=0-8        35.60n ± 1%
+CRC32/poly=Koopman/size=15/align=1-8        35.55n ± 1%
+CRC32/poly=Koopman/size=40/align=0-8        87.55n ± 2%
+CRC32/poly=Koopman/size=40/align=1-8        87.65n ± 2%
+CRC32/poly=Koopman/size=512/align=0-8       1.073µ ± 3%
+CRC32/poly=Koopman/size=512/align=1-8       1.183µ ± 7%
+CRC32/poly=Koopman/size=1kB/align=0-8       2.347µ ± 4%
+CRC32/poly=Koopman/size=1kB/align=1-8       2.361µ ± 3%
+CRC32/poly=Koopman/size=4kB/align=0-8       8.964µ ± 4%
+CRC32/poly=Koopman/size=4kB/align=1-8       8.986µ ± 8%
+CRC32/poly=Koopman/size=32kB/align=0-8      73.21µ ± 4%
+CRC32/poly=Koopman/size=32kB/align=1-8      73.80µ ± 3%
+geomean                                     237.5n
+
+                                          │ crc-new.txt  │
+                                          │     B/s      │
+CRC32/poly=IEEE/size=15/align=0-8           322.1Mi ± 2%
+CRC32/poly=IEEE/size=15/align=1-8           322.7Mi ± 1%
+CRC32/poly=IEEE/size=40/align=0-8           898.1Mi ± 3%
+CRC32/poly=IEEE/size=40/align=1-8           909.9Mi ± 2%
+CRC32/poly=IEEE/size=512/align=0-8          8.401Gi ± 3%
+CRC32/poly=IEEE/size=512/align=1-8          8.345Gi ± 2%
+CRC32/poly=IEEE/size=1kB/align=0-8          10.05Gi ± 6%
+CRC32/poly=IEEE/size=1kB/align=1-8          10.24Gi ± 9%
+CRC32/poly=IEEE/size=4kB/align=0-8          12.78Gi ± 1%
+CRC32/poly=IEEE/size=4kB/align=1-8          12.78Gi ± 2%
+CRC32/poly=IEEE/size=32kB/align=0-8         14.23Gi ± 4%
+CRC32/poly=IEEE/size=32kB/align=1-8         14.11Gi ± 3%
+CRC32/poly=Castagnoli/size=15/align=0-8     876.8Mi ± 2%
+CRC32/poly=Castagnoli/size=15/align=1-8     824.4Mi ± 2%
+CRC32/poly=Castagnoli/size=40/align=0-8     2.135Gi ± 2%
+CRC32/poly=Castagnoli/size=40/align=1-8     1.923Gi ± 1%
+CRC32/poly=Castagnoli/size=512/align=0-8    11.96Gi ± 2%
+CRC32/poly=Castagnoli/size=512/align=1-8    11.37Gi ± 1%
+CRC32/poly=Castagnoli/size=1kB/align=0-8    14.39Gi ± 3%
+CRC32/poly=Castagnoli/size=1kB/align=1-8    13.92Gi ± 2%
+CRC32/poly=Castagnoli/size=4kB/align=0-8    24.19Gi ± 4%
+CRC32/poly=Castagnoli/size=4kB/align=1-8    23.62Gi ± 2%
+CRC32/poly=Castagnoli/size=32kB/align=0-8   25.06Gi ± 2%
+CRC32/poly=Castagnoli/size=32kB/align=1-8   25.01Gi ± 2%
+CRC32/poly=Koopman/size=15/align=0-8        402.1Mi ± 1%
+CRC32/poly=Koopman/size=15/align=1-8        402.4Mi ± 1%
+CRC32/poly=Koopman/size=40/align=0-8        435.9Mi ± 2%
+CRC32/poly=Koopman/size=40/align=1-8        435.3Mi ± 2%
+CRC32/poly=Koopman/size=512/align=0-8       454.7Mi ± 2%
+CRC32/poly=Koopman/size=512/align=1-8       412.8Mi ± 7%
+CRC32/poly=Koopman/size=1kB/align=0-8       416.1Mi ± 4%
+CRC32/poly=Koopman/size=1kB/align=1-8       413.5Mi ± 3%
+CRC32/poly=Koopman/size=4kB/align=0-8       435.9Mi ± 4%
+CRC32/poly=Koopman/size=4kB/align=1-8       434.8Mi ± 8%
+CRC32/poly=Koopman/size=32kB/align=0-8      426.9Mi ± 4%
+CRC32/poly=Koopman/size=32kB/align=1-8      423.5Mi ± 3%
+geomean                                     2.313Gi
diff --git a/cmd/benchstat/testdata/crcSizeVsPoly.stdout b/cmd/benchstat/testdata/crcSizeVsPoly.stdout
new file mode 100644
index 0000000..a0702b5
--- /dev/null
+++ b/cmd/benchstat/testdata/crcSizeVsPoly.stdout
@@ -0,0 +1,23 @@
+pkg: hash/crc32
+goarch: amd64
+goos: darwin
+note: hw acceleration enabled
+        │    IEEE     │             Castagnoli              │                 Koopman                 │
+        │   sec/op    │   sec/op     vs base                │    sec/op      vs base                  │
+15        44.40n ± 2%   16.30n ± 2%  -63.29% (p=0.000 n=10)     35.60n ± 1%    -19.82% (p=0.000 n=10)
+40        42.45n ± 3%   17.45n ± 3%  -58.89% (p=0.000 n=10)     87.55n ± 2%   +106.24% (p=0.000 n=10)
+512       56.75n ± 3%   39.85n ± 2%  -29.78% (p=0.000 n=10)   1073.00n ± 3%  +1790.75% (p=0.000 n=10)
+1kB       94.90n ± 5%   66.30n ± 3%  -30.14% (p=0.000 n=10)   2346.50n ± 4%  +2372.60% (p=0.000 n=10)
+4kB       298.0n ± 1%   157.0n ± 4%  -47.32% (p=0.000 n=10)    8964.0n ± 4%  +2908.05% (p=0.000 n=10)
+32kB      2.145µ ± 4%   1.218µ ± 2%  -43.23% (p=0.000 n=10)    73.206µ ± 4%  +3313.64% (p=0.000 n=10)
+geomean   136.6n        72.37n       -47.01%                    1.314µ        +862.25%
+
+        │      IEEE      │               Castagnoli                │               Koopman                │
+        │      B/s       │      B/s        vs base                 │     B/s       vs base                │
+15          322.1Mi ± 2%     876.8Mi ± 2%  +172.18% (p=0.000 n=10)   402.1Mi ± 1%  +24.82% (p=0.000 n=10)
+40          898.1Mi ± 3%    2186.1Mi ± 2%  +143.41% (p=0.000 n=10)   435.9Mi ± 2%  -51.47% (p=0.000 n=10)
+512        8602.5Mi ± 3%   12245.1Mi ± 2%   +42.34% (p=0.000 n=10)   454.7Mi ± 2%  -94.71% (p=0.000 n=10)
+1kB       10289.1Mi ± 6%   14730.6Mi ± 3%   +43.17% (p=0.000 n=10)   416.1Mi ± 4%  -95.96% (p=0.000 n=10)
+4kB       13089.6Mi ± 1%   24772.0Mi ± 4%   +89.25% (p=0.000 n=10)   435.9Mi ± 4%  -96.67% (p=0.000 n=10)
+32kB      14567.5Mi ± 4%   25658.0Mi ± 2%   +76.13% (p=0.000 n=10)   426.9Mi ± 4%  -97.07% (p=0.000 n=10)
+geomean     4.022Gi          7.586Gi        +88.60%                  428.3Mi       -89.60%
diff --git a/cmd/benchstat/testdata/csvErrors.stderr b/cmd/benchstat/testdata/csvErrors.stderr
new file mode 100644
index 0000000..80e42d7
--- /dev/null
+++ b/cmd/benchstat/testdata/csvErrors.stderr
@@ -0,0 +1 @@
+B6: benchmarks vary in .fullname
diff --git a/cmd/benchstat/testdata/csvErrors.stdout b/cmd/benchstat/testdata/csvErrors.stdout
new file mode 100644
index 0000000..49f65b0
--- /dev/null
+++ b/cmd/benchstat/testdata/csvErrors.stdout
@@ -0,0 +1,7 @@
+goos: linux
+goarch: amd64
+pkg: golang.org/x/perf/cmd/benchstat/testdata
+,new.txt
+,sec/op,CI
+Encode,2.2530000000000003e-06,37%
+geomean,2.2530000000000016e-06
diff --git a/cmd/benchstat/testdata/csvOldNew.stdout b/cmd/benchstat/testdata/csvOldNew.stdout
new file mode 100644
index 0000000..8a8d88d
--- /dev/null
+++ b/cmd/benchstat/testdata/csvOldNew.stdout
@@ -0,0 +1,8 @@
+goos: linux
+goarch: amd64
+pkg: golang.org/x/perf/cmd/benchstat/testdata
+,old.txt,,new.txt
+,sec/op,CI,sec/op,CI,vs base,P
+Encode/format=json-48,1.7180000000000001e-06,1%,1.4225000000000001e-06,1%,-17.20%,p=0.000 n=10
+Encode/format=gob-48,3.0655e-06,0%,3.0700000000000003e-06,2%,~,p=0.446 n=10
+geomean,2.294891936453654e-06,,2.089754770302007e-06,,-8.94%
diff --git a/cmd/benchstat/testdata/deltasort.golden b/cmd/benchstat/testdata/deltasort.golden
deleted file mode 100644
index 020c044..0000000
--- a/cmd/benchstat/testdata/deltasort.golden
+++ /dev/null
@@ -1,75 +0,0 @@
-name                                       old time/op    new time/op     delta
-CRC32/poly=Koopman/size=1kB/align=1-8        2.15µs ± 2%     2.36µs ± 5%    +9.84%  (p=0.000 n=9+10)
-CRC32/poly=Koopman/size=32kB/align=1-8       69.6µs ± 3%     74.3µs ± 3%    +6.70%  (p=0.000 n=8+10)
-CRC32/poly=Koopman/size=1kB/align=0-8        2.24µs ± 6%     2.34µs ± 4%    +4.34%  (p=0.010 n=9+10)
-CRC32/poly=IEEE/size=40/align=0-8            41.0ns ± 1%     42.5ns ± 6%    +3.56%  (p=0.000 n=8+10)
-CRC32/poly=IEEE/size=40/align=1-8            41.1ns ± 1%     42.0ns ± 3%    +2.34%  (p=0.000 n=9+10)
-CRC32/poly=Castagnoli/size=1kB/align=0-8     65.5ns ± 1%     66.2ns ± 1%    +1.01%  (p=0.003 n=9+8)
-CRC32/poly=IEEE/size=15/align=1-8            44.7ns ± 5%     44.5ns ± 4%      ~     (p=0.539 n=10+10)
-CRC32/poly=Castagnoli/size=15/align=0-8      16.4ns ± 3%     16.3ns ± 2%      ~     (p=0.615 n=9+9)
-CRC32/poly=Castagnoli/size=15/align=1-8      17.2ns ± 2%     17.3ns ± 2%      ~     (p=0.650 n=9+10)
-CRC32/poly=Castagnoli/size=40/align=0-8      17.4ns ± 2%     17.5ns ± 4%      ~     (p=0.694 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=0-8     40.2ns ± 2%     40.1ns ± 4%      ~     (p=0.614 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=1-8     42.1ns ± 3%     41.9ns ± 2%      ~     (p=0.952 n=10+9)
-CRC32/poly=Castagnoli/size=1kB/align=1-8     70.1ns ± 6%     68.5ns ± 2%      ~     (p=0.190 n=10+9)
-CRC32/poly=Castagnoli/size=32kB/align=0-8    1.22µs ± 4%     1.21µs ± 3%      ~     (p=0.882 n=9+9)
-CRC32/poly=Koopman/size=15/align=0-8         36.5ns ±11%     35.6ns ± 3%      ~     (p=0.216 n=10+10)
-CRC32/poly=Koopman/size=15/align=1-8         35.1ns ± 5%     35.5ns ± 1%      ~     (p=0.508 n=10+9)
-CRC32/poly=Koopman/size=40/align=1-8         91.1ns ± 6%     88.0ns ± 3%      ~     (p=0.055 n=10+10)
-CRC32/poly=Koopman/size=512/align=1-8        1.13µs ± 6%     1.17µs ± 8%      ~     (p=0.143 n=10+10)
-CRC32/poly=Koopman/size=4kB/align=0-8        9.03µs ± 6%     9.00µs ± 6%      ~     (p=0.971 n=10+10)
-CRC32/poly=Koopman/size=4kB/align=1-8        8.94µs ±10%     9.05µs ±12%      ~     (p=0.754 n=10+10)
-CRC32/poly=Koopman/size=32kB/align=0-8       72.4µs ± 9%     72.9µs ± 4%      ~     (p=0.684 n=10+10)
-CRC32/poly=Castagnoli/size=40/align=1-8      19.7ns ± 3%     19.4ns ± 2%    -1.62%  (p=0.036 n=10+10)
-CRC32/poly=Castagnoli/size=4kB/align=0-8      163ns ± 5%      159ns ± 3%    -2.46%  (p=0.032 n=10+10)
-CRC32/poly=Castagnoli/size=32kB/align=1-8    1.26µs ± 3%     1.22µs ± 4%    -3.48%  (p=0.002 n=9+10)
-CRC32/poly=Koopman/size=40/align=0-8         91.6ns ± 9%     87.6ns ± 2%    -4.35%  (p=0.002 n=10+10)
-CRC32/poly=Castagnoli/size=4kB/align=1-8      169ns ± 6%      162ns ± 3%    -4.60%  (p=0.005 n=10+10)
-CRC32/poly=Koopman/size=512/align=0-8        1.13µs ± 5%     1.08µs ± 3%    -4.93%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=15/align=0-8            46.9ns ± 8%     44.5ns ± 3%    -5.01%  (p=0.008 n=10+10)
-CRC32/poly=IEEE/size=512/align=1-8            236ns ± 3%       57ns ± 3%   -75.72%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=512/align=0-8            238ns ± 5%       57ns ± 3%   -76.00%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=1kB/align=1-8            444ns ± 2%       93ns ± 2%   -78.97%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=1kB/align=0-8            452ns ± 4%       94ns ± 2%   -79.20%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=4kB/align=0-8           1.74µs ± 8%     0.30µs ± 1%   -82.87%  (p=0.000 n=10+9)
-CRC32/poly=IEEE/size=4kB/align=1-8           1.76µs ± 6%     0.30µs ± 3%   -83.05%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=32kB/align=1-8          14.2µs ± 7%      2.2µs ± 3%   -84.65%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=32kB/align=0-8          15.0µs ± 7%      2.2µs ± 3%   -85.57%  (p=0.000 n=10+10)
-
-name                                       old speed      new speed       delta
-CRC32/poly=Koopman/size=1kB/align=1-8       477MB/s ± 2%    434MB/s ± 5%    -8.92%  (p=0.000 n=9+10)
-CRC32/poly=Koopman/size=32kB/align=1-8      471MB/s ± 3%    441MB/s ± 3%    -6.25%  (p=0.000 n=8+10)
-CRC32/poly=IEEE/size=40/align=0-8           975MB/s ± 1%    942MB/s ± 5%    -3.37%  (p=0.001 n=8+10)
-CRC32/poly=IEEE/size=40/align=1-8           974MB/s ± 1%    952MB/s ± 3%    -2.25%  (p=0.000 n=9+10)
-CRC32/poly=Castagnoli/size=1kB/align=0-8   15.6GB/s ± 1%   15.5GB/s ± 1%    -1.02%  (p=0.002 n=9+8)
-CRC32/poly=IEEE/size=15/align=1-8           336MB/s ± 4%    337MB/s ± 4%      ~     (p=0.579 n=10+10)
-CRC32/poly=Castagnoli/size=15/align=0-8     916MB/s ± 2%    920MB/s ± 2%      ~     (p=0.489 n=9+9)
-CRC32/poly=Castagnoli/size=15/align=1-8     870MB/s ± 2%    867MB/s ± 2%      ~     (p=0.661 n=9+10)
-CRC32/poly=Castagnoli/size=40/align=0-8    2.30GB/s ± 2%   2.28GB/s ± 4%      ~     (p=0.684 n=10+10)
-CRC32/poly=Castagnoli/size=40/align=1-8    2.03GB/s ± 3%   2.06GB/s ± 2%      ~     (p=0.063 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=0-8   12.7GB/s ± 2%   12.8GB/s ± 4%      ~     (p=0.529 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=1-8   12.1GB/s ± 3%   12.2GB/s ± 1%      ~     (p=0.780 n=10+9)
-CRC32/poly=Castagnoli/size=1kB/align=1-8   14.6GB/s ± 6%   15.0GB/s ± 2%      ~     (p=0.211 n=10+9)
-CRC32/poly=Castagnoli/size=4kB/align=0-8   25.1GB/s ± 5%   25.7GB/s ± 3%      ~     (p=0.052 n=10+10)
-CRC32/poly=Castagnoli/size=32kB/align=0-8  26.9GB/s ± 4%   26.8GB/s ± 5%      ~     (p=0.842 n=9+10)
-CRC32/poly=Koopman/size=15/align=0-8        412MB/s ±10%    421MB/s ± 3%      ~     (p=0.218 n=10+10)
-CRC32/poly=Koopman/size=15/align=1-8        427MB/s ± 5%    422MB/s ± 1%      ~     (p=0.497 n=10+9)
-CRC32/poly=Koopman/size=40/align=1-8        440MB/s ± 6%    455MB/s ± 3%      ~     (p=0.052 n=10+10)
-CRC32/poly=Koopman/size=512/align=1-8       455MB/s ± 6%    440MB/s ± 8%      ~     (p=0.143 n=10+10)
-CRC32/poly=Koopman/size=1kB/align=0-8       452MB/s ± 9%    438MB/s ± 4%      ~     (p=0.052 n=10+10)
-CRC32/poly=Koopman/size=4kB/align=0-8       454MB/s ± 5%    455MB/s ± 6%      ~     (p=0.971 n=10+10)
-CRC32/poly=Koopman/size=4kB/align=1-8       459MB/s ± 9%    455MB/s ±11%      ~     (p=0.739 n=10+10)
-CRC32/poly=Koopman/size=32kB/align=0-8      453MB/s ± 8%    450MB/s ± 4%      ~     (p=0.684 n=10+10)
-CRC32/poly=Castagnoli/size=32kB/align=1-8  25.9GB/s ± 3%   26.8GB/s ± 4%    +3.62%  (p=0.002 n=9+10)
-CRC32/poly=Koopman/size=40/align=0-8        437MB/s ± 9%    456MB/s ± 2%    +4.50%  (p=0.002 n=10+10)
-CRC32/poly=Castagnoli/size=4kB/align=1-8   24.1GB/s ± 6%   25.3GB/s ± 3%    +4.71%  (p=0.005 n=10+10)
-CRC32/poly=IEEE/size=15/align=0-8           321MB/s ± 8%    337MB/s ± 3%    +5.06%  (p=0.009 n=10+10)
-CRC32/poly=Koopman/size=512/align=0-8       453MB/s ± 5%    476MB/s ± 3%    +5.09%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=512/align=1-8         2.17GB/s ± 3%   8.96GB/s ± 3%  +312.89%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=512/align=0-8         2.15GB/s ± 4%   8.97GB/s ± 3%  +317.65%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=1kB/align=1-8         2.31GB/s ± 2%  10.98GB/s ± 2%  +375.97%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=1kB/align=0-8         2.26GB/s ± 4%  10.88GB/s ± 2%  +381.12%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=4kB/align=0-8         2.36GB/s ± 7%  13.73GB/s ± 1%  +482.26%  (p=0.000 n=10+9)
-CRC32/poly=IEEE/size=4kB/align=1-8         2.33GB/s ± 6%  13.68GB/s ± 3%  +488.23%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=32kB/align=1-8        2.31GB/s ± 8%  15.04GB/s ± 3%  +550.07%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=32kB/align=0-8        2.19GB/s ± 7%  15.19GB/s ± 3%  +591.99%  (p=0.000 n=10+10)
diff --git a/cmd/benchstat/testdata/example.golden b/cmd/benchstat/testdata/example.golden
deleted file mode 100644
index f98bd9e..0000000
--- a/cmd/benchstat/testdata/example.golden
+++ /dev/null
@@ -1,7 +0,0 @@
-name        old time/op    new time/op    delta
-GobEncode     13.6ms ± 1%    11.8ms ± 1%  -13.31%  (p=0.016 n=4+5)
-JSONEncode    32.1ms ± 1%    31.8ms ± 1%     ~     (p=0.286 n=4+5)
-
-name        old speed      new speed      delta
-GobEncode   56.4MB/s ± 1%  65.1MB/s ± 1%  +15.36%  (p=0.016 n=4+5)
-JSONEncode  60.4MB/s ± 1%  61.1MB/s ± 2%     ~     (p=0.286 n=4+5)
diff --git a/cmd/benchstat/testdata/examplehtml.golden b/cmd/benchstat/testdata/examplehtml.golden
deleted file mode 100644
index 6733791..0000000
--- a/cmd/benchstat/testdata/examplehtml.golden
+++ /dev/null
@@ -1,38 +0,0 @@
-<!doctype html>
-<html>
-<head>
-<meta charset="utf-8">
-<title>Performance Result Comparison</title>
-<style>
-.benchstat { border-collapse: collapse; }
-.benchstat th:nth-child(1) { text-align: left; }
-.benchstat tbody td:nth-child(1n+2):not(.note) { text-align: right; padding: 0em 1em; }
-.benchstat tr:not(.configs) th { border-top: 1px solid #666; border-bottom: 1px solid #ccc; }
-.benchstat .nodelta { text-align: center !important; }
-.benchstat .better td.delta { font-weight: bold; }
-.benchstat .worse td.delta { font-weight: bold; color: #c00; }
-</style>
-</head>
-<body>
-
-<table class='benchstat oldnew'>
-<tr class='configs'><th><th>exampleold.txt<th>examplenew.txt
-
-
-<tbody>
-<tr><th><th colspan='2' class='metric'>time/op<th>delta
-<tr class='better'><td>GobEncode<td>13.6ms ± 1%<td>11.8ms ± 1%<td class='delta'>−13.31%<td class='note'>(p=0.016 n=4+5)
-<tr class='unchanged'><td>JSONEncode<td>32.1ms ± 1%<td>31.8ms ± 1%<td class='nodelta'>~<td class='note'>(p=0.286 n=4+5)
-<tr><td>&nbsp;
-</tbody>
-
-<tbody>
-<tr><th><th colspan='2' class='metric'>speed<th>delta
-<tr class='better'><td>GobEncode<td>56.4MB/s ± 1%<td>65.1MB/s ± 1%<td class='delta'>+15.36%<td class='note'>(p=0.016 n=4+5)
-<tr class='unchanged'><td>JSONEncode<td>60.4MB/s ± 1%<td>61.1MB/s ± 2%<td class='nodelta'>~<td class='note'>(p=0.286 n=4+5)
-<tr><td>&nbsp;
-</tbody>
-
-</table>
-</body>
-</html>
diff --git a/cmd/benchstat/testdata/examplenew.txt b/cmd/benchstat/testdata/examplenew.txt
deleted file mode 100644
index 9463228..0000000
--- a/cmd/benchstat/testdata/examplenew.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-BenchmarkGobEncode   	 100	  11773189 ns/op	  65.19 MB/s
-BenchmarkJSONEncode  	  50	  32036529 ns/op	  60.57 MB/s
-BenchmarkGobEncode   	 100	  11942588 ns/op	  64.27 MB/s
-BenchmarkJSONEncode  	  50	  32156552 ns/op	  60.34 MB/s
-BenchmarkGobEncode   	 100	  11786159 ns/op	  65.12 MB/s
-BenchmarkJSONEncode  	  50	  31288355 ns/op	  62.02 MB/s
-BenchmarkGobEncode   	 100	  11628583 ns/op	  66.00 MB/s
-BenchmarkJSONEncode  	  50	  31559706 ns/op	  61.49 MB/s
-BenchmarkGobEncode   	 100	  11815924 ns/op	  64.96 MB/s
-BenchmarkJSONEncode  	  50	  31765634 ns/op	  61.09 MB/s
diff --git a/cmd/benchstat/testdata/exampleold.golden b/cmd/benchstat/testdata/exampleold.golden
deleted file mode 100644
index 13c69a9..0000000
--- a/cmd/benchstat/testdata/exampleold.golden
+++ /dev/null
@@ -1,7 +0,0 @@
-name        time/op
-GobEncode     13.6ms ± 1%
-JSONEncode    32.1ms ± 1%
-
-name        speed
-GobEncode   56.4MB/s ± 1%
-JSONEncode  60.4MB/s ± 1%
diff --git a/cmd/benchstat/testdata/exampleold.txt b/cmd/benchstat/testdata/exampleold.txt
deleted file mode 100644
index 9ec3f51..0000000
--- a/cmd/benchstat/testdata/exampleold.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-BenchmarkGobEncode   	100	  13552735 ns/op	  56.63 MB/s
-BenchmarkJSONEncode  	 50	  32395067 ns/op	  59.90 MB/s
-BenchmarkGobEncode   	100	  13553943 ns/op	  56.63 MB/s
-BenchmarkJSONEncode  	 50	  32334214 ns/op	  60.01 MB/s
-BenchmarkGobEncode   	100	  13606356 ns/op	  56.41 MB/s
-BenchmarkJSONEncode  	 50	  31992891 ns/op	  60.65 MB/s
-BenchmarkGobEncode   	100	  13683198 ns/op	  56.09 MB/s
-BenchmarkJSONEncode  	 50	  31735022 ns/op	  61.15 MB/s
diff --git a/cmd/benchstat/testdata/exampleoldhtml.golden b/cmd/benchstat/testdata/exampleoldhtml.golden
deleted file mode 100644
index 72c3860..0000000
--- a/cmd/benchstat/testdata/exampleoldhtml.golden
+++ /dev/null
@@ -1,39 +0,0 @@
-<!doctype html>
-<html>
-<head>
-<meta charset="utf-8">
-<title>Performance Result Comparison</title>
-<style>
-.benchstat { border-collapse: collapse; }
-.benchstat th:nth-child(1) { text-align: left; }
-.benchstat tbody td:nth-child(1n+2):not(.note) { text-align: right; padding: 0em 1em; }
-.benchstat tr:not(.configs) th { border-top: 1px solid #666; border-bottom: 1px solid #ccc; }
-.benchstat .nodelta { text-align: center !important; }
-.benchstat .better td.delta { font-weight: bold; }
-.benchstat .worse td.delta { font-weight: bold; color: #c00; }
-</style>
-</head>
-<body>
-
-<table class='benchstat '>
-
-
-<tbody>
-
-<tr><th><th>time/op
-<tr><td>GobEncode<td>13.6ms ± 1%
-<tr><td>JSONEncode<td>32.1ms ± 1%
-<tr><td>&nbsp;
-</tbody>
-
-<tbody>
-
-<tr><th><th>speed
-<tr><td>GobEncode<td>56.4MB/s ± 1%
-<tr><td>JSONEncode<td>60.4MB/s ± 1%
-<tr><td>&nbsp;
-</tbody>
-
-</table>
-</body>
-</html>
diff --git a/cmd/benchstat/testdata/issue19565.stdout b/cmd/benchstat/testdata/issue19565.stdout
new file mode 100644
index 0000000..8564c0b
--- /dev/null
+++ b/cmd/benchstat/testdata/issue19565.stdout
@@ -0,0 +1,8 @@
+        │   before    │                after                │
+        │   sec/op    │   sec/op     vs base                │
+A         100.0n ± 0%   100.0n ± 0%       ~ (p=1.000 n=6) ¹
+B         10.00µ ± 0%
+C                       10.00µ ± 0%
+geomean   1.000µ        1.000µ       +0.00%               ²
+¹ all samples are equal
+² benchmark set differs from baseline; geomeans may not be comparable
diff --git a/cmd/benchstat/testdata/issue19565.txt b/cmd/benchstat/testdata/issue19565.txt
new file mode 100644
index 0000000..eefcbeb
--- /dev/null
+++ b/cmd/benchstat/testdata/issue19565.txt
@@ -0,0 +1,29 @@
+note: before
+
+BenchmarkA 10 100 ns/op
+BenchmarkA 10 100 ns/op
+BenchmarkA 10 100 ns/op
+BenchmarkA 10 100 ns/op
+BenchmarkA 10 100 ns/op
+BenchmarkA 10 100 ns/op
+BenchmarkB 10 10000 ns/op
+BenchmarkB 10 10000 ns/op
+BenchmarkB 10 10000 ns/op
+BenchmarkB 10 10000 ns/op
+BenchmarkB 10 10000 ns/op
+BenchmarkB 10 10000 ns/op
+
+note: after
+
+BenchmarkA 10 100 ns/op
+BenchmarkA 10 100 ns/op
+BenchmarkA 10 100 ns/op
+BenchmarkA 10 100 ns/op
+BenchmarkA 10 100 ns/op
+BenchmarkA 10 100 ns/op
+BenchmarkC 10 10000 ns/op
+BenchmarkC 10 10000 ns/op
+BenchmarkC 10 10000 ns/op
+BenchmarkC 10 10000 ns/op
+BenchmarkC 10 10000 ns/op
+BenchmarkC 10 10000 ns/op
diff --git a/cmd/benchstat/testdata/issue19634.stdout b/cmd/benchstat/testdata/issue19634.stdout
new file mode 100644
index 0000000..3c7750c
--- /dev/null
+++ b/cmd/benchstat/testdata/issue19634.stdout
@@ -0,0 +1,4 @@
+               │    before     │                after                │
+               │    sec/op     │    sec/op     vs base               │
+FloatSub/100-4   115.00n ± ∞ ¹   78.80n ± ∞ ¹  -31.48% (p=0.008 n=5)
+¹ need >= 6 samples for confidence interval at level 0.95
diff --git a/cmd/benchstat/testdata/issue19634.txt b/cmd/benchstat/testdata/issue19634.txt
new file mode 100644
index 0000000..a2cb3ec
--- /dev/null
+++ b/cmd/benchstat/testdata/issue19634.txt
@@ -0,0 +1,15 @@
+note: before
+
+BenchmarkFloatSub/100-4         20000000           115 ns/op
+BenchmarkFloatSub/100-4         20000000           114 ns/op
+BenchmarkFloatSub/100-4         20000000           115 ns/op
+BenchmarkFloatSub/100-4         20000000           115 ns/op
+BenchmarkFloatSub/100-4         20000000           115 ns/op
+
+note: after
+
+BenchmarkFloatSub/100-4         20000000            78.8 ns/op
+BenchmarkFloatSub/100-4         20000000            78.8 ns/op
+BenchmarkFloatSub/100-4         20000000            78.8 ns/op
+BenchmarkFloatSub/100-4         20000000            78.8 ns/op
+BenchmarkFloatSub/100-4         20000000            78.8 ns/op
diff --git a/cmd/benchstat/testdata/namesort.golden b/cmd/benchstat/testdata/namesort.golden
deleted file mode 100644
index 2dea70b..0000000
--- a/cmd/benchstat/testdata/namesort.golden
+++ /dev/null
@@ -1,75 +0,0 @@
-name                                       old time/op    new time/op     delta
-CRC32/poly=Castagnoli/size=15/align=0-8      16.4ns ± 3%     16.3ns ± 2%      ~     (p=0.615 n=9+9)
-CRC32/poly=Castagnoli/size=15/align=1-8      17.2ns ± 2%     17.3ns ± 2%      ~     (p=0.650 n=9+10)
-CRC32/poly=Castagnoli/size=1kB/align=0-8     65.5ns ± 1%     66.2ns ± 1%    +1.01%  (p=0.003 n=9+8)
-CRC32/poly=Castagnoli/size=1kB/align=1-8     70.1ns ± 6%     68.5ns ± 2%      ~     (p=0.190 n=10+9)
-CRC32/poly=Castagnoli/size=32kB/align=0-8    1.22µs ± 4%     1.21µs ± 3%      ~     (p=0.882 n=9+9)
-CRC32/poly=Castagnoli/size=32kB/align=1-8    1.26µs ± 3%     1.22µs ± 4%    -3.48%  (p=0.002 n=9+10)
-CRC32/poly=Castagnoli/size=40/align=0-8      17.4ns ± 2%     17.5ns ± 4%      ~     (p=0.694 n=10+10)
-CRC32/poly=Castagnoli/size=40/align=1-8      19.7ns ± 3%     19.4ns ± 2%    -1.62%  (p=0.036 n=10+10)
-CRC32/poly=Castagnoli/size=4kB/align=0-8      163ns ± 5%      159ns ± 3%    -2.46%  (p=0.032 n=10+10)
-CRC32/poly=Castagnoli/size=4kB/align=1-8      169ns ± 6%      162ns ± 3%    -4.60%  (p=0.005 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=0-8     40.2ns ± 2%     40.1ns ± 4%      ~     (p=0.614 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=1-8     42.1ns ± 3%     41.9ns ± 2%      ~     (p=0.952 n=10+9)
-CRC32/poly=IEEE/size=15/align=0-8            46.9ns ± 8%     44.5ns ± 3%    -5.01%  (p=0.008 n=10+10)
-CRC32/poly=IEEE/size=15/align=1-8            44.7ns ± 5%     44.5ns ± 4%      ~     (p=0.539 n=10+10)
-CRC32/poly=IEEE/size=1kB/align=0-8            452ns ± 4%       94ns ± 2%   -79.20%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=1kB/align=1-8            444ns ± 2%       93ns ± 2%   -78.97%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=32kB/align=0-8          15.0µs ± 7%      2.2µs ± 3%   -85.57%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=32kB/align=1-8          14.2µs ± 7%      2.2µs ± 3%   -84.65%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=40/align=0-8            41.0ns ± 1%     42.5ns ± 6%    +3.56%  (p=0.000 n=8+10)
-CRC32/poly=IEEE/size=40/align=1-8            41.1ns ± 1%     42.0ns ± 3%    +2.34%  (p=0.000 n=9+10)
-CRC32/poly=IEEE/size=4kB/align=0-8           1.74µs ± 8%     0.30µs ± 1%   -82.87%  (p=0.000 n=10+9)
-CRC32/poly=IEEE/size=4kB/align=1-8           1.76µs ± 6%     0.30µs ± 3%   -83.05%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=512/align=0-8            238ns ± 5%       57ns ± 3%   -76.00%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=512/align=1-8            236ns ± 3%       57ns ± 3%   -75.72%  (p=0.000 n=10+10)
-CRC32/poly=Koopman/size=15/align=0-8         36.5ns ±11%     35.6ns ± 3%      ~     (p=0.216 n=10+10)
-CRC32/poly=Koopman/size=15/align=1-8         35.1ns ± 5%     35.5ns ± 1%      ~     (p=0.508 n=10+9)
-CRC32/poly=Koopman/size=1kB/align=0-8        2.24µs ± 6%     2.34µs ± 4%    +4.34%  (p=0.010 n=9+10)
-CRC32/poly=Koopman/size=1kB/align=1-8        2.15µs ± 2%     2.36µs ± 5%    +9.84%  (p=0.000 n=9+10)
-CRC32/poly=Koopman/size=32kB/align=0-8       72.4µs ± 9%     72.9µs ± 4%      ~     (p=0.684 n=10+10)
-CRC32/poly=Koopman/size=32kB/align=1-8       69.6µs ± 3%     74.3µs ± 3%    +6.70%  (p=0.000 n=8+10)
-CRC32/poly=Koopman/size=40/align=0-8         91.6ns ± 9%     87.6ns ± 2%    -4.35%  (p=0.002 n=10+10)
-CRC32/poly=Koopman/size=40/align=1-8         91.1ns ± 6%     88.0ns ± 3%      ~     (p=0.055 n=10+10)
-CRC32/poly=Koopman/size=4kB/align=0-8        9.03µs ± 6%     9.00µs ± 6%      ~     (p=0.971 n=10+10)
-CRC32/poly=Koopman/size=4kB/align=1-8        8.94µs ±10%     9.05µs ±12%      ~     (p=0.754 n=10+10)
-CRC32/poly=Koopman/size=512/align=0-8        1.13µs ± 5%     1.08µs ± 3%    -4.93%  (p=0.000 n=10+10)
-CRC32/poly=Koopman/size=512/align=1-8        1.13µs ± 6%     1.17µs ± 8%      ~     (p=0.143 n=10+10)
-
-name                                       old speed      new speed       delta
-CRC32/poly=Castagnoli/size=15/align=0-8     916MB/s ± 2%    920MB/s ± 2%      ~     (p=0.489 n=9+9)
-CRC32/poly=Castagnoli/size=15/align=1-8     870MB/s ± 2%    867MB/s ± 2%      ~     (p=0.661 n=9+10)
-CRC32/poly=Castagnoli/size=1kB/align=0-8   15.6GB/s ± 1%   15.5GB/s ± 1%    -1.02%  (p=0.002 n=9+8)
-CRC32/poly=Castagnoli/size=1kB/align=1-8   14.6GB/s ± 6%   15.0GB/s ± 2%      ~     (p=0.211 n=10+9)
-CRC32/poly=Castagnoli/size=32kB/align=0-8  26.9GB/s ± 4%   26.8GB/s ± 5%      ~     (p=0.842 n=9+10)
-CRC32/poly=Castagnoli/size=32kB/align=1-8  25.9GB/s ± 3%   26.8GB/s ± 4%    +3.62%  (p=0.002 n=9+10)
-CRC32/poly=Castagnoli/size=40/align=0-8    2.30GB/s ± 2%   2.28GB/s ± 4%      ~     (p=0.684 n=10+10)
-CRC32/poly=Castagnoli/size=40/align=1-8    2.03GB/s ± 3%   2.06GB/s ± 2%      ~     (p=0.063 n=10+10)
-CRC32/poly=Castagnoli/size=4kB/align=0-8   25.1GB/s ± 5%   25.7GB/s ± 3%      ~     (p=0.052 n=10+10)
-CRC32/poly=Castagnoli/size=4kB/align=1-8   24.1GB/s ± 6%   25.3GB/s ± 3%    +4.71%  (p=0.005 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=0-8   12.7GB/s ± 2%   12.8GB/s ± 4%      ~     (p=0.529 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=1-8   12.1GB/s ± 3%   12.2GB/s ± 1%      ~     (p=0.780 n=10+9)
-CRC32/poly=IEEE/size=15/align=0-8           321MB/s ± 8%    337MB/s ± 3%    +5.06%  (p=0.009 n=10+10)
-CRC32/poly=IEEE/size=15/align=1-8           336MB/s ± 4%    337MB/s ± 4%      ~     (p=0.579 n=10+10)
-CRC32/poly=IEEE/size=1kB/align=0-8         2.26GB/s ± 4%  10.88GB/s ± 2%  +381.12%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=1kB/align=1-8         2.31GB/s ± 2%  10.98GB/s ± 2%  +375.97%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=32kB/align=0-8        2.19GB/s ± 7%  15.19GB/s ± 3%  +591.99%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=32kB/align=1-8        2.31GB/s ± 8%  15.04GB/s ± 3%  +550.07%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=40/align=0-8           975MB/s ± 1%    942MB/s ± 5%    -3.37%  (p=0.001 n=8+10)
-CRC32/poly=IEEE/size=40/align=1-8           974MB/s ± 1%    952MB/s ± 3%    -2.25%  (p=0.000 n=9+10)
-CRC32/poly=IEEE/size=4kB/align=0-8         2.36GB/s ± 7%  13.73GB/s ± 1%  +482.26%  (p=0.000 n=10+9)
-CRC32/poly=IEEE/size=4kB/align=1-8         2.33GB/s ± 6%  13.68GB/s ± 3%  +488.23%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=512/align=0-8         2.15GB/s ± 4%   8.97GB/s ± 3%  +317.65%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=512/align=1-8         2.17GB/s ± 3%   8.96GB/s ± 3%  +312.89%  (p=0.000 n=10+10)
-CRC32/poly=Koopman/size=15/align=0-8        412MB/s ±10%    421MB/s ± 3%      ~     (p=0.218 n=10+10)
-CRC32/poly=Koopman/size=15/align=1-8        427MB/s ± 5%    422MB/s ± 1%      ~     (p=0.497 n=10+9)
-CRC32/poly=Koopman/size=1kB/align=0-8       452MB/s ± 9%    438MB/s ± 4%      ~     (p=0.052 n=10+10)
-CRC32/poly=Koopman/size=1kB/align=1-8       477MB/s ± 2%    434MB/s ± 5%    -8.92%  (p=0.000 n=9+10)
-CRC32/poly=Koopman/size=32kB/align=0-8      453MB/s ± 8%    450MB/s ± 4%      ~     (p=0.684 n=10+10)
-CRC32/poly=Koopman/size=32kB/align=1-8      471MB/s ± 3%    441MB/s ± 3%    -6.25%  (p=0.000 n=8+10)
-CRC32/poly=Koopman/size=40/align=0-8        437MB/s ± 9%    456MB/s ± 2%    +4.50%  (p=0.002 n=10+10)
-CRC32/poly=Koopman/size=40/align=1-8        440MB/s ± 6%    455MB/s ± 3%      ~     (p=0.052 n=10+10)
-CRC32/poly=Koopman/size=4kB/align=0-8       454MB/s ± 5%    455MB/s ± 6%      ~     (p=0.971 n=10+10)
-CRC32/poly=Koopman/size=4kB/align=1-8       459MB/s ± 9%    455MB/s ±11%      ~     (p=0.739 n=10+10)
-CRC32/poly=Koopman/size=512/align=0-8       453MB/s ± 5%    476MB/s ± 3%    +5.09%  (p=0.000 n=10+10)
-CRC32/poly=Koopman/size=512/align=1-8       455MB/s ± 6%    440MB/s ± 8%      ~     (p=0.143 n=10+10)
diff --git a/cmd/benchstat/testdata/new.txt b/cmd/benchstat/testdata/new.txt
index b45c622..2ec1a22 100644
--- a/cmd/benchstat/testdata/new.txt
+++ b/cmd/benchstat/testdata/new.txt
@@ -1,367 +1,28 @@
-pkg: hash/crc32
-goarch: amd64
-goos: darwin
-note: hw acceleration enabled
+new.txt is with Go 1.17. old.txt format=json is with Go 1.16 and
+format=gob is with 1.17 (since this is just example data, I wanted to
+capture both a significant change and a non-significant change).
 
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        43.6 ns/op	 343.87 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        45.4 ns/op	 330.50 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        43.4 ns/op	 345.50 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        45.3 ns/op	 331.23 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        43.9 ns/op	 341.73 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        46.0 ns/op	 326.03 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        45.1 ns/op	 332.54 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        43.8 ns/op	 342.12 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        43.8 ns/op	 342.14 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        44.9 ns/op	 333.84 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        43.8 ns/op	 342.08 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.0 ns/op	 340.68 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        45.0 ns/op	 333.41 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.1 ns/op	 340.41 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        43.7 ns/op	 343.58 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        43.7 ns/op	 342.96 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.9 ns/op	 333.71 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        46.3 ns/op	 323.68 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.9 ns/op	 333.82 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.6 ns/op	 336.33 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        43.9 ns/op	 912.06 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        42.3 ns/op	 945.28 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        42.7 ns/op	 937.51 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        43.5 ns/op	 919.03 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        44.9 ns/op	 890.35 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        42.6 ns/op	 938.20 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        41.3 ns/op	 968.78 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        41.2 ns/op	 970.69 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        41.3 ns/op	 968.06 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        41.3 ns/op	 968.27 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.3 ns/op	 968.46 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        42.5 ns/op	 941.20 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        43.2 ns/op	 926.72 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        42.1 ns/op	 949.91 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.9 ns/op	 953.96 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.9 ns/op	 954.23 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.5 ns/op	 964.12 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        42.9 ns/op	 932.70 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.7 ns/op	 959.93 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.4 ns/op	 966.36 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	 2000000	        56.1 ns/op	9128.24 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	 2000000	        56.2 ns/op	9107.94 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	 3000000	        56.1 ns/op	9129.45 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	 3000000	        56.5 ns/op	9064.49 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	 3000000	        58.1 ns/op	8819.84 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	 3000000	        59.0 ns/op	8675.61 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	 2000000	        55.9 ns/op	9157.53 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	 3000000	        57.0 ns/op	8976.22 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	 2000000	        58.7 ns/op	8724.66 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	 2000000	        57.6 ns/op	8887.48 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	 2000000	        58.3 ns/op	8782.52 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	 3000000	        57.6 ns/op	8882.44 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	 2000000	        57.0 ns/op	8980.50 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	 3000000	        56.4 ns/op	9071.81 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	 3000000	        56.0 ns/op	9135.04 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	 2000000	        58.8 ns/op	8705.97 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	 2000000	        56.2 ns/op	9115.68 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	 3000000	        57.3 ns/op	8941.28 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	 3000000	        56.3 ns/op	9090.66 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	 3000000	        57.8 ns/op	8854.75 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	 2000000	        92.6 ns/op	11058.32 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	 2000000	        93.8 ns/op	10913.47 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	 2000000	        93.5 ns/op	10953.07 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	 2000000	        95.0 ns/op	10778.58 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	 2000000	        94.8 ns/op	10799.14 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	 2000000	        95.3 ns/op	10741.03 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	 2000000	        92.6 ns/op	11055.29 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	 1000000	       100 ns/op	10189.63 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	 2000000	        95.3 ns/op	10747.01 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	 1000000	       102 ns/op	9978.90 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	 1000000	       103 ns/op	9907.74 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	 2000000	        92.8 ns/op	11036.21 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	 2000000	        92.8 ns/op	11031.01 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	 1000000	       102 ns/op	10006.87 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	 2000000	        93.1 ns/op	11000.89 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	 2000000	        92.8 ns/op	11029.36 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	 2000000	        95.3 ns/op	10743.40 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	 2000000	        93.6 ns/op	10941.47 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	 2000000	        93.3 ns/op	10979.13 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	 2000000	        92.6 ns/op	11053.13 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  500000	       302 ns/op	13561.37 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  500000	       294 ns/op	13906.19 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  500000	       298 ns/op	13734.81 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  500000	       296 ns/op	13826.62 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  500000	       301 ns/op	13598.75 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  500000	       299 ns/op	13676.26 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  500000	       299 ns/op	13686.88 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  500000	       289 ns/op	14143.28 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  500000	       296 ns/op	13824.99 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  500000	       298 ns/op	13716.11 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  500000	       295 ns/op	13878.73 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  500000	       302 ns/op	13531.91 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  500000	       295 ns/op	13868.51 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  500000	       293 ns/op	13944.64 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  500000	       300 ns/op	13620.51 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  500000	       296 ns/op	13820.66 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  500000	       305 ns/op	13418.41 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  500000	       293 ns/op	13960.90 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  500000	       303 ns/op	13506.47 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  500000	       309 ns/op	13218.83 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	  100000	      2149 ns/op	15241.57 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   50000	      2230 ns/op	14693.09 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	  100000	      2183 ns/op	15005.52 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	  100000	      2115 ns/op	15486.09 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	  100000	      2227 ns/op	14708.97 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   50000	      2140 ns/op	15308.69 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	  100000	      2157 ns/op	15188.25 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   50000	      2133 ns/op	15359.48 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	  100000	      2117 ns/op	15475.06 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	  100000	      2129 ns/op	15385.26 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	  100000	      2233 ns/op	14673.10 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	  100000	      2244 ns/op	14596.46 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   50000	      2154 ns/op	15211.73 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	  100000	      2156 ns/op	15196.79 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   50000	      2159 ns/op	15175.66 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	  100000	      2151 ns/op	15228.03 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	  100000	      2169 ns/op	15105.80 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	  100000	      2206 ns/op	14850.25 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	  100000	      2145 ns/op	15271.77 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   50000	      2166 ns/op	15126.92 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.1 ns/op	 932.96 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.4 ns/op	 912.82 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.2 ns/op	 924.87 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.1 ns/op	 934.22 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.2 ns/op	 924.90 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.6 ns/op	 904.66 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.5 ns/op	 910.72 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        17.6 ns/op	 853.00 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.4 ns/op	 915.21 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.2 ns/op	 923.54 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.2 ns/op	 871.49 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.5 ns/op	 854.97 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.3 ns/op	 864.75 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        16.9 ns/op	 885.36 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.4 ns/op	 862.94 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        16.9 ns/op	 887.89 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.0 ns/op	 879.90 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.7 ns/op	 847.32 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.6 ns/op	 854.26 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.4 ns/op	 864.10 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.5 ns/op	2279.37 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.1 ns/op	2334.42 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.9 ns/op	2235.38 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.1 ns/op	2338.42 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.2 ns/op	2325.21 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.8 ns/op	2251.24 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.8 ns/op	2242.41 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        18.2 ns/op	2199.30 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.4 ns/op	2305.18 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.3 ns/op	2315.62 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.5 ns/op	2055.96 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.7 ns/op	2034.99 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.3 ns/op	2068.49 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.8 ns/op	2021.43 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.4 ns/op	2060.79 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.3 ns/op	2076.79 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.1 ns/op	2091.18 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.0 ns/op	2100.75 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.5 ns/op	2049.71 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.3 ns/op	2074.54 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        39.7 ns/op	12891.27 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        40.1 ns/op	12777.24 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        41.7 ns/op	12266.52 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        39.8 ns/op	12866.81 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        39.8 ns/op	12862.14 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        40.8 ns/op	12537.54 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        39.3 ns/op	13022.34 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        39.9 ns/op	12817.64 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        40.8 ns/op	12543.27 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        39.4 ns/op	12993.64 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        41.7 ns/op	12276.08 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        42.0 ns/op	12199.95 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        42.2 ns/op	12121.96 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        41.9 ns/op	12215.42 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        43.2 ns/op	11854.68 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        42.1 ns/op	12165.99 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        41.5 ns/op	12328.95 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        41.9 ns/op	12206.89 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        41.6 ns/op	12306.14 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        42.6 ns/op	12022.39 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        65.4 ns/op	15646.46 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        65.9 ns/op	15531.97 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        66.4 ns/op	15422.12 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        66.7 ns/op	15358.30 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        72.1 ns/op	14207.76 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        66.3 ns/op	15455.53 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        66.2 ns/op	15479.94 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        68.6 ns/op	14926.33 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        66.3 ns/op	15436.78 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        66.1 ns/op	15481.91 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        68.6 ns/op	14928.29 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        67.2 ns/op	15241.88 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        68.3 ns/op	14985.73 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        68.5 ns/op	14958.31 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        69.2 ns/op	14806.15 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        68.4 ns/op	14977.84 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        69.1 ns/op	14822.32 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        67.5 ns/op	15171.65 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        70.7 ns/op	14489.26 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        69.4 ns/op	14744.72 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       157 ns/op	25978.03 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       156 ns/op	26173.33 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       160 ns/op	25471.67 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       156 ns/op	26190.72 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       164 ns/op	24861.89 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       157 ns/op	26018.79 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       156 ns/op	26101.53 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       163 ns/op	24990.34 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       162 ns/op	25138.27 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       157 ns/op	25972.54 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       164 ns/op	24908.40 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       160 ns/op	25529.63 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       167 ns/op	24412.21 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       162 ns/op	25206.52 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       158 ns/op	25827.66 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       160 ns/op	25519.93 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       163 ns/op	25090.11 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       159 ns/op	25657.75 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       160 ns/op	25576.18 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       163 ns/op	25007.68 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1233 ns/op	26561.87 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1223 ns/op	26786.22 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1193 ns/op	27451.16 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1288 ns/op	25436.18 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1212 ns/op	27022.56 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1245 ns/op	26314.42 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1208 ns/op	27105.89 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1200 ns/op	27296.77 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1226 ns/op	26714.57 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  200000	      1189 ns/op	27542.78 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1235 ns/op	26530.39 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1193 ns/op	27444.08 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1272 ns/op	25752.85 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1210 ns/op	27080.94 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1207 ns/op	27142.02 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1245 ns/op	26305.41 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1226 ns/op	26710.70 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1223 ns/op	26787.03 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1217 ns/op	26921.93 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1180 ns/op	27746.71 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        35.6 ns/op	 421.93 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        35.2 ns/op	 425.84 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        36.1 ns/op	 415.35 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        35.2 ns/op	 426.25 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        35.6 ns/op	 420.98 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        36.6 ns/op	 410.25 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        35.1 ns/op	 427.35 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        35.8 ns/op	 419.33 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        35.6 ns/op	 421.31 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        35.2 ns/op	 425.93 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        35.1 ns/op	 427.06 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        35.7 ns/op	 420.07 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 3000000	        35.2 ns/op	 425.76 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        35.4 ns/op	 423.84 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        35.4 ns/op	 423.98 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        37.4 ns/op	 400.57 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        35.7 ns/op	 419.68 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        35.6 ns/op	 421.03 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        35.5 ns/op	 422.80 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        36.0 ns/op	 417.04 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        89.8 ns/op	 445.33 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        89.1 ns/op	 448.86 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        87.6 ns/op	 456.76 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        86.7 ns/op	 461.44 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        86.6 ns/op	 462.14 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        88.0 ns/op	 454.58 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        87.2 ns/op	 458.73 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        87.5 ns/op	 457.33 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        86.4 ns/op	 462.92 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        87.6 ns/op	 456.63 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        87.2 ns/op	 458.58 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        87.0 ns/op	 459.82 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        89.1 ns/op	 449.14 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        86.5 ns/op	 462.62 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        88.8 ns/op	 450.43 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        87.1 ns/op	 459.03 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        87.8 ns/op	 455.54 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        87.5 ns/op	 457.36 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        90.9 ns/op	 440.04 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        88.4 ns/op	 452.59 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1100 ns/op	 465.26 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1072 ns/op	 477.31 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1075 ns/op	 475.90 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1056 ns/op	 484.71 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1113 ns/op	 459.68 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1054 ns/op	 485.69 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1066 ns/op	 480.13 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1074 ns/op	 476.36 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1057 ns/op	 483.93 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1092 ns/op	 468.52 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1074 ns/op	 476.50 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1100 ns/op	 465.08 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1106 ns/op	 462.87 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1226 ns/op	 417.43 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1189 ns/op	 430.31 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  100000	      1176 ns/op	 435.29 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1235 ns/op	 414.39 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1213 ns/op	 422.03 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1210 ns/op	 422.92 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1137 ns/op	 450.03 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2259 ns/op	 453.12 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2322 ns/op	 441.00 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2350 ns/op	 435.62 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2355 ns/op	 434.74 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	   50000	      2291 ns/op	 446.86 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2343 ns/op	 436.93 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2416 ns/op	 423.68 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2410 ns/op	 424.86 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2256 ns/op	 453.73 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2405 ns/op	 425.75 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	   50000	      2472 ns/op	 414.20 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2433 ns/op	 420.76 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	   50000	      2390 ns/op	 428.34 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2368 ns/op	 432.28 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2294 ns/op	 446.37 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2334 ns/op	 438.61 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2298 ns/op	 445.50 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2374 ns/op	 431.17 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2354 ns/op	 434.95 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2284 ns/op	 448.24 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8799 ns/op	 465.49 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9346 ns/op	 438.26 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9129 ns/op	 448.65 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8710 ns/op	 470.25 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8699 ns/op	 470.83 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8704 ns/op	 470.58 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9563 ns/op	 428.28 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9173 ns/op	 446.50 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9286 ns/op	 441.08 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8623 ns/op	 475.00 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      8410 ns/op	 487.02 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      8847 ns/op	 462.96 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      8430 ns/op	 485.83 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      8505 ns/op	 481.58 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      8426 ns/op	 486.11 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	     10107 ns/op	 405.24 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9125 ns/op	 448.83 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9740 ns/op	 420.50 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9564 ns/op	 428.23 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9309 ns/op	 439.97 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     69825 ns/op	 469.28 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     74409 ns/op	 440.37 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     75041 ns/op	 436.67 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     72268 ns/op	 453.42 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     73333 ns/op	 446.84 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     70923 ns/op	 462.02 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     70095 ns/op	 467.48 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     76125 ns/op	 430.45 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     73908 ns/op	 443.36 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     73078 ns/op	 448.39 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     76238 ns/op	 429.81 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     74141 ns/op	 441.97 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     71910 ns/op	 455.67 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     76684 ns/op	 427.31 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     75951 ns/op	 431.43 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     73299 ns/op	 447.04 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     76288 ns/op	 429.53 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     73453 ns/op	 446.10 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     72748 ns/op	 450.43 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     72097 ns/op	 454.50 MB/s
+goos: linux
+goarch: amd64
+pkg: golang.org/x/perf/cmd/benchstat/testdata
+BenchmarkEncode/format=json-48         	  714387	      1423 ns/op
+BenchmarkEncode/format=json-48         	  845445	      1416 ns/op
+BenchmarkEncode/format=json-48         	  815714	      1411 ns/op
+BenchmarkEncode/format=json-48         	  828824	      1413 ns/op
+BenchmarkEncode/format=json-48         	  834070	      1412 ns/op
+BenchmarkEncode/format=json-48         	  828123	      1426 ns/op
+BenchmarkEncode/format=json-48         	  834493	      1422 ns/op
+BenchmarkEncode/format=json-48         	  838406	      1424 ns/op
+BenchmarkEncode/format=json-48         	  836227	      1447 ns/op
+BenchmarkEncode/format=json-48         	  830835	      1425 ns/op
+BenchmarkEncode/format=gob-48          	  394441	      3075 ns/op
+BenchmarkEncode/format=gob-48          	  393207	      3065 ns/op
+BenchmarkEncode/format=gob-48          	  392374	      3059 ns/op
+BenchmarkEncode/format=gob-48          	  396037	      3065 ns/op
+BenchmarkEncode/format=gob-48          	  393255	      3060 ns/op
+BenchmarkEncode/format=gob-48          	  382629	      3081 ns/op
+BenchmarkEncode/format=gob-48          	  389558	      3186 ns/op
+BenchmarkEncode/format=gob-48          	  392668	      3135 ns/op
+BenchmarkEncode/format=gob-48          	  392313	      3087 ns/op
+BenchmarkEncode/format=gob-48          	  394274	      3062 ns/op
 PASS
-ok  	hash/crc32	72.655s
diff --git a/cmd/benchstat/testdata/new4.golden b/cmd/benchstat/testdata/new4.golden
deleted file mode 100644
index a1eb317..0000000
--- a/cmd/benchstat/testdata/new4.golden
+++ /dev/null
@@ -1,27 +0,0 @@
-name                                      old time/op    new time/op    delta
-CRC32/poly=IEEE/size=40/align=0-8           42.5ns ± 6%    42.1ns ± 3%      ~     (p=0.642 n=10+10)
-CRC32/poly=IEEE/size=40/align=1-8           42.0ns ± 3%    41.7ns ± 5%      ~     (p=0.148 n=10+10)
-CRC32/poly=IEEE/size=4kB/align=0-8           298ns ± 1%    1682ns ± 2%  +464.22%  (p=0.000 n=9+9)
-CRC32/poly=IEEE/size=4kB/align=1-8           299ns ± 3%    1690ns ± 4%  +464.96%  (p=0.000 n=10+10)
-CRC32/poly=Castagnoli/size=40/align=0-8     17.5ns ± 4%    18.6ns ±11%    +5.99%  (p=0.049 n=10+10)
-CRC32/poly=Castagnoli/size=40/align=1-8     19.4ns ± 2%    19.6ns ± 2%      ~     (p=0.072 n=10+8)
-CRC32/poly=Castagnoli/size=4kB/align=0-8     159ns ± 3%     161ns ± 8%      ~     (p=0.421 n=10+10)
-CRC32/poly=Castagnoli/size=4kB/align=1-8     162ns ± 3%     170ns ± 8%    +4.95%  (p=0.019 n=10+10)
-CRC32/poly=Koopman/size=40/align=0-8        87.6ns ± 2%    93.8ns ±13%      ~     (p=0.052 n=10+10)
-CRC32/poly=Koopman/size=40/align=1-8        88.0ns ± 3%    86.9ns ± 3%    -1.33%  (p=0.050 n=10+10)
-CRC32/poly=Koopman/size=4kB/align=0-8       9.00µs ± 6%    9.08µs ± 8%      ~     (p=0.631 n=10+10)
-CRC32/poly=Koopman/size=4kB/align=1-8       9.05µs ±12%    9.46µs ± 8%      ~     (p=0.123 n=10+10)
-
-name                                      old speed      new speed      delta
-CRC32/poly=IEEE/size=40/align=0-8          942MB/s ± 5%   951MB/s ± 3%      ~     (p=0.684 n=10+10)
-CRC32/poly=IEEE/size=40/align=1-8          952MB/s ± 3%   960MB/s ± 4%      ~     (p=0.143 n=10+10)
-CRC32/poly=IEEE/size=4kB/align=0-8        13.7GB/s ± 1%   2.4GB/s ± 2%   -82.26%  (p=0.000 n=9+9)
-CRC32/poly=IEEE/size=4kB/align=1-8        13.7GB/s ± 3%   2.4GB/s ± 4%   -82.28%  (p=0.000 n=10+10)
-CRC32/poly=Castagnoli/size=40/align=0-8   2.28GB/s ± 4%  2.16GB/s ±11%      ~     (p=0.052 n=10+10)
-CRC32/poly=Castagnoli/size=40/align=1-8   2.06GB/s ± 2%  2.04GB/s ± 2%      ~     (p=0.055 n=10+8)
-CRC32/poly=Castagnoli/size=4kB/align=0-8  25.7GB/s ± 3%  25.4GB/s ± 7%      ~     (p=0.529 n=10+10)
-CRC32/poly=Castagnoli/size=4kB/align=1-8  25.3GB/s ± 3%  24.1GB/s ± 8%    -4.55%  (p=0.015 n=10+10)
-CRC32/poly=Koopman/size=40/align=0-8       456MB/s ± 2%   428MB/s ±12%      ~     (p=0.052 n=10+10)
-CRC32/poly=Koopman/size=40/align=1-8       455MB/s ± 3%   461MB/s ± 3%      ~     (p=0.052 n=10+10)
-CRC32/poly=Koopman/size=4kB/align=0-8      455MB/s ± 6%   452MB/s ± 8%      ~     (p=0.631 n=10+10)
-CRC32/poly=Koopman/size=4kB/align=1-8      455MB/s ±11%   434MB/s ± 9%      ~     (p=0.123 n=10+10)
diff --git a/cmd/benchstat/testdata/old.txt b/cmd/benchstat/testdata/old.txt
index cd1b57e..7dab559 100644
--- a/cmd/benchstat/testdata/old.txt
+++ b/cmd/benchstat/testdata/old.txt
@@ -1,367 +1,24 @@
-pkg: hash/crc32
+goos: linux
 goarch: amd64
-goos: darwin
-note: hw acceleration disabled
-
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        44.6 ns/op	 336.59 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        47.3 ns/op	 317.29 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        44.6 ns/op	 336.14 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        46.3 ns/op	 323.80 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        50.7 ns/op	 295.90 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        45.8 ns/op	 327.41 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        46.8 ns/op	 320.56 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        44.3 ns/op	 338.48 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        47.7 ns/op	 314.52 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 3000000	        50.6 ns/op	 296.42 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        46.8 ns/op	 320.44 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.0 ns/op	 340.69 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.6 ns/op	 336.05 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.1 ns/op	 340.42 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.5 ns/op	 336.82 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.0 ns/op	 340.58 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.2 ns/op	 339.16 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        45.1 ns/op	 332.85 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        44.0 ns/op	 340.71 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 3000000	        45.8 ns/op	 327.44 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        41.0 ns/op	 975.84 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        43.0 ns/op	 930.22 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        40.9 ns/op	 978.39 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        42.3 ns/op	 944.76 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        41.0 ns/op	 976.58 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        41.0 ns/op	 975.15 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        41.3 ns/op	 967.88 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        40.8 ns/op	 979.93 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        41.1 ns/op	 974.19 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        41.2 ns/op	 969.78 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        42.9 ns/op	 931.47 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        40.9 ns/op	 977.34 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.4 ns/op	 965.44 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.2 ns/op	 971.79 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.1 ns/op	 972.53 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        40.8 ns/op	 979.69 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        40.9 ns/op	 978.70 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        40.9 ns/op	 978.58 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.5 ns/op	 964.07 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.0 ns/op	 974.58 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       249 ns/op	2051.08 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       248 ns/op	2058.43 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       232 ns/op	2202.80 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       238 ns/op	2145.40 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       234 ns/op	2181.69 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       237 ns/op	2151.78 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       241 ns/op	2117.97 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       231 ns/op	2213.97 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       231 ns/op	2207.52 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       239 ns/op	2139.64 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       238 ns/op	2143.57 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       232 ns/op	2205.23 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       230 ns/op	2220.72 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       233 ns/op	2192.30 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       231 ns/op	2215.20 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       238 ns/op	2143.03 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       242 ns/op	2108.05 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       241 ns/op	2117.57 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       230 ns/op	2220.54 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       240 ns/op	2125.08 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  300000	       435 ns/op	2352.05 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  300000	       461 ns/op	2218.18 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  300000	       446 ns/op	2291.09 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  300000	       448 ns/op	2284.64 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  300000	       448 ns/op	2282.03 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  300000	       464 ns/op	2206.86 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  300000	       456 ns/op	2242.96 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  300000	       457 ns/op	2239.06 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  300000	       461 ns/op	2219.82 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  300000	       449 ns/op	2278.55 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  300000	       436 ns/op	2343.28 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  300000	       452 ns/op	2263.28 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  300000	       443 ns/op	2310.35 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  300000	       446 ns/op	2292.48 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  300000	       436 ns/op	2346.76 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  300000	       445 ns/op	2296.07 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  300000	       451 ns/op	2268.08 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  300000	       450 ns/op	2274.49 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  300000	       441 ns/op	2320.41 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  300000	       436 ns/op	2346.69 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1698 ns/op	2411.93 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1667 ns/op	2456.00 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1694 ns/op	2416.70 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1654 ns/op	2476.16 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1704 ns/op	2402.42 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1876 ns/op	2182.35 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1790 ns/op	2287.20 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1812 ns/op	2259.59 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1828 ns/op	2239.82 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1677 ns/op	2441.05 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1679 ns/op	2438.77 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1705 ns/op	2401.47 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1665 ns/op	2459.27 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1678 ns/op	2440.03 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1819 ns/op	2251.46 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1871 ns/op	2188.25 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1878 ns/op	2180.70 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1782 ns/op	2297.30 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1797 ns/op	2278.59 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1769 ns/op	2315.22 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     14964 ns/op	2189.65 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     14612 ns/op	2242.49 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     14258 ns/op	2298.15 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     15641 ns/op	2094.94 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     14653 ns/op	2236.19 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     15101 ns/op	2169.79 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     15065 ns/op	2174.97 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     15801 ns/op	2073.78 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     15459 ns/op	2119.55 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     13975 ns/op	2344.74 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     13567 ns/op	2415.11 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     14515 ns/op	2257.37 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     14510 ns/op	2258.21 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     15133 ns/op	2165.26 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     14383 ns/op	2278.22 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     14983 ns/op	2186.90 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     13519 ns/op	2423.74 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     13154 ns/op	2491.10 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     13594 ns/op	2410.43 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     14530 ns/op	2255.12 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        18.0 ns/op	 834.41 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.4 ns/op	 913.19 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.6 ns/op	 903.85 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.3 ns/op	 920.52 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.0 ns/op	 937.53 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.6 ns/op	 903.12 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.0 ns/op	 934.62 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.6 ns/op	 903.49 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.1 ns/op	 930.77 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	10000000	        16.8 ns/op	 895.10 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.0 ns/op	 884.78 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.5 ns/op	 855.00 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.1 ns/op	 874.74 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.5 ns/op	 857.53 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.2 ns/op	 869.69 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.0 ns/op	 883.09 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.1 ns/op	 877.29 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.2 ns/op	 869.59 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        18.6 ns/op	 807.75 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	10000000	        17.4 ns/op	 861.10 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.3 ns/op	2317.87 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.6 ns/op	2273.98 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.5 ns/op	2280.49 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.2 ns/op	2322.88 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.7 ns/op	2257.80 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.5 ns/op	2290.51 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.2 ns/op	2319.50 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.2 ns/op	2329.20 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.4 ns/op	2300.87 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.7 ns/op	2262.94 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        20.3 ns/op	1966.20 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.3 ns/op	2077.39 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.1 ns/op	2094.95 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.8 ns/op	2022.06 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.9 ns/op	2009.49 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.6 ns/op	2037.74 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        20.1 ns/op	1987.93 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.5 ns/op	2050.18 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.8 ns/op	2024.89 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.7 ns/op	2031.46 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        40.4 ns/op	12662.42 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        39.7 ns/op	12889.39 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        40.3 ns/op	12706.28 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        40.0 ns/op	12814.70 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        40.1 ns/op	12758.84 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        39.7 ns/op	12894.29 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        40.8 ns/op	12535.17 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        40.8 ns/op	12542.40 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        40.2 ns/op	12750.46 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	 3000000	        39.7 ns/op	12882.94 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        41.9 ns/op	12212.50 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        41.9 ns/op	12206.11 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        43.1 ns/op	11869.61 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        42.8 ns/op	11949.04 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        43.3 ns/op	11823.64 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        42.0 ns/op	12196.38 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        41.5 ns/op	12325.33 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        41.8 ns/op	12236.96 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        41.5 ns/op	12326.79 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	 3000000	        41.6 ns/op	12298.60 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        65.4 ns/op	15659.97 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        65.5 ns/op	15623.63 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        65.2 ns/op	15711.75 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        65.5 ns/op	15640.74 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        65.3 ns/op	15687.32 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        66.0 ns/op	15509.02 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        65.3 ns/op	15684.59 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        70.2 ns/op	14584.88 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        65.7 ns/op	15582.29 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	 2000000	        65.6 ns/op	15619.90 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        67.6 ns/op	15140.87 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        68.1 ns/op	15039.19 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        67.5 ns/op	15180.69 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        67.6 ns/op	15146.80 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        73.1 ns/op	14015.81 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        70.7 ns/op	14493.63 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        74.1 ns/op	13820.54 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        71.8 ns/op	14256.30 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        70.9 ns/op	14451.65 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	 2000000	        69.5 ns/op	14727.15 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       162 ns/op	25174.53 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       162 ns/op	25194.50 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       167 ns/op	24422.65 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       160 ns/op	25541.37 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       161 ns/op	25354.03 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       171 ns/op	23871.09 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       162 ns/op	25224.84 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       158 ns/op	25798.19 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       158 ns/op	25815.54 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       167 ns/op	24465.11 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       168 ns/op	24282.26 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       177 ns/op	23065.76 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       176 ns/op	23245.80 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       170 ns/op	24015.77 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       173 ns/op	23579.34 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       174 ns/op	23531.14 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       169 ns/op	24107.05 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       166 ns/op	24657.46 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       162 ns/op	25273.53 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       159 ns/op	25619.67 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1234 ns/op	26534.55 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1210 ns/op	27063.60 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1241 ns/op	26391.50 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1230 ns/op	26634.33 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1190 ns/op	27515.61 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1183 ns/op	27695.90 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1271 ns/op	25771.02 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1205 ns/op	27170.87 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1337 ns/op	24493.55 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	  100000	      1200 ns/op	27299.92 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1263 ns/op	25937.42 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1274 ns/op	25702.13 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1278 ns/op	25639.64 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1271 ns/op	25763.65 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1235 ns/op	26515.05 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1232 ns/op	26586.52 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1341 ns/op	24430.71 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1257 ns/op	26056.28 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1264 ns/op	25910.24 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	  100000	      1309 ns/op	25023.35 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        36.5 ns/op	 410.82 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        37.3 ns/op	 402.07 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        36.7 ns/op	 408.83 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        34.8 ns/op	 430.97 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        40.4 ns/op	 371.74 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        38.6 ns/op	 388.62 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        36.3 ns/op	 413.61 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        34.0 ns/op	 441.26 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        34.3 ns/op	 437.06 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 5000000	        36.2 ns/op	 414.34 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        34.4 ns/op	 435.65 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        37.0 ns/op	 405.81 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        35.8 ns/op	 419.55 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        36.5 ns/op	 410.66 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        35.2 ns/op	 425.85 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        34.4 ns/op	 436.09 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        34.0 ns/op	 441.70 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        33.9 ns/op	 443.08 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        33.9 ns/op	 443.04 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 5000000	        36.4 ns/op	 412.65 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        89.7 ns/op	 445.75 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        90.4 ns/op	 442.39 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        94.9 ns/op	 421.61 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        87.4 ns/op	 457.45 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        89.3 ns/op	 448.11 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        90.3 ns/op	 443.04 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        87.9 ns/op	 455.14 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        93.7 ns/op	 426.98 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	       100 ns/op	 396.87 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        92.8 ns/op	 430.97 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        87.9 ns/op	 455.30 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        90.9 ns/op	 439.90 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        93.5 ns/op	 427.91 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        93.7 ns/op	 426.98 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        95.0 ns/op	 421.02 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        90.1 ns/op	 443.95 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        95.3 ns/op	 419.61 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        85.6 ns/op	 467.20 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        86.9 ns/op	 460.19 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        91.9 ns/op	 435.25 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1118 ns/op	 457.68 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1168 ns/op	 438.12 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1132 ns/op	 452.06 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1079 ns/op	 474.47 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1079 ns/op	 474.46 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1126 ns/op	 454.69 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1132 ns/op	 452.04 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  100000	      1193 ns/op	 429.16 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1124 ns/op	 455.38 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  200000	      1166 ns/op	 438.87 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1096 ns/op	 466.92 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1117 ns/op	 458.09 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1200 ns/op	 426.33 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1109 ns/op	 461.41 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1155 ns/op	 443.08 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1141 ns/op	 448.59 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1136 ns/op	 450.61 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1145 ns/op	 446.97 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  200000	      1085 ns/op	 471.86 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  100000	      1084 ns/op	 471.93 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2308 ns/op	 443.53 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2480 ns/op	 412.88 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2288 ns/op	 447.44 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2371 ns/op	 431.88 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2247 ns/op	 455.55 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2211 ns/op	 462.97 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2264 ns/op	 452.17 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2231 ns/op	 458.82 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2161 ns/op	 473.71 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	  100000	      2109 ns/op	 485.48 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2160 ns/op	 473.90 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2103 ns/op	 486.83 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2189 ns/op	 467.75 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2159 ns/op	 474.21 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2144 ns/op	 477.53 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2169 ns/op	 472.09 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2123 ns/op	 482.20 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	   50000	      2150 ns/op	 476.26 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2141 ns/op	 478.25 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	  100000	      2238 ns/op	 457.51 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8562 ns/op	 478.35 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8584 ns/op	 477.16 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9176 ns/op	 446.36 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9096 ns/op	 450.27 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9337 ns/op	 438.67 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9545 ns/op	 429.11 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9287 ns/op	 441.04 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8963 ns/op	 456.98 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8970 ns/op	 456.61 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8795 ns/op	 465.67 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9179 ns/op	 446.20 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9026 ns/op	 453.76 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      8505 ns/op	 481.60 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      8396 ns/op	 487.85 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9818 ns/op	 417.17 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      8652 ns/op	 473.38 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      8689 ns/op	 471.36 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      8345 ns/op	 490.83 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9391 ns/op	 436.13 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9401 ns/op	 435.66 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     74664 ns/op	 438.87 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     72859 ns/op	 449.74 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     73403 ns/op	 446.41 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     73646 ns/op	 444.94 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     76501 ns/op	 428.33 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     78648 ns/op	 416.64 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     69550 ns/op	 471.14 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     67848 ns/op	 482.96 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     67984 ns/op	 482.00 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	     69177 ns/op	 473.68 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     70108 ns/op	 467.39 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     67566 ns/op	 484.98 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     71256 ns/op	 459.86 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     69979 ns/op	 468.25 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     69972 ns/op	 468.30 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     68356 ns/op	 479.37 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     69631 ns/op	 470.59 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     70087 ns/op	 467.53 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     75510 ns/op	 433.95 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	     76970 ns/op	 425.72 MB/s
+pkg: golang.org/x/perf/cmd/benchstat/testdata
+BenchmarkEncode/format=json-48         	  690848	      1726 ns/op
+BenchmarkEncode/format=json-48         	  684861	      1723 ns/op
+BenchmarkEncode/format=json-48         	  693285	      1707 ns/op
+BenchmarkEncode/format=json-48         	  677692	      1707 ns/op
+BenchmarkEncode/format=json-48         	  692130	      1713 ns/op
+BenchmarkEncode/format=json-48         	  684164	      1729 ns/op
+BenchmarkEncode/format=json-48         	  682500	      1736 ns/op
+BenchmarkEncode/format=json-48         	  677509	      1707 ns/op
+BenchmarkEncode/format=json-48         	  687295	      1705 ns/op
+BenchmarkEncode/format=json-48         	  695533	      1774 ns/op
+BenchmarkEncode/format=gob-48          	  372699	      3069 ns/op
+BenchmarkEncode/format=gob-48          	  394740	      3075 ns/op
+BenchmarkEncode/format=gob-48          	  391335	      3069 ns/op
+BenchmarkEncode/format=gob-48          	  383588	      3067 ns/op
+BenchmarkEncode/format=gob-48          	  385885	      3207 ns/op
+BenchmarkEncode/format=gob-48          	  389970	      3064 ns/op
+BenchmarkEncode/format=gob-48          	  393361	      3064 ns/op
+BenchmarkEncode/format=gob-48          	  393882	      3058 ns/op
+BenchmarkEncode/format=gob-48          	  396171	      3059 ns/op
+BenchmarkEncode/format=gob-48          	  397812	      3062 ns/op
 PASS
-ok  	hash/crc32	68.427s
diff --git a/cmd/benchstat/testdata/oldcsv.golden b/cmd/benchstat/testdata/oldcsv.golden
deleted file mode 100644
index 3e271a2..0000000
--- a/cmd/benchstat/testdata/oldcsv.golden
+++ /dev/null
@@ -1,77 +0,0 @@
-name,time/op (ns/op),±
-CRC32/poly=IEEE/size=15/align=0-8,4.68700E+01,8%
-CRC32/poly=IEEE/size=15/align=1-8,4.47100E+01,5%
-CRC32/poly=IEEE/size=40/align=0-8,4.10375E+01,1%
-CRC32/poly=IEEE/size=40/align=1-8,4.10778E+01,1%
-CRC32/poly=IEEE/size=512/align=0-8,2.38000E+02,5%
-CRC32/poly=IEEE/size=512/align=1-8,2.35500E+02,3%
-CRC32/poly=IEEE/size=1kB/align=0-8,4.52500E+02,4%
-CRC32/poly=IEEE/size=1kB/align=1-8,4.43600E+02,2%
-CRC32/poly=IEEE/size=4kB/align=0-8,1.74000E+03,8%
-CRC32/poly=IEEE/size=4kB/align=1-8,1.76430E+03,6%
-CRC32/poly=IEEE/size=32kB/align=0-8,1.49529E+04,7%
-CRC32/poly=IEEE/size=32kB/align=1-8,1.41888E+04,7%
-CRC32/poly=Castagnoli/size=15/align=0-8,1.63778E+01,3%
-CRC32/poly=Castagnoli/size=15/align=1-8,1.72222E+01,2%
-CRC32/poly=Castagnoli/size=40/align=0-8,1.74300E+01,2%
-CRC32/poly=Castagnoli/size=40/align=1-8,1.97100E+01,3%
-CRC32/poly=Castagnoli/size=512/align=0-8,4.01700E+01,2%
-CRC32/poly=Castagnoli/size=512/align=1-8,4.21400E+01,3%
-CRC32/poly=Castagnoli/size=1kB/align=0-8,6.55000E+01,1%
-CRC32/poly=Castagnoli/size=1kB/align=1-8,7.00900E+01,6%
-CRC32/poly=Castagnoli/size=4kB/align=0-8,1.62800E+02,5%
-CRC32/poly=Castagnoli/size=4kB/align=1-8,1.69400E+02,6%
-CRC32/poly=Castagnoli/size=32kB/align=0-8,1.21822E+03,4%
-CRC32/poly=Castagnoli/size=32kB/align=1-8,1.26478E+03,3%
-CRC32/poly=Koopman/size=15/align=0-8,3.65100E+01,11%
-CRC32/poly=Koopman/size=15/align=1-8,3.51500E+01,5%
-CRC32/poly=Koopman/size=40/align=0-8,9.16400E+01,9%
-CRC32/poly=Koopman/size=40/align=1-8,9.10800E+01,6%
-CRC32/poly=Koopman/size=512/align=0-8,1.13170E+03,5%
-CRC32/poly=Koopman/size=512/align=1-8,1.12680E+03,6%
-CRC32/poly=Koopman/size=1kB/align=0-8,2.24333E+03,6%
-CRC32/poly=Koopman/size=1kB/align=1-8,2.14867E+03,2%
-CRC32/poly=Koopman/size=4kB/align=0-8,9.03150E+03,6%
-CRC32/poly=Koopman/size=4kB/align=1-8,8.94020E+03,10%
-CRC32/poly=Koopman/size=32kB/align=0-8,7.24280E+04,9%
-CRC32/poly=Koopman/size=32kB/align=1-8,6.96194E+04,3%
-[Geo mean],3.44668E+02
-
-name,speed (MB/s),±
-CRC32/poly=IEEE/size=15/align=0-8,3.20711E+02,8%
-CRC32/poly=IEEE/size=15/align=1-8,3.35516E+02,4%
-CRC32/poly=IEEE/size=40/align=0-8,9.74718E+02,1%
-CRC32/poly=IEEE/size=40/align=1-8,9.73636E+02,1%
-CRC32/poly=IEEE/size=512/align=0-8,2.14703E+03,4%
-CRC32/poly=IEEE/size=512/align=1-8,2.16913E+03,3%
-CRC32/poly=IEEE/size=1kB/align=0-8,2.26152E+03,4%
-CRC32/poly=IEEE/size=1kB/align=1-8,2.30619E+03,2%
-CRC32/poly=IEEE/size=4kB/align=0-8,2.35732E+03,7%
-CRC32/poly=IEEE/size=4kB/align=1-8,2.32511E+03,6%
-CRC32/poly=IEEE/size=32kB/align=0-8,2.19443E+03,7%
-CRC32/poly=IEEE/size=32kB/align=1-8,2.31415E+03,8%
-CRC32/poly=Castagnoli/size=15/align=0-8,9.15799E+02,2%
-CRC32/poly=Castagnoli/size=15/align=1-8,8.70312E+02,2%
-CRC32/poly=Castagnoli/size=40/align=0-8,2.29560E+03,2%
-CRC32/poly=Castagnoli/size=40/align=1-8,2.03023E+03,3%
-CRC32/poly=Castagnoli/size=512/align=0-8,1.27437E+04,2%
-CRC32/poly=Castagnoli/size=512/align=1-8,1.21445E+04,3%
-CRC32/poly=Castagnoli/size=1kB/align=0-8,1.56355E+04,1%
-CRC32/poly=Castagnoli/size=1kB/align=1-8,1.46273E+04,6%
-CRC32/poly=Castagnoli/size=4kB/align=0-8,2.50862E+04,5%
-CRC32/poly=Castagnoli/size=4kB/align=1-8,2.41378E+04,6%
-CRC32/poly=Castagnoli/size=32kB/align=0-8,2.68975E+04,4%
-CRC32/poly=Castagnoli/size=32kB/align=1-8,2.59038E+04,3%
-CRC32/poly=Koopman/size=15/align=0-8,4.11932E+02,10%
-CRC32/poly=Koopman/size=15/align=1-8,4.27408E+02,5%
-CRC32/poly=Koopman/size=40/align=0-8,4.36831E+02,9%
-CRC32/poly=Koopman/size=40/align=1-8,4.39731E+02,6%
-CRC32/poly=Koopman/size=512/align=0-8,4.52693E+02,5%
-CRC32/poly=Koopman/size=512/align=1-8,4.54579E+02,6%
-CRC32/poly=Koopman/size=1kB/align=0-8,4.52443E+02,9%
-CRC32/poly=Koopman/size=1kB/align=1-8,4.76558E+02,2%
-CRC32/poly=Koopman/size=4kB/align=0-8,4.54022E+02,5%
-CRC32/poly=Koopman/size=4kB/align=1-8,4.59394E+02,9%
-CRC32/poly=Koopman/size=32kB/align=0-8,4.53471E+02,8%
-CRC32/poly=Koopman/size=32kB/align=1-8,4.70784E+02,3%
-[Geo mean],1.71221E+03
diff --git a/cmd/benchstat/testdata/oldnew.golden b/cmd/benchstat/testdata/oldnew.golden
deleted file mode 100644
index c754683..0000000
--- a/cmd/benchstat/testdata/oldnew.golden
+++ /dev/null
@@ -1,75 +0,0 @@
-name                                       old time/op    new time/op     delta
-CRC32/poly=IEEE/size=15/align=0-8            46.9ns ± 8%     44.5ns ± 3%    -5.01%  (p=0.008 n=10+10)
-CRC32/poly=IEEE/size=15/align=1-8            44.7ns ± 5%     44.5ns ± 4%      ~     (p=0.539 n=10+10)
-CRC32/poly=IEEE/size=40/align=0-8            41.0ns ± 1%     42.5ns ± 6%    +3.56%  (p=0.000 n=8+10)
-CRC32/poly=IEEE/size=40/align=1-8            41.1ns ± 1%     42.0ns ± 3%    +2.34%  (p=0.000 n=9+10)
-CRC32/poly=IEEE/size=512/align=0-8            238ns ± 5%       57ns ± 3%   -76.00%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=512/align=1-8            236ns ± 3%       57ns ± 3%   -75.72%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=1kB/align=0-8            452ns ± 4%       94ns ± 2%   -79.20%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=1kB/align=1-8            444ns ± 2%       93ns ± 2%   -78.97%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=4kB/align=0-8           1.74µs ± 8%     0.30µs ± 1%   -82.87%  (p=0.000 n=10+9)
-CRC32/poly=IEEE/size=4kB/align=1-8           1.76µs ± 6%     0.30µs ± 3%   -83.05%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=32kB/align=0-8          15.0µs ± 7%      2.2µs ± 3%   -85.57%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=32kB/align=1-8          14.2µs ± 7%      2.2µs ± 3%   -84.65%  (p=0.000 n=10+10)
-CRC32/poly=Castagnoli/size=15/align=0-8      16.4ns ± 3%     16.3ns ± 2%      ~     (p=0.615 n=9+9)
-CRC32/poly=Castagnoli/size=15/align=1-8      17.2ns ± 2%     17.3ns ± 2%      ~     (p=0.650 n=9+10)
-CRC32/poly=Castagnoli/size=40/align=0-8      17.4ns ± 2%     17.5ns ± 4%      ~     (p=0.694 n=10+10)
-CRC32/poly=Castagnoli/size=40/align=1-8      19.7ns ± 3%     19.4ns ± 2%    -1.62%  (p=0.036 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=0-8     40.2ns ± 2%     40.1ns ± 4%      ~     (p=0.614 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=1-8     42.1ns ± 3%     41.9ns ± 2%      ~     (p=0.952 n=10+9)
-CRC32/poly=Castagnoli/size=1kB/align=0-8     65.5ns ± 1%     66.2ns ± 1%    +1.01%  (p=0.003 n=9+8)
-CRC32/poly=Castagnoli/size=1kB/align=1-8     70.1ns ± 6%     68.5ns ± 2%      ~     (p=0.190 n=10+9)
-CRC32/poly=Castagnoli/size=4kB/align=0-8      163ns ± 5%      159ns ± 3%    -2.46%  (p=0.032 n=10+10)
-CRC32/poly=Castagnoli/size=4kB/align=1-8      169ns ± 6%      162ns ± 3%    -4.60%  (p=0.005 n=10+10)
-CRC32/poly=Castagnoli/size=32kB/align=0-8    1.22µs ± 4%     1.21µs ± 3%      ~     (p=0.882 n=9+9)
-CRC32/poly=Castagnoli/size=32kB/align=1-8    1.26µs ± 3%     1.22µs ± 4%    -3.48%  (p=0.002 n=9+10)
-CRC32/poly=Koopman/size=15/align=0-8         36.5ns ±11%     35.6ns ± 3%      ~     (p=0.216 n=10+10)
-CRC32/poly=Koopman/size=15/align=1-8         35.1ns ± 5%     35.5ns ± 1%      ~     (p=0.508 n=10+9)
-CRC32/poly=Koopman/size=40/align=0-8         91.6ns ± 9%     87.6ns ± 2%    -4.35%  (p=0.002 n=10+10)
-CRC32/poly=Koopman/size=40/align=1-8         91.1ns ± 6%     88.0ns ± 3%      ~     (p=0.055 n=10+10)
-CRC32/poly=Koopman/size=512/align=0-8        1.13µs ± 5%     1.08µs ± 3%    -4.93%  (p=0.000 n=10+10)
-CRC32/poly=Koopman/size=512/align=1-8        1.13µs ± 6%     1.17µs ± 8%      ~     (p=0.143 n=10+10)
-CRC32/poly=Koopman/size=1kB/align=0-8        2.24µs ± 6%     2.34µs ± 4%    +4.34%  (p=0.010 n=9+10)
-CRC32/poly=Koopman/size=1kB/align=1-8        2.15µs ± 2%     2.36µs ± 5%    +9.84%  (p=0.000 n=9+10)
-CRC32/poly=Koopman/size=4kB/align=0-8        9.03µs ± 6%     9.00µs ± 6%      ~     (p=0.971 n=10+10)
-CRC32/poly=Koopman/size=4kB/align=1-8        8.94µs ±10%     9.05µs ±12%      ~     (p=0.754 n=10+10)
-CRC32/poly=Koopman/size=32kB/align=0-8       72.4µs ± 9%     72.9µs ± 4%      ~     (p=0.684 n=10+10)
-CRC32/poly=Koopman/size=32kB/align=1-8       69.6µs ± 3%     74.3µs ± 3%    +6.70%  (p=0.000 n=8+10)
-
-name                                       old speed      new speed       delta
-CRC32/poly=IEEE/size=15/align=0-8           321MB/s ± 8%    337MB/s ± 3%    +5.06%  (p=0.009 n=10+10)
-CRC32/poly=IEEE/size=15/align=1-8           336MB/s ± 4%    337MB/s ± 4%      ~     (p=0.579 n=10+10)
-CRC32/poly=IEEE/size=40/align=0-8           975MB/s ± 1%    942MB/s ± 5%    -3.37%  (p=0.001 n=8+10)
-CRC32/poly=IEEE/size=40/align=1-8           974MB/s ± 1%    952MB/s ± 3%    -2.25%  (p=0.000 n=9+10)
-CRC32/poly=IEEE/size=512/align=0-8         2.15GB/s ± 4%   8.97GB/s ± 3%  +317.65%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=512/align=1-8         2.17GB/s ± 3%   8.96GB/s ± 3%  +312.89%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=1kB/align=0-8         2.26GB/s ± 4%  10.88GB/s ± 2%  +381.12%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=1kB/align=1-8         2.31GB/s ± 2%  10.98GB/s ± 2%  +375.97%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=4kB/align=0-8         2.36GB/s ± 7%  13.73GB/s ± 1%  +482.26%  (p=0.000 n=10+9)
-CRC32/poly=IEEE/size=4kB/align=1-8         2.33GB/s ± 6%  13.68GB/s ± 3%  +488.23%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=32kB/align=0-8        2.19GB/s ± 7%  15.19GB/s ± 3%  +591.99%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=32kB/align=1-8        2.31GB/s ± 8%  15.04GB/s ± 3%  +550.07%  (p=0.000 n=10+10)
-CRC32/poly=Castagnoli/size=15/align=0-8     916MB/s ± 2%    920MB/s ± 2%      ~     (p=0.489 n=9+9)
-CRC32/poly=Castagnoli/size=15/align=1-8     870MB/s ± 2%    867MB/s ± 2%      ~     (p=0.661 n=9+10)
-CRC32/poly=Castagnoli/size=40/align=0-8    2.30GB/s ± 2%   2.28GB/s ± 4%      ~     (p=0.684 n=10+10)
-CRC32/poly=Castagnoli/size=40/align=1-8    2.03GB/s ± 3%   2.06GB/s ± 2%      ~     (p=0.063 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=0-8   12.7GB/s ± 2%   12.8GB/s ± 4%      ~     (p=0.529 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=1-8   12.1GB/s ± 3%   12.2GB/s ± 1%      ~     (p=0.780 n=10+9)
-CRC32/poly=Castagnoli/size=1kB/align=0-8   15.6GB/s ± 1%   15.5GB/s ± 1%    -1.02%  (p=0.002 n=9+8)
-CRC32/poly=Castagnoli/size=1kB/align=1-8   14.6GB/s ± 6%   15.0GB/s ± 2%      ~     (p=0.211 n=10+9)
-CRC32/poly=Castagnoli/size=4kB/align=0-8   25.1GB/s ± 5%   25.7GB/s ± 3%      ~     (p=0.052 n=10+10)
-CRC32/poly=Castagnoli/size=4kB/align=1-8   24.1GB/s ± 6%   25.3GB/s ± 3%    +4.71%  (p=0.005 n=10+10)
-CRC32/poly=Castagnoli/size=32kB/align=0-8  26.9GB/s ± 4%   26.8GB/s ± 5%      ~     (p=0.842 n=9+10)
-CRC32/poly=Castagnoli/size=32kB/align=1-8  25.9GB/s ± 3%   26.8GB/s ± 4%    +3.62%  (p=0.002 n=9+10)
-CRC32/poly=Koopman/size=15/align=0-8        412MB/s ±10%    421MB/s ± 3%      ~     (p=0.218 n=10+10)
-CRC32/poly=Koopman/size=15/align=1-8        427MB/s ± 5%    422MB/s ± 1%      ~     (p=0.497 n=10+9)
-CRC32/poly=Koopman/size=40/align=0-8        437MB/s ± 9%    456MB/s ± 2%    +4.50%  (p=0.002 n=10+10)
-CRC32/poly=Koopman/size=40/align=1-8        440MB/s ± 6%    455MB/s ± 3%      ~     (p=0.052 n=10+10)
-CRC32/poly=Koopman/size=512/align=0-8       453MB/s ± 5%    476MB/s ± 3%    +5.09%  (p=0.000 n=10+10)
-CRC32/poly=Koopman/size=512/align=1-8       455MB/s ± 6%    440MB/s ± 8%      ~     (p=0.143 n=10+10)
-CRC32/poly=Koopman/size=1kB/align=0-8       452MB/s ± 9%    438MB/s ± 4%      ~     (p=0.052 n=10+10)
-CRC32/poly=Koopman/size=1kB/align=1-8       477MB/s ± 2%    434MB/s ± 5%    -8.92%  (p=0.000 n=9+10)
-CRC32/poly=Koopman/size=4kB/align=0-8       454MB/s ± 5%    455MB/s ± 6%      ~     (p=0.971 n=10+10)
-CRC32/poly=Koopman/size=4kB/align=1-8       459MB/s ± 9%    455MB/s ±11%      ~     (p=0.739 n=10+10)
-CRC32/poly=Koopman/size=32kB/align=0-8      453MB/s ± 8%    450MB/s ± 4%      ~     (p=0.684 n=10+10)
-CRC32/poly=Koopman/size=32kB/align=1-8      471MB/s ± 3%    441MB/s ± 3%    -6.25%  (p=0.000 n=8+10)
diff --git a/cmd/benchstat/testdata/oldnew4html.golden b/cmd/benchstat/testdata/oldnew4html.golden
deleted file mode 100644
index d8dbdba..0000000
--- a/cmd/benchstat/testdata/oldnew4html.golden
+++ /dev/null
@@ -1,106 +0,0 @@
-<!doctype html>
-<html>
-<head>
-<meta charset="utf-8">
-<title>Performance Result Comparison</title>
-<style>
-.benchstat { border-collapse: collapse; }
-.benchstat th:nth-child(1) { text-align: left; }
-.benchstat tbody td:nth-child(1n+2):not(.note) { text-align: right; padding: 0em 1em; }
-.benchstat tr:not(.configs) th { border-top: 1px solid #666; border-bottom: 1px solid #ccc; }
-.benchstat .nodelta { text-align: center !important; }
-.benchstat .better td.delta { font-weight: bold; }
-.benchstat .worse td.delta { font-weight: bold; color: #c00; }
-</style>
-</head>
-<body>
-
-<table class='benchstat '>
-<tr class='configs'><th><th>old.txt<th>new.txt<th>slashslash4.txt
-
-
-<tbody>
-<tr><th><th colspan='3' class='metric'>time/op
-<tr><td>CRC32/poly=IEEE/size=15/align=0-8<td>46.9ns ± 8%<td>44.5ns ± 3%<td>
-<tr><td>CRC32/poly=IEEE/size=15/align=1-8<td>44.7ns ± 5%<td>44.5ns ± 4%<td>
-<tr><td>CRC32/poly=IEEE/size=40/align=0-8<td>41.0ns ± 1%<td>42.5ns ± 6%<td>42.1ns ± 3%
-<tr><td>CRC32/poly=IEEE/size=40/align=1-8<td>41.1ns ± 1%<td>42.0ns ± 3%<td>41.7ns ± 5%
-<tr><td>CRC32/poly=IEEE/size=512/align=0-8<td>238ns ± 5%<td>57ns ± 3%<td>
-<tr><td>CRC32/poly=IEEE/size=512/align=1-8<td>236ns ± 3%<td>57ns ± 3%<td>
-<tr><td>CRC32/poly=IEEE/size=1kB/align=0-8<td>452ns ± 4%<td>94ns ± 2%<td>
-<tr><td>CRC32/poly=IEEE/size=1kB/align=1-8<td>444ns ± 2%<td>93ns ± 2%<td>
-<tr><td>CRC32/poly=IEEE/size=4kB/align=0-8<td>1.74µs ± 8%<td>0.30µs ± 1%<td>1.68µs ± 2%
-<tr><td>CRC32/poly=IEEE/size=4kB/align=1-8<td>1.76µs ± 6%<td>0.30µs ± 3%<td>1.69µs ± 4%
-<tr><td>CRC32/poly=IEEE/size=32kB/align=0-8<td>15.0µs ± 7%<td>2.2µs ± 3%<td>
-<tr><td>CRC32/poly=IEEE/size=32kB/align=1-8<td>14.2µs ± 7%<td>2.2µs ± 3%<td>
-<tr><td>CRC32/poly=Castagnoli/size=15/align=0-8<td>16.4ns ± 3%<td>16.3ns ± 2%<td>
-<tr><td>CRC32/poly=Castagnoli/size=15/align=1-8<td>17.2ns ± 2%<td>17.3ns ± 2%<td>
-<tr><td>CRC32/poly=Castagnoli/size=40/align=0-8<td>17.4ns ± 2%<td>17.5ns ± 4%<td>18.6ns ±11%
-<tr><td>CRC32/poly=Castagnoli/size=40/align=1-8<td>19.7ns ± 3%<td>19.4ns ± 2%<td>19.6ns ± 2%
-<tr><td>CRC32/poly=Castagnoli/size=512/align=0-8<td>40.2ns ± 2%<td>40.1ns ± 4%<td>
-<tr><td>CRC32/poly=Castagnoli/size=512/align=1-8<td>42.1ns ± 3%<td>41.9ns ± 2%<td>
-<tr><td>CRC32/poly=Castagnoli/size=1kB/align=0-8<td>65.5ns ± 1%<td>66.2ns ± 1%<td>
-<tr><td>CRC32/poly=Castagnoli/size=1kB/align=1-8<td>70.1ns ± 6%<td>68.5ns ± 2%<td>
-<tr><td>CRC32/poly=Castagnoli/size=4kB/align=0-8<td>163ns ± 5%<td>159ns ± 3%<td>161ns ± 8%
-<tr><td>CRC32/poly=Castagnoli/size=4kB/align=1-8<td>169ns ± 6%<td>162ns ± 3%<td>170ns ± 8%
-<tr><td>CRC32/poly=Castagnoli/size=32kB/align=0-8<td>1.22µs ± 4%<td>1.21µs ± 3%<td>
-<tr><td>CRC32/poly=Castagnoli/size=32kB/align=1-8<td>1.26µs ± 3%<td>1.22µs ± 4%<td>
-<tr><td>CRC32/poly=Koopman/size=15/align=0-8<td>36.5ns ±11%<td>35.6ns ± 3%<td>
-<tr><td>CRC32/poly=Koopman/size=15/align=1-8<td>35.1ns ± 5%<td>35.5ns ± 1%<td>
-<tr><td>CRC32/poly=Koopman/size=40/align=0-8<td>91.6ns ± 9%<td>87.6ns ± 2%<td>93.8ns ±13%
-<tr><td>CRC32/poly=Koopman/size=40/align=1-8<td>91.1ns ± 6%<td>88.0ns ± 3%<td>86.9ns ± 3%
-<tr><td>CRC32/poly=Koopman/size=512/align=0-8<td>1.13µs ± 5%<td>1.08µs ± 3%<td>
-<tr><td>CRC32/poly=Koopman/size=512/align=1-8<td>1.13µs ± 6%<td>1.17µs ± 8%<td>
-<tr><td>CRC32/poly=Koopman/size=1kB/align=0-8<td>2.24µs ± 6%<td>2.34µs ± 4%<td>
-<tr><td>CRC32/poly=Koopman/size=1kB/align=1-8<td>2.15µs ± 2%<td>2.36µs ± 5%<td>
-<tr><td>CRC32/poly=Koopman/size=4kB/align=0-8<td>9.03µs ± 6%<td>9.00µs ± 6%<td>9.08µs ± 8%
-<tr><td>CRC32/poly=Koopman/size=4kB/align=1-8<td>8.94µs ±10%<td>9.05µs ±12%<td>9.46µs ± 8%
-<tr><td>CRC32/poly=Koopman/size=32kB/align=0-8<td>72.4µs ± 9%<td>72.9µs ± 4%<td>
-<tr><td>CRC32/poly=Koopman/size=32kB/align=1-8<td>69.6µs ± 3%<td>74.3µs ± 3%<td>
-<tr><td>&nbsp;
-</tbody>
-
-<tbody>
-<tr><th><th colspan='3' class='metric'>speed
-<tr><td>CRC32/poly=IEEE/size=15/align=0-8<td>321MB/s ± 8%<td>337MB/s ± 3%<td>
-<tr><td>CRC32/poly=IEEE/size=15/align=1-8<td>336MB/s ± 4%<td>337MB/s ± 4%<td>
-<tr><td>CRC32/poly=IEEE/size=40/align=0-8<td>975MB/s ± 1%<td>942MB/s ± 5%<td>951MB/s ± 3%
-<tr><td>CRC32/poly=IEEE/size=40/align=1-8<td>974MB/s ± 1%<td>952MB/s ± 3%<td>960MB/s ± 4%
-<tr><td>CRC32/poly=IEEE/size=512/align=0-8<td>2.15GB/s ± 4%<td>8.97GB/s ± 3%<td>
-<tr><td>CRC32/poly=IEEE/size=512/align=1-8<td>2.17GB/s ± 3%<td>8.96GB/s ± 3%<td>
-<tr><td>CRC32/poly=IEEE/size=1kB/align=0-8<td>2.26GB/s ± 4%<td>10.88GB/s ± 2%<td>
-<tr><td>CRC32/poly=IEEE/size=1kB/align=1-8<td>2.31GB/s ± 2%<td>10.98GB/s ± 2%<td>
-<tr><td>CRC32/poly=IEEE/size=4kB/align=0-8<td>2.36GB/s ± 7%<td>13.73GB/s ± 1%<td>2.43GB/s ± 2%
-<tr><td>CRC32/poly=IEEE/size=4kB/align=1-8<td>2.33GB/s ± 6%<td>13.68GB/s ± 3%<td>2.42GB/s ± 4%
-<tr><td>CRC32/poly=IEEE/size=32kB/align=0-8<td>2.19GB/s ± 7%<td>15.19GB/s ± 3%<td>
-<tr><td>CRC32/poly=IEEE/size=32kB/align=1-8<td>2.31GB/s ± 8%<td>15.04GB/s ± 3%<td>
-<tr><td>CRC32/poly=Castagnoli/size=15/align=0-8<td>916MB/s ± 2%<td>920MB/s ± 2%<td>
-<tr><td>CRC32/poly=Castagnoli/size=15/align=1-8<td>870MB/s ± 2%<td>867MB/s ± 2%<td>
-<tr><td>CRC32/poly=Castagnoli/size=40/align=0-8<td>2.30GB/s ± 2%<td>2.28GB/s ± 4%<td>2.16GB/s ±11%
-<tr><td>CRC32/poly=Castagnoli/size=40/align=1-8<td>2.03GB/s ± 3%<td>2.06GB/s ± 2%<td>2.04GB/s ± 2%
-<tr><td>CRC32/poly=Castagnoli/size=512/align=0-8<td>12.7GB/s ± 2%<td>12.8GB/s ± 4%<td>
-<tr><td>CRC32/poly=Castagnoli/size=512/align=1-8<td>12.1GB/s ± 3%<td>12.2GB/s ± 1%<td>
-<tr><td>CRC32/poly=Castagnoli/size=1kB/align=0-8<td>15.6GB/s ± 1%<td>15.5GB/s ± 1%<td>
-<tr><td>CRC32/poly=Castagnoli/size=1kB/align=1-8<td>14.6GB/s ± 6%<td>15.0GB/s ± 2%<td>
-<tr><td>CRC32/poly=Castagnoli/size=4kB/align=0-8<td>25.1GB/s ± 5%<td>25.7GB/s ± 3%<td>25.4GB/s ± 7%
-<tr><td>CRC32/poly=Castagnoli/size=4kB/align=1-8<td>24.1GB/s ± 6%<td>25.3GB/s ± 3%<td>24.1GB/s ± 8%
-<tr><td>CRC32/poly=Castagnoli/size=32kB/align=0-8<td>26.9GB/s ± 4%<td>26.8GB/s ± 5%<td>
-<tr><td>CRC32/poly=Castagnoli/size=32kB/align=1-8<td>25.9GB/s ± 3%<td>26.8GB/s ± 4%<td>
-<tr><td>CRC32/poly=Koopman/size=15/align=0-8<td>412MB/s ±10%<td>421MB/s ± 3%<td>
-<tr><td>CRC32/poly=Koopman/size=15/align=1-8<td>427MB/s ± 5%<td>422MB/s ± 1%<td>
-<tr><td>CRC32/poly=Koopman/size=40/align=0-8<td>437MB/s ± 9%<td>456MB/s ± 2%<td>428MB/s ±12%
-<tr><td>CRC32/poly=Koopman/size=40/align=1-8<td>440MB/s ± 6%<td>455MB/s ± 3%<td>461MB/s ± 3%
-<tr><td>CRC32/poly=Koopman/size=512/align=0-8<td>453MB/s ± 5%<td>476MB/s ± 3%<td>
-<tr><td>CRC32/poly=Koopman/size=512/align=1-8<td>455MB/s ± 6%<td>440MB/s ± 8%<td>
-<tr><td>CRC32/poly=Koopman/size=1kB/align=0-8<td>452MB/s ± 9%<td>438MB/s ± 4%<td>
-<tr><td>CRC32/poly=Koopman/size=1kB/align=1-8<td>477MB/s ± 2%<td>434MB/s ± 5%<td>
-<tr><td>CRC32/poly=Koopman/size=4kB/align=0-8<td>454MB/s ± 5%<td>455MB/s ± 6%<td>452MB/s ± 8%
-<tr><td>CRC32/poly=Koopman/size=4kB/align=1-8<td>459MB/s ± 9%<td>455MB/s ±11%<td>434MB/s ± 9%
-<tr><td>CRC32/poly=Koopman/size=32kB/align=0-8<td>453MB/s ± 8%<td>450MB/s ± 4%<td>
-<tr><td>CRC32/poly=Koopman/size=32kB/align=1-8<td>471MB/s ± 3%<td>441MB/s ± 3%<td>
-<tr><td>&nbsp;
-</tbody>
-
-</table>
-</body>
-</html>
diff --git a/cmd/benchstat/testdata/oldnewgeo.golden b/cmd/benchstat/testdata/oldnewgeo.golden
deleted file mode 100644
index c625f03..0000000
--- a/cmd/benchstat/testdata/oldnewgeo.golden
+++ /dev/null
@@ -1,77 +0,0 @@
-name                                       old time/op    new time/op     delta
-CRC32/poly=IEEE/size=15/align=0-8            46.9ns ± 8%     44.5ns ± 3%    -5.01%  (p=0.008 n=10+10)
-CRC32/poly=IEEE/size=15/align=1-8            44.7ns ± 5%     44.5ns ± 4%      ~     (p=0.539 n=10+10)
-CRC32/poly=IEEE/size=40/align=0-8            41.0ns ± 1%     42.5ns ± 6%    +3.56%  (p=0.000 n=8+10)
-CRC32/poly=IEEE/size=40/align=1-8            41.1ns ± 1%     42.0ns ± 3%    +2.34%  (p=0.000 n=9+10)
-CRC32/poly=IEEE/size=512/align=0-8            238ns ± 5%       57ns ± 3%   -76.00%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=512/align=1-8            236ns ± 3%       57ns ± 3%   -75.72%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=1kB/align=0-8            452ns ± 4%       94ns ± 2%   -79.20%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=1kB/align=1-8            444ns ± 2%       93ns ± 2%   -78.97%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=4kB/align=0-8           1.74µs ± 8%     0.30µs ± 1%   -82.87%  (p=0.000 n=10+9)
-CRC32/poly=IEEE/size=4kB/align=1-8           1.76µs ± 6%     0.30µs ± 3%   -83.05%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=32kB/align=0-8          15.0µs ± 7%      2.2µs ± 3%   -85.57%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=32kB/align=1-8          14.2µs ± 7%      2.2µs ± 3%   -84.65%  (p=0.000 n=10+10)
-CRC32/poly=Castagnoli/size=15/align=0-8      16.4ns ± 3%     16.3ns ± 2%      ~     (p=0.615 n=9+9)
-CRC32/poly=Castagnoli/size=15/align=1-8      17.2ns ± 2%     17.3ns ± 2%      ~     (p=0.650 n=9+10)
-CRC32/poly=Castagnoli/size=40/align=0-8      17.4ns ± 2%     17.5ns ± 4%      ~     (p=0.694 n=10+10)
-CRC32/poly=Castagnoli/size=40/align=1-8      19.7ns ± 3%     19.4ns ± 2%    -1.62%  (p=0.036 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=0-8     40.2ns ± 2%     40.1ns ± 4%      ~     (p=0.614 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=1-8     42.1ns ± 3%     41.9ns ± 2%      ~     (p=0.952 n=10+9)
-CRC32/poly=Castagnoli/size=1kB/align=0-8     65.5ns ± 1%     66.2ns ± 1%    +1.01%  (p=0.003 n=9+8)
-CRC32/poly=Castagnoli/size=1kB/align=1-8     70.1ns ± 6%     68.5ns ± 2%      ~     (p=0.190 n=10+9)
-CRC32/poly=Castagnoli/size=4kB/align=0-8      163ns ± 5%      159ns ± 3%    -2.46%  (p=0.032 n=10+10)
-CRC32/poly=Castagnoli/size=4kB/align=1-8      169ns ± 6%      162ns ± 3%    -4.60%  (p=0.005 n=10+10)
-CRC32/poly=Castagnoli/size=32kB/align=0-8    1.22µs ± 4%     1.21µs ± 3%      ~     (p=0.882 n=9+9)
-CRC32/poly=Castagnoli/size=32kB/align=1-8    1.26µs ± 3%     1.22µs ± 4%    -3.48%  (p=0.002 n=9+10)
-CRC32/poly=Koopman/size=15/align=0-8         36.5ns ±11%     35.6ns ± 3%      ~     (p=0.216 n=10+10)
-CRC32/poly=Koopman/size=15/align=1-8         35.1ns ± 5%     35.5ns ± 1%      ~     (p=0.508 n=10+9)
-CRC32/poly=Koopman/size=40/align=0-8         91.6ns ± 9%     87.6ns ± 2%    -4.35%  (p=0.002 n=10+10)
-CRC32/poly=Koopman/size=40/align=1-8         91.1ns ± 6%     88.0ns ± 3%      ~     (p=0.055 n=10+10)
-CRC32/poly=Koopman/size=512/align=0-8        1.13µs ± 5%     1.08µs ± 3%    -4.93%  (p=0.000 n=10+10)
-CRC32/poly=Koopman/size=512/align=1-8        1.13µs ± 6%     1.17µs ± 8%      ~     (p=0.143 n=10+10)
-CRC32/poly=Koopman/size=1kB/align=0-8        2.24µs ± 6%     2.34µs ± 4%    +4.34%  (p=0.010 n=9+10)
-CRC32/poly=Koopman/size=1kB/align=1-8        2.15µs ± 2%     2.36µs ± 5%    +9.84%  (p=0.000 n=9+10)
-CRC32/poly=Koopman/size=4kB/align=0-8        9.03µs ± 6%     9.00µs ± 6%      ~     (p=0.971 n=10+10)
-CRC32/poly=Koopman/size=4kB/align=1-8        8.94µs ±10%     9.05µs ±12%      ~     (p=0.754 n=10+10)
-CRC32/poly=Koopman/size=32kB/align=0-8       72.4µs ± 9%     72.9µs ± 4%      ~     (p=0.684 n=10+10)
-CRC32/poly=Koopman/size=32kB/align=1-8       69.6µs ± 3%     74.3µs ± 3%    +6.70%  (p=0.000 n=8+10)
-[Geo mean]                                    345ns           238ns        -30.99%
-
-name                                       old speed      new speed       delta
-CRC32/poly=IEEE/size=15/align=0-8           321MB/s ± 8%    337MB/s ± 3%    +5.06%  (p=0.009 n=10+10)
-CRC32/poly=IEEE/size=15/align=1-8           336MB/s ± 4%    337MB/s ± 4%      ~     (p=0.579 n=10+10)
-CRC32/poly=IEEE/size=40/align=0-8           975MB/s ± 1%    942MB/s ± 5%    -3.37%  (p=0.001 n=8+10)
-CRC32/poly=IEEE/size=40/align=1-8           974MB/s ± 1%    952MB/s ± 3%    -2.25%  (p=0.000 n=9+10)
-CRC32/poly=IEEE/size=512/align=0-8         2.15GB/s ± 4%   8.97GB/s ± 3%  +317.65%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=512/align=1-8         2.17GB/s ± 3%   8.96GB/s ± 3%  +312.89%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=1kB/align=0-8         2.26GB/s ± 4%  10.88GB/s ± 2%  +381.12%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=1kB/align=1-8         2.31GB/s ± 2%  10.98GB/s ± 2%  +375.97%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=4kB/align=0-8         2.36GB/s ± 7%  13.73GB/s ± 1%  +482.26%  (p=0.000 n=10+9)
-CRC32/poly=IEEE/size=4kB/align=1-8         2.33GB/s ± 6%  13.68GB/s ± 3%  +488.23%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=32kB/align=0-8        2.19GB/s ± 7%  15.19GB/s ± 3%  +591.99%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=32kB/align=1-8        2.31GB/s ± 8%  15.04GB/s ± 3%  +550.07%  (p=0.000 n=10+10)
-CRC32/poly=Castagnoli/size=15/align=0-8     916MB/s ± 2%    920MB/s ± 2%      ~     (p=0.489 n=9+9)
-CRC32/poly=Castagnoli/size=15/align=1-8     870MB/s ± 2%    867MB/s ± 2%      ~     (p=0.661 n=9+10)
-CRC32/poly=Castagnoli/size=40/align=0-8    2.30GB/s ± 2%   2.28GB/s ± 4%      ~     (p=0.684 n=10+10)
-CRC32/poly=Castagnoli/size=40/align=1-8    2.03GB/s ± 3%   2.06GB/s ± 2%      ~     (p=0.063 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=0-8   12.7GB/s ± 2%   12.8GB/s ± 4%      ~     (p=0.529 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=1-8   12.1GB/s ± 3%   12.2GB/s ± 1%      ~     (p=0.780 n=10+9)
-CRC32/poly=Castagnoli/size=1kB/align=0-8   15.6GB/s ± 1%   15.5GB/s ± 1%    -1.02%  (p=0.002 n=9+8)
-CRC32/poly=Castagnoli/size=1kB/align=1-8   14.6GB/s ± 6%   15.0GB/s ± 2%      ~     (p=0.211 n=10+9)
-CRC32/poly=Castagnoli/size=4kB/align=0-8   25.1GB/s ± 5%   25.7GB/s ± 3%      ~     (p=0.052 n=10+10)
-CRC32/poly=Castagnoli/size=4kB/align=1-8   24.1GB/s ± 6%   25.3GB/s ± 3%    +4.71%  (p=0.005 n=10+10)
-CRC32/poly=Castagnoli/size=32kB/align=0-8  26.9GB/s ± 4%   26.8GB/s ± 5%      ~     (p=0.842 n=9+10)
-CRC32/poly=Castagnoli/size=32kB/align=1-8  25.9GB/s ± 3%   26.8GB/s ± 4%    +3.62%  (p=0.002 n=9+10)
-CRC32/poly=Koopman/size=15/align=0-8        412MB/s ±10%    421MB/s ± 3%      ~     (p=0.218 n=10+10)
-CRC32/poly=Koopman/size=15/align=1-8        427MB/s ± 5%    422MB/s ± 1%      ~     (p=0.497 n=10+9)
-CRC32/poly=Koopman/size=40/align=0-8        437MB/s ± 9%    456MB/s ± 2%    +4.50%  (p=0.002 n=10+10)
-CRC32/poly=Koopman/size=40/align=1-8        440MB/s ± 6%    455MB/s ± 3%      ~     (p=0.052 n=10+10)
-CRC32/poly=Koopman/size=512/align=0-8       453MB/s ± 5%    476MB/s ± 3%    +5.09%  (p=0.000 n=10+10)
-CRC32/poly=Koopman/size=512/align=1-8       455MB/s ± 6%    440MB/s ± 8%      ~     (p=0.143 n=10+10)
-CRC32/poly=Koopman/size=1kB/align=0-8       452MB/s ± 9%    438MB/s ± 4%      ~     (p=0.052 n=10+10)
-CRC32/poly=Koopman/size=1kB/align=1-8       477MB/s ± 2%    434MB/s ± 5%    -8.92%  (p=0.000 n=9+10)
-CRC32/poly=Koopman/size=4kB/align=0-8       454MB/s ± 5%    455MB/s ± 6%      ~     (p=0.971 n=10+10)
-CRC32/poly=Koopman/size=4kB/align=1-8       459MB/s ± 9%    455MB/s ±11%      ~     (p=0.739 n=10+10)
-CRC32/poly=Koopman/size=32kB/align=0-8      453MB/s ± 8%    450MB/s ± 4%      ~     (p=0.684 n=10+10)
-CRC32/poly=Koopman/size=32kB/align=1-8      471MB/s ± 3%    441MB/s ± 3%    -6.25%  (p=0.000 n=8+10)
-[Geo mean]                                 1.71GB/s        2.48GB/s        +44.88%
diff --git a/cmd/benchstat/testdata/oldnewhtml.golden b/cmd/benchstat/testdata/oldnewhtml.golden
deleted file mode 100644
index 9ac9367..0000000
--- a/cmd/benchstat/testdata/oldnewhtml.golden
+++ /dev/null
@@ -1,106 +0,0 @@
-<!doctype html>
-<html>
-<head>
-<meta charset="utf-8">
-<title>Performance Result Comparison</title>
-<style>
-.benchstat { border-collapse: collapse; }
-.benchstat th:nth-child(1) { text-align: left; }
-.benchstat tbody td:nth-child(1n+2):not(.note) { text-align: right; padding: 0em 1em; }
-.benchstat tr:not(.configs) th { border-top: 1px solid #666; border-bottom: 1px solid #ccc; }
-.benchstat .nodelta { text-align: center !important; }
-.benchstat .better td.delta { font-weight: bold; }
-.benchstat .worse td.delta { font-weight: bold; color: #c00; }
-</style>
-</head>
-<body>
-
-<table class='benchstat oldnew'>
-<tr class='configs'><th><th>old.txt<th>new.txt
-
-
-<tbody>
-<tr><th><th colspan='2' class='metric'>time/op<th>delta
-<tr class='better'><td>CRC32/poly=IEEE/size=15/align=0-8<td>46.9ns ± 8%<td>44.5ns ± 3%<td class='delta'>−5.01%<td class='note'>(p=0.008 n=10+10)
-<tr class='unchanged'><td>CRC32/poly=IEEE/size=15/align=1-8<td>44.7ns ± 5%<td>44.5ns ± 4%<td class='nodelta'>~<td class='note'>(p=0.539 n=10+10)
-<tr class='worse'><td>CRC32/poly=IEEE/size=40/align=0-8<td>41.0ns ± 1%<td>42.5ns ± 6%<td class='delta'>+3.56%<td class='note'>(p=0.000 n=8+10)
-<tr class='worse'><td>CRC32/poly=IEEE/size=40/align=1-8<td>41.1ns ± 1%<td>42.0ns ± 3%<td class='delta'>+2.34%<td class='note'>(p=0.000 n=9+10)
-<tr class='better'><td>CRC32/poly=IEEE/size=512/align=0-8<td>238ns ± 5%<td>57ns ± 3%<td class='delta'>−76.00%<td class='note'>(p=0.000 n=10+10)
-<tr class='better'><td>CRC32/poly=IEEE/size=512/align=1-8<td>236ns ± 3%<td>57ns ± 3%<td class='delta'>−75.72%<td class='note'>(p=0.000 n=10+10)
-<tr class='better'><td>CRC32/poly=IEEE/size=1kB/align=0-8<td>452ns ± 4%<td>94ns ± 2%<td class='delta'>−79.20%<td class='note'>(p=0.000 n=10+8)
-<tr class='better'><td>CRC32/poly=IEEE/size=1kB/align=1-8<td>444ns ± 2%<td>93ns ± 2%<td class='delta'>−78.97%<td class='note'>(p=0.000 n=10+8)
-<tr class='better'><td>CRC32/poly=IEEE/size=4kB/align=0-8<td>1.74µs ± 8%<td>0.30µs ± 1%<td class='delta'>−82.87%<td class='note'>(p=0.000 n=10+9)
-<tr class='better'><td>CRC32/poly=IEEE/size=4kB/align=1-8<td>1.76µs ± 6%<td>0.30µs ± 3%<td class='delta'>−83.05%<td class='note'>(p=0.000 n=10+10)
-<tr class='better'><td>CRC32/poly=IEEE/size=32kB/align=0-8<td>15.0µs ± 7%<td>2.2µs ± 3%<td class='delta'>−85.57%<td class='note'>(p=0.000 n=10+10)
-<tr class='better'><td>CRC32/poly=IEEE/size=32kB/align=1-8<td>14.2µs ± 7%<td>2.2µs ± 3%<td class='delta'>−84.65%<td class='note'>(p=0.000 n=10+10)
-<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=15/align=0-8<td>16.4ns ± 3%<td>16.3ns ± 2%<td class='nodelta'>~<td class='note'>(p=0.615 n=9+9)
-<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=15/align=1-8<td>17.2ns ± 2%<td>17.3ns ± 2%<td class='nodelta'>~<td class='note'>(p=0.650 n=9+10)
-<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=40/align=0-8<td>17.4ns ± 2%<td>17.5ns ± 4%<td class='nodelta'>~<td class='note'>(p=0.694 n=10+10)
-<tr class='better'><td>CRC32/poly=Castagnoli/size=40/align=1-8<td>19.7ns ± 3%<td>19.4ns ± 2%<td class='delta'>−1.62%<td class='note'>(p=0.036 n=10+10)
-<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=512/align=0-8<td>40.2ns ± 2%<td>40.1ns ± 4%<td class='nodelta'>~<td class='note'>(p=0.614 n=10+10)
-<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=512/align=1-8<td>42.1ns ± 3%<td>41.9ns ± 2%<td class='nodelta'>~<td class='note'>(p=0.952 n=10+9)
-<tr class='worse'><td>CRC32/poly=Castagnoli/size=1kB/align=0-8<td>65.5ns ± 1%<td>66.2ns ± 1%<td class='delta'>+1.01%<td class='note'>(p=0.003 n=9+8)
-<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=1kB/align=1-8<td>70.1ns ± 6%<td>68.5ns ± 2%<td class='nodelta'>~<td class='note'>(p=0.190 n=10+9)
-<tr class='better'><td>CRC32/poly=Castagnoli/size=4kB/align=0-8<td>163ns ± 5%<td>159ns ± 3%<td class='delta'>−2.46%<td class='note'>(p=0.032 n=10+10)
-<tr class='better'><td>CRC32/poly=Castagnoli/size=4kB/align=1-8<td>169ns ± 6%<td>162ns ± 3%<td class='delta'>−4.60%<td class='note'>(p=0.005 n=10+10)
-<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=32kB/align=0-8<td>1.22µs ± 4%<td>1.21µs ± 3%<td class='nodelta'>~<td class='note'>(p=0.882 n=9+9)
-<tr class='better'><td>CRC32/poly=Castagnoli/size=32kB/align=1-8<td>1.26µs ± 3%<td>1.22µs ± 4%<td class='delta'>−3.48%<td class='note'>(p=0.002 n=9+10)
-<tr class='unchanged'><td>CRC32/poly=Koopman/size=15/align=0-8<td>36.5ns ±11%<td>35.6ns ± 3%<td class='nodelta'>~<td class='note'>(p=0.216 n=10+10)
-<tr class='unchanged'><td>CRC32/poly=Koopman/size=15/align=1-8<td>35.1ns ± 5%<td>35.5ns ± 1%<td class='nodelta'>~<td class='note'>(p=0.508 n=10+9)
-<tr class='better'><td>CRC32/poly=Koopman/size=40/align=0-8<td>91.6ns ± 9%<td>87.6ns ± 2%<td class='delta'>−4.35%<td class='note'>(p=0.002 n=10+10)
-<tr class='unchanged'><td>CRC32/poly=Koopman/size=40/align=1-8<td>91.1ns ± 6%<td>88.0ns ± 3%<td class='nodelta'>~<td class='note'>(p=0.055 n=10+10)
-<tr class='better'><td>CRC32/poly=Koopman/size=512/align=0-8<td>1.13µs ± 5%<td>1.08µs ± 3%<td class='delta'>−4.93%<td class='note'>(p=0.000 n=10+10)
-<tr class='unchanged'><td>CRC32/poly=Koopman/size=512/align=1-8<td>1.13µs ± 6%<td>1.17µs ± 8%<td class='nodelta'>~<td class='note'>(p=0.143 n=10+10)
-<tr class='worse'><td>CRC32/poly=Koopman/size=1kB/align=0-8<td>2.24µs ± 6%<td>2.34µs ± 4%<td class='delta'>+4.34%<td class='note'>(p=0.010 n=9+10)
-<tr class='worse'><td>CRC32/poly=Koopman/size=1kB/align=1-8<td>2.15µs ± 2%<td>2.36µs ± 5%<td class='delta'>+9.84%<td class='note'>(p=0.000 n=9+10)
-<tr class='unchanged'><td>CRC32/poly=Koopman/size=4kB/align=0-8<td>9.03µs ± 6%<td>9.00µs ± 6%<td class='nodelta'>~<td class='note'>(p=0.971 n=10+10)
-<tr class='unchanged'><td>CRC32/poly=Koopman/size=4kB/align=1-8<td>8.94µs ±10%<td>9.05µs ±12%<td class='nodelta'>~<td class='note'>(p=0.754 n=10+10)
-<tr class='unchanged'><td>CRC32/poly=Koopman/size=32kB/align=0-8<td>72.4µs ± 9%<td>72.9µs ± 4%<td class='nodelta'>~<td class='note'>(p=0.684 n=10+10)
-<tr class='worse'><td>CRC32/poly=Koopman/size=32kB/align=1-8<td>69.6µs ± 3%<td>74.3µs ± 3%<td class='delta'>+6.70%<td class='note'>(p=0.000 n=8+10)
-<tr><td>&nbsp;
-</tbody>
-
-<tbody>
-<tr><th><th colspan='2' class='metric'>speed<th>delta
-<tr class='better'><td>CRC32/poly=IEEE/size=15/align=0-8<td>321MB/s ± 8%<td>337MB/s ± 3%<td class='delta'>+5.06%<td class='note'>(p=0.009 n=10+10)
-<tr class='unchanged'><td>CRC32/poly=IEEE/size=15/align=1-8<td>336MB/s ± 4%<td>337MB/s ± 4%<td class='nodelta'>~<td class='note'>(p=0.579 n=10+10)
-<tr class='worse'><td>CRC32/poly=IEEE/size=40/align=0-8<td>975MB/s ± 1%<td>942MB/s ± 5%<td class='delta'>−3.37%<td class='note'>(p=0.001 n=8+10)
-<tr class='worse'><td>CRC32/poly=IEEE/size=40/align=1-8<td>974MB/s ± 1%<td>952MB/s ± 3%<td class='delta'>−2.25%<td class='note'>(p=0.000 n=9+10)
-<tr class='better'><td>CRC32/poly=IEEE/size=512/align=0-8<td>2.15GB/s ± 4%<td>8.97GB/s ± 3%<td class='delta'>+317.65%<td class='note'>(p=0.000 n=10+10)
-<tr class='better'><td>CRC32/poly=IEEE/size=512/align=1-8<td>2.17GB/s ± 3%<td>8.96GB/s ± 3%<td class='delta'>+312.89%<td class='note'>(p=0.000 n=10+10)
-<tr class='better'><td>CRC32/poly=IEEE/size=1kB/align=0-8<td>2.26GB/s ± 4%<td>10.88GB/s ± 2%<td class='delta'>+381.12%<td class='note'>(p=0.000 n=10+8)
-<tr class='better'><td>CRC32/poly=IEEE/size=1kB/align=1-8<td>2.31GB/s ± 2%<td>10.98GB/s ± 2%<td class='delta'>+375.97%<td class='note'>(p=0.000 n=10+8)
-<tr class='better'><td>CRC32/poly=IEEE/size=4kB/align=0-8<td>2.36GB/s ± 7%<td>13.73GB/s ± 1%<td class='delta'>+482.26%<td class='note'>(p=0.000 n=10+9)
-<tr class='better'><td>CRC32/poly=IEEE/size=4kB/align=1-8<td>2.33GB/s ± 6%<td>13.68GB/s ± 3%<td class='delta'>+488.23%<td class='note'>(p=0.000 n=10+10)
-<tr class='better'><td>CRC32/poly=IEEE/size=32kB/align=0-8<td>2.19GB/s ± 7%<td>15.19GB/s ± 3%<td class='delta'>+591.99%<td class='note'>(p=0.000 n=10+10)
-<tr class='better'><td>CRC32/poly=IEEE/size=32kB/align=1-8<td>2.31GB/s ± 8%<td>15.04GB/s ± 3%<td class='delta'>+550.07%<td class='note'>(p=0.000 n=10+10)
-<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=15/align=0-8<td>916MB/s ± 2%<td>920MB/s ± 2%<td class='nodelta'>~<td class='note'>(p=0.489 n=9+9)
-<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=15/align=1-8<td>870MB/s ± 2%<td>867MB/s ± 2%<td class='nodelta'>~<td class='note'>(p=0.661 n=9+10)
-<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=40/align=0-8<td>2.30GB/s ± 2%<td>2.28GB/s ± 4%<td class='nodelta'>~<td class='note'>(p=0.684 n=10+10)
-<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=40/align=1-8<td>2.03GB/s ± 3%<td>2.06GB/s ± 2%<td class='nodelta'>~<td class='note'>(p=0.063 n=10+10)
-<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=512/align=0-8<td>12.7GB/s ± 2%<td>12.8GB/s ± 4%<td class='nodelta'>~<td class='note'>(p=0.529 n=10+10)
-<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=512/align=1-8<td>12.1GB/s ± 3%<td>12.2GB/s ± 1%<td class='nodelta'>~<td class='note'>(p=0.780 n=10+9)
-<tr class='worse'><td>CRC32/poly=Castagnoli/size=1kB/align=0-8<td>15.6GB/s ± 1%<td>15.5GB/s ± 1%<td class='delta'>−1.02%<td class='note'>(p=0.002 n=9+8)
-<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=1kB/align=1-8<td>14.6GB/s ± 6%<td>15.0GB/s ± 2%<td class='nodelta'>~<td class='note'>(p=0.211 n=10+9)
-<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=4kB/align=0-8<td>25.1GB/s ± 5%<td>25.7GB/s ± 3%<td class='nodelta'>~<td class='note'>(p=0.052 n=10+10)
-<tr class='better'><td>CRC32/poly=Castagnoli/size=4kB/align=1-8<td>24.1GB/s ± 6%<td>25.3GB/s ± 3%<td class='delta'>+4.71%<td class='note'>(p=0.005 n=10+10)
-<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=32kB/align=0-8<td>26.9GB/s ± 4%<td>26.8GB/s ± 5%<td class='nodelta'>~<td class='note'>(p=0.842 n=9+10)
-<tr class='better'><td>CRC32/poly=Castagnoli/size=32kB/align=1-8<td>25.9GB/s ± 3%<td>26.8GB/s ± 4%<td class='delta'>+3.62%<td class='note'>(p=0.002 n=9+10)
-<tr class='unchanged'><td>CRC32/poly=Koopman/size=15/align=0-8<td>412MB/s ±10%<td>421MB/s ± 3%<td class='nodelta'>~<td class='note'>(p=0.218 n=10+10)
-<tr class='unchanged'><td>CRC32/poly=Koopman/size=15/align=1-8<td>427MB/s ± 5%<td>422MB/s ± 1%<td class='nodelta'>~<td class='note'>(p=0.497 n=10+9)
-<tr class='better'><td>CRC32/poly=Koopman/size=40/align=0-8<td>437MB/s ± 9%<td>456MB/s ± 2%<td class='delta'>+4.50%<td class='note'>(p=0.002 n=10+10)
-<tr class='unchanged'><td>CRC32/poly=Koopman/size=40/align=1-8<td>440MB/s ± 6%<td>455MB/s ± 3%<td class='nodelta'>~<td class='note'>(p=0.052 n=10+10)
-<tr class='better'><td>CRC32/poly=Koopman/size=512/align=0-8<td>453MB/s ± 5%<td>476MB/s ± 3%<td class='delta'>+5.09%<td class='note'>(p=0.000 n=10+10)
-<tr class='unchanged'><td>CRC32/poly=Koopman/size=512/align=1-8<td>455MB/s ± 6%<td>440MB/s ± 8%<td class='nodelta'>~<td class='note'>(p=0.143 n=10+10)
-<tr class='unchanged'><td>CRC32/poly=Koopman/size=1kB/align=0-8<td>452MB/s ± 9%<td>438MB/s ± 4%<td class='nodelta'>~<td class='note'>(p=0.052 n=10+10)
-<tr class='worse'><td>CRC32/poly=Koopman/size=1kB/align=1-8<td>477MB/s ± 2%<td>434MB/s ± 5%<td class='delta'>−8.92%<td class='note'>(p=0.000 n=9+10)
-<tr class='unchanged'><td>CRC32/poly=Koopman/size=4kB/align=0-8<td>454MB/s ± 5%<td>455MB/s ± 6%<td class='nodelta'>~<td class='note'>(p=0.971 n=10+10)
-<tr class='unchanged'><td>CRC32/poly=Koopman/size=4kB/align=1-8<td>459MB/s ± 9%<td>455MB/s ±11%<td class='nodelta'>~<td class='note'>(p=0.739 n=10+10)
-<tr class='unchanged'><td>CRC32/poly=Koopman/size=32kB/align=0-8<td>453MB/s ± 8%<td>450MB/s ± 4%<td class='nodelta'>~<td class='note'>(p=0.684 n=10+10)
-<tr class='worse'><td>CRC32/poly=Koopman/size=32kB/align=1-8<td>471MB/s ± 3%<td>441MB/s ± 3%<td class='delta'>−6.25%<td class='note'>(p=0.000 n=8+10)
-<tr><td>&nbsp;
-</tbody>
-
-</table>
-</body>
-</html>
diff --git a/cmd/benchstat/testdata/oldnewttest.golden b/cmd/benchstat/testdata/oldnewttest.golden
deleted file mode 100644
index 9ff4fb3..0000000
--- a/cmd/benchstat/testdata/oldnewttest.golden
+++ /dev/null
@@ -1,75 +0,0 @@
-name                                       old time/op    new time/op     delta
-CRC32/poly=IEEE/size=15/align=0-8            46.9ns ± 8%     44.5ns ± 3%    -5.01%  (p=0.011 n=10+10)
-CRC32/poly=IEEE/size=15/align=1-8            44.7ns ± 5%     44.5ns ± 4%      ~     (p=0.600 n=10+10)
-CRC32/poly=IEEE/size=40/align=0-8            41.0ns ± 1%     42.5ns ± 6%    +3.56%  (p=0.006 n=8+10)
-CRC32/poly=IEEE/size=40/align=1-8            41.1ns ± 1%     42.0ns ± 3%    +2.34%  (p=0.001 n=9+10)
-CRC32/poly=IEEE/size=512/align=0-8            238ns ± 5%       57ns ± 3%   -76.00%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=512/align=1-8            236ns ± 3%       57ns ± 3%   -75.72%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=1kB/align=0-8            452ns ± 4%       94ns ± 2%   -79.20%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=1kB/align=1-8            444ns ± 2%       93ns ± 2%   -78.97%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=4kB/align=0-8           1.74µs ± 8%     0.30µs ± 1%   -82.87%  (p=0.000 n=10+9)
-CRC32/poly=IEEE/size=4kB/align=1-8           1.76µs ± 6%     0.30µs ± 3%   -83.05%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=32kB/align=0-8          15.0µs ± 7%      2.2µs ± 3%   -85.57%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=32kB/align=1-8          14.2µs ± 7%      2.2µs ± 3%   -84.65%  (p=0.000 n=10+10)
-CRC32/poly=Castagnoli/size=15/align=0-8      16.4ns ± 3%     16.3ns ± 2%      ~     (p=0.511 n=9+9)
-CRC32/poly=Castagnoli/size=15/align=1-8      17.2ns ± 2%     17.3ns ± 2%      ~     (p=0.553 n=9+10)
-CRC32/poly=Castagnoli/size=40/align=0-8      17.4ns ± 2%     17.5ns ± 4%      ~     (p=0.472 n=10+10)
-CRC32/poly=Castagnoli/size=40/align=1-8      19.7ns ± 3%     19.4ns ± 2%    -1.62%  (p=0.033 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=0-8     40.2ns ± 2%     40.1ns ± 4%      ~     (p=0.885 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=1-8     42.1ns ± 3%     41.9ns ± 2%      ~     (p=0.430 n=10+9)
-CRC32/poly=Castagnoli/size=1kB/align=0-8     65.5ns ± 1%     66.2ns ± 1%    +1.01%  (p=0.001 n=9+8)
-CRC32/poly=Castagnoli/size=1kB/align=1-8     70.1ns ± 6%     68.5ns ± 2%      ~     (p=0.069 n=10+9)
-CRC32/poly=Castagnoli/size=4kB/align=0-8      163ns ± 5%      159ns ± 3%    -2.46%  (p=0.029 n=10+10)
-CRC32/poly=Castagnoli/size=4kB/align=1-8      169ns ± 6%      162ns ± 3%    -4.60%  (p=0.002 n=10+10)
-CRC32/poly=Castagnoli/size=32kB/align=0-8    1.22µs ± 4%     1.21µs ± 3%      ~     (p=0.735 n=9+9)
-CRC32/poly=Castagnoli/size=32kB/align=1-8    1.26µs ± 3%     1.22µs ± 4%    -3.48%  (p=0.001 n=9+10)
-CRC32/poly=Koopman/size=15/align=0-8         36.5ns ±11%     35.6ns ± 3%      ~     (p=0.183 n=10+10)
-CRC32/poly=Koopman/size=15/align=1-8         35.1ns ± 5%     35.5ns ± 1%      ~     (p=0.374 n=10+9)
-CRC32/poly=Koopman/size=40/align=0-8         91.6ns ± 9%     87.6ns ± 2%    -4.35%  (p=0.009 n=10+10)
-CRC32/poly=Koopman/size=40/align=1-8         91.1ns ± 6%     88.0ns ± 3%    -3.35%  (p=0.022 n=10+10)
-CRC32/poly=Koopman/size=512/align=0-8        1.13µs ± 5%     1.08µs ± 3%    -4.93%  (p=0.001 n=10+10)
-CRC32/poly=Koopman/size=512/align=1-8        1.13µs ± 6%     1.17µs ± 8%      ~     (p=0.086 n=10+10)
-CRC32/poly=Koopman/size=1kB/align=0-8        2.24µs ± 6%     2.34µs ± 4%    +4.34%  (p=0.008 n=9+10)
-CRC32/poly=Koopman/size=1kB/align=1-8        2.15µs ± 2%     2.36µs ± 5%    +9.84%  (p=0.000 n=9+10)
-CRC32/poly=Koopman/size=4kB/align=0-8        9.03µs ± 6%     9.00µs ± 6%      ~     (p=0.849 n=10+10)
-CRC32/poly=Koopman/size=4kB/align=1-8        8.94µs ±10%     9.05µs ±12%      ~     (p=0.678 n=10+10)
-CRC32/poly=Koopman/size=32kB/align=0-8       72.4µs ± 9%     72.9µs ± 4%      ~     (p=0.730 n=10+10)
-CRC32/poly=Koopman/size=32kB/align=1-8       69.6µs ± 3%     74.3µs ± 3%    +6.70%  (p=0.000 n=8+10)
-
-name                                       old speed      new speed       delta
-CRC32/poly=IEEE/size=15/align=0-8           321MB/s ± 8%    337MB/s ± 3%    +5.06%  (p=0.010 n=10+10)
-CRC32/poly=IEEE/size=15/align=1-8           336MB/s ± 4%    337MB/s ± 4%      ~     (p=0.600 n=10+10)
-CRC32/poly=IEEE/size=40/align=0-8           975MB/s ± 1%    942MB/s ± 5%    -3.37%  (p=0.005 n=8+10)
-CRC32/poly=IEEE/size=40/align=1-8           974MB/s ± 1%    952MB/s ± 3%    -2.25%  (p=0.001 n=9+10)
-CRC32/poly=IEEE/size=512/align=0-8         2.15GB/s ± 4%   8.97GB/s ± 3%  +317.65%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=512/align=1-8         2.17GB/s ± 3%   8.96GB/s ± 3%  +312.89%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=1kB/align=0-8         2.26GB/s ± 4%  10.88GB/s ± 2%  +381.12%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=1kB/align=1-8         2.31GB/s ± 2%  10.98GB/s ± 2%  +375.97%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=4kB/align=0-8         2.36GB/s ± 7%  13.73GB/s ± 1%  +482.26%  (p=0.000 n=10+9)
-CRC32/poly=IEEE/size=4kB/align=1-8         2.33GB/s ± 6%  13.68GB/s ± 3%  +488.23%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=32kB/align=0-8        2.19GB/s ± 7%  15.19GB/s ± 3%  +591.99%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=32kB/align=1-8        2.31GB/s ± 8%  15.04GB/s ± 3%  +550.07%  (p=0.000 n=10+10)
-CRC32/poly=Castagnoli/size=15/align=0-8     916MB/s ± 2%    920MB/s ± 2%      ~     (p=0.469 n=9+9)
-CRC32/poly=Castagnoli/size=15/align=1-8     870MB/s ± 2%    867MB/s ± 2%      ~     (p=0.599 n=9+10)
-CRC32/poly=Castagnoli/size=40/align=0-8    2.30GB/s ± 2%   2.28GB/s ± 4%      ~     (p=0.469 n=10+10)
-CRC32/poly=Castagnoli/size=40/align=1-8    2.03GB/s ± 3%   2.06GB/s ± 2%    +1.64%  (p=0.035 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=0-8   12.7GB/s ± 2%   12.8GB/s ± 4%      ~     (p=0.872 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=1-8   12.1GB/s ± 3%   12.2GB/s ± 1%      ~     (p=0.391 n=10+9)
-CRC32/poly=Castagnoli/size=1kB/align=0-8   15.6GB/s ± 1%   15.5GB/s ± 1%    -1.02%  (p=0.001 n=9+8)
-CRC32/poly=Castagnoli/size=1kB/align=1-8   14.6GB/s ± 6%   15.0GB/s ± 2%      ~     (p=0.071 n=10+9)
-CRC32/poly=Castagnoli/size=4kB/align=0-8   25.1GB/s ± 5%   25.7GB/s ± 3%    +2.41%  (p=0.033 n=10+10)
-CRC32/poly=Castagnoli/size=4kB/align=1-8   24.1GB/s ± 6%   25.3GB/s ± 3%    +4.71%  (p=0.002 n=10+10)
-CRC32/poly=Castagnoli/size=32kB/align=0-8  26.9GB/s ± 4%   26.8GB/s ± 5%      ~     (p=0.797 n=9+10)
-CRC32/poly=Castagnoli/size=32kB/align=1-8  25.9GB/s ± 3%   26.8GB/s ± 4%    +3.62%  (p=0.001 n=9+10)
-CRC32/poly=Koopman/size=15/align=0-8        412MB/s ±10%    421MB/s ± 3%      ~     (p=0.203 n=10+10)
-CRC32/poly=Koopman/size=15/align=1-8        427MB/s ± 5%    422MB/s ± 1%      ~     (p=0.306 n=10+9)
-CRC32/poly=Koopman/size=40/align=0-8        437MB/s ± 9%    456MB/s ± 2%    +4.50%  (p=0.008 n=10+10)
-CRC32/poly=Koopman/size=40/align=1-8        440MB/s ± 6%    455MB/s ± 3%    +3.36%  (p=0.024 n=10+10)
-CRC32/poly=Koopman/size=512/align=0-8       453MB/s ± 5%    476MB/s ± 3%    +5.09%  (p=0.001 n=10+10)
-CRC32/poly=Koopman/size=512/align=1-8       455MB/s ± 6%    440MB/s ± 8%      ~     (p=0.096 n=10+10)
-CRC32/poly=Koopman/size=1kB/align=0-8       452MB/s ± 9%    438MB/s ± 4%      ~     (p=0.065 n=10+10)
-CRC32/poly=Koopman/size=1kB/align=1-8       477MB/s ± 2%    434MB/s ± 5%    -8.92%  (p=0.000 n=9+10)
-CRC32/poly=Koopman/size=4kB/align=0-8       454MB/s ± 5%    455MB/s ± 6%      ~     (p=0.844 n=10+10)
-CRC32/poly=Koopman/size=4kB/align=1-8       459MB/s ± 9%    455MB/s ±11%      ~     (p=0.708 n=10+10)
-CRC32/poly=Koopman/size=32kB/align=0-8      453MB/s ± 8%    450MB/s ± 4%      ~     (p=0.669 n=10+10)
-CRC32/poly=Koopman/size=32kB/align=1-8      471MB/s ± 3%    441MB/s ± 3%    -6.25%  (p=0.000 n=8+10)
diff --git a/cmd/benchstat/testdata/packages.golden b/cmd/benchstat/testdata/packages.golden
deleted file mode 100644
index 05cf0c7..0000000
--- a/cmd/benchstat/testdata/packages.golden
+++ /dev/null
@@ -1,11 +0,0 @@
-name        old time/op    new time/op    delta
-pkg:encoding/gob
-GobEncode     13.6ms ± 1%    11.8ms ± 1%  -13.31%  (p=0.016 n=4+5)
-pkg:encoding/json
-JSONEncode    32.1ms ± 1%    31.8ms ± 1%     ~     (p=0.286 n=4+5)
-
-name        old speed      new speed      delta
-pkg:encoding/gob
-GobEncode   56.4MB/s ± 1%  65.1MB/s ± 1%  +15.36%  (p=0.016 n=4+5)
-pkg:encoding/json
-JSONEncode  60.4MB/s ± 1%  61.1MB/s ± 2%     ~     (p=0.286 n=4+5)
diff --git a/cmd/benchstat/testdata/packagesnew.txt b/cmd/benchstat/testdata/packagesnew.txt
deleted file mode 100644
index 7732820..0000000
--- a/cmd/benchstat/testdata/packagesnew.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-pkg: encoding/gob
-BenchmarkGobEncode   	 100	  11773189 ns/op	  65.19 MB/s
-BenchmarkGobEncode   	 100	  11942588 ns/op	  64.27 MB/s
-BenchmarkGobEncode   	 100	  11786159 ns/op	  65.12 MB/s
-BenchmarkGobEncode   	 100	  11628583 ns/op	  66.00 MB/s
-BenchmarkGobEncode   	 100	  11815924 ns/op	  64.96 MB/s
-pkg: encoding/json
-BenchmarkJSONEncode  	  50	  32036529 ns/op	  60.57 MB/s
-BenchmarkJSONEncode  	  50	  32156552 ns/op	  60.34 MB/s
-BenchmarkJSONEncode  	  50	  31288355 ns/op	  62.02 MB/s
-BenchmarkJSONEncode  	  50	  31559706 ns/op	  61.49 MB/s
-BenchmarkJSONEncode  	  50	  31765634 ns/op	  61.09 MB/s
diff --git a/cmd/benchstat/testdata/packagesold.golden b/cmd/benchstat/testdata/packagesold.golden
deleted file mode 100644
index 4c20063..0000000
--- a/cmd/benchstat/testdata/packagesold.golden
+++ /dev/null
@@ -1,11 +0,0 @@
-name        time/op
-pkg:encoding/gob
-GobEncode     13.6ms ± 1%
-pkg:encoding/json
-JSONEncode    32.1ms ± 1%
-
-name        speed
-pkg:encoding/gob
-GobEncode   56.4MB/s ± 1%
-pkg:encoding/json
-JSONEncode  60.4MB/s ± 1%
diff --git a/cmd/benchstat/testdata/packagesold.txt b/cmd/benchstat/testdata/packagesold.txt
deleted file mode 100644
index add4b7a..0000000
--- a/cmd/benchstat/testdata/packagesold.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-pkg: encoding/gob
-BenchmarkGobEncode   	100	  13552735 ns/op	  56.63 MB/s
-BenchmarkGobEncode   	100	  13553943 ns/op	  56.63 MB/s
-BenchmarkGobEncode   	100	  13606356 ns/op	  56.41 MB/s
-BenchmarkGobEncode   	100	  13683198 ns/op	  56.09 MB/s
-pkg: encoding/json
-BenchmarkJSONEncode  	 50	  32395067 ns/op	  59.90 MB/s
-BenchmarkJSONEncode  	 50	  32334214 ns/op	  60.01 MB/s
-BenchmarkJSONEncode  	 50	  31992891 ns/op	  60.65 MB/s
-BenchmarkJSONEncode  	 50	  31735022 ns/op	  61.15 MB/s
diff --git a/cmd/benchstat/testdata/rdeltasort.golden b/cmd/benchstat/testdata/rdeltasort.golden
deleted file mode 100644
index d74cce4..0000000
--- a/cmd/benchstat/testdata/rdeltasort.golden
+++ /dev/null
@@ -1,75 +0,0 @@
-name                                       old time/op    new time/op     delta
-CRC32/poly=IEEE/size=32kB/align=0-8          15.0µs ± 7%      2.2µs ± 3%   -85.57%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=32kB/align=1-8          14.2µs ± 7%      2.2µs ± 3%   -84.65%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=4kB/align=1-8           1.76µs ± 6%     0.30µs ± 3%   -83.05%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=4kB/align=0-8           1.74µs ± 8%     0.30µs ± 1%   -82.87%  (p=0.000 n=10+9)
-CRC32/poly=IEEE/size=1kB/align=0-8            452ns ± 4%       94ns ± 2%   -79.20%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=1kB/align=1-8            444ns ± 2%       93ns ± 2%   -78.97%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=512/align=0-8            238ns ± 5%       57ns ± 3%   -76.00%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=512/align=1-8            236ns ± 3%       57ns ± 3%   -75.72%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=15/align=0-8            46.9ns ± 8%     44.5ns ± 3%    -5.01%  (p=0.008 n=10+10)
-CRC32/poly=Koopman/size=512/align=0-8        1.13µs ± 5%     1.08µs ± 3%    -4.93%  (p=0.000 n=10+10)
-CRC32/poly=Castagnoli/size=4kB/align=1-8      169ns ± 6%      162ns ± 3%    -4.60%  (p=0.005 n=10+10)
-CRC32/poly=Koopman/size=40/align=0-8         91.6ns ± 9%     87.6ns ± 2%    -4.35%  (p=0.002 n=10+10)
-CRC32/poly=Castagnoli/size=32kB/align=1-8    1.26µs ± 3%     1.22µs ± 4%    -3.48%  (p=0.002 n=9+10)
-CRC32/poly=Castagnoli/size=4kB/align=0-8      163ns ± 5%      159ns ± 3%    -2.46%  (p=0.032 n=10+10)
-CRC32/poly=Castagnoli/size=40/align=1-8      19.7ns ± 3%     19.4ns ± 2%    -1.62%  (p=0.036 n=10+10)
-CRC32/poly=IEEE/size=15/align=1-8            44.7ns ± 5%     44.5ns ± 4%      ~     (p=0.539 n=10+10)
-CRC32/poly=Castagnoli/size=15/align=0-8      16.4ns ± 3%     16.3ns ± 2%      ~     (p=0.615 n=9+9)
-CRC32/poly=Castagnoli/size=15/align=1-8      17.2ns ± 2%     17.3ns ± 2%      ~     (p=0.650 n=9+10)
-CRC32/poly=Castagnoli/size=40/align=0-8      17.4ns ± 2%     17.5ns ± 4%      ~     (p=0.694 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=0-8     40.2ns ± 2%     40.1ns ± 4%      ~     (p=0.614 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=1-8     42.1ns ± 3%     41.9ns ± 2%      ~     (p=0.952 n=10+9)
-CRC32/poly=Castagnoli/size=1kB/align=1-8     70.1ns ± 6%     68.5ns ± 2%      ~     (p=0.190 n=10+9)
-CRC32/poly=Castagnoli/size=32kB/align=0-8    1.22µs ± 4%     1.21µs ± 3%      ~     (p=0.882 n=9+9)
-CRC32/poly=Koopman/size=15/align=0-8         36.5ns ±11%     35.6ns ± 3%      ~     (p=0.216 n=10+10)
-CRC32/poly=Koopman/size=15/align=1-8         35.1ns ± 5%     35.5ns ± 1%      ~     (p=0.508 n=10+9)
-CRC32/poly=Koopman/size=40/align=1-8         91.1ns ± 6%     88.0ns ± 3%      ~     (p=0.055 n=10+10)
-CRC32/poly=Koopman/size=512/align=1-8        1.13µs ± 6%     1.17µs ± 8%      ~     (p=0.143 n=10+10)
-CRC32/poly=Koopman/size=4kB/align=0-8        9.03µs ± 6%     9.00µs ± 6%      ~     (p=0.971 n=10+10)
-CRC32/poly=Koopman/size=4kB/align=1-8        8.94µs ±10%     9.05µs ±12%      ~     (p=0.754 n=10+10)
-CRC32/poly=Koopman/size=32kB/align=0-8       72.4µs ± 9%     72.9µs ± 4%      ~     (p=0.684 n=10+10)
-CRC32/poly=Castagnoli/size=1kB/align=0-8     65.5ns ± 1%     66.2ns ± 1%    +1.01%  (p=0.003 n=9+8)
-CRC32/poly=IEEE/size=40/align=1-8            41.1ns ± 1%     42.0ns ± 3%    +2.34%  (p=0.000 n=9+10)
-CRC32/poly=IEEE/size=40/align=0-8            41.0ns ± 1%     42.5ns ± 6%    +3.56%  (p=0.000 n=8+10)
-CRC32/poly=Koopman/size=1kB/align=0-8        2.24µs ± 6%     2.34µs ± 4%    +4.34%  (p=0.010 n=9+10)
-CRC32/poly=Koopman/size=32kB/align=1-8       69.6µs ± 3%     74.3µs ± 3%    +6.70%  (p=0.000 n=8+10)
-CRC32/poly=Koopman/size=1kB/align=1-8        2.15µs ± 2%     2.36µs ± 5%    +9.84%  (p=0.000 n=9+10)
-
-name                                       old speed      new speed       delta
-CRC32/poly=IEEE/size=32kB/align=0-8        2.19GB/s ± 7%  15.19GB/s ± 3%  +591.99%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=32kB/align=1-8        2.31GB/s ± 8%  15.04GB/s ± 3%  +550.07%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=4kB/align=1-8         2.33GB/s ± 6%  13.68GB/s ± 3%  +488.23%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=4kB/align=0-8         2.36GB/s ± 7%  13.73GB/s ± 1%  +482.26%  (p=0.000 n=10+9)
-CRC32/poly=IEEE/size=1kB/align=0-8         2.26GB/s ± 4%  10.88GB/s ± 2%  +381.12%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=1kB/align=1-8         2.31GB/s ± 2%  10.98GB/s ± 2%  +375.97%  (p=0.000 n=10+8)
-CRC32/poly=IEEE/size=512/align=0-8         2.15GB/s ± 4%   8.97GB/s ± 3%  +317.65%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=512/align=1-8         2.17GB/s ± 3%   8.96GB/s ± 3%  +312.89%  (p=0.000 n=10+10)
-CRC32/poly=Koopman/size=512/align=0-8       453MB/s ± 5%    476MB/s ± 3%    +5.09%  (p=0.000 n=10+10)
-CRC32/poly=IEEE/size=15/align=0-8           321MB/s ± 8%    337MB/s ± 3%    +5.06%  (p=0.009 n=10+10)
-CRC32/poly=Castagnoli/size=4kB/align=1-8   24.1GB/s ± 6%   25.3GB/s ± 3%    +4.71%  (p=0.005 n=10+10)
-CRC32/poly=Koopman/size=40/align=0-8        437MB/s ± 9%    456MB/s ± 2%    +4.50%  (p=0.002 n=10+10)
-CRC32/poly=Castagnoli/size=32kB/align=1-8  25.9GB/s ± 3%   26.8GB/s ± 4%    +3.62%  (p=0.002 n=9+10)
-CRC32/poly=IEEE/size=15/align=1-8           336MB/s ± 4%    337MB/s ± 4%      ~     (p=0.579 n=10+10)
-CRC32/poly=Castagnoli/size=15/align=0-8     916MB/s ± 2%    920MB/s ± 2%      ~     (p=0.489 n=9+9)
-CRC32/poly=Castagnoli/size=15/align=1-8     870MB/s ± 2%    867MB/s ± 2%      ~     (p=0.661 n=9+10)
-CRC32/poly=Castagnoli/size=40/align=0-8    2.30GB/s ± 2%   2.28GB/s ± 4%      ~     (p=0.684 n=10+10)
-CRC32/poly=Castagnoli/size=40/align=1-8    2.03GB/s ± 3%   2.06GB/s ± 2%      ~     (p=0.063 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=0-8   12.7GB/s ± 2%   12.8GB/s ± 4%      ~     (p=0.529 n=10+10)
-CRC32/poly=Castagnoli/size=512/align=1-8   12.1GB/s ± 3%   12.2GB/s ± 1%      ~     (p=0.780 n=10+9)
-CRC32/poly=Castagnoli/size=1kB/align=1-8   14.6GB/s ± 6%   15.0GB/s ± 2%      ~     (p=0.211 n=10+9)
-CRC32/poly=Castagnoli/size=4kB/align=0-8   25.1GB/s ± 5%   25.7GB/s ± 3%      ~     (p=0.052 n=10+10)
-CRC32/poly=Castagnoli/size=32kB/align=0-8  26.9GB/s ± 4%   26.8GB/s ± 5%      ~     (p=0.842 n=9+10)
-CRC32/poly=Koopman/size=15/align=0-8        412MB/s ±10%    421MB/s ± 3%      ~     (p=0.218 n=10+10)
-CRC32/poly=Koopman/size=15/align=1-8        427MB/s ± 5%    422MB/s ± 1%      ~     (p=0.497 n=10+9)
-CRC32/poly=Koopman/size=40/align=1-8        440MB/s ± 6%    455MB/s ± 3%      ~     (p=0.052 n=10+10)
-CRC32/poly=Koopman/size=512/align=1-8       455MB/s ± 6%    440MB/s ± 8%      ~     (p=0.143 n=10+10)
-CRC32/poly=Koopman/size=1kB/align=0-8       452MB/s ± 9%    438MB/s ± 4%      ~     (p=0.052 n=10+10)
-CRC32/poly=Koopman/size=4kB/align=0-8       454MB/s ± 5%    455MB/s ± 6%      ~     (p=0.971 n=10+10)
-CRC32/poly=Koopman/size=4kB/align=1-8       459MB/s ± 9%    455MB/s ±11%      ~     (p=0.739 n=10+10)
-CRC32/poly=Koopman/size=32kB/align=0-8      453MB/s ± 8%    450MB/s ± 4%      ~     (p=0.684 n=10+10)
-CRC32/poly=Castagnoli/size=1kB/align=0-8   15.6GB/s ± 1%   15.5GB/s ± 1%    -1.02%  (p=0.002 n=9+8)
-CRC32/poly=IEEE/size=40/align=1-8           974MB/s ± 1%    952MB/s ± 3%    -2.25%  (p=0.000 n=9+10)
-CRC32/poly=IEEE/size=40/align=0-8           975MB/s ± 1%    942MB/s ± 5%    -3.37%  (p=0.001 n=8+10)
-CRC32/poly=Koopman/size=32kB/align=1-8      471MB/s ± 3%    441MB/s ± 3%    -6.25%  (p=0.000 n=8+10)
-CRC32/poly=Koopman/size=1kB/align=1-8       477MB/s ± 2%    434MB/s ± 5%    -8.92%  (p=0.000 n=9+10)
diff --git a/cmd/benchstat/testdata/slashslash4.txt b/cmd/benchstat/testdata/slashslash4.txt
deleted file mode 100644
index 509c0d1..0000000
--- a/cmd/benchstat/testdata/slashslash4.txt
+++ /dev/null
@@ -1,127 +0,0 @@
-pkg: hash/crc32
-goarch: amd64
-goos: darwin
-note: hw acceleration disabled, -bench=//4
-
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        40.9 ns/op	 978.88 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        42.8 ns/op	 933.98 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        40.9 ns/op	 978.23 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        42.4 ns/op	 942.76 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        41.4 ns/op	 967.26 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        41.6 ns/op	 962.08 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        42.6 ns/op	 939.29 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        43.5 ns/op	 918.83 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        42.0 ns/op	 951.27 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        42.7 ns/op	 937.30 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.1 ns/op	 973.19 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        43.6 ns/op	 918.29 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        42.8 ns/op	 934.58 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.0 ns/op	 975.00 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.2 ns/op	 970.18 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        40.8 ns/op	 980.81 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        42.4 ns/op	 942.44 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.4 ns/op	 966.71 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        40.7 ns/op	 982.65 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        41.8 ns/op	 955.93 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1807 ns/op	2266.51 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1668 ns/op	2454.61 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1718 ns/op	2383.22 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1675 ns/op	2445.28 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1682 ns/op	2434.60 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1673 ns/op	2448.24 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1672 ns/op	2449.03 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1693 ns/op	2419.16 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1658 ns/op	2469.45 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      1699 ns/op	2410.39 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1699 ns/op	2409.64 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1677 ns/op	2441.13 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1671 ns/op	2450.93 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1666 ns/op	2457.51 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1757 ns/op	2330.39 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1666 ns/op	2457.84 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1716 ns/op	2386.93 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1662 ns/op	2464.20 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1703 ns/op	2404.35 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	  100000	      1681 ns/op	2436.35 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.1 ns/op	2334.91 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.5 ns/op	2288.87 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.8 ns/op	2244.89 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        17.3 ns/op	2311.76 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        18.0 ns/op	2224.01 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        20.7 ns/op	1933.54 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        19.3 ns/op	2073.61 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        19.3 ns/op	2067.47 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        19.9 ns/op	2011.31 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	10000000	        18.9 ns/op	2117.00 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.9 ns/op	2005.75 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.5 ns/op	2049.41 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.2 ns/op	2081.60 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        20.0 ns/op	1996.42 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.4 ns/op	2066.66 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.8 ns/op	2025.21 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.8 ns/op	2015.13 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        21.2 ns/op	1888.37 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        19.5 ns/op	2046.76 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	10000000	        22.3 ns/op	1796.24 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       174 ns/op	23478.39 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       158 ns/op	25880.52 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       164 ns/op	24857.78 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       156 ns/op	26150.67 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       156 ns/op	26185.14 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       161 ns/op	25366.49 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       164 ns/op	24930.88 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       156 ns/op	26209.59 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       161 ns/op	25376.24 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	 1000000	       162 ns/op	25281.02 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       168 ns/op	24280.72 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       166 ns/op	24653.32 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       163 ns/op	24991.81 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       158 ns/op	25826.99 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       162 ns/op	25188.41 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       164 ns/op	24970.03 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       179 ns/op	22771.21 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       176 ns/op	23182.06 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       184 ns/op	22193.41 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	 1000000	       176 ns/op	23191.10 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 1000000	       106 ns/op	 376.78 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 1000000	       102 ns/op	 391.07 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        99.7 ns/op	 401.12 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        95.6 ns/op	 418.47 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        93.3 ns/op	 428.76 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        90.5 ns/op	 441.87 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        92.1 ns/op	 434.22 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        85.6 ns/op	 467.07 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        87.6 ns/op	 456.72 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 2000000	        85.7 ns/op	 466.53 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        85.6 ns/op	 467.12 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        86.1 ns/op	 464.59 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        85.5 ns/op	 467.95 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        86.5 ns/op	 462.44 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        89.8 ns/op	 445.20 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        86.4 ns/op	 463.14 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        85.8 ns/op	 466.27 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        87.7 ns/op	 456.34 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        88.0 ns/op	 454.78 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 2000000	        87.2 ns/op	 458.77 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8373 ns/op	 489.18 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8397 ns/op	 487.76 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9496 ns/op	 431.31 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9426 ns/op	 434.52 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9035 ns/op	 453.33 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9295 ns/op	 440.65 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9220 ns/op	 444.22 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9539 ns/op	 429.37 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      8401 ns/op	 487.53 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   20000	      9634 ns/op	 425.14 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9591 ns/op	 427.02 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9878 ns/op	 414.64 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9630 ns/op	 425.32 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9284 ns/op	 441.18 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      8676 ns/op	 472.10 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      8844 ns/op	 463.13 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9621 ns/op	 425.72 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9364 ns/op	 437.38 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9993 ns/op	 409.85 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   20000	      9671 ns/op	 423.50 MB/s
-PASS
-ok  	hash/crc32	25.585s
diff --git a/cmd/benchstat/testdata/smallSample.stdout b/cmd/benchstat/testdata/smallSample.stdout
new file mode 100644
index 0000000..c7577cf
--- /dev/null
+++ b/cmd/benchstat/testdata/smallSample.stdout
@@ -0,0 +1,5 @@
+  │    before    │              after              │
+  │    sec/op    │    sec/op     vs base           │
+X   100.0n ± ∞ ¹   101.0n ± ∞ ¹  ~ (p=1.000 n=1) ²
+¹ need >= 6 samples for confidence interval at level 0.95
+² need >= 4 samples to detect a difference at alpha level 0.05
diff --git a/cmd/benchstat/testdata/smallSample.txt b/cmd/benchstat/testdata/smallSample.txt
new file mode 100644
index 0000000..48e32ac
--- /dev/null
+++ b/cmd/benchstat/testdata/smallSample.txt
@@ -0,0 +1,7 @@
+note: before
+
+BenchmarkX 1 100 ns/op
+
+note: after
+
+BenchmarkX 1 101 ns/op
diff --git a/cmd/benchstat/testdata/units-new.txt b/cmd/benchstat/testdata/units-new.txt
deleted file mode 100644
index 397088d..0000000
--- a/cmd/benchstat/testdata/units-new.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-pkg: synthetic
-note: test benchstat printing of units
-
-BenchmarkTwoHourMarathon 1 7200000000000 ns/op 14100000000000 user-ns/op 5 ns/GC 12 quick-bytes
-BenchmarkTwoHourMarathon 1 7200000000000 ns/op 14700000000000 user-ns/op 5 ns/GC 16 quick-bytes
-BenchmarkTwoHourMarathon 1 7200000000000 ns/op 14800000000000 user-ns/op 5 ns/GC 12 quick-bytes
-BenchmarkTwoHourMarathon 1 7200000000000 ns/op 14300000000000 user-ns/op 5 ns/GC 16 quick-bytes
-BenchmarkTwoHourMarathon 1 7200000000000 ns/op 14000000000000 user-ns/op 5 ns/GC 12 quick-bytes
diff --git a/cmd/benchstat/testdata/units-old.txt b/cmd/benchstat/testdata/units-old.txt
deleted file mode 100644
index 6e87b07..0000000
--- a/cmd/benchstat/testdata/units-old.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-pkg: synthetic
-note: test benchstat printing of units
-
-BenchmarkTwoHourMarathon 1 7200000000000 ns/op 14400000000000 user-ns/op 5 ns/GC 12 quick-bytes
-BenchmarkTwoHourMarathon 1 7200000000000 ns/op 14500000000000 user-ns/op 5 ns/GC 16 quick-bytes
-BenchmarkTwoHourMarathon 1 7200000000000 ns/op 14600000000000 user-ns/op 5 ns/GC 12 quick-bytes
-BenchmarkTwoHourMarathon 1 7200000000000 ns/op 14200000000000 user-ns/op 5 ns/GC 16 quick-bytes
-BenchmarkTwoHourMarathon 1 7200000000000 ns/op 14300000000000 user-ns/op 5 ns/GC 12 quick-bytes
diff --git a/cmd/benchstat/testdata/units.golden b/cmd/benchstat/testdata/units.golden
deleted file mode 100644
index 3a5ca67..0000000
--- a/cmd/benchstat/testdata/units.golden
+++ /dev/null
@@ -1,11 +0,0 @@
-name             old time/op       new time/op       delta
-TwoHourMarathon        7200s ± 0%        7200s ± 0%   ~     (all equal)
-
-name             old user-time/op  new user-time/op  delta
-TwoHourMarathon       14400s ± 1%       14380s ± 3%   ~     (p=0.881 n=5+5)
-
-name             old time/GC       new time/GC       delta
-TwoHourMarathon       5.00ns ± 0%       5.00ns ± 0%   ~     (all equal)
-
-name             old quick-bytes   new quick-bytes   delta
-TwoHourMarathon        13.6B ±18%        13.6B ±18%   ~     (p=1.000 n=5+5)
diff --git a/cmd/benchstat/testdata/units.stdout b/cmd/benchstat/testdata/units.stdout
new file mode 100644
index 0000000..b536123
--- /dev/null
+++ b/cmd/benchstat/testdata/units.stdout
@@ -0,0 +1,6 @@
+         │    before    │          after           │
+         │  text-bytes  │ text-bytes  vs base      │
+Size       100.0 ± 0%     105.0 ± 0%  +5.00% (n=1)
+NonExact   101.0 ± 1% ¹   101.0 ± 0%   0.00% (n=3)
+geomean    100.5          103.0       +2.47%
+¹ exact distribution expected, but values range from 100 to 101
diff --git a/cmd/benchstat/testdata/units.txt b/cmd/benchstat/testdata/units.txt
new file mode 100644
index 0000000..b9197c4
--- /dev/null
+++ b/cmd/benchstat/testdata/units.txt
@@ -0,0 +1,17 @@
+Unit text-bytes assume=exact
+
+note: before
+
+BenchmarkSize 1 100 text-bytes
+
+BenchmarkNonExact 1 100 text-bytes
+BenchmarkNonExact 1 101 text-bytes
+BenchmarkNonExact 1 101 text-bytes
+
+note: after
+
+BenchmarkSize 1 105 text-bytes
+
+BenchmarkNonExact 1 101 text-bytes
+BenchmarkNonExact 1 101 text-bytes
+BenchmarkNonExact 1 101 text-bytes
diff --git a/cmd/benchstat/testdata/x386.txt b/cmd/benchstat/testdata/x386.txt
deleted file mode 100644
index 73d2a05..0000000
--- a/cmd/benchstat/testdata/x386.txt
+++ /dev/null
@@ -1,368 +0,0 @@
-pkg: hash/crc32
-goarch: 386
-goos: darwin
-note: 32-bit build
-warning: <blink>hello</blink>
-
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 2000000	        60.1 ns/op	 249.74 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 2000000	        59.4 ns/op	 252.44 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 2000000	        62.3 ns/op	 240.78 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 2000000	        59.4 ns/op	 252.36 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 2000000	        61.5 ns/op	 243.82 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 2000000	        67.8 ns/op	 221.14 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 2000000	        60.1 ns/op	 249.62 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 2000000	        65.6 ns/op	 228.65 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 2000000	        67.9 ns/op	 220.84 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=0-8         	 2000000	        59.4 ns/op	 252.41 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 2000000	        61.3 ns/op	 244.70 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 2000000	        63.9 ns/op	 234.92 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 2000000	        60.9 ns/op	 246.27 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 2000000	        62.9 ns/op	 238.32 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 2000000	        63.4 ns/op	 236.54 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 2000000	        68.7 ns/op	 218.33 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 2000000	        68.1 ns/op	 220.24 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 2000000	        64.6 ns/op	 232.32 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 2000000	        59.2 ns/op	 253.35 MB/s
-BenchmarkCRC32/poly=IEEE/size=15/align=1-8         	 2000000	        61.9 ns/op	 242.29 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 2000000	        57.3 ns/op	 698.52 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        58.2 ns/op	 686.77 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 2000000	        57.1 ns/op	 700.17 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 2000000	        58.5 ns/op	 684.03 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 2000000	        56.4 ns/op	 708.98 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 2000000	        56.8 ns/op	 704.40 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        57.8 ns/op	 692.04 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        56.6 ns/op	 707.22 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 2000000	        59.0 ns/op	 677.49 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=0-8         	 3000000	        56.7 ns/op	 705.31 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 2000000	        57.3 ns/op	 697.95 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 2000000	        58.1 ns/op	 688.08 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        56.4 ns/op	 709.49 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 2000000	        56.4 ns/op	 709.47 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 2000000	        56.3 ns/op	 710.10 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        56.3 ns/op	 710.14 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        57.2 ns/op	 698.71 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        58.6 ns/op	 682.05 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        59.0 ns/op	 678.11 MB/s
-BenchmarkCRC32/poly=IEEE/size=40/align=1-8         	 3000000	        57.7 ns/op	 693.31 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       331 ns/op	1544.96 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       330 ns/op	1549.93 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       349 ns/op	1465.25 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       330 ns/op	1548.50 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       339 ns/op	1506.14 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       330 ns/op	1547.00 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       330 ns/op	1549.89 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       333 ns/op	1536.18 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       333 ns/op	1534.03 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=0-8        	  500000	       335 ns/op	1524.75 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  300000	       334 ns/op	1529.22 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       329 ns/op	1553.02 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       339 ns/op	1510.19 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  300000	       345 ns/op	1481.90 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       329 ns/op	1551.53 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       337 ns/op	1515.82 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       337 ns/op	1516.02 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       332 ns/op	1538.66 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       328 ns/op	1556.83 MB/s
-BenchmarkCRC32/poly=IEEE/size=512/align=1-8        	  500000	       343 ns/op	1490.30 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  200000	       632 ns/op	1618.84 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  200000	       622 ns/op	1643.79 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  200000	       616 ns/op	1660.67 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  200000	       622 ns/op	1643.99 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  200000	       639 ns/op	1602.15 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  200000	       620 ns/op	1649.79 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  200000	       624 ns/op	1638.85 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  200000	       641 ns/op	1595.55 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  200000	       622 ns/op	1645.73 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=0-8        	  200000	       625 ns/op	1636.01 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  200000	       676 ns/op	1513.45 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  200000	       647 ns/op	1582.09 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  200000	       620 ns/op	1650.24 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  200000	       642 ns/op	1594.15 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  200000	       617 ns/op	1658.81 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  200000	       638 ns/op	1604.66 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  200000	       618 ns/op	1655.06 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  200000	       620 ns/op	1649.88 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  200000	       624 ns/op	1638.49 MB/s
-BenchmarkCRC32/poly=IEEE/size=1kB/align=1-8        	  200000	       651 ns/op	1572.31 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	   50000	      2525 ns/op	1621.93 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	   50000	      2379 ns/op	1721.02 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	  100000	      2431 ns/op	1684.62 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	   50000	      2481 ns/op	1650.59 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	   50000	      2625 ns/op	1559.90 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	   50000	      2372 ns/op	1726.40 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	   50000	      2407 ns/op	1701.24 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	   50000	      2440 ns/op	1678.55 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	   50000	      2395 ns/op	1709.73 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=0-8        	   50000	      2512 ns/op	1630.23 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	   50000	      2493 ns/op	1642.53 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	   50000	      2548 ns/op	1607.41 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	   50000	      2416 ns/op	1695.07 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	   50000	      2483 ns/op	1649.51 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	   50000	      2378 ns/op	1722.37 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	   50000	      2373 ns/op	1725.47 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	   50000	      2541 ns/op	1611.87 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	   50000	      2412 ns/op	1697.97 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	   50000	      2360 ns/op	1735.58 MB/s
-BenchmarkCRC32/poly=IEEE/size=4kB/align=1-8        	   50000	      2341 ns/op	1749.19 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     19117 ns/op	1714.05 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     19504 ns/op	1680.05 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     18836 ns/op	1739.64 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     19885 ns/op	1647.81 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     18815 ns/op	1741.56 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     19054 ns/op	1719.73 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     18893 ns/op	1734.36 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     18778 ns/op	1744.95 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     18894 ns/op	1734.30 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=0-8       	   10000	     19723 ns/op	1661.41 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     19206 ns/op	1706.06 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     18774 ns/op	1745.37 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     19806 ns/op	1654.37 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     19032 ns/op	1721.68 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     19774 ns/op	1657.05 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     19158 ns/op	1710.37 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     19134 ns/op	1712.51 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     19073 ns/op	1718.01 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     20436 ns/op	1603.44 MB/s
-BenchmarkCRC32/poly=IEEE/size=32kB/align=1-8       	   10000	     19810 ns/op	1654.05 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	 2000000	        66.3 ns/op	 226.30 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	 2000000	        63.8 ns/op	 235.29 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	 2000000	        59.3 ns/op	 252.90 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	 3000000	        59.7 ns/op	 251.43 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	 2000000	        59.9 ns/op	 250.28 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	 2000000	        59.4 ns/op	 252.63 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	 2000000	        58.7 ns/op	 255.66 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	 3000000	        60.1 ns/op	 249.64 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	 2000000	        59.1 ns/op	 253.84 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=0-8   	 2000000	        58.7 ns/op	 255.71 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	 2000000	        59.1 ns/op	 253.95 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	 2000000	        58.7 ns/op	 255.66 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	 2000000	        60.4 ns/op	 248.51 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	 2000000	        58.8 ns/op	 255.07 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	 2000000	        60.6 ns/op	 247.35 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	 2000000	        59.3 ns/op	 252.88 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	 3000000	        58.6 ns/op	 255.98 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	 2000000	        59.0 ns/op	 254.28 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	 2000000	        59.1 ns/op	 253.72 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=15/align=1-8   	 2000000	        59.9 ns/op	 250.50 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	 3000000	        56.3 ns/op	 710.32 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	 2000000	        56.1 ns/op	 713.21 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	 2000000	        59.8 ns/op	 668.58 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	 2000000	        61.6 ns/op	 649.17 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	 3000000	        59.0 ns/op	 678.05 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	 2000000	        60.9 ns/op	 656.29 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	 3000000	        60.6 ns/op	 660.54 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	 3000000	        60.2 ns/op	 664.38 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	 2000000	        63.3 ns/op	 631.62 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=0-8   	 2000000	        58.8 ns/op	 680.65 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	 3000000	        56.2 ns/op	 711.22 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	 3000000	        56.1 ns/op	 713.52 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	 2000000	        58.2 ns/op	 687.04 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	 2000000	        56.4 ns/op	 709.57 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	 3000000	        58.2 ns/op	 687.58 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	 2000000	        56.2 ns/op	 711.91 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	 2000000	        58.6 ns/op	 682.30 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	 2000000	        62.0 ns/op	 644.93 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	 2000000	        61.0 ns/op	 655.94 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=40/align=1-8   	 3000000	        58.2 ns/op	 686.72 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	  500000	       337 ns/op	1517.89 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	  500000	       342 ns/op	1493.38 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	  500000	       350 ns/op	1459.81 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	  500000	       370 ns/op	1382.99 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	  500000	       351 ns/op	1456.12 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	  500000	       357 ns/op	1433.94 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	  500000	       356 ns/op	1435.18 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	  500000	       356 ns/op	1436.96 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	  500000	       349 ns/op	1466.27 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=0-8  	  500000	       340 ns/op	1501.51 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	  500000	       360 ns/op	1420.79 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	  300000	       358 ns/op	1427.91 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	  300000	       344 ns/op	1487.18 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	  500000	       352 ns/op	1453.44 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	  500000	       358 ns/op	1426.66 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	  500000	       347 ns/op	1474.05 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	  500000	       335 ns/op	1526.35 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	  500000	       376 ns/op	1359.30 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	  500000	       337 ns/op	1515.18 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=512/align=1-8  	  500000	       331 ns/op	1543.32 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	  200000	       641 ns/op	1596.29 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	  200000	       629 ns/op	1627.68 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	  200000	       632 ns/op	1618.10 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	  200000	       677 ns/op	1512.13 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	  200000	       677 ns/op	1511.58 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	  200000	       666 ns/op	1536.87 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	  200000	       654 ns/op	1563.44 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	  200000	       663 ns/op	1544.36 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	  200000	       663 ns/op	1543.43 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=0-8  	  200000	       665 ns/op	1538.35 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	  200000	       671 ns/op	1524.71 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	  200000	       652 ns/op	1569.20 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	  200000	       645 ns/op	1587.35 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	  200000	       663 ns/op	1543.45 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	  200000	       694 ns/op	1474.32 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	  200000	       674 ns/op	1518.51 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	  200000	       638 ns/op	1603.43 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	  200000	       657 ns/op	1557.80 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	  200000	       636 ns/op	1609.05 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=1kB/align=1-8  	  200000	       638 ns/op	1604.77 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	   50000	      2358 ns/op	1736.69 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	   50000	      2544 ns/op	1609.49 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	   50000	      2462 ns/op	1663.08 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	   50000	      2493 ns/op	1642.57 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	   50000	      2459 ns/op	1665.28 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	   50000	      2470 ns/op	1657.64 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	   50000	      2476 ns/op	1653.64 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	   50000	      2441 ns/op	1677.33 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	   50000	      2394 ns/op	1710.31 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=0-8  	   50000	      2421 ns/op	1691.81 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	   50000	      2429 ns/op	1686.23 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	  100000	      2555 ns/op	1603.04 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	   50000	      2386 ns/op	1716.33 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	   50000	      2392 ns/op	1712.18 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	   50000	      2433 ns/op	1682.90 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	  100000	      2430 ns/op	1685.17 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	   50000	      2396 ns/op	1709.43 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	   50000	      2476 ns/op	1654.23 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	   50000	      2571 ns/op	1592.69 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=4kB/align=1-8  	   50000	      2412 ns/op	1697.74 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	   10000	     20585 ns/op	1591.77 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	   10000	     19541 ns/op	1676.84 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	   10000	     20578 ns/op	1592.34 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	   10000	     20335 ns/op	1611.39 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	    5000	     21641 ns/op	1514.14 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	   10000	     19939 ns/op	1643.39 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	   10000	     21381 ns/op	1532.56 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	   10000	     20214 ns/op	1620.99 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	   10000	     21208 ns/op	1545.07 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=0-8 	   10000	     19839 ns/op	1651.64 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	    5000	     23566 ns/op	1390.45 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	   10000	     20604 ns/op	1590.36 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	   10000	     19488 ns/op	1681.44 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	   10000	     22008 ns/op	1488.87 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	   10000	     19449 ns/op	1684.79 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	   10000	     19511 ns/op	1679.42 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	   10000	     19695 ns/op	1663.73 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	   10000	     20185 ns/op	1623.36 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	   10000	     20397 ns/op	1606.43 MB/s
-BenchmarkCRC32/poly=Castagnoli/size=32kB/align=1-8 	   10000	     20278 ns/op	1615.93 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 2000000	        55.6 ns/op	 270.00 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 3000000	        58.3 ns/op	 257.08 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 3000000	        60.0 ns/op	 249.97 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 2000000	        58.5 ns/op	 256.45 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 2000000	        58.3 ns/op	 257.51 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 3000000	        60.2 ns/op	 249.29 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 3000000	        60.1 ns/op	 249.55 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 2000000	        57.5 ns/op	 261.00 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 3000000	        57.0 ns/op	 263.05 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=0-8      	 3000000	        56.6 ns/op	 265.03 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 3000000	        60.1 ns/op	 249.65 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 3000000	        55.1 ns/op	 272.35 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 3000000	        54.2 ns/op	 276.58 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 3000000	        53.8 ns/op	 279.07 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 3000000	        57.0 ns/op	 262.94 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 2000000	        56.7 ns/op	 264.60 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 3000000	        57.4 ns/op	 261.35 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 2000000	        55.6 ns/op	 269.72 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 3000000	        59.2 ns/op	 253.37 MB/s
-BenchmarkCRC32/poly=Koopman/size=15/align=1-8      	 2000000	        54.9 ns/op	 273.38 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 1000000	       139 ns/op	 287.45 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 1000000	       140 ns/op	 284.31 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 1000000	       141 ns/op	 283.36 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 1000000	       138 ns/op	 288.46 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 1000000	       145 ns/op	 274.08 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 1000000	       137 ns/op	 291.46 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 1000000	       153 ns/op	 260.57 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 1000000	       144 ns/op	 277.25 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 1000000	       148 ns/op	 269.74 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=0-8      	 1000000	       138 ns/op	 288.53 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 1000000	       136 ns/op	 292.32 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 1000000	       150 ns/op	 265.66 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 1000000	       143 ns/op	 279.14 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 1000000	       139 ns/op	 286.27 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 1000000	       138 ns/op	 289.48 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 1000000	       137 ns/op	 290.27 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 1000000	       137 ns/op	 289.91 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 1000000	       132 ns/op	 302.04 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 1000000	       133 ns/op	 300.41 MB/s
-BenchmarkCRC32/poly=Koopman/size=40/align=1-8      	 1000000	       131 ns/op	 303.16 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  100000	      1724 ns/op	 296.97 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  100000	      1663 ns/op	 307.73 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  100000	      1687 ns/op	 303.37 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  100000	      1688 ns/op	 303.28 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  100000	      1599 ns/op	 320.03 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  100000	      1698 ns/op	 301.42 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  100000	      1669 ns/op	 306.67 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  100000	      1621 ns/op	 315.81 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  100000	      1721 ns/op	 297.46 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=0-8     	  100000	      1774 ns/op	 288.53 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  100000	      1665 ns/op	 307.36 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  100000	      1702 ns/op	 300.76 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  100000	      1622 ns/op	 315.59 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  100000	      1606 ns/op	 318.71 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  100000	      1642 ns/op	 311.64 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  100000	      1663 ns/op	 307.75 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  100000	      1618 ns/op	 316.26 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  100000	      1629 ns/op	 314.15 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  100000	      1636 ns/op	 312.89 MB/s
-BenchmarkCRC32/poly=Koopman/size=512/align=1-8     	  100000	      1646 ns/op	 310.98 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	   50000	      3428 ns/op	 298.67 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	   50000	      3209 ns/op	 319.02 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	   50000	      3232 ns/op	 316.77 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	   50000	      3283 ns/op	 311.83 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	   50000	      3316 ns/op	 308.75 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	   50000	      3235 ns/op	 316.49 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	   50000	      3324 ns/op	 308.05 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	   50000	      3220 ns/op	 317.94 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	   50000	      3386 ns/op	 302.41 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=0-8     	   50000	      3456 ns/op	 296.28 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	   50000	      3300 ns/op	 310.29 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	   50000	      3375 ns/op	 303.35 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	   50000	      3326 ns/op	 307.87 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	   50000	      3198 ns/op	 320.19 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	   50000	      3278 ns/op	 312.29 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	   50000	      3289 ns/op	 311.27 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	   50000	      3206 ns/op	 319.39 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	   50000	      3230 ns/op	 317.02 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	   50000	      3279 ns/op	 312.27 MB/s
-BenchmarkCRC32/poly=Koopman/size=1kB/align=1-8     	   50000	      3311 ns/op	 309.24 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   10000	     13182 ns/op	 310.72 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   10000	     13639 ns/op	 300.30 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   10000	     13560 ns/op	 302.06 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   10000	     13710 ns/op	 298.74 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   10000	     13051 ns/op	 313.83 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   10000	     13290 ns/op	 308.19 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   10000	     13463 ns/op	 304.23 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   10000	     13403 ns/op	 305.60 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   10000	     12934 ns/op	 316.68 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=0-8     	   10000	     13011 ns/op	 314.81 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   10000	     14397 ns/op	 284.49 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   10000	     13142 ns/op	 311.65 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   10000	     12905 ns/op	 317.39 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   10000	     13503 ns/op	 303.34 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   10000	     13370 ns/op	 306.34 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   10000	     13181 ns/op	 310.73 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   10000	     13252 ns/op	 309.08 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   10000	     13340 ns/op	 307.03 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   10000	     12869 ns/op	 318.28 MB/s
-BenchmarkCRC32/poly=Koopman/size=4kB/align=1-8     	   10000	     12889 ns/op	 317.79 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	    106037 ns/op	 309.02 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	    102938 ns/op	 318.33 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    1000	    103083 ns/op	 317.88 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	    103243 ns/op	 317.39 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	    107923 ns/op	 303.62 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	    108832 ns/op	 301.09 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	    106804 ns/op	 306.80 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    1000	    113306 ns/op	 289.20 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	    107050 ns/op	 306.10 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=0-8    	    2000	    109691 ns/op	 298.73 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	    107190 ns/op	 305.70 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	    104206 ns/op	 314.45 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	    110537 ns/op	 296.44 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	    107769 ns/op	 304.06 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    1000	    104639 ns/op	 313.15 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	    109221 ns/op	 300.02 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	    102506 ns/op	 319.67 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	    104850 ns/op	 312.52 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	    104962 ns/op	 312.19 MB/s
-BenchmarkCRC32/poly=Koopman/size=32kB/align=1-8    	    2000	    104807 ns/op	 312.65 MB/s
-PASS
-ok  	hash/crc32	64.158s
diff --git a/cmd/benchstat/testdata/zero-new.txt b/cmd/benchstat/testdata/zero-new.txt
deleted file mode 100644
index d855b6b..0000000
--- a/cmd/benchstat/testdata/zero-new.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-pkg: synthetic
-note: test benchstat printing of zeros
-
-BenchmarkImperceptible 1 1234567890 a-bytes 171717171716 b-bytes 99999930 c-bytes
diff --git a/cmd/benchstat/testdata/zero-old.txt b/cmd/benchstat/testdata/zero-old.txt
deleted file mode 100644
index 9df0d06..0000000
--- a/cmd/benchstat/testdata/zero-old.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-pkg: synthetic
-note: test benchstat printing of zeros
-
-BenchmarkImperceptible 1 1234567890 a-bytes 171717171717 b-bytes 99999929 c-bytes
diff --git a/cmd/benchstat/testdata/zero.golden b/cmd/benchstat/testdata/zero.golden
deleted file mode 100644
index 7c6170f..0000000
--- a/cmd/benchstat/testdata/zero.golden
+++ /dev/null
@@ -1,8 +0,0 @@
-name           old a-bytes  new a-bytes  delta
-Imperceptible  1.23GB ± 0%  1.23GB ± 0%   0.00%
-
-name           old b-bytes  new b-bytes  delta
-Imperceptible   172GB ± 0%   172GB ± 0%  -0.00%
-
-name           old c-bytes  new c-bytes  delta
-Imperceptible   100MB ± 0%   100MB ± 0%  +0.00%
diff --git a/cmd/benchstat/testdata/zero.stdout b/cmd/benchstat/testdata/zero.stdout
new file mode 100644
index 0000000..6c427c3
--- /dev/null
+++ b/cmd/benchstat/testdata/zero.stdout
@@ -0,0 +1,31 @@
+               │    before    │           after            │
+               │   a-bytes    │   a-bytes     vs base      │
+Imperceptible    1.150Gi ± 0%   1.150Gi ± 0%   0.00% (n=1)
+Imperceptible2   1.150Gi ± 0%   1.150Gi ± 0%   0.00% (n=1)
+geomean          1.150Gi        1.150Gi       +0.00%
+
+               │    before    │           after            │
+               │   b-bytes    │   b-bytes     vs base      │
+Imperceptible    159.9Gi ± 0%   159.9Gi ± 0%  +0.00% (n=1)
+Imperceptible2   159.9Gi ± 0%   159.9Gi ± 0%  +0.00% (n=1)
+geomean          159.9Gi        159.9Gi       +0.00%
+
+               │    before    │           after            │
+               │   c-bytes    │   c-bytes     vs base      │
+Imperceptible    95.37Mi ± 0%   95.37Mi ± 0%  -0.00% (n=1)
+Imperceptible2   95.37Mi ± 0%   95.37Mi ± 0%  -0.00% (n=1)
+geomean          95.37Mi        95.37Mi       -0.00%
+
+              │    before    │           after            │
+              │      x       │     x       vs base        │
+ZeroOverZero    0.000 ± 0%     0.000 ± 0%   0.00% (n=1)
+ZeroOverZero2   0.000 ± 0%     0.000 ± 0%   0.00% (n=1)
+geomean                    ¹               +0.00%       ¹
+¹ summaries must be >0 to compute geomean
+
+                 │   before   │        after        │
+                 │     y      │     y       vs base │
+NonZeroOverZero    0.0 ± 0%     100.0 ± 0%  ? (n=1)
+NonZeroOverZero2   0.0 ± 0%     100.0 ± 0%  ? (n=1)
+geomean                     ¹   100.0       ?
+¹ summaries must be >0 to compute geomean
diff --git a/cmd/benchstat/testdata/zero.txt b/cmd/benchstat/testdata/zero.txt
new file mode 100644
index 0000000..4b96143
--- /dev/null
+++ b/cmd/benchstat/testdata/zero.txt
@@ -0,0 +1,27 @@
+Unit a-bytes assume=exact
+Unit b-bytes assume=exact
+Unit c-bytes assume=exact
+Unit x assume=exact
+Unit y assume=exact
+
+note: before
+
+# We double each benchmark so benchstat shows a geomean row.
+
+BenchmarkImperceptible 1 1234567890 a-bytes 171717171716 b-bytes 99999930 c-bytes
+BenchmarkImperceptible2 1 1234567890 a-bytes 171717171716 b-bytes 99999930 c-bytes
+# Ratio should be treated as 1.
+BenchmarkZeroOverZero 1 0 x
+BenchmarkZeroOverZero2 1 0 x
+# Ratio should be treated as uncomputable.
+BenchmarkNonZeroOverZero 1 0 y
+BenchmarkNonZeroOverZero2 1 0 y
+
+note: after
+
+BenchmarkImperceptible 1 1234567890 a-bytes 171717171717 b-bytes 99999929 c-bytes
+BenchmarkImperceptible2 1 1234567890 a-bytes 171717171717 b-bytes 99999929 c-bytes
+BenchmarkZeroOverZero 1 0 x
+BenchmarkZeroOverZero2 1 0 x
+BenchmarkNonZeroOverZero 1 100 y
+BenchmarkNonZeroOverZero2 1 100 y
