benchstat: more CSS-friendly HTML output

- tag old-vs-new tables with tbody class 'oldnew'
- tag rows with class 'better', 'worse', or 'unchanged'
- print one big table, to keep column alignment across tables
- tag the note column with class 'note' and don't right-align it

Change-Id: Ifd3b2103e02781904f6b73eeb47b00340484caf6
Reviewed-on: https://go-review.googlesource.com/35939
Reviewed-by: Quentin Smith <quentin@golang.org>
diff --git a/cmd/benchstat/html.go b/cmd/benchstat/html.go
index b4c9511..adc99e2 100644
--- a/cmd/benchstat/html.go
+++ b/cmd/benchstat/html.go
@@ -9,17 +9,28 @@
 	"html/template"
 )
 
-var htmlTemplate = template.Must(template.New("").Parse(
-	`{{range $i, $table := .}}{{if gt $i 0}}
-{{end}}<style>.benchstat tbody td:nth-child(1n+2) { text-align: right; padding: 0em 1em; }</style>
+var htmlTemplate = template.Must(template.New("").Parse(`
 <table class='benchstat'>
-{{if .OldNewDelta}}<tr><th>name</th><th>old {{.Metric}}</th><th>new {{.Metric}}</th><th>delta</th>
-{{else if eq (len .Configs) 1}}<tr><th>name</th><th>{{.Metric}}</th>
-{{else}}<tr><th>name \ {{.Metric}}</th>{{range .Configs}}<th>{{.}}</th>{{end}}
-{{end}}{{range $row := $table.Rows}}<tr><td>{{.Benchmark}}</td>{{range $m := .Metrics}}<td>{{$m.Format $row.Scaler}}</td>{{end}}{{if $table.OldNewDelta}}<td>{{.Delta}}</td><td>{{.Note}}</td>{{end}}
+{{- range $i, $table := .}}
+<tbody {{if .OldNewDelta}}class='oldnew'{{end}}>
+{{if .OldNewDelta -}}
+<tr><th>name<th>old {{.Metric}}<th>new {{.Metric}}<th>delta
+{{else if eq (len .Configs) 1}}
+<tr><th>name<th>{{.Metric}}
+{{else -}}
+<tr><th>name \ {{.Metric}}{{range .Configs}}<th>{{.}}{{end}}
+{{end}}{{range $row := $table.Rows -}}
+{{if $table.OldNewDelta -}}
+<tr class='{{if eq .Change 1}}better{{else if eq .Change -1}}worse{{else}}unchanged{{end}}'>
+{{- else -}}
+<tr>
+{{- end -}}
+<td>{{.Benchmark}}{{range .Metrics}}<td>{{.Format $row.Scaler}}{{end}}{{if $table.OldNewDelta}}<td>{{.Delta}}<td class='note'>{{.Note}}{{end}}
 {{end -}}
+</tbody>
+{{end}}
 </table>
-{{end}}`))
+`))
 
 // FormatHTML appends an HTML formatting of the tables to buf.
 func FormatHTML(buf *bytes.Buffer, tables []*Table) {
diff --git a/cmd/benchstat/main.go b/cmd/benchstat/main.go
index 42c4c18..3beda32 100644
--- a/cmd/benchstat/main.go
+++ b/cmd/benchstat/main.go
@@ -147,9 +147,13 @@
 
 	var buf bytes.Buffer
 	if *flagHTML {
+		buf.WriteString(htmlStyle)
 		FormatHTML(&buf, tables)
 	} else {
 		FormatText(&buf, tables)
 	}
 	os.Stdout.Write(buf.Bytes())
 }
+
+var htmlStyle = `<style>.benchstat tbody td:nth-child(1n+2):not(.note) { text-align: right; padding: 0em 1em; }</style>
+`
diff --git a/cmd/benchstat/table.go b/cmd/benchstat/table.go
index 9c7c937..0d664ba 100644
--- a/cmd/benchstat/table.go
+++ b/cmd/benchstat/table.go
@@ -25,7 +25,7 @@
 	Metrics   []*Metrics // columns of statistics
 	Delta     string     // formatted percent change
 	Note      string     // additional information
-	Same      bool       // likely no change
+	Change    int        // +1 better, -1 worse, 0 unchanged
 }
 
 // Tables returns tables comparing the benchmarks in the collection.
@@ -81,7 +81,13 @@
 				} else if testerr != nil {
 					row.Note = fmt.Sprintf("(%s)", testerr)
 				} else if pval < *flagAlpha {
-					row.Delta = fmt.Sprintf("%+.2f%%", ((new.Mean/old.Mean)-1.0)*100.0)
+					pct := ((new.Mean / old.Mean) - 1.0) * 100.0
+					row.Delta = fmt.Sprintf("%+.2f%%", pct)
+					if pct < 0 == (table.Metric != "speed") { // smaller is better, except speeds
+						row.Change = +1
+					} else {
+						row.Change = -1
+					}
 				}
 				if row.Note == "" && pval != -1 {
 					row.Note = fmt.Sprintf("(p=%0.3f n=%d+%d)", pval, len(old.RValues), len(new.RValues))
diff --git a/cmd/benchstat/testdata/examplehtml.golden b/cmd/benchstat/testdata/examplehtml.golden
index b47e1fd..16d9eb1 100644
--- a/cmd/benchstat/testdata/examplehtml.golden
+++ b/cmd/benchstat/testdata/examplehtml.golden
@@ -1,13 +1,16 @@
-<style>.benchstat tbody td:nth-child(1n+2) { text-align: right; padding: 0em 1em; }</style>
-<table class='benchstat'>
-<tr><th>name</th><th>old time/op</th><th>new time/op</th><th>delta</th>
-<tr><td>GobEncode</td><td>13.6ms ± 1%</td><td>11.8ms ± 1%</td><td>-13.31%</td><td>(p=0.016 n=4&#43;5)</td>
-<tr><td>JSONEncode</td><td>32.1ms ± 1%</td><td>31.8ms ± 1%</td><td>~</td><td>(p=0.286 n=4&#43;5)</td>
-</table>
+<style>.benchstat tbody td:nth-child(1n+2):not(.note) { text-align: right; padding: 0em 1em; }</style>
 
-<style>.benchstat tbody td:nth-child(1n+2) { text-align: right; padding: 0em 1em; }</style>
 <table class='benchstat'>
-<tr><th>name</th><th>old speed</th><th>new speed</th><th>delta</th>
-<tr><td>GobEncode</td><td>56.4MB/s ± 1%</td><td>65.1MB/s ± 1%</td><td>&#43;15.36%</td><td>(p=0.016 n=4&#43;5)</td>
-<tr><td>JSONEncode</td><td>60.4MB/s ± 1%</td><td>61.1MB/s ± 2%</td><td>~</td><td>(p=0.286 n=4&#43;5)</td>
+<tbody class='oldnew'>
+<tr><th>name<th>old time/op<th>new time/op<th>delta
+<tr class='better'><td>GobEncode<td>13.6ms ± 1%<td>11.8ms ± 1%<td>-13.31%<td class='note'>(p=0.016 n=4&#43;5)
+<tr class='unchanged'><td>JSONEncode<td>32.1ms ± 1%<td>31.8ms ± 1%<td>~<td class='note'>(p=0.286 n=4&#43;5)
+</tbody>
+
+<tbody class='oldnew'>
+<tr><th>name<th>old speed<th>new speed<th>delta
+<tr class='better'><td>GobEncode<td>56.4MB/s ± 1%<td>65.1MB/s ± 1%<td>&#43;15.36%<td class='note'>(p=0.016 n=4&#43;5)
+<tr class='unchanged'><td>JSONEncode<td>60.4MB/s ± 1%<td>61.1MB/s ± 2%<td>~<td class='note'>(p=0.286 n=4&#43;5)
+</tbody>
+
 </table>
diff --git a/cmd/benchstat/testdata/exampleoldhtml.golden b/cmd/benchstat/testdata/exampleoldhtml.golden
index b056fc6..b11a296 100644
--- a/cmd/benchstat/testdata/exampleoldhtml.golden
+++ b/cmd/benchstat/testdata/exampleoldhtml.golden
@@ -1,13 +1,18 @@
-<style>.benchstat tbody td:nth-child(1n+2) { text-align: right; padding: 0em 1em; }</style>
-<table class='benchstat'>
-<tr><th>name</th><th>time/op</th>
-<tr><td>GobEncode</td><td>13.6ms ± 1%</td>
-<tr><td>JSONEncode</td><td>32.1ms ± 1%</td>
-</table>
+<style>.benchstat tbody td:nth-child(1n+2):not(.note) { text-align: right; padding: 0em 1em; }</style>
 
-<style>.benchstat tbody td:nth-child(1n+2) { text-align: right; padding: 0em 1em; }</style>
 <table class='benchstat'>
-<tr><th>name</th><th>speed</th>
-<tr><td>GobEncode</td><td>56.4MB/s ± 1%</td>
-<tr><td>JSONEncode</td><td>60.4MB/s ± 1%</td>
+<tbody >
+
+<tr><th>name<th>time/op
+<tr><td>GobEncode<td>13.6ms ± 1%
+<tr><td>JSONEncode<td>32.1ms ± 1%
+</tbody>
+
+<tbody >
+
+<tr><th>name<th>speed
+<tr><td>GobEncode<td>56.4MB/s ± 1%
+<tr><td>JSONEncode<td>60.4MB/s ± 1%
+</tbody>
+
 </table>
diff --git a/cmd/benchstat/testdata/oldnew4html.golden b/cmd/benchstat/testdata/oldnew4html.golden
index 7b070ef..093c0ee 100644
--- a/cmd/benchstat/testdata/oldnew4html.golden
+++ b/cmd/benchstat/testdata/oldnew4html.golden
@@ -1,81 +1,84 @@
-<style>.benchstat tbody td:nth-child(1n+2) { text-align: right; padding: 0em 1em; }</style>
-<table class='benchstat'>
-<tr><th>name \ time/op</th><th>old.txt</th><th>new.txt</th><th>slashslash4.txt</th>
-<tr><td>CRC32/poly=IEEE/size=15/align=0-8</td><td>46.9ns ± 8%</td><td>44.5ns ± 3%</td><td></td>
-<tr><td>CRC32/poly=IEEE/size=15/align=1-8</td><td>44.7ns ± 5%</td><td>44.5ns ± 4%</td><td></td>
-<tr><td>CRC32/poly=IEEE/size=40/align=0-8</td><td>41.0ns ± 1%</td><td>42.5ns ± 6%</td><td>42.1ns ± 3%</td>
-<tr><td>CRC32/poly=IEEE/size=40/align=1-8</td><td>41.1ns ± 1%</td><td>42.0ns ± 3%</td><td>41.7ns ± 5%</td>
-<tr><td>CRC32/poly=IEEE/size=512/align=0-8</td><td>238ns ± 5%</td><td>57ns ± 3%</td><td></td>
-<tr><td>CRC32/poly=IEEE/size=512/align=1-8</td><td>236ns ± 3%</td><td>57ns ± 3%</td><td></td>
-<tr><td>CRC32/poly=IEEE/size=1kB/align=0-8</td><td>452ns ± 4%</td><td>94ns ± 2%</td><td></td>
-<tr><td>CRC32/poly=IEEE/size=1kB/align=1-8</td><td>444ns ± 2%</td><td>93ns ± 2%</td><td></td>
-<tr><td>CRC32/poly=IEEE/size=4kB/align=0-8</td><td>1.74µs ± 8%</td><td>0.30µs ± 1%</td><td>1.68µs ± 2%</td>
-<tr><td>CRC32/poly=IEEE/size=4kB/align=1-8</td><td>1.76µs ± 6%</td><td>0.30µs ± 3%</td><td>1.69µs ± 4%</td>
-<tr><td>CRC32/poly=IEEE/size=32kB/align=0-8</td><td>15.0µs ± 7%</td><td>2.2µs ± 3%</td><td></td>
-<tr><td>CRC32/poly=IEEE/size=32kB/align=1-8</td><td>14.2µs ± 7%</td><td>2.2µs ± 3%</td><td></td>
-<tr><td>CRC32/poly=Castagnoli/size=15/align=0-8</td><td>16.4ns ± 3%</td><td>16.3ns ± 2%</td><td></td>
-<tr><td>CRC32/poly=Castagnoli/size=15/align=1-8</td><td>17.2ns ± 2%</td><td>17.3ns ± 2%</td><td></td>
-<tr><td>CRC32/poly=Castagnoli/size=40/align=0-8</td><td>17.4ns ± 2%</td><td>17.5ns ± 4%</td><td>18.6ns ±11%</td>
-<tr><td>CRC32/poly=Castagnoli/size=40/align=1-8</td><td>19.7ns ± 3%</td><td>19.4ns ± 2%</td><td>19.6ns ± 2%</td>
-<tr><td>CRC32/poly=Castagnoli/size=512/align=0-8</td><td>40.2ns ± 2%</td><td>40.1ns ± 4%</td><td></td>
-<tr><td>CRC32/poly=Castagnoli/size=512/align=1-8</td><td>42.1ns ± 3%</td><td>41.9ns ± 2%</td><td></td>
-<tr><td>CRC32/poly=Castagnoli/size=1kB/align=0-8</td><td>65.5ns ± 1%</td><td>66.2ns ± 1%</td><td></td>
-<tr><td>CRC32/poly=Castagnoli/size=1kB/align=1-8</td><td>70.1ns ± 6%</td><td>68.5ns ± 2%</td><td></td>
-<tr><td>CRC32/poly=Castagnoli/size=4kB/align=0-8</td><td>163ns ± 5%</td><td>159ns ± 3%</td><td>161ns ± 8%</td>
-<tr><td>CRC32/poly=Castagnoli/size=4kB/align=1-8</td><td>169ns ± 6%</td><td>162ns ± 3%</td><td>170ns ± 8%</td>
-<tr><td>CRC32/poly=Castagnoli/size=32kB/align=0-8</td><td>1.22µs ± 4%</td><td>1.21µs ± 3%</td><td></td>
-<tr><td>CRC32/poly=Castagnoli/size=32kB/align=1-8</td><td>1.26µs ± 3%</td><td>1.22µs ± 4%</td><td></td>
-<tr><td>CRC32/poly=Koopman/size=15/align=0-8</td><td>36.5ns ±11%</td><td>35.6ns ± 3%</td><td></td>
-<tr><td>CRC32/poly=Koopman/size=15/align=1-8</td><td>35.1ns ± 5%</td><td>35.5ns ± 1%</td><td></td>
-<tr><td>CRC32/poly=Koopman/size=40/align=0-8</td><td>91.6ns ± 9%</td><td>87.6ns ± 2%</td><td>93.8ns ±13%</td>
-<tr><td>CRC32/poly=Koopman/size=40/align=1-8</td><td>91.1ns ± 6%</td><td>88.0ns ± 3%</td><td>86.9ns ± 3%</td>
-<tr><td>CRC32/poly=Koopman/size=512/align=0-8</td><td>1.13µs ± 5%</td><td>1.08µs ± 3%</td><td></td>
-<tr><td>CRC32/poly=Koopman/size=512/align=1-8</td><td>1.13µs ± 6%</td><td>1.17µs ± 8%</td><td></td>
-<tr><td>CRC32/poly=Koopman/size=1kB/align=0-8</td><td>2.24µs ± 6%</td><td>2.34µs ± 4%</td><td></td>
-<tr><td>CRC32/poly=Koopman/size=1kB/align=1-8</td><td>2.15µs ± 2%</td><td>2.36µs ± 5%</td><td></td>
-<tr><td>CRC32/poly=Koopman/size=4kB/align=0-8</td><td>9.03µs ± 6%</td><td>9.00µs ± 6%</td><td>9.08µs ± 8%</td>
-<tr><td>CRC32/poly=Koopman/size=4kB/align=1-8</td><td>8.94µs ±10%</td><td>9.05µs ±12%</td><td>9.46µs ± 8%</td>
-<tr><td>CRC32/poly=Koopman/size=32kB/align=0-8</td><td>72.4µs ± 9%</td><td>72.9µs ± 4%</td><td></td>
-<tr><td>CRC32/poly=Koopman/size=32kB/align=1-8</td><td>69.6µs ± 3%</td><td>74.3µs ± 3%</td><td></td>
-</table>
+<style>.benchstat tbody td:nth-child(1n+2):not(.note) { text-align: right; padding: 0em 1em; }</style>
 
-<style>.benchstat tbody td:nth-child(1n+2) { text-align: right; padding: 0em 1em; }</style>
 <table class='benchstat'>
-<tr><th>name \ speed</th><th>old.txt</th><th>new.txt</th><th>slashslash4.txt</th>
-<tr><td>CRC32/poly=IEEE/size=15/align=0-8</td><td>321MB/s ± 8%</td><td>337MB/s ± 3%</td><td></td>
-<tr><td>CRC32/poly=IEEE/size=15/align=1-8</td><td>336MB/s ± 4%</td><td>337MB/s ± 4%</td><td></td>
-<tr><td>CRC32/poly=IEEE/size=40/align=0-8</td><td>975MB/s ± 1%</td><td>942MB/s ± 5%</td><td>951MB/s ± 3%</td>
-<tr><td>CRC32/poly=IEEE/size=40/align=1-8</td><td>974MB/s ± 1%</td><td>952MB/s ± 3%</td><td>960MB/s ± 4%</td>
-<tr><td>CRC32/poly=IEEE/size=512/align=0-8</td><td>2.15GB/s ± 4%</td><td>8.97GB/s ± 3%</td><td></td>
-<tr><td>CRC32/poly=IEEE/size=512/align=1-8</td><td>2.17GB/s ± 3%</td><td>8.96GB/s ± 3%</td><td></td>
-<tr><td>CRC32/poly=IEEE/size=1kB/align=0-8</td><td>2.26GB/s ± 4%</td><td>10.88GB/s ± 2%</td><td></td>
-<tr><td>CRC32/poly=IEEE/size=1kB/align=1-8</td><td>2.31GB/s ± 2%</td><td>10.98GB/s ± 2%</td><td></td>
-<tr><td>CRC32/poly=IEEE/size=4kB/align=0-8</td><td>2.36GB/s ± 7%</td><td>13.73GB/s ± 1%</td><td>2.43GB/s ± 2%</td>
-<tr><td>CRC32/poly=IEEE/size=4kB/align=1-8</td><td>2.33GB/s ± 6%</td><td>13.68GB/s ± 3%</td><td>2.42GB/s ± 4%</td>
-<tr><td>CRC32/poly=IEEE/size=32kB/align=0-8</td><td>2.19GB/s ± 7%</td><td>15.19GB/s ± 3%</td><td></td>
-<tr><td>CRC32/poly=IEEE/size=32kB/align=1-8</td><td>2.31GB/s ± 8%</td><td>15.04GB/s ± 3%</td><td></td>
-<tr><td>CRC32/poly=Castagnoli/size=15/align=0-8</td><td>916MB/s ± 2%</td><td>920MB/s ± 2%</td><td></td>
-<tr><td>CRC32/poly=Castagnoli/size=15/align=1-8</td><td>870MB/s ± 2%</td><td>867MB/s ± 2%</td><td></td>
-<tr><td>CRC32/poly=Castagnoli/size=40/align=0-8</td><td>2.30GB/s ± 2%</td><td>2.28GB/s ± 4%</td><td>2.16GB/s ±11%</td>
-<tr><td>CRC32/poly=Castagnoli/size=40/align=1-8</td><td>2.03GB/s ± 3%</td><td>2.06GB/s ± 2%</td><td>2.04GB/s ± 2%</td>
-<tr><td>CRC32/poly=Castagnoli/size=512/align=0-8</td><td>12.7GB/s ± 2%</td><td>12.8GB/s ± 4%</td><td></td>
-<tr><td>CRC32/poly=Castagnoli/size=512/align=1-8</td><td>12.1GB/s ± 3%</td><td>12.2GB/s ± 1%</td><td></td>
-<tr><td>CRC32/poly=Castagnoli/size=1kB/align=0-8</td><td>15.6GB/s ± 1%</td><td>15.5GB/s ± 1%</td><td></td>
-<tr><td>CRC32/poly=Castagnoli/size=1kB/align=1-8</td><td>14.6GB/s ± 6%</td><td>15.0GB/s ± 2%</td><td></td>
-<tr><td>CRC32/poly=Castagnoli/size=4kB/align=0-8</td><td>25.1GB/s ± 5%</td><td>25.7GB/s ± 3%</td><td>25.4GB/s ± 7%</td>
-<tr><td>CRC32/poly=Castagnoli/size=4kB/align=1-8</td><td>24.1GB/s ± 6%</td><td>25.3GB/s ± 3%</td><td>24.1GB/s ± 8%</td>
-<tr><td>CRC32/poly=Castagnoli/size=32kB/align=0-8</td><td>26.9GB/s ± 4%</td><td>26.8GB/s ± 5%</td><td></td>
-<tr><td>CRC32/poly=Castagnoli/size=32kB/align=1-8</td><td>25.9GB/s ± 3%</td><td>26.8GB/s ± 4%</td><td></td>
-<tr><td>CRC32/poly=Koopman/size=15/align=0-8</td><td>412MB/s ±10%</td><td>421MB/s ± 3%</td><td></td>
-<tr><td>CRC32/poly=Koopman/size=15/align=1-8</td><td>427MB/s ± 5%</td><td>422MB/s ± 1%</td><td></td>
-<tr><td>CRC32/poly=Koopman/size=40/align=0-8</td><td>437MB/s ± 9%</td><td>456MB/s ± 2%</td><td>428MB/s ±12%</td>
-<tr><td>CRC32/poly=Koopman/size=40/align=1-8</td><td>440MB/s ± 6%</td><td>455MB/s ± 3%</td><td>461MB/s ± 3%</td>
-<tr><td>CRC32/poly=Koopman/size=512/align=0-8</td><td>453MB/s ± 5%</td><td>476MB/s ± 3%</td><td></td>
-<tr><td>CRC32/poly=Koopman/size=512/align=1-8</td><td>455MB/s ± 6%</td><td>440MB/s ± 8%</td><td></td>
-<tr><td>CRC32/poly=Koopman/size=1kB/align=0-8</td><td>452MB/s ± 9%</td><td>438MB/s ± 4%</td><td></td>
-<tr><td>CRC32/poly=Koopman/size=1kB/align=1-8</td><td>477MB/s ± 2%</td><td>434MB/s ± 5%</td><td></td>
-<tr><td>CRC32/poly=Koopman/size=4kB/align=0-8</td><td>454MB/s ± 5%</td><td>455MB/s ± 6%</td><td>452MB/s ± 8%</td>
-<tr><td>CRC32/poly=Koopman/size=4kB/align=1-8</td><td>459MB/s ± 9%</td><td>455MB/s ±11%</td><td>434MB/s ± 9%</td>
-<tr><td>CRC32/poly=Koopman/size=32kB/align=0-8</td><td>453MB/s ± 8%</td><td>450MB/s ± 4%</td><td></td>
-<tr><td>CRC32/poly=Koopman/size=32kB/align=1-8</td><td>471MB/s ± 3%</td><td>441MB/s ± 3%</td><td></td>
+<tbody >
+<tr><th>name \ time/op<th>old.txt<th>new.txt<th>slashslash4.txt
+<tr><td>CRC32/poly=IEEE/size=15/align=0-8<td>46.9ns ± 8%<td>44.5ns ± 3%<td>
+<tr><td>CRC32/poly=IEEE/size=15/align=1-8<td>44.7ns ± 5%<td>44.5ns ± 4%<td>
+<tr><td>CRC32/poly=IEEE/size=40/align=0-8<td>41.0ns ± 1%<td>42.5ns ± 6%<td>42.1ns ± 3%
+<tr><td>CRC32/poly=IEEE/size=40/align=1-8<td>41.1ns ± 1%<td>42.0ns ± 3%<td>41.7ns ± 5%
+<tr><td>CRC32/poly=IEEE/size=512/align=0-8<td>238ns ± 5%<td>57ns ± 3%<td>
+<tr><td>CRC32/poly=IEEE/size=512/align=1-8<td>236ns ± 3%<td>57ns ± 3%<td>
+<tr><td>CRC32/poly=IEEE/size=1kB/align=0-8<td>452ns ± 4%<td>94ns ± 2%<td>
+<tr><td>CRC32/poly=IEEE/size=1kB/align=1-8<td>444ns ± 2%<td>93ns ± 2%<td>
+<tr><td>CRC32/poly=IEEE/size=4kB/align=0-8<td>1.74µs ± 8%<td>0.30µs ± 1%<td>1.68µs ± 2%
+<tr><td>CRC32/poly=IEEE/size=4kB/align=1-8<td>1.76µs ± 6%<td>0.30µs ± 3%<td>1.69µs ± 4%
+<tr><td>CRC32/poly=IEEE/size=32kB/align=0-8<td>15.0µs ± 7%<td>2.2µs ± 3%<td>
+<tr><td>CRC32/poly=IEEE/size=32kB/align=1-8<td>14.2µs ± 7%<td>2.2µs ± 3%<td>
+<tr><td>CRC32/poly=Castagnoli/size=15/align=0-8<td>16.4ns ± 3%<td>16.3ns ± 2%<td>
+<tr><td>CRC32/poly=Castagnoli/size=15/align=1-8<td>17.2ns ± 2%<td>17.3ns ± 2%<td>
+<tr><td>CRC32/poly=Castagnoli/size=40/align=0-8<td>17.4ns ± 2%<td>17.5ns ± 4%<td>18.6ns ±11%
+<tr><td>CRC32/poly=Castagnoli/size=40/align=1-8<td>19.7ns ± 3%<td>19.4ns ± 2%<td>19.6ns ± 2%
+<tr><td>CRC32/poly=Castagnoli/size=512/align=0-8<td>40.2ns ± 2%<td>40.1ns ± 4%<td>
+<tr><td>CRC32/poly=Castagnoli/size=512/align=1-8<td>42.1ns ± 3%<td>41.9ns ± 2%<td>
+<tr><td>CRC32/poly=Castagnoli/size=1kB/align=0-8<td>65.5ns ± 1%<td>66.2ns ± 1%<td>
+<tr><td>CRC32/poly=Castagnoli/size=1kB/align=1-8<td>70.1ns ± 6%<td>68.5ns ± 2%<td>
+<tr><td>CRC32/poly=Castagnoli/size=4kB/align=0-8<td>163ns ± 5%<td>159ns ± 3%<td>161ns ± 8%
+<tr><td>CRC32/poly=Castagnoli/size=4kB/align=1-8<td>169ns ± 6%<td>162ns ± 3%<td>170ns ± 8%
+<tr><td>CRC32/poly=Castagnoli/size=32kB/align=0-8<td>1.22µs ± 4%<td>1.21µs ± 3%<td>
+<tr><td>CRC32/poly=Castagnoli/size=32kB/align=1-8<td>1.26µs ± 3%<td>1.22µs ± 4%<td>
+<tr><td>CRC32/poly=Koopman/size=15/align=0-8<td>36.5ns ±11%<td>35.6ns ± 3%<td>
+<tr><td>CRC32/poly=Koopman/size=15/align=1-8<td>35.1ns ± 5%<td>35.5ns ± 1%<td>
+<tr><td>CRC32/poly=Koopman/size=40/align=0-8<td>91.6ns ± 9%<td>87.6ns ± 2%<td>93.8ns ±13%
+<tr><td>CRC32/poly=Koopman/size=40/align=1-8<td>91.1ns ± 6%<td>88.0ns ± 3%<td>86.9ns ± 3%
+<tr><td>CRC32/poly=Koopman/size=512/align=0-8<td>1.13µs ± 5%<td>1.08µs ± 3%<td>
+<tr><td>CRC32/poly=Koopman/size=512/align=1-8<td>1.13µs ± 6%<td>1.17µs ± 8%<td>
+<tr><td>CRC32/poly=Koopman/size=1kB/align=0-8<td>2.24µs ± 6%<td>2.34µs ± 4%<td>
+<tr><td>CRC32/poly=Koopman/size=1kB/align=1-8<td>2.15µs ± 2%<td>2.36µs ± 5%<td>
+<tr><td>CRC32/poly=Koopman/size=4kB/align=0-8<td>9.03µs ± 6%<td>9.00µs ± 6%<td>9.08µs ± 8%
+<tr><td>CRC32/poly=Koopman/size=4kB/align=1-8<td>8.94µs ±10%<td>9.05µs ±12%<td>9.46µs ± 8%
+<tr><td>CRC32/poly=Koopman/size=32kB/align=0-8<td>72.4µs ± 9%<td>72.9µs ± 4%<td>
+<tr><td>CRC32/poly=Koopman/size=32kB/align=1-8<td>69.6µs ± 3%<td>74.3µs ± 3%<td>
+</tbody>
+
+<tbody >
+<tr><th>name \ speed<th>old.txt<th>new.txt<th>slashslash4.txt
+<tr><td>CRC32/poly=IEEE/size=15/align=0-8<td>321MB/s ± 8%<td>337MB/s ± 3%<td>
+<tr><td>CRC32/poly=IEEE/size=15/align=1-8<td>336MB/s ± 4%<td>337MB/s ± 4%<td>
+<tr><td>CRC32/poly=IEEE/size=40/align=0-8<td>975MB/s ± 1%<td>942MB/s ± 5%<td>951MB/s ± 3%
+<tr><td>CRC32/poly=IEEE/size=40/align=1-8<td>974MB/s ± 1%<td>952MB/s ± 3%<td>960MB/s ± 4%
+<tr><td>CRC32/poly=IEEE/size=512/align=0-8<td>2.15GB/s ± 4%<td>8.97GB/s ± 3%<td>
+<tr><td>CRC32/poly=IEEE/size=512/align=1-8<td>2.17GB/s ± 3%<td>8.96GB/s ± 3%<td>
+<tr><td>CRC32/poly=IEEE/size=1kB/align=0-8<td>2.26GB/s ± 4%<td>10.88GB/s ± 2%<td>
+<tr><td>CRC32/poly=IEEE/size=1kB/align=1-8<td>2.31GB/s ± 2%<td>10.98GB/s ± 2%<td>
+<tr><td>CRC32/poly=IEEE/size=4kB/align=0-8<td>2.36GB/s ± 7%<td>13.73GB/s ± 1%<td>2.43GB/s ± 2%
+<tr><td>CRC32/poly=IEEE/size=4kB/align=1-8<td>2.33GB/s ± 6%<td>13.68GB/s ± 3%<td>2.42GB/s ± 4%
+<tr><td>CRC32/poly=IEEE/size=32kB/align=0-8<td>2.19GB/s ± 7%<td>15.19GB/s ± 3%<td>
+<tr><td>CRC32/poly=IEEE/size=32kB/align=1-8<td>2.31GB/s ± 8%<td>15.04GB/s ± 3%<td>
+<tr><td>CRC32/poly=Castagnoli/size=15/align=0-8<td>916MB/s ± 2%<td>920MB/s ± 2%<td>
+<tr><td>CRC32/poly=Castagnoli/size=15/align=1-8<td>870MB/s ± 2%<td>867MB/s ± 2%<td>
+<tr><td>CRC32/poly=Castagnoli/size=40/align=0-8<td>2.30GB/s ± 2%<td>2.28GB/s ± 4%<td>2.16GB/s ±11%
+<tr><td>CRC32/poly=Castagnoli/size=40/align=1-8<td>2.03GB/s ± 3%<td>2.06GB/s ± 2%<td>2.04GB/s ± 2%
+<tr><td>CRC32/poly=Castagnoli/size=512/align=0-8<td>12.7GB/s ± 2%<td>12.8GB/s ± 4%<td>
+<tr><td>CRC32/poly=Castagnoli/size=512/align=1-8<td>12.1GB/s ± 3%<td>12.2GB/s ± 1%<td>
+<tr><td>CRC32/poly=Castagnoli/size=1kB/align=0-8<td>15.6GB/s ± 1%<td>15.5GB/s ± 1%<td>
+<tr><td>CRC32/poly=Castagnoli/size=1kB/align=1-8<td>14.6GB/s ± 6%<td>15.0GB/s ± 2%<td>
+<tr><td>CRC32/poly=Castagnoli/size=4kB/align=0-8<td>25.1GB/s ± 5%<td>25.7GB/s ± 3%<td>25.4GB/s ± 7%
+<tr><td>CRC32/poly=Castagnoli/size=4kB/align=1-8<td>24.1GB/s ± 6%<td>25.3GB/s ± 3%<td>24.1GB/s ± 8%
+<tr><td>CRC32/poly=Castagnoli/size=32kB/align=0-8<td>26.9GB/s ± 4%<td>26.8GB/s ± 5%<td>
+<tr><td>CRC32/poly=Castagnoli/size=32kB/align=1-8<td>25.9GB/s ± 3%<td>26.8GB/s ± 4%<td>
+<tr><td>CRC32/poly=Koopman/size=15/align=0-8<td>412MB/s ±10%<td>421MB/s ± 3%<td>
+<tr><td>CRC32/poly=Koopman/size=15/align=1-8<td>427MB/s ± 5%<td>422MB/s ± 1%<td>
+<tr><td>CRC32/poly=Koopman/size=40/align=0-8<td>437MB/s ± 9%<td>456MB/s ± 2%<td>428MB/s ±12%
+<tr><td>CRC32/poly=Koopman/size=40/align=1-8<td>440MB/s ± 6%<td>455MB/s ± 3%<td>461MB/s ± 3%
+<tr><td>CRC32/poly=Koopman/size=512/align=0-8<td>453MB/s ± 5%<td>476MB/s ± 3%<td>
+<tr><td>CRC32/poly=Koopman/size=512/align=1-8<td>455MB/s ± 6%<td>440MB/s ± 8%<td>
+<tr><td>CRC32/poly=Koopman/size=1kB/align=0-8<td>452MB/s ± 9%<td>438MB/s ± 4%<td>
+<tr><td>CRC32/poly=Koopman/size=1kB/align=1-8<td>477MB/s ± 2%<td>434MB/s ± 5%<td>
+<tr><td>CRC32/poly=Koopman/size=4kB/align=0-8<td>454MB/s ± 5%<td>455MB/s ± 6%<td>452MB/s ± 8%
+<tr><td>CRC32/poly=Koopman/size=4kB/align=1-8<td>459MB/s ± 9%<td>455MB/s ±11%<td>434MB/s ± 9%
+<tr><td>CRC32/poly=Koopman/size=32kB/align=0-8<td>453MB/s ± 8%<td>450MB/s ± 4%<td>
+<tr><td>CRC32/poly=Koopman/size=32kB/align=1-8<td>471MB/s ± 3%<td>441MB/s ± 3%<td>
+</tbody>
+
 </table>
diff --git a/cmd/benchstat/testdata/oldnewhtml.golden b/cmd/benchstat/testdata/oldnewhtml.golden
index 2037c69..549ec6d 100644
--- a/cmd/benchstat/testdata/oldnewhtml.golden
+++ b/cmd/benchstat/testdata/oldnewhtml.golden
@@ -1,81 +1,84 @@
-<style>.benchstat tbody td:nth-child(1n+2) { text-align: right; padding: 0em 1em; }</style>
-<table class='benchstat'>
-<tr><th>name</th><th>old time/op</th><th>new time/op</th><th>delta</th>
-<tr><td>CRC32/poly=IEEE/size=15/align=0-8</td><td>46.9ns ± 8%</td><td>44.5ns ± 3%</td><td>-5.01%</td><td>(p=0.008 n=10&#43;10)</td>
-<tr><td>CRC32/poly=IEEE/size=15/align=1-8</td><td>44.7ns ± 5%</td><td>44.5ns ± 4%</td><td>~</td><td>(p=0.539 n=10&#43;10)</td>
-<tr><td>CRC32/poly=IEEE/size=40/align=0-8</td><td>41.0ns ± 1%</td><td>42.5ns ± 6%</td><td>&#43;3.56%</td><td>(p=0.000 n=8&#43;10)</td>
-<tr><td>CRC32/poly=IEEE/size=40/align=1-8</td><td>41.1ns ± 1%</td><td>42.0ns ± 3%</td><td>&#43;2.34%</td><td>(p=0.000 n=9&#43;10)</td>
-<tr><td>CRC32/poly=IEEE/size=512/align=0-8</td><td>238ns ± 5%</td><td>57ns ± 3%</td><td>-76.00%</td><td>(p=0.000 n=10&#43;10)</td>
-<tr><td>CRC32/poly=IEEE/size=512/align=1-8</td><td>236ns ± 3%</td><td>57ns ± 3%</td><td>-75.72%</td><td>(p=0.000 n=10&#43;10)</td>
-<tr><td>CRC32/poly=IEEE/size=1kB/align=0-8</td><td>452ns ± 4%</td><td>94ns ± 2%</td><td>-79.20%</td><td>(p=0.000 n=10&#43;8)</td>
-<tr><td>CRC32/poly=IEEE/size=1kB/align=1-8</td><td>444ns ± 2%</td><td>93ns ± 2%</td><td>-78.97%</td><td>(p=0.000 n=10&#43;8)</td>
-<tr><td>CRC32/poly=IEEE/size=4kB/align=0-8</td><td>1.74µs ± 8%</td><td>0.30µs ± 1%</td><td>-82.87%</td><td>(p=0.000 n=10&#43;9)</td>
-<tr><td>CRC32/poly=IEEE/size=4kB/align=1-8</td><td>1.76µs ± 6%</td><td>0.30µs ± 3%</td><td>-83.05%</td><td>(p=0.000 n=10&#43;10)</td>
-<tr><td>CRC32/poly=IEEE/size=32kB/align=0-8</td><td>15.0µs ± 7%</td><td>2.2µs ± 3%</td><td>-85.57%</td><td>(p=0.000 n=10&#43;10)</td>
-<tr><td>CRC32/poly=IEEE/size=32kB/align=1-8</td><td>14.2µs ± 7%</td><td>2.2µs ± 3%</td><td>-84.65%</td><td>(p=0.000 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Castagnoli/size=15/align=0-8</td><td>16.4ns ± 3%</td><td>16.3ns ± 2%</td><td>~</td><td>(p=0.615 n=9&#43;9)</td>
-<tr><td>CRC32/poly=Castagnoli/size=15/align=1-8</td><td>17.2ns ± 2%</td><td>17.3ns ± 2%</td><td>~</td><td>(p=0.650 n=9&#43;10)</td>
-<tr><td>CRC32/poly=Castagnoli/size=40/align=0-8</td><td>17.4ns ± 2%</td><td>17.5ns ± 4%</td><td>~</td><td>(p=0.694 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Castagnoli/size=40/align=1-8</td><td>19.7ns ± 3%</td><td>19.4ns ± 2%</td><td>-1.62%</td><td>(p=0.036 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Castagnoli/size=512/align=0-8</td><td>40.2ns ± 2%</td><td>40.1ns ± 4%</td><td>~</td><td>(p=0.614 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Castagnoli/size=512/align=1-8</td><td>42.1ns ± 3%</td><td>41.9ns ± 2%</td><td>~</td><td>(p=0.952 n=10&#43;9)</td>
-<tr><td>CRC32/poly=Castagnoli/size=1kB/align=0-8</td><td>65.5ns ± 1%</td><td>66.2ns ± 1%</td><td>&#43;1.01%</td><td>(p=0.003 n=9&#43;8)</td>
-<tr><td>CRC32/poly=Castagnoli/size=1kB/align=1-8</td><td>70.1ns ± 6%</td><td>68.5ns ± 2%</td><td>~</td><td>(p=0.190 n=10&#43;9)</td>
-<tr><td>CRC32/poly=Castagnoli/size=4kB/align=0-8</td><td>163ns ± 5%</td><td>159ns ± 3%</td><td>-2.46%</td><td>(p=0.032 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Castagnoli/size=4kB/align=1-8</td><td>169ns ± 6%</td><td>162ns ± 3%</td><td>-4.60%</td><td>(p=0.005 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Castagnoli/size=32kB/align=0-8</td><td>1.22µs ± 4%</td><td>1.21µs ± 3%</td><td>~</td><td>(p=0.882 n=9&#43;9)</td>
-<tr><td>CRC32/poly=Castagnoli/size=32kB/align=1-8</td><td>1.26µs ± 3%</td><td>1.22µs ± 4%</td><td>-3.48%</td><td>(p=0.002 n=9&#43;10)</td>
-<tr><td>CRC32/poly=Koopman/size=15/align=0-8</td><td>36.5ns ±11%</td><td>35.6ns ± 3%</td><td>~</td><td>(p=0.216 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Koopman/size=15/align=1-8</td><td>35.1ns ± 5%</td><td>35.5ns ± 1%</td><td>~</td><td>(p=0.508 n=10&#43;9)</td>
-<tr><td>CRC32/poly=Koopman/size=40/align=0-8</td><td>91.6ns ± 9%</td><td>87.6ns ± 2%</td><td>-4.35%</td><td>(p=0.002 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Koopman/size=40/align=1-8</td><td>91.1ns ± 6%</td><td>88.0ns ± 3%</td><td>~</td><td>(p=0.055 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Koopman/size=512/align=0-8</td><td>1.13µs ± 5%</td><td>1.08µs ± 3%</td><td>-4.93%</td><td>(p=0.000 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Koopman/size=512/align=1-8</td><td>1.13µs ± 6%</td><td>1.17µs ± 8%</td><td>~</td><td>(p=0.143 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Koopman/size=1kB/align=0-8</td><td>2.24µs ± 6%</td><td>2.34µs ± 4%</td><td>&#43;4.34%</td><td>(p=0.010 n=9&#43;10)</td>
-<tr><td>CRC32/poly=Koopman/size=1kB/align=1-8</td><td>2.15µs ± 2%</td><td>2.36µs ± 5%</td><td>&#43;9.84%</td><td>(p=0.000 n=9&#43;10)</td>
-<tr><td>CRC32/poly=Koopman/size=4kB/align=0-8</td><td>9.03µs ± 6%</td><td>9.00µs ± 6%</td><td>~</td><td>(p=0.971 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Koopman/size=4kB/align=1-8</td><td>8.94µs ±10%</td><td>9.05µs ±12%</td><td>~</td><td>(p=0.754 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Koopman/size=32kB/align=0-8</td><td>72.4µs ± 9%</td><td>72.9µs ± 4%</td><td>~</td><td>(p=0.684 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Koopman/size=32kB/align=1-8</td><td>69.6µs ± 3%</td><td>74.3µs ± 3%</td><td>&#43;6.70%</td><td>(p=0.000 n=8&#43;10)</td>
-</table>
+<style>.benchstat tbody td:nth-child(1n+2):not(.note) { text-align: right; padding: 0em 1em; }</style>
 
-<style>.benchstat tbody td:nth-child(1n+2) { text-align: right; padding: 0em 1em; }</style>
 <table class='benchstat'>
-<tr><th>name</th><th>old speed</th><th>new speed</th><th>delta</th>
-<tr><td>CRC32/poly=IEEE/size=15/align=0-8</td><td>321MB/s ± 8%</td><td>337MB/s ± 3%</td><td>&#43;5.06%</td><td>(p=0.009 n=10&#43;10)</td>
-<tr><td>CRC32/poly=IEEE/size=15/align=1-8</td><td>336MB/s ± 4%</td><td>337MB/s ± 4%</td><td>~</td><td>(p=0.579 n=10&#43;10)</td>
-<tr><td>CRC32/poly=IEEE/size=40/align=0-8</td><td>975MB/s ± 1%</td><td>942MB/s ± 5%</td><td>-3.37%</td><td>(p=0.001 n=8&#43;10)</td>
-<tr><td>CRC32/poly=IEEE/size=40/align=1-8</td><td>974MB/s ± 1%</td><td>952MB/s ± 3%</td><td>-2.25%</td><td>(p=0.000 n=9&#43;10)</td>
-<tr><td>CRC32/poly=IEEE/size=512/align=0-8</td><td>2.15GB/s ± 4%</td><td>8.97GB/s ± 3%</td><td>&#43;317.65%</td><td>(p=0.000 n=10&#43;10)</td>
-<tr><td>CRC32/poly=IEEE/size=512/align=1-8</td><td>2.17GB/s ± 3%</td><td>8.96GB/s ± 3%</td><td>&#43;312.89%</td><td>(p=0.000 n=10&#43;10)</td>
-<tr><td>CRC32/poly=IEEE/size=1kB/align=0-8</td><td>2.26GB/s ± 4%</td><td>10.88GB/s ± 2%</td><td>&#43;381.12%</td><td>(p=0.000 n=10&#43;8)</td>
-<tr><td>CRC32/poly=IEEE/size=1kB/align=1-8</td><td>2.31GB/s ± 2%</td><td>10.98GB/s ± 2%</td><td>&#43;375.97%</td><td>(p=0.000 n=10&#43;8)</td>
-<tr><td>CRC32/poly=IEEE/size=4kB/align=0-8</td><td>2.36GB/s ± 7%</td><td>13.73GB/s ± 1%</td><td>&#43;482.26%</td><td>(p=0.000 n=10&#43;9)</td>
-<tr><td>CRC32/poly=IEEE/size=4kB/align=1-8</td><td>2.33GB/s ± 6%</td><td>13.68GB/s ± 3%</td><td>&#43;488.23%</td><td>(p=0.000 n=10&#43;10)</td>
-<tr><td>CRC32/poly=IEEE/size=32kB/align=0-8</td><td>2.19GB/s ± 7%</td><td>15.19GB/s ± 3%</td><td>&#43;591.99%</td><td>(p=0.000 n=10&#43;10)</td>
-<tr><td>CRC32/poly=IEEE/size=32kB/align=1-8</td><td>2.31GB/s ± 8%</td><td>15.04GB/s ± 3%</td><td>&#43;550.07%</td><td>(p=0.000 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Castagnoli/size=15/align=0-8</td><td>916MB/s ± 2%</td><td>920MB/s ± 2%</td><td>~</td><td>(p=0.489 n=9&#43;9)</td>
-<tr><td>CRC32/poly=Castagnoli/size=15/align=1-8</td><td>870MB/s ± 2%</td><td>867MB/s ± 2%</td><td>~</td><td>(p=0.661 n=9&#43;10)</td>
-<tr><td>CRC32/poly=Castagnoli/size=40/align=0-8</td><td>2.30GB/s ± 2%</td><td>2.28GB/s ± 4%</td><td>~</td><td>(p=0.684 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Castagnoli/size=40/align=1-8</td><td>2.03GB/s ± 3%</td><td>2.06GB/s ± 2%</td><td>~</td><td>(p=0.063 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Castagnoli/size=512/align=0-8</td><td>12.7GB/s ± 2%</td><td>12.8GB/s ± 4%</td><td>~</td><td>(p=0.529 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Castagnoli/size=512/align=1-8</td><td>12.1GB/s ± 3%</td><td>12.2GB/s ± 1%</td><td>~</td><td>(p=0.780 n=10&#43;9)</td>
-<tr><td>CRC32/poly=Castagnoli/size=1kB/align=0-8</td><td>15.6GB/s ± 1%</td><td>15.5GB/s ± 1%</td><td>-1.02%</td><td>(p=0.002 n=9&#43;8)</td>
-<tr><td>CRC32/poly=Castagnoli/size=1kB/align=1-8</td><td>14.6GB/s ± 6%</td><td>15.0GB/s ± 2%</td><td>~</td><td>(p=0.211 n=10&#43;9)</td>
-<tr><td>CRC32/poly=Castagnoli/size=4kB/align=0-8</td><td>25.1GB/s ± 5%</td><td>25.7GB/s ± 3%</td><td>~</td><td>(p=0.052 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Castagnoli/size=4kB/align=1-8</td><td>24.1GB/s ± 6%</td><td>25.3GB/s ± 3%</td><td>&#43;4.71%</td><td>(p=0.005 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Castagnoli/size=32kB/align=0-8</td><td>26.9GB/s ± 4%</td><td>26.8GB/s ± 5%</td><td>~</td><td>(p=0.842 n=9&#43;10)</td>
-<tr><td>CRC32/poly=Castagnoli/size=32kB/align=1-8</td><td>25.9GB/s ± 3%</td><td>26.8GB/s ± 4%</td><td>&#43;3.62%</td><td>(p=0.002 n=9&#43;10)</td>
-<tr><td>CRC32/poly=Koopman/size=15/align=0-8</td><td>412MB/s ±10%</td><td>421MB/s ± 3%</td><td>~</td><td>(p=0.218 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Koopman/size=15/align=1-8</td><td>427MB/s ± 5%</td><td>422MB/s ± 1%</td><td>~</td><td>(p=0.497 n=10&#43;9)</td>
-<tr><td>CRC32/poly=Koopman/size=40/align=0-8</td><td>437MB/s ± 9%</td><td>456MB/s ± 2%</td><td>&#43;4.50%</td><td>(p=0.002 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Koopman/size=40/align=1-8</td><td>440MB/s ± 6%</td><td>455MB/s ± 3%</td><td>~</td><td>(p=0.052 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Koopman/size=512/align=0-8</td><td>453MB/s ± 5%</td><td>476MB/s ± 3%</td><td>&#43;5.09%</td><td>(p=0.000 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Koopman/size=512/align=1-8</td><td>455MB/s ± 6%</td><td>440MB/s ± 8%</td><td>~</td><td>(p=0.143 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Koopman/size=1kB/align=0-8</td><td>452MB/s ± 9%</td><td>438MB/s ± 4%</td><td>~</td><td>(p=0.052 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Koopman/size=1kB/align=1-8</td><td>477MB/s ± 2%</td><td>434MB/s ± 5%</td><td>-8.92%</td><td>(p=0.000 n=9&#43;10)</td>
-<tr><td>CRC32/poly=Koopman/size=4kB/align=0-8</td><td>454MB/s ± 5%</td><td>455MB/s ± 6%</td><td>~</td><td>(p=0.971 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Koopman/size=4kB/align=1-8</td><td>459MB/s ± 9%</td><td>455MB/s ±11%</td><td>~</td><td>(p=0.739 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Koopman/size=32kB/align=0-8</td><td>453MB/s ± 8%</td><td>450MB/s ± 4%</td><td>~</td><td>(p=0.684 n=10&#43;10)</td>
-<tr><td>CRC32/poly=Koopman/size=32kB/align=1-8</td><td>471MB/s ± 3%</td><td>441MB/s ± 3%</td><td>-6.25%</td><td>(p=0.000 n=8&#43;10)</td>
+<tbody class='oldnew'>
+<tr><th>name<th>old time/op<th>new time/op<th>delta
+<tr class='better'><td>CRC32/poly=IEEE/size=15/align=0-8<td>46.9ns ± 8%<td>44.5ns ± 3%<td>-5.01%<td class='note'>(p=0.008 n=10&#43;10)
+<tr class='unchanged'><td>CRC32/poly=IEEE/size=15/align=1-8<td>44.7ns ± 5%<td>44.5ns ± 4%<td>~<td class='note'>(p=0.539 n=10&#43;10)
+<tr class='worse'><td>CRC32/poly=IEEE/size=40/align=0-8<td>41.0ns ± 1%<td>42.5ns ± 6%<td>&#43;3.56%<td class='note'>(p=0.000 n=8&#43;10)
+<tr class='worse'><td>CRC32/poly=IEEE/size=40/align=1-8<td>41.1ns ± 1%<td>42.0ns ± 3%<td>&#43;2.34%<td class='note'>(p=0.000 n=9&#43;10)
+<tr class='better'><td>CRC32/poly=IEEE/size=512/align=0-8<td>238ns ± 5%<td>57ns ± 3%<td>-76.00%<td class='note'>(p=0.000 n=10&#43;10)
+<tr class='better'><td>CRC32/poly=IEEE/size=512/align=1-8<td>236ns ± 3%<td>57ns ± 3%<td>-75.72%<td class='note'>(p=0.000 n=10&#43;10)
+<tr class='better'><td>CRC32/poly=IEEE/size=1kB/align=0-8<td>452ns ± 4%<td>94ns ± 2%<td>-79.20%<td class='note'>(p=0.000 n=10&#43;8)
+<tr class='better'><td>CRC32/poly=IEEE/size=1kB/align=1-8<td>444ns ± 2%<td>93ns ± 2%<td>-78.97%<td class='note'>(p=0.000 n=10&#43;8)
+<tr class='better'><td>CRC32/poly=IEEE/size=4kB/align=0-8<td>1.74µs ± 8%<td>0.30µs ± 1%<td>-82.87%<td class='note'>(p=0.000 n=10&#43;9)
+<tr class='better'><td>CRC32/poly=IEEE/size=4kB/align=1-8<td>1.76µs ± 6%<td>0.30µs ± 3%<td>-83.05%<td class='note'>(p=0.000 n=10&#43;10)
+<tr class='better'><td>CRC32/poly=IEEE/size=32kB/align=0-8<td>15.0µs ± 7%<td>2.2µs ± 3%<td>-85.57%<td class='note'>(p=0.000 n=10&#43;10)
+<tr class='better'><td>CRC32/poly=IEEE/size=32kB/align=1-8<td>14.2µs ± 7%<td>2.2µs ± 3%<td>-84.65%<td class='note'>(p=0.000 n=10&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=15/align=0-8<td>16.4ns ± 3%<td>16.3ns ± 2%<td>~<td class='note'>(p=0.615 n=9&#43;9)
+<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=15/align=1-8<td>17.2ns ± 2%<td>17.3ns ± 2%<td>~<td class='note'>(p=0.650 n=9&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=40/align=0-8<td>17.4ns ± 2%<td>17.5ns ± 4%<td>~<td class='note'>(p=0.694 n=10&#43;10)
+<tr class='better'><td>CRC32/poly=Castagnoli/size=40/align=1-8<td>19.7ns ± 3%<td>19.4ns ± 2%<td>-1.62%<td class='note'>(p=0.036 n=10&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=512/align=0-8<td>40.2ns ± 2%<td>40.1ns ± 4%<td>~<td class='note'>(p=0.614 n=10&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=512/align=1-8<td>42.1ns ± 3%<td>41.9ns ± 2%<td>~<td class='note'>(p=0.952 n=10&#43;9)
+<tr class='worse'><td>CRC32/poly=Castagnoli/size=1kB/align=0-8<td>65.5ns ± 1%<td>66.2ns ± 1%<td>&#43;1.01%<td class='note'>(p=0.003 n=9&#43;8)
+<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=1kB/align=1-8<td>70.1ns ± 6%<td>68.5ns ± 2%<td>~<td class='note'>(p=0.190 n=10&#43;9)
+<tr class='better'><td>CRC32/poly=Castagnoli/size=4kB/align=0-8<td>163ns ± 5%<td>159ns ± 3%<td>-2.46%<td class='note'>(p=0.032 n=10&#43;10)
+<tr class='better'><td>CRC32/poly=Castagnoli/size=4kB/align=1-8<td>169ns ± 6%<td>162ns ± 3%<td>-4.60%<td class='note'>(p=0.005 n=10&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=32kB/align=0-8<td>1.22µs ± 4%<td>1.21µs ± 3%<td>~<td class='note'>(p=0.882 n=9&#43;9)
+<tr class='better'><td>CRC32/poly=Castagnoli/size=32kB/align=1-8<td>1.26µs ± 3%<td>1.22µs ± 4%<td>-3.48%<td class='note'>(p=0.002 n=9&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Koopman/size=15/align=0-8<td>36.5ns ±11%<td>35.6ns ± 3%<td>~<td class='note'>(p=0.216 n=10&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Koopman/size=15/align=1-8<td>35.1ns ± 5%<td>35.5ns ± 1%<td>~<td class='note'>(p=0.508 n=10&#43;9)
+<tr class='better'><td>CRC32/poly=Koopman/size=40/align=0-8<td>91.6ns ± 9%<td>87.6ns ± 2%<td>-4.35%<td class='note'>(p=0.002 n=10&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Koopman/size=40/align=1-8<td>91.1ns ± 6%<td>88.0ns ± 3%<td>~<td class='note'>(p=0.055 n=10&#43;10)
+<tr class='better'><td>CRC32/poly=Koopman/size=512/align=0-8<td>1.13µs ± 5%<td>1.08µs ± 3%<td>-4.93%<td class='note'>(p=0.000 n=10&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Koopman/size=512/align=1-8<td>1.13µs ± 6%<td>1.17µs ± 8%<td>~<td class='note'>(p=0.143 n=10&#43;10)
+<tr class='worse'><td>CRC32/poly=Koopman/size=1kB/align=0-8<td>2.24µs ± 6%<td>2.34µs ± 4%<td>&#43;4.34%<td class='note'>(p=0.010 n=9&#43;10)
+<tr class='worse'><td>CRC32/poly=Koopman/size=1kB/align=1-8<td>2.15µs ± 2%<td>2.36µs ± 5%<td>&#43;9.84%<td class='note'>(p=0.000 n=9&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Koopman/size=4kB/align=0-8<td>9.03µs ± 6%<td>9.00µs ± 6%<td>~<td class='note'>(p=0.971 n=10&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Koopman/size=4kB/align=1-8<td>8.94µs ±10%<td>9.05µs ±12%<td>~<td class='note'>(p=0.754 n=10&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Koopman/size=32kB/align=0-8<td>72.4µs ± 9%<td>72.9µs ± 4%<td>~<td class='note'>(p=0.684 n=10&#43;10)
+<tr class='worse'><td>CRC32/poly=Koopman/size=32kB/align=1-8<td>69.6µs ± 3%<td>74.3µs ± 3%<td>&#43;6.70%<td class='note'>(p=0.000 n=8&#43;10)
+</tbody>
+
+<tbody class='oldnew'>
+<tr><th>name<th>old speed<th>new speed<th>delta
+<tr class='better'><td>CRC32/poly=IEEE/size=15/align=0-8<td>321MB/s ± 8%<td>337MB/s ± 3%<td>&#43;5.06%<td class='note'>(p=0.009 n=10&#43;10)
+<tr class='unchanged'><td>CRC32/poly=IEEE/size=15/align=1-8<td>336MB/s ± 4%<td>337MB/s ± 4%<td>~<td class='note'>(p=0.579 n=10&#43;10)
+<tr class='worse'><td>CRC32/poly=IEEE/size=40/align=0-8<td>975MB/s ± 1%<td>942MB/s ± 5%<td>-3.37%<td class='note'>(p=0.001 n=8&#43;10)
+<tr class='worse'><td>CRC32/poly=IEEE/size=40/align=1-8<td>974MB/s ± 1%<td>952MB/s ± 3%<td>-2.25%<td class='note'>(p=0.000 n=9&#43;10)
+<tr class='better'><td>CRC32/poly=IEEE/size=512/align=0-8<td>2.15GB/s ± 4%<td>8.97GB/s ± 3%<td>&#43;317.65%<td class='note'>(p=0.000 n=10&#43;10)
+<tr class='better'><td>CRC32/poly=IEEE/size=512/align=1-8<td>2.17GB/s ± 3%<td>8.96GB/s ± 3%<td>&#43;312.89%<td class='note'>(p=0.000 n=10&#43;10)
+<tr class='better'><td>CRC32/poly=IEEE/size=1kB/align=0-8<td>2.26GB/s ± 4%<td>10.88GB/s ± 2%<td>&#43;381.12%<td class='note'>(p=0.000 n=10&#43;8)
+<tr class='better'><td>CRC32/poly=IEEE/size=1kB/align=1-8<td>2.31GB/s ± 2%<td>10.98GB/s ± 2%<td>&#43;375.97%<td class='note'>(p=0.000 n=10&#43;8)
+<tr class='better'><td>CRC32/poly=IEEE/size=4kB/align=0-8<td>2.36GB/s ± 7%<td>13.73GB/s ± 1%<td>&#43;482.26%<td class='note'>(p=0.000 n=10&#43;9)
+<tr class='better'><td>CRC32/poly=IEEE/size=4kB/align=1-8<td>2.33GB/s ± 6%<td>13.68GB/s ± 3%<td>&#43;488.23%<td class='note'>(p=0.000 n=10&#43;10)
+<tr class='better'><td>CRC32/poly=IEEE/size=32kB/align=0-8<td>2.19GB/s ± 7%<td>15.19GB/s ± 3%<td>&#43;591.99%<td class='note'>(p=0.000 n=10&#43;10)
+<tr class='better'><td>CRC32/poly=IEEE/size=32kB/align=1-8<td>2.31GB/s ± 8%<td>15.04GB/s ± 3%<td>&#43;550.07%<td class='note'>(p=0.000 n=10&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=15/align=0-8<td>916MB/s ± 2%<td>920MB/s ± 2%<td>~<td class='note'>(p=0.489 n=9&#43;9)
+<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=15/align=1-8<td>870MB/s ± 2%<td>867MB/s ± 2%<td>~<td class='note'>(p=0.661 n=9&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=40/align=0-8<td>2.30GB/s ± 2%<td>2.28GB/s ± 4%<td>~<td class='note'>(p=0.684 n=10&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=40/align=1-8<td>2.03GB/s ± 3%<td>2.06GB/s ± 2%<td>~<td class='note'>(p=0.063 n=10&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=512/align=0-8<td>12.7GB/s ± 2%<td>12.8GB/s ± 4%<td>~<td class='note'>(p=0.529 n=10&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=512/align=1-8<td>12.1GB/s ± 3%<td>12.2GB/s ± 1%<td>~<td class='note'>(p=0.780 n=10&#43;9)
+<tr class='worse'><td>CRC32/poly=Castagnoli/size=1kB/align=0-8<td>15.6GB/s ± 1%<td>15.5GB/s ± 1%<td>-1.02%<td class='note'>(p=0.002 n=9&#43;8)
+<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=1kB/align=1-8<td>14.6GB/s ± 6%<td>15.0GB/s ± 2%<td>~<td class='note'>(p=0.211 n=10&#43;9)
+<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=4kB/align=0-8<td>25.1GB/s ± 5%<td>25.7GB/s ± 3%<td>~<td class='note'>(p=0.052 n=10&#43;10)
+<tr class='better'><td>CRC32/poly=Castagnoli/size=4kB/align=1-8<td>24.1GB/s ± 6%<td>25.3GB/s ± 3%<td>&#43;4.71%<td class='note'>(p=0.005 n=10&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Castagnoli/size=32kB/align=0-8<td>26.9GB/s ± 4%<td>26.8GB/s ± 5%<td>~<td class='note'>(p=0.842 n=9&#43;10)
+<tr class='better'><td>CRC32/poly=Castagnoli/size=32kB/align=1-8<td>25.9GB/s ± 3%<td>26.8GB/s ± 4%<td>&#43;3.62%<td class='note'>(p=0.002 n=9&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Koopman/size=15/align=0-8<td>412MB/s ±10%<td>421MB/s ± 3%<td>~<td class='note'>(p=0.218 n=10&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Koopman/size=15/align=1-8<td>427MB/s ± 5%<td>422MB/s ± 1%<td>~<td class='note'>(p=0.497 n=10&#43;9)
+<tr class='better'><td>CRC32/poly=Koopman/size=40/align=0-8<td>437MB/s ± 9%<td>456MB/s ± 2%<td>&#43;4.50%<td class='note'>(p=0.002 n=10&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Koopman/size=40/align=1-8<td>440MB/s ± 6%<td>455MB/s ± 3%<td>~<td class='note'>(p=0.052 n=10&#43;10)
+<tr class='better'><td>CRC32/poly=Koopman/size=512/align=0-8<td>453MB/s ± 5%<td>476MB/s ± 3%<td>&#43;5.09%<td class='note'>(p=0.000 n=10&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Koopman/size=512/align=1-8<td>455MB/s ± 6%<td>440MB/s ± 8%<td>~<td class='note'>(p=0.143 n=10&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Koopman/size=1kB/align=0-8<td>452MB/s ± 9%<td>438MB/s ± 4%<td>~<td class='note'>(p=0.052 n=10&#43;10)
+<tr class='worse'><td>CRC32/poly=Koopman/size=1kB/align=1-8<td>477MB/s ± 2%<td>434MB/s ± 5%<td>-8.92%<td class='note'>(p=0.000 n=9&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Koopman/size=4kB/align=0-8<td>454MB/s ± 5%<td>455MB/s ± 6%<td>~<td class='note'>(p=0.971 n=10&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Koopman/size=4kB/align=1-8<td>459MB/s ± 9%<td>455MB/s ±11%<td>~<td class='note'>(p=0.739 n=10&#43;10)
+<tr class='unchanged'><td>CRC32/poly=Koopman/size=32kB/align=0-8<td>453MB/s ± 8%<td>450MB/s ± 4%<td>~<td class='note'>(p=0.684 n=10&#43;10)
+<tr class='worse'><td>CRC32/poly=Koopman/size=32kB/align=1-8<td>471MB/s ± 3%<td>441MB/s ± 3%<td>-6.25%<td class='note'>(p=0.000 n=8&#43;10)
+</tbody>
+
 </table>