benchunit: new package for working with benchmark units

Change-Id: I50dfa1387606c0111ac8c2200e0b01639800def0
Reviewed-on: https://go-review.googlesource.com/c/perf/+/283615
Trust: Austin Clements <austin@google.com>
Run-TryBot: Austin Clements <austin@google.com>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Russ Cox <rsc@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
diff --git a/benchunit/parse.go b/benchunit/parse.go
new file mode 100644
index 0000000..5583f33
--- /dev/null
+++ b/benchunit/parse.go
@@ -0,0 +1,96 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package benchunit manipulates benchmark units and formats numbers
+// in those units.
+package benchunit
+
+import (
+	"fmt"
+	"unicode"
+)
+
+// A Class specifies what class of unit prefixes are in use.
+type Class int
+
+const (
+	// Decimal indicates values of a given unit should be scaled
+	// by powers of 1000. Decimal units use the International
+	// System of Units SI prefixes, such as "k", and "M".
+	Decimal Class = iota
+	// Binary indicates values of a given unit should be scaled by
+	// powers of 1024. Binary units use the International
+	// Electrotechnical Commission (IEC) binary prefixes, such as
+	// "Ki" and "Mi".
+	Binary
+)
+
+func (c Class) String() string {
+	switch c {
+	case Decimal:
+		return "Decimal"
+	case Binary:
+		return "Binary"
+	}
+	return fmt.Sprintf("Class(%d)", int(c))
+}
+
+// ClassOf returns the Class of unit. If unit contains some measure of
+// bytes in the numerator, this is Binary. Otherwise, it is Decimal.
+func ClassOf(unit string) Class {
+	p := newParser(unit)
+	for p.next() {
+		if (p.tok == "B" || p.tok == "MB" || p.tok == "bytes") && !p.denom {
+			return Binary
+		}
+	}
+	return Decimal
+}
+
+type parser struct {
+	rest string // unparsed unit
+	rpos int    // byte consumed from original unit
+
+	// Current token
+	tok   string
+	pos   int  // byte offset of tok in original unit
+	denom bool // current token is in denominator
+}
+
+func newParser(unit string) *parser {
+	return &parser{rest: unit}
+}
+
+func (p *parser) next() bool {
+	// Consume separators.
+	for i, r := range p.rest {
+		if r == '*' {
+			p.denom = false
+		} else if r == '/' {
+			p.denom = true
+		} else if !(r == '-' || unicode.IsSpace(r)) {
+			p.rpos += i
+			p.rest = p.rest[i:]
+			goto tok
+		}
+	}
+	// End of string.
+	p.rest = ""
+	return false
+
+tok:
+	// Consume until separator.
+	end := len(p.rest)
+	for i, r := range p.rest {
+		if r == '*' || r == '/' || r == '-' || unicode.IsSpace(r) {
+			end = i
+			break
+		}
+	}
+	p.tok = p.rest[:end]
+	p.pos = p.rpos
+	p.rpos += end
+	p.rest = p.rest[end:]
+	return true
+}
diff --git a/benchunit/parse_test.go b/benchunit/parse_test.go
new file mode 100644
index 0000000..3df3763
--- /dev/null
+++ b/benchunit/parse_test.go
@@ -0,0 +1,30 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package benchunit
+
+import "testing"
+
+func TestClassOf(t *testing.T) {
+	test := func(unit string, cls Class) {
+		t.Helper()
+		got := ClassOf(unit)
+		if got != cls {
+			t.Errorf("for %s, want %s, got %s", unit, cls, got)
+		}
+	}
+	test("ns/op", Decimal)
+	test("sec/op", Decimal)
+	test("sec/B", Decimal)
+	test("sec/B/B", Decimal)
+	test("sec/disk-B", Decimal)
+
+	test("B/op", Binary)
+	test("bytes/op", Binary)
+	test("B/s", Binary)
+	test("B/sec", Binary)
+	test("sec/B*B", Binary) // Discouraged
+	test("disk-B/sec", Binary)
+	test("disk-B/sec", Binary)
+}
diff --git a/benchunit/scale.go b/benchunit/scale.go
new file mode 100644
index 0000000..996ceb6
--- /dev/null
+++ b/benchunit/scale.go
@@ -0,0 +1,162 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package benchunit
+
+import (
+	"fmt"
+	"math"
+	"strconv"
+)
+
+// A Scaler represents a scaling factor for a number and
+// its scientific representation.
+type Scaler struct {
+	Prec   int     // Digits after the decimal point
+	Factor float64 // Unscaled value of 1 Prefix (e.g., 1 k => 1000)
+	Prefix string  // Unit prefix ("k", "M", "Ki", etc)
+}
+
+// Format formats val and appends the unit prefix according to the given scale.
+// For example, if the Scaler has class Decimal, Format(123456789)
+// returns "123.4M".
+//
+// If the value has units, be sure to tidy it first (see Tidy).
+// Otherwise, this can result in nonsense units when the result is
+// displayed with its units; for example, representing 123456789 ns as
+// 123.4M ns ("megananoseconds", aka milliseconds).
+func (s Scaler) Format(val float64) string {
+	buf := make([]byte, 0, 20)
+	buf = strconv.AppendFloat(buf, val/s.Factor, 'f', s.Prec, 64)
+	buf = append(buf, s.Prefix...)
+	return string(buf)
+}
+
+// NoOpScaler is a Scaler that formats numbers with the smallest
+// number of digits necessary to capture the exact value, and no
+// prefix. This is intended for when the output will be consumed by
+// another program, such as when producing CSV format.
+var NoOpScaler = Scaler{-1, 1, ""}
+
+type factor struct {
+	factor float64
+	prefix string
+	// Thresholds for 100.0, 10.00, 1.000.
+	t100, t10, t1 float64
+}
+
+var siFactors = mkSIFactors()
+var iecFactors = mkIECFactors()
+var sigfigs, sigfigsBase = mkSigfigs()
+
+func mkSIFactors() []factor {
+	// To ensure that the thresholds for printing values with
+	// various factors exactly match how printing itself will
+	// round, we construct the thresholds by parsing the printed
+	// representation.
+	var factors []factor
+	exp := 12
+	for _, p := range []string{"T", "G", "M", "k", "", "m", "µ", "n"} {
+		t100, _ := strconv.ParseFloat(fmt.Sprintf("99.995e%d", exp), 64)
+		t10, _ := strconv.ParseFloat(fmt.Sprintf("9.9995e%d", exp), 64)
+		t1, _ := strconv.ParseFloat(fmt.Sprintf(".99995e%d", exp), 64)
+		factors = append(factors, factor{math.Pow(10, float64(exp)), p, t100, t10, t1})
+		exp -= 3
+	}
+	return factors
+}
+
+func mkIECFactors() []factor {
+	var factors []factor
+	exp := 40
+	// ISO/IEC 80000 doesn't specify fractional prefixes. They're
+	// meaningful for things like "B/sec", but "1 /KiB/s" looks a
+	// lot more confusing than bottoming out at "B" and printing
+	// with more precision, such as "0.001 B/s".
+	//
+	// Because t1 of one factor represents 1024 of the next
+	// smaller factor, we'll render values in the range [1000,
+	// 1024) using the smaller factor. E.g., 1020 * 1024 will be
+	// rendered as 1020 KiB, not 0.996 MiB. This is intentional as
+	// it achieves higher precision, avoids rendering scaled
+	// values below 1, and seems generally less confusing for
+	// binary factors.
+	for _, p := range []string{"Ti", "Gi", "Mi", "Ki", ""} {
+		t100, _ := strconv.ParseFloat(fmt.Sprintf("0x1.8ffae147ae148p%d", 6+exp), 64) // 99.995
+		t10, _ := strconv.ParseFloat(fmt.Sprintf("0x1.3ffbe76c8b439p%d", 3+exp), 64)  // 9.9995
+		t1, _ := strconv.ParseFloat(fmt.Sprintf("0x1.fff972474538fp%d", -1+exp), 64)  // .99995
+		factors = append(factors, factor{math.Pow(2, float64(exp)), p, t100, t10, t1})
+		exp -= 10
+	}
+	return factors
+}
+
+func mkSigfigs() ([]float64, int) {
+	var sigfigs []float64
+	// Print up to 10 digits after the decimal place.
+	for exp := -1; exp > -9; exp-- {
+		thresh, _ := strconv.ParseFloat(fmt.Sprintf("9.9995e%d", exp), 64)
+		sigfigs = append(sigfigs, thresh)
+	}
+	// sigfigs[0] is the threshold for 3 digits after the decimal.
+	return sigfigs, 3
+}
+
+// Scale formats val using at least three significant digits,
+// appending an SI or binary prefix. See Scaler.Format for details.
+func Scale(val float64, cls Class) string {
+	return CommonScale([]float64{val}, cls).Format(val)
+}
+
+// CommonScale returns a common Scaler to apply to all values in vals.
+// This scale will show at least three significant digits for every
+// value.
+func CommonScale(vals []float64, cls Class) Scaler {
+	// The common scale is determined by the non-zero value
+	// closest to zero.
+	var min float64
+	for _, v := range vals {
+		v = math.Abs(v)
+		if v != 0 && (min == 0 || v < min) {
+			min = v
+		}
+	}
+	if min == 0 {
+		return Scaler{3, 1, ""}
+	}
+
+	var factors []factor
+	switch cls {
+	default:
+		panic(fmt.Sprintf("bad Class %v", cls))
+	case Decimal:
+		factors = siFactors
+	case Binary:
+		factors = iecFactors
+	}
+
+	for _, factor := range factors {
+		switch {
+		case min >= factor.t100:
+			return Scaler{1, factor.factor, factor.prefix}
+		case min >= factor.t10:
+			return Scaler{2, factor.factor, factor.prefix}
+		case min >= factor.t1:
+			return Scaler{3, factor.factor, factor.prefix}
+		}
+	}
+
+	// The value is less than the smallest factor. Print it using
+	// the smallest factor and more precision to achieve the
+	// desired sigfigs.
+	factor := factors[len(factors)-1]
+	val := min / factor.factor
+	for i, thresh := range sigfigs {
+		if val >= thresh || i == len(sigfigs)-1 {
+			return Scaler{i + sigfigsBase, factor.factor, factor.prefix}
+		}
+	}
+
+	panic("not reachable")
+}
diff --git a/benchunit/scale_test.go b/benchunit/scale_test.go
new file mode 100644
index 0000000..c5a1563
--- /dev/null
+++ b/benchunit/scale_test.go
@@ -0,0 +1,130 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package benchunit
+
+import (
+	"math"
+	"testing"
+)
+
+func TestScale(t *testing.T) {
+	var cls Class
+	test := func(num float64, want, wantPred string) {
+		t.Helper()
+
+		got := Scale(num, cls)
+		if got != want {
+			t.Errorf("for %v, got %s, want %s", num, got, want)
+		}
+
+		// Check what happens when this number is exactly on
+		// the crux between two scale factors.
+		pred := math.Nextafter(num, 0)
+		got = Scale(pred, cls)
+		if got != wantPred {
+			dir := "-ε"
+			if num < 0 {
+				dir = "+ε"
+			}
+			t.Errorf("for %v%s, got %s, want %s", num, dir, got, wantPred)
+		}
+	}
+
+	cls = Decimal
+	// Smoke tests
+	test(0, "0.000", "0.000")
+	test(1, "1.000", "1.000")
+	test(-1, "-1.000", "-1.000")
+	// Full range
+	test(9999500000000000, "9999.5T", "9999.5T")
+	test(999950000000000, "1000.0T", "999.9T")
+	test(99995000000000, "100.0T", "99.99T")
+	test(9999500000000, "10.00T", "9.999T")
+	test(999950000000, "1.000T", "999.9G")
+	test(99995000000, "100.0G", "99.99G")
+	test(9999500000, "10.00G", "9.999G")
+	test(999950000, "1.000G", "999.9M")
+	test(99995000, "100.0M", "99.99M")
+	test(9999500, "10.00M", "9.999M")
+	test(999950, "1.000M", "999.9k")
+	test(99995, "100.0k", "99.99k")
+	test(9999.5, "10.00k", "9.999k")
+	test(999.95, "1.000k", "999.9")
+	test(99.995, "100.0", "99.99")
+	test(9.9995, "10.00", "9.999")
+	test(.99995, "1.000", "999.9m")
+	test(.099995, "100.0m", "99.99m")
+	test(.0099995, "10.00m", "9.999m")
+	test(.00099995, "1.000m", "999.9µ")
+	test(.000099995, "100.0µ", "99.99µ")
+	test(.0000099995, "10.00µ", "9.999µ")
+	test(.00000099995, "1.000µ", "999.9n")
+	test(.000000099995, "100.0n", "99.99n")
+	test(.0000000099995, "10.00n", "9.999n")
+	test(.00000000099995, "1.000n", "0.9999n") // First pred we won't up-scale
+
+	// Below the smallest scale unit rounding gets imperfect, but
+	// it's off from the ideal by at most one ulp, so we accept it.
+	test(math.Nextafter(.000000000099995, 1), "0.1000n", "0.09999n")
+	test(.0000000000099995, "0.01000n", "0.009999n")
+	test(math.Nextafter(.00000000000099995, 1), "0.001000n", "0.0009999n")
+	test(.000000000000099995, "0.0001000n", "0.00009999n")
+	test(.0000000000000099995, "0.00001000n", "0.000009999n")
+	test(math.Nextafter(.00000000000000099995, 1), "0.000001000n", "0.0000009999n")
+
+	// Misc
+	test(-99995000000000, "-100.0T", "-99.99T")
+	test(-.0000000099995, "-10.00n", "-9.999n")
+
+	cls = Binary
+	// Smoke tests
+	test(0, "0.000", "0.000")
+	test(1, "1.000", "1.000")
+	test(-1, "-1.000", "-1.000")
+	// Full range
+	test(.99995*(1<<50), "1023.9Ti", "1023.9Ti")
+	test(99.995*(1<<40), "100.0Ti", "99.99Ti")
+	test(9.9995*(1<<40), "10.00Ti", "9.999Ti")
+	test(.99995*(1<<40), "1.000Ti", "1023.9Gi")
+	test(99.995*(1<<30), "100.0Gi", "99.99Gi")
+	test(9.9995*(1<<30), "10.00Gi", "9.999Gi")
+	test(.99995*(1<<30), "1.000Gi", "1023.9Mi")
+	test(99.995*(1<<20), "100.0Mi", "99.99Mi")
+	test(9.9995*(1<<20), "10.00Mi", "9.999Mi")
+	test(.99995*(1<<20), "1.000Mi", "1023.9Ki")
+	test(99.995*(1<<10), "100.0Ki", "99.99Ki")
+	test(9.9995*(1<<10), "10.00Ki", "9.999Ki")
+	test(.99995*(1<<10), "1.000Ki", "1023.9")
+	test(99.995*(1<<0), "100.0", "99.99")
+	test(9.9995*(1<<0), "10.00", "9.999")
+	test(.99995, "1.000", "0.9999")
+	test(.099995, "0.1000", "0.09999")
+	test(.0099995, "0.01000", "0.009999")
+	test(.00099995, "0.001000", "0.0009999")
+	test(.000099995, "0.0001000", "0.00009999")
+	test(.0000099995, "0.00001000", "0.000009999")
+	test(.00000099995, "0.000001000", "0.0000009999")
+	// We stop at 10 digits after the decimal. Again, rounding
+	// gets a little weird.
+	test(.00000009995, "0.0000001000", "0.0000000999")
+	test(math.Nextafter(.00000000995, 1), "0.0000000100", "0.0000000099")
+	test(.00000000095, "0.0000000010", "0.0000000009")
+	test(.00000000005, "0.0000000001", "0.0000000000")
+	test(.000000000009, "0.0000000000", "0.0000000000")
+}
+
+func TestNoOpScaler(t *testing.T) {
+	test := func(val float64, want string) {
+		t.Helper()
+		got := NoOpScaler.Format(val)
+		if got != want {
+			t.Errorf("for %v, got %s, want %s", val, got, want)
+		}
+	}
+
+	test(1, "1")
+	test(123456789, "123456789")
+	test(123.456789, "123.456789")
+}
diff --git a/benchunit/tidy.go b/benchunit/tidy.go
new file mode 100644
index 0000000..cec257d
--- /dev/null
+++ b/benchunit/tidy.go
@@ -0,0 +1,90 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package benchunit
+
+import (
+	"strings"
+	"sync"
+)
+
+type tidyEntry struct {
+	tidied string
+	factor float64
+}
+
+var tidyCache sync.Map // unit string -> *tidyCache
+
+// Tidy normalizes a value with a (possibly pre-scaled) unit into base units.
+// For example, if unit is "ns" or "MB", it will re-scale the value to
+// "sec" or "B" units, respectively. It returns the re-scaled value and
+// its new unit. If the value is already in base units, it does nothing.
+func Tidy(value float64, unit string) (tidiedValue float64, tidiedUnit string) {
+	newUnit, factor := tidyUnit(unit)
+	return value * factor, newUnit
+}
+
+// tidyUnit normalizes common pre-scaled units like "ns" to "sec" and
+// "MB" to "B". It returns the tidied version of unit and the
+// multiplicative factor to convert a value in unit "unit" to a value in
+// unit "tidied". For example, to convert value x in the untidied unit
+// to the tidied unit, multiply x by factor.
+func tidyUnit(unit string) (tidied string, factor float64) {
+	// Fast path for units from testing package.
+	switch unit {
+	case "ns/op":
+		return "sec/op", 1e-9
+	case "MB/s":
+		return "B/s", 1e6
+	case "B/op", "allocs/op":
+		return unit, 1
+	}
+	// Fast path for units with no normalization.
+	if !(strings.Contains(unit, "ns") || strings.Contains(unit, "MB")) {
+		return unit, 1
+	}
+
+	// Check the cache.
+	if tc, ok := tidyCache.Load(unit); ok {
+		tc := tc.(*tidyEntry)
+		return tc.tidied, tc.factor
+	}
+
+	// Do the hard work and cache it.
+	tidied, factor = tidyUnitUncached(unit)
+	tidyCache.Store(unit, &tidyEntry{tidied, factor})
+	return
+}
+
+func tidyUnitUncached(unit string) (tidied string, factor float64) {
+	type edit struct {
+		pos, len int
+		replace  string
+	}
+
+	// The caller has handled the fast paths. Parse the unit.
+	factor = 1
+	p := newParser(unit)
+	edits := make([]edit, 0, 4)
+	for p.next() {
+		if p.denom {
+			// Don't edit in the denominator.
+			continue
+		}
+		switch p.tok {
+		case "ns":
+			edits = append(edits, edit{p.pos, len("ns"), "sec"})
+			factor /= 1e9
+		case "MB":
+			edits = append(edits, edit{p.pos, len("MB"), "B"})
+			factor *= 1e6
+		}
+	}
+	// Apply edits.
+	for i := len(edits) - 1; i >= 0; i-- {
+		e := edits[i]
+		unit = unit[:e.pos] + e.replace + unit[e.pos+e.len:]
+	}
+	return unit, factor
+}
diff --git a/benchunit/tidy_test.go b/benchunit/tidy_test.go
new file mode 100644
index 0000000..032d05f
--- /dev/null
+++ b/benchunit/tidy_test.go
@@ -0,0 +1,29 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package benchunit
+
+import "testing"
+
+func TestTidy(t *testing.T) {
+	test := func(unit, tidied string, factor float64) {
+		t.Helper()
+		gotFactor, got := Tidy(1, unit)
+		if got != tidied || gotFactor != factor {
+			t.Errorf("for %s, want *%f %s, got *%f %s", unit, factor, tidied, gotFactor, got)
+		}
+	}
+
+	test("ns/op", "sec/op", 1e-9)
+	test("x-ns/op", "x-sec/op", 1e-9)
+	test("MB/s", "B/s", 1e6)
+	test("x-MB/s", "x-B/s", 1e6)
+	test("B/op", "B/op", 1)
+	test("x-B/op", "x-B/op", 1)
+	test("x-allocs/op", "x-allocs/op", 1)
+
+	test("op/ns", "op/ns", 1)
+	test("MB*MB/s", "B*B/s", 1e6*1e6)
+	test("MB/MB", "B/MB", 1e6)
+}