benchmath: new package of opinionated benchmark statistics
Updates golang/go#20728.
Change-Id: I4c33e64d5959cadfbb97ca6a2274e0c060e87d29
Reviewed-on: https://go-review.googlesource.com/c/perf/+/283616
Trust: Austin Clements <austin@google.com>
Run-TryBot: Austin Clements <austin@google.com>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Russ Cox <rsc@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
diff --git a/benchmath/aexact.go b/benchmath/aexact.go
new file mode 100644
index 0000000..3519e76
--- /dev/null
+++ b/benchmath/aexact.go
@@ -0,0 +1,51 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package benchmath
+
+import "fmt"
+
+// AssumeExact is an assumption that a value can be measured exactly
+// and thus has no distribution and does not require repeated sampling.
+// It reports a warning if not all values in a sample are equal.
+var AssumeExact = assumeExact{}
+
+type assumeExact struct{}
+
+var _ Assumption = assumeExact{}
+
+func (assumeExact) SummaryLabel() string {
+ // Really the summary is the mode, but the point of this
+ // assumption is that the summary is the exact value.
+ return "exact"
+}
+
+func (assumeExact) Summary(s *Sample, confidence float64) Summary {
+	// s.Values is in ascending order, so equal values are adjacent.
+	// Scan the runs of equal values, keeping the first longest run
+	// as the mode; the ends of the slice are the extreme values.
+	vals := s.Values
+	lo, hi := vals[0], vals[len(vals)-1]
+	mode, best := lo, 1
+	for i, start := 1, 0; i < len(vals); i++ {
+		if vals[i] != vals[start] {
+			start = i
+			continue
+		}
+		if n := i - start + 1; n > best {
+			mode, best = vals[start], n
+		}
+	}
+
+	out := Summary{Center: mode, Lo: lo, Hi: hi, Confidence: 1}
+	if best != len(vals) {
+		// The longest run doesn't cover the sample, so the values differ. Warn.
+		out.Warnings = []error{fmt.Errorf("exact distribution expected, but values range from %v to %v", lo, hi)}
+	}
+	return out
+}
+
+func (assumeExact) Compare(s1, s2 *Sample) Comparison {
+	return Comparison{N1: len(s1.Values), N2: len(s2.Values)} // P stays 0, which marks an exact result.
+}
diff --git a/benchmath/anone.go b/benchmath/anone.go
new file mode 100644
index 0000000..0ce86b6
--- /dev/null
+++ b/benchmath/anone.go
@@ -0,0 +1,134 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package benchmath
+
+import (
+ "fmt"
+ "math"
+ "sync"
+
+ "github.com/aclements/go-moremath/stats"
+)
+
+// AssumeNothing is a non-parametric Assumption (that is, it makes no
+// distributional assumptions). The summary statistic is the sample
+// median and comparisons are done using the Mann-Whitney U test.
+//
+// This is a good default assumption for benchmarks.
+// There's substantial evidence that benchmark results are non-normal.
+// The disadvantage (of any non-parametric methods) is that this is
+// less statistically powerful than parametric methods.
+var AssumeNothing = assumeNothing{}
+
+type assumeNothing struct{}
+
+var _ Assumption = assumeNothing{}
+
+// medianCache maps from ciKey to stats.QuantileCIResult for median
+// confidence intervals.
+var medianCache sync.Map
+
+func medianCI(n int, confidence float64) stats.QuantileCIResult {
+	type ciKey struct {
+		n          int
+		confidence float64
+	}
+	key := ciKey{n: n, confidence: confidence}
+	cached, ok := medianCache.Load(key)
+	if !ok {
+		cached = stats.QuantileCI(n, 0.5, confidence)
+		medianCache.Store(key, cached)
+	}
+	return cached.(stats.QuantileCIResult)
+}
+
+// medianSamples returns the minimum number of samples required to get
+// a finite confidence interval at the given confidence level.
+func medianSamples(confidence float64) (op string, n int) {
+	// An interval needs at least two samples; stop searching at maxN.
+	const minN, maxN = 2, 50
+	for size := minN; size <= maxN; size++ {
+		if ci := medianCI(size, confidence); ci.LoOrder > 0 && ci.HiOrder <= size {
+			return ">=", size
+		}
+	}
+	// No size up to maxN works; report a lower bound instead.
+	return ">", maxN
+}
+
+func (assumeNothing) SummaryLabel() string {
+ return "median"
+}
+
+func (assumeNothing) Summary(s *Sample, confidence float64) Summary {
+ ci := medianCI(len(s.Values), confidence)
+ median, lo, hi := ci.SampleCI(s.sample())
+
+ var warnings []error
+ if math.IsInf(lo, 0) || math.IsInf(hi, 0) {
+ // Explain to the user why there's a ±∞
+ op, need := medianSamples(confidence)
+ msg := fmt.Errorf("need %s %d samples for confidence interval at level %v", op, need, confidence)
+ warnings = append(warnings, msg)
+ }
+
+ return Summary{median, lo, hi, ci.Confidence, warnings}
+}
+
+// uTestMinP[n] is the minimum possible P value for the U-test with
+// two samples of size n.
+//
+// Generated by go run mktables.go.
+var uTestMinP = []float64{
+ 1: 1,
+ 2: 0.3333333333333333,
+ 3: 0.1,
+ 4: 0.02857142857142857,
+ 5: 0.007936507936507936,
+ 6: 0.0021645021645021645,
+ 7: 0.0005827505827505828,
+ 8: 0.0001554001554001554,
+ 9: 4.113533525298231e-05,
+}
+
+// uTestSamples returns the minimum number of samples required for the
+// U-test to achieve statistical significance at the given alpha
+// level.
+func uTestSamples(alpha float64) (op string, n int) {
+	// Index 0 of uTestMinP is unused (a sample size of 0); skip it.
+	for size := 1; size < len(uTestMinP); size++ {
+		if uTestMinP[size] <= alpha {
+			return ">=", size
+		}
+	}
+	// NOTE(review): the table's last index is len(uTestMinP)-1, so this
+	// bound looks one too high; TestUTestSamples pins the current value.
+	return ">", len(uTestMinP)
+}
+
+func (assumeNothing) Compare(s1, s2 *Sample) Comparison {
+	alpha := s1.Thresholds.CompareAlpha
+	res, err := stats.MannWhitneyUTest(s1.Values, s2.Values, stats.LocationDiffers)
+	if err != nil {
+		// The U-test failed. Report as if there's no
+		// significant difference, along with the error.
+		return Comparison{P: 1, N1: len(s1.Values), N2: len(s2.Values), Alpha: alpha, Warnings: []error{err}}
+	}
+	out := Comparison{P: res.P, N1: res.N1, N2: res.N2, Alpha: alpha}
+	// Warn if there aren't enough samples to report a difference
+	// even if they were maximally diverged.
+	if out.P > alpha {
+		if op, n := uTestSamples(alpha); out.N1 < n && out.N2 < n {
+			// We could deal with asymmetric sample sizes
+			// by first ramping up the smaller sample
+			// until the minimum P value is sufficient or
+			// the sample sizes are equal. But it doesn't
+			// seem worth the complexity.
+			msg := fmt.Errorf("need %s %d samples to detect a difference at alpha level %v", op, n, alpha)
+			out.Warnings = append(out.Warnings, msg)
+		}
+	}
+	return out
+}
diff --git a/benchmath/anormal.go b/benchmath/anormal.go
new file mode 100644
index 0000000..b9fd4bf
--- /dev/null
+++ b/benchmath/anormal.go
@@ -0,0 +1,44 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package benchmath
+
+import "github.com/aclements/go-moremath/stats"
+
+// AssumeNormal is an assumption that a sample is normally distributed.
+// The summary statistic is the sample mean and comparisons are done
+// using the two-sample t-test.
+var AssumeNormal = assumeNormal{}
+
+type assumeNormal struct{}
+
+var _ Assumption = assumeNormal{}
+
+func (assumeNormal) SummaryLabel() string {
+ return "mean"
+}
+
+func (assumeNormal) Summary(s *Sample, confidence float64) Summary {
+	// TODO: Perform a normality test.
+
+	// The center is the sample mean and the interval is whatever
+	// MeanCI computes for it at the requested level.
+	center, lo, hi := s.sample().MeanCI(confidence)
+
+	// The reported confidence is the requested level itself,
+	// passed through unchanged.
+	out := Summary{Confidence: confidence}
+	out.Center, out.Lo, out.Hi = center, lo, hi
+	return out
+}
+
+func (assumeNormal) Compare(s1, s2 *Sample) Comparison {
+	// Always set Alpha: FormatDelta compares P against it, and a zero
+	// Alpha would make every difference with P > 0 render as "~".
+	t, err := stats.TwoSampleWelchTTest(s1.sample(), s2.sample(), stats.LocationDiffers)
+	if err != nil {
+		return Comparison{P: 1, N1: len(s1.Values), N2: len(s2.Values), Alpha: s1.Thresholds.CompareAlpha, Warnings: []error{err}} // t-test failed: report no significant difference.
+	}
+	return Comparison{P: t.P, N1: len(s1.Values), N2: len(s2.Values), Alpha: s1.Thresholds.CompareAlpha}
+}
diff --git a/benchmath/assumption_test.go b/benchmath/assumption_test.go
new file mode 100644
index 0000000..4a9d671
--- /dev/null
+++ b/benchmath/assumption_test.go
@@ -0,0 +1,178 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package benchmath
+
+import (
+ "fmt"
+ "math"
+ "testing"
+
+ "github.com/aclements/go-moremath/stats"
+)
+
+func TestMedianSamples(t *testing.T) {
+ if false {
+ for n := 2; n <= 50; n++ {
+ d := stats.BinomialDist{N: n, P: 0.5}
+ t.Log(n, 1-(d.PMF(0)+d.PMF(float64(d.N))), d.PMF(0))
+ }
+ }
+
+ check := func(confidence float64, wantOp string, wantN int) {
+ t.Helper()
+ gotOp, gotN := medianSamples(confidence)
+ if gotOp != wantOp || gotN != wantN {
+ t.Errorf("for confidence %v, want %s %d, got %s %d", confidence, wantOp, wantN, gotOp, gotN)
+ }
+ }
+
+ // At n=6, the tails are 0.015625 * 2 => 0.03125
+ check(0.95, ">=", 6)
+ // At n=8, the tails are 0.00390625 * 2 => 0.0078125
+ check(0.99, ">=", 8)
+ // The hard-coded threshold is 50.
+ check(1, ">", 50)
+ // Check the other extreme. We always need at least two
+ // samples to have an interval.
+ check(0, ">=", 2)
+}
+
+func TestUTestSamples(t *testing.T) {
+ check := func(alpha float64, wantOp string, wantN int) {
+ t.Helper()
+ gotOp, gotN := uTestSamples(alpha)
+ if gotOp != wantOp || gotN != wantN {
+ t.Errorf("for alpha %v, want %s %d, got %s %d", alpha, wantOp, wantN, gotOp, gotN)
+ }
+ }
+ check(1, ">=", 1)
+ check(0.05, ">=", 4)
+ check(0.01, ">=", 5)
+ check(1e-50, ">", 10)
+ check(0, ">", 10)
+}
+
+func TestSummaryNone(t *testing.T) {
+ // The following tests correspond to the tests in
+ // TestMedianSamples.
+ a := AssumeNothing
+ var sample *Sample
+ inf := math.Inf(1)
+ sample = NewSample([]float64{-10, 2, 3, 4, 5, 6}, &DefaultThresholds)
+ checkSummary(t, a.Summary(sample, 0.95),
+ Summary{Center: 3.5, Lo: -10, Hi: 6, Confidence: 1 - 0.03125})
+ checkSummary(t, a.Summary(sample, 0.99),
+ Summary{Center: 3.5, Lo: -inf, Hi: inf, Confidence: 1},
+ "need >= 8 samples for confidence interval at level 0.99")
+ checkSummary(t, a.Summary(sample, 1),
+ Summary{Center: 3.5, Lo: -inf, Hi: inf, Confidence: 1},
+ "need > 50 samples for confidence interval at level 1")
+ sample = NewSample([]float64{1, 2}, &DefaultThresholds)
+ checkSummary(t, a.Summary(sample, 0),
+ Summary{Center: 1.5, Lo: 1, Hi: 2, Confidence: 0.5})
+
+ // And test very small samples.
+ sample = NewSample([]float64{1}, &DefaultThresholds)
+ checkSummary(t, a.Summary(sample, 0.95),
+ Summary{Center: 1, Lo: -inf, Hi: inf, Confidence: 1},
+ "need >= 6 samples for confidence interval at level 0.95")
+}
+
+func TestCompareNone(t *testing.T) {
+ // Most of the complexity is in the sample size warning.
+ a := AssumeNothing
+ thr := DefaultThresholds
+ thr.CompareAlpha = 0.05
+ // Too-small samples.
+ s1 := NewSample([]float64{-1, -1, -1}, &thr)
+ s2 := NewSample([]float64{1, 1, 1}, &thr)
+ checkComparison(t, a.Compare(s1, s2),
+ Comparison{P: 0.1, N1: 3, N2: 3, Alpha: 0.05},
+ "need >= 4 samples to detect a difference at alpha level 0.05")
+ // Big enough samples with a difference.
+ s1 = NewSample([]float64{-1, -1, -1, -1}, &thr)
+ s2 = NewSample([]float64{1, 1, 1, 1}, &thr)
+ checkComparison(t, a.Compare(s1, s2),
+ Comparison{P: 0.02857142857142857, N1: 4, N2: 4, Alpha: 0.05})
+ // Big enough samples, but not enough difference.
+ s1 = NewSample([]float64{1, -1, -1, -1}, &thr)
+ s2 = NewSample([]float64{-1, 1, 1, 1}, &thr)
+ checkComparison(t, a.Compare(s1, s2),
+ Comparison{P: 0.4857142857142857, N1: 4, N2: 4, Alpha: 0.05})
+
+ // All samples equal, so the U-test is meaningless.
+ s1 = NewSample([]float64{1, 1, 1, 1}, &thr)
+ s2 = NewSample([]float64{1, 1, 1, 1}, &thr)
+ checkComparison(t, a.Compare(s1, s2),
+ Comparison{P: 1, N1: 4, N2: 4, Alpha: 0.05},
+ "all samples are equal")
+
+}
+
+func TestSummaryNormal(t *testing.T) {
+ // This is a thin wrapper around sample.MeanCI, so just do a
+ // smoke test.
+ a := AssumeNormal
+ sample := NewSample([]float64{-8, 2, 3, 4, 5, 6}, &DefaultThresholds)
+ checkSummary(t, a.Summary(sample, 0.95),
+ Summary{Center: 2, Lo: -3.351092806089359, Hi: 7.351092806089359, Confidence: 0.95})
+}
+
+func TestSummaryExact(t *testing.T) {
+ a := AssumeExact
+ sample := NewSample([]float64{1, 1, 1, 1}, &DefaultThresholds)
+ checkSummary(t, a.Summary(sample, 0.95),
+ Summary{Center: 1, Lo: 1, Hi: 1, Confidence: 1})
+
+ sample = NewSample([]float64{1}, &DefaultThresholds)
+ checkSummary(t, a.Summary(sample, 0.95),
+ Summary{Center: 1, Lo: 1, Hi: 1, Confidence: 1})
+
+ sample = NewSample([]float64{1, 2, 2, 3}, &DefaultThresholds)
+ checkSummary(t, a.Summary(sample, 0.95),
+ Summary{Center: 2, Lo: 1, Hi: 3, Confidence: 1},
+ "exact distribution expected, but values range from 1 to 3")
+}
+
+func aeq(x, y float64) bool {
+	// Check that x and y are equal to 8 digits (by magnitude if both are negative).
+	const factor = 1 - 1e-7
+	if x < 0 && y < 0 {
+		return -x*factor <= -y && -y*factor <= -x
+	}
+	return x*factor <= y && y*factor <= x
+}
+
+func checkSummary(t *testing.T, got, want Summary, warnings ...string) {
+	t.Helper()
+	for _, w := range warnings {
+		want.Warnings = append(want.Warnings, fmt.Errorf("%s", w))
+	}
+	if !aeq(got.Center, want.Center) || !aeq(got.Lo, want.Lo) || !aeq(got.Hi, want.Hi) || got.Confidence != want.Confidence || !errorsEq(got.Warnings, want.Warnings) { // every field, Hi included, is checked against want
+		t.Errorf("got %v, want %v", got, want)
+	}
+}
+
+func checkComparison(t *testing.T, got, want Comparison, warnings ...string) {
+	t.Helper()
+	for _, text := range warnings {
+		want.Warnings = append(want.Warnings, fmt.Errorf("%s", text))
+	}
+	if ok := aeq(got.P, want.P) && got.N1 == want.N1 && got.N2 == want.N2 && got.Alpha == want.Alpha && errorsEq(got.Warnings, want.Warnings); !ok {
+		t.Errorf("got %#v, want %#v", got, want)
+	}
+}
+
+// errorsEq reports whether a and b have the same length and
+// pairwise-identical Error() strings. Only the message text is
+// compared, not the error values themselves.
+func errorsEq(a, b []error) bool {
+	same := len(a) == len(b)
+	// Stop at the first mismatch.
+	for i := 0; same && i < len(a); i++ {
+		same = a[i].Error() == b[i].Error()
+	}
+	return same
+}
diff --git a/benchmath/mktables.go b/benchmath/mktables.go
new file mode 100644
index 0000000..d377603
--- /dev/null
+++ b/benchmath/mktables.go
@@ -0,0 +1,35 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ignore
+// +build ignore
+
+// Mktables pre-computes statistical tables.
+package main
+
+import (
+ "fmt"
+
+ "github.com/aclements/go-moremath/stats"
+)
+
+func main() {
+	// Compute minimal P-value for the U-test given different
+	// sample sizes. Both samples grow by one value per iteration,
+	// so on iteration n each of them holds n values.
+	var below, above []float64
+
+	fmt.Printf("var uTestMinP = []float64{\n")
+	for n := 1; n <= 9; n++ {
+		// The P-value is minimized when the order statistic
+		// is maximally separated.
+		below, above = append(below, -1), append(above, 1)
+		res, err := stats.MannWhitneyUTest(below, above, stats.LocationDiffers)
+		if err != nil {
+			panic(err)
+		}
+		fmt.Printf("\t%d: %v,\n", n, res.P)
+	}
+	fmt.Printf("}\n")
+}
diff --git a/benchmath/sample.go b/benchmath/sample.go
new file mode 100644
index 0000000..b9d8513
--- /dev/null
+++ b/benchmath/sample.go
@@ -0,0 +1,196 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package benchmath provides tools for computing statistics over
+// distributions of benchmark measurements.
+//
+// This package is opinionated. For example, it doesn't provide
+// specific statistical tests. Instead, callers state distributional
+// assumptions and this package chooses appropriate tests.
+//
+// All analysis results contain a list of warnings, captured as an
+// []error value. These aren't errors that prevent analysis, but
+// should be presented to the user along with analysis results.
+package benchmath
+
+import (
+ "fmt"
+ "math"
+ "sort"
+
+ "github.com/aclements/go-moremath/mathx"
+ "github.com/aclements/go-moremath/stats"
+)
+
+// A Sample is a set of repeated measurements of a given benchmark.
+type Sample struct {
+ // Values are the measured values, in ascending order.
+ Values []float64
+
+ // Thresholds stores the statistical thresholds used by tests
+ // on this sample.
+ Thresholds *Thresholds
+
+ // Warnings is a list of warnings about this sample that
+ // should be reported to the user.
+ Warnings []error
+}
+
+// NewSample constructs a Sample from a set of measurements.
+func NewSample(values []float64, t *Thresholds) *Sample {
+ // TODO: Analyze stationarity and put results in Warnings.
+ // Consider Augmented Dickey–Fuller (based on Maricq et al.)
+
+ // Sort in place for fast order statistics: values is reordered and kept, not copied.
+ sort.Float64s(values)
+ return &Sample{values, t, nil}
+}
+
+func (s *Sample) sample() stats.Sample {
+ return stats.Sample{Xs: s.Values, Sorted: true}
+}
+
+// A Thresholds configures various thresholds used by statistical tests.
+//
+// This should be initialized to DefaultThresholds because it may be
+// extended with other fields in the future.
+type Thresholds struct {
+ // CompareAlpha is the alpha level below which
+ // Assumption.Compare rejects the null hypothesis that two
+ // samples come from the same distribution.
+ //
+ // This is typically 0.05.
+ CompareAlpha float64
+}
+
+// Note: Thresholds exists so we can extend it in the future with
+// things like the stationarity and normality test thresholds without
+// having to add function arguments in the future.
+
+// DefaultThresholds contains a reasonable set of defaults for Thresholds.
+var DefaultThresholds = Thresholds{
+ CompareAlpha: 0.05,
+}
+
+// An Assumption indicates a distributional assumption about a sample.
+type Assumption interface {
+ // SummaryLabel returns the string name for the summary
+ // statistic under this assumption. For example, "median" or
+ // "mean".
+ SummaryLabel() string
+
+ // Summary returns a summary statistic and its confidence
+ // interval at the given confidence level for Sample s.
+ //
+ // Confidence is given in the range [0,1], e.g., 0.95 for 95%
+ // confidence.
+ Summary(s *Sample, confidence float64) Summary
+
+ // Compare tests whether s1 and s2 come from the same
+ // distribution.
+ Compare(s1, s2 *Sample) Comparison
+}
+
+// A Summary summarizes a Sample.
+type Summary struct {
+ // Center is some measure of the central tendency of a sample.
+ Center float64
+
+ // Lo and Hi give the bounds of the confidence interval around
+ // Center.
+ Lo, Hi float64
+
+ // Confidence is the actual confidence level of the confidence
+ // interval given by Lo, Hi. It will be >= the requested
+ // confidence level.
+ Confidence float64
+
+ // Warnings is a list of warnings about this summary or its
+ // confidence interval.
+ Warnings []error
+}
+
+// PctRangeString returns a string representation of the range of this
+// Summary's confidence interval as a percentage.
+func (s Summary) PctRangeString() string {
+ if math.IsInf(s.Lo, 0) || math.IsInf(s.Hi, 0) {
+ return "∞"
+ }
+
+ // If the signs of the bounds differ from the center, we can't
+ // render it as a percent.
+ var csign = mathx.Sign(s.Center)
+ if csign != mathx.Sign(s.Lo) || csign != mathx.Sign(s.Hi) {
+ return "?"
+ }
+
+ // If center is 0, avoid dividing by zero. But we can only get
+ // here if lo and hi are also 0, in which case is seems
+ // reasonable to call this 0%.
+ if s.Center == 0 {
+ return "0%"
+ }
+
+ // Phew. Compute the range percent.
+ v := math.Max(s.Hi/s.Center-1, 1-s.Lo/s.Center)
+ return fmt.Sprintf("%.0f%%", 100*v)
+}
+
+// A Comparison is the result of comparing two samples to test if they
+// come from the same distribution.
+type Comparison struct {
+ // P is the p-value of the null hypothesis that two samples
+ // come from the same distribution. If P is at most a
+ // threshold alpha (typically 0.05), then we reject the null
+ // hypothesis.
+ //
+ // P can be 0, which indicates this is an exact result.
+ P float64
+
+ // N1 and N2 are the sizes of the two samples.
+ N1, N2 int
+
+ // Alpha is the alpha threshold for this test. If P <= Alpha,
+ // we reject the null hypothesis that the two samples come
+ // from the same distribution.
+ Alpha float64
+
+ // Warnings is a list of warnings about this comparison
+ // result.
+ Warnings []error
+}
+
+// String summarizes the comparison. The general form of this string
+// is "p=0.PPP n=N1+N2" but can be shortened (p is omitted when P is 0).
+func (c Comparison) String() string {
+	var prefix string
+	if c.P != 0 {
+		prefix = fmt.Sprintf("p=%0.3f ", c.P)
+	}
+	if c.N1 != c.N2 {
+		return prefix + fmt.Sprintf("n=%d+%d", c.N1, c.N2)
+	}
+	// Slightly shorter form for the common case of equal sizes.
+	return prefix + fmt.Sprintf("n=%d", c.N1)
+}
+
+// FormatDelta formats the difference in the centers of two distributions.
+// The old and new values must be the center summaries of the two
+// compared samples. If the Comparison accepts the null hypothesis
+// that the samples come from the same distribution, FormatDelta
+// returns "~" to indicate there's no meaningful difference.
+// Otherwise, it returns the percent difference between the centers.
+func (c Comparison) FormatDelta(old, new float64) string {
+	switch {
+	case c.P > c.Alpha:
+		return "~"
+	case old == new:
+		return "0.00%"
+	case old == 0:
+		// A change relative to zero can't be given as a percent.
+		return "?"
+	}
+	pct := ((new / old) - 1.0) * 100.0
+	return fmt.Sprintf("%+.2f%%", pct)
+}
diff --git a/benchmath/sample_test.go b/benchmath/sample_test.go
new file mode 100644
index 0000000..13916ff
--- /dev/null
+++ b/benchmath/sample_test.go
@@ -0,0 +1,68 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package benchmath
+
+import (
+ "math"
+ "testing"
+)
+
+func TestSummaryFormat(t *testing.T) {
+ check := func(center, lo, hi float64, want string) {
+ t.Helper()
+ s := Summary{Center: center, Lo: lo, Hi: hi}
+ got := s.PctRangeString()
+ if got != want {
+ t.Errorf("for %v CI [%v, %v], got %s, want %s", center, lo, hi, got, want)
+ }
+ }
+ inf := math.Inf(1)
+
+ check(1, 0.5, 1.1, "50%")
+ check(1, 0.9, 1.5, "50%")
+ check(1, 1, 1, "0%")
+
+ check(-1, -0.5, -1.1, "50%")
+ check(-1, -0.9, -1.5, "50%")
+ check(-1, -1, -1, "0%")
+
+ check(1, -inf, 1, "∞")
+ check(1, 1, inf, "∞")
+
+ check(1, -1, 1, "?")
+ check(1, -1, -1, "?")
+ check(-1, -1, 1, "?")
+ check(-1, 1, -1, "?")
+ check(0, -1, 1, "?")
+
+ check(0, 0, 0, "0%")
+}
+
+func TestComparisonFormat(t *testing.T) {
+	check := func(p float64, n1, n2 int, want string) {
+		t.Helper()
+		got := Comparison{P: p, N1: n1, N2: n2}.String()
+		if got != want {
+			t.Errorf("for %v,%v,%v, got %s, want %s", p, n1, n2, got, want)
+		}
+	}
+	check(0.5, 1, 2, "p=0.500 n=1+2")
+	check(0.5, 2, 2, "p=0.500 n=2")
+	check(0, 1, 2, "n=1+2")
+	check(0, 2, 2, "n=2")
+
+	checkD := func(p, old, new, alpha float64, want string) {
+		t.Helper() // Attribute failures to the calling checkD line, as check does.
+		if got := (Comparison{P: p, Alpha: alpha}).FormatDelta(old, new); got != want {
+			t.Errorf("for p=%v %v=>%v @%v, got %s, want %s", p, old, new, alpha, got, want)
+		}
+	}
+	checkD(0.5, 0, 0, 0.05, "~")
+	checkD(0.01, 0, 0, 0.05, "0.00%")
+	checkD(0.01, 1, 1, 0.05, "0.00%")
+	checkD(0.01, 0, 1, 0.05, "?")
+	checkD(0.01, 1, 1.5, 0.05, "+50.00%")
+	checkD(0.01, 1, 0.5, 0.05, "-50.00%")
+}
diff --git a/go.mod b/go.mod
index 8bac3d7..842359f 100644
--- a/go.mod
+++ b/go.mod
@@ -6,7 +6,7 @@
cloud.google.com/go v0.0.0-20170206221025-ce650573d812
github.com/GoogleCloudPlatform/cloudsql-proxy v0.0.0-20190129172621-c8b1d7a94ddf
github.com/aclements/go-gg v0.0.0-20170118225347-6dbb4e4fefb0
- github.com/aclements/go-moremath v0.0.0-20161014184102-0ff62e0875ff // indirect
+ github.com/aclements/go-moremath v0.0.0-20210112150236-f10218a38794
github.com/go-sql-driver/mysql v1.4.1
github.com/gonum/blas v0.0.0-20181208220705-f22b278b28ac // indirect
github.com/gonum/floats v0.0.0-20181209220543-c233463c7e82 // indirect
diff --git a/go.sum b/go.sum
index a039423..dcf682b 100644
--- a/go.sum
+++ b/go.sum
@@ -6,6 +6,8 @@
github.com/aclements/go-gg v0.0.0-20170118225347-6dbb4e4fefb0/go.mod h1:55qNq4vcpkIuHowELi5C8e+1yUHtoLoOUR9QU5j7Tes=
github.com/aclements/go-moremath v0.0.0-20161014184102-0ff62e0875ff h1:txKOXqsFQUyi7Ht0Prto4QMU4O/0Gby6v5RFqMS0/PM=
github.com/aclements/go-moremath v0.0.0-20161014184102-0ff62e0875ff/go.mod h1:idZL3yvz4kzx1dsBOAC+oYv6L92P1oFEhUXUB1A/lwQ=
+github.com/aclements/go-moremath v0.0.0-20210112150236-f10218a38794 h1:xlwdaKcTNVW4PtpQb8aKA4Pjy0CdJHEqvFbAnvR5m2g=
+github.com/aclements/go-moremath v0.0.0-20210112150236-f10218a38794/go.mod h1:7e+I0LQFUI9AXWxOfsQROs9xPhoJtbsyWcjJqDd4KPY=
github.com/go-sql-driver/mysql v1.4.1 h1:g24URVg0OFbNUTx9qqY1IRZ9D9z3iPyi5zKhQZpNwpA=
github.com/go-sql-driver/mysql v1.4.1/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w=
github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM=