benchmath: new package of opinionated benchmark statistics
Updates golang/go#20728.
Change-Id: I4c33e64d5959cadfbb97ca6a2274e0c060e87d29
Reviewed-on: https://go-review.googlesource.com/c/perf/+/283616
Trust: Austin Clements <austin@google.com>
Run-TryBot: Austin Clements <austin@google.com>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Russ Cox <rsc@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
diff --git a/benchmath/anone.go b/benchmath/anone.go
new file mode 100644
index 0000000..0ce86b6
--- /dev/null
+++ b/benchmath/anone.go
@@ -0,0 +1,134 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package benchmath
+
+import (
+ "fmt"
+ "math"
+ "sync"
+
+ "github.com/aclements/go-moremath/stats"
+)
+
+// AssumeNothing is a non-parametric Assumption (that is, it makes no
+// distributional assumptions). The summary statistic is the sample
+// median and comparisons are done using the Mann-Whitney U test.
+//
+// This is a good default assumption for benchmarks.
+// There's substantial evidence that benchmark results are non-normal.
+// The disadvantage (of any non-parametric methods) is that this is
+// less statistically powerful than parametric methods.
+var AssumeNothing = assumeNothing{}
+
+type assumeNothing struct{}
+
+var _ Assumption = assumeNothing{}
+
+// medianCache maps from ciKey to stats.QuantileCIResult for median
+// confidence intervals.
+var medianCache sync.Map
+
+func medianCI(n int, confidence float64) stats.QuantileCIResult {
+ type ciKey struct {
+ n int
+ confidence float64
+ }
+ key := ciKey{n, confidence}
+ if ciX, ok := medianCache.Load(key); ok {
+ return ciX.(stats.QuantileCIResult)
+ }
+ ci := stats.QuantileCI(n, 0.5, confidence)
+ medianCache.Store(key, ci)
+ return ci
+}
+
+// medianSamples returns the minimum number of samples required to get
+// a finite confidence interval at the given confidence level.
+func medianSamples(confidence float64) (op string, n int) {
+ const limit = 50
+ // We need at least two samples to have an interval.
+ for n = 2; n <= limit; n++ {
+ ci := medianCI(n, confidence)
+ if 0 < ci.LoOrder && ci.HiOrder <= n {
+ return ">=", n
+ }
+ }
+ return ">", limit
+}
+
+func (assumeNothing) SummaryLabel() string {
+ return "median"
+}
+
+func (assumeNothing) Summary(s *Sample, confidence float64) Summary {
+ ci := medianCI(len(s.Values), confidence)
+ median, lo, hi := ci.SampleCI(s.sample())
+
+ var warnings []error
+ if math.IsInf(lo, 0) || math.IsInf(hi, 0) {
+ // Explain to the user why there's a ±∞
+ op, need := medianSamples(confidence)
+ msg := fmt.Errorf("need %s %d samples for confidence interval at level %v", op, need, confidence)
+ warnings = append(warnings, msg)
+ }
+
+ return Summary{median, lo, hi, ci.Confidence, warnings}
+}
+
+// uTestMinP[n] is the minimum possible P value for the U-test with
+// two samples of size n.
+//
+// Generated by go run mktables.go.
+var uTestMinP = []float64{
+ 1: 1,
+ 2: 0.3333333333333333,
+ 3: 0.1,
+ 4: 0.02857142857142857,
+ 5: 0.007936507936507936,
+ 6: 0.0021645021645021645,
+ 7: 0.0005827505827505828,
+ 8: 0.0001554001554001554,
+ 9: 4.113533525298231e-05,
+}
+
+// uTestSamples returns the minimum number of samples required for the
+// U-test to achieve statistical significance at the given alpha
+// level.
+func uTestSamples(alpha float64) (op string, n int) {
+ for n, minP := range uTestMinP {
+ if n == 0 {
+ continue
+ }
+ if minP <= alpha {
+ return ">=", n
+ }
+ }
+ return ">", len(uTestMinP)
+}
+
+func (assumeNothing) Compare(s1, s2 *Sample) Comparison {
+ res, err := stats.MannWhitneyUTest(s1.Values, s2.Values, stats.LocationDiffers)
+ if err != nil {
+ // The U-test failed. Report as if there's no
+ // significant difference, along with the error.
+ return Comparison{P: 1, N1: len(s1.Values), N2: len(s2.Values), Alpha: s1.Thresholds.CompareAlpha, Warnings: []error{err}}
+ }
+ cmp := Comparison{P: res.P, N1: res.N1, N2: res.N2, Alpha: s1.Thresholds.CompareAlpha}
+ // Warn if there aren't enough samples to report a difference
+ // even if they were maximally diverged.
+ if cmp.P > cmp.Alpha {
+ op, n := uTestSamples(cmp.Alpha)
+ if cmp.N1 < n && cmp.N2 < n {
+ // We could deal with asymmetric sample sizes
+ // by first ramping up the smaller sample
+ // until the minimum P value is sufficient or
+ // the sample sizes are equal. But it doesn't
+ // seem worth the complexity.
+ msg := fmt.Errorf("need %s %d samples to detect a difference at alpha level %v", op, n, cmp.Alpha)
+ cmp.Warnings = append(cmp.Warnings, msg)
+ }
+ }
+ return cmp
+}