| // Copyright 2022 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package benchmath |
| |
| import ( |
| "fmt" |
| "math" |
| "sync" |
| |
| "github.com/aclements/go-moremath/stats" |
| ) |
| |
| // AssumeNothing is a non-parametric Assumption (that is, it makes no |
| // distributional assumptions). The summary statistic is the sample |
| // median and comparisons are done using the Mann-Whitney U test. |
| // |
| // This is a good default assumption for benchmarks. |
| // There's substantial evidence that benchmark results are non-normal. |
| // The disadvantage (of any non-parametric methods) is that this is |
| // less statistically powerful than parametric methods. |
| var AssumeNothing = assumeNothing{} |
| |
| type assumeNothing struct{} |
| |
| var _ Assumption = assumeNothing{} |
| |
| // medianCache maps from ciKey to stats.QuantileCIResult for median |
| // confidence intervals. |
| var medianCache sync.Map |
| |
| func medianCI(n int, confidence float64) stats.QuantileCIResult { |
| type ciKey struct { |
| n int |
| confidence float64 |
| } |
| key := ciKey{n, confidence} |
| if ciX, ok := medianCache.Load(key); ok { |
| return ciX.(stats.QuantileCIResult) |
| } |
| ci := stats.QuantileCI(n, 0.5, confidence) |
| medianCache.Store(key, ci) |
| return ci |
| } |
| |
| // medianSamples returns the minimum number of samples required to get |
| // a finite confidence interval at the given confidence level. |
| func medianSamples(confidence float64) (op string, n int) { |
| const limit = 50 |
| // We need at least two samples to have an interval. |
| for n = 2; n <= limit; n++ { |
| ci := medianCI(n, confidence) |
| if 0 < ci.LoOrder && ci.HiOrder <= n { |
| return ">=", n |
| } |
| } |
| return ">", limit |
| } |
| |
| func (assumeNothing) SummaryLabel() string { |
| return "median" |
| } |
| |
| func (assumeNothing) Summary(s *Sample, confidence float64) Summary { |
| ci := medianCI(len(s.Values), confidence) |
| median, lo, hi := ci.SampleCI(s.sample()) |
| |
| var warnings []error |
| if math.IsInf(lo, 0) || math.IsInf(hi, 0) { |
| // Explain to the user why there's a ±∞ |
| op, need := medianSamples(confidence) |
| msg := fmt.Errorf("need %s %d samples for confidence interval at level %v", op, need, confidence) |
| warnings = append(warnings, msg) |
| } |
| |
| return Summary{median, lo, hi, ci.Confidence, warnings} |
| } |
| |
| // uTestMinP[n] is the minimum possible P value for the U-test with |
| // two samples of size n. |
| // |
| // Generated by go run mktables.go. |
| var uTestMinP = []float64{ |
| 1: 1, |
| 2: 0.3333333333333333, |
| 3: 0.1, |
| 4: 0.02857142857142857, |
| 5: 0.007936507936507936, |
| 6: 0.0021645021645021645, |
| 7: 0.0005827505827505828, |
| 8: 0.0001554001554001554, |
| 9: 4.113533525298231e-05, |
| } |
| |
| // uTestSamples returns the minimum number of samples required for the |
| // U-test to achieve statistical significance at the given alpha |
| // level. |
| func uTestSamples(alpha float64) (op string, n int) { |
| for n, minP := range uTestMinP { |
| if n == 0 { |
| continue |
| } |
| if minP <= alpha { |
| return ">=", n |
| } |
| } |
| return ">", len(uTestMinP) |
| } |
| |
| func (assumeNothing) Compare(s1, s2 *Sample) Comparison { |
| res, err := stats.MannWhitneyUTest(s1.Values, s2.Values, stats.LocationDiffers) |
| if err != nil { |
| // The U-test failed. Report as if there's no |
| // significant difference, along with the error. |
| return Comparison{P: 1, N1: len(s1.Values), N2: len(s2.Values), Alpha: s1.Thresholds.CompareAlpha, Warnings: []error{err}} |
| } |
| cmp := Comparison{P: res.P, N1: res.N1, N2: res.N2, Alpha: s1.Thresholds.CompareAlpha} |
| // Warn if there aren't enough samples to report a difference |
| // even if they were maximally diverged. |
| if cmp.P > cmp.Alpha { |
| op, n := uTestSamples(cmp.Alpha) |
| if cmp.N1 < n && cmp.N2 < n { |
| // We could deal with asymmetric sample sizes |
| // by first ramping up the smaller sample |
| // until the minimum P value is sufficient or |
| // the sample sizes are equal. But it doesn't |
| // seem worth the complexity. |
| msg := fmt.Errorf("need %s %d samples to detect a difference at alpha level %v", op, n, cmp.Alpha) |
| cmp.Warnings = append(cmp.Warnings, msg) |
| } |
| } |
| return cmp |
| } |