blob: 0ce86b67056d494bedf577f382dfae15be672f58 [file] [log] [blame]
 // Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package benchmath import ( "fmt" "math" "sync" "github.com/aclements/go-moremath/stats" ) // AssumeNothing is a non-parametric Assumption (that is, it makes no // distributional assumptions). The summary statistic is the sample // median and comparisons are done using the Mann-Whitney U test. // // This is a good default assumption for benchmarks. // There's substantial evidence that benchmark results are non-normal. // The disadvantage (of any non-parametric methods) is that this is // less statistically powerful than parametric methods. var AssumeNothing = assumeNothing{} type assumeNothing struct{} var _ Assumption = assumeNothing{} // medianCache maps from ciKey to stats.QuantileCIResult for median // confidence intervals. var medianCache sync.Map func medianCI(n int, confidence float64) stats.QuantileCIResult { type ciKey struct { n int confidence float64 } key := ciKey{n, confidence} if ciX, ok := medianCache.Load(key); ok { return ciX.(stats.QuantileCIResult) } ci := stats.QuantileCI(n, 0.5, confidence) medianCache.Store(key, ci) return ci } // medianSamples returns the minimum number of samples required to get // a finite confidence interval at the given confidence level. func medianSamples(confidence float64) (op string, n int) { const limit = 50 // We need at least two samples to have an interval. for n = 2; n <= limit; n++ { ci := medianCI(n, confidence) if 0 < ci.LoOrder && ci.HiOrder <= n { return ">=", n } } return ">", limit } func (assumeNothing) SummaryLabel() string { return "median" } func (assumeNothing) Summary(s *Sample, confidence float64) Summary { ci := medianCI(len(s.Values), confidence) median, lo, hi := ci.SampleCI(s.sample()) var warnings []error if math.IsInf(lo, 0) || math.IsInf(hi, 0) { // Explain to the user why there's a ±∞ op, need := medianSamples(confidence) msg := fmt.Errorf("need %s %d samples for confidence interval at level %v", op, need, confidence) warnings = append(warnings, msg) } return Summary{median, lo, hi, ci.Confidence, warnings} } // uTestMinP[n] is the minimum possible P value for the U-test with // two samples of size n. // // Generated by go run mktables.go. var uTestMinP = []float64{ 1: 1, 2: 0.3333333333333333, 3: 0.1, 4: 0.02857142857142857, 5: 0.007936507936507936, 6: 0.0021645021645021645, 7: 0.0005827505827505828, 8: 0.0001554001554001554, 9: 4.113533525298231e-05, } // uTestSamples returns the minimum number of samples required for the // U-test to achieve statistical significance at the given alpha // level. func uTestSamples(alpha float64) (op string, n int) { for n, minP := range uTestMinP { if n == 0 { continue } if minP <= alpha { return ">=", n } } return ">", len(uTestMinP) } func (assumeNothing) Compare(s1, s2 *Sample) Comparison { res, err := stats.MannWhitneyUTest(s1.Values, s2.Values, stats.LocationDiffers) if err != nil { // The U-test failed. Report as if there's no // significant difference, along with the error. return Comparison{P: 1, N1: len(s1.Values), N2: len(s2.Values), Alpha: s1.Thresholds.CompareAlpha, Warnings: []error{err}} } cmp := Comparison{P: res.P, N1: res.N1, N2: res.N2, Alpha: s1.Thresholds.CompareAlpha} // Warn if there aren't enough samples to report a difference // even if they were maximally diverged. if cmp.P > cmp.Alpha { op, n := uTestSamples(cmp.Alpha) if cmp.N1 < n && cmp.N2 < n { // We could deal with asymmetric sample sizes // by first ramping up the smaller sample // until the minimum P value is sufficient or // the sample sizes are equal. But it doesn't // seem worth the complexity. msg := fmt.Errorf("need %s %d samples to detect a difference at alpha level %v", op, n, cmp.Alpha) cmp.Warnings = append(cmp.Warnings, msg) } } return cmp }