Austin Clements | af31a76 | 2021-01-12 18:25:40 -0500 | [diff] [blame] | 1 | // Copyright 2022 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package benchmath |
| 6 | |
| 7 | import ( |
| 8 | "fmt" |
| 9 | "math" |
| 10 | "sync" |
| 11 | |
| 12 | "github.com/aclements/go-moremath/stats" |
| 13 | ) |
| 14 | |
| 15 | // AssumeNothing is a non-parametric Assumption (that is, it makes no |
| 16 | // distributional assumptions). The summary statistic is the sample |
| 17 | // median and comparisons are done using the Mann-Whitney U test. |
| 18 | // |
| 19 | // This is a good default assumption for benchmarks. |
| 20 | // There's substantial evidence that benchmark results are non-normal. |
| 21 | // The disadvantage (of any non-parametric methods) is that this is |
| 22 | // less statistically powerful than parametric methods. |
| 23 | var AssumeNothing = assumeNothing{} |
| 24 | |
| 25 | type assumeNothing struct{} |
| 26 | |
| 27 | var _ Assumption = assumeNothing{} |
| 28 | |
| 29 | // medianCache maps from ciKey to stats.QuantileCIResult for median |
| 30 | // confidence intervals. |
| 31 | var medianCache sync.Map |
| 32 | |
| 33 | func medianCI(n int, confidence float64) stats.QuantileCIResult { |
| 34 | type ciKey struct { |
| 35 | n int |
| 36 | confidence float64 |
| 37 | } |
| 38 | key := ciKey{n, confidence} |
| 39 | if ciX, ok := medianCache.Load(key); ok { |
| 40 | return ciX.(stats.QuantileCIResult) |
| 41 | } |
| 42 | ci := stats.QuantileCI(n, 0.5, confidence) |
| 43 | medianCache.Store(key, ci) |
| 44 | return ci |
| 45 | } |
| 46 | |
| 47 | // medianSamples returns the minimum number of samples required to get |
| 48 | // a finite confidence interval at the given confidence level. |
| 49 | func medianSamples(confidence float64) (op string, n int) { |
| 50 | const limit = 50 |
| 51 | // We need at least two samples to have an interval. |
| 52 | for n = 2; n <= limit; n++ { |
| 53 | ci := medianCI(n, confidence) |
| 54 | if 0 < ci.LoOrder && ci.HiOrder <= n { |
| 55 | return ">=", n |
| 56 | } |
| 57 | } |
| 58 | return ">", limit |
| 59 | } |
| 60 | |
| 61 | func (assumeNothing) SummaryLabel() string { |
| 62 | return "median" |
| 63 | } |
| 64 | |
| 65 | func (assumeNothing) Summary(s *Sample, confidence float64) Summary { |
| 66 | ci := medianCI(len(s.Values), confidence) |
| 67 | median, lo, hi := ci.SampleCI(s.sample()) |
| 68 | |
| 69 | var warnings []error |
| 70 | if math.IsInf(lo, 0) || math.IsInf(hi, 0) { |
| 71 | // Explain to the user why there's a ±∞ |
| 72 | op, need := medianSamples(confidence) |
| 73 | msg := fmt.Errorf("need %s %d samples for confidence interval at level %v", op, need, confidence) |
| 74 | warnings = append(warnings, msg) |
| 75 | } |
| 76 | |
| 77 | return Summary{median, lo, hi, ci.Confidence, warnings} |
| 78 | } |
| 79 | |
| 80 | // uTestMinP[n] is the minimum possible P value for the U-test with |
| 81 | // two samples of size n. |
| 82 | // |
| 83 | // Generated by go run mktables.go. |
| 84 | var uTestMinP = []float64{ |
| 85 | 1: 1, |
| 86 | 2: 0.3333333333333333, |
| 87 | 3: 0.1, |
| 88 | 4: 0.02857142857142857, |
| 89 | 5: 0.007936507936507936, |
| 90 | 6: 0.0021645021645021645, |
| 91 | 7: 0.0005827505827505828, |
| 92 | 8: 0.0001554001554001554, |
| 93 | 9: 4.113533525298231e-05, |
| 94 | } |
| 95 | |
| 96 | // uTestSamples returns the minimum number of samples required for the |
| 97 | // U-test to achieve statistical significance at the given alpha |
| 98 | // level. |
| 99 | func uTestSamples(alpha float64) (op string, n int) { |
| 100 | for n, minP := range uTestMinP { |
| 101 | if n == 0 { |
| 102 | continue |
| 103 | } |
| 104 | if minP <= alpha { |
| 105 | return ">=", n |
| 106 | } |
| 107 | } |
| 108 | return ">", len(uTestMinP) |
| 109 | } |
| 110 | |
| 111 | func (assumeNothing) Compare(s1, s2 *Sample) Comparison { |
| 112 | res, err := stats.MannWhitneyUTest(s1.Values, s2.Values, stats.LocationDiffers) |
| 113 | if err != nil { |
| 114 | // The U-test failed. Report as if there's no |
| 115 | // significant difference, along with the error. |
| 116 | return Comparison{P: 1, N1: len(s1.Values), N2: len(s2.Values), Alpha: s1.Thresholds.CompareAlpha, Warnings: []error{err}} |
| 117 | } |
| 118 | cmp := Comparison{P: res.P, N1: res.N1, N2: res.N2, Alpha: s1.Thresholds.CompareAlpha} |
| 119 | // Warn if there aren't enough samples to report a difference |
| 120 | // even if they were maximally diverged. |
| 121 | if cmp.P > cmp.Alpha { |
| 122 | op, n := uTestSamples(cmp.Alpha) |
| 123 | if cmp.N1 < n && cmp.N2 < n { |
| 124 | // We could deal with asymmetric sample sizes |
| 125 | // by first ramping up the smaller sample |
| 126 | // until the minimum P value is sufficient or |
| 127 | // the sample sizes are equal. But it doesn't |
| 128 | // seem worth the complexity. |
| 129 | msg := fmt.Errorf("need %s %d samples to detect a difference at alpha level %v", op, n, cmp.Alpha) |
| 130 | cmp.Warnings = append(cmp.Warnings, msg) |
| 131 | } |
| 132 | } |
| 133 | return cmp |
| 134 | } |