blob: 0ce86b67056d494bedf577f382dfae15be672f58 [file] [log] [blame]
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package benchmath
import (
"fmt"
"math"
"sync"
"github.com/aclements/go-moremath/stats"
)
// AssumeNothing is a non-parametric Assumption (that is, it makes no
// distributional assumptions). The summary statistic is the sample
// median and comparisons are done using the Mann-Whitney U test.
//
// This is a good default assumption for benchmarks.
// There's substantial evidence that benchmark results are non-normal.
// The disadvantage (of any non-parametric methods) is that this is
// less statistically powerful than parametric methods.
var AssumeNothing = assumeNothing{}
type assumeNothing struct{}
var _ Assumption = assumeNothing{}
// medianCache maps from ciKey to stats.QuantileCIResult for median
// confidence intervals.
var medianCache sync.Map
func medianCI(n int, confidence float64) stats.QuantileCIResult {
type ciKey struct {
n int
confidence float64
}
key := ciKey{n, confidence}
if ciX, ok := medianCache.Load(key); ok {
return ciX.(stats.QuantileCIResult)
}
ci := stats.QuantileCI(n, 0.5, confidence)
medianCache.Store(key, ci)
return ci
}
// medianSamples returns the minimum number of samples required to get
// a finite confidence interval at the given confidence level.
func medianSamples(confidence float64) (op string, n int) {
const limit = 50
// We need at least two samples to have an interval.
for n = 2; n <= limit; n++ {
ci := medianCI(n, confidence)
if 0 < ci.LoOrder && ci.HiOrder <= n {
return ">=", n
}
}
return ">", limit
}
func (assumeNothing) SummaryLabel() string {
return "median"
}
func (assumeNothing) Summary(s *Sample, confidence float64) Summary {
ci := medianCI(len(s.Values), confidence)
median, lo, hi := ci.SampleCI(s.sample())
var warnings []error
if math.IsInf(lo, 0) || math.IsInf(hi, 0) {
// Explain to the user why there's a ±∞
op, need := medianSamples(confidence)
msg := fmt.Errorf("need %s %d samples for confidence interval at level %v", op, need, confidence)
warnings = append(warnings, msg)
}
return Summary{median, lo, hi, ci.Confidence, warnings}
}
// uTestMinP[n] is the minimum possible P value for the U-test with
// two samples of size n.
//
// Generated by go run mktables.go.
var uTestMinP = []float64{
1: 1,
2: 0.3333333333333333,
3: 0.1,
4: 0.02857142857142857,
5: 0.007936507936507936,
6: 0.0021645021645021645,
7: 0.0005827505827505828,
8: 0.0001554001554001554,
9: 4.113533525298231e-05,
}
// uTestSamples returns the minimum number of samples required for the
// U-test to achieve statistical significance at the given alpha
// level.
func uTestSamples(alpha float64) (op string, n int) {
for n, minP := range uTestMinP {
if n == 0 {
continue
}
if minP <= alpha {
return ">=", n
}
}
return ">", len(uTestMinP)
}
func (assumeNothing) Compare(s1, s2 *Sample) Comparison {
res, err := stats.MannWhitneyUTest(s1.Values, s2.Values, stats.LocationDiffers)
if err != nil {
// The U-test failed. Report as if there's no
// significant difference, along with the error.
return Comparison{P: 1, N1: len(s1.Values), N2: len(s2.Values), Alpha: s1.Thresholds.CompareAlpha, Warnings: []error{err}}
}
cmp := Comparison{P: res.P, N1: res.N1, N2: res.N2, Alpha: s1.Thresholds.CompareAlpha}
// Warn if there aren't enough samples to report a difference
// even if they were maximally diverged.
if cmp.P > cmp.Alpha {
op, n := uTestSamples(cmp.Alpha)
if cmp.N1 < n && cmp.N2 < n {
// We could deal with asymmetric sample sizes
// by first ramping up the smaller sample
// until the minimum P value is sufficient or
// the sample sizes are equal. But it doesn't
// seem worth the complexity.
msg := fmt.Errorf("need %s %d samples to detect a difference at alpha level %v", op, n, cmp.Alpha)
cmp.Warnings = append(cmp.Warnings, msg)
}
}
return cmp
}