benchmath: new package of opinionated benchmark statistics Updates golang/go#20728. Change-Id: I4c33e64d5959cadfbb97ca6a2274e0c060e87d29 Reviewed-on: https://go-review.googlesource.com/c/perf/+/283616 Trust: Austin Clements <austin@google.com> Run-TryBot: Austin Clements <austin@google.com> Reviewed-by: David Chase <drchase@google.com> Reviewed-by: Russ Cox <rsc@golang.org> Reviewed-by: Michael Knyszek <mknyszek@google.com> TryBot-Result: Gopher Robot <gobot@golang.org>

commit: af31a76731725f4850083dc6a181043ec240186e [log] [tgz]
author: Austin Clements <austin@google.com> Tue Jan 12 18:25:40 2021 -0500
committer: Austin Clements <austin@google.com> Thu Mar 17 01:25:15 2022 +0000
tree: 619d55a9f996961513c31a826421ab0f608ec48a
parent: c932945851e32cfac81a8556479d59f053297364 [diff] [blame]
diff --git a/benchmath/anone.go b/benchmath/anone.go
new file mode 100644
index 0000000..0ce86b6
--- /dev/null
+++ b/benchmath/anone.go

@@ -0,0 +1,134 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package benchmath
+
+import (
+	"fmt"
+	"math"
+	"sync"
+
+	"github.com/aclements/go-moremath/stats"
+)
+
+// AssumeNothing is a non-parametric Assumption (that is, it makes no
+// distributional assumptions). The summary statistic is the sample
+// median and comparisons are done using the Mann-Whitney U test.
+//
+// This is a good default assumption for benchmarks.
+// There's substantial evidence that benchmark results are non-normal.
+// The disadvantage (of any non-parametric methods) is that this is
+// less statistically powerful than parametric methods.
+var AssumeNothing = assumeNothing{}
+
+type assumeNothing struct{}
+
+var _ Assumption = assumeNothing{}
+
+// medianCache maps from ciKey to stats.QuantileCIResult for median
+// confidence intervals.
+var medianCache sync.Map
+
+func medianCI(n int, confidence float64) stats.QuantileCIResult {
+	type ciKey struct {
+		n          int
+		confidence float64
+	}
+	key := ciKey{n, confidence}
+	if ciX, ok := medianCache.Load(key); ok {
+		return ciX.(stats.QuantileCIResult)
+	}
+	ci := stats.QuantileCI(n, 0.5, confidence)
+	medianCache.Store(key, ci)
+	return ci
+}
+
+// medianSamples returns the minimum number of samples required to get
+// a finite confidence interval at the given confidence level.
+func medianSamples(confidence float64) (op string, n int) {
+	const limit = 50
+	// We need at least two samples to have an interval.
+	for n = 2; n <= limit; n++ {
+		ci := medianCI(n, confidence)
+		if 0 < ci.LoOrder && ci.HiOrder <= n {
+			return ">=", n
+		}
+	}
+	return ">", limit
+}
+
+func (assumeNothing) SummaryLabel() string {
+	return "median"
+}
+
+func (assumeNothing) Summary(s *Sample, confidence float64) Summary {
+	ci := medianCI(len(s.Values), confidence)
+	median, lo, hi := ci.SampleCI(s.sample())
+
+	var warnings []error
+	if math.IsInf(lo, 0) || math.IsInf(hi, 0) {
+		// Explain to the user why there's a ±∞
+		op, need := medianSamples(confidence)
+		msg := fmt.Errorf("need %s %d samples for confidence interval at level %v", op, need, confidence)
+		warnings = append(warnings, msg)
+	}
+
+	return Summary{median, lo, hi, ci.Confidence, warnings}
+}
+
+// uTestMinP[n] is the minimum possible P value for the U-test with
+// two samples of size n.
+//
+// Generated by go run mktables.go.
+var uTestMinP = []float64{
+	1: 1,
+	2: 0.3333333333333333,
+	3: 0.1,
+	4: 0.02857142857142857,
+	5: 0.007936507936507936,
+	6: 0.0021645021645021645,
+	7: 0.0005827505827505828,
+	8: 0.0001554001554001554,
+	9: 4.113533525298231e-05,
+}
+
+// uTestSamples returns the minimum number of samples required for the
+// U-test to achieve statistical significance at the given alpha
+// level.
+func uTestSamples(alpha float64) (op string, n int) {
+	for n, minP := range uTestMinP {
+		if n == 0 {
+			continue
+		}
+		if minP <= alpha {
+			return ">=", n
+		}
+	}
+	return ">", len(uTestMinP)
+}
+
+func (assumeNothing) Compare(s1, s2 *Sample) Comparison {
+	res, err := stats.MannWhitneyUTest(s1.Values, s2.Values, stats.LocationDiffers)
+	if err != nil {
+		// The U-test failed. Report as if there's no
+		// significant difference, along with the error.
+		return Comparison{P: 1, N1: len(s1.Values), N2: len(s2.Values), Alpha: s1.Thresholds.CompareAlpha, Warnings: []error{err}}
+	}
+	cmp := Comparison{P: res.P, N1: res.N1, N2: res.N2, Alpha: s1.Thresholds.CompareAlpha}
+	// Warn if there aren't enough samples to report a difference
+	// even if they were maximally diverged.
+	if cmp.P > cmp.Alpha {
+		op, n := uTestSamples(cmp.Alpha)
+		if cmp.N1 < n && cmp.N2 < n {
+			// We could deal with asymmetric sample sizes
+			// by first ramping up the smaller sample
+			// until the minimum P value is sufficient or
+			// the sample sizes are equal. But it doesn't
+			// seem worth the complexity.
+			msg := fmt.Errorf("need %s %d samples to detect a difference at alpha level %v", op, n, cmp.Alpha)
+			cmp.Warnings = append(cmp.Warnings, msg)
+		}
+	}
+	return cmp
+}
commit	af31a76731725f4850083dc6a181043ec240186e	[log] [tgz]
author	Austin Clements <austin@google.com>	Tue Jan 12 18:25:40 2021 -0500
committer	Austin Clements <austin@google.com>	Thu Mar 17 01:25:15 2022 +0000
tree	619d55a9f996961513c31a826421ab0f608ec48a
parent	c932945851e32cfac81a8556479d59f053297364 [diff] [blame]