benchstat: split significance tests into delta.go
Change-Id: Ied07ec9cec690a1927e9e32fc8d47f36521f658f
Reviewed-on: https://go-review.googlesource.com/35935
Reviewed-by: Quentin Smith <quentin@golang.org>
diff --git a/cmd/benchstat/delta.go b/cmd/benchstat/delta.go
new file mode 100644
index 0000000..dc91c8f
--- /dev/null
+++ b/cmd/benchstat/delta.go
@@ -0,0 +1,72 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Significance tests.
+
+package main
+
+import (
+ "errors"
+
+ "golang.org/x/perf/internal/stats"
+)
+
+// A DeltaTest compares the old and new metrics and returns a p-value: the
+// expected probability that they are drawn from the same distribution.
+//
+// If a p-value cannot be computed, the DeltaTest returns an
+// error explaining why. Common errors include ErrSamplesEqual
+// (all samples are equal), ErrSampleSize (there aren't enough samples),
+// and ErrZeroVariance (the sample has zero variance).
+//
+// As a special case, the no-op test NoDeltaTest returns -1, nil.
+type DeltaTest func(old, new *Metrics) (float64, error)
+
+// Sentinel errors returned by a DeltaTest when no p-value can be computed.
+var (
+ ErrSamplesEqual = errors.New("all equal") // all samples are equal
+ ErrSampleSize = errors.New("too few samples") // there aren't enough samples
+ ErrZeroVariance = errors.New("zero variance") // the sample has zero variance
+)
+
+// NoDeltaTest is the DeltaTest that performs no significance test; it ignores its arguments and always returns -1, nil.
+func NoDeltaTest(old, new *Metrics) (pval float64, err error) {
+ return -1, nil
+}
+
+// TTest is a DeltaTest using the two-sample Welch t-test on the RValues of old and new.
+func TTest(old, new *Metrics) (pval float64, err error) {
+ t, err := stats.TwoSampleWelchTTest(stats.Sample{Xs: old.RValues}, stats.Sample{Xs: new.RValues}, stats.LocationDiffers)
+ if err != nil {
+ return -1, convertErr(err) // no p-value; translate known stats errors to this package's errors
+ }
+ return t.P, nil
+}
+
+// UTest is a DeltaTest using the Mann-Whitney U test on the RValues of old and new.
+func UTest(old, new *Metrics) (pval float64, err error) {
+ u, err := stats.MannWhitneyUTest(old.RValues, new.RValues, stats.LocationDiffers)
+ if err != nil {
+ return -1, convertErr(err) // no p-value; translate known stats errors to this package's errors
+ }
+ return u.P, nil
+}
+
+// convertErr converts the stats package's internal sentinel errors
+// (ErrZeroVariance, ErrSampleSize, ErrSamplesEqual) to the errors exported
+// by this package and expected from a DeltaTest; any other error is returned unchanged.
+// Using different errors makes it possible for clients to use
+// package benchstat without access to the internal stats package,
+// and it also gives us a chance to use shorter error messages.
+func convertErr(err error) error {
+ switch err {
+ case stats.ErrZeroVariance:
+ return ErrZeroVariance
+ case stats.ErrSampleSize:
+ return ErrSampleSize
+ case stats.ErrSamplesEqual:
+ return ErrSamplesEqual
+ }
+ return err // not one of the known stats sentinels: pass it through as is
+}
diff --git a/cmd/benchstat/main.go b/cmd/benchstat/main.go
index 734f286..ca5ba02 100644
--- a/cmd/benchstat/main.go
+++ b/cmd/benchstat/main.go
@@ -119,14 +119,14 @@
flagHTML = flag.Bool("html", false, "print results as an HTML table")
)
-var deltaTestNames = map[string]func(old, new *Metrics) (float64, error){
- "none": notest,
- "u": utest,
- "u-test": utest,
- "utest": utest,
- "t": ttest,
- "t-test": ttest,
- "ttest": ttest,
+var deltaTestNames = map[string]DeltaTest{ // test name -> DeltaTest; several spellings select the same test
+ "none": NoDeltaTest,
+ "u": UTest,
+ "u-test": UTest,
+ "utest": UTest,
+ "t": TTest,
+ "t-test": TTest,
+ "ttest": TTest,
}
type row struct {
@@ -187,13 +187,7 @@
scaler := NewScaler(old.Mean, old.Unit)
row := newRow(key.Benchmark, old.Format(scaler), new.Format(scaler), "~ ")
- if testerr == stats.ErrZeroVariance {
- row.add("(zero variance)")
- } else if testerr == stats.ErrSampleSize {
- row.add("(too few samples)")
- } else if testerr == stats.ErrSamplesEqual {
- row.add("(all equal)")
- } else if testerr != nil {
+ if testerr != nil {
row.add(fmt.Sprintf("(%s)", testerr))
} else if pval < *flagAlpha {
row.cols[3] = fmt.Sprintf("%+.2f%%", ((new.Mean/old.Mean)-1.0)*100.0)
@@ -499,25 +493,3 @@
return unit
}
}
-
-// Significance tests.
-
-func notest(old, new *Metrics) (pval float64, err error) {
- return -1, nil
-}
-
-func ttest(old, new *Metrics) (pval float64, err error) {
- t, err := stats.TwoSampleWelchTTest(stats.Sample{Xs: old.RValues}, stats.Sample{Xs: new.RValues}, stats.LocationDiffers)
- if err != nil {
- return -1, err
- }
- return t.P, nil
-}
-
-func utest(old, new *Metrics) (pval float64, err error) {
- u, err := stats.MannWhitneyUTest(old.RValues, new.RValues, stats.LocationDiffers)
- if err != nil {
- return -1, err
- }
- return u.P, nil
-}