benchstat: split significance tests into delta.go

Change-Id: Ied07ec9cec690a1927e9e32fc8d47f36521f658f
Reviewed-on: https://go-review.googlesource.com/35935
Reviewed-by: Quentin Smith <quentin@golang.org>

commit: 35ee1bd057a3cb1720e74acb37b44aac3ec639c7 [log] [tgz]
author: Russ Cox <rsc@golang.org> Fri Jan 27 12:03:26 2017 -0500
committer: Russ Cox <rsc@golang.org> Mon Jan 30 16:22:40 2017 +0000
tree: f93f16cb98601b02d57c24c2719d42c68eabb13a
parent: 83910fa1be56788034531b3950c8c98608628516 [diff]
diff --git a/cmd/benchstat/delta.go b/cmd/benchstat/delta.go
new file mode 100644
index 0000000..dc91c8f
--- /dev/null
+++ b/cmd/benchstat/delta.go

@@ -0,0 +1,72 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Significance tests.
+
+package main
+
+import (
+	"errors"
+
+	"golang.org/x/perf/internal/stats"
+)
+
+// A DeltaTest compares the old and new metrics and returns the p-value
+// of a significance test of whether they come from the same distribution.
+//
+// If a p-value cannot be computed, the DeltaTest returns an error
+// explaining why; TTest and UTest return -1 alongside the error.
+// Common errors include ErrSamplesEqual (all samples are equal),
+// ErrSampleSize (too few samples), and ErrZeroVariance (zero variance).
+//
+// As a special case, the missing test NoDeltaTest returns -1, nil.
+type DeltaTest func(old, new *Metrics) (float64, error)
+
+// Errors returned by DeltaTest; convertErr maps the stats package's errors onto these.
+var (
+	ErrSamplesEqual = errors.New("all equal")
+	ErrSampleSize   = errors.New("too few samples")
+	ErrZeroVariance = errors.New("zero variance")
+)
+
+// NoDeltaTest applies no delta test; it ignores old and new and always returns -1, nil.
+func NoDeltaTest(old, new *Metrics) (pval float64, err error) {
+	return -1, nil
+}
+
+// TTest is a DeltaTest using the two-sample Welch t-test on old.RValues and new.RValues.
+func TTest(old, new *Metrics) (pval float64, err error) {
+	t, err := stats.TwoSampleWelchTTest(stats.Sample{Xs: old.RValues}, stats.Sample{Xs: new.RValues}, stats.LocationDiffers)
+	if err != nil {
+		return -1, convertErr(err) // no p-value available: return -1 with the converted error
+	}
+	return t.P, nil
+}
+
+// UTest is a DeltaTest using the Mann-Whitney U test on old.RValues and new.RValues.
+func UTest(old, new *Metrics) (pval float64, err error) {
+	u, err := stats.MannWhitneyUTest(old.RValues, new.RValues, stats.LocationDiffers)
+	if err != nil {
+		return -1, convertErr(err) // no p-value available: return -1 with the converted error
+	}
+	return u.P, nil
+}
+
+// convertErr converts from the stats package's internal errors
+// to errors exported by this package and expected from
+// a DeltaTest. Any other error is returned unchanged.
+// Using different errors makes it possible for clients to use
+// this package without access to the internal stats package,
+// and it also gives us a chance to use shorter error messages.
+func convertErr(err error) error {
+	switch err {
+	case stats.ErrZeroVariance:
+		return ErrZeroVariance
+	case stats.ErrSampleSize:
+		return ErrSampleSize
+	case stats.ErrSamplesEqual:
+		return ErrSamplesEqual
+	}
+	return err // not one of the known stats errors: pass it through as-is
+}

diff --git a/cmd/benchstat/main.go b/cmd/benchstat/main.go
index 734f286..ca5ba02 100644
--- a/cmd/benchstat/main.go
+++ b/cmd/benchstat/main.go

@@ -119,14 +119,14 @@
 	flagHTML      = flag.Bool("html", false, "print results as an HTML table")
 )
 
-var deltaTestNames = map[string]func(old, new *Metrics) (float64, error){
-	"none":   notest,
-	"u":      utest,
-	"u-test": utest,
-	"utest":  utest,
-	"t":      ttest,
-	"t-test": ttest,
-	"ttest":  ttest,
+var deltaTestNames = map[string]DeltaTest{
+	"none":   NoDeltaTest,
+	"u":      UTest,
+	"u-test": UTest,
+	"utest":  UTest,
+	"t":      TTest,
+	"t-test": TTest,
+	"ttest":  TTest,
 }
 
 type row struct {
@@ -187,13 +187,7 @@
 
 				scaler := NewScaler(old.Mean, old.Unit)
 				row := newRow(key.Benchmark, old.Format(scaler), new.Format(scaler), "~   ")
-				if testerr == stats.ErrZeroVariance {
-					row.add("(zero variance)")
-				} else if testerr == stats.ErrSampleSize {
-					row.add("(too few samples)")
-				} else if testerr == stats.ErrSamplesEqual {
-					row.add("(all equal)")
-				} else if testerr != nil {
+				if testerr != nil {
 					row.add(fmt.Sprintf("(%s)", testerr))
 				} else if pval < *flagAlpha {
 					row.cols[3] = fmt.Sprintf("%+.2f%%", ((new.Mean/old.Mean)-1.0)*100.0)
@@ -499,25 +493,3 @@
 		return unit
 	}
 }
-
-// Significance tests.
-
-func notest(old, new *Metrics) (pval float64, err error) {
-	return -1, nil
-}
-
-func ttest(old, new *Metrics) (pval float64, err error) {
-	t, err := stats.TwoSampleWelchTTest(stats.Sample{Xs: old.RValues}, stats.Sample{Xs: new.RValues}, stats.LocationDiffers)
-	if err != nil {
-		return -1, err
-	}
-	return t.P, nil
-}
-
-func utest(old, new *Metrics) (pval float64, err error) {
-	u, err := stats.MannWhitneyUTest(old.RValues, new.RValues, stats.LocationDiffers)
-	if err != nil {
-		return -1, err
-	}
-	return u.P, nil
-}
commit	35ee1bd057a3cb1720e74acb37b44aac3ec639c7	[log] [tgz]
author	Russ Cox <rsc@golang.org>	Fri Jan 27 12:03:26 2017 -0500
committer	Russ Cox <rsc@golang.org>	Mon Jan 30 16:22:40 2017 +0000
tree	f93f16cb98601b02d57c24c2719d42c68eabb13a
parent	83910fa1be56788034531b3950c8c98608628516 [diff]