cmd/benchstat: import from rsc.io/benchstat
I copied the code from various dependencies in go-moremath
into a single 'internal/stats' package. That package is at the top
level of the repo because I expect to pull much of benchcmp
into an importable package.
For golang/go#14304.
Change-Id: Ie114839b2901f5060c202feb3ffc768bf43ce5da
Reviewed-on: https://go-review.googlesource.com/35503
Run-TryBot: Russ Cox <rsc@golang.org>
Reviewed-by: Quentin Smith <quentin@golang.org>
diff --git a/cmd/benchstat/README.md b/cmd/benchstat/README.md
new file mode 100644
index 0000000..9f40820
--- /dev/null
+++ b/cmd/benchstat/README.md
@@ -0,0 +1,85 @@
+# Benchstat
+
+Benchstat computes and compares statistics about benchmarks.
+
+Usage:
+
+ benchstat [-delta-test name] [-geomean] [-html] old.txt [new.txt] [more.txt ...]
+
+Each input file should contain the concatenated output of a number of runs
+of ``go test -bench.'' For each different benchmark listed in an input file,
+benchstat computes the mean, minimum, and maximum run time, after removing
+outliers using the interquartile range rule.
+
+If invoked on a single input file, benchstat prints the per-benchmark
+statistics for that file.
+
+If invoked on a pair of input files, benchstat adds to the output a column
+showing the statistics from the second file and a column showing the percent
+change in mean from the first to the second file. Next to the percent
+change, benchstat shows the p-value and sample sizes from a test of the two
+distributions of benchmark times. Small p-values indicate that the two
+distributions are significantly different. If the test indicates that there
+was no significant change between the two benchmarks (defined as p > 0.05),
+benchstat displays a single ~ instead of the percent change.
+
+The -delta-test option controls which significance test is applied: utest
+(Mann-Whitney U-test), ttest (two-sample Welch t-test), or none. The default
+is the U-test, sometimes also referred to as the Wilcoxon rank sum test.
+
+If invoked on more than two input files, benchstat prints the per-benchmark
+statistics for all the files, showing one column of statistics for each
+file, with no column for percent change or statistical significance.
+
+The -html option causes benchstat to print the results as an HTML table.
+
+## Example
+
+Suppose we collect benchmark results from running ``go test -bench=Encode''
+several times before and after a particular change.
+
+The file old.txt contains:
+
+ BenchmarkGobEncode 100 13552735 ns/op 56.63 MB/s
+ BenchmarkJSONEncode 50 32395067 ns/op 59.90 MB/s
+ BenchmarkGobEncode 100 13553943 ns/op 56.63 MB/s
+ BenchmarkJSONEncode 50 32334214 ns/op 60.01 MB/s
+ BenchmarkGobEncode 100 13606356 ns/op 56.41 MB/s
+ BenchmarkJSONEncode 50 31992891 ns/op 60.65 MB/s
+ BenchmarkGobEncode 100 13683198 ns/op 56.09 MB/s
+ BenchmarkJSONEncode 50 31735022 ns/op 61.15 MB/s
+
+The file new.txt contains:
+
+ BenchmarkGobEncode 100 11773189 ns/op 65.19 MB/s
+ BenchmarkJSONEncode 50 32036529 ns/op 60.57 MB/s
+ BenchmarkGobEncode 100 11942588 ns/op 64.27 MB/s
+ BenchmarkJSONEncode 50 32156552 ns/op 60.34 MB/s
+ BenchmarkGobEncode 100 11786159 ns/op 65.12 MB/s
+ BenchmarkJSONEncode 50 31288355 ns/op 62.02 MB/s
+ BenchmarkGobEncode 100 11628583 ns/op 66.00 MB/s
+ BenchmarkJSONEncode 50 31559706 ns/op 61.49 MB/s
+ BenchmarkGobEncode 100 11815924 ns/op 64.96 MB/s
+ BenchmarkJSONEncode 50 31765634 ns/op 61.09 MB/s
+
+The order of the lines in the file does not matter, except that the output
+lists benchmarks in order of appearance.
+
+If run with just one input file, benchstat summarizes that file:
+
+ $ benchstat old.txt
+ name time/op
+ GobEncode 13.6ms ± 1%
+ JSONEncode 32.1ms ± 1%
+ $
+
+If run with two input files, benchstat summarizes and compares:
+
+ $ benchstat old.txt new.txt
+ name old time/op new time/op delta
+ GobEncode 13.6ms ± 1% 11.8ms ± 1% -13.31% (p=0.016 n=4+5)
+ JSONEncode 32.1ms ± 1% 31.8ms ± 1% ~ (p=0.286 n=4+5)
+ $
+
+Note that the JSONEncode result is reported as statistically insignificant
+instead of a -0.93% delta.
diff --git a/cmd/benchstat/main.go b/cmd/benchstat/main.go
new file mode 100644
index 0000000..0b65394
--- /dev/null
+++ b/cmd/benchstat/main.go
@@ -0,0 +1,616 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Benchstat computes and compares statistics about benchmarks.
+//
+// Usage:
+//
+// benchstat [-delta-test name] [-geomean] [-html] old.txt [new.txt] [more.txt ...]
+//
+// Each input file should contain the concatenated output of a number
+// of runs of ``go test -bench.'' For each different benchmark listed in an input file,
+// benchstat computes the mean, minimum, and maximum run time,
+// after removing outliers using the interquartile range rule.
+//
+// If invoked on a single input file, benchstat prints the per-benchmark statistics
+// for that file.
+//
+// If invoked on a pair of input files, benchstat adds to the output a column
+// showing the statistics from the second file and a column showing the
+// percent change in mean from the first to the second file.
+// Next to the percent change, benchstat shows the p-value and sample
+// sizes from a test of the two distributions of benchmark times.
+// Small p-values indicate that the two distributions are significantly different.
+// If the test indicates that there was no significant change between the two
+// benchmarks (defined as p > 0.05), benchstat displays a single ~ instead of
+// the percent change.
+//
+// The -delta-test option controls which significance test is applied:
+// utest (Mann-Whitney U-test), ttest (two-sample Welch t-test), or none.
+// The default is the U-test, sometimes also referred to as the Wilcoxon rank
+// sum test.
+//
+// If invoked on more than two input files, benchstat prints the per-benchmark
+// statistics for all the files, showing one column of statistics for each file,
+// with no column for percent change or statistical significance.
+//
+// The -html option causes benchstat to print the results as an HTML table.
+//
+// Example
+//
+// Suppose we collect benchmark results from running ``go test -bench=Encode''
+// several times before and after a particular change.
+//
+// The file old.txt contains:
+//
+// BenchmarkGobEncode 100 13552735 ns/op 56.63 MB/s
+// BenchmarkJSONEncode 50 32395067 ns/op 59.90 MB/s
+// BenchmarkGobEncode 100 13553943 ns/op 56.63 MB/s
+// BenchmarkJSONEncode 50 32334214 ns/op 60.01 MB/s
+// BenchmarkGobEncode 100 13606356 ns/op 56.41 MB/s
+// BenchmarkJSONEncode 50 31992891 ns/op 60.65 MB/s
+// BenchmarkGobEncode 100 13683198 ns/op 56.09 MB/s
+// BenchmarkJSONEncode 50 31735022 ns/op 61.15 MB/s
+//
+// The file new.txt contains:
+//
+// BenchmarkGobEncode 100 11773189 ns/op 65.19 MB/s
+// BenchmarkJSONEncode 50 32036529 ns/op 60.57 MB/s
+// BenchmarkGobEncode 100 11942588 ns/op 64.27 MB/s
+// BenchmarkJSONEncode 50 32156552 ns/op 60.34 MB/s
+// BenchmarkGobEncode 100 11786159 ns/op 65.12 MB/s
+// BenchmarkJSONEncode 50 31288355 ns/op 62.02 MB/s
+// BenchmarkGobEncode 100 11628583 ns/op 66.00 MB/s
+// BenchmarkJSONEncode 50 31559706 ns/op 61.49 MB/s
+// BenchmarkGobEncode 100 11815924 ns/op 64.96 MB/s
+// BenchmarkJSONEncode 50 31765634 ns/op 61.09 MB/s
+//
+// The order of the lines in the file does not matter, except that the
+// output lists benchmarks in order of appearance.
+//
+// If run with just one input file, benchstat summarizes that file:
+//
+// $ benchstat old.txt
+// name time/op
+// GobEncode 13.6ms ± 1%
+// JSONEncode 32.1ms ± 1%
+// $
+//
+// If run with two input files, benchstat summarizes and compares:
+//
+// $ benchstat old.txt new.txt
+// name old time/op new time/op delta
+// GobEncode 13.6ms ± 1% 11.8ms ± 1% -13.31% (p=0.016 n=4+5)
+// JSONEncode 32.1ms ± 1% 31.8ms ± 1% ~ (p=0.286 n=4+5)
+// $
+//
+// Note that the JSONEncode result is reported as
+// statistically insignificant instead of a -0.93% delta.
+//
+package main
+
+import (
+ "bytes"
+ "flag"
+ "fmt"
+ "html"
+ "io/ioutil"
+ "log"
+ "os"
+ "strconv"
+ "strings"
+ "unicode/utf8"
+
+ "golang.org/x/perf/internal/stats"
+)
+
+// usage prints a usage message to standard error and exits with
+// status 2. It is also installed as flag.Usage, so flag-parsing
+// errors reach it too.
+func usage() {
+	fmt.Fprintf(os.Stderr, "usage: benchstat [options] old.txt [new.txt] [more.txt ...]\n")
+	fmt.Fprintf(os.Stderr, "options:\n")
+	flag.PrintDefaults()
+	os.Exit(2)
+}
+
+// Command-line flags.
+var (
+	flagDeltaTest = flag.String("delta-test", "utest", "significance `test` to apply to delta: utest, ttest, or none")
+	flagAlpha     = flag.Float64("alpha", 0.05, "consider change significant if p < `α`")
+	flagGeomean   = flag.Bool("geomean", false, "print the geometric mean of each file")
+	flagHTML      = flag.Bool("html", false, "print results as an HTML table")
+)
+
+// deltaTestNames maps accepted -delta-test spellings (including short
+// and hyphenated aliases) to the corresponding significance test.
+// "none" disables testing; see notest's -1 sentinel.
+var deltaTestNames = map[string]func(old, new *Benchstat) (float64, error){
+	"none":   notest,
+	"u":      utest,
+	"u-test": utest,
+	"utest":  utest,
+	"t":      ttest,
+	"t-test": ttest,
+	"ttest":  ttest,
+}
+
+// A row is one output line of a table: an ordered list of column
+// strings.
+type row struct {
+	cols []string
+}
+
+// newRow returns a row with the given initial columns.
+func newRow(cols ...string) *row {
+	return &row{cols: cols}
+}
+
+// add appends one column to the row.
+func (r *row) add(col string) {
+	r.cols = append(r.cols, col)
+}
+
+// trim removes trailing empty columns from the row.
+func (r *row) trim() {
+	for len(r.cols) > 0 && r.cols[len(r.cols)-1] == "" {
+		r.cols = r.cols[:len(r.cols)-1]
+	}
+}
+
+func main() {
+	log.SetPrefix("benchstat: ")
+	log.SetFlags(0)
+	flag.Usage = usage
+	flag.Parse()
+	// An unrecognized -delta-test name looks up as nil and is treated
+	// like missing arguments: print usage and exit(2).
+	deltaTest := deltaTestNames[strings.ToLower(*flagDeltaTest)]
+	if flag.NArg() < 1 || deltaTest == nil {
+		flag.Usage()
+	}
+
+	// Read in benchmark data.
+	c := readFiles(flag.Args())
+	for _, stat := range c.Stats {
+		stat.ComputeStats()
+	}
+
+	var tables [][]*row
+	switch len(c.Configs) {
+	case 2:
+		// Exactly two input files: old/new comparison with a delta
+		// column, one table per metric unit.
+		before, after := c.Configs[0], c.Configs[1]
+		key := BenchKey{}
+		for _, key.Unit = range c.Units {
+			var table []*row
+			metric := metricOf(key.Unit)
+			for _, key.Benchmark = range c.Benchmarks {
+				key.Config = before
+				old := c.Stats[key]
+				key.Config = after
+				new := c.Stats[key]
+				if old == nil || new == nil {
+					// Benchmark absent from one side; nothing to compare.
+					continue
+				}
+				if len(table) == 0 {
+					table = append(table, newRow("name", "old "+metric, "new "+metric, "delta"))
+				}
+
+				pval, testerr := deltaTest(old, new)
+
+				// Scale both columns by the old mean so they read in
+				// the same unit.
+				scaler := newScaler(old.Mean, old.Unit)
+				row := newRow(key.Benchmark, old.Format(scaler), new.Format(scaler), "~ ")
+				if testerr == stats.ErrZeroVariance {
+					row.add("(zero variance)")
+				} else if testerr == stats.ErrSampleSize {
+					row.add("(too few samples)")
+				} else if testerr == stats.ErrSamplesEqual {
+					row.add("(all equal)")
+				} else if testerr != nil {
+					row.add(fmt.Sprintf("(%s)", testerr))
+				} else if pval < *flagAlpha {
+					// Statistically significant: replace the "~"
+					// placeholder with the percent change.
+					row.cols[3] = fmt.Sprintf("%+.2f%%", ((new.Mean/old.Mean)-1.0)*100.0)
+				}
+				// pval == -1 is notest's sentinel: omit the p/n column.
+				if len(row.cols) == 4 && pval != -1 {
+					row.add(fmt.Sprintf("(p=%0.3f n=%d+%d)", pval, len(old.RValues), len(new.RValues)))
+				}
+				table = append(table, row)
+			}
+			if len(table) > 0 {
+				table = addGeomean(table, c, key.Unit, true)
+				tables = append(tables, table)
+			}
+		}
+
+	default:
+		// One input file, or three or more: one column of statistics
+		// per config, no delta column.
+		key := BenchKey{}
+		for _, key.Unit = range c.Units {
+			var table []*row
+			metric := metricOf(key.Unit)
+
+			if len(c.Configs) > 1 {
+				hdr := newRow("name \\ " + metric)
+				for _, config := range c.Configs {
+					hdr.add(config)
+				}
+				table = append(table, hdr)
+			} else {
+				table = append(table, newRow("name", metric))
+			}
+
+			for _, key.Benchmark = range c.Benchmarks {
+				row := newRow(key.Benchmark)
+				// All columns for a benchmark share the scale picked
+				// from its first non-missing config.
+				var scaler func(float64) string
+				for _, key.Config = range c.Configs {
+					stat := c.Stats[key]
+					if stat == nil {
+						row.add("")
+						continue
+					}
+					if scaler == nil {
+						scaler = newScaler(stat.Mean, stat.Unit)
+					}
+					row.add(stat.Format(scaler))
+				}
+				row.trim()
+				if len(row.cols) > 1 {
+					table = append(table, row)
+				}
+			}
+			table = addGeomean(table, c, key.Unit, false)
+			tables = append(tables, table)
+		}
+	}
+
+	// Compute per-column maximum widths across ALL tables so that the
+	// tables line up with each other in the output.
+	numColumn := 0
+	for _, table := range tables {
+		for _, row := range table {
+			if numColumn < len(row.cols) {
+				numColumn = len(row.cols)
+			}
+		}
+	}
+
+	max := make([]int, numColumn)
+	for _, table := range tables {
+		for _, row := range table {
+			for i, s := range row.cols {
+				// Widths are in runes, not bytes (e.g. "±", "µs").
+				n := utf8.RuneCountInString(s)
+				if max[i] < n {
+					max[i] = n
+				}
+			}
+		}
+	}
+
+	var buf bytes.Buffer
+	for i, table := range tables {
+		if i > 0 {
+			fmt.Fprintf(&buf, "\n")
+		}
+
+		if *flagHTML {
+			fmt.Fprintf(&buf, "<style>.benchstat tbody td:nth-child(1n+2) { text-align: right; padding: 0em 1em; }</style>\n")
+			fmt.Fprintf(&buf, "<table class='benchstat'>\n")
+			printRow := func(row *row, tag string) {
+				fmt.Fprintf(&buf, "<tr>")
+				for _, cell := range row.cols {
+					fmt.Fprintf(&buf, "<%s>%s</%s>", tag, html.EscapeString(cell), tag)
+				}
+				// NOTE(review): no closing </tr> is emitted here, so
+				// the markup is unbalanced; browsers recover, but
+				// confirm whether this is intentional.
+				fmt.Fprintf(&buf, "\n")
+			}
+			printRow(table[0], "th")
+			for _, row := range table[1:] {
+				printRow(row, "td")
+			}
+			fmt.Fprintf(&buf, "</table>\n")
+			continue
+		}
+
+		// headings
+		row := table[0]
+		for i, s := range row.cols {
+			// Expression-switch cases are tried in source order and
+			// default applies only when no case matches, so the final
+			// case fires for the last column even though it is written
+			// after default; for a one-column row, case 0 wins.
+			switch i {
+			case 0:
+				fmt.Fprintf(&buf, "%-*s", max[i], s)
+			default:
+				fmt.Fprintf(&buf, " %-*s", max[i], s)
+			case len(row.cols) - 1:
+				fmt.Fprintf(&buf, " %s\n", s)
+			}
+		}
+
+		// data
+		for _, row := range table[1:] {
+			for i, s := range row.cols {
+				switch i {
+				case 0:
+					fmt.Fprintf(&buf, "%-*s", max[i], s)
+				default:
+					if i == len(row.cols)-1 && len(s) > 0 && s[0] == '(' {
+						// Left-align p value.
+						fmt.Fprintf(&buf, " %s", s)
+						break
+					}
+					fmt.Fprintf(&buf, " %*s", max[i], s)
+				}
+			}
+			fmt.Fprintf(&buf, "\n")
+		}
+	}
+
+	os.Stdout.Write(buf.Bytes())
+}
+
+// addGeomean appends a "[Geo mean]" summary row to table, one column
+// per config, computed over that config's per-benchmark means for
+// unit. If delta is true a final percent-change column comparing the
+// first two configs is added. It is a no-op unless -geomean is set.
+func addGeomean(table []*row, c *Collection, unit string, delta bool) []*row {
+	if !*flagGeomean {
+		return table
+	}
+
+	row := newRow("[Geo mean]")
+	key := BenchKey{Unit: unit}
+	geomeans := []float64{}
+	for _, key.Config = range c.Configs {
+		var means []float64
+		for _, key.Benchmark = range c.Benchmarks {
+			stat := c.Stats[key]
+			if stat != nil {
+				means = append(means, stat.Mean)
+			}
+		}
+		if len(means) == 0 {
+			row.add("")
+			// No data for this config: a delta would be meaningless
+			// (and would index past the end of geomeans below).
+			delta = false
+		} else {
+			geomean := stats.GeoMean(means)
+			geomeans = append(geomeans, geomean)
+			row.add(newScaler(geomean, unit)(geomean) + " ")
+		}
+	}
+	if delta {
+		// Callers pass delta=true only with exactly two configs, so
+		// geomeans holds exactly [old, new] here.
+		row.add(fmt.Sprintf("%+.2f%%", ((geomeans[1]/geomeans[0])-1.0)*100.0))
+	}
+	return append(table, row)
+}
+
+// timeScaler returns a formatter for time values given in
+// nanoseconds. It picks a unit (s, ms, µs, ns) and precision so that
+// ns itself renders with three significant digits, then applies that
+// same fixed scale to every value formatted, keeping a column of
+// values comparable.
+func timeScaler(ns float64) func(float64) string {
+	var format string
+	var scale float64
+	// Thresholds are 99.5, 9.95, 0.995, ... rather than 100, 10, 1 so
+	// that values which would *round up* to the next magnitude are
+	// formatted at the precision they will actually display at.
+	switch x := ns / 1e9; {
+	case x >= 99.5:
+		format, scale = "%.0fs", 1
+	case x >= 9.95:
+		format, scale = "%.1fs", 1
+	case x >= 0.995:
+		format, scale = "%.2fs", 1
+	case x >= 0.0995:
+		format, scale = "%.0fms", 1000
+	case x >= 0.00995:
+		format, scale = "%.1fms", 1000
+	case x >= 0.000995:
+		format, scale = "%.2fms", 1000
+	case x >= 0.0000995:
+		format, scale = "%.0fµs", 1000*1000
+	case x >= 0.00000995:
+		format, scale = "%.1fµs", 1000*1000
+	case x >= 0.000000995:
+		format, scale = "%.2fµs", 1000*1000
+	case x >= 0.0000000995:
+		format, scale = "%.0fns", 1000*1000*1000
+	case x >= 0.00000000995:
+		format, scale = "%.1fns", 1000*1000*1000
+	default:
+		format, scale = "%.2fns", 1000*1000*1000
+	}
+	return func(ns float64) string {
+		return fmt.Sprintf(format, ns/1e9*scale)
+	}
+}
+
+// newScaler returns a formatter appropriate for the given metric
+// unit, anchored at val: ns/op values are delegated to timeScaler;
+// everything else gets an SI-prefixed (k/M/G/T) formatter with three
+// significant digits, plus a unit suffix for B/op and MB/s.
+func newScaler(val float64, unit string) func(float64) string {
+	if unit == "ns/op" {
+		return timeScaler(val)
+	}
+
+	var format string
+	var scale float64
+	var suffix string
+
+	// MB/s values are in units of 1e6 bytes/s; convert to bytes/s
+	// before choosing an SI prefix, and undo the conversion below.
+	prescale := 1.0
+	if unit == "MB/s" {
+		prescale = 1e6
+	}
+
+	// As in timeScaler, thresholds sit at 99.5, 9.95, ... so values
+	// that round up to the next magnitude format correctly.
+	switch x := val * prescale; {
+	case x >= 99500000000000:
+		format, scale, suffix = "%.0f", 1e12, "T"
+	case x >= 9950000000000:
+		format, scale, suffix = "%.1f", 1e12, "T"
+	case x >= 995000000000:
+		format, scale, suffix = "%.2f", 1e12, "T"
+	case x >= 99500000000:
+		format, scale, suffix = "%.0f", 1e9, "G"
+	case x >= 9950000000:
+		format, scale, suffix = "%.1f", 1e9, "G"
+	case x >= 995000000:
+		format, scale, suffix = "%.2f", 1e9, "G"
+	case x >= 99500000:
+		format, scale, suffix = "%.0f", 1e6, "M"
+	case x >= 9950000:
+		format, scale, suffix = "%.1f", 1e6, "M"
+	case x >= 995000:
+		format, scale, suffix = "%.2f", 1e6, "M"
+	case x >= 99500:
+		format, scale, suffix = "%.0f", 1e3, "k"
+	case x >= 9950:
+		format, scale, suffix = "%.1f", 1e3, "k"
+	case x >= 995:
+		format, scale, suffix = "%.2f", 1e3, "k"
+	case x >= 99.5:
+		format, scale, suffix = "%.0f", 1, ""
+	case x >= 9.95:
+		format, scale, suffix = "%.1f", 1, ""
+	default:
+		format, scale, suffix = "%.2f", 1, ""
+	}
+
+	if unit == "B/op" {
+		suffix += "B"
+	}
+	if unit == "MB/s" {
+		suffix += "B/s"
+	}
+	scale /= prescale
+
+	return func(val float64) string {
+		return fmt.Sprintf(format+suffix, val/scale)
+	}
+}
+
+// Format renders the summary for b as "mean ± spread", e.g.
+// "13.6ms ± 1%", using scaler to format the mean. The spread is the
+// larger relative deviation of Min or Max from the Mean.
+func (b *Benchstat) Format(scaler func(float64) string) string {
+	diff := 1 - b.Min/b.Mean // how far Min falls below Mean
+	if d := b.Max/b.Mean - 1; d > diff {
+		diff = d // Max deviates more; use that side
+	}
+	s := scaler(b.Mean)
+	if b.Mean == 0 {
+		// Avoid a NaN percentage; pad so columns stay aligned.
+		s += " "
+	} else {
+		s = fmt.Sprintf("%s ±%3s", s, fmt.Sprintf("%.0f%%", diff*100.0))
+	}
+	return s
+}
+
+// ComputeStats updates the derived statistics in stat from the raw
+// samples in stat.Values: it copies the samples that survive the
+// 1.5×IQR outlier rule into RValues, then computes Min, Mean, and
+// Max over RValues.
+func (stat *Benchstat) ComputeStats() {
+	// Discard outliers using Tukey's fences at 1.5×IQR.
+	values := stats.Sample{Xs: stat.Values}
+	q1, q3 := values.Percentile(0.25), values.Percentile(0.75)
+	lo, hi := q1-1.5*(q3-q1), q3+1.5*(q3-q1)
+	for _, value := range stat.Values {
+		if lo <= value && value <= hi {
+			stat.RValues = append(stat.RValues, value)
+		}
+	}
+
+	// Compute statistics of remaining data.
+	stat.Min, stat.Max = stats.Bounds(stat.RValues)
+	stat.Mean = stats.Mean(stat.RValues)
+}
+
+// A Benchstat is the metrics along one axis (e.g., ns/op or MB/s)
+// for all runs of a specific benchmark.
+type Benchstat struct {
+	Unit    string    // unit string as read from the input, e.g. "ns/op"
+	Values  []float64 // metrics
+	RValues []float64 // metrics with outliers removed
+	Min     float64   // min of RValues
+	Mean    float64   // mean of RValues
+	Max     float64   // max of RValues
+}
+
+// A BenchKey identifies one metric (e.g., "ns/op", "B/op") from one
+// benchmark (function name sans "Benchmark" prefix) in one
+// configuration (input file name).
+type BenchKey struct {
+	Config, Benchmark, Unit string
+}
+
+// A Collection is a set of benchmark results indexed by config,
+// benchmark, and unit.
+type Collection struct {
+	Stats map[BenchKey]*Benchstat
+
+	// Configs, Benchmarks, and Units give the set of configs,
+	// benchmarks, and units from the keys in Stats in an order
+	// meant to match the order the benchmarks were read in.
+	Configs, Benchmarks, Units []string
+}
+
+// AddStat returns the Benchstat for key, creating it if it does not
+// exist yet. Creating a stat also records key's config, benchmark,
+// and unit in the collection's ordered lists, preserving first-seen
+// order.
+func (c *Collection) AddStat(key BenchKey) *Benchstat {
+	if stat, ok := c.Stats[key]; ok {
+		return stat
+	}
+
+	// Record each key component once, in first-seen order.
+	appendIfMissing := func(list *[]string, s string) {
+		for _, have := range *list {
+			if have == s {
+				return
+			}
+		}
+		*list = append(*list, s)
+	}
+	appendIfMissing(&c.Configs, key.Config)
+	appendIfMissing(&c.Benchmarks, key.Benchmark)
+	appendIfMissing(&c.Units, key.Unit)
+
+	stat := &Benchstat{Unit: key.Unit}
+	c.Stats[key] = stat
+	return stat
+}
+
+// readFiles reads a set of benchmark files into a new Collection,
+// one config per file, in argument order.
+func readFiles(files []string) *Collection {
+	c := Collection{Stats: make(map[BenchKey]*Benchstat)}
+	for _, file := range files {
+		readFile(file, &c)
+	}
+	return &c
+}
+
+// readFile reads a set of benchmarks from a file in to a Collection.
+// The file name doubles as the config name. Lines that do not look
+// like `go test -bench` output are silently skipped; a read error is
+// fatal.
+func readFile(file string, c *Collection) {
+	// NOTE(review): this append is unconditional while AddStat
+	// deduplicates, so naming the same file twice on the command line
+	// produces a duplicate config column — confirm that is intended.
+	c.Configs = append(c.Configs, file)
+	key := BenchKey{Config: file}
+
+	text, err := ioutil.ReadFile(file)
+	if err != nil {
+		log.Fatal(err)
+	}
+	for _, line := range strings.Split(string(text), "\n") {
+		f := strings.Fields(line)
+		// A benchmark line has at least: name, count, value, unit.
+		if len(f) < 4 {
+			continue
+		}
+		name := f[0]
+		if !strings.HasPrefix(name, "Benchmark") {
+			continue
+		}
+		name = strings.TrimPrefix(name, "Benchmark")
+		// Parse error or a zero iteration count both yield n == 0:
+		// not a benchmark result line.
+		n, _ := strconv.Atoi(f[1])
+		if n == 0 {
+			continue
+		}
+
+		key.Benchmark = name
+		// The remaining fields come in value/unit pairs, e.g.
+		// "13552735 ns/op 56.63 MB/s".
+		for i := 2; i+2 <= len(f); i += 2 {
+			val, err := strconv.ParseFloat(f[i], 64)
+			if err != nil {
+				continue
+			}
+			key.Unit = f[i+1]
+			stat := c.AddStat(key)
+			stat.Values = append(stat.Values, val)
+		}
+	}
+}
+
+// metricOf maps a benchmark unit to the human-readable column title
+// used in output tables. Unknown units are returned unchanged.
+func metricOf(unit string) string {
+	titles := map[string]string{
+		"ns/op": "time/op",
+		"B/op":  "alloc/op",
+		"MB/s":  "speed",
+	}
+	if title, ok := titles[unit]; ok {
+		return title
+	}
+	return unit
+}
+
+// Significance tests.
+
+// notest performs no significance test. The -1 p-value is a sentinel
+// telling main to omit the "(p=... n=...)" annotation.
+func notest(old, new *Benchstat) (pval float64, err error) {
+	return -1, nil
+}
+
+// ttest applies a two-sample Welch t-test to the outlier-filtered
+// samples of old and new.
+func ttest(old, new *Benchstat) (pval float64, err error) {
+	t, err := stats.TwoSampleWelchTTest(stats.Sample{Xs: old.RValues}, stats.Sample{Xs: new.RValues}, stats.LocationDiffers)
+	if err != nil {
+		return -1, err
+	}
+	return t.P, nil
+}
+
+// utest applies a Mann-Whitney U-test to the outlier-filtered samples
+// of old and new.
+func utest(old, new *Benchstat) (pval float64, err error) {
+	u, err := stats.MannWhitneyUTest(old.RValues, new.RValues, stats.LocationDiffers)
+	if err != nil {
+		return -1, err
+	}
+	return u.P, nil
+}
diff --git a/internal/stats/alg.go b/internal/stats/alg.go
new file mode 100644
index 0000000..f3774b5
--- /dev/null
+++ b/internal/stats/alg.go
@@ -0,0 +1,112 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+// Miscellaneous helper algorithms
+
+import (
+ "fmt"
+)
+
+// maxint returns the larger of a and b.
+func maxint(a, b int) int {
+	if b > a {
+		return b
+	}
+	return a
+}
+
+// minint returns the smaller of a and b.
+func minint(a, b int) int {
+	if b < a {
+		return b
+	}
+	return a
+}
+
+// sumint returns the sum of the values in xs (0 for an empty slice).
+func sumint(xs []int) int {
+	total := 0
+	for i := range xs {
+		total += xs[i]
+	}
+	return total
+}
+
+// bisect returns an x in [low, high] such that |f(x)| <= tolerance
+// using the bisection method.
+//
+// f(low) and f(high) must have opposite signs.
+//
+// If f does not have a root in this interval (e.g., it is
+// discontinuous), this returns the X of the apparent discontinuity
+// and false.
+func bisect(f func(float64) float64, low, high, tolerance float64) (float64, bool) {
+	flow, fhigh := f(low), f(high)
+	if -tolerance <= flow && flow <= tolerance {
+		return low, true
+	}
+	if -tolerance <= fhigh && fhigh <= tolerance {
+		return high, true
+	}
+	if mathSign(flow) == mathSign(fhigh) {
+		panic(fmt.Sprintf("root of f is not bracketed by [low, high]; f(%g)=%g f(%g)=%g", low, flow, high, fhigh))
+	}
+	for {
+		mid := (high + low) / 2
+		fmid := f(mid)
+		if -tolerance <= fmid && fmid <= tolerance {
+			return mid, true
+		}
+		// The interval can no longer be halved in float64:
+		// apparent discontinuity.
+		if mid == high || mid == low {
+			return mid, false
+		}
+		if mathSign(fmid) == mathSign(flow) {
+			low, flow = mid, fmid
+		} else {
+			// fmid has fhigh's sign; only flow is consulted above, so
+			// fhigh itself need not be tracked (the original kept a
+			// dead "fhigh = fmid" store here).
+			high = mid
+		}
+	}
+}
+
+// bisectBool implements the bisection method on a boolean function.
+// It returns x1, x2 ∈ [low, high], x1 < x2 such that f(x1) != f(x2)
+// and x2 - x1 <= xtol.
+//
+// If f(low) == f(high), it panics.
+func bisectBool(f func(float64) bool, low, high, xtol float64) (x1, x2 float64) {
+	flow, fhigh := f(low), f(high)
+	if flow == fhigh {
+		panic(fmt.Sprintf("root of f is not bracketed by [low, high]; f(%g)=%v f(%g)=%v", low, flow, high, fhigh))
+	}
+	for {
+		if high-low <= xtol {
+			return low, high
+		}
+		mid := (high + low) / 2
+		// The interval can no longer be halved in float64; give up.
+		if mid == high || mid == low {
+			return low, high
+		}
+		// Only flow is compared, so neither flow nor fhigh needs
+		// updating (the original carried dead "flow = fmid" /
+		// "fhigh = fmid" stores).
+		if f(mid) == flow {
+			low = mid // transition lies in (mid, high]
+		} else {
+			high = mid // transition lies in (low, mid]
+		}
+	}
+}
+
+// series returns the sum of the series f(0), f(1), ...
+//
+// Summation stops as soon as adding the next term no longer changes
+// the accumulated value. This implementation is fast, but subject to
+// round-off error.
+func series(f func(float64) float64) float64 {
+	sum := 0.0
+	for n := 0.0; ; n++ {
+		next := sum + f(n)
+		if next == sum {
+			return sum
+		}
+		sum = next
+	}
+}
diff --git a/internal/stats/beta.go b/internal/stats/beta.go
new file mode 100644
index 0000000..9cd1cd7
--- /dev/null
+++ b/internal/stats/beta.go
@@ -0,0 +1,93 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+import "math"
+
+// lgamma returns ln Γ(x), discarding the sign math.Lgamma reports.
+func lgamma(x float64) float64 {
+	v, _ := math.Lgamma(x)
+	return v
+}
+
+// mathBeta returns the value of the complete beta function B(a, b).
+// It is computed in log space — B(x,y) = Γ(x)Γ(y)/Γ(x+y) — to avoid
+// overflowing the intermediate gamma values.
+func mathBeta(a, b float64) float64 {
+	return math.Exp(lgamma(a) + lgamma(b) - lgamma(a+b))
+}
+
+// mathBetaInc returns the value of the regularized incomplete beta
+// function Iₓ(a, b).
+//
+// This is not to be confused with the "incomplete beta function",
+// which can be computed as BetaInc(x, a, b)*Beta(a, b).
+//
+// If x < 0 or x > 1, returns NaN.
+func mathBetaInc(x, a, b float64) float64 {
+	// Based on Numerical Recipes in C, section 6.4. This uses the
+	// continued fraction definition of I:
+	//
+	//  (xᵃ*(1-x)ᵇ)/(a*B(a,b)) * (1/(1+(d₁/(1+(d₂/(1+...))))))
+	//
+	// where B(a,b) is the beta function and
+	//
+	//  d_{2m+1} = -(a+m)(a+b+m)x/((a+2m)(a+2m+1))
+	//  d_{2m}   = m(b-m)x/((a+2m-1)(a+2m))
+	if x < 0 || x > 1 {
+		return math.NaN()
+	}
+	bt := 0.0
+	if 0 < x && x < 1 {
+		// Compute the coefficient before the continued fraction,
+		// in log space to avoid overflow: x^a (1-x)^b / B(a,b).
+		bt = math.Exp(lgamma(a+b) - lgamma(a) - lgamma(b) +
+			a*math.Log(x) + b*math.Log(1-x))
+	}
+	// The continued fraction converges rapidly only for
+	// x < (a+1)/(a+b+2); otherwise use I_x(a,b) = 1 - I_{1-x}(b,a).
+	if x < (a+1)/(a+b+2) {
+		// Compute continued fraction directly.
+		return bt * betacf(x, a, b) / a
+	} else {
+		// Compute continued fraction after symmetry transform.
+		return 1 - bt*betacf(1-x, b, a)/b
+	}
+}
+
+// betacf is the continued fraction component of the regularized
+// incomplete beta function Iₓ(a, b), evaluated with a Lentz-style
+// forward recurrence (Numerical Recipes §6.4).
+func betacf(x, a, b float64) float64 {
+	const maxIterations = 200
+	const epsilon = 3e-14 // relative convergence threshold per step
+
+	// raiseZero nudges near-zero intermediates away from zero so the
+	// 1/d and numer/c divisions below cannot blow up.
+	raiseZero := func(z float64) float64 {
+		if math.Abs(z) < math.SmallestNonzeroFloat64 {
+			return math.SmallestNonzeroFloat64
+		}
+		return z
+	}
+
+	c := 1.0
+	d := 1 / raiseZero(1-(a+b)*x/(a+1))
+	h := d // running value of the continued fraction
+	for m := 1; m <= maxIterations; m++ {
+		mf := float64(m)
+
+		// Even step of the recurrence.
+		numer := mf * (b - mf) * x / ((a + 2*mf - 1) * (a + 2*mf))
+		d = 1 / raiseZero(1+numer*d)
+		c = raiseZero(1 + numer/c)
+		h *= d * c
+
+		// Odd step of the recurrence.
+		numer = -(a + mf) * (a + b + mf) * x / ((a + 2*mf) * (a + 2*mf + 1))
+		d = 1 / raiseZero(1+numer*d)
+		c = raiseZero(1 + numer/c)
+		hfac := d * c
+		h *= hfac
+
+		// Converged when a full (even+odd) step changes h by less
+		// than epsilon relatively.
+		if math.Abs(hfac-1) < epsilon {
+			return h
+		}
+	}
+	panic("betainc: a or b too big; failed to converge")
+}
diff --git a/internal/stats/beta_test.go b/internal/stats/beta_test.go
new file mode 100644
index 0000000..5f13080
--- /dev/null
+++ b/internal/stats/beta_test.go
@@ -0,0 +1,28 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+import (
+ "testing"
+)
+
+// TestBetaInc checks mathBetaInc against published reference values.
+func TestBetaInc(t *testing.T) {
+	// Example values from MATLAB betainc documentation.
+	// testFunc (a helper defined elsewhere in this package's tests)
+	// asserts f(k) ≈ v for each k→v pair in the map.
+	testFunc(t, "I_0.5(%v, 3)",
+		func(a float64) float64 { return mathBetaInc(0.5, a, 3) },
+		map[float64]float64{
+			0:  1.00000000000000,
+			1:  0.87500000000000,
+			2:  0.68750000000000,
+			3:  0.50000000000000,
+			4:  0.34375000000000,
+			5:  0.22656250000000,
+			6:  0.14453125000000,
+			7:  0.08984375000000,
+			8:  0.05468750000000,
+			9:  0.03271484375000,
+			10: 0.01928710937500,
+		})
+}
diff --git a/internal/stats/deltadist.go b/internal/stats/deltadist.go
new file mode 100644
index 0000000..bb3ba3f
--- /dev/null
+++ b/internal/stats/deltadist.go
@@ -0,0 +1,57 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+// DeltaDist is the Dirac delta function, centered at T, with total
+// area 1.
+//
+// The CDF of the Dirac delta function is the Heaviside step function,
+// centered at T. Specifically, f(T) == 1.
+type DeltaDist struct {
+	T float64 // location of the unit point mass
+}
+
+// PDF returns +Inf at exactly T and 0 everywhere else.
+// (inf and nan are package-level values defined elsewhere in the
+// stats package — presumably math.Inf(1)/math.NaN(); confirm there.)
+func (d DeltaDist) PDF(x float64) float64 {
+	if x == d.T {
+		return inf
+	}
+	return 0
+}
+
+// pdfEach evaluates PDF over xs; entries default to 0.
+func (d DeltaDist) pdfEach(xs []float64) []float64 {
+	res := make([]float64, len(xs))
+	for i, x := range xs {
+		if x == d.T {
+			res[i] = inf
+		}
+	}
+	return res
+}
+
+// CDF is the Heaviside step: 0 below T, 1 at and above T.
+func (d DeltaDist) CDF(x float64) float64 {
+	if x >= d.T {
+		return 1
+	}
+	return 0
+}
+
+// cdfEach evaluates CDF over xs.
+func (d DeltaDist) cdfEach(xs []float64) []float64 {
+	res := make([]float64, len(xs))
+	for i, x := range xs {
+		res[i] = d.CDF(x)
+	}
+	return res
+}
+
+// InvCDF returns T for any valid probability y; NaN outside [0, 1].
+func (d DeltaDist) InvCDF(y float64) float64 {
+	if y < 0 || y > 1 {
+		return nan
+	}
+	return d.T
+}
+
+// Bounds returns an arbitrary unit-wide interval around T, since all
+// of the distribution's weight sits at the single point T.
+func (d DeltaDist) Bounds() (float64, float64) {
+	return d.T - 1, d.T + 1
+}
diff --git a/internal/stats/dist.go b/internal/stats/dist.go
new file mode 100644
index 0000000..048477d
--- /dev/null
+++ b/internal/stats/dist.go
@@ -0,0 +1,210 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+import "math/rand"
+
+// A DistCommon is a statistical distribution. DistCommon is a base
+// interface provided by both continuous and discrete distributions.
+type DistCommon interface {
+	// CDF returns the cumulative probability Pr[X <= x].
+	//
+	// For continuous distributions, the CDF is the integral of
+	// the PDF from -inf to x.
+	//
+	// For discrete distributions, the CDF is the sum of the PMF
+	// at all defined points from -inf to x, inclusive. Note that
+	// the CDF of a discrete distribution is defined for the whole
+	// real line (unlike the PMF) but has discontinuities where
+	// the PMF is non-zero.
+	//
+	// The CDF is a monotonically increasing function and has a
+	// domain of all real numbers. If the distribution has bounded
+	// support, it has a range of [0, 1]; otherwise it has a range
+	// of (0, 1). Finally, CDF(-inf)==0 and CDF(inf)==1.
+	CDF(x float64) float64
+
+	// Bounds returns reasonable bounds for this distribution's
+	// PDF/PMF and CDF. The total weight outside of these bounds
+	// should be approximately 0.
+	//
+	// For a discrete distribution, both bounds are integer
+	// multiples of Step().
+	//
+	// If this distribution has finite support, it returns exact
+	// bounds l, h such that CDF(l')=0 for all l' < l and
+	// CDF(h')=1 for all h' >= h.
+	Bounds() (float64, float64)
+}
+
+// A Dist is a continuous statistical distribution.
+type Dist interface {
+	DistCommon
+
+	// PDF returns the value of the probability density function
+	// of this distribution at x.
+	PDF(x float64) float64
+}
+
+// A DiscreteDist is a discrete statistical distribution.
+//
+// Most discrete distributions are defined only at integral values of
+// the random variable. However, some are defined at other intervals,
+// so this interface takes a float64 value for the random variable.
+// The probability mass function rounds down to the nearest defined
+// point. Note that float64 values can exactly represent integer
+// values between ±2**53, so this generally shouldn't be an issue for
+// integer-valued distributions (likewise, for half-integer-valued
+// distributions, float64 can exactly represent all values between
+// ±2**52).
+type DiscreteDist interface {
+	DistCommon
+
+	// PMF returns the value of the probability mass function
+	// Pr[X = x'], where x' is x rounded down to the nearest
+	// defined point on the distribution.
+	//
+	// Note for implementers: for integer-valued distributions,
+	// round x using int(math.Floor(x)). Do not use int(x), since
+	// that truncates toward zero (unless all x <= 0 are handled
+	// the same).
+	PMF(x float64) float64
+
+	// Step returns s, where the distribution is defined for sℕ
+	// (that is, at non-negative integer multiples of s).
+	Step() float64
+}
+
+// TODO: Add a Support method for finite support distributions? Or
+// maybe just another return value from Bounds indicating that the
+// bounds are exact?
+
+// TODO: Plot method to return a pre-configured Plot object with
+// reasonable bounds and an integral function? Have to distinguish
+// PDF/CDF/InvCDF. Three methods? Argument?
+//
+// Doesn't have to be a method of Dist. Could be just a function that
+// takes a Dist and uses Bounds.
+
+// InvCDF returns the inverse CDF function of the given distribution
+// (also known as the quantile function or the percent point
+// function). This is a function f such that f(dist.CDF(x)) == x. If
+// dist.CDF is only weakly monotonic (that it, there are intervals
+// over which it is constant) and y > 0, f returns the smallest x that
+// satisfies this condition. In general, the inverse CDF is not
+// well-defined for y==0, but for convenience if y==0, f returns the
+// largest x that satisfies this condition. For distributions with
+// infinite support both the largest and smallest x are -Inf; however,
+// for distributions with finite support, this is the lower bound of
+// the support.
+//
+// If y < 0 or y > 1, f returns NaN.
+//
+// If dist implements InvCDF(float64) float64, this returns that
+// method. Otherwise, it returns a function that uses a generic
+// numerical method to construct the inverse CDF at y by finding x
+// such that dist.CDF(x) == y. This may have poor precision around
+// points of discontinuity, including f(0) and f(1).
+func InvCDF(dist DistCommon) func(y float64) (x float64) {
+	// Use the distribution's own analytic inverse if it has one.
+	type invCDF interface {
+		InvCDF(float64) float64
+	}
+	if dist, ok := dist.(invCDF); ok {
+		return dist.InvCDF
+	}
+
+	// Otherwise, use a numerical algorithm.
+	//
+	// TODO: For discrete distributions, use the step size to
+	// inform this computation.
+	return func(y float64) (x float64) {
+		const almostInf = 1e100
+		const xtol = 1e-16
+
+		if y < 0 || y > 1 {
+			return nan
+		} else if y == 0 {
+			l, _ := dist.Bounds()
+			if dist.CDF(l) == 0 {
+				// Finite support
+				return l
+			} else {
+				// Infinite support
+				return -inf
+			}
+		} else if y == 1 {
+			_, h := dist.Bounds()
+			if dist.CDF(h) == 1 {
+				// Finite support
+				return h
+			} else {
+				// Infinite support
+				return inf
+			}
+		}
+
+		// Find loX, hiX for which cdf(loX) < y <= cdf(hiX).
+		// Start from x=0 and search outward with an
+		// exponentially growing step to bracket y.
+		var loX, loY, hiX, hiY float64
+		x1, y1 := 0.0, dist.CDF(0)
+		xdelta := 1.0
+		if y1 < y {
+			// Search upward.
+			hiX, hiY = x1, y1
+			for hiY < y && hiX != inf {
+				loX, loY, hiX = hiX, hiY, hiX+xdelta
+				hiY = dist.CDF(hiX)
+				xdelta *= 2
+			}
+		} else {
+			// Search downward.
+			loX, loY = x1, y1
+			for y <= loY && loX != -inf {
+				hiX, hiY, loX = loX, loY, loX-xdelta
+				loY = dist.CDF(loX)
+				xdelta *= 2
+			}
+		}
+		if loX == -inf {
+			return loX
+		} else if hiX == inf {
+			return hiX
+		}
+
+		// Use bisection on the interval to find the smallest
+		// x at which cdf(x) <= y.
+		_, x = bisectBool(func(x float64) bool {
+			return dist.CDF(x) < y
+		}, loX, hiX, xtol)
+		return
+	}
+}
+
+// Rand returns a random number generator that draws from the given
+// distribution. The returned generator takes an optional source of
+// randomness; if this is nil, it uses the default global source.
+//
+// If dist implements Rand(*rand.Rand) float64, Rand returns that
+// method. Otherwise, it returns a generic generator based on dist's
+// inverse CDF (which may in turn use an efficient implementation or a
+// generic numerical implementation; see InvCDF).
+func Rand(dist DistCommon) func(*rand.Rand) float64 {
+	type distRand interface {
+		Rand(*rand.Rand) float64
+	}
+	if dist, ok := dist.(distRand); ok {
+		return dist.Rand
+	}
+
+	// Otherwise, use a generic algorithm: inverse transform
+	// sampling via the (possibly numerical) inverse CDF.
+	inv := InvCDF(dist)
+	return func(r *rand.Rand) float64 {
+		var y float64
+		// Redraw until y != 0 so we never evaluate inv(0),
+		// which may be -Inf for infinite-support distributions.
+		for y == 0 {
+			if r == nil {
+				y = rand.Float64()
+			} else {
+				y = r.Float64()
+			}
+		}
+		return inv(y)
+	}
+}
diff --git a/internal/stats/mathx.go b/internal/stats/mathx.go
new file mode 100644
index 0000000..68a9a0a
--- /dev/null
+++ b/internal/stats/mathx.go
@@ -0,0 +1,75 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+import "math"
+
+// mathSign returns the sign of x: -1 if x < 0, 0 if x == 0, 1 if x > 0.
+// If x is NaN, it returns NaN. (NaN fails all three comparisons below,
+// so only NaN reaches the final return.)
+func mathSign(x float64) float64 {
+	if x == 0 {
+		return 0
+	} else if x < 0 {
+		return -1
+	} else if x > 0 {
+		return 1
+	}
+	return nan
+}
+
+// smallFactLimit is the largest n for which n! fits in an int64.
+const smallFactLimit = 20 // 20! => 62 bits
+
+// smallFact[n] is n! for 0 <= n <= smallFactLimit.
+var smallFact [smallFactLimit + 1]int64
+
+// init fills the smallFact table of exact integer factorials.
+func init() {
+	smallFact[0] = 1
+	fact := int64(1)
+	for n := int64(1); n <= smallFactLimit; n++ {
+		fact *= n
+		smallFact[n] = fact
+	}
+}
+
+// mathChoose returns the binomial coefficient of n and k, C(n, k).
+// It returns 1 when k == 0 or k == n, and 0 when k is outside [0, n].
+func mathChoose(n, k int) float64 {
+	if k == 0 || k == n {
+		return 1
+	}
+	if k < 0 || n < k {
+		return 0
+	}
+	if n <= smallFactLimit { // Implies k <= smallFactLimit
+		// It's faster to do several integer multiplications
+		// than it is to do an extra integer division.
+		// Remarkably, this is also faster than pre-computing
+		// Pascal's triangle (presumably because this is very
+		// cache efficient).
+		//
+		// numer = n! / (n-k)! = n * (n-1) * ... * (n-k+1).
+		numer := int64(1)
+		for n1 := int64(n - (k - 1)); n1 <= int64(n); n1++ {
+			numer *= n1
+		}
+		denom := smallFact[k]
+		// The division is exact: binomial coefficients are
+		// integers, and k! divides any product of k
+		// consecutive integers.
+		return float64(numer / denom)
+	}
+
+	// For larger n, compute in log space to avoid overflow.
+	return math.Exp(lchoose(n, k))
+}
+
+// mathLchoose returns math.Log(mathChoose(n, k)).
+// It returns NaN when k is outside [0, n].
+func mathLchoose(n, k int) float64 {
+	if k == 0 || k == n {
+		return 0
+	}
+	if k < 0 || n < k {
+		return math.NaN()
+	}
+	return lchoose(n, k)
+}
+
+// lchoose computes log C(n, k) as
+// lgamma(n+1) - lgamma(k+1) - lgamma(n-k+1),
+// assuming 0 < k < n (callers handle the edge cases).
+func lchoose(n, k int) float64 {
+	a, _ := math.Lgamma(float64(n + 1))
+	b, _ := math.Lgamma(float64(k + 1))
+	c, _ := math.Lgamma(float64(n - k + 1))
+	return a - b - c
+}
diff --git a/internal/stats/normaldist.go b/internal/stats/normaldist.go
new file mode 100644
index 0000000..d00f96a
--- /dev/null
+++ b/internal/stats/normaldist.go
@@ -0,0 +1,141 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+import (
+ "math"
+ "math/rand"
+)
+
+// NormalDist is a normal (Gaussian) distribution with mean Mu and
+// standard deviation Sigma.
+type NormalDist struct {
+	Mu, Sigma float64
+}
+
+// StdNormal is the standard normal distribution (Mu = 0, Sigma = 1)
+var StdNormal = NormalDist{0, 1}
+
+// invSqrt2Pi is 1/sqrt(2 * pi), the normalizing constant of the
+// standard normal PDF.
+const invSqrt2Pi = 0.39894228040143267793994605993438186847585863116493465766592583
+
+// PDF returns the normal probability density at x:
+// exp(-(x-Mu)²/(2 Sigma²)) / (Sigma sqrt(2π)).
+func (n NormalDist) PDF(x float64) float64 {
+	z := x - n.Mu
+	return math.Exp(-z*z/(2*n.Sigma*n.Sigma)) * invSqrt2Pi / n.Sigma
+}
+
+// pdfEach evaluates the PDF at each value of xs, hoisting the
+// distribution-dependent constants out of the loop.
+func (n NormalDist) pdfEach(xs []float64) []float64 {
+	res := make([]float64, len(xs))
+	if n.Mu == 0 && n.Sigma == 1 {
+		// Standard normal fast path
+		for i, x := range xs {
+			res[i] = math.Exp(-x*x/2) * invSqrt2Pi
+		}
+	} else {
+		a := -1 / (2 * n.Sigma * n.Sigma)
+		b := invSqrt2Pi / n.Sigma
+		for i, x := range xs {
+			z := x - n.Mu
+			res[i] = math.Exp(z*z*a) * b
+		}
+	}
+	return res
+}
+
+// CDF returns the cumulative probability Pr[X <= x], computed via
+// Erfc rather than Erf for better accuracy in the lower tail.
+func (n NormalDist) CDF(x float64) float64 {
+	return math.Erfc(-(x-n.Mu)/(n.Sigma*math.Sqrt2)) / 2
+}
+
+// cdfEach evaluates the CDF at each value of xs, hoisting the
+// 1/(Sigma*sqrt(2)) factor out of the loop.
+func (n NormalDist) cdfEach(xs []float64) []float64 {
+	res := make([]float64, len(xs))
+	a := 1 / (n.Sigma * math.Sqrt2)
+	for i, x := range xs {
+		res[i] = math.Erfc(-(x-n.Mu)*a) / 2
+	}
+	return res
+}
+
+// InvCDF returns the x such that CDF(x) == p. It returns NaN for
+// p outside [0, 1], and ∓Inf for p == 0 or p == 1.
+func (n NormalDist) InvCDF(p float64) (x float64) {
+	// This is based on Peter John Acklam's inverse normal CDF
+	// algorithm: http://home.online.no/~pjacklam/notes/invnorm/
+	// It uses three rational approximations (lower tail, upper
+	// tail, central region) followed by one refinement step.
+	const (
+		a1 = -3.969683028665376e+01
+		a2 = 2.209460984245205e+02
+		a3 = -2.759285104469687e+02
+		a4 = 1.383577518672690e+02
+		a5 = -3.066479806614716e+01
+		a6 = 2.506628277459239e+00
+
+		b1 = -5.447609879822406e+01
+		b2 = 1.615858368580409e+02
+		b3 = -1.556989798598866e+02
+		b4 = 6.680131188771972e+01
+		b5 = -1.328068155288572e+01
+
+		c1 = -7.784894002430293e-03
+		c2 = -3.223964580411365e-01
+		c3 = -2.400758277161838e+00
+		c4 = -2.549732539343734e+00
+		c5 = 4.374664141464968e+00
+		c6 = 2.938163982698783e+00
+
+		d1 = 7.784695709041462e-03
+		d2 = 3.224671290700398e-01
+		d3 = 2.445134137142996e+00
+		d4 = 3.754408661907416e+00
+
+		// Crossover points between the tail and central
+		// approximations.
+		plow  = 0.02425
+		phigh = 1 - plow
+	)
+
+	if p < 0 || p > 1 {
+		return nan
+	} else if p == 0 {
+		return -inf
+	} else if p == 1 {
+		return inf
+	}
+
+	if p < plow {
+		// Rational approximation for lower region.
+		q := math.Sqrt(-2 * math.Log(p))
+		x = (((((c1*q+c2)*q+c3)*q+c4)*q+c5)*q + c6) /
+			((((d1*q+d2)*q+d3)*q+d4)*q + 1)
+	} else if phigh < p {
+		// Rational approximation for upper region.
+		q := math.Sqrt(-2 * math.Log(1-p))
+		x = -(((((c1*q+c2)*q+c3)*q+c4)*q+c5)*q + c6) /
+			((((d1*q+d2)*q+d3)*q+d4)*q + 1)
+	} else {
+		// Rational approximation for central region.
+		q := p - 0.5
+		r := q * q
+		x = (((((a1*r+a2)*r+a3)*r+a4)*r+a5)*r + a6) * q /
+			(((((b1*r+b2)*r+b3)*r+b4)*r+b5)*r + 1)
+	}
+
+	// Refine approximation (one step of Halley's method, per
+	// Acklam's notes), using the exact CDF via Erfc.
+	e := 0.5*math.Erfc(-x/math.Sqrt2) - p
+	u := e * math.Sqrt(2*math.Pi) * math.Exp(x*x/2)
+	x = x - u/(1+x*u/2)
+
+	// Adjust from standard normal.
+	return x*n.Sigma + n.Mu
+}
+
+// Rand draws a random value from the distribution using r, or the
+// global rand source if r is nil.
+func (n NormalDist) Rand(r *rand.Rand) float64 {
+	var x float64
+	if r == nil {
+		x = rand.NormFloat64()
+	} else {
+		x = r.NormFloat64()
+	}
+	// Scale and shift the standard normal variate.
+	return x*n.Sigma + n.Mu
+}
+
+// Bounds returns Mu ± 3 Sigma, which covers nearly all of the
+// distribution's mass.
+func (n NormalDist) Bounds() (float64, float64) {
+	const stddevs = 3
+	return n.Mu - stddevs*n.Sigma, n.Mu + stddevs*n.Sigma
+}
diff --git a/internal/stats/normaldist_test.go b/internal/stats/normaldist_test.go
new file mode 100644
index 0000000..1ea5e17
--- /dev/null
+++ b/internal/stats/normaldist_test.go
@@ -0,0 +1,33 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+import (
+ "fmt"
+ "math"
+ "testing"
+)
+
+// TestNormalDist checks the standard normal PDF and CDF at exact and
+// asymptotic points, and exercises InvCDF for both the standard
+// normal and a shifted/scaled normal.
+func TestNormalDist(t *testing.T) {
+	d := StdNormal
+
+	testFunc(t, fmt.Sprintf("%+v.PDF", d), d.PDF, map[float64]float64{
+		-10000: 0, // approx
+		-1:     1 / math.Sqrt(2*math.Pi) * math.Exp(-0.5),
+		0:      1 / math.Sqrt(2*math.Pi),
+		1:      1 / math.Sqrt(2*math.Pi) * math.Exp(-0.5),
+		10000:  0, // approx
+	})
+
+	testFunc(t, fmt.Sprintf("%+v.CDF", d), d.CDF, map[float64]float64{
+		-10000: 0, // approx
+		0:      0.5,
+		10000:  1, // approx
+	})
+
+	d2 := NormalDist{Mu: 2, Sigma: 5}
+	testInvCDF(t, d, false)
+	testInvCDF(t, d2, false)
+}
diff --git a/internal/stats/package.go b/internal/stats/package.go
new file mode 100644
index 0000000..d0b291e
--- /dev/null
+++ b/internal/stats/package.go
@@ -0,0 +1,27 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package stats implements several statistical distributions,
+// hypothesis tests, and functions for descriptive statistics.
+//
+// Currently stats is fairly small, but for what it does implement, it
+// focuses on high quality, fast implementations with good, idiomatic
+// Go APIs.
+//
+// This is a trimmed fork of github.com/aclements/go-moremath/stats.
+package stats
+
+import (
+ "errors"
+ "math"
+)
+
+// Package-local shorthands for +Inf and NaN.
+var inf = math.Inf(1)
+var nan = math.NaN()
+
+// TODO: Put all errors in the same place and maybe unify them.
+
+var (
+	// ErrSamplesEqual indicates that all samples are equal.
+	ErrSamplesEqual = errors.New("all samples are equal")
+)
diff --git a/internal/stats/sample.go b/internal/stats/sample.go
new file mode 100644
index 0000000..d6fe8a6
--- /dev/null
+++ b/internal/stats/sample.go
@@ -0,0 +1,340 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+import (
+ "math"
+ "sort"
+)
+
+// Sample is a collection of possibly weighted data points.
+type Sample struct {
+	// Xs is the slice of sample values.
+	Xs []float64
+
+	// Weights[i] is the weight of sample Xs[i]. If Weights is
+	// nil, all Xs have weight 1. Weights must have the same
+	// length as Xs and all values must be non-negative.
+	Weights []float64
+
+	// Sorted indicates that Xs is sorted in ascending order.
+	Sorted bool
+}
+
+// Bounds returns the minimum and maximum values of xs.
+// If xs is empty, it returns NaN, NaN.
+func Bounds(xs []float64) (min float64, max float64) {
+	if len(xs) == 0 {
+		return math.NaN(), math.NaN()
+	}
+	min, max = xs[0], xs[0]
+	for _, x := range xs {
+		if x < min {
+			min = x
+		}
+		if x > max {
+			max = x
+		}
+	}
+	return
+}
+
+// Bounds returns the minimum and maximum values of the Sample.
+//
+// If the Sample is weighted, this ignores samples with zero weight.
+//
+// This is constant time if s.Sorted and there are no zero-weighted
+// values.
+func (s Sample) Bounds() (min float64, max float64) {
+	// An unsorted, unweighted sample reduces to a plain scan.
+	if len(s.Xs) == 0 || (!s.Sorted && s.Weights == nil) {
+		return Bounds(s.Xs)
+	}
+
+	if s.Sorted {
+		if s.Weights == nil {
+			return s.Xs[0], s.Xs[len(s.Xs)-1]
+		}
+		min, max = math.NaN(), math.NaN()
+		// Find the first non-zero-weighted value for min.
+		for i, w := range s.Weights {
+			if w != 0 {
+				min = s.Xs[i]
+				break
+			}
+		}
+		if math.IsNaN(min) {
+			// All weights are zero: return NaN, NaN.
+			return
+		}
+		// Scan backward for the last non-zero-weighted value.
+		for i := range s.Weights {
+			if s.Weights[len(s.Weights)-i-1] != 0 {
+				max = s.Xs[len(s.Weights)-i-1]
+				break
+			}
+		}
+	} else {
+		min, max = math.Inf(1), math.Inf(-1)
+		for i, x := range s.Xs {
+			w := s.Weights[i]
+			if x < min && w != 0 {
+				min = x
+			}
+			if x > max && w != 0 {
+				max = x
+			}
+		}
+		if math.IsInf(min, 0) {
+			// No non-zero-weighted values were found.
+			min, max = math.NaN(), math.NaN()
+		}
+	}
+	return
+}
+
+// vecSum returns the sum of xs.
+func vecSum(xs []float64) float64 {
+	sum := 0.0
+	for _, x := range xs {
+		sum += x
+	}
+	return sum
+}
+
+// Sum returns the (possibly weighted) sum of the Sample.
+func (s Sample) Sum() float64 {
+	if s.Weights == nil {
+		return vecSum(s.Xs)
+	}
+	sum := 0.0
+	for i, x := range s.Xs {
+		sum += x * s.Weights[i]
+	}
+	return sum
+}
+
+// Weight returns the total weight of the Sample. For an unweighted
+// Sample this is simply the number of values.
+func (s Sample) Weight() float64 {
+	if s.Weights == nil {
+		return float64(len(s.Xs))
+	}
+	return vecSum(s.Weights)
+}
+
+// Mean returns the arithmetic mean of xs, or NaN if xs is empty.
+// It uses an incremental formulation that avoids accumulating a
+// large intermediate sum.
+func Mean(xs []float64) float64 {
+	if len(xs) == 0 {
+		return math.NaN()
+	}
+	m := 0.0
+	for i, x := range xs {
+		m += (x - m) / float64(i+1)
+	}
+	return m
+}
+
+// Mean returns the arithmetic mean of the Sample, weighting each
+// value by its weight if the Sample is weighted.
+func (s Sample) Mean() float64 {
+	if len(s.Xs) == 0 || s.Weights == nil {
+		return Mean(s.Xs)
+	}
+
+	m, wsum := 0.0, 0.0
+	for i, x := range s.Xs {
+		// Use weighted incremental mean:
+		//   m_i = (1 - w_i/wsum_i) * m_(i-1) + (w_i/wsum_i) * x_i
+		//       = m_(i-1) + (x_i - m_(i-1)) * (w_i/wsum_i)
+		w := s.Weights[i]
+		wsum += w
+		m += (x - m) * w / wsum
+	}
+	return m
+}
+
+// GeoMean returns the geometric mean of xs. xs must be positive;
+// if xs is empty or contains a non-positive value, it returns NaN.
+func GeoMean(xs []float64) float64 {
+	if len(xs) == 0 {
+		return math.NaN()
+	}
+	// Compute the incremental mean of log(x), then exponentiate.
+	m := 0.0
+	for i, x := range xs {
+		if x <= 0 {
+			return math.NaN()
+		}
+		lx := math.Log(x)
+		m += (lx - m) / float64(i+1)
+	}
+	return math.Exp(m)
+}
+
+// GeoMean returns the geometric mean of the Sample. All samples
+// values must be positive.
+func (s Sample) GeoMean() float64 {
+	if len(s.Xs) == 0 || s.Weights == nil {
+		return GeoMean(s.Xs)
+	}
+
+	// Weighted incremental mean of log(x), then exponentiate.
+	m, wsum := 0.0, 0.0
+	for i, x := range s.Xs {
+		w := s.Weights[i]
+		wsum += w
+		lx := math.Log(x)
+		m += (lx - m) * w / wsum
+	}
+	return math.Exp(m)
+}
+
+// Variance returns the sample variance of xs (dividing by n-1).
+// It returns NaN for an empty slice and 0 for a single value.
+func Variance(xs []float64) float64 {
+	if len(xs) == 0 {
+		return math.NaN()
+	} else if len(xs) <= 1 {
+		return 0
+	}
+
+	// Based on Wikipedia's presentation of Welford 1962
+	// (http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm).
+	// This is more numerically stable than the standard two-pass
+	// formula and not prone to massive cancellation.
+	mean, M2 := 0.0, 0.0
+	for n, x := range xs {
+		delta := x - mean
+		mean += delta / float64(n+1)
+		M2 += delta * (x - mean)
+	}
+	return M2 / float64(len(xs)-1)
+}
+
+// Variance returns the sample variance of the Sample. Weighted
+// variance is not yet implemented and panics.
+func (s Sample) Variance() float64 {
+	if len(s.Xs) == 0 || s.Weights == nil {
+		return Variance(s.Xs)
+	}
+	// TODO(austin)
+	panic("Weighted Variance not implemented")
+}
+
+// StdDev returns the sample standard deviation of xs.
+func StdDev(xs []float64) float64 {
+	return math.Sqrt(Variance(xs))
+}
+
+// StdDev returns the sample standard deviation of the Sample.
+// Weighted standard deviation is not yet implemented and panics.
+func (s Sample) StdDev() float64 {
+	if len(s.Xs) == 0 || s.Weights == nil {
+		return StdDev(s.Xs)
+	}
+	// TODO(austin)
+	panic("Weighted StdDev not implemented")
+}
+
+// Percentile returns the pctileth value from the Sample. This uses
+// interpolation method R8 from Hyndman and Fan (1996).
+//
+// pctile will be capped to the range [0, 1]. If len(xs) == 0 or all
+// weights are 0, returns NaN.
+//
+// Percentile(0.5) is the median. Percentile(0.25) and
+// Percentile(0.75) are the first and third quartiles, respectively.
+//
+// This is constant time if s.Sorted and s.Weights == nil.
+func (s Sample) Percentile(pctile float64) float64 {
+	if len(s.Xs) == 0 {
+		return math.NaN()
+	} else if pctile <= 0 {
+		min, _ := s.Bounds()
+		return min
+	} else if pctile >= 1 {
+		_, max := s.Bounds()
+		return max
+	}
+
+	if !s.Sorted {
+		// TODO(austin) Use select algorithm instead
+		s = *s.Copy().Sort()
+	}
+
+	if s.Weights == nil {
+		N := float64(len(s.Xs))
+		//n := pctile * (N + 1) // R6
+		n := 1/3.0 + pctile*(N+1/3.0) // R8
+		kf, frac := math.Modf(n)
+		k := int(kf)
+		if k <= 0 {
+			return s.Xs[0]
+		} else if k >= len(s.Xs) {
+			return s.Xs[len(s.Xs)-1]
+		}
+		// Linearly interpolate between the k'th and (k+1)'th
+		// order statistics.
+		return s.Xs[k-1] + frac*(s.Xs[k]-s.Xs[k-1])
+	} else {
+		// TODO(austin): Implement interpolation
+
+		target := s.Weight() * pctile
+
+		// TODO(austin) If we had cumulative weights, we could
+		// do this in log time.
+		for i, weight := range s.Weights {
+			target -= weight
+			if target < 0 {
+				return s.Xs[i]
+			}
+		}
+		return s.Xs[len(s.Xs)-1]
+	}
+}
+
+// IQR returns the interquartile range of the Sample: the difference
+// between the third and first quartiles.
+//
+// This is constant time if s.Sorted and s.Weights == nil.
+func (s Sample) IQR() float64 {
+	// Sort a copy once so both Percentile calls take the fast path.
+	if !s.Sorted {
+		s = *s.Copy().Sort()
+	}
+	return s.Percentile(0.75) - s.Percentile(0.25)
+}
+
+// sampleSorter implements sort.Interface for a weighted sample,
+// keeping each weight paired with its value during the sort.
+type sampleSorter struct {
+	xs      []float64
+	weights []float64
+}
+
+func (p *sampleSorter) Len() int {
+	return len(p.xs)
+}
+
+func (p *sampleSorter) Less(i, j int) bool {
+	return p.xs[i] < p.xs[j]
+}
+
+func (p *sampleSorter) Swap(i, j int) {
+	// Swap values and weights together so they stay aligned.
+	p.xs[i], p.xs[j] = p.xs[j], p.xs[i]
+	p.weights[i], p.weights[j] = p.weights[j], p.weights[i]
+}
+
+// Sort sorts the samples in place in s and returns s.
+//
+// A sorted sample improves the performance of some algorithms.
+func (s *Sample) Sort() *Sample {
+	if s.Sorted || sort.Float64sAreSorted(s.Xs) {
+		// All set
+	} else if s.Weights == nil {
+		sort.Float64s(s.Xs)
+	} else {
+		// Sort values and weights in lockstep.
+		sort.Sort(&sampleSorter{s.Xs, s.Weights})
+	}
+	s.Sorted = true
+	return s
+}
+
+// Copy returns a copy of the Sample.
+//
+// The returned Sample shares no data with the original, so they can
+// be modified (for example, sorted) independently.
+func (s Sample) Copy() *Sample {
+	xs := make([]float64, len(s.Xs))
+	copy(xs, s.Xs)
+
+	// Preserve a nil Weights slice (meaning "unweighted").
+	weights := []float64(nil)
+	if s.Weights != nil {
+		weights = make([]float64, len(s.Weights))
+		copy(weights, s.Weights)
+	}
+
+	return &Sample{xs, weights, s.Sorted}
+}
diff --git a/internal/stats/sample_test.go b/internal/stats/sample_test.go
new file mode 100644
index 0000000..6e32509
--- /dev/null
+++ b/internal/stats/sample_test.go
@@ -0,0 +1,21 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+import "testing"
+
+// TestSamplePercentile checks Percentile (method R8), including the
+// clamping of out-of-range pctile arguments to the sample bounds.
+func TestSamplePercentile(t *testing.T) {
+	s := Sample{Xs: []float64{15, 20, 35, 40, 50}}
+	testFunc(t, "Percentile", s.Percentile, map[float64]float64{
+		-1:  15,
+		0:   15,
+		.05: 15,
+		.30: 19.666666666666666,
+		.40: 27,
+		.95: 50,
+		1:   50,
+		2:   50,
+	})
+}
diff --git a/internal/stats/tdist.go b/internal/stats/tdist.go
new file mode 100644
index 0000000..1d1c704
--- /dev/null
+++ b/internal/stats/tdist.go
@@ -0,0 +1,33 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+import "math"
+
+// A TDist is a Student's t-distribution with V degrees of freedom.
+type TDist struct {
+	V float64
+}
+
+// PDF returns the probability density of the t-distribution at x,
+// computing the gamma-function ratio in log space for stability.
+func (t TDist) PDF(x float64) float64 {
+	return math.Exp(lgamma((t.V+1)/2)-lgamma(t.V/2)) /
+		math.Sqrt(t.V*math.Pi) * math.Pow(1+(x*x)/t.V, -(t.V+1)/2)
+}
+
+// CDF returns Pr[X <= x], expressed via the regularized incomplete
+// beta function. The distribution is symmetric about 0, so the
+// negative tail is computed from the positive one. A NaN argument
+// fails all three comparisons and returns NaN.
+func (t TDist) CDF(x float64) float64 {
+	if x == 0 {
+		return 0.5
+	} else if x > 0 {
+		return 1 - 0.5*mathBetaInc(t.V/(t.V+x*x), t.V/2, 0.5)
+	} else if x < 0 {
+		return 1 - t.CDF(-x)
+	} else {
+		return math.NaN()
+	}
+}
+
+// Bounds returns a fixed [-4, 4] plotting interval, which covers
+// most of the distribution's mass for typical degrees of freedom.
+func (t TDist) Bounds() (float64, float64) {
+	return -4, 4
+}
diff --git a/internal/stats/tdist_test.go b/internal/stats/tdist_test.go
new file mode 100644
index 0000000..7c0b9b7
--- /dev/null
+++ b/internal/stats/tdist_test.go
@@ -0,0 +1,95 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+import "testing"
+
+// TestT checks TDist.PDF and TDist.CDF for 1 and 5 degrees of
+// freedom against precomputed reference values at integer points.
+func TestT(t *testing.T) {
+	testFunc(t, "PDF(%v|v=1)", TDist{1}.PDF, map[float64]float64{
+		-10: 0.0031515830315226806,
+		-9:  0.0038818278802901312,
+		-8:  0.0048970751720583188,
+		-7:  0.0063661977236758151,
+		-6:  0.0086029698968592104,
+		-5:  0.012242687930145799,
+		-4:  0.018724110951987692,
+		-3:  0.031830988618379075,
+		-2:  0.063661977236758149,
+		-1:  0.15915494309189537,
+		0:   0.31830988618379075,
+		1:   0.15915494309189537,
+		2:   0.063661977236758149,
+		3:   0.031830988618379075,
+		4:   0.018724110951987692,
+		5:   0.012242687930145799,
+		6:   0.0086029698968592104,
+		7:   0.0063661977236758151,
+		8:   0.0048970751720583188,
+		9:   0.0038818278802901312})
+	testFunc(t, "PDF(%v|v=5)", TDist{5}.PDF, map[float64]float64{
+		-10: 4.0989816415343313e-05,
+		-9:  7.4601664362590413e-05,
+		-8:  0.00014444303269563934,
+		-7:  0.00030134402928803911,
+		-6:  0.00068848154013743002,
+		-5:  0.0017574383788078445,
+		-4:  0.0051237270519179133,
+		-3:  0.017292578800222964,
+		-2:  0.065090310326216455,
+		-1:  0.21967979735098059,
+		0:   0.3796066898224944,
+		1:   0.21967979735098059,
+		2:   0.065090310326216455,
+		3:   0.017292578800222964,
+		4:   0.0051237270519179133,
+		5:   0.0017574383788078445,
+		6:   0.00068848154013743002,
+		7:   0.00030134402928803911,
+		8:   0.00014444303269563934,
+		9:   7.4601664362590413e-05})
+
+	testFunc(t, "CDF(%v|v=1)", TDist{1}.CDF, map[float64]float64{
+		-10: 0.03172551743055356,
+		-9:  0.035223287477277272,
+		-8:  0.039583424160565539,
+		-7:  0.045167235300866547,
+		-6:  0.052568456711253424,
+		-5:  0.06283295818900117,
+		-4:  0.077979130377369324,
+		-3:  0.10241638234956672,
+		-2:  0.14758361765043321,
+		-1:  0.24999999999999978,
+		0:   0.5,
+		1:   0.75000000000000022,
+		2:   0.85241638234956674,
+		3:   0.89758361765043326,
+		4:   0.92202086962263075,
+		5:   0.93716704181099886,
+		6:   0.94743154328874657,
+		7:   0.95483276469913347,
+		8:   0.96041657583943452,
+		9:   0.96477671252272279})
+	testFunc(t, "CDF(%v|v=5)", TDist{5}.CDF, map[float64]float64{
+		-10: 8.5473787871481787e-05,
+		-9:  0.00014133998712194845,
+		-8:  0.00024645333028622187,
+		-7:  0.00045837375719920225,
+		-6:  0.00092306914479700695,
+		-5:  0.0020523579900266612,
+		-4:  0.0051617077404157259,
+		-3:  0.015049623948731284,
+		-2:  0.05096973941492914,
+		-1:  0.18160873382456127,
+		0:   0.5,
+		1:   0.81839126617543867,
+		2:   0.9490302605850709,
+		3:   0.98495037605126878,
+		4:   0.99483829225958431,
+		5:   0.99794764200997332,
+		6:   0.99907693085520299,
+		7:   0.99954162624280074,
+		8:   0.99975354666971372,
+		9:   0.9998586600128780})
+}
diff --git a/internal/stats/ttest.go b/internal/stats/ttest.go
new file mode 100644
index 0000000..8742298
--- /dev/null
+++ b/internal/stats/ttest.go
@@ -0,0 +1,147 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+import (
+ "errors"
+ "math"
+)
+
+// A TTestResult is the result of a t-test.
+type TTestResult struct {
+	// N1 and N2 are the sizes of the input samples. For a
+	// one-sample t-test, N2 is 0.
+	N1, N2 int
+
+	// T is the value of the t-statistic for this t-test.
+	T float64
+
+	// DoF is the degrees of freedom for this t-test.
+	DoF float64
+
+	// AltHypothesis specifies the alternative hypothesis tested
+	// by this test against the null hypothesis that there is no
+	// difference in the means of the samples.
+	AltHypothesis LocationHypothesis
+
+	// P is p-value for this t-test for the given null hypothesis.
+	P float64
+}
+
+// newTTestResult computes the p-value for t-statistic t with dof
+// degrees of freedom under the given alternative hypothesis and
+// packages it with the sample sizes into a TTestResult.
+func newTTestResult(n1, n2 int, t, dof float64, alt LocationHypothesis) *TTestResult {
+	dist := TDist{dof}
+	var p float64
+	switch alt {
+	case LocationDiffers:
+		// Two-sided: double the one-sided tail probability.
+		p = 2 * (1 - dist.CDF(math.Abs(t)))
+	case LocationLess:
+		p = dist.CDF(t)
+	case LocationGreater:
+		p = 1 - dist.CDF(t)
+	}
+	return &TTestResult{N1: n1, N2: n2, T: t, DoF: dof, AltHypothesis: alt, P: p}
+}
+
+// A TTestSample is a sample that can be used for a one or two sample
+// t-test.
+type TTestSample interface {
+	Weight() float64
+	Mean() float64
+	Variance() float64
+}
+
+var (
+	// ErrSampleSize indicates a sample is too small for the test.
+	ErrSampleSize = errors.New("sample is too small")
+	// ErrZeroVariance indicates a sample has zero variance.
+	ErrZeroVariance = errors.New("sample has zero variance")
+	// ErrMismatchedSamples indicates paired samples differ in length.
+	ErrMismatchedSamples = errors.New("samples have different lengths")
+)
+
+// TwoSampleTTest performs a two-sample (unpaired) Student's t-test on
+// samples x1 and x2. This is a test of the null hypothesis that x1
+// and x2 are drawn from populations with equal means. It assumes x1
+// and x2 are independent samples, that the distributions have equal
+// variance, and that the populations are normally distributed.
+//
+// It returns ErrSampleSize if either sample is empty and
+// ErrZeroVariance if both samples have zero variance.
+func TwoSampleTTest(x1, x2 TTestSample, alt LocationHypothesis) (*TTestResult, error) {
+	n1, n2 := x1.Weight(), x2.Weight()
+	if n1 == 0 || n2 == 0 {
+		return nil, ErrSampleSize
+	}
+	v1, v2 := x1.Variance(), x2.Variance()
+	if v1 == 0 && v2 == 0 {
+		return nil, ErrZeroVariance
+	}
+
+	dof := n1 + n2 - 2
+	// v12 is the pooled variance estimate.
+	v12 := ((n1-1)*v1 + (n2-1)*v2) / dof
+	t := (x1.Mean() - x2.Mean()) / math.Sqrt(v12*(1/n1+1/n2))
+	return newTTestResult(int(n1), int(n2), t, dof, alt), nil
+}
+
+// TwoSampleWelchTTest performs a two-sample (unpaired) Welch's t-test
+// on samples x1 and x2. This is like TwoSampleTTest, but does not
+// assume the distributions have equal variance.
+//
+// It returns ErrSampleSize if either sample has fewer than two
+// values and ErrZeroVariance if both samples have zero variance.
+func TwoSampleWelchTTest(x1, x2 TTestSample, alt LocationHypothesis) (*TTestResult, error) {
+	n1, n2 := x1.Weight(), x2.Weight()
+	if n1 <= 1 || n2 <= 1 {
+		// TODO: Can we still do this with n == 1?
+		return nil, ErrSampleSize
+	}
+	v1, v2 := x1.Variance(), x2.Variance()
+	if v1 == 0 && v2 == 0 {
+		return nil, ErrZeroVariance
+	}
+
+	// Welch–Satterthwaite approximation of the degrees of freedom.
+	dof := math.Pow(v1/n1+v2/n2, 2) /
+		(math.Pow(v1/n1, 2)/(n1-1) + math.Pow(v2/n2, 2)/(n2-1))
+	s := math.Sqrt(v1/n1 + v2/n2)
+	t := (x1.Mean() - x2.Mean()) / s
+	return newTTestResult(int(n1), int(n2), t, dof, alt), nil
+}
+
+// PairedTTest performs a two-sample paired t-test on samples x1 and
+// x2. If μ0 is non-zero, this tests if the average of the difference
+// is significantly different from μ0. If x1 and x2 are identical,
+// this returns a nil result and ErrZeroVariance.
+func PairedTTest(x1, x2 []float64, μ0 float64, alt LocationHypothesis) (*TTestResult, error) {
+	if len(x1) != len(x2) {
+		return nil, ErrMismatchedSamples
+	}
+	if len(x1) <= 1 {
+		// TODO: Can we still do this with n == 1?
+		return nil, ErrSampleSize
+	}
+
+	dof := float64(len(x1) - 1)
+
+	// The paired test is a one-sample test on the differences.
+	diff := make([]float64, len(x1))
+	for i := range x1 {
+		diff[i] = x1[i] - x2[i]
+	}
+	sd := StdDev(diff)
+	if sd == 0 {
+		// TODO: Can we still do the test?
+		return nil, ErrZeroVariance
+	}
+	t := (Mean(diff) - μ0) * math.Sqrt(float64(len(x1))) / sd
+	return newTTestResult(len(x1), len(x2), t, dof, alt), nil
+}
+
+// OneSampleTTest performs a one-sample t-test on sample x. This tests
+// the null hypothesis that the population mean is equal to μ0. This
+// assumes the distribution of the population of sample means is
+// normal.
+//
+// It returns ErrSampleSize if x is empty and ErrZeroVariance if x
+// has zero variance.
+func OneSampleTTest(x TTestSample, μ0 float64, alt LocationHypothesis) (*TTestResult, error) {
+	n, v := x.Weight(), x.Variance()
+	if n == 0 {
+		return nil, ErrSampleSize
+	}
+	if v == 0 {
+		// TODO: Can we still do the test?
+		return nil, ErrZeroVariance
+	}
+	dof := n - 1
+	// t = (mean - μ0) / (s / sqrt(n)).
+	t := (x.Mean() - μ0) * math.Sqrt(n) / math.Sqrt(v)
+	return newTTestResult(int(n), 0, t, dof, alt), nil
+}
diff --git a/internal/stats/ttest_test.go b/internal/stats/ttest_test.go
new file mode 100644
index 0000000..094f20f
--- /dev/null
+++ b/internal/stats/ttest_test.go
@@ -0,0 +1,71 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+import "testing"
+
+// TestTTest checks the t-test implementations against precomputed
+// statistics and p-values for two small fixed samples.
+func TestTTest(t *testing.T) {
+	s1 := Sample{Xs: []float64{2, 1, 3, 4}}
+	s2 := Sample{Xs: []float64{6, 5, 7, 9}}
+
+	// check compares all fields of two TTestResults, using
+	// approximate equality for the floating-point fields.
+	check := func(want, got *TTestResult) {
+		if want.N1 != got.N1 || want.N2 != got.N2 ||
+			!aeq(want.T, got.T) || !aeq(want.DoF, got.DoF) ||
+			want.AltHypothesis != got.AltHypothesis ||
+			!aeq(want.P, got.P) {
+			t.Errorf("want %+v, got %+v", want, got)
+		}
+	}
+	// check3 runs test under each of the three alternative
+	// hypotheses and checks the resulting p-values. Note that the
+	// float64 parameter t shadows the outer *testing.T inside
+	// check3; failures are reported through check.
+	check3 := func(test func(alt LocationHypothesis) (*TTestResult, error), n1, n2 int, t, dof float64, pless, pdiff, pgreater float64) {
+		want := &TTestResult{N1: n1, N2: n2, T: t, DoF: dof}
+
+		want.AltHypothesis = LocationLess
+		want.P = pless
+		got, _ := test(want.AltHypothesis)
+		check(want, got)
+
+		want.AltHypothesis = LocationDiffers
+		want.P = pdiff
+		got, _ = test(want.AltHypothesis)
+		check(want, got)
+
+		want.AltHypothesis = LocationGreater
+		want.P = pgreater
+		got, _ = test(want.AltHypothesis)
+		check(want, got)
+	}
+
+	// Identical samples: t = 0 and the two-sided p-value is 1.
+	check3(func(alt LocationHypothesis) (*TTestResult, error) {
+		return TwoSampleTTest(s1, s1, alt)
+	}, 4, 4, 0, 6,
+		0.5, 1, 0.5)
+	check3(func(alt LocationHypothesis) (*TTestResult, error) {
+		return TwoSampleWelchTTest(s1, s1, alt)
+	}, 4, 4, 0, 6,
+		0.5, 1, 0.5)
+
+	// Clearly separated samples.
+	check3(func(alt LocationHypothesis) (*TTestResult, error) {
+		return TwoSampleTTest(s1, s2, alt)
+	}, 4, 4, -3.9703446152237674, 6,
+		0.0036820296121056195, 0.0073640592242113214, 0.9963179703878944)
+	check3(func(alt LocationHypothesis) (*TTestResult, error) {
+		return TwoSampleWelchTTest(s1, s2, alt)
+	}, 4, 4, -3.9703446152237674, 5.584615384615385,
+		0.004256431565689112, 0.0085128631313781695, 0.9957435684343109)
+
+	// Paired test on the same two samples.
+	check3(func(alt LocationHypothesis) (*TTestResult, error) {
+		return PairedTTest(s1.Xs, s2.Xs, 0, alt)
+	}, 4, 4, -17, 3,
+		0.0002216717691559955, 0.00044334353831207749, 0.999778328230844)
+
+	// One-sample tests against μ0 = 0 and μ0 = mean(s1).
+	check3(func(alt LocationHypothesis) (*TTestResult, error) {
+		return OneSampleTTest(s1, 0, alt)
+	}, 4, 0, 3.872983346207417, 3,
+		0.9847668541689145, 0.030466291662170977, 0.015233145831085482)
+	check3(func(alt LocationHypothesis) (*TTestResult, error) {
+		return OneSampleTTest(s1, 2.5, alt)
+	}, 4, 0, 0, 3,
+		0.5, 1, 0.5)
+}
diff --git a/internal/stats/udist.go b/internal/stats/udist.go
new file mode 100644
index 0000000..d69f13a
--- /dev/null
+++ b/internal/stats/udist.go
@@ -0,0 +1,387 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+import (
+ "math"
+)
+
+// A UDist is the discrete probability distribution of the
+// Mann-Whitney U statistic for a pair of samples of sizes N1 and N2.
+//
+// The details of computing this distribution with no ties can be
+// found in Mann, Henry B.; Whitney, Donald R. (1947). "On a Test of
+// Whether one of Two Random Variables is Stochastically Larger than
+// the Other". Annals of Mathematical Statistics 18 (1): 50–60.
+// Computing this distribution in the presence of ties is described in
+// Klotz, J. H. (1966). "The Wilcoxon, Ties, and the Computer".
+// Journal of the American Statistical Association 61 (315): 772-787
+// and Cheung, Ying Kuen; Klotz, Jerome H. (1997). "The Mann Whitney
+// Wilcoxon Distribution Using Linked Lists". Statistica Sinica 7:
+// 805-813 (the former paper contains details that are glossed over in
+// the latter paper but has mathematical typesetting issues, so it's
+// easiest to get the context from the former paper and the details
+// from the latter).
+type UDist struct {
+	// N1 and N2 are the sizes of the two samples.
+	N1, N2 int
+
+	// T is the count of the number of ties at each rank in the
+	// input distributions. T may be nil, in which case it is
+	// assumed there are no ties (which is equivalent to an M+N
+	// slice of 1s). It must be the case that Sum(T) == M+N.
+	T []int
+}
+
+// hasTies returns true if d has any tied samples, that is, if any
+// entry of the tie vector T exceeds 1. A nil T always reports false.
+func (d UDist) hasTies() bool {
+	for _, t := range d.T {
+		if t > 1 {
+			return true
+		}
+	}
+	return false
+}
+
+// p returns the p_{d.N1,d.N2} function defined by Mann, Whitney 1947
+// for values of U from 0 up to and including the U argument.
+//
+// This algorithm runs in Θ(N1*N2*U) = O(N1²N2²) time and is quite
+// fast for small values of N1 and N2. However, it does not handle ties.
+func (d UDist) p(U int) []float64 {
+	// This is a dynamic programming implementation of the
+	// recursive recurrence definition given by Mann and Whitney:
+	//
+	//   p_{n,m}(U) = (n * p_{n-1,m}(U-m) + m * p_{n,m-1}(U)) / (n+m)
+	//   p_{n,m}(U) = 0                           if U < 0
+	//   p_{0,m}(U) = p{n,0}(U) = 1 / nCr(m+n, n) if U = 0
+	//                          = 0               if U > 0
+	//
+	// (Note that there is a typo in the original paper. The first
+	// recursive application of p should be for U-m, not U-M.)
+	//
+	// Since p{n,m} only depends on p{n-1,m} and p{n,m-1}, we only
+	// need to store one "plane" of the three dimensional space at
+	// a time.
+	//
+	// Furthermore, p_{n,m} = p_{m,n}, so we only construct values
+	// for n <= m and obtain the rest through symmetry.
+	//
+	// We organize the computed values of p as followed:
+	//
+	//       n →   N
+	//     m *
+	//     ↓ * *
+	//       * * *
+	//       * * * *
+	//       * * * *
+	//     M * * * *
+	//
+	// where each * is a slice indexed by U. The code below
+	// computes these left-to-right, top-to-bottom, so it only
+	// stores one row of this matrix at a time. Furthermore,
+	// computing an element in a given U slice only depends on the
+	// same and smaller values of U, so we can overwrite the U
+	// slice we're computing in place as long as we start with the
+	// largest value of U. Finally, even though the recurrence
+	// depends on (n,m) above the diagonal and we use symmetry to
+	// mirror those across the diagonal to (m,n), the mirrored
+	// indexes are always available in the current row, so this
+	// mirroring does not interfere with our ability to recycle
+	// state.
+
+	// Arrange N <= M; the distribution is symmetric in the two
+	// sample sizes, so this only shrinks the table.
+	N, M := d.N1, d.N2
+	if N > M {
+		N, M = M, N
+	}
+
+	// memo[n][U] holds p_{n,m}(U) for the row m currently being
+	// computed.
+	memo := make([][]float64, N+1)
+	for n := range memo {
+		memo[n] = make([]float64, U+1)
+	}
+
+	for m := 0; m <= M; m++ {
+		// Compute p_{0,m}. This is zero except for U=0.
+		memo[0][0] = 1
+
+		// Compute the remainder of this row.
+		nlim := N
+		if m < nlim {
+			nlim = m
+		}
+		for n := 1; n <= nlim; n++ {
+			lp := memo[n-1] // p_{n-1,m}
+			var rp []float64
+			if n <= m-1 {
+				rp = memo[n] // p_{n,m-1}
+			} else {
+				rp = memo[m-1] // p{m-1,n} and m==n
+			}
+
+			// For a given n,m, U is at most n*m.
+			//
+			// TODO: Actually, it's at most ⌈n*m/2⌉, but
+			// then we need to use more complex symmetries
+			// in the inner loop below.
+			ulim := n * m
+			if U < ulim {
+				ulim = U
+			}
+
+			// Apply the recurrence in place, from the
+			// largest U down so smaller U values are still
+			// the previous row's when read.
+			out := memo[n] // p_{n,m}
+			nplusm := float64(n + m)
+			for U1 := ulim; U1 >= 0; U1-- {
+				l := 0.0
+				if U1-m >= 0 {
+					l = float64(n) * lp[U1-m]
+				}
+				r := float64(m) * rp[U1]
+				out[U1] = (l + r) / nplusm
+			}
+		}
+	}
+	return memo[N]
+}
+
+// A ukey identifies a sub-problem in makeUmemo's memoization table:
+// the count for a first sample of size n1 at a doubled U statistic of
+// twoU. (2*U is stored so the key stays integral when ties make U
+// half-integral.)
+type ukey struct {
+	n1   int // size of first sample
+	twoU int // 2*U statistic for this permutation
+}
+
+// This computes the cumulative counts of the Mann-Whitney U
+// distribution in the presence of ties using the computation from
+// Cheung, Ying Kuen; Klotz, Jerome H. (1997). "The Mann Whitney
+// Wilcoxon Distribution Using Linked Lists". Statistica Sinica 7:
+// 805-813, with much guidance from appendix L of Klotz, A
+// Computational Approach to Statistics.
+//
+// makeUmemo constructs a table memo[K][ukey{n1, 2*U}], where K is the
+// number of ranks (up to len(t)), n1 is the size of the first sample
+// (up to the n1 argument), and U is the U statistic (up to the
+// argument twoU/2). The value of an entry in the memo table is the
+// number of permutations of a sample of size n1 in a ranking with tie
+// vector t[:K] having a U statistic <= U.
+func makeUmemo(twoU, n1 int, t []int) []map[ukey]float64 {
+	// Another candidate for a fast implementation is van de Wiel,
+	// "The split-up algorithm: a fast symbolic method for
+	// computing p-values of distribution-free statistics". This
+	// is what's used by R's coin package. It's a comparatively
+	// recent publication, so it's presumably faster (or perhaps
+	// just more general) than previous techniques, but I can't
+	// get my hands on the paper.
+	//
+	// TODO: ~40% of this function's time is spent in mapassign on
+	// the assignment lines in the two loops and another ~20% in
+	// map access and iteration. Improving map behavior or
+	// replacing the maps altogether with some other constant-time
+	// structure could double performance.
+	//
+	// TODO: The worst case for this function is when there are
+	// few ties. Yet the best case overall is when there are *no*
+	// ties. Can we get the best of both worlds? Use the fast
+	// algorithm for the most part when there are few ties and mix
+	// in the general algorithm just where we need it? That's
+	// certainly possible for sub-problems where t[:k] has no
+	// ties, but that doesn't help if t[0] has a tie but nothing
+	// else does. Is it possible to rearrange the ranks without
+	// messing up our computation of the U statistic for
+	// sub-problems?
+
+	K := len(t)
+
+	// Compute a coefficients. The a slice is indexed by k (a[0]
+	// is unused). These are the rank coefficients from the
+	// Cheung & Klotz recurrence.
+	a := make([]int, K+1)
+	a[1] = t[0]
+	for k := 2; k <= K; k++ {
+		a[k] = a[k-1] + t[k-2] + t[k-1]
+	}
+
+	// Create the memo table for the counts function, A. The A
+	// slice is indexed by k (A[0] is unused).
+	//
+	// In "The Mann Whitney Distribution Using Linked Lists", they
+	// use linked lists (*gasp*) for this, but within each K it's
+	// really just a memoization table, so it's faster to use a
+	// map. The outer structure is a slice indexed by k because we
+	// need to find all memo entries with certain values of k.
+	//
+	// TODO: The n1 and twoU values in the ukeys follow strict
+	// patterns. For each K value, the n1 values are every integer
+	// between two bounds. For each (K, n1) value, the twoU values
+	// are every integer multiple of a certain base between two
+	// bounds. It might be worth turning these into directly
+	// indexible slices.
+	A := make([]map[ukey]float64, K+1)
+	// Seed with the target triple; its count (like all counts) is
+	// filled in by the upward pass below.
+	A[K] = map[ukey]float64{ukey{n1: n1, twoU: twoU}: 0}
+
+	// Compute memo table (k, n1, twoU) triples from high K values
+	// to low K values. This drives the recurrence relation
+	// downward to figure out all of the needed argument triples.
+	//
+	// TODO: Is it possible to generate this table bottom-up? If
+	// so, this could be a pure dynamic programming algorithm and
+	// we could discard the K dimension. We could at least store
+	// the inputs in a more compact representation that replaces
+	// the twoU dimension with an interval and a step size (as
+	// suggested by Cheung, Klotz, not that they make it at all
+	// clear *why* they're suggesting this).
+	tsum := sumint(t) // always ∑ t[0:k]
+	for k := K - 1; k >= 2; k-- {
+		tsum -= t[k]
+		A[k] = make(map[ukey]float64)
+
+		// Construct A[k] from A[k+1].
+		for A_kplus1 := range A[k+1] {
+			rkLow := maxint(0, A_kplus1.n1-tsum)
+			rkHigh := minint(A_kplus1.n1, t[k])
+			for rk := rkLow; rk <= rkHigh; rk++ {
+				twoU_k := A_kplus1.twoU - rk*(a[k+1]-2*A_kplus1.n1+rk)
+				n1_k := A_kplus1.n1 - rk
+				// Only record sub-problems whose 2*U is
+				// actually achievable for t[:k].
+				if twoUmin(n1_k, t[:k], a) <= twoU_k && twoU_k <= twoUmax(n1_k, t[:k], a) {
+					key := ukey{n1: n1_k, twoU: twoU_k}
+					A[k][key] = 0
+				}
+			}
+		}
+	}
+
+	// Fill counts in memo table from low K values to high K
+	// values. This unwinds the recurrence relation.
+
+	// Start with K==2 base case.
+	//
+	// TODO: Later computations depend on these, but these don't
+	// depend on anything (including each other), so if K==2, we
+	// can skip the memo table altogether.
+	if K < 2 {
+		panic("K < 2")
+	}
+	N_2 := t[0] + t[1]
+	for A_2i := range A[2] {
+		Asum := 0.0
+		r2Low := maxint(0, A_2i.n1-t[0])
+		r2High := (A_2i.twoU - A_2i.n1*(t[0]-A_2i.n1)) / N_2
+		for r2 := r2Low; r2 <= r2High; r2++ {
+			Asum += mathChoose(t[0], A_2i.n1-r2) *
+				mathChoose(t[1], r2)
+		}
+		A[2][A_2i] = Asum
+	}
+
+	// Derive counts for the rest of the memo table.
+	tsum = t[0] // always ∑ t[0:k-1]
+	for k := 3; k <= K; k++ {
+		tsum += t[k-2]
+
+		// Compute A[k] counts from A[k-1] counts.
+		for A_ki := range A[k] {
+			Asum := 0.0
+			rkLow := maxint(0, A_ki.n1-tsum)
+			rkHigh := minint(A_ki.n1, t[k-1])
+			for rk := rkLow; rk <= rkHigh; rk++ {
+				twoU_kminus1 := A_ki.twoU - rk*(a[k]-2*A_ki.n1+rk)
+				n1_kminus1 := A_ki.n1 - rk
+				x, ok := A[k-1][ukey{n1: n1_kminus1, twoU: twoU_kminus1}]
+				// A missing entry above the maximum
+				// achievable 2*U means every placement
+				// counts.
+				if !ok && twoUmax(n1_kminus1, t[:k-1], a) < twoU_kminus1 {
+					x = mathChoose(tsum, n1_kminus1)
+				}
+				Asum += x * mathChoose(t[k-1], rk)
+			}
+			A[k][A_ki] = Asum
+		}
+	}
+
+	return A
+}
+
+// twoUmin returns the smallest achievable value of 2*U for a first
+// sample of size n1 in a ranking with tie vector t and rank
+// coefficients a (as computed by makeUmemo). It greedily places the
+// sample in the lowest ranks (k counts up from 1).
+func twoUmin(n1 int, t, a []int) int {
+	K := len(t)
+	twoU := -n1 * n1
+	n1_k := n1
+	for k := 1; k <= K; k++ {
+		twoU_k := minint(n1_k, t[k-1])
+		twoU += twoU_k * a[k]
+		n1_k -= twoU_k
+	}
+	return twoU
+}
+
+// twoUmax returns the largest achievable value of 2*U for a first
+// sample of size n1 in a ranking with tie vector t and rank
+// coefficients a. Unlike twoUmin, it greedily places the sample in
+// the highest ranks (k counts down from K).
+func twoUmax(n1 int, t, a []int) int {
+	K := len(t)
+	twoU := -n1 * n1
+	n1_k := n1
+	for k := K; k > 0; k-- {
+		twoU_k := minint(n1_k, t[k-1])
+		twoU += twoU_k * a[k]
+		n1_k -= twoU_k
+	}
+	return twoU
+}
+
+// PMF returns the probability that the U statistic equals U exactly.
+// In the presence of ties, U may be half-integral, which is why the
+// tied path works in terms of 2*U.
+func (d UDist) PMF(U float64) float64 {
+	if U < 0 || U >= 0.5+float64(d.N1*d.N2) {
+		return 0
+	}
+
+	if d.hasTies() {
+		// makeUmemo computes the CDF directly. Take its
+		// difference to get the PMF.
+		p1, ok1 := makeUmemo(int(2*U)-1, d.N1, d.T)[len(d.T)][ukey{d.N1, int(2*U) - 1}]
+		p2, ok2 := makeUmemo(int(2*U), d.N1, d.T)[len(d.T)][ukey{d.N1, int(2 * U)}]
+		if !ok1 || !ok2 {
+			panic("makeUmemo did not return expected memoization table")
+		}
+		// Normalize counts by the total number of permutations.
+		return (p2 - p1) / mathChoose(d.N1+d.N2, d.N1)
+	}
+
+	// There are no ties. Use the fast algorithm. U must be integral.
+	Ui := int(math.Floor(U))
+	// TODO: Use symmetry to minimize U
+	return d.p(Ui)[Ui]
+}
+
+// CDF returns the probability that the U statistic is <= U.
+func (d UDist) CDF(U float64) float64 {
+	if U < 0 {
+		return 0
+	} else if U >= float64(d.N1*d.N2) {
+		return 1
+	}
+
+	if d.hasTies() {
+		// TODO: Minimize U?
+		p, ok := makeUmemo(int(2*U), d.N1, d.T)[len(d.T)][ukey{d.N1, int(2 * U)}]
+		if !ok {
+			panic("makeUmemo did not return expected memoization table")
+		}
+		// Normalize the cumulative count by the total number
+		// of permutations.
+		return p / mathChoose(d.N1+d.N2, d.N1)
+	}
+
+	// There are no ties. Use the fast algorithm. U must be integral.
+	Ui := int(math.Floor(U))
+	// The distribution is symmetric around U = m * n / 2. Sum up
+	// whichever tail is smaller.
+	flip := Ui >= (d.N1*d.N2+1)/2
+	if flip {
+		Ui = d.N1*d.N2 - Ui - 1
+	}
+	pdfs := d.p(Ui)
+	p := 0.0
+	for _, pdf := range pdfs[:Ui+1] {
+		p += pdf
+	}
+	if flip {
+		p = 1 - p
+	}
+	return p
+}
+
+// Step returns the spacing of the distribution's support. U advances
+// in steps of 0.5 (half steps occur only in the presence of ties).
+func (d UDist) Step() float64 {
+	return 0.5
+}
+
+// Bounds returns the range [0, N1*N2] of possible U values.
+func (d UDist) Bounds() (float64, float64) {
+	// TODO: More precise bounds when there are ties.
+	return 0, float64(d.N1 * d.N2)
+}
diff --git a/internal/stats/udist_test.go b/internal/stats/udist_test.go
new file mode 100644
index 0000000..8efcc9d
--- /dev/null
+++ b/internal/stats/udist_test.go
@@ -0,0 +1,322 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+import (
+ "fmt"
+ "math"
+ "testing"
+)
+
+// aeqTable reports whether tables a and b have the same shape and
+// agree to within 1e-6 ("%f" precision) at every cell.
+func aeqTable(a, b [][]float64) bool {
+	if len(a) != len(b) {
+		return false
+	}
+	for i := range a {
+		if len(a[i]) != len(b[i]) {
+			return false
+		}
+		for j := range a[i] {
+			// "%f" precision
+			if math.Abs(a[i][j]-b[i][j]) >= 0.000001 {
+				return false
+			}
+		}
+	}
+	return true
+}
+
+// U distribution for N=3 up to U=5. Rows are indexed by U and
+// columns by the other sample's size m, matching the tables in
+// Mann, Whitney (1947).
+var udist3 = [][]float64{
+	//    m=1         2         3
+	{0.250000, 0.100000, 0.050000}, // U=0
+	{0.500000, 0.200000, 0.100000}, // U=1
+	{0.750000, 0.400000, 0.200000}, // U=2
+	{1.000000, 0.600000, 0.350000}, // U=3
+	{1.000000, 0.800000, 0.500000}, // U=4
+	{1.000000, 0.900000, 0.650000}, // U=5
+}
+
+// U distribution for N=5 up to U=5, in the same layout as udist3.
+var udist5 = [][]float64{
+	//    m=1         2         3         4         5
+	{0.166667, 0.047619, 0.017857, 0.007937, 0.003968}, // U=0
+	{0.333333, 0.095238, 0.035714, 0.015873, 0.007937}, // U=1
+	{0.500000, 0.190476, 0.071429, 0.031746, 0.015873}, // U=2
+	{0.666667, 0.285714, 0.125000, 0.055556, 0.027778}, // U=3
+	{0.833333, 0.428571, 0.196429, 0.095238, 0.047619}, // U=4
+	{1.000000, 0.571429, 0.285714, 0.142857, 0.075397}, // U=5
+}
+
+// TestUDist checks the no-ties U distribution CDF against the tables
+// given in Mann, Whitney (1947).
+func TestUDist(t *testing.T) {
+	// makeTable tabulates CDF(U) for U = 0..5 and m = 1..n with
+	// the second sample size fixed at n.
+	makeTable := func(n int) [][]float64 {
+		out := make([][]float64, 6)
+		for U := 0; U < 6; U++ {
+			out[U] = make([]float64, n)
+			for m := 1; m <= n; m++ {
+				out[U][m-1] = UDist{N1: m, N2: n}.CDF(float64(U))
+			}
+		}
+		return out
+	}
+	// fmtTable renders a table for use in failure messages.
+	fmtTable := func(a [][]float64) string {
+		out := fmt.Sprintf("%8s", "m=")
+		for m := 1; m <= len(a[0]); m++ {
+			out += fmt.Sprintf("%9d", m)
+		}
+		out += "\n"
+
+		for U, row := range a {
+			out += fmt.Sprintf("U=%-6d", U)
+			for m := 1; m <= len(a[0]); m++ {
+				out += fmt.Sprintf(" %f", row[m-1])
+			}
+			out += "\n"
+		}
+		return out
+	}
+
+	// Compare against tables given in Mann, Whitney (1947).
+	got3 := makeTable(3)
+	if !aeqTable(got3, udist3) {
+		t.Errorf("For n=3, want:\n%sgot:\n%s", fmtTable(udist3), fmtTable(got3))
+	}
+
+	got5 := makeTable(5)
+	if !aeqTable(got5, udist5) {
+		t.Errorf("For n=5, want:\n%sgot:\n%s", fmtTable(udist5), fmtTable(got5))
+	}
+}
+
+// BenchmarkUDist measures the cost of evaluating the exact (no-ties)
+// U distribution CDF at its most expensive point for two samples of
+// size 50.
+func BenchmarkUDist(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		// R uses the exact distribution up to N=50.
+		// N*M/2=1250 is the hardest point to get the CDF for.
+		UDist{N1: 50, N2: 50}.CDF(1250)
+	}
+}
+
+// TestUDistTies checks the U distribution CDF in the presence of
+// ties against published tables and against the direct-enumeration
+// reference implementation udistRef.
+func TestUDistTies(t *testing.T) {
+	// makeTable evaluates the CDF of UDist{m, N-m, t} at half
+	// steps over [minx, maxx], recording only the points at which
+	// the CDF changes value.
+	makeTable := func(m, N int, t []int, minx, maxx float64) [][]float64 {
+		out := [][]float64{}
+		dist := UDist{N1: m, N2: N - m, T: t}
+		for x := minx; x <= maxx; x += 0.5 {
+			// Convert x from uQt' to uQv'.
+			U := x - float64(m*m)/2
+			P := dist.CDF(U)
+			if len(out) == 0 || !aeq(out[len(out)-1][1], P) {
+				out = append(out, []float64{x, P})
+			}
+		}
+		return out
+	}
+	// fmtTable renders a (x, P) table for failure messages.
+	fmtTable := func(table [][]float64) string {
+		out := ""
+		for _, row := range table {
+			out += fmt.Sprintf("%5.1f  %f\n", row[0], row[1])
+		}
+		return out
+	}
+
+	// Compare against Table 1 from Klotz (1966).
+	got := makeTable(5, 10, []int{1, 1, 2, 1, 1, 2, 1, 1}, 12.5, 19.5)
+	want := [][]float64{
+		{12.5, 0.003968}, {13.5, 0.007937},
+		{15.0, 0.023810}, {16.5, 0.047619},
+		{17.5, 0.071429}, {18.0, 0.087302},
+		{19.0, 0.134921}, {19.5, 0.138889},
+	}
+	if !aeqTable(got, want) {
+		t.Errorf("Want:\n%sgot:\n%s", fmtTable(want), fmtTable(got))
+	}
+
+	// A larger example with many tied ranks.
+	got = makeTable(10, 21, []int{6, 5, 4, 3, 2, 1}, 52, 87)
+	want = [][]float64{
+		{52.0, 0.000014}, {56.5, 0.000128},
+		{57.5, 0.000145}, {60.0, 0.000230},
+		{61.0, 0.000400}, {62.0, 0.000740},
+		{62.5, 0.000797}, {64.0, 0.000825},
+		{64.5, 0.001165}, {65.5, 0.001477},
+		{66.5, 0.002498}, {67.0, 0.002725},
+		{67.5, 0.002895}, {68.0, 0.003150},
+		{68.5, 0.003263}, {69.0, 0.003518},
+		{69.5, 0.003603}, {70.0, 0.005648},
+		{70.5, 0.005818}, {71.0, 0.006626},
+		{71.5, 0.006796}, {72.0, 0.008157},
+		{72.5, 0.009688}, {73.0, 0.009801},
+		{73.5, 0.010430}, {74.0, 0.011111},
+		{74.5, 0.014230}, {75.0, 0.014612},
+		{75.5, 0.017249}, {76.0, 0.018307},
+		{76.5, 0.020178}, {77.0, 0.022270},
+		{77.5, 0.023189}, {78.0, 0.026931},
+		{78.5, 0.028207}, {79.0, 0.029979},
+		{79.5, 0.030931}, {80.0, 0.038969},
+		{80.5, 0.043063}, {81.0, 0.044262},
+		{81.5, 0.046389}, {82.0, 0.049581},
+		{82.5, 0.056300}, {83.0, 0.058027},
+		{83.5, 0.063669}, {84.0, 0.067454},
+		{84.5, 0.074122}, {85.0, 0.077425},
+		{85.5, 0.083498}, {86.0, 0.094079},
+		{86.5, 0.096693}, {87.0, 0.101132},
+	}
+	if !aeqTable(got, want) {
+		t.Errorf("Want:\n%sgot:\n%s", fmtTable(want), fmtTable(got))
+	}
+
+	// An example where every rank has exactly two tied values.
+	got = makeTable(8, 16, []int{2, 2, 2, 2, 2, 2, 2, 2}, 32, 54)
+	want = [][]float64{
+		{32.0, 0.000078}, {34.0, 0.000389},
+		{36.0, 0.001088}, {38.0, 0.002642},
+		{40.0, 0.005905}, {42.0, 0.011500},
+		{44.0, 0.021057}, {46.0, 0.035664},
+		{48.0, 0.057187}, {50.0, 0.086713},
+		{52.0, 0.126263}, {54.0, 0.175369},
+	}
+	if !aeqTable(got, want) {
+		t.Errorf("Want:\n%sgot:\n%s", fmtTable(want), fmtTable(got))
+	}
+
+	// Check remaining tables from Klotz against the reference
+	// implementation. checkRef compares both PMF and CDF across
+	// the whole support of the distribution.
+	checkRef := func(n1 int, tie []int) {
+		wantPMF1, wantCDF1 := udistRef(n1, tie)
+
+		dist := UDist{N1: n1, N2: sumint(tie) - n1, T: tie}
+		gotPMF, wantPMF := [][]float64{}, [][]float64{}
+		gotCDF, wantCDF := [][]float64{}, [][]float64{}
+		N := sumint(tie)
+		for U := 0.0; U <= float64(n1*(N-n1)); U += 0.5 {
+			gotPMF = append(gotPMF, []float64{U, dist.PMF(U)})
+			gotCDF = append(gotCDF, []float64{U, dist.CDF(U)})
+			wantPMF = append(wantPMF, []float64{U, wantPMF1[int(U*2)]})
+			wantCDF = append(wantCDF, []float64{U, wantCDF1[int(U*2)]})
+		}
+		if !aeqTable(wantPMF, gotPMF) {
+			t.Errorf("For PMF of n1=%v, t=%v, want:\n%sgot:\n%s", n1, tie, fmtTable(wantPMF), fmtTable(gotPMF))
+		}
+		if !aeqTable(wantCDF, gotCDF) {
+			t.Errorf("For CDF of n1=%v, t=%v, want:\n%sgot:\n%s", n1, tie, fmtTable(wantCDF), fmtTable(gotCDF))
+		}
+	}
+	checkRef(5, []int{1, 1, 2, 1, 1, 2, 1, 1})
+	checkRef(5, []int{1, 1, 2, 1, 1, 1, 2, 1})
+	checkRef(5, []int{1, 3, 1, 2, 1, 1, 1})
+	checkRef(8, []int{1, 2, 1, 1, 1, 1, 2, 2, 1, 2})
+	checkRef(12, []int{3, 3, 4, 3, 4, 5})
+	checkRef(10, []int{1, 2, 3, 4, 5, 6})
+}
+
+// BenchmarkUDistTies measures the tied-distribution CDF at the
+// distribution's midpoint for the worst-case tie structure.
+func BenchmarkUDistTies(b *testing.B) {
+	// Worst case: just one tie.
+	n := 20
+	t := make([]int, 2*n-1)
+	for i := range t {
+		t[i] = 1
+	}
+	t[0] = 2
+
+	for i := 0; i < b.N; i++ {
+		UDist{N1: n, N2: n, T: t}.CDF(float64(n*n) / 2)
+	}
+}
+
+// XTestPrintUmemo dumps a memo table for manual inspection. The X
+// prefix keeps "go test" from running it; drop the X to enable it.
+func XTestPrintUmemo(t *testing.T) {
+	// Reproduce table from Cheung, Klotz.
+	ties := []int{4, 5, 3, 4, 6}
+	printUmemo(makeUmemo(80, 10, ties), ties)
+}
+
+// udistRef computes the PMF and CDF of the U distribution for two
+// samples of sizes n1 and sum(t)-n1 with tie vector t. The returned
+// pmf and cdf are indexed by 2*U.
+//
+// This uses the "graphical method" of Klotz (1966). It is very slow
+// (Θ(∏ (t[i]+1)) = Ω(2^|t|)), but very correct, and hence useful as a
+// reference for testing faster implementations.
+func udistRef(n1 int, t []int) (pmf, cdf []float64) {
+	// Enumerate all u vectors for which 0 <= u_i <= t_i. Count
+	// the number of permutations of two samples of sizes n1 and
+	// sum(t)-n1 with tie vector t and accumulate these counts by
+	// their U statistics in count[2*U].
+	counts := make([]int, 1+2*n1*(sumint(t)-n1))
+
+	u := make([]int, len(t))
+	u[0] = -1 // Get enumeration started.
+enumu:
+	for {
+		// Compute the next u vector (odometer-style increment
+		// with carries).
+		u[0]++
+		for i := 0; i < len(u) && u[i] > t[i]; i++ {
+			if i == len(u)-1 {
+				// All u vectors have been enumerated.
+				break enumu
+			}
+			// Carry.
+			u[i+1]++
+			u[i] = 0
+		}
+
+		// Is this a legal u vector?
+		if sumint(u) != n1 {
+			// Klotz (1966) has a method for directly
+			// enumerating legal u vectors, but the point
+			// of this is to be correct, not fast.
+			continue
+		}
+
+		// Compute 2*U statistic for this u vector.
+		twoU, vsum := 0, 0
+		for i, u_i := range u {
+			v_i := t[i] - u_i
+			// U = U + vsum*u_i + u_i*v_i/2
+			twoU += 2*vsum*u_i + u_i*v_i
+			vsum += v_i
+		}
+
+		// Compute Π choose(t_i, u_i). This is the number of
+		// ways of permuting the input sample under u.
+		prod := 1
+		for i, u_i := range u {
+			// mathChoose returns a float64; +0.5 rounds it
+			// to the nearest integer.
+			prod *= int(mathChoose(t[i], u_i) + 0.5)
+		}
+
+		// Accumulate the permutations on this u path.
+		counts[twoU] += prod
+
+		if false {
+			// Print a table in the form of Klotz's
+			// "direct enumeration" example.
+			//
+			// Convert 2U = 2UQV' to UQt' used in Klotz
+			// examples.
+			UQt := float64(twoU)/2 + float64(n1*n1)/2
+			fmt.Printf("%+v %f %-2d\n", u, UQt, prod)
+		}
+	}
+
+	// Convert counts into probabilities for PMF and CDF by
+	// normalizing by the total number of permutations.
+	pmf = make([]float64, len(counts))
+	cdf = make([]float64, len(counts))
+	total := int(mathChoose(sumint(t), n1) + 0.5)
+	for i, count := range counts {
+		pmf[i] = float64(count) / float64(total)
+		if i > 0 {
+			cdf[i] = cdf[i-1]
+		}
+		cdf[i] += pmf[i]
+	}
+	return
+}
+
+// printUmemo prints the output of makeUmemo for debugging. Each row
+// shows the memoized count pr for a (K, n1, 2*U) triple alongside the
+// corresponding count from the udistRef reference implementation.
+func printUmemo(A []map[ukey]float64, t []int) {
+	fmt.Printf("K\tn1\t2*U\tpr\n")
+	for K := len(A) - 1; K >= 0; K-- {
+		for i, pr := range A[K] {
+			_, ref := udistRef(i.n1, t[:K])
+			fmt.Printf("%v\t%v\t%v\t%v\t%v\n", K, i.n1, i.twoU, pr, ref[i.twoU])
+		}
+	}
+}
diff --git a/internal/stats/utest.go b/internal/stats/utest.go
new file mode 100644
index 0000000..048ebe7
--- /dev/null
+++ b/internal/stats/utest.go
@@ -0,0 +1,274 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+import (
+ "math"
+ "sort"
+)
+
+// A LocationHypothesis specifies the alternative hypothesis of a
+// location test such as a t-test or a Mann-Whitney U-test. The
+// default (zero) value is to test against the alternative hypothesis
+// that they differ.
+//
+// The constant values are ordered so that
+// LocationLess < LocationDiffers < LocationGreater.
type LocationHypothesis int
+
+//go:generate stringer -type LocationHypothesis
+
+const (
+	// LocationLess specifies the alternative hypothesis that the
+	// location of the first sample is less than the second. This
+	// is a one-tailed test.
+	LocationLess LocationHypothesis = -1
+
+	// LocationDiffers specifies the alternative hypothesis that
+	// the locations of the two samples are not equal. This is a
+	// two-tailed test.
+	LocationDiffers LocationHypothesis = 0
+
+	// LocationGreater specifies the alternative hypothesis that
+	// the location of the first sample is greater than the
+	// second. This is a one-tailed test.
+	LocationGreater LocationHypothesis = 1
+)
+
+// A MannWhitneyUTestResult is the result of a Mann-Whitney U-test,
+// as returned by MannWhitneyUTest.
+type MannWhitneyUTestResult struct {
+	// N1 and N2 are the sizes of the input samples.
+	N1, N2 int
+
+	// U is the value of the Mann-Whitney U statistic for this
+	// test, generalized by counting ties as 0.5.
+	//
+	// Given the Cartesian product of the two samples, this is the
+	// number of pairs in which the value from the first sample is
+	// greater than the value of the second, plus 0.5 times the
+	// number of pairs where the values from the two samples are
+	// equal. Hence, U is always an integer multiple of 0.5 (it is
+	// a whole integer if there are no ties) in the range [0, N1*N2].
+	//
+	// U statistics always come in pairs, depending on which
+	// sample is "first". The mirror U for the other sample can be
+	// calculated as N1*N2 - U.
+	//
+	// There are many equivalent statistics with slightly
+	// different definitions. The Wilcoxon (1945) W statistic
+	// (generalized for ties) is U + (N1(N1+1))/2. It is also
+	// common to use 2U to eliminate the half steps and Smid
+	// (1956) uses N1*N2 - 2U to additionally center the
+	// distribution.
+	U float64
+
+	// AltHypothesis specifies the alternative hypothesis tested
+	// by this test against the null hypothesis that there is no
+	// difference in the locations of the samples.
+	AltHypothesis LocationHypothesis
+
+	// P is the p-value of the Mann-Whitney test for the given
+	// null hypothesis.
+	P float64
+}
+
+// MannWhitneyExactLimit gives the largest sample size for which the
+// exact U distribution will be used for the Mann-Whitney U-test.
+//
+// Using the exact distribution is necessary for small sample sizes
+// because the distribution is highly irregular. However, computing
+// the distribution for large sample sizes is both computationally
+// expensive and unnecessary because it quickly approaches a normal
+// approximation. Computing the distribution for two 50 value samples
+// takes a few milliseconds on a 2014 laptop.
+//
+// This is exported as a variable so callers may tune the
+// cost/accuracy trade-off.
+var MannWhitneyExactLimit = 50
+
+// MannWhitneyTiesExactLimit gives the largest sample size for which
+// the exact U distribution will be used for the Mann-Whitney U-test
+// in the presence of ties.
+//
+// Computing this distribution is more expensive than computing the
+// distribution without ties, so this is set lower. Computing this
+// distribution for two 25 value samples takes about ten milliseconds
+// on a 2014 laptop.
+//
+// Like MannWhitneyExactLimit, this is exported so it can be tuned.
+var MannWhitneyTiesExactLimit = 25
+
+// MannWhitneyUTest performs a Mann-Whitney U-test [1,2] of the null
+// hypothesis that two samples come from the same population against
+// the alternative hypothesis that one sample tends to have larger or
+// smaller values than the other.
+//
+// This is similar to a t-test, but unlike the t-test, the
+// Mann-Whitney U-test is non-parametric (it does not assume a normal
+// distribution). It has very slightly lower efficiency than the
+// t-test on normal distributions.
+//
+// Computing the exact U distribution is expensive for large sample
+// sizes, so this uses a normal approximation for sample sizes larger
+// than MannWhitneyExactLimit if there are no ties or
+// MannWhitneyTiesExactLimit if there are ties. This normal
+// approximation uses both the tie correction and the continuity
+// correction.
+//
+// This can fail with ErrSampleSize if either sample is empty or
+// ErrSamplesEqual if all sample values are equal.
+//
+// This is also known as a Mann-Whitney-Wilcoxon test and is
+// equivalent to the Wilcoxon rank-sum test, though the Wilcoxon
+// rank-sum test differs in nomenclature.
+//
+// [1] Mann, Henry B.; Whitney, Donald R. (1947). "On a Test of
+// Whether one of Two Random Variables is Stochastically Larger than
+// the Other". Annals of Mathematical Statistics 18 (1): 50–60.
+//
+// [2] Klotz, J. H. (1966). "The Wilcoxon, Ties, and the Computer".
+// Journal of the American Statistical Association 61 (315): 772-787.
+func MannWhitneyUTest(x1, x2 []float64, alt LocationHypothesis) (*MannWhitneyUTestResult, error) {
+	n1, n2 := len(x1), len(x2)
+	if n1 == 0 || n2 == 0 {
+		return nil, ErrSampleSize
+	}
+
+	// Compute the U statistic and tie vector T. Sort copies of
+	// the inputs so the caller's slices are not mutated.
+	x1 = append([]float64(nil), x1...)
+	x2 = append([]float64(nil), x2...)
+	sort.Float64s(x1)
+	sort.Float64s(x2)
+	merged, labels := labeledMerge(x1, x2)
+
+	// R1 accumulates the rank sum of sample 1; T records the run
+	// length of each group of equal values in the merged samples.
+	R1 := 0.0
+	T, hasTies := []int{}, false
+	for i := 0; i < len(merged); {
+		rank1, nx1, v1 := i+1, 0, merged[i]
+		// Consume samples that tie this sample (including itself).
+		for ; i < len(merged) && merged[i] == v1; i++ {
+			if labels[i] == 1 {
+				nx1++
+			}
+		}
+		// Assign all tied samples the average rank of the
+		// samples, where merged[0] has rank 1.
+		if nx1 != 0 {
+			rank := float64(i+rank1) / 2
+			R1 += rank * float64(nx1)
+		}
+		T = append(T, i-rank1+1)
+		if i > rank1 {
+			hasTies = true
+		}
+	}
+	// U1 = R1 - n1(n1+1)/2 relates the rank sum to the U statistic.
+	U1 := R1 - float64(n1*(n1+1))/2
+
+	// Compute the smaller of U1 and U2
+	U2 := float64(n1*n2) - U1
+	Usmall := math.Min(U1, U2)
+
+	var p float64
+	if !hasTies && n1 <= MannWhitneyExactLimit && n2 <= MannWhitneyExactLimit ||
+		hasTies && n1 <= MannWhitneyTiesExactLimit && n2 <= MannWhitneyTiesExactLimit {
+		// Use exact U distribution. U1 will be an integer.
+		if len(T) == 1 {
+			// All values are equal. Test is meaningless.
+			return nil, ErrSamplesEqual
+		}
+
+		dist := UDist{N1: n1, N2: n2, T: T}
+		switch alt {
+		case LocationDiffers:
+			if U1 == U2 {
+				// The distribution is symmetric about
+				// Usmall. Since the distribution is
+				// discrete, the CDF is discontinuous
+				// and if simply double CDF(Usmall),
+				// we'll double count the
+				// (non-infinitesimal) probability
+				// mass at Usmall. What we want is
+				// just the integral of the whole CDF,
+				// which is 1.
+				p = 1
+			} else {
+				p = dist.CDF(Usmall) * 2
+			}
+
+		case LocationLess:
+			p = dist.CDF(U1)
+
+		case LocationGreater:
+			// P[u >= U1] = 1 - P[u <= U1-1].
+			//
+			// NOTE(review): with ties U1 can be
+			// half-integral (see the U=17.5 case in the
+			// tests), so confirm the -1 offset is exact
+			// when the support includes half steps.
+			p = 1 - dist.CDF(U1-1)
+		}
+	} else {
+		// Use normal approximation (with tie and continuity
+		// correction).
+		t := tieCorrection(T)
+		N := float64(n1 + n2)
+		μ_U := float64(n1*n2) / 2
+		σ_U := math.Sqrt(float64(n1*n2) * ((N + 1) - t/(N*(N-1))) / 12)
+		if σ_U == 0 {
+			// Zero variance means every value is tied.
+			return nil, ErrSamplesEqual
+		}
+		numer := U1 - μ_U
+		// Perform continuity correction.
+		switch alt {
+		case LocationDiffers:
+			numer -= mathSign(numer) * 0.5
+		case LocationLess:
+			numer += 0.5
+		case LocationGreater:
+			numer -= 0.5
+		}
+		z := numer / σ_U
+		switch alt {
+		case LocationDiffers:
+			p = 2 * math.Min(StdNormal.CDF(z), 1-StdNormal.CDF(z))
+		case LocationLess:
+			p = StdNormal.CDF(z)
+		case LocationGreater:
+			p = 1 - StdNormal.CDF(z)
+		}
+	}
+
+	return &MannWhitneyUTestResult{N1: n1, N2: n2, U: U1,
+		AltHypothesis: alt, P: p}, nil
+}
+
+// labeledMerge merges sorted lists x1 and x2 into sorted list merged.
+// labels[i] is 1 or 2 depending on whether merged[i] is a value from
+// x1 or x2, respectively. When values from the two lists are equal,
+// the x2 value is emitted first (the comparison is strict <).
+func labeledMerge(x1, x2 []float64) (merged []float64, labels []byte) {
+	merged = make([]float64, len(x1)+len(x2))
+	labels = make([]byte, len(x1)+len(x2))
+
+	// Standard two-pointer merge.
+	i, j, o := 0, 0, 0
+	for i < len(x1) && j < len(x2) {
+		if x1[i] < x2[j] {
+			merged[o] = x1[i]
+			labels[o] = 1
+			i++
+		} else {
+			merged[o] = x2[j]
+			labels[o] = 2
+			j++
+		}
+		o++
+	}
+	// Copy whichever list has elements remaining.
+	for ; i < len(x1); i++ {
+		merged[o] = x1[i]
+		labels[o] = 1
+		o++
+	}
+	for ; j < len(x2); j++ {
+		merged[o] = x2[j]
+		labels[o] = 2
+		o++
+	}
+	return
+}
+
+// tieCorrection computes the tie correction factor Σ_j (t_j³ - t_j)
+// where t_j is the number of ties in the j'th rank. It is used by the
+// normal-approximation variance in MannWhitneyUTest; a rank with no
+// ties (t_j == 1) contributes zero.
+func tieCorrection(ties []int) float64 {
+	t := 0
+	for _, tie := range ties {
+		t += tie*tie*tie - tie
+	}
+	return float64(t)
+}
diff --git a/internal/stats/utest_test.go b/internal/stats/utest_test.go
new file mode 100644
index 0000000..f784eb1
--- /dev/null
+++ b/internal/stats/utest_test.go
@@ -0,0 +1,80 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+import "testing"
+
+// TestMannWhitneyUTest checks MannWhitneyUTest against precomputed U
+// statistics and p-values for small samples (with and without ties),
+// the identical-samples error case, and large samples that exercise
+// the normal approximation.
+func TestMannWhitneyUTest(t *testing.T) {
+ // check reports an error unless got matches want field by field,
+ // comparing U and P approximately via aeq.
+ check := func(want, got *MannWhitneyUTestResult) {
+ if want.N1 != got.N1 || want.N2 != got.N2 ||
+ !aeq(want.U, got.U) ||
+ want.AltHypothesis != got.AltHypothesis ||
+ !aeq(want.P, got.P) {
+ t.Errorf("want %+v, got %+v", want, got)
+ }
+ }
+ // check3 runs the test of x1 against x2 under all three
+ // alternative hypotheses, expecting the same U statistic and the
+ // p-values pless, pdiff, and pgreater respectively.
+ check3 := func(x1, x2 []float64, U float64, pless, pdiff, pgreater float64) {
+ want := &MannWhitneyUTestResult{N1: len(x1), N2: len(x2), U: U}
+
+ want.AltHypothesis = LocationLess
+ want.P = pless
+ got, _ := MannWhitneyUTest(x1, x2, want.AltHypothesis)
+ check(want, got)
+
+ want.AltHypothesis = LocationDiffers
+ want.P = pdiff
+ got, _ = MannWhitneyUTest(x1, x2, want.AltHypothesis)
+ check(want, got)
+
+ want.AltHypothesis = LocationGreater
+ want.P = pgreater
+ got, _ = MannWhitneyUTest(x1, x2, want.AltHypothesis)
+ check(want, got)
+ }
+
+ s1 := []float64{2, 1, 3, 5}
+ s2 := []float64{12, 11, 13, 15}
+ s3 := []float64{0, 4, 6, 7} // Interleaved with s1, but no ties
+ s4 := []float64{2, 2, 2, 2}
+ s5 := []float64{1, 1, 1, 1, 1}
+
+ // Small sample, no ties
+ check3(s1, s2, 0, 0.014285714285714289, 0.028571428571428577, 1)
+ check3(s2, s1, 16, 1, 0.028571428571428577, 0.014285714285714289)
+ check3(s1, s3, 5, 0.24285714285714288, 0.485714285714285770, 0.8285714285714285)
+
+ // Small sample, ties
+ // TODO: Check these against some other implementation.
+ check3(s1, s1, 8, 0.6285714285714286, 1, 0.6285714285714286)
+ check3(s1, s4, 10, 0.8571428571428571, 0.7142857142857143, 0.3571428571428571)
+ check3(s1, s5, 17.5, 1, 0, 0.04761904761904767)
+
+ // Identical samples with zero variance must be rejected.
+ r, err := MannWhitneyUTest(s4, s4, LocationDiffers)
+ if err != ErrSamplesEqual {
+ t.Errorf("want ErrSamplesEqual, got %+v, %+v", r, err)
+ }
+
+ // Large samples.
+ l1 := make([]float64, 500)
+ for i := range l1 {
+ l1[i] = float64(i * 2)
+ }
+ l2 := make([]float64, 600)
+ for i := range l2 {
+ l2[i] = float64(i*2 - 41)
+ }
+ // l3 is l2 with its first 30 elements replaced by l1's,
+ // introducing ties between l1 and l3.
+ l3 := append([]float64{}, l2...)
+ for i := 0; i < 30; i++ {
+ l3[i] = l1[i]
+ }
+ // For comparing with R's wilcox.test:
+ // l1 <- seq(0, 499)*2
+ // l2 <- seq(0,599)*2-41
+ // l3 <- l2; for (i in 1:30) { l3[i] = l1[i] }
+
+ check3(l1, l2, 135250, 0.0024667680407086112, 0.0049335360814172224, 0.9975346930458906)
+ check3(l1, l1, 125000, 0.5000436801680628, 1, 0.5000436801680628)
+ check3(l1, l3, 134845, 0.0019351907119808942, 0.0038703814239617884, 0.9980659818257166)
+}
diff --git a/internal/stats/util_test.go b/internal/stats/util_test.go
new file mode 100644
index 0000000..4ab13f6
--- /dev/null
+++ b/internal/stats/util_test.go
@@ -0,0 +1,117 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package stats
+
+import (
+ "fmt"
+ "math"
+ "sort"
+ "strings"
+ "testing"
+)
+
+// testDiscreteCDF checks that dist's CDF is consistent with its PMF:
+// it rebuilds the expected CDF by summing the PMF over the support
+// and compares dist.CDF at each support point, at each midpoint
+// between support points (where a step CDF must be flat), just below
+// the lower bound (expected 0), and at the upper bound (expected 1).
+func testDiscreteCDF(t *testing.T, name string, dist DiscreteDist) {
+ // Build the expected CDF out of the PMF.
+ l, h := dist.Bounds()
+ s := dist.Step()
+ want := map[float64]float64{l - 0.1: 0, h: 1}
+ sum := 0.0
+ for x := l; x < h; x += s {
+ sum += dist.PMF(x)
+ want[x] = sum
+ // The CDF must hold the same value halfway to the next
+ // support point.
+ want[x+s/2] = sum
+ }
+
+ testFunc(t, name, dist.CDF, want)
+}
+
+// testInvCDF checks that InvCDF(dist) behaves as an inverse of
+// dist.CDF: out-of-range probabilities map to NaN, the endpoints map
+// to ±Inf (or to finite bounds when bounded is true), and interior
+// quantiles round-trip through CDF.
+func testInvCDF(t *testing.T, dist Dist, bounded bool) {
+ inv := InvCDF(dist)
+ name := fmt.Sprintf("InvCDF(%+v)", dist)
+ cdfName := fmt.Sprintf("CDF(%+v)", dist)
+
+ // Test bounds. Probabilities outside [0, 1] are invalid and must
+ // yield NaN; for an unbounded distribution the endpoints 0 and 1
+ // must yield -Inf and +Inf.
+ vals := map[float64]float64{-0.01: nan, 1.01: nan}
+ if !bounded {
+ vals[0] = -inf
+ vals[1] = inf
+ }
+ testFunc(t, name, inv, vals)
+
+ if bounded {
+ // For a bounded distribution, the CDF must be exactly 0 at
+ // and below inv(0), exactly 1 at and above inv(1), and
+ // strictly between 0 and 1 just inside those bounds.
+ lo, hi := inv(0), inv(1)
+ vals := map[float64]float64{
+ lo - 0.01: 0, lo: 0,
+ hi: 1, hi + 0.01: 1,
+ }
+ testFunc(t, cdfName, dist.CDF, vals)
+ if got := dist.CDF(lo + 0.01); !(got > 0) {
+ t.Errorf("%s(0)=%v, but %s(%v)=0", name, lo, cdfName, lo+0.01)
+ }
+ if got := dist.CDF(hi - 0.01); !(got < 1) {
+ t.Errorf("%s(1)=%v, but %s(%v)=1", name, hi, cdfName, hi-0.01)
+ }
+ }
+
+ // Test points between. For each interior quantile x = inv(p),
+ // inv(dist.CDF(x)) must recover x.
+ vals = map[float64]float64{}
+ for _, p := range vecLinspace(0, 1, 11) {
+ if p == 0 || p == 1 {
+ continue
+ }
+ x := inv(p)
+ vals[x] = x
+ }
+ testFunc(t, fmt.Sprintf("InvCDF(CDF(%+v))", dist),
+ func(x float64) float64 {
+ return inv(dist.CDF(x))
+ },
+ vals)
+}
+
+// aeq returns true if expect and got are equal to 8 significant
+// figures (1 part in 100 million).
+//
+// When both values are negative they are negated so the relative
+// comparison operates on magnitudes; values of opposite sign are
+// never approximately equal (except when both are zero).
+func aeq(expect, got float64) bool {
+ if expect < 0 && got < 0 {
+ expect, got = -expect, -got
+ }
+ return expect*0.99999999 <= got && got*0.99999999 <= expect
+}
+
+// testFunc checks that f(x) approximately equals vals[x] for every
+// key x in vals, treating NaN as equal to NaN. name labels failures;
+// if it contains "%v" it is used as a format string for x, otherwise
+// failures are reported as name(x).
+func testFunc(t *testing.T, name string, f func(float64) float64, vals map[float64]float64) {
+ // Sort the inputs so failure output is deterministic despite map
+ // iteration order being random.
+ xs := make([]float64, 0, len(vals))
+ for x := range vals {
+ xs = append(xs, x)
+ }
+ sort.Float64s(xs)
+
+ for _, x := range xs {
+ want, got := vals[x], f(x)
+ if math.IsNaN(want) && math.IsNaN(got) || aeq(want, got) {
+ continue
+ }
+ var label string
+ if strings.Contains(name, "%v") {
+ label = fmt.Sprintf(name, x)
+ } else {
+ label = fmt.Sprintf("%s(%v)", name, x)
+ }
+ t.Errorf("want %s=%v, got %v", label, want, got)
+ }
+}
+
+// vecLinspace returns num values spaced evenly between lo and hi,
+// inclusive. If num is 1, this returns an array consisting of lo.
+func vecLinspace(lo, hi float64, num int) []float64 {
+ res := make([]float64, num)
+ // Special-case a single point to avoid dividing by num-1 == 0
+ // below.
+ if num == 1 {
+ res[0] = lo
+ return res
+ }
+ for i := 0; i < num; i++ {
+ res[i] = lo + float64(i)*(hi-lo)/float64(num-1)
+ }
+ return res
+}