| // Copyright 2015 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package stats |
| |
| import ( |
| "math" |
| |
| "github.com/aclements/go-moremath/mathx" |
| ) |
| |
| // HypergeometicDist is a hypergeometric distribution. |
| type HypergeometicDist struct { |
| // N is the size of the population. N >= 0. |
| N int |
| |
| // K is the number of successes in the population. 0 <= K <= N. |
| K int |
| |
| // Draws is the number of draws from the population. This is |
| // usually written "n", but is called Draws here because of |
| // limitations on Go identifier naming. 0 <= Draws <= N. |
| Draws int |
| } |
| |
| // PMF is the probability of getting exactly int(k) successes in |
| // d.Draws draws with replacement from a population of size d.N that |
| // contains exactly d.K successes. |
| func (d HypergeometicDist) PMF(k float64) float64 { |
| ki := int(math.Floor(k)) |
| l, h := d.bounds() |
| if ki < l || ki > h { |
| return 0 |
| } |
| return d.pmf(ki) |
| } |
| |
| func (d HypergeometicDist) pmf(k int) float64 { |
| return math.Exp(mathx.Lchoose(d.K, k) + mathx.Lchoose(d.N-d.K, d.Draws-k) - mathx.Lchoose(d.N, d.Draws)) |
| } |
| |
| // CDF is the probability of getting int(k) or fewer successes in |
| // d.Draws draws with replacement from a population of size d.N that |
| // contains exactly d.K successes. |
| func (d HypergeometicDist) CDF(k float64) float64 { |
| // Based on Klotz, A Computational Approach to Statistics. |
| ki := int(math.Floor(k)) |
| l, h := d.bounds() |
| if ki < l { |
| return 0 |
| } else if ki >= h { |
| return 1 |
| } |
| // Use symmetry to compute the smaller sum. |
| flip := false |
| if ki > (d.Draws+1)/(d.N+1)*(d.K+1) { |
| flip = true |
| ki = d.K - ki - 1 |
| d.Draws = d.N - d.Draws |
| } |
| p := d.pmf(ki) * d.sum(ki) |
| if flip { |
| p = 1 - p |
| } |
| return p |
| } |
| |
| func (d HypergeometicDist) sum(k int) float64 { |
| const epsilon = 1e-14 |
| sum, ak := 1.0, 1.0 |
| L := maxint(0, d.Draws+d.K-d.N) |
| for dk := 1; dk <= k-L && ak/sum > epsilon; dk++ { |
| ak *= float64(1+k-dk) / float64(d.Draws-k+dk) |
| ak *= float64(d.N-d.K-d.Draws+k+1-dk) / float64(d.K-k+dk) |
| sum += ak |
| } |
| return sum |
| } |
| |
| func (d HypergeometicDist) bounds() (int, int) { |
| return maxint(0, d.Draws+d.K-d.N), minint(d.Draws, d.K) |
| } |
| |
| func (d HypergeometicDist) Bounds() (float64, float64) { |
| l, h := d.bounds() |
| return float64(l), float64(h) |
| } |
| |
| func (d HypergeometicDist) Step() float64 { |
| return 1 |
| } |
| |
| func (d HypergeometicDist) Mean() float64 { |
| return float64(d.Draws*d.K) / float64(d.N) |
| } |
| |
| func (d HypergeometicDist) Variance() float64 { |
| return float64(d.Draws*d.K*(d.N-d.K)*(d.N-d.Draws)) / |
| float64(d.N*d.N*(d.N-1)) |
| } |