blob: 2e43d055293cf14eeb8735e3d9248e856c6621e1 [file] [log] [blame]
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package stats
import (
"container/heap"
"math"
)
type maxHeap []float64
func (h maxHeap) Len() int { return len(h) }
func (h maxHeap) Less(i, j int) bool { return h[i] > h[j] }
func (h maxHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
func (h *maxHeap) Push(x interface{}) {
*h = append(*h, x.(float64))
}
func (h *maxHeap) Pop() interface{} {
old := *h
n := len(old)
x := old[n-1]
*h = old[0 : n-1]
return x
}
type minHeap []float64
func (h minHeap) Len() int { return len(h) }
func (h minHeap) Less(i, j int) bool { return h[i] < h[j] }
func (h minHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
func (h *minHeap) Push(x interface{}) {
*h = append(*h, x.(float64))
}
func (h *minHeap) Pop() interface{} {
old := *h
n := len(old)
x := old[n-1]
*h = old[0 : n-1]
return x
}
// addToHeaps adds a value to the appropriate heap,and keeps their sizes close.
// This is a direct translation of the addToHeaps function described in the
// paper.
func addToHeaps(min *minHeap, max *maxHeap, x float64) {
// NB: There's an ambiguity in the paper here related to if-then/else
// evaluation. This structure seems to yield the desired results.
if min.Len() == 0 || x < (*min)[0] {
heap.Push(max, x)
} else {
heap.Push(min, x)
}
if min.Len() > max.Len()+1 {
heap.Push(max, heap.Pop(min))
} else if max.Len() > min.Len()+1 {
heap.Push(min, heap.Pop(max))
}
}
// getMedian finds the median of the min and max heaps.
// This is a direct translation of the getMedian function described in the
// paper.
func getMedian(min minHeap, max maxHeap) float64 {
if min.Len() > max.Len() {
return min[0]
}
if max.Len() > min.Len() {
return max[0]
}
return (max[0] + min[0]) / 2
}
// edmx implements the EDM-X algorithm.
// This algorithm runs in place, modifying the data.
func edmx(input []float64, delta int) int {
input = normalize(input)
var lmax, rmax maxHeap
var lmin, rmin minHeap
heap.Init(&lmax)
heap.Init(&rmax)
heap.Init(&lmin)
heap.Init(&rmin)
bestStat := math.Inf(-1)
bestLoc := -1
for i := 0; i < delta-1; i++ {
addToHeaps(&lmin, &lmax, input[i])
}
for i := delta; i < len(input)-delta+1; i++ {
addToHeaps(&lmin, &lmax, input[i-1])
ml := getMedian(lmin, lmax)
rmax, rmin = rmax[:0], rmin[:0]
for j := i; j < i+delta-1; j++ {
addToHeaps(&rmin, &rmax, input[j])
}
for j := i + delta; j < len(input)+1; j++ {
addToHeaps(&rmin, &rmax, input[j-1])
mr := getMedian(rmin, rmax)
stat := float64(i*(j-i)) / float64(j) * (ml - mr) * (ml - mr)
if stat > bestStat {
bestStat = stat
bestLoc = i
}
}
}
return bestLoc
}
// EDMX runs the EDM-X algorithm on a slice of floats.
func EDMX(input []float64, delta int) int {
// edmx modfies the input, don't do that.
c := make([]float64, len(input))
copy(c, input)
return edmx(c, delta)
}
// EMDXInt runs the EDM-X algorithm on a slice of integer datapoints.
func EDMXInt(input []int, delta int) int {
return edmx(toFloat(input), delta)
}