blob: 153343679e9591d499966f558baa1905f6a900f4 [file] [log] [blame]
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package stats
import (
"math"
)
// edm carries data to calculate e-divisive with medians.
type edm struct {
z []float64
delta int
bestStat float64
bestIdx int
tau int
ta, tb, tab *IntervalTree
}
// normalize normalizes a slice of floats in place.
func normalize(input []float64) []float64 {
// gracefully handle empty inputs.
if len(input) == 0 {
return input
}
min, max := input[0], input[0]
for _, v := range input {
if v > max {
max = v
}
if v < min {
min = v
}
}
for i, v := range input {
input[i] = (v - min) / (max - min)
}
return input
}
// medianResolution finds a good compromise for the approximate median, as
// described in the paper.
func medianResolution(l int) int {
d := 10 // min resolution 1/(1<<d).
if l := int(math.Ceil(math.Log2(float64(l)))); l > d {
d = l
}
return d
}
// toFloat converts an slice of integers to float64.
func toFloat(input []int) []float64 {
output := make([]float64, len(input))
for i, v := range input {
output[i] = float64(v)
}
return output
}
// stat calculates the edx stat for a given location, saving it if it's better
// than the currently stored statistic.
func (e *edm) stat(tau2 int) float64 {
a, b, c := e.ta.Median(), e.tb.Median(), e.tab.Median()
a, b, c = a*a, b*b, c*c
stat := 2*c - a - b
stat *= float64(e.tau*(tau2-e.tau)) / float64(tau2)
if stat > e.bestStat {
e.bestStat = stat
e.bestIdx = e.tau
}
return stat
}
// calc handles the brunt of the E-divisive with median algorithm described in:
// https://courses.cit.cornell.edu/nj89/docs/edm.pdf.
func (e *edm) calc() int {
normalize(e.z)
e.bestStat = math.Inf(-1)
e.tau = e.delta
tau2 := 2 * e.delta
d := medianResolution(len(e.z))
e.ta = NewIntervalTree(d)
e.tb = NewIntervalTree(d)
e.tab = NewIntervalTree(d)
for i := 0; i < e.tau; i++ {
for j := i + 1; j < e.tau; j++ {
e.ta.Insert(e.z[i] - e.z[j])
}
}
for i := e.tau; i < tau2; i++ {
for j := i + 1; j < tau2; j++ {
e.tb.Insert(e.z[i] - e.z[j])
}
}
for i := 0; i < e.tau; i++ {
for j := e.tau; j < tau2; j++ {
e.tab.Insert(e.z[i] - e.z[j])
}
}
tau2 += 1
for ; tau2 < len(e.z)+1; tau2++ {
e.tb.Insert(e.z[tau2-1] - e.z[tau2-2])
e.stat(tau2)
}
forward := false
for e.tau < len(e.z)-e.delta {
if forward {
e.forwardUpdate()
} else {
e.backwardUpdate()
}
forward = !forward
}
return e.bestIdx
}
func (e *edm) forwardUpdate() {
tau2 := e.tau + e.delta
e.tau += 1
for i := e.tau - e.delta; i < e.tau-1; i++ {
e.ta.Insert(e.z[i] - e.z[e.tau-1])
}
for i := e.tau - e.delta; i < e.tau; i++ {
e.ta.Remove(e.z[i] - e.z[e.tau-e.delta-1])
}
e.ta.Insert(e.z[e.tau-e.delta-1] - e.z[e.tau-e.delta])
e.tab.Remove(e.z[e.tau-1] - e.z[e.tau-e.delta-1])
for i := e.tau; i < tau2; i++ {
e.tab.Remove(e.z[i] - e.z[e.tau-e.delta-1])
e.tab.Insert(e.z[i] - e.z[e.tau-1])
}
for i := e.tau - e.delta; i < e.tau-1; i++ {
e.tab.Remove(e.z[i] - e.z[e.tau-1])
e.tab.Insert(e.z[i] - e.z[tau2])
}
e.tab.Insert(e.z[e.tau-1] - e.z[tau2])
for i := e.tau; i < tau2; i++ {
e.tb.Remove(e.z[i] - e.z[e.tau-1])
e.tb.Insert(e.z[i] - e.z[tau2])
}
tau2 += 1
for ; tau2 < len(e.z)+1; tau2++ {
e.tb.Insert(e.z[tau2-1] - e.z[tau2-2])
e.stat(tau2)
}
}
func (e *edm) backwardUpdate() {
tau2 := e.tau + e.delta
e.tau += 1
for i := e.tau - e.delta; i < e.tau-1; i++ {
e.ta.Insert(e.z[i] - e.z[e.tau-1])
}
for i := e.tau - e.delta; i < e.tau; i++ {
e.ta.Remove(e.z[i] - e.z[e.tau-e.delta-1])
}
e.ta.Insert(e.z[e.tau-e.delta-1] - e.z[e.tau-e.delta])
e.tab.Remove(e.z[e.tau-1] - e.z[e.tau-e.delta-1])
for i := e.tau; i < tau2; i++ {
e.tab.Remove(e.z[i] - e.z[e.tau-e.delta-1])
e.tab.Insert(e.z[i] - e.z[e.tau-1])
}
for i := e.tau - e.delta; i < e.tau-1; i++ {
e.tab.Remove(e.z[i] - e.z[e.tau-1])
e.tab.Insert(e.z[i] - e.z[tau2])
}
e.tab.Insert(e.z[e.tau-1] - e.z[tau2])
for i := e.tau; i < e.tau+e.delta-1; i++ {
e.tb.Insert(e.z[e.tau+e.delta-1] - e.z[i])
e.tb.Remove(e.z[i] - e.z[e.tau-1])
}
for tau2 = len(e.z); tau2 >= e.tau+e.delta; tau2-- {
e.tb.Remove(e.z[tau2-1] - e.z[tau2-2])
e.stat(tau2)
}
}
// EDM performs the approximate E-divisive with means calculation as described
// in https://courses.cit.cornell.edu/nj89/docs/edm.pdf.
//
// EDM is an algorithm for finding breakout points in a time-series data set.
// The function accepts the input vector, and a window width that describes the
// search window during which the median breakout must occur. The window, ∂,
// is described in the paper.
//
// Note this algorithm uses the interval tree median approach as described in
// the paper. The medians used will have a resolution of 1/(2^d), where d is
// max(log2(len(input)), 10). That is the value recommended in the paper.
func EDM(input []float64, delta int) int {
// edm modifies the input, we don't want to do that to our callers.
c := make([]float64, len(input))
copy(c, input)
e := &edm{z: c, delta: delta}
return e.calc()
}
// EDMInt is the integer version of EDM.
func EDMInt(input []int, delta int) int {
e := &edm{z: toFloat(input), delta: delta}
return e.calc()
}