blob: 153343679e9591d499966f558baa1905f6a900f4 [file] [log] [blame]
 // Copyright 2021 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package stats import ( "math" ) // edm carries data to calculate e-divisive with medians. type edm struct { z []float64 delta int bestStat float64 bestIdx int tau int ta, tb, tab *IntervalTree } // normalize normalizes a slice of floats in place. func normalize(input []float64) []float64 { // gracefully handle empty inputs. if len(input) == 0 { return input } min, max := input[0], input[0] for _, v := range input { if v > max { max = v } if v < min { min = v } } for i, v := range input { input[i] = (v - min) / (max - min) } return input } // medianResolution finds a good compromise for the approximate median, as // described in the paper. func medianResolution(l int) int { d := 10 // min resolution 1/(1< d { d = l } return d } // toFloat converts an slice of integers to float64. func toFloat(input []int) []float64 { output := make([]float64, len(input)) for i, v := range input { output[i] = float64(v) } return output } // stat calculates the edx stat for a given location, saving it if it's better // than the currently stored statistic. func (e *edm) stat(tau2 int) float64 { a, b, c := e.ta.Median(), e.tb.Median(), e.tab.Median() a, b, c = a*a, b*b, c*c stat := 2*c - a - b stat *= float64(e.tau*(tau2-e.tau)) / float64(tau2) if stat > e.bestStat { e.bestStat = stat e.bestIdx = e.tau } return stat } // calc handles the brunt of the E-divisive with median algorithm described in: // https://courses.cit.cornell.edu/nj89/docs/edm.pdf. func (e *edm) calc() int { normalize(e.z) e.bestStat = math.Inf(-1) e.tau = e.delta tau2 := 2 * e.delta d := medianResolution(len(e.z)) e.ta = NewIntervalTree(d) e.tb = NewIntervalTree(d) e.tab = NewIntervalTree(d) for i := 0; i < e.tau; i++ { for j := i + 1; j < e.tau; j++ { e.ta.Insert(e.z[i] - e.z[j]) } } for i := e.tau; i < tau2; i++ { for j := i + 1; j < tau2; j++ { e.tb.Insert(e.z[i] - e.z[j]) } } for i := 0; i < e.tau; i++ { for j := e.tau; j < tau2; j++ { e.tab.Insert(e.z[i] - e.z[j]) } } tau2 += 1 for ; tau2 < len(e.z)+1; tau2++ { e.tb.Insert(e.z[tau2-1] - e.z[tau2-2]) e.stat(tau2) } forward := false for e.tau < len(e.z)-e.delta { if forward { e.forwardUpdate() } else { e.backwardUpdate() } forward = !forward } return e.bestIdx } func (e *edm) forwardUpdate() { tau2 := e.tau + e.delta e.tau += 1 for i := e.tau - e.delta; i < e.tau-1; i++ { e.ta.Insert(e.z[i] - e.z[e.tau-1]) } for i := e.tau - e.delta; i < e.tau; i++ { e.ta.Remove(e.z[i] - e.z[e.tau-e.delta-1]) } e.ta.Insert(e.z[e.tau-e.delta-1] - e.z[e.tau-e.delta]) e.tab.Remove(e.z[e.tau-1] - e.z[e.tau-e.delta-1]) for i := e.tau; i < tau2; i++ { e.tab.Remove(e.z[i] - e.z[e.tau-e.delta-1]) e.tab.Insert(e.z[i] - e.z[e.tau-1]) } for i := e.tau - e.delta; i < e.tau-1; i++ { e.tab.Remove(e.z[i] - e.z[e.tau-1]) e.tab.Insert(e.z[i] - e.z[tau2]) } e.tab.Insert(e.z[e.tau-1] - e.z[tau2]) for i := e.tau; i < tau2; i++ { e.tb.Remove(e.z[i] - e.z[e.tau-1]) e.tb.Insert(e.z[i] - e.z[tau2]) } tau2 += 1 for ; tau2 < len(e.z)+1; tau2++ { e.tb.Insert(e.z[tau2-1] - e.z[tau2-2]) e.stat(tau2) } } func (e *edm) backwardUpdate() { tau2 := e.tau + e.delta e.tau += 1 for i := e.tau - e.delta; i < e.tau-1; i++ { e.ta.Insert(e.z[i] - e.z[e.tau-1]) } for i := e.tau - e.delta; i < e.tau; i++ { e.ta.Remove(e.z[i] - e.z[e.tau-e.delta-1]) } e.ta.Insert(e.z[e.tau-e.delta-1] - e.z[e.tau-e.delta]) e.tab.Remove(e.z[e.tau-1] - e.z[e.tau-e.delta-1]) for i := e.tau; i < tau2; i++ { e.tab.Remove(e.z[i] - e.z[e.tau-e.delta-1]) e.tab.Insert(e.z[i] - e.z[e.tau-1]) } for i := e.tau - e.delta; i < e.tau-1; i++ { e.tab.Remove(e.z[i] - e.z[e.tau-1]) e.tab.Insert(e.z[i] - e.z[tau2]) } e.tab.Insert(e.z[e.tau-1] - e.z[tau2]) for i := e.tau; i < e.tau+e.delta-1; i++ { e.tb.Insert(e.z[e.tau+e.delta-1] - e.z[i]) e.tb.Remove(e.z[i] - e.z[e.tau-1]) } for tau2 = len(e.z); tau2 >= e.tau+e.delta; tau2-- { e.tb.Remove(e.z[tau2-1] - e.z[tau2-2]) e.stat(tau2) } } // EDM performs the approximate E-divisive with means calculation as described // in https://courses.cit.cornell.edu/nj89/docs/edm.pdf. // // EDM is an algorithm for finding breakout points in a time-series data set. // The function accepts the input vector, and a window width that describes the // search window during which the median breakout must occur. The window, ∂, // is described in the paper. // // Note this algorithm uses the interval tree median approach as described in // the paper. The medians used will have a resolution of 1/(2^d), where d is // max(log2(len(input)), 10). That is the value recommended in the paper. func EDM(input []float64, delta int) int { // edm modifies the input, we don't want to do that to our callers. c := make([]float64, len(input)) copy(c, input) e := &edm{z: c, delta: delta} return e.calc() } // EDMInt is the integer version of EDM. func EDMInt(input []int, delta int) int { e := &edm{z: toFloat(input), delta: delta} return e.calc() }