| // Copyright 2017 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package trace |
| |
| import ( |
| "container/heap" |
| tracev2 "internal/trace/v2" |
| "math" |
| "sort" |
| "strings" |
| "time" |
| ) |
| |
| // MutatorUtil is a change in mutator utilization at a particular |
| // time. Mutator utilization functions are represented as a |
| // time-ordered []MutatorUtil. |
| type MutatorUtil struct { |
| Time int64 |
| // Util is the mean mutator utilization starting at Time. This |
| // is in the range [0, 1]. |
| Util float64 |
| } |
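| |
| // For illustration (hypothetical values, not from a real trace), a |
| // function that is fully utilized for the first millisecond and then |
| // drops to 50% utilization could be written as: |
| // |
| //	util := []MutatorUtil{ |
| //		{Time: 0, Util: 1.0}, |
| //		{Time: 1e6, Util: 0.5}, // times are in nanoseconds |
| //	} |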
| |
| // UtilFlags controls the behavior of MutatorUtilization. |
| type UtilFlags int |
| |
| const ( |
| // UtilSTW means utilization should account for STW events. |
| // In the v1 path this includes all STW events; the v2 path |
| // counts only GC-related STW ranges (see isGCSTW). |
| UtilSTW UtilFlags = 1 << iota |
| // UtilBackground means utilization should account for |
| // background mark workers. |
| UtilBackground |
| // UtilAssist means utilization should account for mark |
| // assists. |
| UtilAssist |
| // UtilSweep means utilization should account for sweeping. |
| UtilSweep |
| |
| // UtilPerProc means each P should be given a separate |
| // utilization function. Otherwise, there is a single function |
| // and each P is given a fraction of the utilization. |
| UtilPerProc |
| ) |
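| |
| // The flags above are bits meant to be OR'd together. For example, a |
| // plausible configuration that accounts for all GC work in a single |
| // combined utilization function (illustrative, not prescriptive): |
| // |
| //	flags := UtilSTW | UtilBackground | UtilAssist | UtilSweep |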
| |
| // MutatorUtilization returns a set of mutator utilization functions |
| // for the given trace. Each function will always end with 0 |
| // utilization. The bounds of each function are implicit in the first |
| // and last event; outside of these bounds each function is undefined. |
| // |
| // If the UtilPerProc flag is not given, this always returns a single |
| // utilization function. Otherwise, it returns one function per P. |
| func MutatorUtilization(events []*Event, flags UtilFlags) [][]MutatorUtil { |
| if len(events) == 0 { |
| return nil |
| } |
| |
| type perP struct { |
| // gc > 0 indicates that GC is active on this P. |
| gc int |
| // series is the logical series number for this P. This |
| // is necessary because Ps may be removed and then |
| // re-added, and then the new P needs a new series. |
| series int |
| } |
| ps := []perP{} |
| stw := 0 |
| |
| out := [][]MutatorUtil{} |
| assists := map[uint64]bool{} |
| block := map[uint64]*Event{} |
| bgMark := map[uint64]bool{} |
| |
| for _, ev := range events { |
| switch ev.Type { |
| case EvGomaxprocs: |
| gomaxprocs := int(ev.Args[0]) |
| if len(ps) > gomaxprocs { |
| if flags&UtilPerProc != 0 { |
| // End each P's series. |
| for _, p := range ps[gomaxprocs:] { |
| out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, 0}) |
| } |
| } |
| ps = ps[:gomaxprocs] |
| } |
| for len(ps) < gomaxprocs { |
| // Start new P's series. |
| series := 0 |
| if flags&UtilPerProc != 0 || len(out) == 0 { |
| series = len(out) |
| out = append(out, []MutatorUtil{{ev.Ts, 1}}) |
| } |
| ps = append(ps, perP{series: series}) |
| } |
| case EvSTWStart: |
| if flags&UtilSTW != 0 { |
| stw++ |
| } |
| case EvSTWDone: |
| if flags&UtilSTW != 0 { |
| stw-- |
| } |
| case EvGCMarkAssistStart: |
| if flags&UtilAssist != 0 { |
| ps[ev.P].gc++ |
| assists[ev.G] = true |
| } |
| case EvGCMarkAssistDone: |
| if flags&UtilAssist != 0 { |
| ps[ev.P].gc-- |
| delete(assists, ev.G) |
| } |
| case EvGCSweepStart: |
| if flags&UtilSweep != 0 { |
| ps[ev.P].gc++ |
| } |
| case EvGCSweepDone: |
| if flags&UtilSweep != 0 { |
| ps[ev.P].gc-- |
| } |
| case EvGoStartLabel: |
| if flags&UtilBackground != 0 && strings.HasPrefix(ev.SArgs[0], "GC ") && ev.SArgs[0] != "GC (idle)" { |
| // Background mark worker. |
| // |
| // If we're in per-proc mode, we don't |
| // count dedicated workers because |
| // they kick all of the goroutines off |
| // that P, so they don't directly |
| // contribute to goroutine latency. |
| if !(flags&UtilPerProc != 0 && ev.SArgs[0] == "GC (dedicated)") { |
| bgMark[ev.G] = true |
| ps[ev.P].gc++ |
| } |
| } |
| fallthrough |
| case EvGoStart: |
| if assists[ev.G] { |
| // Unblocked during assist. |
| ps[ev.P].gc++ |
| } |
| block[ev.G] = ev.Link |
| default: |
| if ev != block[ev.G] { |
| continue |
| } |
| |
| if assists[ev.G] { |
| // Blocked during assist. |
| ps[ev.P].gc-- |
| } |
| if bgMark[ev.G] { |
| // Background mark worker done. |
| ps[ev.P].gc-- |
| delete(bgMark, ev.G) |
| } |
| delete(block, ev.G) |
| } |
| |
| if flags&UtilPerProc == 0 { |
| // Compute the current average utilization. |
| if len(ps) == 0 { |
| continue |
| } |
| gcPs := 0 |
| if stw > 0 { |
| gcPs = len(ps) |
| } else { |
| for i := range ps { |
| if ps[i].gc > 0 { |
| gcPs++ |
| } |
| } |
| } |
| mu := MutatorUtil{ev.Ts, 1 - float64(gcPs)/float64(len(ps))} |
| |
| // Record the utilization change. (Since |
| // len(ps) > 0, we know len(out) > 0.) |
| out[0] = addUtil(out[0], mu) |
| } else { |
| // Check for per-P utilization changes. |
| for i := range ps { |
| p := &ps[i] |
| util := 1.0 |
| if stw > 0 || p.gc > 0 { |
| util = 0.0 |
| } |
| out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, util}) |
| } |
| } |
| } |
| |
| // Add final 0 utilization event to any remaining series. This |
| // is important to mark the end of the trace. The exact value |
| // shouldn't matter since no window should extend beyond this, |
| // but using 0 is symmetric with the start of the trace. |
| mu := MutatorUtil{events[len(events)-1].Ts, 0} |
| for i := range ps { |
| out[ps[i].series] = addUtil(out[ps[i].series], mu) |
| } |
| return out |
| } |
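| |
| // As a minimal usage sketch (events is a hypothetical parsed v1 event |
| // slice; error handling elided): |
| // |
| //	mu := MutatorUtilization(events, UtilSTW|UtilBackground|UtilAssist|UtilSweep) |
| //	for _, u := range mu[0] { // single series without UtilPerProc |
| //		fmt.Println(u.Time, u.Util) |
| //	} |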
| |
| // MutatorUtilizationV2 returns a set of mutator utilization functions |
| // for the given v2 trace events. Each function will |
| // always end with 0 utilization. The bounds of each function are implicit |
| // in the first and last event; outside of these bounds each function is |
| // undefined. |
| // |
| // If the UtilPerProc flag is not given, this always returns a single |
| // utilization function. Otherwise, it returns one function per P. |
| func MutatorUtilizationV2(events []tracev2.Event, flags UtilFlags) [][]MutatorUtil { |
| // Set up a bunch of analysis state. |
| type perP struct { |
| // gc > 0 indicates that GC is active on this P. |
| gc int |
| // series is the logical series number for this P. This |
| // is necessary because Ps may be removed and then |
| // re-added, and then the new P needs a new series. |
| series int |
| } |
| type procsCount struct { |
| // time at which procs changed. |
| time int64 |
| // n is the number of procs at that point. |
| n int |
| } |
| out := [][]MutatorUtil{} |
| stw := 0 |
| ps := []perP{} |
| inGC := make(map[tracev2.GoID]bool) |
| states := make(map[tracev2.GoID]tracev2.GoState) |
| bgMark := make(map[tracev2.GoID]bool) |
| procs := []procsCount{} |
| seenSync := false |
| |
| // Helpers. |
| handleSTW := func(r tracev2.Range) bool { |
| return flags&UtilSTW != 0 && isGCSTW(r) |
| } |
| handleMarkAssist := func(r tracev2.Range) bool { |
| return flags&UtilAssist != 0 && isGCMarkAssist(r) |
| } |
| handleSweep := func(r tracev2.Range) bool { |
| return flags&UtilSweep != 0 && isGCSweep(r) |
| } |
| |
| // Iterate through the trace, tracking mutator utilization. |
| var lastEv *tracev2.Event |
| for i := range events { |
| ev := &events[i] |
| lastEv = ev |
| |
| // Process the event. |
| switch ev.Kind() { |
| case tracev2.EventSync: |
| seenSync = true |
| case tracev2.EventMetric: |
| m := ev.Metric() |
| if m.Name != "/sched/gomaxprocs:threads" { |
| break |
| } |
| gomaxprocs := int(m.Value.Uint64()) |
| if len(ps) > gomaxprocs { |
| if flags&UtilPerProc != 0 { |
| // End each P's series. |
| for _, p := range ps[gomaxprocs:] { |
| out[p.series] = addUtil(out[p.series], MutatorUtil{int64(ev.Time()), 0}) |
| } |
| } |
| ps = ps[:gomaxprocs] |
| } |
| for len(ps) < gomaxprocs { |
| // Start new P's series. |
| series := 0 |
| if flags&UtilPerProc != 0 || len(out) == 0 { |
| series = len(out) |
| out = append(out, []MutatorUtil{{int64(ev.Time()), 1}}) |
| } |
| ps = append(ps, perP{series: series}) |
| } |
| if len(procs) == 0 || gomaxprocs != procs[len(procs)-1].n { |
| procs = append(procs, procsCount{time: int64(ev.Time()), n: gomaxprocs}) |
| } |
| } |
| if len(ps) == 0 { |
| // We can't start doing any analysis until we see what GOMAXPROCS is. |
| // It will show up very early in the trace, but we need to be robust to |
| // something else being emitted beforehand. |
| continue |
| } |
| |
| switch ev.Kind() { |
| case tracev2.EventRangeActive: |
| if seenSync { |
| // If we've seen a sync, then we can be sure we're not finding out about |
| // something late; we have complete information after that point, and these |
| // active events will just be redundant. |
| break |
| } |
| // This range is active back to the start of the trace. We're failing to account |
| // for this since we just found out about it now. Fix up the mutator utilization. |
| // |
| // N.B. A trace can't start during a STW, so we don't handle it here. |
| r := ev.Range() |
| switch { |
| case handleMarkAssist(r): |
| if !states[ev.Goroutine()].Executing() { |
| // If the goroutine isn't executing, then the fact that it was in mark |
| // assist doesn't actually count. |
| break |
| } |
| // This G has been in a mark assist *and running on its P* since the start |
| // of the trace. |
| fallthrough |
| case handleSweep(r): |
| // This P has been in sweep (or mark assist, from above) since the start of the trace. |
| // |
| // We don't need to do anything if UtilPerProc is set. If we get an event like |
| // this for a running P, it must be the first event mentioning that P, so |
| // this P won't have any MutatorUtils on its list yet. |
| // |
| // However, if UtilPerProc isn't set, then we probably have data from other procs |
| // and from previous events. We need to fix that up. |
| if flags&UtilPerProc != 0 { |
| break |
| } |
| // Subtract out 1/gomaxprocs mutator utilization for all time periods |
| // from the beginning of the trace until now. |
| mi, pi := 0, 0 |
| for mi < len(out[0]) { |
| if pi < len(procs)-1 && procs[pi+1].time < out[0][mi].Time { |
| pi++ |
| continue |
| } |
| out[0][mi].Util -= float64(1) / float64(procs[pi].n) |
| if out[0][mi].Util < 0 { |
| out[0][mi].Util = 0 |
| } |
| mi++ |
| } |
| } |
| // After accounting for the portion we missed, this just acts like the |
| // beginning of a new range. |
| fallthrough |
| case tracev2.EventRangeBegin: |
| r := ev.Range() |
| if handleSTW(r) { |
| stw++ |
| } else if handleSweep(r) { |
| ps[ev.Proc()].gc++ |
| } else if handleMarkAssist(r) { |
| ps[ev.Proc()].gc++ |
| if g := r.Scope.Goroutine(); g != tracev2.NoGoroutine { |
| inGC[g] = true |
| } |
| } |
| case tracev2.EventRangeEnd: |
| r := ev.Range() |
| if handleSTW(r) { |
| stw-- |
| } else if handleSweep(r) { |
| ps[ev.Proc()].gc-- |
| } else if handleMarkAssist(r) { |
| ps[ev.Proc()].gc-- |
| if g := r.Scope.Goroutine(); g != tracev2.NoGoroutine { |
| delete(inGC, g) |
| } |
| } |
| case tracev2.EventStateTransition: |
| st := ev.StateTransition() |
| if st.Resource.Kind != tracev2.ResourceGoroutine { |
| break |
| } |
| old, new := st.Goroutine() |
| g := st.Resource.Goroutine() |
| if inGC[g] || bgMark[g] { |
| if !old.Executing() && new.Executing() { |
| // Started running while doing GC things. |
| ps[ev.Proc()].gc++ |
| } else if old.Executing() && !new.Executing() { |
| // Stopped running while doing GC things. |
| ps[ev.Proc()].gc-- |
| } |
| } |
| states[g] = new |
| case tracev2.EventLabel: |
| l := ev.Label() |
| if flags&UtilBackground != 0 && strings.HasPrefix(l.Label, "GC ") && l.Label != "GC (idle)" { |
| // Background mark worker. |
| // |
| // If we're in per-proc mode, we don't |
| // count dedicated workers because |
| // they kick all of the goroutines off |
| // that P, so they don't directly |
| // contribute to goroutine latency. |
| if !(flags&UtilPerProc != 0 && l.Label == "GC (dedicated)") { |
| bgMark[ev.Goroutine()] = true |
| ps[ev.Proc()].gc++ |
| } |
| } |
| } |
| |
| if flags&UtilPerProc == 0 { |
| // Compute the current average utilization. |
| if len(ps) == 0 { |
| continue |
| } |
| gcPs := 0 |
| if stw > 0 { |
| gcPs = len(ps) |
| } else { |
| for i := range ps { |
| if ps[i].gc > 0 { |
| gcPs++ |
| } |
| } |
| } |
| mu := MutatorUtil{int64(ev.Time()), 1 - float64(gcPs)/float64(len(ps))} |
| |
| // Record the utilization change. (Since |
| // len(ps) > 0, we know len(out) > 0.) |
| out[0] = addUtil(out[0], mu) |
| } else { |
| // Check for per-P utilization changes. |
| for i := range ps { |
| p := &ps[i] |
| util := 1.0 |
| if stw > 0 || p.gc > 0 { |
| util = 0.0 |
| } |
| out[p.series] = addUtil(out[p.series], MutatorUtil{int64(ev.Time()), util}) |
| } |
| } |
| } |
| |
| // No events in the stream. |
| if lastEv == nil { |
| return nil |
| } |
| |
| // Add final 0 utilization event to any remaining series. This |
| // is important to mark the end of the trace. The exact value |
| // shouldn't matter since no window should extend beyond this, |
| // but using 0 is symmetric with the start of the trace. |
| mu := MutatorUtil{int64(lastEv.Time()), 0} |
| for i := range ps { |
| out[ps[i].series] = addUtil(out[ps[i].series], mu) |
| } |
| return out |
| } |
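| |
| // A minimal sketch of collecting v2 events and computing utilization, |
| // assuming the internal/trace/v2 Reader API (f is a hypothetical |
| // io.Reader over trace data; error handling elided): |
| // |
| //	r, _ := tracev2.NewReader(f) |
| //	var events []tracev2.Event |
| //	for { |
| //		ev, err := r.ReadEvent() |
| //		if err != nil { |
| //			break // io.EOF marks the end of the trace |
| //		} |
| //		events = append(events, ev) |
| //	} |
| //	mu := MutatorUtilizationV2(events, UtilSTW|UtilAssist) |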
| |
| func addUtil(util []MutatorUtil, mu MutatorUtil) []MutatorUtil { |
| if len(util) > 0 { |
| if mu.Util == util[len(util)-1].Util { |
| // No change. |
| return util |
| } |
| if mu.Time == util[len(util)-1].Time { |
| // Take the lowest utilization at a time stamp. |
| if mu.Util < util[len(util)-1].Util { |
| util[len(util)-1] = mu |
| } |
| return util |
| } |
| } |
| return append(util, mu) |
| } |
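| |
| // For example (hypothetical values), redundant points are coalesced |
| // and equal timestamps keep the lower utilization: |
| // |
| //	u := []MutatorUtil{{Time: 0, Util: 1}} |
| //	u = addUtil(u, MutatorUtil{Time: 5, Util: 1})   // same Util: dropped |
| //	u = addUtil(u, MutatorUtil{Time: 5, Util: 0.5}) // new point: appended |
| //	u = addUtil(u, MutatorUtil{Time: 5, Util: 0.2}) // same Time, lower Util: replaces |
| //	// u is now [{0 1} {5 0.2}] |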
| |
| // totalUtil is total utilization, measured in nanoseconds. This is a |
| // separate type primarily to distinguish it from mean utilization, |
| // which is also a float64. |
| type totalUtil float64 |
| |
| func totalUtilOf(meanUtil float64, dur int64) totalUtil { |
| return totalUtil(meanUtil * float64(dur)) |
| } |
| |
| // mean returns the mean utilization over dur. |
| func (u totalUtil) mean(dur time.Duration) float64 { |
| return float64(u) / float64(dur) |
| } |
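| |
| // As a worked example, 50% mean utilization over one second is half a |
| // second of total utilization: |
| // |
| //	tu := totalUtilOf(0.5, int64(time.Second)) // 5e8 ns of utilization |
| //	mean := tu.mean(time.Second)               // 0.5 |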
| |
| // An MMUCurve is the minimum mutator utilization curve across |
| // multiple window sizes. |
| type MMUCurve struct { |
| series []mmuSeries |
| } |
| |
| type mmuSeries struct { |
| util []MutatorUtil |
| // sums[j] is the cumulative sum of util[:j]. |
| sums []totalUtil |
| // bands summarizes util in non-overlapping bands of duration |
| // bandDur. |
| bands []mmuBand |
| // bandDur is the duration of each band. |
| bandDur int64 |
| } |
| |
| type mmuBand struct { |
| // minUtil is the minimum instantaneous mutator utilization in |
| // this band. |
| minUtil float64 |
| // cumUtil is the cumulative total mutator utilization between |
| // time 0 and the left edge of this band. |
| cumUtil totalUtil |
| |
| // integrator is the integrator for the left edge of this |
| // band. |
| integrator integrator |
| } |
| |
| // NewMMUCurve returns an MMU curve for the given mutator utilization |
| // function. |
| func NewMMUCurve(utils [][]MutatorUtil) *MMUCurve { |
| series := make([]mmuSeries, len(utils)) |
| for i, util := range utils { |
| series[i] = newMMUSeries(util) |
| } |
| return &MMUCurve{series} |
| } |
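| |
| // A sketch of the expected pipeline (mu is a hypothetical result of |
| // MutatorUtilization or MutatorUtilizationV2 above): |
| // |
| //	curve := NewMMUCurve(mu) |
| //	fmt.Println(curve.MMU(10 * time.Millisecond)) |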
| |
| // bandsPerSeries is the number of bands to divide each series into. |
| // This is only changed by tests. |
| var bandsPerSeries = 1000 |
| |
| func newMMUSeries(util []MutatorUtil) mmuSeries { |
| // Compute cumulative sum. |
| sums := make([]totalUtil, len(util)) |
| var prev MutatorUtil |
| var sum totalUtil |
| for j, u := range util { |
| sum += totalUtilOf(prev.Util, u.Time-prev.Time) |
| sums[j] = sum |
| prev = u |
| } |
| |
| // Divide the utilization curve up into equal size |
| // non-overlapping "bands" and compute a summary for each of |
| // these bands. |
| // |
| // Compute the duration of each band. |
| numBands := bandsPerSeries |
| if numBands > len(util) { |
| // There's no point in having lots of bands if there |
| // aren't many events. |
| numBands = len(util) |
| } |
| dur := util[len(util)-1].Time - util[0].Time |
| bandDur := (dur + int64(numBands) - 1) / int64(numBands) |
| if bandDur < 1 { |
| bandDur = 1 |
| } |
| // Compute the bands. There are numBands+1 bands in order to |
| // record the final cumulative sum. |
| bands := make([]mmuBand, numBands+1) |
| s := mmuSeries{util, sums, bands, bandDur} |
| leftSum := integrator{&s, 0} |
| for i := range bands { |
| startTime, endTime := s.bandTime(i) |
| cumUtil := leftSum.advance(startTime) |
| predIdx := leftSum.pos |
| minUtil := 1.0 |
| for i := predIdx; i < len(util) && util[i].Time < endTime; i++ { |
| minUtil = math.Min(minUtil, util[i].Util) |
| } |
| bands[i] = mmuBand{minUtil, cumUtil, leftSum} |
| } |
| |
| return s |
| } |
| |
| func (s *mmuSeries) bandTime(i int) (start, end int64) { |
| start = int64(i)*s.bandDur + s.util[0].Time |
| end = start + s.bandDur |
| return |
| } |
| |
| type bandUtil struct { |
| // Utilization series index |
| series int |
| // Band index |
| i int |
| // Lower bound of mutator utilization for all windows |
| // with a left edge in this band. |
| utilBound float64 |
| } |
| |
| type bandUtilHeap []bandUtil |
| |
| func (h bandUtilHeap) Len() int { |
| return len(h) |
| } |
| |
| func (h bandUtilHeap) Less(i, j int) bool { |
| return h[i].utilBound < h[j].utilBound |
| } |
| |
| func (h bandUtilHeap) Swap(i, j int) { |
| h[i], h[j] = h[j], h[i] |
| } |
| |
| func (h *bandUtilHeap) Push(x any) { |
| *h = append(*h, x.(bandUtil)) |
| } |
| |
| func (h *bandUtilHeap) Pop() any { |
| x := (*h)[len(*h)-1] |
| *h = (*h)[:len(*h)-1] |
| return x |
| } |
| |
| // UtilWindow is a specific window at Time. |
| type UtilWindow struct { |
| Time int64 |
| // MutatorUtil is the mean mutator utilization in this window. |
| MutatorUtil float64 |
| } |
| |
| type utilHeap []UtilWindow |
| |
| func (h utilHeap) Len() int { |
| return len(h) |
| } |
| |
| func (h utilHeap) Less(i, j int) bool { |
| if h[i].MutatorUtil != h[j].MutatorUtil { |
| return h[i].MutatorUtil > h[j].MutatorUtil |
| } |
| return h[i].Time > h[j].Time |
| } |
| |
| func (h utilHeap) Swap(i, j int) { |
| h[i], h[j] = h[j], h[i] |
| } |
| |
| func (h *utilHeap) Push(x any) { |
| *h = append(*h, x.(UtilWindow)) |
| } |
| |
| func (h *utilHeap) Pop() any { |
| x := (*h)[len(*h)-1] |
| *h = (*h)[:len(*h)-1] |
| return x |
| } |
| |
| // An accumulator takes a windowed mutator utilization function and |
| // tracks various statistics for that function. |
| type accumulator struct { |
| mmu float64 |
| |
| // bound is the mutator utilization bound where adding any |
| // mutator utilization above this bound cannot affect the |
| // accumulated statistics. |
| bound float64 |
| |
| // Worst N window tracking |
| nWorst int |
| wHeap utilHeap |
| |
| // Mutator utilization distribution tracking |
| mud *mud |
| // preciseMass is the distribution mass that must be precise |
| // before accumulation is stopped. |
| preciseMass float64 |
| // lastTime and lastMU are the previous point added to the |
| // windowed mutator utilization function. |
| lastTime int64 |
| lastMU float64 |
| } |
| |
| // resetTime declares a discontinuity in the windowed mutator |
| // utilization function by resetting the current time. |
| func (acc *accumulator) resetTime() { |
| // This only matters for distribution collection, since that's |
| // the only thing that depends on the progression of the |
| // windowed mutator utilization function. |
| acc.lastTime = math.MaxInt64 |
| } |
| |
| // addMU adds a point to the windowed mutator utilization function at |
| // (time, mu). This must be called for monotonically increasing values |
| // of time. |
| // |
| // It returns true if further calls to addMU would be pointless. |
| func (acc *accumulator) addMU(time int64, mu float64, window time.Duration) bool { |
| if mu < acc.mmu { |
| acc.mmu = mu |
| } |
| acc.bound = acc.mmu |
| |
| if acc.nWorst == 0 { |
| // If the minimum has reached zero, it can't go any |
| // lower, so we can stop early. |
| return mu == 0 |
| } |
| |
| // Consider adding this window to the n worst. |
| if len(acc.wHeap) < acc.nWorst || mu < acc.wHeap[0].MutatorUtil { |
| // This window is lower than the N'th worst window. |
| // |
| // Check if there's any overlapping window |
| // already in the heap and keep whichever is |
| // worse. |
| for i, ui := range acc.wHeap { |
| if time+int64(window) > ui.Time && ui.Time+int64(window) > time { |
| if ui.MutatorUtil <= mu { |
| // Keep the first window. |
| goto keep |
| } else { |
| // Replace it with this window. |
| heap.Remove(&acc.wHeap, i) |
| break |
| } |
| } |
| } |
| |
| heap.Push(&acc.wHeap, UtilWindow{time, mu}) |
| if len(acc.wHeap) > acc.nWorst { |
| heap.Pop(&acc.wHeap) |
| } |
| keep: |
| } |
| |
| if len(acc.wHeap) < acc.nWorst { |
| // We don't have N windows yet, so keep accumulating. |
| acc.bound = 1.0 |
| } else { |
| // Anything above the least worst window has no effect. |
| acc.bound = math.Max(acc.bound, acc.wHeap[0].MutatorUtil) |
| } |
| |
| if acc.mud != nil { |
| if acc.lastTime != math.MaxInt64 { |
| // Update distribution. |
| acc.mud.add(acc.lastMU, mu, float64(time-acc.lastTime)) |
| } |
| acc.lastTime, acc.lastMU = time, mu |
| if _, mudBound, ok := acc.mud.approxInvCumulativeSum(); ok { |
| acc.bound = math.Max(acc.bound, mudBound) |
| } else { |
| // We haven't accumulated enough total precise |
| // mass yet to even reach our goal, so keep |
| // accumulating. |
| acc.bound = 1 |
| } |
| // It's not worth checking percentiles every time, so |
| // just keep accumulating this band. |
| return false |
| } |
| |
| // If we've found enough 0 utilizations, we can stop immediately. |
| return len(acc.wHeap) == acc.nWorst && acc.wHeap[0].MutatorUtil == 0 |
| } |
| |
| // MMU returns the minimum mutator utilization for the given time |
| // window. This is the minimum utilization for all windows of this |
| // duration across the execution. The returned value is in the range |
| // [0, 1]. |
| func (c *MMUCurve) MMU(window time.Duration) (mmu float64) { |
| acc := accumulator{mmu: 1.0, bound: 1.0} |
| c.mmu(window, &acc) |
| return acc.mmu |
| } |
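| |
| // For example, to report the worst mean utilization over any 10ms |
| // window (c is a hypothetical *MMUCurve): |
| // |
| //	fmt.Printf("10ms MMU: %.3f\n", c.MMU(10*time.Millisecond)) |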
| |
| // Examples returns n specific examples of the lowest mutator |
| // utilization for the given window size. The returned windows will be |
| // disjoint (otherwise there would be a huge number of |
| // mostly-overlapping windows at the single lowest point). There are |
| // no guarantees on which set of disjoint windows this returns. |
| func (c *MMUCurve) Examples(window time.Duration, n int) (worst []UtilWindow) { |
| acc := accumulator{mmu: 1.0, bound: 1.0, nWorst: n} |
| c.mmu(window, &acc) |
| sort.Sort(sort.Reverse(acc.wHeap)) |
| return ([]UtilWindow)(acc.wHeap) |
| } |
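| |
| // A sketch of listing the five worst 10ms windows (c is a hypothetical |
| // *MMUCurve): |
| // |
| //	for _, w := range c.Examples(10*time.Millisecond, 5) { |
| //		fmt.Printf("t=%d: mean utilization %.3f\n", w.Time, w.MutatorUtil) |
| //	} |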
| |
| // MUD returns mutator utilization distribution quantiles for the |
| // given window size. |
| // |
| // The mutator utilization distribution is the distribution of mean |
| // mutator utilization across all windows of the given window size in |
| // the trace. |
| // |
| // The minimum mutator utilization is the minimum (0th percentile) of |
| // this distribution. (However, if only the minimum is desired, it's |
| // more efficient to use the MMU method.) |
| func (c *MMUCurve) MUD(window time.Duration, quantiles []float64) []float64 { |
| if len(quantiles) == 0 { |
| return []float64{} |
| } |
| |
| // Each unrefined band contributes a known total mass to the |
| // distribution (bandDur except at the end), but in an unknown |
| // way. However, we know that all the mass it contributes must |
| // be at or above its worst-case mean mutator utilization. |
| // |
| // Hence, we refine bands until the highest desired |
| // distribution quantile is less than the next worst-case mean |
| // mutator utilization. At this point, all further |
| // contributions to the distribution must be beyond the |
| // desired quantile and hence cannot affect it. |
| // |
| // First, find the highest desired distribution quantile. |
| maxQ := quantiles[0] |
| for _, q := range quantiles { |
| if q > maxQ { |
| maxQ = q |
| } |
| } |
| // The distribution's mass is in units of time (it's not |
| // normalized because this would make it more annoying to |
| // account for future contributions of unrefined bands). The |
| // total final mass will be the duration of the trace itself |
| // minus the window size. Using this, we can compute the mass |
| // corresponding to quantile maxQ. |
| var duration int64 |
| for _, s := range c.series { |
| duration1 := s.util[len(s.util)-1].Time - s.util[0].Time |
| if duration1 >= int64(window) { |
| duration += duration1 - int64(window) |
| } |
| } |
| qMass := float64(duration) * maxQ |
| |
| // Accumulate the MUD until we have precise information for |
| // everything to the left of qMass. |
| acc := accumulator{mmu: 1.0, bound: 1.0, preciseMass: qMass, mud: new(mud)} |
| acc.mud.setTrackMass(qMass) |
| c.mmu(window, &acc) |
| |
| // Evaluate the quantiles on the accumulated MUD. |
| out := make([]float64, len(quantiles)) |
| for i := range out { |
| mu, _ := acc.mud.invCumulativeSum(float64(duration) * quantiles[i]) |
| if math.IsNaN(mu) { |
| // There are a few legitimate ways this can |
| // happen: |
| // |
| // 1. If the window is the full trace |
| // duration, then the windowed MU function is |
| // only defined at a single point, so the MU |
| // distribution is not well-defined. |
| // |
| // 2. If there are no events, then the MU |
| // distribution has no mass. |
| // |
| // Either way, all of the quantiles will have |
| // converged toward the MMU at this point. |
| mu = acc.mmu |
| } |
| out[i] = mu |
| } |
| return out |
| } |
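| |
| // A sketch of querying the distribution (c is a hypothetical |
| // *MMUCurve; quantiles are fractions in [0, 1]): |
| // |
| //	qs := c.MUD(10*time.Millisecond, []float64{0, 0.5, 0.99}) |
| //	// qs[0] is the MMU, qs[1] the median, qs[2] the 99th percentile |
| //	// of mean utilization across all 10ms windows. |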
| |
| func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) { |
| if window <= 0 { |
| acc.mmu = 0 |
| return |
| } |
| |
| var bandU bandUtilHeap |
| windows := make([]time.Duration, len(c.series)) |
| for i, s := range c.series { |
| windows[i] = window |
| if max := time.Duration(s.util[len(s.util)-1].Time - s.util[0].Time); window > max { |
| windows[i] = max |
| } |
| |
| bandU1 := bandUtilHeap(s.mkBandUtil(i, windows[i])) |
| if bandU == nil { |
| bandU = bandU1 |
| } else { |
| bandU = append(bandU, bandU1...) |
| } |
| } |
| |
| // Process bands from lowest utilization bound to highest. |
| heap.Init(&bandU) |
| |
| // Refine each band into a precise window and MMU until |
| // refining the next lowest band can no longer affect the MMU |
| // or windows. |
| for len(bandU) > 0 && bandU[0].utilBound < acc.bound { |
| i := bandU[0].series |
| c.series[i].bandMMU(bandU[0].i, windows[i], acc) |
| heap.Pop(&bandU) |
| } |
| } |
| |
| func (c *mmuSeries) mkBandUtil(series int, window time.Duration) []bandUtil { |
| // For each band, compute the worst-possible total mutator |
| // utilization for all windows that start in that band. |
| |
| // minBands is the minimum number of bands a window can span |
| // and maxBands is the maximum number of bands a window can |
| // span in any alignment. |
| minBands := int((int64(window) + c.bandDur - 1) / c.bandDur) |
| maxBands := int((int64(window) + 2*(c.bandDur-1)) / c.bandDur) |
| if window > 1 && maxBands < 2 { |
| panic("maxBands < 2") |
| } |
| tailDur := int64(window) % c.bandDur |
| nUtil := len(c.bands) - maxBands + 1 |
| if nUtil < 0 { |
| nUtil = 0 |
| } |
| bandU := make([]bandUtil, nUtil) |
| for i := range bandU { |
| // To compute the worst-case MU, we assume the minimum |
| // for any bands that are only partially overlapped by |
| // some window and the mean for any bands that are |
| // completely covered by all windows. |
| var util totalUtil |
| |
| // Find the lowest and second lowest of the partial |
| // bands. |
| l := c.bands[i].minUtil |
| r1 := c.bands[i+minBands-1].minUtil |
| r2 := c.bands[i+maxBands-1].minUtil |
| minBand := math.Min(l, math.Min(r1, r2)) |
| // Assume the worst window maximally overlaps the |
| // worst minimum and then the rest overlaps the second |
| // worst minimum. |
| if minBands == 1 { |
| util += totalUtilOf(minBand, int64(window)) |
| } else { |
| util += totalUtilOf(minBand, c.bandDur) |
| midBand := 0.0 |
| switch { |
| case minBand == l: |
| midBand = math.Min(r1, r2) |
| case minBand == r1: |
| midBand = math.Min(l, r2) |
| case minBand == r2: |
| midBand = math.Min(l, r1) |
| } |
| util += totalUtilOf(midBand, tailDur) |
| } |
| |
| // Add the total mean MU of bands that are completely |
| // overlapped by all windows. |
| if minBands > 2 { |
| util += c.bands[i+minBands-1].cumUtil - c.bands[i+1].cumUtil |
| } |
| |
| bandU[i] = bandUtil{series, i, util.mean(window)} |
| } |
| |
| return bandU |
| } |
| |
| // bandMMU computes the precise minimum mutator utilization for |
| // windows with a left edge in band bandIdx. |
| func (c *mmuSeries) bandMMU(bandIdx int, window time.Duration, acc *accumulator) { |
| util := c.util |
| |
| // We think of the mutator utilization over time as the |
| // box-filtered utilization function, which we call the |
| // "windowed mutator utilization function". The resulting |
| // function is continuous and piecewise linear (unless |
| // window==0, which we handle elsewhere), where the boundaries |
| // between segments occur when either edge of the window |
| // encounters a change in the instantaneous mutator |
| // utilization function. Hence, the minimum of this function |
| // will always occur when one of the edges of the window |
| // aligns with a utilization change, so these are the only |
| // points we need to consider. |
| // |
| // We compute the mutator utilization function incrementally |
| // by tracking the integral from t=0 to the left edge of the |
| // window and to the right edge of the window. |
| left := c.bands[bandIdx].integrator |
| right := left |
| time, endTime := c.bandTime(bandIdx) |
| if utilEnd := util[len(util)-1].Time - int64(window); utilEnd < endTime { |
| endTime = utilEnd |
| } |
| acc.resetTime() |
| for { |
| // Advance edges to time and time+window. |
| mu := (right.advance(time+int64(window)) - left.advance(time)).mean(window) |
| if acc.addMU(time, mu, window) { |
| break |
| } |
| if time == endTime { |
| break |
| } |
| |
| // The maximum slope of the windowed mutator |
| // utilization function is 1/window, so we can always |
| // advance the time by at least (mu - acc.bound) * window |
| // without dropping below acc.bound. |
| minTime := time + int64((mu-acc.bound)*float64(window)) |
| |
| // Advance the window to the next time where either |
| // the left or right edge of the window encounters a |
| // change in the utilization curve. |
| if t1, t2 := left.next(time), right.next(time+int64(window))-int64(window); t1 < t2 { |
| time = t1 |
| } else { |
| time = t2 |
| } |
| if time < minTime { |
| time = minTime |
| } |
| if time >= endTime { |
| // For MMUs we could stop here, but for MUDs |
| // it's important that we span the entire |
| // band. |
| time = endTime |
| } |
| } |
| } |
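| |
| // For instance (illustrative numbers), with window = 10ns and a single |
| // utilization step from 1 to 0 at t=100, the windowed function falls |
| // linearly from 1 at t=90 (window [90, 100]) to 0 at t=100 (window |
| // [100, 110]); its minimum occurs exactly when the window's left edge |
| // aligns with the utilization change. |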
| |
| // An integrator tracks a position in a utilization function and |
| // integrates it. |
| type integrator struct { |
| u *mmuSeries |
| // pos is the index in u.util of the current time's non-strict |
| // predecessor. |
| pos int |
| } |
| |
| // advance returns the integral of the utilization function from 0 to |
| // time. advance must be called on monotonically increasing values of |
| // time. |
| func (in *integrator) advance(time int64) totalUtil { |
| util, pos := in.u.util, in.pos |
| // Advance pos until pos+1 is time's strict successor (making |
| // pos time's non-strict predecessor). |
| // |
| // Very often, this will be nearby, so we optimize that case, |
| // but it may be arbitrarily far away, so we handle that |
| // efficiently, too. |
| const maxSeq = 8 |
| if pos+maxSeq < len(util) && util[pos+maxSeq].Time > time { |
| // Nearby. Use a linear scan. |
| for pos+1 < len(util) && util[pos+1].Time <= time { |
| pos++ |
| } |
| } else { |
| // Far. Binary search for time's strict successor. |
| l, r := pos, len(util) |
| for l < r { |
| h := int(uint(l+r) >> 1) |
| if util[h].Time <= time { |
| l = h + 1 |
| } else { |
| r = h |
| } |
| } |
| pos = l - 1 // Non-strict predecessor. |
| } |
| in.pos = pos |
| var partial totalUtil |
| if time != util[pos].Time { |
| partial = totalUtilOf(util[pos].Util, time-util[pos].Time) |
| } |
| return in.u.sums[pos] + partial |
| } |
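| |
| // As a worked example (illustrative values), for util = [{0, 1}, |
| // {10, 0}] we have sums = [0, 10], and advance(15) returns 10: 10ns of |
| // full utilization up to t=10, plus nothing from the zero-utilization |
| // region [10, 15]. |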
| |
| // next returns the smallest time t' > time of a change in the |
| // utilization function. |
| func (in *integrator) next(time int64) int64 { |
| for _, u := range in.u.util[in.pos:] { |
| if u.Time > time { |
| return u.Time |
| } |
| } |
| return 1<<63 - 1 |
| } |
| |
| func isGCSTW(r tracev2.Range) bool { |
| return strings.HasPrefix(r.Name, "stop-the-world") && strings.Contains(r.Name, "GC") |
| } |
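| |
| // For example, isGCSTW would match a hypothetical range name like |
| // "stop-the-world (GC mark termination)" but not |
| // "stop-the-world (write heap dump)", which lacks "GC". |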
| |
| func isGCMarkAssist(r tracev2.Range) bool { |
| return r.Name == "GC mark assist" |
| } |
| |
| func isGCSweep(r tracev2.Range) bool { |
| return r.Name == "GC incremental sweep" |
| } |