// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

import (
	"context"
	"log"
	"math/rand/v2"
	"os"
	"runtime"
	"runtime/debug"
	"runtime/metrics"
	"runtime/trace"
	"sync/atomic"
)

func init() {
	register("TraceSTW", TraceSTW)
	register("TraceGCSTW", TraceGCSTW)
}

// The parent writes to ping and waits for the children to write back
// via pong to show that they are running.
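// Each child writes only its own pong slot, so the two spinners never
// store to the same word.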
var ping atomic.Uint32
var pong [2]atomic.Uint32

// Tell runners to stop.
var stop atomic.Bool

func traceSTWTarget(i int) {
	for !stop.Load() {
		// Async preemption often takes 100ms+ to preempt this loop on
		// windows-386. This makes the test flaky, as the traceReadCPU
		// timer often fires by the time STW finishes, jumbling the
		// goroutine scheduling. As a workaround, ensure we have a
		// morestack call for prompt preemption.
		ensureMorestack()

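		// Echo the latest ping value so the parent can see that this
		// goroutine is still running.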
		pong[i].Store(ping.Load())
	}
}

func TraceSTW() {
	ctx := context.Background()

	// The idea here is to have 2 target goroutines that are constantly
	// running. When the world restarts after STW, we expect these
	// goroutines to continue execution on the same M and P.
	//
	// Set GOMAXPROCS=4 to make room for the 2 target goroutines, 1 parent,
	// and 1 spare for potential mis-scheduling.
	//
	// Disable the GC because GC STW generally moves goroutines (see
	// https://go.dev/issue/65694). Alternatively, we could just ignore the
	// trace if the GC runs.
	runtime.GOMAXPROCS(4)
	debug.SetGCPercent(-1)

	if err := trace.Start(os.Stdout); err != nil {
		log.Fatalf("failed to start tracing: %v", err)
	}
	defer trace.Stop()

	for i := range 2 {
		go traceSTWTarget(i)
	}

	// Wait for children to start running.
	ping.Store(1)
	for pong[0].Load() != 1 {}
	for pong[1].Load() != 1 {}

	trace.Log(ctx, "TraceSTW", "start")

	// STW
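	// runtime.ReadMemStats has to stop the world to take a consistent
	// snapshot of the memory statistics; that pause is the STW event
	// this test is about.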
	var ms runtime.MemStats
	runtime.ReadMemStats(&ms)

	// Make sure to run long enough for the children to schedule again
	// after STW.
	ping.Store(2)
	for pong[0].Load() != 2 {}
	for pong[1].Load() != 2 {}

	trace.Log(ctx, "TraceSTW", "end")

	stop.Store(true)
}

// Variant of TraceSTW for GC STWs. We want the GC mark workers to start on
// previously-idle Ps, rather than bumping the current P.
func TraceGCSTW() {
	ctx := context.Background()

	// The idea here is to have 2 target goroutines that are constantly
	// running. When the world restarts after STW, we expect these
	// goroutines to continue execution on the same M and P.
	//
	// Set GOMAXPROCS=8 to make room for the 2 target goroutines, 1 parent,
	// 2 dedicated GC mark workers, and a bit of slack.
	//
	// Disable the GC initially so we can be sure it only triggers once we
	// are ready.
	runtime.GOMAXPROCS(8)
	debug.SetGCPercent(-1)

	if err := trace.Start(os.Stdout); err != nil {
		log.Fatalf("failed to start tracing: %v", err)
	}
	defer trace.Stop()

	for i := range 2 {
		go traceSTWTarget(i)
	}

	// Wait for children to start running.
	ping.Store(1)
	for pong[0].Load() != 1 {}
	for pong[1].Load() != 1 {}

	trace.Log(ctx, "TraceSTW", "start")

	// STW
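	// Starting a GC cycle stops the world twice: once to begin the mark
	// phase and once for mark termination.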
	triggerGC()

	// Make sure to run long enough for the children to schedule again
	// after STW. This is included for good measure, but the goroutines
	// really ought to have already scheduled since the entire GC
	// completed.
	ping.Store(2)
	for pong[0].Load() != 2 {}
	for pong[1].Load() != 2 {}

	trace.Log(ctx, "TraceSTW", "end")

	stop.Store(true)
}

func triggerGC() {
	// Allocate a bunch to trigger the GC rather than using runtime.GC. The
	// latter blocks until the GC is complete, which is convenient, but
	// messes with scheduling as it gives this P a chance to steal the
	// other goroutines before their Ps get up and running again.

	// Bring heap size up prior to enabling the GC to ensure that there is
	// a decent amount of work in case the GC triggers immediately upon
	// re-enabling.
	for range 1000 {
		alloc()
	}

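	// /gc/cycles/total:gc-cycles counts completed GC cycles, so a change
	// in its value tells us the triggered cycle has finished.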
	sample := make([]metrics.Sample, 1)
	sample[0].Name = "/gc/cycles/total:gc-cycles"
	metrics.Read(sample)

	start := sample[0].Value.Uint64()

	debug.SetGCPercent(100)

	// Keep allocating until the GC is complete. We really only need to
	// continue until the mark workers are scheduled, but there isn't a
	// good way to measure that.
	for {
		metrics.Read(sample)
		if sample[0].Value.Uint64() != start {
			return
		}

		alloc()
	}
}

// Allocate a tree data structure to generate plenty of scan work for the GC.

type node struct {
	children []*node
}

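// gcSink is a global so the tree stays reachable and the GC must scan it.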
var gcSink node

func alloc() {
	// 10% chance of adding a node at each layer.

	curr := &gcSink
	for {
		if len(curr.children) == 0 || rand.Float32() < 0.1 {
			curr.children = append(curr.children, new(node))
			return
		}

		i := rand.IntN(len(curr.children))
		curr = curr.children[i]
	}
}

// Manually insert a morestack call. Leaf functions may omit the morestack
// stack-growth check, but non-leaf functions include it, giving the
// scheduler a prompt preemption point.

//go:noinline
func ensureMorestack() {
	ensureMorestack1()
}

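// ensureMorestack1 exists only to give ensureMorestack a callee, making it
// a non-leaf function.
//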
//go:noinline
func ensureMorestack1() {
}