|  | // Copyright 2020 The Go Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style | 
|  | // license that can be found in the LICENSE file. | 
|  |  | 
|  | package runtime_test | 
|  |  | 
|  | import ( | 
|  | "bytes" | 
|  | "fmt" | 
|  | "internal/goexperiment" | 
|  | "internal/profile" | 
|  | "os" | 
|  | "reflect" | 
|  | "runtime" | 
|  | "runtime/debug" | 
|  | "runtime/metrics" | 
|  | "runtime/pprof" | 
|  | "runtime/trace" | 
|  | "slices" | 
|  | "sort" | 
|  | "strings" | 
|  | "sync" | 
|  | "sync/atomic" | 
|  | "testing" | 
|  | "time" | 
|  | "unsafe" | 
|  | ) | 
|  |  | 
|  | func prepareAllMetricsSamples() (map[string]metrics.Description, []metrics.Sample) { | 
|  | all := metrics.All() | 
|  | samples := make([]metrics.Sample, len(all)) | 
|  | descs := make(map[string]metrics.Description) | 
|  | for i := range all { | 
|  | samples[i].Name = all[i].Name | 
|  | descs[all[i].Name] = all[i] | 
|  | } | 
|  | return descs, samples | 
|  | } | 
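
// prepareAllMetricsSamples follows the canonical runtime/metrics pattern:
// enumerate descriptions with metrics.All, allocate one Sample per metric,
// and hand the slice to metrics.Read. As an illustrative sketch of a full
// one-shot read using that pattern (a hypothetical helper, not used by the
// tests below):
func readAllMetricsOnce() map[string]metrics.Value {
	descs := metrics.All()
	samples := make([]metrics.Sample, len(descs))
	for i := range descs {
		samples[i].Name = descs[i].Name
	}
	metrics.Read(samples)
	// Index the results by name for convenient lookup.
	values := make(map[string]metrics.Value, len(samples))
	for _, s := range samples {
		values[s.Name] = s.Value
	}
	return values
}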
|  |  | 
|  | func TestReadMetrics(t *testing.T) { | 
|  | // Run a GC cycle to get some of the stats to be non-zero. | 
|  | runtime.GC() | 
|  |  | 
	// Set an arbitrary memory limit to check the metric for it.
|  | limit := int64(512 * 1024 * 1024) | 
|  | oldLimit := debug.SetMemoryLimit(limit) | 
|  | defer debug.SetMemoryLimit(oldLimit) | 
|  |  | 
	// Set a GC percent to check the metric for it.
|  | gcPercent := 99 | 
|  | oldGCPercent := debug.SetGCPercent(gcPercent) | 
|  | defer debug.SetGCPercent(oldGCPercent) | 
|  |  | 
|  | // Tests whether readMetrics produces values aligning | 
|  | // with ReadMemStats while the world is stopped. | 
|  | var mstats runtime.MemStats | 
|  | _, samples := prepareAllMetricsSamples() | 
|  | runtime.ReadMetricsSlow(&mstats, unsafe.Pointer(&samples[0]), len(samples), cap(samples)) | 
|  |  | 
|  | checkUint64 := func(t *testing.T, m string, got, want uint64) { | 
|  | t.Helper() | 
|  | if got != want { | 
|  | t.Errorf("metric %q: got %d, want %d", m, got, want) | 
|  | } | 
|  | } | 
|  |  | 
|  | // Check to make sure the values we read line up with other values we read. | 
|  | var allocsBySize, gcPauses, schedPausesTotalGC *metrics.Float64Histogram | 
|  | var tinyAllocs uint64 | 
|  | var mallocs, frees uint64 | 
|  | for i := range samples { | 
|  | switch name := samples[i].Name; name { | 
|  | case "/cgo/go-to-c-calls:calls": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), uint64(runtime.NumCgoCall())) | 
|  | case "/memory/classes/heap/free:bytes": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapIdle-mstats.HeapReleased) | 
|  | case "/memory/classes/heap/released:bytes": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapReleased) | 
|  | case "/memory/classes/heap/objects:bytes": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapAlloc) | 
|  | case "/memory/classes/heap/unused:bytes": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapInuse-mstats.HeapAlloc) | 
|  | case "/memory/classes/heap/stacks:bytes": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackInuse) | 
|  | case "/memory/classes/metadata/mcache/free:bytes": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheSys-mstats.MCacheInuse) | 
|  | case "/memory/classes/metadata/mcache/inuse:bytes": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheInuse) | 
|  | case "/memory/classes/metadata/mspan/free:bytes": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanSys-mstats.MSpanInuse) | 
|  | case "/memory/classes/metadata/mspan/inuse:bytes": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanInuse) | 
|  | case "/memory/classes/metadata/other:bytes": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), mstats.GCSys) | 
|  | case "/memory/classes/os-stacks:bytes": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackSys-mstats.StackInuse) | 
|  | case "/memory/classes/other:bytes": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), mstats.OtherSys) | 
|  | case "/memory/classes/profiling/buckets:bytes": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), mstats.BuckHashSys) | 
|  | case "/memory/classes/total:bytes": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), mstats.Sys) | 
|  | case "/gc/heap/allocs-by-size:bytes": | 
|  | hist := samples[i].Value.Float64Histogram() | 
|  | // Skip size class 0 in BySize, because it's always empty and not represented | 
|  | // in the histogram. | 
|  | for i, sc := range mstats.BySize[1:] { | 
|  | if b, s := hist.Buckets[i+1], float64(sc.Size+1); b != s { | 
|  | t.Errorf("bucket does not match size class: got %f, want %f", b, s) | 
|  | // The rest of the checks aren't expected to work anyway. | 
|  | continue | 
|  | } | 
|  | if c, m := hist.Counts[i], sc.Mallocs; c != m { | 
|  | t.Errorf("histogram counts do not much BySize for class %d: got %d, want %d", i, c, m) | 
|  | } | 
|  | } | 
|  | allocsBySize = hist | 
|  | case "/gc/heap/allocs:bytes": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), mstats.TotalAlloc) | 
|  | case "/gc/heap/frees-by-size:bytes": | 
|  | hist := samples[i].Value.Float64Histogram() | 
|  | // Skip size class 0 in BySize, because it's always empty and not represented | 
|  | // in the histogram. | 
|  | for i, sc := range mstats.BySize[1:] { | 
|  | if b, s := hist.Buckets[i+1], float64(sc.Size+1); b != s { | 
|  | t.Errorf("bucket does not match size class: got %f, want %f", b, s) | 
|  | // The rest of the checks aren't expected to work anyway. | 
|  | continue | 
|  | } | 
|  | if c, f := hist.Counts[i], sc.Frees; c != f { | 
|  | t.Errorf("histogram counts do not match BySize for class %d: got %d, want %d", i, c, f) | 
|  | } | 
|  | } | 
|  | case "/gc/heap/frees:bytes": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), mstats.TotalAlloc-mstats.HeapAlloc) | 
|  | case "/gc/heap/tiny/allocs:objects": | 
			// Currently, MemStats adds the tiny alloc count to both Mallocs AND Frees.
			// This is because MemStats couldn't be extended at the time, but there
			// was a desire to have Mallocs at least be a little more representative,
			// while having Mallocs - Frees still represent a live object count.
			// Unfortunately, MemStats doesn't actually export a non-tiny allocation
			// count, so it's impossible to pull this number out directly.
			//
			// Check the tiny allocation count outside of this loop, by using the
			// allocs-by-size histogram in order to figure out how many non-tiny
			// objects there are.
|  | tinyAllocs = samples[i].Value.Uint64() | 
			// The next two metrics are checked against Mallocs and Frees, so we
			// can't check them directly here either: we must first account for the
			// tiny allocations included in both Mallocs and Frees.
|  | case "/gc/heap/allocs:objects": | 
|  | mallocs = samples[i].Value.Uint64() | 
|  | case "/gc/heap/frees:objects": | 
|  | frees = samples[i].Value.Uint64() | 
|  | case "/gc/heap/live:bytes": | 
|  | // Check for "obviously wrong" values. We can't check a stronger invariant, | 
|  | // such as live <= HeapAlloc, because live is not 100% accurate. It's computed | 
|  | // under racy conditions, and some objects may be double-counted (this is | 
|  | // intentional and necessary for GC performance). | 
|  | // | 
|  | // Instead, check against a much more reasonable upper-bound: the amount of | 
|  | // mapped heap memory. We can't possibly overcount to the point of exceeding | 
|  | // total mapped heap memory, except if there's an accounting bug. | 
|  | if live := samples[i].Value.Uint64(); live > mstats.HeapSys { | 
|  | t.Errorf("live bytes: %d > heap sys: %d", live, mstats.HeapSys) | 
|  | } else if live == 0 { | 
|  | // Might happen if we don't call runtime.GC() above. | 
|  | t.Error("live bytes is 0") | 
|  | } | 
|  | case "/gc/gomemlimit:bytes": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), uint64(limit)) | 
|  | case "/gc/heap/objects:objects": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapObjects) | 
|  | case "/gc/heap/goal:bytes": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), mstats.NextGC) | 
|  | case "/gc/gogc:percent": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), uint64(gcPercent)) | 
|  | case "/gc/cycles/automatic:gc-cycles": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC-mstats.NumForcedGC)) | 
|  | case "/gc/cycles/forced:gc-cycles": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumForcedGC)) | 
|  | case "/gc/cycles/total:gc-cycles": | 
|  | checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC)) | 
|  | case "/gc/pauses:seconds": | 
|  | gcPauses = samples[i].Value.Float64Histogram() | 
|  | case "/sched/pauses/total/gc:seconds": | 
|  | schedPausesTotalGC = samples[i].Value.Float64Histogram() | 
|  | } | 
|  | } | 
|  |  | 
|  | // Check tinyAllocs. | 
|  | nonTinyAllocs := uint64(0) | 
|  | for _, c := range allocsBySize.Counts { | 
|  | nonTinyAllocs += c | 
|  | } | 
|  | checkUint64(t, "/gc/heap/tiny/allocs:objects", tinyAllocs, mstats.Mallocs-nonTinyAllocs) | 
|  |  | 
|  | // Check allocation and free counts. | 
|  | checkUint64(t, "/gc/heap/allocs:objects", mallocs, mstats.Mallocs-tinyAllocs) | 
|  | checkUint64(t, "/gc/heap/frees:objects", frees, mstats.Frees-tinyAllocs) | 
|  |  | 
	// Verify that /gc/pauses:seconds is a copy of /sched/pauses/total/gc:seconds.
	if !reflect.DeepEqual(gcPauses.Buckets, schedPausesTotalGC.Buckets) {
		t.Errorf("/gc/pauses:seconds buckets %v do not match /sched/pauses/total/gc:seconds buckets %v", gcPauses.Buckets, schedPausesTotalGC.Buckets)
|  | } | 
|  | if !reflect.DeepEqual(gcPauses.Counts, schedPausesTotalGC.Counts) { | 
|  | t.Errorf("/gc/pauses:seconds counts %v do not match /sched/pauses/total/gc:seconds counts %v", gcPauses.Counts, schedPausesTotalGC.Counts) | 
|  | } | 
|  | } | 
|  |  | 
|  | func TestReadMetricsConsistency(t *testing.T) { | 
	// Tests whether readMetrics produces consistent, sensible values.
	// The values are read concurrently with the runtime doing other
	// things (e.g. allocating), so what we read can't reasonably be
	// compared to other runtime values (e.g. MemStats).
|  |  | 
|  | // Run a few GC cycles to get some of the stats to be non-zero. | 
|  | runtime.GC() | 
|  | runtime.GC() | 
|  | runtime.GC() | 
|  |  | 
|  | // Set GOMAXPROCS high then sleep briefly to ensure we generate | 
|  | // some idle time. | 
|  | oldmaxprocs := runtime.GOMAXPROCS(10) | 
|  | time.Sleep(time.Millisecond) | 
|  | runtime.GOMAXPROCS(oldmaxprocs) | 
|  |  | 
|  | // Read all the supported metrics through the metrics package. | 
|  | descs, samples := prepareAllMetricsSamples() | 
|  | metrics.Read(samples) | 
|  |  | 
|  | // Check to make sure the values we read make sense. | 
|  | var totalVirtual struct { | 
|  | got, want uint64 | 
|  | } | 
|  | var objects struct { | 
|  | alloc, free             *metrics.Float64Histogram | 
|  | allocs, frees           uint64 | 
|  | allocdBytes, freedBytes uint64 | 
|  | total, totalBytes       uint64 | 
|  | } | 
|  | var gc struct { | 
|  | numGC  uint64 | 
|  | pauses uint64 | 
|  | } | 
|  | var totalScan struct { | 
|  | got, want uint64 | 
|  | } | 
|  | var cpu struct { | 
|  | gcAssist    float64 | 
|  | gcDedicated float64 | 
|  | gcIdle      float64 | 
|  | gcPause     float64 | 
|  | gcTotal     float64 | 
|  |  | 
|  | idle float64 | 
|  | user float64 | 
|  |  | 
|  | scavengeAssist float64 | 
|  | scavengeBg     float64 | 
|  | scavengeTotal  float64 | 
|  |  | 
|  | total float64 | 
|  | } | 
|  | for i := range samples { | 
|  | kind := samples[i].Value.Kind() | 
|  | if want := descs[samples[i].Name].Kind; kind != want { | 
|  | t.Errorf("supported metric %q has unexpected kind: got %d, want %d", samples[i].Name, kind, want) | 
|  | continue | 
|  | } | 
|  | if samples[i].Name != "/memory/classes/total:bytes" && strings.HasPrefix(samples[i].Name, "/memory/classes") { | 
|  | v := samples[i].Value.Uint64() | 
|  | totalVirtual.want += v | 
|  |  | 
|  | // None of these stats should ever get this big. | 
|  | // If they do, there's probably overflow involved, | 
|  | // usually due to bad accounting. | 
|  | if int64(v) < 0 { | 
|  | t.Errorf("%q has high/negative value: %d", samples[i].Name, v) | 
|  | } | 
|  | } | 
|  | switch samples[i].Name { | 
|  | case "/cpu/classes/gc/mark/assist:cpu-seconds": | 
|  | cpu.gcAssist = samples[i].Value.Float64() | 
|  | case "/cpu/classes/gc/mark/dedicated:cpu-seconds": | 
|  | cpu.gcDedicated = samples[i].Value.Float64() | 
|  | case "/cpu/classes/gc/mark/idle:cpu-seconds": | 
|  | cpu.gcIdle = samples[i].Value.Float64() | 
|  | case "/cpu/classes/gc/pause:cpu-seconds": | 
|  | cpu.gcPause = samples[i].Value.Float64() | 
|  | case "/cpu/classes/gc/total:cpu-seconds": | 
|  | cpu.gcTotal = samples[i].Value.Float64() | 
|  | case "/cpu/classes/idle:cpu-seconds": | 
|  | cpu.idle = samples[i].Value.Float64() | 
|  | case "/cpu/classes/scavenge/assist:cpu-seconds": | 
|  | cpu.scavengeAssist = samples[i].Value.Float64() | 
|  | case "/cpu/classes/scavenge/background:cpu-seconds": | 
|  | cpu.scavengeBg = samples[i].Value.Float64() | 
|  | case "/cpu/classes/scavenge/total:cpu-seconds": | 
|  | cpu.scavengeTotal = samples[i].Value.Float64() | 
|  | case "/cpu/classes/total:cpu-seconds": | 
|  | cpu.total = samples[i].Value.Float64() | 
|  | case "/cpu/classes/user:cpu-seconds": | 
|  | cpu.user = samples[i].Value.Float64() | 
|  | case "/memory/classes/total:bytes": | 
|  | totalVirtual.got = samples[i].Value.Uint64() | 
|  | case "/memory/classes/heap/objects:bytes": | 
|  | objects.totalBytes = samples[i].Value.Uint64() | 
|  | case "/gc/heap/objects:objects": | 
|  | objects.total = samples[i].Value.Uint64() | 
|  | case "/gc/heap/allocs:bytes": | 
|  | objects.allocdBytes = samples[i].Value.Uint64() | 
|  | case "/gc/heap/allocs:objects": | 
|  | objects.allocs = samples[i].Value.Uint64() | 
|  | case "/gc/heap/allocs-by-size:bytes": | 
|  | objects.alloc = samples[i].Value.Float64Histogram() | 
|  | case "/gc/heap/frees:bytes": | 
|  | objects.freedBytes = samples[i].Value.Uint64() | 
|  | case "/gc/heap/frees:objects": | 
|  | objects.frees = samples[i].Value.Uint64() | 
|  | case "/gc/heap/frees-by-size:bytes": | 
|  | objects.free = samples[i].Value.Float64Histogram() | 
|  | case "/gc/cycles:gc-cycles": | 
|  | gc.numGC = samples[i].Value.Uint64() | 
|  | case "/gc/pauses:seconds": | 
|  | h := samples[i].Value.Float64Histogram() | 
|  | gc.pauses = 0 | 
|  | for i := range h.Counts { | 
|  | gc.pauses += h.Counts[i] | 
|  | } | 
|  | case "/gc/scan/heap:bytes": | 
|  | totalScan.want += samples[i].Value.Uint64() | 
|  | case "/gc/scan/globals:bytes": | 
|  | totalScan.want += samples[i].Value.Uint64() | 
|  | case "/gc/scan/stack:bytes": | 
|  | totalScan.want += samples[i].Value.Uint64() | 
|  | case "/gc/scan/total:bytes": | 
|  | totalScan.got = samples[i].Value.Uint64() | 
|  | case "/sched/gomaxprocs:threads": | 
|  | if got, want := samples[i].Value.Uint64(), uint64(runtime.GOMAXPROCS(-1)); got != want { | 
|  | t.Errorf("gomaxprocs doesn't match runtime.GOMAXPROCS: got %d, want %d", got, want) | 
|  | } | 
|  | case "/sched/goroutines:goroutines": | 
|  | if samples[i].Value.Uint64() < 1 { | 
|  | t.Error("number of goroutines is less than one") | 
|  | } | 
|  | } | 
|  | } | 
|  | // Only check this on Linux where we can be reasonably sure we have a high-resolution timer. | 
|  | if runtime.GOOS == "linux" { | 
|  | if cpu.gcDedicated <= 0 && cpu.gcAssist <= 0 && cpu.gcIdle <= 0 { | 
|  | t.Errorf("found no time spent on GC work: %#v", cpu) | 
|  | } | 
|  | if cpu.gcPause <= 0 { | 
|  | t.Errorf("found no GC pauses: %f", cpu.gcPause) | 
|  | } | 
|  | if cpu.idle <= 0 { | 
|  | t.Errorf("found no idle time: %f", cpu.idle) | 
|  | } | 
|  | if total := cpu.gcDedicated + cpu.gcAssist + cpu.gcIdle + cpu.gcPause; !withinEpsilon(cpu.gcTotal, total, 0.01) { | 
|  | t.Errorf("calculated total GC CPU not within 1%% of sampled total: %f vs. %f", total, cpu.gcTotal) | 
|  | } | 
|  | if total := cpu.scavengeAssist + cpu.scavengeBg; !withinEpsilon(cpu.scavengeTotal, total, 0.01) { | 
|  | t.Errorf("calculated total scavenge CPU not within 1%% of sampled total: %f vs. %f", total, cpu.scavengeTotal) | 
|  | } | 
|  | if cpu.total <= 0 { | 
|  | t.Errorf("found no total CPU time passed") | 
|  | } | 
|  | if cpu.user <= 0 { | 
|  | t.Errorf("found no user time passed") | 
|  | } | 
|  | if total := cpu.gcTotal + cpu.scavengeTotal + cpu.user + cpu.idle; !withinEpsilon(cpu.total, total, 0.02) { | 
|  | t.Errorf("calculated total CPU not within 2%% of sampled total: %f vs. %f", total, cpu.total) | 
|  | } | 
|  | } | 
|  | if totalVirtual.got != totalVirtual.want { | 
|  | t.Errorf(`"/memory/classes/total:bytes" does not match sum of /memory/classes/**: got %d, want %d`, totalVirtual.got, totalVirtual.want) | 
|  | } | 
|  | if got, want := objects.allocs-objects.frees, objects.total; got != want { | 
|  | t.Errorf("mismatch between object alloc/free tallies and total: got %d, want %d", got, want) | 
|  | } | 
|  | if got, want := objects.allocdBytes-objects.freedBytes, objects.totalBytes; got != want { | 
|  | t.Errorf("mismatch between object alloc/free tallies and total: got %d, want %d", got, want) | 
|  | } | 
|  | if b, c := len(objects.alloc.Buckets), len(objects.alloc.Counts); b != c+1 { | 
|  | t.Errorf("allocs-by-size has wrong bucket or counts length: %d buckets, %d counts", b, c) | 
|  | } | 
|  | if b, c := len(objects.free.Buckets), len(objects.free.Counts); b != c+1 { | 
|  | t.Errorf("frees-by-size has wrong bucket or counts length: %d buckets, %d counts", b, c) | 
|  | } | 
|  | if len(objects.alloc.Buckets) != len(objects.free.Buckets) { | 
|  | t.Error("allocs-by-size and frees-by-size buckets don't match in length") | 
|  | } else if len(objects.alloc.Counts) != len(objects.free.Counts) { | 
|  | t.Error("allocs-by-size and frees-by-size counts don't match in length") | 
|  | } else { | 
|  | for i := range objects.alloc.Buckets { | 
|  | ba := objects.alloc.Buckets[i] | 
|  | bf := objects.free.Buckets[i] | 
|  | if ba != bf { | 
|  | t.Errorf("bucket %d is different for alloc and free hists: %f != %f", i, ba, bf) | 
|  | } | 
|  | } | 
|  | if !t.Failed() { | 
|  | var gotAlloc, gotFree uint64 | 
|  | want := objects.total | 
|  | for i := range objects.alloc.Counts { | 
|  | if objects.alloc.Counts[i] < objects.free.Counts[i] { | 
|  | t.Errorf("found more allocs than frees in object dist bucket %d", i) | 
|  | continue | 
|  | } | 
|  | gotAlloc += objects.alloc.Counts[i] | 
|  | gotFree += objects.free.Counts[i] | 
|  | } | 
|  | if got := gotAlloc - gotFree; got != want { | 
|  | t.Errorf("object distribution counts don't match count of live objects: got %d, want %d", got, want) | 
|  | } | 
|  | if gotAlloc != objects.allocs { | 
|  | t.Errorf("object distribution counts don't match total allocs: got %d, want %d", gotAlloc, objects.allocs) | 
|  | } | 
|  | if gotFree != objects.frees { | 
|  | t.Errorf("object distribution counts don't match total allocs: got %d, want %d", gotFree, objects.frees) | 
|  | } | 
|  | } | 
|  | } | 
	// The current GC has at least 2 pauses per cycle.
	// Check that the observed pause count is consistent with that.
|  | if gc.pauses < gc.numGC*2 { | 
|  | t.Errorf("fewer pauses than expected: got %d, want at least %d", gc.pauses, gc.numGC*2) | 
|  | } | 
|  | if totalScan.got <= 0 { | 
|  | t.Errorf("scannable GC space is empty: %d", totalScan.got) | 
|  | } | 
|  | if totalScan.got != totalScan.want { | 
|  | t.Errorf("/gc/scan/total:bytes doesn't line up with sum of /gc/scan*: total %d vs. sum %d", totalScan.got, totalScan.want) | 
|  | } | 
|  | } | 
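
// The histogram checks above rely on the runtime/metrics invariant that
// len(Buckets) == len(Counts)+1: Buckets holds boundaries, and Counts[i]
// counts samples in [Buckets[i], Buckets[i+1]). As an illustrative sketch
// of consuming a histogram under that invariant (a hypothetical helper,
// not used by the tests):
func histogramTotal(h *metrics.Float64Histogram) (total uint64, maxNonEmpty float64) {
	for i, c := range h.Counts {
		total += c
		if c != 0 {
			// Upper boundary of the highest non-empty bucket; may be +Inf
			// in the last bucket.
			maxNonEmpty = h.Buckets[i+1]
		}
	}
	return total, maxNonEmpty
}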
|  |  | 
|  | func BenchmarkReadMetricsLatency(b *testing.B) { | 
|  | stop := applyGCLoad(b) | 
|  |  | 
	// Collect the latency of each metrics.Read call below.
|  | latencies := make([]time.Duration, 0, 1024) | 
|  | _, samples := prepareAllMetricsSamples() | 
|  |  | 
|  | // Hit metrics.Read continuously and measure. | 
|  | b.ResetTimer() | 
|  | for i := 0; i < b.N; i++ { | 
|  | start := time.Now() | 
|  | metrics.Read(samples) | 
|  | latencies = append(latencies, time.Since(start)) | 
|  | } | 
|  | // Make sure to stop the timer before we wait! The load created above | 
|  | // is very heavy-weight and not easy to stop, so we could end up | 
|  | // confusing the benchmarking framework for small b.N. | 
|  | b.StopTimer() | 
|  | stop() | 
|  |  | 
	// Disable the default */op metrics.
	// ns/op doesn't mean anything because it's an average, and the heavy
	// background load running during the loop above skews it significantly.
|  | b.ReportMetric(0, "ns/op") | 
|  | b.ReportMetric(0, "B/op") | 
|  | b.ReportMetric(0, "allocs/op") | 
|  |  | 
|  | // Sort latencies then report percentiles. | 
|  | sort.Slice(latencies, func(i, j int) bool { | 
|  | return latencies[i] < latencies[j] | 
|  | }) | 
|  | b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns") | 
|  | b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns") | 
|  | b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns") | 
|  | } | 
|  |  | 
|  | var readMetricsSink [1024]interface{} | 
|  |  | 
|  | func TestReadMetricsCumulative(t *testing.T) { | 
|  | // Set up the set of metrics marked cumulative. | 
|  | descs := metrics.All() | 
|  | var samples [2][]metrics.Sample | 
|  | samples[0] = make([]metrics.Sample, len(descs)) | 
|  | samples[1] = make([]metrics.Sample, len(descs)) | 
|  | total := 0 | 
|  | for i := range samples[0] { | 
|  | if !descs[i].Cumulative { | 
|  | continue | 
|  | } | 
|  | samples[0][total].Name = descs[i].Name | 
|  | total++ | 
|  | } | 
|  | samples[0] = samples[0][:total] | 
|  | samples[1] = samples[1][:total] | 
|  | copy(samples[1], samples[0]) | 
|  |  | 
|  | // Start some noise in the background. | 
|  | var wg sync.WaitGroup | 
|  | wg.Add(1) | 
|  | done := make(chan struct{}) | 
|  | go func() { | 
|  | defer wg.Done() | 
|  | for { | 
|  | // Add more things here that could influence metrics. | 
|  | for i := 0; i < len(readMetricsSink); i++ { | 
|  | readMetricsSink[i] = make([]byte, 1024) | 
|  | select { | 
|  | case <-done: | 
|  | return | 
|  | default: | 
|  | } | 
|  | } | 
|  | runtime.GC() | 
|  | } | 
|  | }() | 
|  |  | 
|  | sum := func(us []uint64) uint64 { | 
|  | total := uint64(0) | 
|  | for _, u := range us { | 
|  | total += u | 
|  | } | 
|  | return total | 
|  | } | 
|  |  | 
|  | // Populate the first generation. | 
|  | metrics.Read(samples[0]) | 
|  |  | 
|  | // Check to make sure that these metrics only grow monotonically. | 
|  | for gen := 1; gen < 10; gen++ { | 
|  | metrics.Read(samples[gen%2]) | 
|  | for i := range samples[gen%2] { | 
|  | name := samples[gen%2][i].Name | 
|  | vNew, vOld := samples[gen%2][i].Value, samples[1-(gen%2)][i].Value | 
|  |  | 
|  | switch vNew.Kind() { | 
|  | case metrics.KindUint64: | 
|  | new := vNew.Uint64() | 
|  | old := vOld.Uint64() | 
|  | if new < old { | 
|  | t.Errorf("%s decreased: %d < %d", name, new, old) | 
|  | } | 
|  | case metrics.KindFloat64: | 
|  | new := vNew.Float64() | 
|  | old := vOld.Float64() | 
|  | if new < old { | 
|  | t.Errorf("%s decreased: %f < %f", name, new, old) | 
|  | } | 
|  | case metrics.KindFloat64Histogram: | 
|  | new := sum(vNew.Float64Histogram().Counts) | 
|  | old := sum(vOld.Float64Histogram().Counts) | 
|  | if new < old { | 
|  | t.Errorf("%s counts decreased: %d < %d", name, new, old) | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  | close(done) | 
|  |  | 
|  | wg.Wait() | 
|  | } | 
|  |  | 
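// withinEpsilon reports whether v1 is within relative tolerance e of v2.
// For example, withinEpsilon(99, 100, 0.02) is true, since 99 lies in the
// window [98, 102]. Note that the window is computed from v2, so the
// arguments are not interchangeable when their values differ.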
|  | func withinEpsilon(v1, v2, e float64) bool { | 
|  | return v2-v2*e <= v1 && v1 <= v2+v2*e | 
|  | } | 
|  |  | 
|  | func TestMutexWaitTimeMetric(t *testing.T) { | 
|  | var sample [1]metrics.Sample | 
|  | sample[0].Name = "/sync/mutex/wait/total:seconds" | 
|  |  | 
|  | locks := []locker2{ | 
|  | new(mutex), | 
|  | new(rwmutexWrite), | 
|  | new(rwmutexReadWrite), | 
|  | new(rwmutexWriteRead), | 
|  | } | 
|  | for _, lock := range locks { | 
|  | t.Run(reflect.TypeOf(lock).Elem().Name(), func(t *testing.T) { | 
|  | metrics.Read(sample[:]) | 
|  | before := time.Duration(sample[0].Value.Float64() * 1e9) | 
|  |  | 
|  | minMutexWaitTime := generateMutexWaitTime(lock) | 
|  |  | 
|  | metrics.Read(sample[:]) | 
|  | after := time.Duration(sample[0].Value.Float64() * 1e9) | 
|  |  | 
|  | if wt := after - before; wt < minMutexWaitTime { | 
|  | t.Errorf("too little mutex wait time: got %s, want %s", wt, minMutexWaitTime) | 
|  | } | 
|  | }) | 
|  | } | 
|  | } | 
|  |  | 
// locker2 abstracts the pair of lock operations that two concurrent
// goroutines use on the same resource, possibly through different APIs.
// It's intended to cover the relationship of two Lock calls as well as
// that of an RLock and a Lock call.
|  | type locker2 interface { | 
|  | Lock1() | 
|  | Unlock1() | 
|  | Lock2() | 
|  | Unlock2() | 
|  | } | 
|  |  | 
|  | type mutex struct { | 
|  | mu sync.Mutex | 
|  | } | 
|  |  | 
|  | func (m *mutex) Lock1()   { m.mu.Lock() } | 
|  | func (m *mutex) Unlock1() { m.mu.Unlock() } | 
|  | func (m *mutex) Lock2()   { m.mu.Lock() } | 
|  | func (m *mutex) Unlock2() { m.mu.Unlock() } | 
|  |  | 
|  | type rwmutexWrite struct { | 
|  | mu sync.RWMutex | 
|  | } | 
|  |  | 
|  | func (m *rwmutexWrite) Lock1()   { m.mu.Lock() } | 
|  | func (m *rwmutexWrite) Unlock1() { m.mu.Unlock() } | 
|  | func (m *rwmutexWrite) Lock2()   { m.mu.Lock() } | 
|  | func (m *rwmutexWrite) Unlock2() { m.mu.Unlock() } | 
|  |  | 
|  | type rwmutexReadWrite struct { | 
|  | mu sync.RWMutex | 
|  | } | 
|  |  | 
|  | func (m *rwmutexReadWrite) Lock1()   { m.mu.RLock() } | 
|  | func (m *rwmutexReadWrite) Unlock1() { m.mu.RUnlock() } | 
|  | func (m *rwmutexReadWrite) Lock2()   { m.mu.Lock() } | 
|  | func (m *rwmutexReadWrite) Unlock2() { m.mu.Unlock() } | 
|  |  | 
|  | type rwmutexWriteRead struct { | 
|  | mu sync.RWMutex | 
|  | } | 
|  |  | 
|  | func (m *rwmutexWriteRead) Lock1()   { m.mu.Lock() } | 
|  | func (m *rwmutexWriteRead) Unlock1() { m.mu.Unlock() } | 
|  | func (m *rwmutexWriteRead) Lock2()   { m.mu.RLock() } | 
|  | func (m *rwmutexWriteRead) Unlock2() { m.mu.RUnlock() } | 
|  |  | 
// generateMutexWaitTime makes one goroutine block on the given lock for a
// known duration, returning the minimum amount of wait time that should
// become visible in the /sync/mutex/wait/total:seconds metric.
|  | func generateMutexWaitTime(mu locker2) time.Duration { | 
|  | // Set up the runtime to always track casgstatus transitions for metrics. | 
|  | *runtime.CasGStatusAlwaysTrack = true | 
|  |  | 
|  | mu.Lock1() | 
|  |  | 
|  | // Start up a goroutine to wait on the lock. | 
|  | gc := make(chan *runtime.G) | 
|  | done := make(chan bool) | 
|  | go func() { | 
|  | gc <- runtime.Getg() | 
|  |  | 
|  | for { | 
|  | mu.Lock2() | 
|  | mu.Unlock2() | 
|  | if <-done { | 
|  | return | 
|  | } | 
|  | } | 
|  | }() | 
|  | gp := <-gc | 
|  |  | 
|  | // Set the block time high enough so that it will always show up, even | 
|  | // on systems with coarse timer granularity. | 
|  | const blockTime = 100 * time.Millisecond | 
|  |  | 
|  | // Make sure the goroutine spawned above actually blocks on the lock. | 
|  | for { | 
|  | if runtime.GIsWaitingOnMutex(gp) { | 
|  | break | 
|  | } | 
|  | runtime.Gosched() | 
|  | } | 
|  |  | 
|  | // Let some amount of time pass. | 
|  | time.Sleep(blockTime) | 
|  |  | 
|  | // Let the other goroutine acquire the lock. | 
|  | mu.Unlock1() | 
|  | done <- true | 
|  |  | 
|  | // Reset flag. | 
|  | *runtime.CasGStatusAlwaysTrack = false | 
|  | return blockTime | 
|  | } | 
|  |  | 
|  | // See issue #60276. | 
|  | func TestCPUMetricsSleep(t *testing.T) { | 
|  | if runtime.GOOS == "wasip1" { | 
|  | // Since wasip1 busy-waits in the scheduler, there's no meaningful idle | 
|  | // time. This is accurately reflected in the metrics, but it means this | 
|  | // test is basically meaningless on this platform. | 
|  | t.Skip("wasip1 currently busy-waits in idle time; test not applicable") | 
|  | } | 
|  |  | 
|  | names := []string{ | 
|  | "/cpu/classes/idle:cpu-seconds", | 
|  |  | 
|  | "/cpu/classes/gc/mark/assist:cpu-seconds", | 
|  | "/cpu/classes/gc/mark/dedicated:cpu-seconds", | 
|  | "/cpu/classes/gc/mark/idle:cpu-seconds", | 
|  | "/cpu/classes/gc/pause:cpu-seconds", | 
|  | "/cpu/classes/gc/total:cpu-seconds", | 
|  | "/cpu/classes/scavenge/assist:cpu-seconds", | 
|  | "/cpu/classes/scavenge/background:cpu-seconds", | 
|  | "/cpu/classes/scavenge/total:cpu-seconds", | 
|  | "/cpu/classes/total:cpu-seconds", | 
|  | "/cpu/classes/user:cpu-seconds", | 
|  | } | 
|  | prep := func() []metrics.Sample { | 
|  | mm := make([]metrics.Sample, len(names)) | 
|  | for i := range names { | 
|  | mm[i].Name = names[i] | 
|  | } | 
|  | return mm | 
|  | } | 
|  | m1, m2 := prep(), prep() | 
|  |  | 
|  | const ( | 
|  | // Expected time spent idle. | 
|  | dur = 100 * time.Millisecond | 
|  |  | 
		// maxFailures is the number of consecutive failures required to cause the test to fail.
|  | maxFailures = 10 | 
|  | ) | 
|  |  | 
|  | failureIdleTimes := make([]float64, 0, maxFailures) | 
|  |  | 
|  | // If the bug we expect is happening, then the Sleep CPU time will be accounted for | 
|  | // as user time rather than idle time. In an ideal world we'd expect the whole application | 
|  | // to go instantly idle the moment this goroutine goes to sleep, and stay asleep for that | 
|  | // duration. However, the Go runtime can easily eat into idle time while this goroutine is | 
	// blocked in a sleep. For example, slow platforms might spend more time than
	// expected in the scheduler. Another example is that a Go runtime background goroutine could run while
|  | // everything else is idle. Lastly, if a running goroutine is descheduled by the OS, enough | 
|  | // time may pass such that the goroutine is ready to wake, even though the runtime couldn't | 
|  | // observe itself as idle with nanotime. | 
|  | // | 
|  | // To deal with all this, we give a half-proc's worth of leniency. | 
|  | // | 
|  | // We also retry multiple times to deal with the fact that the OS might deschedule us before | 
|  | // we yield and go idle. That has a rare enough chance that retries should resolve it. | 
|  | // If the issue we expect is happening, it should be persistent. | 
|  | minIdleCPUSeconds := dur.Seconds() * (float64(runtime.GOMAXPROCS(-1)) - 0.5) | 
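	// For example, with GOMAXPROCS=4 and dur=100ms, we require at least
	// 0.1s * (4 - 0.5) = 0.35s of measured idle CPU time during the sleep.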
|  |  | 
|  | // Let's make sure there's no background scavenge work to do. | 
|  | // | 
|  | // The runtime.GC calls below ensure the background sweeper | 
|  | // will not run during the idle period. | 
|  | debug.FreeOSMemory() | 
|  |  | 
|  | for retries := 0; retries < maxFailures; retries++ { | 
|  | // Read 1. | 
|  | runtime.GC() // Update /cpu/classes metrics. | 
|  | metrics.Read(m1) | 
|  |  | 
|  | // Sleep. | 
|  | time.Sleep(dur) | 
|  |  | 
|  | // Read 2. | 
|  | runtime.GC() // Update /cpu/classes metrics. | 
|  | metrics.Read(m2) | 
|  |  | 
|  | dt := m2[0].Value.Float64() - m1[0].Value.Float64() | 
|  | if dt >= minIdleCPUSeconds { | 
|  | // All is well. Test passed. | 
|  | return | 
|  | } | 
|  | failureIdleTimes = append(failureIdleTimes, dt) | 
|  | // Try again. | 
|  | } | 
|  |  | 
|  | // We couldn't observe the expected idle time even once. | 
|  | for i, dt := range failureIdleTimes { | 
|  | t.Logf("try %2d: idle time = %.5fs\n", i+1, dt) | 
|  | } | 
|  | t.Logf("try %d breakdown:\n", len(failureIdleTimes)) | 
|  | for i := range names { | 
|  | if m1[i].Value.Kind() == metrics.KindBad { | 
|  | continue | 
|  | } | 
|  | t.Logf("\t%s %0.3f\n", names[i], m2[i].Value.Float64()-m1[i].Value.Float64()) | 
|  | } | 
|  | t.Errorf(`time.Sleep did not contribute enough to "idle" class: minimum idle time = %.5fs`, minIdleCPUSeconds) | 
|  | } | 
|  |  | 
// testSchedPauseMetrics calls fn and verifies that the correct STW metrics
// increment. If isGC is true, fn triggers a GC STW. Otherwise, fn triggers a
// non-GC ("other") STW.
|  | func testSchedPauseMetrics(t *testing.T, fn func(t *testing.T), isGC bool) { | 
|  | m := []metrics.Sample{ | 
|  | {Name: "/sched/pauses/stopping/gc:seconds"}, | 
|  | {Name: "/sched/pauses/stopping/other:seconds"}, | 
|  | {Name: "/sched/pauses/total/gc:seconds"}, | 
|  | {Name: "/sched/pauses/total/other:seconds"}, | 
|  | } | 
|  |  | 
|  | stoppingGC := &m[0] | 
|  | stoppingOther := &m[1] | 
|  | totalGC := &m[2] | 
|  | totalOther := &m[3] | 
|  |  | 
|  | sampleCount := func(s *metrics.Sample) uint64 { | 
|  | h := s.Value.Float64Histogram() | 
|  |  | 
|  | var n uint64 | 
|  | for _, c := range h.Counts { | 
|  | n += c | 
|  | } | 
|  | return n | 
|  | } | 
|  |  | 
|  | // Read baseline. | 
|  | metrics.Read(m) | 
|  |  | 
|  | baselineStartGC := sampleCount(stoppingGC) | 
|  | baselineStartOther := sampleCount(stoppingOther) | 
|  | baselineTotalGC := sampleCount(totalGC) | 
|  | baselineTotalOther := sampleCount(totalOther) | 
|  |  | 
|  | fn(t) | 
|  |  | 
|  | metrics.Read(m) | 
|  |  | 
|  | if isGC { | 
|  | if got := sampleCount(stoppingGC); got <= baselineStartGC { | 
|  | t.Errorf("/sched/pauses/stopping/gc:seconds sample count %d did not increase from baseline of %d", got, baselineStartGC) | 
|  | } | 
|  | if got := sampleCount(totalGC); got <= baselineTotalGC { | 
|  | t.Errorf("/sched/pauses/total/gc:seconds sample count %d did not increase from baseline of %d", got, baselineTotalGC) | 
|  | } | 
|  |  | 
|  | if got := sampleCount(stoppingOther); got != baselineStartOther { | 
|  | t.Errorf("/sched/pauses/stopping/other:seconds sample count %d changed from baseline of %d", got, baselineStartOther) | 
|  | } | 
|  | if got := sampleCount(totalOther); got != baselineTotalOther { | 
|  | t.Errorf("/sched/pauses/stopping/other:seconds sample count %d changed from baseline of %d", got, baselineTotalOther) | 
|  | } | 
|  | } else { | 
|  | if got := sampleCount(stoppingGC); got != baselineStartGC { | 
|  | t.Errorf("/sched/pauses/stopping/gc:seconds sample count %d changed from baseline of %d", got, baselineStartGC) | 
|  | } | 
|  | if got := sampleCount(totalGC); got != baselineTotalGC { | 
|  | t.Errorf("/sched/pauses/total/gc:seconds sample count %d changed from baseline of %d", got, baselineTotalGC) | 
|  | } | 
|  |  | 
|  | if got := sampleCount(stoppingOther); got <= baselineStartOther { | 
|  | t.Errorf("/sched/pauses/stopping/other:seconds sample count %d did not increase from baseline of %d", got, baselineStartOther) | 
|  | } | 
|  | if got := sampleCount(totalOther); got <= baselineTotalOther { | 
|  | t.Errorf("/sched/pauses/stopping/other:seconds sample count %d did not increase from baseline of %d", got, baselineTotalOther) | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | func TestSchedPauseMetrics(t *testing.T) { | 
|  | tests := []struct { | 
|  | name string | 
|  | isGC bool | 
|  | fn   func(t *testing.T) | 
|  | }{ | 
|  | { | 
|  | name: "runtime.GC", | 
|  | isGC: true, | 
|  | fn: func(t *testing.T) { | 
|  | runtime.GC() | 
|  | }, | 
|  | }, | 
|  | { | 
|  | name: "runtime.GOMAXPROCS", | 
|  | fn: func(t *testing.T) { | 
|  | if runtime.GOARCH == "wasm" { | 
|  | t.Skip("GOMAXPROCS >1 not supported on wasm") | 
|  | } | 
|  |  | 
|  | n := runtime.GOMAXPROCS(0) | 
|  | defer runtime.GOMAXPROCS(n) | 
|  |  | 
|  | runtime.GOMAXPROCS(n + 1) | 
|  | }, | 
|  | }, | 
|  | { | 
|  | name: "runtime.GoroutineProfile", | 
|  | fn: func(t *testing.T) { | 
|  | var s [1]runtime.StackRecord | 
|  | runtime.GoroutineProfile(s[:]) | 
|  | }, | 
|  | }, | 
|  | { | 
|  | name: "runtime.ReadMemStats", | 
|  | fn: func(t *testing.T) { | 
|  | var mstats runtime.MemStats | 
|  | runtime.ReadMemStats(&mstats) | 
|  | }, | 
|  | }, | 
|  | { | 
|  | name: "runtime.Stack", | 
|  | fn: func(t *testing.T) { | 
|  | var b [64]byte | 
|  | runtime.Stack(b[:], true) | 
|  | }, | 
|  | }, | 
|  | { | 
|  | name: "runtime/debug.WriteHeapDump", | 
|  | fn: func(t *testing.T) { | 
|  | if runtime.GOOS == "js" { | 
|  | t.Skip("WriteHeapDump not supported on js") | 
|  | } | 
|  |  | 
|  | f, err := os.CreateTemp(t.TempDir(), "heapdumptest") | 
|  | if err != nil { | 
|  | t.Fatalf("os.CreateTemp failed: %v", err) | 
|  | } | 
|  | defer os.Remove(f.Name()) | 
|  | defer f.Close() | 
|  | debug.WriteHeapDump(f.Fd()) | 
|  | }, | 
|  | }, | 
|  | { | 
|  | name: "runtime/trace.Start", | 
|  | fn: func(t *testing.T) { | 
|  | if trace.IsEnabled() { | 
|  | t.Skip("tracing already enabled") | 
|  | } | 
|  |  | 
|  | var buf bytes.Buffer | 
|  | if err := trace.Start(&buf); err != nil { | 
|  | t.Errorf("trace.Start err got %v want nil", err) | 
|  | } | 
|  | trace.Stop() | 
|  | }, | 
|  | }, | 
|  | } | 
|  |  | 
|  | // These tests count STW pauses, classified based on whether they're related | 
|  | // to the GC or not. Disable automatic GC cycles during the test so we don't | 
|  | // have an incidental GC pause when we're trying to observe only | 
|  | // non-GC-related pauses. This is especially important for the | 
|  | // runtime/trace.Start test, since (as of this writing) that will block | 
|  | // until any active GC mark phase completes. | 
|  | defer debug.SetGCPercent(debug.SetGCPercent(-1)) | 
|  | runtime.GC() | 
|  |  | 
|  | for _, tc := range tests { | 
|  | t.Run(tc.name, func(t *testing.T) { | 
|  | testSchedPauseMetrics(t, tc.fn, tc.isGC) | 
|  | }) | 
|  | } | 
|  | } | 
|  |  | 
|  | func TestRuntimeLockMetricsAndProfile(t *testing.T) { | 
|  | old := runtime.SetMutexProfileFraction(0) // enabled during sub-tests | 
|  | defer runtime.SetMutexProfileFraction(old) | 
|  | if old != 0 { | 
|  | t.Fatalf("need MutexProfileRate 0, got %d", old) | 
|  | } | 
|  |  | 
|  | { | 
|  | before := os.Getenv("GODEBUG") | 
|  | for _, s := range strings.Split(before, ",") { | 
|  | if strings.HasPrefix(s, "runtimecontentionstacks=") { | 
|  | t.Logf("GODEBUG includes explicit setting %q", s) | 
|  | } | 
|  | } | 
|  | defer func() { os.Setenv("GODEBUG", before) }() | 
|  | os.Setenv("GODEBUG", fmt.Sprintf("%s,runtimecontentionstacks=1", before)) | 
|  | } | 
|  |  | 
|  | t.Logf("NumCPU %d", runtime.NumCPU()) | 
|  | t.Logf("GOMAXPROCS %d", runtime.GOMAXPROCS(0)) | 
|  | if minCPU := 2; runtime.NumCPU() < minCPU { | 
|  | t.Skipf("creating and observing contention on runtime-internal locks requires NumCPU >= %d", minCPU) | 
|  | } | 
|  |  | 
|  | loadProfile := func(t *testing.T) *profile.Profile { | 
|  | var w bytes.Buffer | 
|  | pprof.Lookup("mutex").WriteTo(&w, 0) | 
|  | p, err := profile.Parse(&w) | 
|  | if err != nil { | 
|  | t.Fatalf("failed to parse profile: %v", err) | 
|  | } | 
|  | if err := p.CheckValid(); err != nil { | 
|  | t.Fatalf("invalid profile: %v", err) | 
|  | } | 
|  | return p | 
|  | } | 
|  |  | 
|  | measureDelta := func(t *testing.T, fn func()) (metricGrowth, profileGrowth float64, p *profile.Profile) { | 
|  | beforeProfile := loadProfile(t) | 
|  | beforeMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}} | 
|  | metrics.Read(beforeMetrics) | 
|  |  | 
|  | fn() | 
|  |  | 
|  | afterProfile := loadProfile(t) | 
|  | afterMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}} | 
|  | metrics.Read(afterMetrics) | 
|  |  | 
|  | sumSamples := func(p *profile.Profile, i int) int64 { | 
|  | var sum int64 | 
|  | for _, s := range p.Sample { | 
|  | sum += s.Value[i] | 
|  | } | 
|  | return sum | 
|  | } | 
|  |  | 
|  | metricGrowth = afterMetrics[0].Value.Float64() - beforeMetrics[0].Value.Float64() | 
|  | profileGrowth = float64(sumSamples(afterProfile, 1)-sumSamples(beforeProfile, 1)) * time.Nanosecond.Seconds() | 
|  |  | 
|  | // The internal/profile package does not support compaction; this delta | 
|  | // profile will include separate positive and negative entries. | 
|  | p = afterProfile.Copy() | 
|  | if len(beforeProfile.Sample) > 0 { | 
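			// Merging with a scale of -1 subtracts the "before" samples,
			// leaving a delta profile of contention that occurred during fn.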
|  | err := p.Merge(beforeProfile, -1) | 
|  | if err != nil { | 
|  | t.Fatalf("Merge profiles: %v", err) | 
|  | } | 
|  | } | 
|  |  | 
|  | return metricGrowth, profileGrowth, p | 
|  | } | 
|  |  | 
|  | testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) { | 
|  | return func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) { | 
|  | metricGrowth, profileGrowth, p := measureDelta(t, func() { | 
|  | var started, stopped sync.WaitGroup | 
|  | started.Add(workers) | 
|  | stopped.Add(workers) | 
|  | for i := 0; i < workers; i++ { | 
|  | w := &contentionWorker{ | 
|  | before: func() { | 
|  | started.Done() | 
|  | started.Wait() | 
|  | }, | 
|  | after: func() { | 
|  | stopped.Done() | 
|  | }, | 
|  | fn: fn, | 
|  | } | 
|  | go w.run() | 
|  | } | 
|  | stopped.Wait() | 
|  | }) | 
|  |  | 
|  | if profileGrowth == 0 { | 
|  | t.Errorf("no increase in mutex profile") | 
|  | } | 
|  | if metricGrowth == 0 && strictTiming { | 
|  | // If the critical section is very short, systems with low timer | 
|  | // resolution may be unable to measure it via nanotime. | 
|  | t.Errorf("no increase in /sync/mutex/wait/total:seconds metric") | 
|  | } | 
|  | // This comparison is possible because the time measurements in support of | 
|  | // runtime/pprof and runtime/metrics for runtime-internal locks are so close | 
|  | // together. It doesn't work as well for user-space contention, where the | 
|  | // involved goroutines are not _Grunnable the whole time and so need to pass | 
|  | // through the scheduler. | 
|  | t.Logf("lock contention growth in runtime/pprof's view  (%fs)", profileGrowth) | 
|  | t.Logf("lock contention growth in runtime/metrics' view (%fs)", metricGrowth) | 
|  |  | 
|  | acceptStacks = append([][]string(nil), acceptStacks...) | 
|  | for i, stk := range acceptStacks { | 
|  | if goexperiment.StaticLockRanking { | 
|  | if !slices.ContainsFunc(stk, func(s string) bool { | 
|  | return s == "runtime.systemstack" || s == "runtime.mcall" || s == "runtime.mstart" | 
|  | }) { | 
|  | // stk is a call stack that is still on the user stack when | 
|  | // it calls runtime.unlock. Add the extra function that | 
|  | // we'll see, when the static lock ranking implementation of | 
|  | // runtime.unlockWithRank switches to the system stack. | 
|  | stk = append([]string{"runtime.unlockWithRank"}, stk...) | 
|  | } | 
|  | } | 
|  | acceptStacks[i] = stk | 
|  | } | 
|  |  | 
|  | var stks [][]string | 
|  | values := make([][2]int64, len(acceptStacks)) | 
|  | for _, s := range p.Sample { | 
|  | var have []string | 
|  | for _, loc := range s.Location { | 
|  | for _, line := range loc.Line { | 
|  | have = append(have, line.Function.Name) | 
|  | } | 
|  | } | 
|  | stks = append(stks, have) | 
|  | for i, stk := range acceptStacks { | 
|  | if slices.Equal(have, stk) { | 
|  | values[i][0] += s.Value[0] | 
|  | values[i][1] += s.Value[1] | 
|  | } | 
|  | } | 
|  | } | 
|  | for i, stk := range acceptStacks { | 
|  | n += values[i][0] | 
|  | value += values[i][1] | 
|  | t.Logf("stack %v has samples totaling n=%d value=%d", stk, values[i][0], values[i][1]) | 
|  | } | 
|  | if n == 0 && value == 0 { | 
|  | t.Logf("profile:\n%s", p) | 
|  | for _, have := range stks { | 
|  | t.Logf("have stack %v", have) | 
|  | } | 
|  | for _, stk := range acceptStacks { | 
|  | t.Errorf("want stack %v", stk) | 
|  | } | 
|  | } | 
|  |  | 
|  | return metricGrowth, profileGrowth, n, value | 
|  | } | 
|  | } | 
|  |  | 
|  | name := t.Name() | 
|  |  | 
|  | t.Run("runtime.lock", func(t *testing.T) { | 
|  | mus := make([]runtime.Mutex, 100) | 
|  | var needContention atomic.Int64 | 
|  | delay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks | 
|  | delayMicros := delay.Microseconds() | 
|  |  | 
|  | // The goroutine that acquires the lock will only proceed when it | 
		// detects that its partner is contending for the lock. That will lead to
|  | // live-lock if anything (such as a STW) prevents the partner goroutine | 
|  | // from running. Allowing the contention workers to pause and restart | 
|  | // (to allow a STW to proceed) makes it harder to confirm that we're | 
|  | // counting the correct number of contention events, since some locks | 
|  | // will end up contended twice. Instead, disable the GC. | 
|  | defer debug.SetGCPercent(debug.SetGCPercent(-1)) | 
|  |  | 
|  | const workers = 2 | 
|  | if runtime.GOMAXPROCS(0) < workers { | 
|  | t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workers) | 
|  | } | 
|  |  | 
|  | fn := func() bool { | 
|  | n := int(needContention.Load()) | 
|  | if n < 0 { | 
|  | return false | 
|  | } | 
|  | mu := &mus[n] | 
|  |  | 
|  | runtime.Lock(mu) | 
|  | for int(needContention.Load()) == n { | 
|  | if runtime.MutexContended(mu) { | 
|  | // make them wait a little while | 
|  | for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; { | 
|  | runtime.Usleep(uint32(delayMicros)) | 
|  | } | 
|  | break | 
|  | } | 
|  | } | 
|  | runtime.Unlock(mu) | 
|  | needContention.Store(int64(n - 1)) | 
|  |  | 
|  | return true | 
|  | } | 
|  |  | 
|  | stks := [][]string{{ | 
|  | "runtime.unlock", | 
|  | "runtime_test." + name + ".func5.1", | 
|  | "runtime_test.(*contentionWorker).run", | 
|  | }} | 
|  |  | 
|  | t.Run("sample-1", func(t *testing.T) { | 
|  | old := runtime.SetMutexProfileFraction(1) | 
|  | defer runtime.SetMutexProfileFraction(old) | 
|  |  | 
|  | needContention.Store(int64(len(mus) - 1)) | 
|  | metricGrowth, profileGrowth, n, _ := testcase(true, stks, workers, fn)(t) | 
|  |  | 
|  | if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); have < want { | 
|  | // The test imposes a delay with usleep, verified with calls to | 
|  | // nanotime. Compare against the runtime/metrics package's view | 
|  | // (based on nanotime) rather than runtime/pprof's view (based | 
|  | // on cputicks). | 
|  | t.Errorf("runtime/metrics reported less than the known minimum contention duration (%fs < %fs)", have, want) | 
|  | } | 
|  | if have, want := n, int64(len(mus)); have != want { | 
|  | t.Errorf("mutex profile reported contention count different from the known true count (%d != %d)", have, want) | 
|  | } | 
|  |  | 
|  | const slop = 1.5 // account for nanotime vs cputicks | 
|  | if profileGrowth > slop*metricGrowth || metricGrowth > slop*profileGrowth { | 
|  | t.Errorf("views differ by more than %fx", slop) | 
|  | } | 
|  | }) | 
|  |  | 
|  | t.Run("sample-2", func(t *testing.T) { | 
|  | old := runtime.SetMutexProfileFraction(2) | 
|  | defer runtime.SetMutexProfileFraction(old) | 
|  |  | 
|  | needContention.Store(int64(len(mus) - 1)) | 
|  | metricGrowth, profileGrowth, n, _ := testcase(true, stks, workers, fn)(t) | 
|  |  | 
|  | // With 100 trials and profile fraction of 2, we expect to capture | 
|  | // 50 samples. Allow the test to pass if we get at least 20 samples; | 
|  | // the CDF of the binomial distribution says there's less than a | 
|  | // 1e-9 chance of that, which is an acceptably low flakiness rate. | 
|  | const samplingSlop = 2.5 | 
|  |  | 
|  | if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); samplingSlop*have < want { | 
|  | // The test imposes a delay with usleep, verified with calls to | 
|  | // nanotime. Compare against the runtime/metrics package's view | 
|  | // (based on nanotime) rather than runtime/pprof's view (based | 
|  | // on cputicks). | 
|  | t.Errorf("runtime/metrics reported less than the known minimum contention duration (%f * %fs < %fs)", samplingSlop, have, want) | 
|  | } | 
|  | if have, want := n, int64(len(mus)); float64(have) > float64(want)*samplingSlop || float64(want) > float64(have)*samplingSlop { | 
|  | t.Errorf("mutex profile reported contention count too different from the expected count (%d far from %d)", have, want) | 
|  | } | 
|  |  | 
|  | const timerSlop = 1.5 * samplingSlop // account for nanotime vs cputicks, plus the two views' independent sampling | 
|  | if profileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*profileGrowth { | 
|  | t.Errorf("views differ by more than %fx", timerSlop) | 
|  | } | 
|  | }) | 
|  | }) | 
|  |  | 
|  | t.Run("runtime.semrelease", func(t *testing.T) { | 
|  | old := runtime.SetMutexProfileFraction(1) | 
|  | defer runtime.SetMutexProfileFraction(old) | 
|  |  | 
|  | const workers = 3 | 
|  | if runtime.GOMAXPROCS(0) < workers { | 
|  | t.Skipf("creating and observing contention on runtime-internal semaphores requires GOMAXPROCS >= %d", workers) | 
|  | } | 
|  |  | 
|  | var sem uint32 = 1 | 
|  | var tries atomic.Int32 | 
|  | tries.Store(10_000_000) // prefer controlled failure to timeout | 
|  | var sawContention atomic.Int32 | 
|  | var need int32 = 1 | 
|  | fn := func() bool { | 
|  | if sawContention.Load() >= need { | 
|  | return false | 
|  | } | 
|  | if tries.Add(-1) < 0 { | 
|  | return false | 
|  | } | 
|  |  | 
|  | runtime.Semacquire(&sem) | 
|  | runtime.Semrelease1(&sem, false, 0) | 
|  | if runtime.MutexContended(runtime.SemRootLock(&sem)) { | 
|  | sawContention.Add(1) | 
|  | } | 
|  | return true | 
|  | } | 
|  |  | 
|  | stks := [][]string{ | 
|  | { | 
|  | "runtime.unlock", | 
|  | "runtime.semrelease1", | 
|  | "runtime_test.TestRuntimeLockMetricsAndProfile.func6.1", | 
|  | "runtime_test.(*contentionWorker).run", | 
|  | }, | 
|  | { | 
|  | "runtime.unlock", | 
|  | "runtime.semacquire1", | 
|  | "runtime.semacquire", | 
|  | "runtime_test.TestRuntimeLockMetricsAndProfile.func6.1", | 
|  | "runtime_test.(*contentionWorker).run", | 
|  | }, | 
|  | } | 
|  |  | 
		// Verify that we get the call stacks we expect, with anything more than
		// zero cycles / zero samples. The duration of each contention event is
		// too small relative to the expected overhead for us to verify its value
		// more directly. Leave that to the explicit lock/unlock test.
|  |  | 
|  | testcase(false, stks, workers, fn)(t) | 
|  |  | 
|  | if remaining := tries.Load(); remaining >= 0 { | 
|  | t.Logf("finished test early (%d tries remaining)", remaining) | 
|  | } | 
|  | }) | 
|  | } | 
|  |  | 
// contentionWorker provides cleaner call stacks for lock contention profile tests.
|  | type contentionWorker struct { | 
|  | before func() | 
|  | fn     func() bool | 
|  | after  func() | 
|  | } | 
|  |  | 
|  | func (w *contentionWorker) run() { | 
|  | defer w.after() | 
|  | w.before() | 
|  |  | 
|  | for w.fn() { | 
|  | } | 
|  | } |