// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime_test

import (
"flag"
"fmt"
"internal/cpu"
"internal/runtime/atomic"
"io"
"math/bits"
. "runtime"
"runtime/debug"
"slices"
"strings"
"sync"
"testing"
"time"
"unsafe"
)
// flagQuick is set by the -quick option to skip some relatively slow tests.
// This is used by the cmd/dist test runtime:cpu124.
// The cmd/dist test passes both -test.short and -quick;
// there are tests that only check testing.Short, and those tests will
// not be skipped if only -quick is used.
var flagQuick = flag.Bool("quick", false, "skip slow tests, for cmd/dist test runtime:cpu124")
func init() {
// We're testing the runtime, so make tracebacks show things
// in the runtime. This only raises the level, so it won't
// override GOTRACEBACK=crash from the user.
SetTracebackEnv("system")
}
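// Helpers for the interface-comparison benchmarks below: errfn returns a
// nil error held in a package variable, and errfn1 returns io.EOF, so the
// compared values are only known at run time.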
var errf error
func errfn() error {
return errf
}
func errfn1() error {
return io.EOF
}
func BenchmarkIfaceCmp100(b *testing.B) {
for i := 0; i < b.N; i++ {
for j := 0; j < 100; j++ {
if errfn() == io.EOF {
b.Fatal("bad comparison")
}
}
}
}
func BenchmarkIfaceCmpNil100(b *testing.B) {
for i := 0; i < b.N; i++ {
for j := 0; j < 100; j++ {
if errfn1() == nil {
b.Fatal("bad comparison")
}
}
}
}
var efaceCmp1 any
var efaceCmp2 any
func BenchmarkEfaceCmpDiff(b *testing.B) {
x := 5
efaceCmp1 = &x
y := 6
efaceCmp2 = &y
for i := 0; i < b.N; i++ {
for j := 0; j < 100; j++ {
if efaceCmp1 == efaceCmp2 {
b.Fatal("bad comparison")
}
}
}
}
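// "Indirect" here means the [2]int payloads do not fit in the interface
// data word and are stored behind pointers, so the comparison must follow
// those pointers and compare the array contents.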
func BenchmarkEfaceCmpDiffIndirect(b *testing.B) {
efaceCmp1 = [2]int{1, 2}
efaceCmp2 = [2]int{1, 2}
for i := 0; i < b.N; i++ {
for j := 0; j < 100; j++ {
if efaceCmp1 != efaceCmp2 {
b.Fatal("bad comparison")
}
}
}
}
func BenchmarkDefer(b *testing.B) {
for i := 0; i < b.N; i++ {
defer1()
}
}
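// defer1 defers a single call with arguments; the deferred function checks
// that the arguments were captured correctly and that no panic is in
// progress (recover returns nil).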
func defer1() {
defer func(x, y, z int) {
if recover() != nil || x != 1 || y != 2 || z != 3 {
panic("bad recover")
}
}(1, 2, 3)
}
func BenchmarkDefer10(b *testing.B) {
for i := 0; i < b.N/10; i++ {
defer2()
}
}
func defer2() {
for i := 0; i < 10; i++ {
defer func(x, y, z int) {
if recover() != nil || x != 1 || y != 2 || z != 3 {
panic("bad recover")
}
}(1, 2, 3)
}
}
func BenchmarkDeferMany(b *testing.B) {
for i := 0; i < b.N; i++ {
defer func(x, y, z int) {
if recover() != nil || x != 1 || y != 2 || z != 3 {
panic("bad recover")
}
}(1, 2, 3)
}
}
func BenchmarkPanicRecover(b *testing.B) {
for i := 0; i < b.N; i++ {
defer3()
}
}
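// defer3 panics and then recovers in its deferred function, exercising the
// panic/recover path that BenchmarkPanicRecover measures.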
func defer3() {
defer func(x, y, z int) {
if recover() == nil {
panic("failed recover")
}
}(1, 2, 3)
panic("hi")
}
// golang.org/issue/7063
func TestStopCPUProfilingWithProfilerOff(t *testing.T) {
SetCPUProfileRate(0)
}
// Addresses to test for faulting behavior.
// This is less a test of SetPanicOnFault and more a check that
// the operating system and the runtime can process these faults
// correctly. That is, we're indirectly testing that without SetPanicOnFault
// these would manage to turn into ordinary crashes.
// Note that these are truncated on 32-bit systems, so the bottom 32 bits
// of the larger addresses must themselves be invalid addresses.
// We might get unlucky and the OS might have mapped one of these
// addresses, but probably not: they're all in the first page, very high
// addresses that normally an OS would reserve for itself, or malformed
// addresses. Even so, we might have to remove one or two on different
// systems. We will see.
var faultAddrs = []uint64{
// low addresses
0,
1,
0xfff,
// high (kernel) addresses
// or else malformed.
0xffffffffffffffff,
0xfffffffffffff001,
0xffffffffffff0001,
0xfffffffffff00001,
0xffffffffff000001,
0xfffffffff0000001,
0xffffffff00000001,
0xfffffff000000001,
0xffffff0000000001,
0xfffff00000000001,
0xffff000000000001,
0xfff0000000000001,
0xff00000000000001,
0xf000000000000001,
0x8000000000000001,
}
func TestSetPanicOnFault(t *testing.T) {
old := debug.SetPanicOnFault(true)
defer debug.SetPanicOnFault(old)
nfault := 0
for _, addr := range faultAddrs {
testSetPanicOnFault(t, uintptr(addr), &nfault)
}
if nfault == 0 {
t.Fatalf("none of the addresses faulted")
}
}
// testSetPanicOnFault tests one potentially faulting address.
// It deliberately constructs and uses an invalid pointer,
// so mark it as nocheckptr.
//
//go:nocheckptr
func testSetPanicOnFault(t *testing.T, addr uintptr, nfault *int) {
if GOOS == "js" || GOOS == "wasip1" {
t.Skip(GOOS + " does not support catching faults")
}
defer func() {
if err := recover(); err != nil {
*nfault++
}
}()
// The read should fault, except that sometimes we hit
// addresses that have had C or kernel pages mapped there
// readable by user code. So just log the content.
// If no addresses fault, we'll fail the test.
v := *(*byte)(unsafe.Pointer(addr))
t.Logf("addr %#x: %#x\n", addr, v)
}
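// eqstring_generic is a reference implementation of string equality,
// used by TestEqString to document what the == operator does.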
func eqstring_generic(s1, s2 string) bool {
if len(s1) != len(s2) {
return false
}
// optimization in assembly versions:
// if s1.str == s2.str { return true }
for i := 0; i < len(s1); i++ {
if s1[i] != s2[i] {
return false
}
}
return true
}
func TestEqString(t *testing.T) {
// This isn't really an exhaustive test of == on strings, it's
// just a convenient way of documenting (via eqstring_generic)
// what == does.
s := []string{
"",
"a",
"c",
"aaa",
"ccc",
"cccc"[:3], // same contents, different string
"1234567890",
}
for _, s1 := range s {
for _, s2 := range s {
x := s1 == s2
y := eqstring_generic(s1, s2)
if x != y {
t.Errorf(`("%s" == "%s") = %t, want %t`, s1, s2, x, y)
}
}
}
}
func TestTrailingZero(t *testing.T) {
// make sure we add padding for structs with trailing zero-sized fields
type T1 struct {
n int32
z [0]byte
}
if unsafe.Sizeof(T1{}) != 8 {
t.Errorf("sizeof(%#v)==%d, want 8", T1{}, unsafe.Sizeof(T1{}))
}
type T2 struct {
n int64
z struct{}
}
if unsafe.Sizeof(T2{}) != 8+unsafe.Sizeof(uintptr(0)) {
t.Errorf("sizeof(%#v)==%d, want %d", T2{}, unsafe.Sizeof(T2{}), 8+unsafe.Sizeof(uintptr(0)))
}
type T3 struct {
n byte
z [4]struct{}
}
if unsafe.Sizeof(T3{}) != 2 {
t.Errorf("sizeof(%#v)==%d, want 2", T3{}, unsafe.Sizeof(T3{}))
}
// make sure the same padding can serve double duty for both the trailing zero-sized field and alignment
type T4 struct {
a int32
b int16
c int8
z struct{}
}
if unsafe.Sizeof(T4{}) != 8 {
t.Errorf("sizeof(%#v)==%d, want 8", T4{}, unsafe.Sizeof(T4{}))
}
// make sure we don't pad a zero-sized thing
type T5 struct {
}
if unsafe.Sizeof(T5{}) != 0 {
t.Errorf("sizeof(%#v)==%d, want 0", T5{}, unsafe.Sizeof(T5{}))
}
}
func TestAppendGrowth(t *testing.T) {
var x []int64
check := func(want int) {
if cap(x) != want {
t.Errorf("len=%d, cap=%d, want cap=%d", len(x), cap(x), want)
}
}
check(0)
want := 1
for i := 1; i <= 100; i++ {
x = append(x, 1)
check(want)
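// i&(i-1) == 0 exactly when i is a power of two; at that point the slice
// has just filled its capacity, so the next append is expected to double
// the capacity to 2*i.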
if i&(i-1) == 0 {
want = 2 * i
}
}
}
var One = []int64{1}
func TestAppendSliceGrowth(t *testing.T) {
var x []int64
check := func(want int) {
if cap(x) != want {
t.Errorf("len=%d, cap=%d, want cap=%d", len(x), cap(x), want)
}
}
check(0)
want := 1
for i := 1; i <= 100; i++ {
x = append(x, One...)
check(want)
if i&(i-1) == 0 {
want = 2 * i
}
}
}
func TestGoroutineProfileTrivial(t *testing.T) {
// Calling GoroutineProfile twice in a row should find the same number of goroutines,
// but it's possible there are goroutines just about to exit, so we might end up
// with fewer in the second call. Try a few times; it should converge once those
// zombies are gone.
for i := 0; ; i++ {
n1, ok := GoroutineProfile(nil) // should fail, there's at least 1 goroutine
if n1 < 1 || ok {
t.Fatalf("GoroutineProfile(nil) = %d, %v, want >0, false", n1, ok)
}
n2, ok := GoroutineProfile(make([]StackRecord, n1))
if n2 == n1 && ok {
break
}
t.Logf("GoroutineProfile(%d) = %d, %v, want %d, true", n1, n2, ok, n1)
if i >= 10 {
t.Fatalf("GoroutineProfile not converging")
}
}
}
func BenchmarkGoroutineProfile(b *testing.B) {
run := func(fn func() bool) func(b *testing.B) {
runOne := func(b *testing.B) {
latencies := make([]time.Duration, 0, b.N)
b.ResetTimer()
for i := 0; i < b.N; i++ {
start := time.Now()
ok := fn()
if !ok {
b.Fatal("goroutine profile failed")
}
latencies = append(latencies, time.Since(start))
}
b.StopTimer()
// Sort latencies then report percentiles.
slices.Sort(latencies)
b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns")
b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns")
b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns")
}
return func(b *testing.B) {
b.Run("idle", runOne)
b.Run("loaded", func(b *testing.B) {
stop := applyGCLoad(b)
runOne(b)
// Make sure to stop the timer before we wait! The load created above
// is very heavy-weight and not easy to stop, so we could end up
// confusing the benchmarking framework for small b.N.
b.StopTimer()
stop()
})
}
}
// Measure the cost of counting goroutines
b.Run("small-nil", run(func() bool {
GoroutineProfile(nil)
return true
}))
// Measure the cost with a small set of goroutines
n := NumGoroutine()
p := make([]StackRecord, 2*n+2*GOMAXPROCS(0))
b.Run("small", run(func() bool {
_, ok := GoroutineProfile(p)
return ok
}))
// Measure the cost with a large set of goroutines
ch := make(chan int)
var ready, done sync.WaitGroup
for i := 0; i < 5000; i++ {
ready.Add(1)
done.Add(1)
go func() { ready.Done(); <-ch; done.Done() }()
}
ready.Wait()
// Count goroutines with a large allgs list
b.Run("large-nil", run(func() bool {
GoroutineProfile(nil)
return true
}))
n = NumGoroutine()
p = make([]StackRecord, 2*n+2*GOMAXPROCS(0))
b.Run("large", run(func() bool {
_, ok := GoroutineProfile(p)
return ok
}))
close(ch)
done.Wait()
// Count goroutines with a large (but unused) allgs list
b.Run("sparse-nil", run(func() bool {
GoroutineProfile(nil)
return true
}))
// Measure the cost of a large (but unused) allgs list
n = NumGoroutine()
p = make([]StackRecord, 2*n+2*GOMAXPROCS(0))
b.Run("sparse", run(func() bool {
_, ok := GoroutineProfile(p)
return ok
}))
}
func TestVersion(t *testing.T) {
// Test that version does not contain \r or \n.
vers := Version()
if strings.Contains(vers, "\r") || strings.Contains(vers, "\n") {
t.Fatalf("cr/nl in version: %q", vers)
}
}
func TestTimediv(t *testing.T) {
for _, tc := range []struct {
num int64
div int32
ret int32
rem int32
}{
{
num: 8,
div: 2,
ret: 4,
rem: 0,
},
{
num: 9,
div: 2,
ret: 4,
rem: 1,
},
{
// Used by runtime.check.
num: 12345*1000000000 + 54321,
div: 1000000000,
ret: 12345,
rem: 54321,
},
{
num: 1<<32 - 1,
div: 2,
ret: 1<<31 - 1, // no overflow.
rem: 1,
},
{
num: 1 << 32,
div: 2,
ret: 1<<31 - 1, // overflow.
rem: 0,
},
{
num: 1 << 40,
div: 2,
ret: 1<<31 - 1, // overflow.
rem: 0,
},
{
num: 1<<40 + 1,
div: 1 << 10,
ret: 1 << 30,
rem: 1,
},
} {
name := fmt.Sprintf("%d div %d", tc.num, tc.div)
t.Run(name, func(t *testing.T) {
// Double check that the inputs make sense using
// standard 64-bit division.
ret64 := tc.num / int64(tc.div)
rem64 := tc.num % int64(tc.div)
if ret64 != int64(int32(ret64)) {
// Simulate timediv overflow value.
ret64 = 1<<31 - 1
rem64 = 0
}
if ret64 != int64(tc.ret) {
t.Errorf("%d / %d got ret %d rem %d want ret %d rem %d", tc.num, tc.div, ret64, rem64, tc.ret, tc.rem)
}
var rem int32
ret := Timediv(tc.num, tc.div, &rem)
if ret != tc.ret || rem != tc.rem {
t.Errorf("timediv %d / %d got ret %d rem %d want ret %d rem %d", tc.num, tc.div, ret, rem, tc.ret, tc.rem)
}
})
}
}
func BenchmarkProcYield(b *testing.B) {
benchN := func(n uint32) func(*testing.B) {
return func(b *testing.B) {
for i := 0; i < b.N; i++ {
ProcYield(n)
}
}
}
b.Run("1", benchN(1))
b.Run("10", benchN(10))
b.Run("30", benchN(30)) // active_spin_cnt in lock_sema.go and lock_futex.go
b.Run("100", benchN(100))
b.Run("1000", benchN(1000))
}
func BenchmarkOSYield(b *testing.B) {
for i := 0; i < b.N; i++ {
OSYield()
}
}
func BenchmarkMutexContention(b *testing.B) {
// Measure throughput of a single mutex with all threads contending
//
// Share a single counter across all threads. Progress from any thread is
// progress for the benchmark as a whole. We don't measure or give points
// for fairness here; arbitrary delay to any given thread's progress is
// invisible and allowed.
//
// The cache line that holds the count value will need to move between
// processors, but not as often as the cache line that holds the mutex. The
// mutex protects access to the count value, which limits contention on that
// cache line. This is a simple design, but it helps to make the behavior of
// the benchmark clear. Most real uses of mutex will protect some number of
// cache lines anyway.
var state struct {
_ cpu.CacheLinePad
lock Mutex
_ cpu.CacheLinePad
count atomic.Int64
_ cpu.CacheLinePad
}
procs := GOMAXPROCS(0)
var wg sync.WaitGroup
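// Each worker repeatedly acquires the lock and bumps the shared counter;
// the benchmark ends once the counter reaches b.N, so throughput is shared
// across all workers.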
for range procs {
wg.Add(1)
go func() {
defer wg.Done()
for {
Lock(&state.lock)
ours := state.count.Add(1)
Unlock(&state.lock)
if ours >= int64(b.N) {
return
}
}
}()
}
wg.Wait()
}
func BenchmarkMutexCapture(b *testing.B) {
// Measure mutex fairness.
//
// Have several threads contend for a single mutex value. Measure how
// effectively a single thread is able to capture the lock and report the
// duration of those "streak" events. Measure how long other individual
// threads need to wait between their turns with the lock. Report the
// duration of those "starve" events.
//
// Report in terms of wall clock time (assuming a constant time per
// lock/unlock pair) rather than number of locks/unlocks. This keeps
// timekeeping overhead out of the critical path, and avoids giving an
// advantage to lock/unlock implementations that take less time per
// operation.
var state struct {
_ cpu.CacheLinePad
lock Mutex
_ cpu.CacheLinePad
count atomic.Int64
_ cpu.CacheLinePad
}
procs := GOMAXPROCS(0)
var wg sync.WaitGroup
histograms := make(chan [2][65]int)
for range procs {
wg.Add(1)
go func() {
var (
prev int64
streak int64
histogram [2][65]int
)
for {
Lock(&state.lock)
ours := state.count.Add(1)
Unlock(&state.lock)
delta := ours - prev - 1
prev = ours
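// delta counts how many times other goroutines acquired the lock since our
// previous turn: zero extends the current streak, anything else ends the
// streak and records how long we were starved. Streak and starve lengths
// are bucketed by their leading-zero count, i.e. a coarse power-of-two
// histogram.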
if delta == 0 {
streak++
} else {
histogram[0][bits.LeadingZeros64(uint64(streak))]++
histogram[1][bits.LeadingZeros64(uint64(delta))]++
streak = 1
}
if ours >= int64(b.N) {
wg.Done()
if delta == 0 {
histogram[0][bits.LeadingZeros64(uint64(streak))]++
histogram[1][bits.LeadingZeros64(uint64(delta))]++
}
histograms <- histogram
return
}
}
}()
}
wg.Wait()
b.StopTimer()
var histogram [2][65]int
for range procs {
h := <-histograms
for i := range h {
for j := range h[i] {
histogram[i][j] += h[i][j]
}
}
}
percentile := func(h [65]int, p float64) int {
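// Bucket i holds events whose length has i leading zeros, so (1<<63)>>i is
// a lower bound on their length; weighting each bucket by that bound
// approximates the total time covered by the histogram.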
sum := 0
for i, v := range h {
bound := uint64(1<<63) >> i
sum += int(bound) * v
}
// Imagine that the longest streak / starvation events were instead half
// as long but twice in number. (Note that we've pre-multiplied by the
// [lower] "bound" value.) Continue those splits until we meet the
// percentile target.
part := 0
for i, v := range h {
bound := uint64(1<<63) >> i
part += int(bound) * v
// Have we trimmed off enough at the head to dip below the percentile goal?
if float64(sum-part) < float64(sum)*p {
return int(bound)
}
}
return 0
}
perOp := float64(b.Elapsed().Nanoseconds()) / float64(b.N)
b.ReportMetric(perOp*float64(percentile(histogram[0], 1.0)), "ns/streak-p100")
b.ReportMetric(perOp*float64(percentile(histogram[0], 0.9)), "ns/streak-p90")
b.ReportMetric(perOp*float64(percentile(histogram[1], 1.0)), "ns/starve-p100")
b.ReportMetric(perOp*float64(percentile(histogram[1], 0.9)), "ns/starve-p90")
}
func BenchmarkMutexHandoff(b *testing.B) {
testcase := func(delay func(l *Mutex)) func(b *testing.B) {
return func(b *testing.B) {
if workers := 2; GOMAXPROCS(0) < workers {
b.Skipf("requires GOMAXPROCS >= %d", workers)
}
// Measure latency of mutex handoff between threads.
//
// Hand off a runtime.mutex between two threads, one running a
// "coordinator" goroutine and the other running a "worker"
// goroutine. We don't override the runtime's typical
// goroutine/thread mapping behavior.
//
// Measure the latency, starting when the coordinator enters a call
// to runtime.unlock and ending when the worker's call to
// runtime.lock returns. The benchmark can specify a "delay"
// function to simulate the length of the mutex-holder's critical
// section, including to arrange for the worker's thread to be in
// either the "spinning" or "sleeping" portions of the runtime.lock2
// implementation. Measurement starts after any such "delay".
//
// The two threads' goroutines communicate their current position to
// each other in a non-blocking way via the "turn" state.
var state struct {
_ cpu.CacheLinePad
lock Mutex
_ cpu.CacheLinePad
turn atomic.Int64
_ cpu.CacheLinePad
}
var delta atomic.Int64
var wg sync.WaitGroup
// coordinator:
// - acquire the mutex
// - set the turn to 2 mod 4, instructing the worker to begin its Lock call
// - wait until the mutex is contended
// - wait a bit more so the worker can commit to its sleep
// - release the mutex and wait for it to be our turn (0 mod 4) again
wg.Add(1)
go func() {
defer wg.Done()
var t int64
for range b.N {
Lock(&state.lock)
state.turn.Add(2)
delay(&state.lock)
t -= Nanotime() // start the timer
Unlock(&state.lock)
for state.turn.Load()&0x2 != 0 {
}
}
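// Make the turn counter odd to signal the worker that the benchmark is over.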
state.turn.Add(1)
delta.Add(t)
}()
// worker:
// - wait until it's our turn (2 mod 4)
// - acquire and release the mutex
// - switch the turn counter back to the coordinator (0 mod 4)
wg.Add(1)
go func() {
defer wg.Done()
var t int64
for {
switch state.turn.Load() & 0x3 {
case 0:
case 1, 3:
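// Odd turn: the coordinator has finished; report the accumulated time and exit.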
delta.Add(t)
return
case 2:
Lock(&state.lock)
t += Nanotime() // stop the timer
Unlock(&state.lock)
state.turn.Add(2)
}
}
}()
wg.Wait()
b.ReportMetric(float64(delta.Load())/float64(b.N), "ns/op")
}
}
b.Run("Solo", func(b *testing.B) {
var lock Mutex
for range b.N {
Lock(&lock)
Unlock(&lock)
}
})
b.Run("FastPingPong", testcase(func(l *Mutex) {}))
b.Run("SlowPingPong", testcase(func(l *Mutex) {
// Wait for the worker to stop spinning and prepare to sleep
for !MutexContended(l) {
}
// Wait a bit longer so the OS can finish committing the worker to its
// sleep. Balance consistency against getting enough iterations.
const extraNs = 10e3
for t0 := Nanotime(); Nanotime()-t0 < extraNs; {
}
}))
}