libgo/go/runtime/cpuprof.go - gofrontend - Git at Google

 // Copyright 2011 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 // CPU profiling.
 // Based on algorithms and data structures used in
 // https://github.com/google/pprof.
 //
 // The main difference between this code and the google-perftools
 // code is that this code is written to allow copying the profile data
 // to an arbitrary io.Writer, while the google-perftools code always
 // writes to an operating system file.
 //
 // The signal handler for the profiling clock tick adds a new stack trace
 // to a hash table tracking counts for recent traces. Most clock ticks
 // hit in the cache. In the event of a cache miss, an entry must be
 // evicted from the hash table, copied to a log that will eventually be
 // written as profile data. The google-perftools code flushed the
 // log itself during the signal handler. This code cannot do that, because
 // the io.Writer might block or need system calls or locks that are not
 // safe to use from within the signal handler. Instead, we split the log
 // into two halves and let the signal handler fill one half while a goroutine
 // is writing out the other half. When the signal handler fills its half, it
 // offers to swap with the goroutine. If the writer is not done with its half,
 // we lose the stack trace for this clock tick (and record that loss).
 // The goroutine interacts with the signal handler by calling getprofile() to
 // get the next log piece to write, implicitly handing back the last log
 // piece it obtained.
 //
 // The state of this dance between the signal handler and the goroutine
 // is encoded in the cpuProfile.handoff field. If handoff == 0, then the goroutine
 // is not using either log half and is waiting (or will soon be waiting) for
 // a new piece by calling notetsleepg(&p.wait, -1). If the signal handler
 // changes handoff from 0 to non-zero, it must call notewakeup(&p.wait)
 // to wake the goroutine. The value indicates the number of entries in the
 // log half being handed off. The goroutine leaves the non-zero value in
 // place until it has finished processing the log half and then flips the number
 // back to zero. Setting the high bit in handoff means that the profiling is over,
 // and the goroutine is now in charge of flushing the data left in the hash table
 // to the log and returning that data.
 //
 // The handoff field is manipulated using atomic operations.
 // For the most part, the manipulation of handoff is orderly: if handoff == 0
 // then the signal handler owns it and can change it to non-zero.
 // If handoff != 0 then the goroutine owns it and can change it to zero.
 // If that were the end of the story then we would not need to manipulate
 // handoff using atomic operations. The operations are needed, however,
 // in order to let the log closer set the high bit to indicate "EOF" safely
 // in the situation when normally the goroutine "owns" handoff.

 package runtime

 import (
 	"runtime/internal/atomic"
 	"unsafe"
 )

 // Sizing parameters for the profiler's hash table and log.
 const (
 	numBuckets      = 1 << 10 // number of buckets in cpuProfile.hash
 	logSize         = 1 << 17 // total log size in uintptr words, split into two halves
 	assoc           = 4       // entries per hash bucket
 	maxCPUProfStack = 64      // maximum number of PCs kept per stack trace
 )

 // cpuprofEntry is one slot of the profiler's hash table: a stack trace
 // together with the number of ticks that hit it.
 type cpuprofEntry struct {
 	count uintptr                  // ticks recorded for this stack; evict resets it to 0
 	depth int                      // number of valid PCs in stack
 	stack [maxCPUProfStack]uintptr // the PCs; only stack[:depth] is meaningful
 }

 // cpuProfile holds the complete state of the CPU profiler: the hash table
 // of recent stack traces, the two-half log that evicted traces are copied
 // into, and the handoff word used to pass log halves to the writing
 // goroutine. SetCPUProfileRate obtains the single instance from sysAlloc.
 //go:notinheap
 type cpuProfile struct {
 	on     bool    // profiling is on
 	wait   note    // goroutine waits here
 	count  uintptr // tick count
 	evicts uintptr // eviction count
 	lost   uintptr // lost ticks that need to be logged

 	// Active recent stack traces.
 	// Each bucket holds assoc entries; addWithFlushlog picks the bucket
 	// from a hash of the stack's PCs.
 	hash [numBuckets]struct {
 		entry [assoc]cpuprofEntry
 	}

 	// Log of traces evicted from hash.
 	// Signal handler has filled log[toggle][:nlog].
 	// Goroutine is writing log[1-toggle][:handoff].
 	log     [2][logSize / 2]uintptr // the two log halves
 	nlog    int                     // words used so far in log[toggle]
 	toggle  int32                   // index of the half currently being filled
 	handoff uint32                  // words handed to the writer (0 = none); high bit set = profiling is over

 	// Writer state.
 	// Writer maintains its own toggle to avoid races
 	// looking at signal handler's toggle.
 	wtoggle  uint32
 	wholding bool // holding & need to release a log half
 	flushing bool // flushing hash table - profile is over
 	eodSent  bool // special end-of-data record sent; => flushing
 }

 var (
 	// cpuprofLock is held by SetCPUProfileRate while it starts or stops profiling.
 	cpuprofLock mutex
 	// cpuprof is the profiler state; it stays nil until the first
 	// SetCPUProfileRate call with hz > 0 allocates it.
 	cpuprof *cpuProfile

 	// eod is the end-of-data record that getprofile returns once the
 	// hash table and log have been fully flushed.
 	eod = [3]uintptr{0, 1, 0}
 )

 // setcpuprofilerate runs setcpuprofilerate_m(hz) via systemstack.
 // SetCPUProfileRate calls it with the new rate, or with 0 to stop
 // the profiling clock.
 func setcpuprofilerate(hz int32) {
 	systemstack(func() {
 		setcpuprofilerate_m(hz)
 	})
 }

 // lostProfileData is a no-op function used in profiles
 // to mark the number of profiling stack traces that were
 // discarded due to slow data writers.
 // flushlog writes its PC (obtained with funcPC) as the single frame
 // of a record whose count is the number of lost ticks.
 func lostProfileData() {}

 // SetCPUProfileRate sets the CPU profiling rate to hz samples per second.
 // If hz <= 0, SetCPUProfileRate turns off profiling.
 // If the profiler is on, the rate cannot be changed without first turning it off.
 //
 // Most clients should use the runtime/pprof package or
 // the testing package's -test.cpuprofile flag instead of calling
 // SetCPUProfileRate directly.
 //
 // hz is clamped to the range [0, 1000000]. Failures (allocation failure,
 // or a previous profile that has not been fully read) are reported with
 // print and otherwise ignored; the function has no return value.
 func SetCPUProfileRate(hz int) {
 	// Clamp hz to something reasonable.
 	if hz < 0 {
 		hz = 0
 	}
 	if hz > 1000000 {
 		hz = 1000000
 	}

 	lock(&cpuprofLock)
 	if hz > 0 {
 		// Allocate the profiler state on first use.
 		if cpuprof == nil {
 			cpuprof = (*cpuProfile)(sysAlloc(unsafe.Sizeof(cpuProfile{}), &memstats.other_sys))
 			if cpuprof == nil {
 				print("runtime: cpu profiling cannot allocate memory\n")
 				unlock(&cpuprofLock)
 				return
 			}
 		}
 		// handoff stays non-zero until getprofile has returned the last
 		// of the previous profile's data and reset it to 0.
 		if cpuprof.on || cpuprof.handoff != 0 {
 			print("runtime: cannot set cpu profile rate until previous profile has finished.\n")
 			unlock(&cpuprofLock)
 			return
 		}

 		cpuprof.on = true
 		// pprof binary header format.
 		// https://github.com/gperftools/gperftools/blob/master/src/profiledata.cc#L119
 		p := &cpuprof.log[0]
 		p[0] = 0                 // count for header
 		p[1] = 3                 // depth for header
 		p[2] = 0                 // version number
 		p[3] = uintptr(1e6 / hz) // period (microseconds)
 		p[4] = 0
 		// The five header words are the first contents of log half 0.
 		cpuprof.nlog = 5
 		cpuprof.toggle = 0
 		cpuprof.wholding = false
 		cpuprof.wtoggle = 0
 		cpuprof.flushing = false
 		cpuprof.eodSent = false
 		noteclear(&cpuprof.wait)

 		setcpuprofilerate(int32(hz))
 	} else if cpuprof != nil && cpuprof.on {
 		// Stop the profiling clock before marking the profile as off.
 		setcpuprofilerate(0)
 		cpuprof.on = false

 		// Now add is not running anymore, and getprofile owns the entire log.
 		// Set the high bit in cpuprof.handoff to tell getprofile.
 		for {
 			n := cpuprof.handoff
 			if n&0x80000000 != 0 {
 				// Diagnostic only: the CAS below still runs.
 				print("runtime: setcpuprofile(off) twice\n")
 			}
 			// Retry if getprofile changed handoff between the load and the CAS.
 			if atomic.Cas(&cpuprof.handoff, n, n|0x80000000) {
 				if n == 0 {
 					// we did the transition from 0 -> nonzero so we wake getprofile
 					notewakeup(&cpuprof.wait)
 				}
 				break
 			}
 		}
 	}
 	unlock(&cpuprofLock)
 }

 // add adds the stack trace to the profile.
 // It is called from signal handlers and other limited environments
 // and cannot allocate memory or acquire locks that might be
 // held at the time of the signal, nor can it use substantial amounts
 // of stack. It is allowed to call evict.
 // add passes p.flushlog to addWithFlushlog, so a full log half may be
 // offered to the writing goroutine (contrast addNonGo, which never flushes).
 //go:nowritebarrierrec
 func (p *cpuProfile) add(pc []uintptr) {
 	p.addWithFlushlog(pc, p.flushlog)
 }

 // addWithFlushlog is the shared implementation behind add and addNonGo.
 // It runs in signal handlers and other restricted contexts, possibly with
 // no g or m: it must not allocate, must not take locks that could be held
 // when the signal arrived, and must use little stack. It may call evict,
 // handing it the flushlog callback it was given.
 //
 // pc is truncated to maxCPUProfStack frames. If the stack is already in
 // the hash table its count is bumped; otherwise the least-counted entry of
 // the bucket is evicted to the log (if it holds data) and reused. When the
 // eviction fails the tick is counted in p.lost.
 //go:nosplit
 //go:nowritebarrierrec
 func (p *cpuProfile) addWithFlushlog(pc []uintptr, flushlog func() bool) {
 	if len(pc) > maxCPUProfStack {
 		pc = pc[:maxCPUProfStack]
 	}
 	depth := len(pc)

 	// Hash the PCs: rotate left by one byte, then mix in the next PC.
 	const topByteShift = 8 * (unsafe.Sizeof(uintptr(0)) - 1)
 	var sum uintptr
 	for i := 0; i < depth; i++ {
 		sum = sum<<8 | sum>>topByteShift
 		sum += pc[i] * 41
 	}
 	p.count++

 	bucket := &p.hash[sum%numBuckets]

 	// Cache hit: an entry with the same depth and identical PCs.
 	for i := 0; i < len(bucket.entry); i++ {
 		cand := &bucket.entry[i]
 		if cand.depth != depth {
 			continue
 		}
 		same := true
 		for j := 0; j < depth; j++ {
 			if cand.stack[j] != pc[j] {
 				same = false
 				break
 			}
 		}
 		if same {
 			cand.count++
 			return
 		}
 	}

 	// Cache miss: choose the entry with the smallest count
 	// (the first such entry wins ties).
 	victim := &bucket.entry[0]
 	for i := 1; i < len(bucket.entry); i++ {
 		if bucket.entry[i].count < victim.count {
 			victim = &bucket.entry[i]
 		}
 	}

 	// An entry that still holds data must be written to the log first.
 	if victim.count > 0 {
 		if !p.evict(victim, flushlog) {
 			// No room in the log: this tick is lost.
 			p.lost++
 			return
 		}
 		p.evicts++
 	}

 	// Install the new stack in the freed entry.
 	victim.depth = depth
 	victim.count = 1
 	copy(victim.stack[:], pc)
 }

 // evict appends entry e to the current log half as a record of the form
 // (count, depth, PCs...) and then zeroes e.count so the entry can be reused.
 // It is reached from add in the profiling signal handler, so it must not
 // allocate or block, and it may run with no g or m. Calling flushlog is
 // safe. The result is true if the record was written and false if the
 // half was full and flushlog could not provide a fresh one.
 //go:nosplit
 //go:nowritebarrierrec
 func (p *cpuProfile) evict(e *cpuprofEntry, flushlog func() bool) bool {
 	depth := e.depth
 	need := depth + 2 // count word + depth word + one word per PC

 	half := &p.log[p.toggle]
 	if p.nlog+need > len(half) && !flushlog() {
 		return false
 	}
 	// Re-read both toggle and nlog: a successful flushlog changes them.
 	half = &p.log[p.toggle]
 	at := p.nlog

 	half[at] = e.count
 	half[at+1] = uintptr(depth)
 	copy(half[at+2:], e.stack[:depth])
 	p.nlog = at + need

 	e.count = 0
 	return true
 }

 // flushlog offers the filled log half to the writing goroutine and, on
 // success, switches the signal handler to the other half.
 // It is reached from evict, from add, from the signal handler, so it must
 // not allocate or block. It returns false, leaving everything unchanged,
 // when handoff is non-zero (the writer still owns a half, or profiling
 // has ended). On success any pending lost-tick count is written as the
 // first record of the new half.
 //go:nowritebarrierrec
 func (p *cpuProfile) flushlog() bool {
 	// Publish the word count of the filled half; this only succeeds
 	// while handoff is 0.
 	if !atomic.Cas(&p.handoff, 0, uint32(p.nlog)) {
 		return false
 	}
 	notewakeup(&p.wait)

 	// Switch to the other half.
 	p.toggle = 1 - p.toggle
 	fresh := &p.log[p.toggle]
 	if p.lost == 0 {
 		p.nlog = 0
 		return true
 	}

 	// Record the lost ticks as a one-frame stack at lostProfileData.
 	marker := funcPC(lostProfileData)
 	fresh[0] = p.lost
 	fresh[1] = 1
 	fresh[2] = marker
 	p.lost = 0
 	p.nlog = 3
 	return true
 }

 // addNonGo is like add, but runs on a non-Go thread.
 // It can't do anything that might need a g or an m.
 // With this entry point, we don't try to flush the log when evicting an
 // old entry. Instead, we just drop the stack trace if we're out of space.
 //go:nosplit
 //go:nowritebarrierrec
 func (p *cpuProfile) addNonGo(pc []uintptr) {
 	// A flushlog that always reports failure makes evict return false
 	// when the log half is full, so addWithFlushlog counts the tick as lost.
 	p.addWithFlushlog(pc, func() bool { return false })
 }

 // getprofile blocks until the next block of profiling data is available
 // and returns it as a []byte. It is called from the writing goroutine.
 //
 // Each call first hands back the log half returned by the previous call,
 // if any. It returns nil when p is nil, when profiling is off and nothing
 // is pending, on a handoff phase error, or once the final flush and the
 // end-of-data record have both been returned. The returned slice shares
 // memory with p.log (or eod); it is not a copy.
 func (p *cpuProfile) getprofile() []byte {
 	if p == nil {
 		return nil
 	}

 	if p.wholding {
 		// Release previous log to signal handling side.
 		// Loop because we are racing against SetCPUProfileRate(0).
 		for {
 			n := p.handoff
 			if n == 0 {
 				print("runtime: phase error during cpu profile handoff\n")
 				return nil
 			}
 			if n&0x80000000 != 0 {
 				// Profiling was turned off while we held the half.
 				// Leave handoff as it is and start flushing.
 				p.wtoggle = 1 - p.wtoggle
 				p.wholding = false
 				p.flushing = true
 				goto Flush
 			}
 			if atomic.Cas(&p.handoff, n, 0) {
 				break
 			}
 		}
 		p.wtoggle = 1 - p.wtoggle
 		p.wholding = false
 	}

 	// A flush spans several calls; keep going until it completes.
 	if p.flushing {
 		goto Flush
 	}

 	// Profiling is off and nothing was handed off: no data to wait for.
 	if !p.on && p.handoff == 0 {
 		return nil
 	}

 	// Wait for new log.
 	notetsleepg(&p.wait, -1)
 	noteclear(&p.wait)

 	switch n := p.handoff; {
 	case n == 0:
 		print("runtime: phase error during cpu profile wait\n")
 		return nil
 	case n == 0x80000000:
 		// Only the end-of-profile bit is set: no half was handed off.
 		p.flushing = true
 		goto Flush
 	default:
 		// Mask off the end-of-profile bit; the remaining bits are the
 		// word count of the handed-off half. If the bit was set, the
 		// wholding path of the next call sees it and starts flushing.
 		n &^= 0x80000000

 		// Return new log to caller.
 		p.wholding = true

 		return uintptrBytes(p.log[p.wtoggle][:n])
 	}

 	// In flush mode.
 	// Add is no longer being called. We own the log.
 	// Also, p.handoff is non-zero, so flushlog will return false.
 	// Evict the hash table into the log and return it.
 Flush:
 	for i := range p.hash {
 		b := &p.hash[i]
 		for j := range b.entry {
 			e := &b.entry[j]
 			if e.count > 0 && !p.evict(e, p.flushlog) {
 				// Filled the log. Stop the loop and return what we've got.
 				break Flush
 			}
 		}
 	}

 	// Return pending log data.
 	if p.nlog > 0 {
 		// Note that we're using toggle now, not wtoggle,
 		// because we're working on the log directly.
 		n := p.nlog
 		p.nlog = 0
 		return uintptrBytes(p.log[p.toggle][:n])
 	}

 	// Made it through the table without finding anything to log.
 	if !p.eodSent {
 		// We may not have space to append this to the partial log buf,
 		// so we always return a new slice for the end-of-data marker.
 		p.eodSent = true
 		return uintptrBytes(eod[:])
 	}

 	// Finally done. Clean up and return nil.
 	// Resetting handoff to 0 is what lets SetCPUProfileRate start a new
 	// profile; it refuses while handoff is non-zero.
 	p.flushing = false
 	if !atomic.Cas(&p.handoff, p.handoff, 0) {
 		print("runtime: profile flush racing with something\n")
 	}
 	return nil
 }

 // uintptrBytes reinterprets p as a []byte covering the same memory.
 // No data is copied: the result shares p's backing array, and its length
 // and capacity are both len(p) times the size of a uintptr.
 func uintptrBytes(p []uintptr) (ret []byte) {
 	const wordSize = int(unsafe.Sizeof(uintptr(0)))

 	// Overlay the runtime's slice header layout on both slices.
 	in := (*slice)(unsafe.Pointer(&p))
 	out := (*slice)(unsafe.Pointer(&ret))

 	nbytes := in.len * wordSize
 	out.array = in.array
 	out.len = nbytes
 	out.cap = nbytes

 	return
 }

 // CPUProfile returns the next chunk of binary CPU profiling stack trace data,
 // blocking until data is available. If profiling is turned off and all the profile
 // data accumulated while it was on has been returned, CPUProfile returns nil.
 // The caller must save the returned data before calling CPUProfile again.
 //
 // Most clients should use the runtime/pprof package or
 // the testing package's -test.cpuprofile flag instead of calling
 // CPUProfile directly.
 func CPUProfile() []byte {
 	// cpuprof is nil until SetCPUProfileRate first allocates it;
 	// getprofile returns nil for a nil receiver.
 	return cpuprof.getprofile()
 }

 // runtime_pprof_runtime_cyclesPerSecond returns tickspersecond().
 // The go:linkname directive below gives it the external name
 // runtime_pprof.runtime_cyclesPerSecond.
 //go:linkname runtime_pprof_runtime_cyclesPerSecond runtime_pprof.runtime_cyclesPerSecond
 func runtime_pprof_runtime_cyclesPerSecond() int64 {
 	return tickspersecond()
 }
	// Copyright 2011 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	// CPU profiling.
	// Based on algorithms and data structures used in
	// https://github.com/google/pprof.
	//
	// The main difference between this code and the google-perftools
	// code is that this code is written to allow copying the profile data
	// to an arbitrary io.Writer, while the google-perftools code always
	// writes to an operating system file.
	//
	// The signal handler for the profiling clock tick adds a new stack trace
	// to a hash table tracking counts for recent traces. Most clock ticks
	// hit in the cache. In the event of a cache miss, an entry must be
	// evicted from the hash table, copied to a log that will eventually be
	// written as profile data. The google-perftools code flushed the
	// log itself during the signal handler. This code cannot do that, because
	// the io.Writer might block or need system calls or locks that are not
	// safe to use from within the signal handler. Instead, we split the log
	// into two halves and let the signal handler fill one half while a goroutine
	// is writing out the other half. When the signal handler fills its half, it
	// offers to swap with the goroutine. If the writer is not done with its half,
	// we lose the stack trace for this clock tick (and record that loss).
	// The goroutine interacts with the signal handler by calling getprofile() to
	// get the next log piece to write, implicitly handing back the last log
	// piece it obtained.
	//
	// The state of this dance between the signal handler and the goroutine
	// is encoded in the cpuProfile.handoff field. If handoff == 0, then the goroutine
	// is not using either log half and is waiting (or will soon be waiting) for
	// a new piece by calling notetsleepg(&p.wait, -1). If the signal handler
	// changes handoff from 0 to non-zero, it must call notewakeup(&p.wait)
	// to wake the goroutine. The value indicates the number of entries in the
	// log half being handed off. The goroutine leaves the non-zero value in
	// place until it has finished processing the log half and then flips the number
	// back to zero. Setting the high bit in handoff means that the profiling is over,
	// and the goroutine is now in charge of flushing the data left in the hash table
	// to the log and returning that data.
	//
	// The handoff field is manipulated using atomic operations.
	// For the most part, the manipulation of handoff is orderly: if handoff == 0
	// then the signal handler owns it and can change it to non-zero.
	// If handoff != 0 then the goroutine owns it and can change it to zero.
	// If that were the end of the story then we would not need to manipulate
	// handoff using atomic operations. The operations are needed, however,
	// in order to let the log closer set the high bit to indicate "EOF" safely
	// in the situation when normally the goroutine "owns" handoff.

	package runtime

	import (
	"runtime/internal/atomic"
	"unsafe"
	)

	// Sizing parameters for the profiler's hash table and log.
	const (
	numBuckets = 1 << 10 // number of buckets in cpuProfile.hash
	logSize = 1 << 17 // total log size in uintptr words, split into two halves
	assoc = 4 // entries per hash bucket
	maxCPUProfStack = 64 // maximum number of PCs kept per stack trace
	)

	// cpuprofEntry is one slot of the profiler's hash table: a stack trace
	// together with the number of ticks that hit it.
	type cpuprofEntry struct {
	count uintptr // ticks recorded for this stack; evict resets it to 0
	depth int // number of valid PCs in stack
	stack [maxCPUProfStack]uintptr // the PCs; only stack[:depth] is meaningful
	}

	// cpuProfile holds the complete state of the CPU profiler: the hash table
	// of recent stack traces, the two-half log that evicted traces are copied
	// into, and the handoff word used to pass log halves to the writing
	// goroutine. SetCPUProfileRate obtains the single instance from sysAlloc.
	//go:notinheap
	type cpuProfile struct {
	on bool // profiling is on
	wait note // goroutine waits here
	count uintptr // tick count
	evicts uintptr // eviction count
	lost uintptr // lost ticks that need to be logged

	// Active recent stack traces.
	// Each bucket holds assoc entries; addWithFlushlog picks the bucket
	// from a hash of the stack's PCs.
	hash [numBuckets]struct {
	entry [assoc]cpuprofEntry
	}

	// Log of traces evicted from hash.
	// Signal handler has filled log[toggle][:nlog].
	// Goroutine is writing log[1-toggle][:handoff].
	log [2][logSize / 2]uintptr // the two log halves
	nlog int // words used so far in log[toggle]
	toggle int32 // index of the half currently being filled
	handoff uint32 // words handed to the writer (0 = none); high bit set = profiling is over

	// Writer state.
	// Writer maintains its own toggle to avoid races
	// looking at signal handler's toggle.
	wtoggle uint32
	wholding bool // holding & need to release a log half
	flushing bool // flushing hash table - profile is over
	eodSent bool // special end-of-data record sent; => flushing
	}

	var (
	// cpuprofLock is held by SetCPUProfileRate while it starts or stops profiling.
	cpuprofLock mutex
	// cpuprof is the profiler state; it stays nil until the first
	// SetCPUProfileRate call with hz > 0 allocates it.
	cpuprof *cpuProfile

	// eod is the end-of-data record that getprofile returns once the
	// hash table and log have been fully flushed.
	eod = [3]uintptr{0, 1, 0}
	)

	// setcpuprofilerate runs setcpuprofilerate_m(hz) via systemstack.
	// SetCPUProfileRate calls it with the new rate, or with 0 to stop
	// the profiling clock.
	func setcpuprofilerate(hz int32) {
	systemstack(func() {
	setcpuprofilerate_m(hz)
	})
	}

	// lostProfileData is a no-op function used in profiles
	// to mark the number of profiling stack traces that were
	// discarded due to slow data writers.
	// flushlog writes its PC (obtained with funcPC) as the single frame
	// of a record whose count is the number of lost ticks.
	func lostProfileData() {}

	// SetCPUProfileRate sets the CPU profiling rate to hz samples per second.
	// If hz <= 0, SetCPUProfileRate turns off profiling.
	// If the profiler is on, the rate cannot be changed without first turning it off.
	//
	// Most clients should use the runtime/pprof package or
	// the testing package's -test.cpuprofile flag instead of calling
	// SetCPUProfileRate directly.
	func SetCPUProfileRate(hz int) {
	// Clamp hz to something reasonable.
	if hz < 0 {
	hz = 0
	}
	if hz > 1000000 {
	hz = 1000000
	}

	lock(&cpuprofLock)
	if hz > 0 {
	if cpuprof == nil {
	cpuprof = (*cpuProfile)(sysAlloc(unsafe.Sizeof(cpuProfile{}), &memstats.other_sys))
	if cpuprof == nil {
	print("runtime: cpu profiling cannot allocate memory\n")
	unlock(&cpuprofLock)
	return
	}
	}
	if cpuprof.on \|\| cpuprof.handoff != 0 {
	print("runtime: cannot set cpu profile rate until previous profile has finished.\n")
	unlock(&cpuprofLock)
	return
	}

	cpuprof.on = true
	// pprof binary header format.
	// https://github.com/gperftools/gperftools/blob/master/src/profiledata.cc#L119
	p := &cpuprof.log[0]
	p[0] = 0 // count for header
	p[1] = 3 // depth for header
	p[2] = 0 // version number
	p[3] = uintptr(1e6 / hz) // period (microseconds)
	p[4] = 0
	cpuprof.nlog = 5
	cpuprof.toggle = 0
	cpuprof.wholding = false
	cpuprof.wtoggle = 0
	cpuprof.flushing = false
	cpuprof.eodSent = false
	noteclear(&cpuprof.wait)

	setcpuprofilerate(int32(hz))
	} else if cpuprof != nil && cpuprof.on {
	setcpuprofilerate(0)
	cpuprof.on = false

	// Now add is not running anymore, and getprofile owns the entire log.
	// Set the high bit in cpuprof.handoff to tell getprofile.
	for {
	n := cpuprof.handoff
	if n&0x80000000 != 0 {
	print("runtime: setcpuprofile(off) twice\n")
	}
	if atomic.Cas(&cpuprof.handoff, n, n\|0x80000000) {
	if n == 0 {
	// we did the transition from 0 -> nonzero so we wake getprofile
	notewakeup(&cpuprof.wait)
	}
	break
	}
	}
	}
	unlock(&cpuprofLock)
	}

	// add adds the stack trace to the profile.
	// It is called from signal handlers and other limited environments
	// and cannot allocate memory or acquire locks that might be
	// held at the time of the signal, nor can it use substantial amounts
	// of stack. It is allowed to call evict.
	// add passes p.flushlog to addWithFlushlog, so a full log half may be
	// offered to the writing goroutine (contrast addNonGo, which never flushes).
	//go:nowritebarrierrec
	func (p *cpuProfile) add(pc []uintptr) {
	p.addWithFlushlog(pc, p.flushlog)
	}

	// addWithFlushlog implements add and addNonGo.
	// It is called from signal handlers and other limited environments
	// and cannot allocate memory or acquire locks that might be
	// held at the time of the signal, nor can it use substantial amounts
	// of stack. It may be called by a signal handler with no g or m.
	// It is allowed to call evict, passing the flushlog parameter.
	//go:nosplit
	//go:nowritebarrierrec
	func (p *cpuProfile) addWithFlushlog(pc []uintptr, flushlog func() bool) {
	if len(pc) > maxCPUProfStack {
	pc = pc[:maxCPUProfStack]
	}

	// Compute hash.
	h := uintptr(0)
	for _, x := range pc {
	h = h<<8 \| (h >> (8 * (unsafe.Sizeof(h) - 1)))
	h += x * 41
	}
	p.count++

	// Add to entry count if already present in table.
	b := &p.hash[h%numBuckets]
	Assoc:
	for i := range b.entry {
	e := &b.entry[i]
	if e.depth != len(pc) {
	continue
	}
	for j := range pc {
	if e.stack[j] != pc[j] {
	continue Assoc
	}
	}
	e.count++
	return
	}

	// Evict entry with smallest count.
	var e *cpuprofEntry
	for i := range b.entry {
	if e == nil \|\| b.entry[i].count < e.count {
	e = &b.entry[i]
	}
	}
	if e.count > 0 {
	if !p.evict(e, flushlog) {
	// Could not evict entry. Record lost stack.
	p.lost++
	return
	}
	p.evicts++
	}

	// Reuse the newly evicted entry.
	e.depth = len(pc)
	e.count = 1
	copy(e.stack[:], pc)
	}

	// evict copies the given entry's data into the log, so that
	// the entry can be reused. evict is called from add, which
	// is called from the profiling signal handler, so it must not
	// allocate memory or block, and it may be called with no g or m.
	// It is safe to call flushlog. evict returns true if the entry was
	// copied to the log, false if there was no room available.
	//
	// Both the receiver and e are pointers: evict updates p.nlog and
	// resets e.count in place, and callers pass a *cpuprofEntry.
	//go:nosplit
	//go:nowritebarrierrec
	func (p *cpuProfile) evict(e *cpuprofEntry, flushlog func() bool) bool {
		d := e.depth
		nslot := d + 2 // count word + depth word + one word per PC
		log := &p.log[p.toggle]
		if p.nlog+nslot > len(log) {
			if !flushlog() {
				return false
			}
			// A successful flushlog switched halves; pick up the new one.
			log = &p.log[p.toggle]
		}

		// Append the record: count, depth, then the PCs.
		q := p.nlog
		log[q] = e.count
		q++
		log[q] = uintptr(d)
		q++
		copy(log[q:], e.stack[:d])
		q += d
		p.nlog = q
		e.count = 0
		return true
	}

	// flushlog offers the filled log half to the writing goroutine and, on
	// success, switches the signal handler to the other half.
	// It is reached from evict, from add, from the signal handler, so it must
	// not allocate or block. It returns false, leaving everything unchanged,
	// when handoff is non-zero (the writer still owns a half, or profiling
	// has ended). On success any pending lost-tick count is written as the
	// first record of the new half.
	//go:nowritebarrierrec
	func (p *cpuProfile) flushlog() bool {
		// Publish the word count of the filled half; this only succeeds
		// while handoff is 0.
		if !atomic.Cas(&p.handoff, 0, uint32(p.nlog)) {
			return false
		}
		notewakeup(&p.wait)

		// Switch to the other half.
		p.toggle = 1 - p.toggle
		fresh := &p.log[p.toggle]
		if p.lost == 0 {
			p.nlog = 0
			return true
		}

		// Record the lost ticks as a one-frame stack at lostProfileData.
		marker := funcPC(lostProfileData)
		fresh[0] = p.lost
		fresh[1] = 1
		fresh[2] = marker
		p.lost = 0
		p.nlog = 3
		return true
	}

	// addNonGo is like add, but runs on a non-Go thread.
	// It can't do anything that might need a g or an m.
	// With this entry point, we don't try to flush the log when evicting an
	// old entry. Instead, we just drop the stack trace if we're out of space.
	//go:nosplit
	//go:nowritebarrierrec
	func (p *cpuProfile) addNonGo(pc []uintptr) {
	// A flushlog that always reports failure makes evict return false
	// when the log half is full, so addWithFlushlog counts the tick as lost.
	p.addWithFlushlog(pc, func() bool { return false })
	}

	// getprofile blocks until the next block of profiling data is available
	// and returns it as a []byte. It is called from the writing goroutine.
	//
	// Each call first hands back the log half returned by the previous call,
	// if any. It returns nil when p is nil, when profiling is off and nothing
	// is pending, on a handoff phase error, or once the final flush and the
	// end-of-data record have both been returned. The returned slice shares
	// memory with p.log (or eod); it is not a copy.
	func (p *cpuProfile) getprofile() []byte {
	if p == nil {
	return nil
	}

	if p.wholding {
	// Release previous log to signal handling side.
	// Loop because we are racing against SetCPUProfileRate(0).
	for {
	n := p.handoff
	if n == 0 {
	print("runtime: phase error during cpu profile handoff\n")
	return nil
	}
	if n&0x80000000 != 0 {
	// Profiling was turned off while we held the half.
	// Leave handoff as it is and start flushing.
	p.wtoggle = 1 - p.wtoggle
	p.wholding = false
	p.flushing = true
	goto Flush
	}
	if atomic.Cas(&p.handoff, n, 0) {
	break
	}
	}
	p.wtoggle = 1 - p.wtoggle
	p.wholding = false
	}

	// A flush spans several calls; keep going until it completes.
	if p.flushing {
	goto Flush
	}

	// Profiling is off and nothing was handed off: no data to wait for.
	if !p.on && p.handoff == 0 {
	return nil
	}

	// Wait for new log.
	notetsleepg(&p.wait, -1)
	noteclear(&p.wait)

	switch n := p.handoff; {
	case n == 0:
	print("runtime: phase error during cpu profile wait\n")
	return nil
	case n == 0x80000000:
	// Only the end-of-profile bit is set: no half was handed off.
	p.flushing = true
	goto Flush
	default:
	// Mask off the end-of-profile bit; the remaining bits are the
	// word count of the handed-off half. If the bit was set, the
	// wholding path of the next call sees it and starts flushing.
	n &^= 0x80000000

	// Return new log to caller.
	p.wholding = true

	return uintptrBytes(p.log[p.wtoggle][:n])
	}

	// In flush mode.
	// Add is no longer being called. We own the log.
	// Also, p.handoff is non-zero, so flushlog will return false.
	// Evict the hash table into the log and return it.
	Flush:
	for i := range p.hash {
	b := &p.hash[i]
	for j := range b.entry {
	e := &b.entry[j]
	if e.count > 0 && !p.evict(e, p.flushlog) {
	// Filled the log. Stop the loop and return what we've got.
	break Flush
	}
	}
	}

	// Return pending log data.
	if p.nlog > 0 {
	// Note that we're using toggle now, not wtoggle,
	// because we're working on the log directly.
	n := p.nlog
	p.nlog = 0
	return uintptrBytes(p.log[p.toggle][:n])
	}

	// Made it through the table without finding anything to log.
	if !p.eodSent {
	// We may not have space to append this to the partial log buf,
	// so we always return a new slice for the end-of-data marker.
	p.eodSent = true
	return uintptrBytes(eod[:])
	}

	// Finally done. Clean up and return nil.
	// Resetting handoff to 0 is what lets SetCPUProfileRate start a new
	// profile; it refuses while handoff is non-zero.
	p.flushing = false
	if !atomic.Cas(&p.handoff, p.handoff, 0) {
	print("runtime: profile flush racing with something\n")
	}
	return nil
	}

	// uintptrBytes reinterprets p as a []byte covering the same memory.
	// No data is copied: the result shares p's backing array, and its length
	// and capacity are both len(p) times the size of a uintptr.
	func uintptrBytes(p []uintptr) (ret []byte) {
		const wordSize = int(unsafe.Sizeof(uintptr(0)))

		// Overlay the runtime's slice header layout on both slices.
		in := (*slice)(unsafe.Pointer(&p))
		out := (*slice)(unsafe.Pointer(&ret))

		nbytes := in.len * wordSize
		out.array = in.array
		out.len = nbytes
		out.cap = nbytes

		return
	}

	// CPUProfile returns the next chunk of binary CPU profiling stack trace data,
	// blocking until data is available. If profiling is turned off and all the profile
	// data accumulated while it was on has been returned, CPUProfile returns nil.
	// The caller must save the returned data before calling CPUProfile again.
	//
	// Most clients should use the runtime/pprof package or
	// the testing package's -test.cpuprofile flag instead of calling
	// CPUProfile directly.
	func CPUProfile() []byte {
	// cpuprof is nil until SetCPUProfileRate first allocates it;
	// getprofile returns nil for a nil receiver.
	return cpuprof.getprofile()
	}

	// runtime_pprof_runtime_cyclesPerSecond returns tickspersecond().
	// The go:linkname directive below gives it the external name
	// runtime_pprof.runtime_cyclesPerSecond.
	//go:linkname runtime_pprof_runtime_cyclesPerSecond runtime_pprof.runtime_cyclesPerSecond
	func runtime_pprof_runtime_cyclesPerSecond() int64 {
	return tickspersecond()
	}