src/runtime/mheap.go - go - Git at Google

 // Copyright 2009 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 // Page heap.
 //
 // See malloc.go for overview.

 package runtime

 import (
 	"internal/cpu"
 	"runtime/internal/atomic"
 	"runtime/internal/sys"
 	"unsafe"
 )

 const (
 	// minPhysPageSize is a lower-bound on the physical page size. The
 	// true physical page size may be larger than this. In contrast,
 	// sys.PhysPageSize is an upper-bound on the physical page size.
 	minPhysPageSize = 4096

 	// maxPhysPageSize is the maximum page size the runtime supports.
 	maxPhysPageSize = 512 << 10

 	// maxPhysHugePageSize sets an upper-bound on the maximum huge page size
 	// that the runtime supports.
 	maxPhysHugePageSize = pallocChunkBytes

 	// pagesPerReclaimerChunk indicates how many pages to scan from the
 	// pageInUse bitmap at a time. Used by the page reclaimer.
 	//
 	// Higher values reduce contention on scanning indexes (such as
 	// h.reclaimIndex), but increase the minimum latency of the
 	// operation.
 	//
 	// The time required to scan this many pages can vary a lot depending
 	// on how many spans are actually freed. Experimentally, it can
 	// scan for pages at ~300 GB/ms on a 2.6GHz Core i7, but can only
 	// free spans at ~32 MB/ms. Using 512 pages bounds this at
 	// roughly 100µs.
 	//
 	// Must be a multiple of the pageInUse bitmap element size and
 	// must also evenly divid pagesPerArena.
 	pagesPerReclaimerChunk = 512

 	// go115NewMCentralImpl is a feature flag for the new mcentral implementation.
 	//
 	// This flag depends on go115NewMarkrootSpans because the new mcentral
 	// implementation requires that markroot spans no longer rely on mgcsweepbufs.
 	// The definition of this flag helps ensure that if there's a problem with
 	// the new markroot spans implementation and it gets turned off, that the new
 	// mcentral implementation also gets turned off so the runtime isn't broken.
 	go115NewMCentralImpl = true && go115NewMarkrootSpans
 )

 // Main malloc heap.
 // The heap itself is the "free" and "scav" treaps,
 // but all the other global data is here too.
 //
 // mheap must not be heap-allocated because it contains mSpanLists,
 // which must not be heap-allocated.
 //
 //go:notinheap
 type mheap struct {
 	// lock must only be acquired on the system stack, otherwise a g
 	// could self-deadlock if its stack grows with the lock held.
 	lock      mutex
 	pages     pageAlloc // page allocation data structure
 	sweepgen  uint32    // sweep generation, see comment in mspan; written during STW
 	sweepdone uint32    // all spans are swept
 	sweepers  uint32    // number of active sweepone calls

 	// allspans is a slice of all mspans ever created. Each mspan
 	// appears exactly once.
 	//
 	// The memory for allspans is manually managed and can be
 	// reallocated and move as the heap grows.
 	//
 	// In general, allspans is protected by mheap_.lock, which
 	// prevents concurrent access as well as freeing the backing
 	// store. Accesses during STW might not hold the lock, but
 	// must ensure that allocation cannot happen around the
 	// access (since that may free the backing store).
 	allspans []*mspan // all spans out there

 	// sweepSpans contains two mspan stacks: one of swept in-use
 	// spans, and one of unswept in-use spans. These two trade
 	// roles on each GC cycle. Since the sweepgen increases by 2
 	// on each cycle, this means the swept spans are in
 	// sweepSpans[sweepgen/2%2] and the unswept spans are in
 	// sweepSpans[1-sweepgen/2%2]. Sweeping pops spans from the
 	// unswept stack and pushes spans that are still in-use on the
 	// swept stack. Likewise, allocating an in-use span pushes it
 	// on the swept stack.
 	//
 	// For !go115NewMCentralImpl.
 	sweepSpans [2]gcSweepBuf

 	_ uint32 // align uint64 fields on 32-bit for atomics

 	// Proportional sweep
 	//
 	// These parameters represent a linear function from heap_live
 	// to page sweep count. The proportional sweep system works to
 	// stay in the black by keeping the current page sweep count
 	// above this line at the current heap_live.
 	//
 	// The line has slope sweepPagesPerByte and passes through a
 	// basis point at (sweepHeapLiveBasis, pagesSweptBasis). At
 	// any given time, the system is at (memstats.heap_live,
 	// pagesSwept) in this space.
 	//
 	// It's important that the line pass through a point we
 	// control rather than simply starting at a (0,0) origin
 	// because that lets us adjust sweep pacing at any time while
 	// accounting for current progress. If we could only adjust
 	// the slope, it would create a discontinuity in debt if any
 	// progress has already been made.
 	pagesInUse         uint64  // pages of spans in stats mSpanInUse; updated atomically
 	pagesSwept         uint64  // pages swept this cycle; updated atomically
 	pagesSweptBasis    uint64  // pagesSwept to use as the origin of the sweep ratio; updated atomically
 	sweepHeapLiveBasis uint64  // value of heap_live to use as the origin of sweep ratio; written with lock, read without
 	sweepPagesPerByte  float64 // proportional sweep ratio; written with lock, read without
 	// TODO(austin): pagesInUse should be a uintptr, but the 386
 	// compiler can't 8-byte align fields.

 	// scavengeGoal is the amount of total retained heap memory (measured by
 	// heapRetained) that the runtime will try to maintain by returning memory
 	// to the OS.
 	scavengeGoal uint64

 	// Page reclaimer state

 	// reclaimIndex is the page index in allArenas of next page to
 	// reclaim. Specifically, it refers to page (i %
 	// pagesPerArena) of arena allArenas[i / pagesPerArena].
 	//
 	// If this is >= 1<<63, the page reclaimer is done scanning
 	// the page marks.
 	//
 	// This is accessed atomically.
 	reclaimIndex uint64
 	// reclaimCredit is spare credit for extra pages swept. Since
 	// the page reclaimer works in large chunks, it may reclaim
 	// more than requested. Any spare pages released go to this
 	// credit pool.
 	//
 	// This is accessed atomically.
 	reclaimCredit uintptr

 	// Malloc stats.
 	largealloc  uint64                  // bytes allocated for large objects
 	nlargealloc uint64                  // number of large object allocations
 	largefree   uint64                  // bytes freed for large objects (>maxsmallsize)
 	nlargefree  uint64                  // number of frees for large objects (>maxsmallsize)
 	nsmallfree  [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)

 	// arenas is the heap arena map. It points to the metadata for
 	// the heap for every arena frame of the entire usable virtual
 	// address space.
 	//
 	// Use arenaIndex to compute indexes into this array.
 	//
 	// For regions of the address space that are not backed by the
 	// Go heap, the arena map contains nil.
 	//
 	// Modifications are protected by mheap_.lock. Reads can be
 	// performed without locking; however, a given entry can
 	// transition from nil to non-nil at any time when the lock
 	// isn't held. (Entries never transitions back to nil.)
 	//
 	// In general, this is a two-level mapping consisting of an L1
 	// map and possibly many L2 maps. This saves space when there
 	// are a huge number of arena frames. However, on many
 	// platforms (even 64-bit), arenaL1Bits is 0, making this
 	// effectively a single-level map. In this case, arenas[0]
 	// will never be nil.
 	arenas [1 << arenaL1Bits]*[1 << arenaL2Bits]*heapArena

 	// heapArenaAlloc is pre-reserved space for allocating heapArena
 	// objects. This is only used on 32-bit, where we pre-reserve
 	// this space to avoid interleaving it with the heap itself.
 	heapArenaAlloc linearAlloc

 	// arenaHints is a list of addresses at which to attempt to
 	// add more heap arenas. This is initially populated with a
 	// set of general hint addresses, and grown with the bounds of
 	// actual heap arena ranges.
 	arenaHints *arenaHint

 	// arena is a pre-reserved space for allocating heap arenas
 	// (the actual arenas). This is only used on 32-bit.
 	arena linearAlloc

 	// allArenas is the arenaIndex of every mapped arena. This can
 	// be used to iterate through the address space.
 	//
 	// Access is protected by mheap_.lock. However, since this is
 	// append-only and old backing arrays are never freed, it is
 	// safe to acquire mheap_.lock, copy the slice header, and
 	// then release mheap_.lock.
 	allArenas []arenaIdx

 	// sweepArenas is a snapshot of allArenas taken at the
 	// beginning of the sweep cycle. This can be read safely by
 	// simply blocking GC (by disabling preemption).
 	sweepArenas []arenaIdx

 	// markArenas is a snapshot of allArenas taken at the beginning
 	// of the mark cycle. Because allArenas is append-only, neither
 	// this slice nor its contents will change during the mark, so
 	// it can be read safely.
 	markArenas []arenaIdx

 	// curArena is the arena that the heap is currently growing
 	// into. This should always be physPageSize-aligned.
 	curArena struct {
 		base, end uintptr
 	}

 	// _ uint32 // ensure 64-bit alignment of central

 	// central free lists for small size classes.
 	// the padding makes sure that the mcentrals are
 	// spaced CacheLinePadSize bytes apart, so that each mcentral.lock
 	// gets its own cache line.
 	// central is indexed by spanClass.
 	central [numSpanClasses]struct {
 		mcentral mcentral
 		pad      [cpu.CacheLinePadSize - unsafe.Sizeof(mcentral{})%cpu.CacheLinePadSize]byte
 	}

 	spanalloc             fixalloc // allocator for span*
 	cachealloc            fixalloc // allocator for mcache*
 	specialfinalizeralloc fixalloc // allocator for specialfinalizer*
 	specialprofilealloc   fixalloc // allocator for specialprofile*
 	speciallock           mutex    // lock for special record allocators.
 	arenaHintAlloc        fixalloc // allocator for arenaHints

 	unused *specialfinalizer // never set, just here to force the specialfinalizer type into DWARF
 }

 var mheap_ mheap

 // A heapArena stores metadata for a heap arena. heapArenas are stored
 // outside of the Go heap and accessed via the mheap_.arenas index.
 //
 //go:notinheap
 type heapArena struct {
 	// bitmap stores the pointer/scalar bitmap for the words in
 	// this arena. See mbitmap.go for a description. Use the
 	// heapBits type to access this.
 	bitmap [heapArenaBitmapBytes]byte

 	// spans maps from virtual address page ID within this arena to *mspan.
 	// For allocated spans, their pages map to the span itself.
 	// For free spans, only the lowest and highest pages map to the span itself.
 	// Internal pages map to an arbitrary span.
 	// For pages that have never been allocated, spans entries are nil.
 	//
 	// Modifications are protected by mheap.lock. Reads can be
 	// performed without locking, but ONLY from indexes that are
 	// known to contain in-use or stack spans. This means there
 	// must not be a safe-point between establishing that an
 	// address is live and looking it up in the spans array.
 	spans [pagesPerArena]*mspan

 	// pageInUse is a bitmap that indicates which spans are in
 	// state mSpanInUse. This bitmap is indexed by page number,
 	// but only the bit corresponding to the first page in each
 	// span is used.
 	//
 	// Reads and writes are atomic.
 	pageInUse [pagesPerArena / 8]uint8

 	// pageMarks is a bitmap that indicates which spans have any
 	// marked objects on them. Like pageInUse, only the bit
 	// corresponding to the first page in each span is used.
 	//
 	// Writes are done atomically during marking. Reads are
 	// non-atomic and lock-free since they only occur during
 	// sweeping (and hence never race with writes).
 	//
 	// This is used to quickly find whole spans that can be freed.
 	//
 	// TODO(austin): It would be nice if this was uint64 for
 	// faster scanning, but we don't have 64-bit atomic bit
 	// operations.
 	pageMarks [pagesPerArena / 8]uint8

 	// pageSpecials is a bitmap that indicates which spans have
 	// specials (finalizers or other). Like pageInUse, only the bit
 	// corresponding to the first page in each span is used.
 	//
 	// Writes are done atomically whenever a special is added to
 	// a span and whenever the last special is removed from a span.
 	// Reads are done atomically to find spans containing specials
 	// during marking.
 	pageSpecials [pagesPerArena / 8]uint8

 	// zeroedBase marks the first byte of the first page in this
 	// arena which hasn't been used yet and is therefore already
 	// zero. zeroedBase is relative to the arena base.
 	// Increases monotonically until it hits heapArenaBytes.
 	//
 	// This field is sufficient to determine if an allocation
 	// needs to be zeroed because the page allocator follows an
 	// address-ordered first-fit policy.
 	//
 	// Read atomically and written with an atomic CAS.
 	zeroedBase uintptr
 }

 // arenaHint is a hint for where to grow the heap arenas. See
 // mheap_.arenaHints.
 //
 //go:notinheap
 type arenaHint struct {
 	addr uintptr
 	down bool
 	next *arenaHint
 }

 // An mspan is a run of pages.
 //
 // When a mspan is in the heap free treap, state == mSpanFree
 // and heapmap(s->start) == span, heapmap(s->start+s->npages-1) == span.
 // If the mspan is in the heap scav treap, then in addition to the
 // above scavenged == true. scavenged == false in all other cases.
 //
 // When a mspan is allocated, state == mSpanInUse or mSpanManual
 // and heapmap(i) == span for all s->start <= i < s->start+s->npages.

 // Every mspan is in one doubly-linked list, either in the mheap's
 // busy list or one of the mcentral's span lists.

 // An mspan representing actual memory has state mSpanInUse,
 // mSpanManual, or mSpanFree. Transitions between these states are
 // constrained as follows:
 //
 // * A span may transition from free to in-use or manual during any GC
 //   phase.
 //
 // * During sweeping (gcphase == _GCoff), a span may transition from
 //   in-use to free (as a result of sweeping) or manual to free (as a
 //   result of stacks being freed).
 //
 // * During GC (gcphase != _GCoff), a span *must not* transition from
 //   manual or in-use to free. Because concurrent GC may read a pointer
 //   and then look up its span, the span state must be monotonic.
 //
 // Setting mspan.state to mSpanInUse or mSpanManual must be done
 // atomically and only after all other span fields are valid.
 // Likewise, if inspecting a span is contingent on it being
 // mSpanInUse, the state should be loaded atomically and checked
 // before depending on other fields. This allows the garbage collector
 // to safely deal with potentially invalid pointers, since resolving
 // such pointers may race with a span being allocated.
 type mSpanState uint8

 const (
 	mSpanDead   mSpanState = iota
 	mSpanInUse             // allocated for garbage collected heap
 	mSpanManual            // allocated for manual management (e.g., stack allocator)
 )

 // mSpanStateNames are the names of the span states, indexed by
 // mSpanState.
 var mSpanStateNames = []string{
 	"mSpanDead",
 	"mSpanInUse",
 	"mSpanManual",
 	"mSpanFree",
 }

 // mSpanStateBox holds an mSpanState and provides atomic operations on
 // it. This is a separate type to disallow accidental comparison or
 // assignment with mSpanState.
 type mSpanStateBox struct {
 	s mSpanState
 }

 func (b *mSpanStateBox) set(s mSpanState) {
 	atomic.Store8((*uint8)(&b.s), uint8(s))
 }

 func (b *mSpanStateBox) get() mSpanState {
 	return mSpanState(atomic.Load8((*uint8)(&b.s)))
 }

 // mSpanList heads a linked list of spans.
 //
 //go:notinheap
 type mSpanList struct {
 	first *mspan // first span in list, or nil if none
 	last  *mspan // last span in list, or nil if none
 }

 //go:notinheap
 type mspan struct {
 	next *mspan     // next span in list, or nil if none
 	prev *mspan     // previous span in list, or nil if none
 	list *mSpanList // For debugging. TODO: Remove.

 	startAddr uintptr // address of first byte of span aka s.base()
 	npages    uintptr // number of pages in span

 	manualFreeList gclinkptr // list of free objects in mSpanManual spans

 	// freeindex is the slot index between 0 and nelems at which to begin scanning
 	// for the next free object in this span.
 	// Each allocation scans allocBits starting at freeindex until it encounters a 0
 	// indicating a free object. freeindex is then adjusted so that subsequent scans begin
 	// just past the newly discovered free object.
 	//
 	// If freeindex == nelem, this span has no free objects.
 	//
 	// allocBits is a bitmap of objects in this span.
 	// If n >= freeindex and allocBits[n/8] & (1<<(n%8)) is 0
 	// then object n is free;
 	// otherwise, object n is allocated. Bits starting at nelem are
 	// undefined and should never be referenced.
 	//
 	// Object n starts at address n*elemsize + (start << pageShift).
 	freeindex uintptr
 	// TODO: Look up nelems from sizeclass and remove this field if it
 	// helps performance.
 	nelems uintptr // number of object in the span.

 	// Cache of the allocBits at freeindex. allocCache is shifted
 	// such that the lowest bit corresponds to the bit freeindex.
 	// allocCache holds the complement of allocBits, thus allowing
 	// ctz (count trailing zero) to use it directly.
 	// allocCache may contain bits beyond s.nelems; the caller must ignore
 	// these.
 	allocCache uint64

 	// allocBits and gcmarkBits hold pointers to a span's mark and
 	// allocation bits. The pointers are 8 byte aligned.
 	// There are three arenas where this data is held.
 	// free: Dirty arenas that are no longer accessed
 	//       and can be reused.
 	// next: Holds information to be used in the next GC cycle.
 	// current: Information being used during this GC cycle.
 	// previous: Information being used during the last GC cycle.
 	// A new GC cycle starts with the call to finishsweep_m.
 	// finishsweep_m moves the previous arena to the free arena,
 	// the current arena to the previous arena, and
 	// the next arena to the current arena.
 	// The next arena is populated as the spans request
 	// memory to hold gcmarkBits for the next GC cycle as well
 	// as allocBits for newly allocated spans.
 	//
 	// The pointer arithmetic is done "by hand" instead of using
 	// arrays to avoid bounds checks along critical performance
 	// paths.
 	// The sweep will free the old allocBits and set allocBits to the
 	// gcmarkBits. The gcmarkBits are replaced with a fresh zeroed
 	// out memory.
 	allocBits  *gcBits
 	gcmarkBits *gcBits

 	// sweep generation:
 	// if sweepgen == h->sweepgen - 2, the span needs sweeping
 	// if sweepgen == h->sweepgen - 1, the span is currently being swept
 	// if sweepgen == h->sweepgen, the span is swept and ready to use
 	// if sweepgen == h->sweepgen + 1, the span was cached before sweep began and is still cached, and needs sweeping
 	// if sweepgen == h->sweepgen + 3, the span was swept and then cached and is still cached
 	// h->sweepgen is incremented by 2 after every GC

 	sweepgen    uint32
 	divMul      uint16        // for divide by elemsize - divMagic.mul
 	baseMask    uint16        // if non-0, elemsize is a power of 2, & this will get object allocation base
 	allocCount  uint16        // number of allocated objects
 	spanclass   spanClass     // size class and noscan (uint8)
 	state       mSpanStateBox // mSpanInUse etc; accessed atomically (get/set methods)
 	needzero    uint8         // needs to be zeroed before allocation
 	divShift    uint8         // for divide by elemsize - divMagic.shift
 	divShift2   uint8         // for divide by elemsize - divMagic.shift2
 	elemsize    uintptr       // computed from sizeclass or from npages
 	limit       uintptr       // end of data in span
 	speciallock mutex         // guards specials list
 	specials    *special      // linked list of special records sorted by offset.
 }

 func (s *mspan) base() uintptr {
 	return s.startAddr
 }

 func (s *mspan) layout() (size, n, total uintptr) {
 	total = s.npages << _PageShift
 	size = s.elemsize
 	if size > 0 {
 		n = total / size
 	}
 	return
 }

 // recordspan adds a newly allocated span to h.allspans.
 //
 // This only happens the first time a span is allocated from
 // mheap.spanalloc (it is not called when a span is reused).
 //
 // Write barriers are disallowed here because it can be called from
 // gcWork when allocating new workbufs. However, because it's an
 // indirect call from the fixalloc initializer, the compiler can't see
 // this.
 //
 //go:nowritebarrierrec
 func recordspan(vh unsafe.Pointer, p unsafe.Pointer) {
 	h := (*mheap)(vh)
 	s := (*mspan)(p)
 	if len(h.allspans) >= cap(h.allspans) {
 		n := 64 * 1024 / sys.PtrSize
 		if n < cap(h.allspans)*3/2 {
 			n = cap(h.allspans) * 3 / 2
 		}
 		var new []*mspan
 		sp := (*slice)(unsafe.Pointer(&new))
 		sp.array = sysAlloc(uintptr(n)*sys.PtrSize, &memstats.other_sys)
 		if sp.array == nil {
 			throw("runtime: cannot allocate memory")
 		}
 		sp.len = len(h.allspans)
 		sp.cap = n
 		if len(h.allspans) > 0 {
 			copy(new, h.allspans)
 		}
 		oldAllspans := h.allspans
 		*(*notInHeapSlice)(unsafe.Pointer(&h.allspans)) = *(*notInHeapSlice)(unsafe.Pointer(&new))
 		if len(oldAllspans) != 0 {
 			sysFree(unsafe.Pointer(&oldAllspans[0]), uintptr(cap(oldAllspans))*unsafe.Sizeof(oldAllspans[0]), &memstats.other_sys)
 		}
 	}
 	h.allspans = h.allspans[:len(h.allspans)+1]
 	h.allspans[len(h.allspans)-1] = s
 }

 // A spanClass represents the size class and noscan-ness of a span.
 //
 // Each size class has a noscan spanClass and a scan spanClass. The
 // noscan spanClass contains only noscan objects, which do not contain
 // pointers and thus do not need to be scanned by the garbage
 // collector.
 type spanClass uint8

 const (
 	numSpanClasses = _NumSizeClasses << 1
 	tinySpanClass  = spanClass(tinySizeClass<<1 | 1)
 )

 func makeSpanClass(sizeclass uint8, noscan bool) spanClass {
 	return spanClass(sizeclass<<1) | spanClass(bool2int(noscan))
 }

 func (sc spanClass) sizeclass() int8 {
 	return int8(sc >> 1)
 }

 func (sc spanClass) noscan() bool {
 	return sc&1 != 0
 }

 // arenaIndex returns the index into mheap_.arenas of the arena
 // containing metadata for p. This index combines of an index into the
 // L1 map and an index into the L2 map and should be used as
 // mheap_.arenas[ai.l1()][ai.l2()].
 //
 // If p is outside the range of valid heap addresses, either l1() or
 // l2() will be out of bounds.
 //
 // It is nosplit because it's called by spanOf and several other
 // nosplit functions.
 //
 //go:nosplit
 func arenaIndex(p uintptr) arenaIdx {
 	return arenaIdx((p - arenaBaseOffset) / heapArenaBytes)
 }

 // arenaBase returns the low address of the region covered by heap
 // arena i.
 func arenaBase(i arenaIdx) uintptr {
 	return uintptr(i)*heapArenaBytes + arenaBaseOffset
 }

 type arenaIdx uint

 func (i arenaIdx) l1() uint {
 	if arenaL1Bits == 0 {
 		// Let the compiler optimize this away if there's no
 		// L1 map.
 		return 0
 	} else {
 		return uint(i) >> arenaL1Shift
 	}
 }

 func (i arenaIdx) l2() uint {
 	if arenaL1Bits == 0 {
 		return uint(i)
 	} else {
 		return uint(i) & (1<<arenaL2Bits - 1)
 	}
 }

 // inheap reports whether b is a pointer into a (potentially dead) heap object.
 // It returns false for pointers into mSpanManual spans.
 // Non-preemptible because it is used by write barriers.
 //go:nowritebarrier
 //go:nosplit
 func inheap(b uintptr) bool {
 	return spanOfHeap(b) != nil
 }

 // inHeapOrStack is a variant of inheap that returns true for pointers
 // into any allocated heap span.
 //
 //go:nowritebarrier
 //go:nosplit
 func inHeapOrStack(b uintptr) bool {
 	s := spanOf(b)
 	if s == nil || b < s.base() {
 		return false
 	}
 	switch s.state.get() {
 	case mSpanInUse, mSpanManual:
 		return b < s.limit
 	default:
 		return false
 	}
 }

 // spanOf returns the span of p. If p does not point into the heap
 // arena or no span has ever contained p, spanOf returns nil.
 //
 // If p does not point to allocated memory, this may return a non-nil
 // span that does *not* contain p. If this is a possibility, the
 // caller should either call spanOfHeap or check the span bounds
 // explicitly.
 //
 // Must be nosplit because it has callers that are nosplit.
 //
 //go:nosplit
 func spanOf(p uintptr) *mspan {
 	// This function looks big, but we use a lot of constant
 	// folding around arenaL1Bits to get it under the inlining
 	// budget. Also, many of the checks here are safety checks
 	// that Go needs to do anyway, so the generated code is quite
 	// short.
 	ri := arenaIndex(p)
 	if arenaL1Bits == 0 {
 		// If there's no L1, then ri.l1() can't be out of bounds but ri.l2() can.
 		if ri.l2() >= uint(len(mheap_.arenas[0])) {
 			return nil
 		}
 	} else {
 		// If there's an L1, then ri.l1() can be out of bounds but ri.l2() can't.
 		if ri.l1() >= uint(len(mheap_.arenas)) {
 			return nil
 		}
 	}
 	l2 := mheap_.arenas[ri.l1()]
 	if arenaL1Bits != 0 && l2 == nil { // Should never happen if there's no L1.
 		return nil
 	}
 	ha := l2[ri.l2()]
 	if ha == nil {
 		return nil
 	}
 	return ha.spans[(p/pageSize)%pagesPerArena]
 }

 // spanOfUnchecked is equivalent to spanOf, but the caller must ensure
 // that p points into an allocated heap arena.
 //
 // Must be nosplit because it has callers that are nosplit.
 //
 //go:nosplit
 func spanOfUnchecked(p uintptr) *mspan {
 	ai := arenaIndex(p)
 	return mheap_.arenas[ai.l1()][ai.l2()].spans[(p/pageSize)%pagesPerArena]
 }

 // spanOfHeap is like spanOf, but returns nil if p does not point to a
 // heap object.
 //
 // Must be nosplit because it has callers that are nosplit.
 //
 //go:nosplit
 func spanOfHeap(p uintptr) *mspan {
 	s := spanOf(p)
 	// s is nil if it's never been allocated. Otherwise, we check
 	// its state first because we don't trust this pointer, so we
 	// have to synchronize with span initialization. Then, it's
 	// still possible we picked up a stale span pointer, so we
 	// have to check the span's bounds.
 	if s == nil || s.state.get() != mSpanInUse || p < s.base() || p >= s.limit {
 		return nil
 	}
 	return s
 }

 // pageIndexOf returns the arena, page index, and page mask for pointer p.
 // The caller must ensure p is in the heap.
 func pageIndexOf(p uintptr) (arena *heapArena, pageIdx uintptr, pageMask uint8) {
 	ai := arenaIndex(p)
 	arena = mheap_.arenas[ai.l1()][ai.l2()]
 	pageIdx = ((p / pageSize) / 8) % uintptr(len(arena.pageInUse))
 	pageMask = byte(1 << ((p / pageSize) % 8))
 	return
 }

 // Initialize the heap.
 func (h *mheap) init() {
 	lockInit(&h.lock, lockRankMheap)
 	lockInit(&h.sweepSpans[0].spineLock, lockRankSpine)
 	lockInit(&h.sweepSpans[1].spineLock, lockRankSpine)
 	lockInit(&h.speciallock, lockRankMheapSpecial)

 	h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys)
 	h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys)
 	h.specialfinalizeralloc.init(unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys)
 	h.specialprofilealloc.init(unsafe.Sizeof(specialprofile{}), nil, nil, &memstats.other_sys)
 	h.arenaHintAlloc.init(unsafe.Sizeof(arenaHint{}), nil, nil, &memstats.other_sys)

 	// Don't zero mspan allocations. Background sweeping can
 	// inspect a span concurrently with allocating it, so it's
 	// important that the span's sweepgen survive across freeing
 	// and re-allocating a span to prevent background sweeping
 	// from improperly cas'ing it from 0.
 	//
 	// This is safe because mspan contains no heap pointers.
 	h.spanalloc.zero = false

 	// h->mapcache needs no init

 	for i := range h.central {
 		h.central[i].mcentral.init(spanClass(i))
 	}

 	h.pages.init(&h.lock, &memstats.gc_sys)
 }

 // reclaim sweeps and reclaims at least npage pages into the heap.
 // It is called before allocating npage pages to keep growth in check.
 //
 // reclaim implements the page-reclaimer half of the sweeper.
 //
 // h must NOT be locked.
 func (h *mheap) reclaim(npage uintptr) {
 	// TODO(austin): Half of the time spent freeing spans is in
 	// locking/unlocking the heap (even with low contention). We
 	// could make the slow path here several times faster by
 	// batching heap frees.

 	// Bail early if there's no more reclaim work.
 	if atomic.Load64(&h.reclaimIndex) >= 1<<63 {
 		return
 	}

 	// Disable preemption so the GC can't start while we're
 	// sweeping, so we can read h.sweepArenas, and so
 	// traceGCSweepStart/Done pair on the P.
 	mp := acquirem()

 	if trace.enabled {
 		traceGCSweepStart()
 	}

 	arenas := h.sweepArenas
 	locked := false
 	for npage > 0 {
 		// Pull from accumulated credit first.
 		if credit := atomic.Loaduintptr(&h.reclaimCredit); credit > 0 {
 			take := credit
 			if take > npage {
 				// Take only what we need.
 				take = npage
 			}
 			if atomic.Casuintptr(&h.reclaimCredit, credit, credit-take) {
 				npage -= take
 			}
 			continue
 		}

 		// Claim a chunk of work.
 		idx := uintptr(atomic.Xadd64(&h.reclaimIndex, pagesPerReclaimerChunk) - pagesPerReclaimerChunk)
 		if idx/pagesPerArena >= uintptr(len(arenas)) {
 			// Page reclaiming is done.
 			atomic.Store64(&h.reclaimIndex, 1<<63)
 			break
 		}

 		if !locked {
 			// Lock the heap for reclaimChunk.
 			lock(&h.lock)
 			locked = true
 		}

 		// Scan this chunk.
 		nfound := h.reclaimChunk(arenas, idx, pagesPerReclaimerChunk)
 		if nfound <= npage {
 			npage -= nfound
 		} else {
 			// Put spare pages toward global credit.
 			atomic.Xadduintptr(&h.reclaimCredit, nfound-npage)
 			npage = 0
 		}
 	}
 	if locked {
 		unlock(&h.lock)
 	}

 	if trace.enabled {
 		traceGCSweepDone()
 	}
 	releasem(mp)
 }

 // reclaimChunk sweeps unmarked spans that start at page indexes [pageIdx, pageIdx+n).
 // It returns the number of pages returned to the heap.
 //
 // h.lock must be held and the caller must be non-preemptible. Note: h.lock may be
 // temporarily unlocked and re-locked in order to do sweeping or if tracing is
 // enabled.
 func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr {
 	// The heap lock must be held because this accesses the
 	// heapArena.spans arrays using potentially non-live pointers.
 	// In particular, if a span were freed and merged concurrently
 	// with this probing heapArena.spans, it would be possible to
 	// observe arbitrary, stale span pointers.
 	n0 := n
 	var nFreed uintptr
 	sg := h.sweepgen
 	for n > 0 {
 		ai := arenas[pageIdx/pagesPerArena]
 		ha := h.arenas[ai.l1()][ai.l2()]

 		// Get a chunk of the bitmap to work on.
 		arenaPage := uint(pageIdx % pagesPerArena)
 		inUse := ha.pageInUse[arenaPage/8:]
 		marked := ha.pageMarks[arenaPage/8:]
 		if uintptr(len(inUse)) > n/8 {
 			inUse = inUse[:n/8]
 			marked = marked[:n/8]
 		}

 		// Scan this bitmap chunk for spans that are in-use
 		// but have no marked objects on them.
 		for i := range inUse {
 			inUseUnmarked := atomic.Load8(&inUse[i]) &^ marked[i]
 			if inUseUnmarked == 0 {
 				continue
 			}

 			for j := uint(0); j < 8; j++ {
 				if inUseUnmarked&(1<<j) != 0 {
 					s := ha.spans[arenaPage+uint(i)*8+j]
 					if atomic.Load(&s.sweepgen) == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) {
 						npages := s.npages
 						unlock(&h.lock)
 						if s.sweep(false) {
 							nFreed += npages
 						}
 						lock(&h.lock)
 						// Reload inUse. It's possible nearby
 						// spans were freed when we dropped the
 						// lock and we don't want to get stale
 						// pointers from the spans array.
 						inUseUnmarked = atomic.Load8(&inUse[i]) &^ marked[i]
 					}
 				}
 			}
 		}

 		// Advance.
 		pageIdx += uintptr(len(inUse) * 8)
 		n -= uintptr(len(inUse) * 8)
 	}
 	if trace.enabled {
 		unlock(&h.lock)
 		// Account for pages scanned but not reclaimed.
 		traceGCSweepSpan((n0 - nFreed) * pageSize)
 		lock(&h.lock)
 	}
 	return nFreed
 }

 // alloc allocates a new span of npage pages from the GC'd heap.
 //
 // spanclass indicates the span's size class and scannability.
 //
 // If needzero is true, the memory for the returned span will be zeroed.
 func (h *mheap) alloc(npages uintptr, spanclass spanClass, needzero bool) *mspan {
 	// Don't do any operations that lock the heap on the G stack.
 	// It might trigger stack growth, and the stack growth code needs
 	// to be able to allocate heap.
 	var s *mspan
 	systemstack(func() {
 		// To prevent excessive heap growth, before allocating n pages
 		// we need to sweep and reclaim at least n pages.
 		if h.sweepdone == 0 {
 			h.reclaim(npages)
 		}
 		s = h.allocSpan(npages, false, spanclass, &memstats.heap_inuse)
 	})

 	if s != nil {
 		if needzero && s.needzero != 0 {
 			memclrNoHeapPointers(unsafe.Pointer(s.base()), s.npages<<_PageShift)
 		}
 		s.needzero = 0
 	}
 	return s
 }

 // allocManual allocates a manually-managed span of npage pages.
 // allocManual returns nil if allocation fails.
 //
 // allocManual adds the bytes used to *stat, which should be a
 // memstats in-use field. Unlike allocations in the GC'd heap, the
 // allocation does *not* count toward heap_inuse or heap_sys.
 //
 // The memory backing the returned span may not be zeroed if
 // span.needzero is set.
 //
 // allocManual must be called on the system stack because it may
 // acquire the heap lock via allocSpan. See mheap for details.
 //
 //go:systemstack
 func (h *mheap) allocManual(npages uintptr, stat *uint64) *mspan {
 	return h.allocSpan(npages, true, 0, stat)
 }

 // setSpans modifies the span map so [spanOf(base), spanOf(base+npage*pageSize))
 // is s.
 func (h *mheap) setSpans(base, npage uintptr, s *mspan) {
 	p := base / pageSize
 	ai := arenaIndex(base)
 	ha := h.arenas[ai.l1()][ai.l2()]
 	for n := uintptr(0); n < npage; n++ {
 		i := (p + n) % pagesPerArena
 		if i == 0 {
 			ai = arenaIndex(base + n*pageSize)
 			ha = h.arenas[ai.l1()][ai.l2()]
 		}
 		ha.spans[i] = s
 	}
 }

 // allocNeedsZero checks if the region of address space [base, base+npage*pageSize),
 // assumed to be allocated, needs to be zeroed, updating heap arena metadata for
 // future allocations.
 //
 // This must be called each time pages are allocated from the heap, even if the page
 // allocator can otherwise prove the memory it's allocating is already zero because
 // they're fresh from the operating system. It updates heapArena metadata that is
 // critical for future page allocations.
 //
 // There are no locking constraints on this method.
 func (h *mheap) allocNeedsZero(base, npage uintptr) (needZero bool) {
 	for npage > 0 {
 		ai := arenaIndex(base)
 		ha := h.arenas[ai.l1()][ai.l2()]

 		zeroedBase := atomic.Loaduintptr(&ha.zeroedBase)
 		arenaBase := base % heapArenaBytes
 		if arenaBase < zeroedBase {
 			// We extended into the non-zeroed part of the
 			// arena, so this region needs to be zeroed before use.
 			//
 			// zeroedBase is monotonically increasing, so if we see this now then
 			// we can be sure we need to zero this memory region.
 			//
 			// We still need to update zeroedBase for this arena, and
 			// potentially more arenas.
 			needZero = true
 		}
 		// We may observe arenaBase > zeroedBase if we're racing with one or more
 		// allocations which are acquiring memory directly before us in the address
 		// space. But, because we know no one else is acquiring *this* memory, it's
 		// still safe to not zero.

 		// Compute how far into the arena we extend into, capped
 		// at heapArenaBytes.
 		arenaLimit := arenaBase + npage*pageSize
 		if arenaLimit > heapArenaBytes {
 			arenaLimit = heapArenaBytes
 		}
 		// Increase ha.zeroedBase so it's >= arenaLimit.
 		// We may be racing with other updates.
 		for arenaLimit > zeroedBase {
 			if atomic.Casuintptr(&ha.zeroedBase, zeroedBase, arenaLimit) {
 				break
 			}
 			zeroedBase = atomic.Loaduintptr(&ha.zeroedBase)
 			// Sanity check zeroedBase.
 			if zeroedBase <= arenaLimit && zeroedBase > arenaBase {
 				// The zeroedBase moved into the space we were trying to
 				// claim. That's very bad, and indicates someone allocated
 				// the same region we did.
 				throw("potentially overlapping in-use allocations detected")
 			}
 		}

 		// Move base forward and subtract from npage to move into
 		// the next arena, or finish.
 		base += arenaLimit - arenaBase
 		npage -= (arenaLimit - arenaBase) / pageSize
 	}
 	return
 }

 // tryAllocMSpan attempts to allocate an mspan object from
 // the P-local cache, but may fail.
 //
 // h need not be locked.
 //
 // This caller must ensure that its P won't change underneath
 // it during this function. Currently to ensure that we enforce
 // that the function is run on the system stack, because that's
 // the only place it is used now. In the future, this requirement
 // may be relaxed if its use is necessary elsewhere.
 //
 //go:systemstack
 func (h *mheap) tryAllocMSpan() *mspan {
 	pp := getg().m.p.ptr()
 	// If we don't have a p or the cache is empty, we can't do
 	// anything here.
 	if pp == nil || pp.mspancache.len == 0 {
 		return nil
 	}
 	// Pull off the last entry in the cache.
 	s := pp.mspancache.buf[pp.mspancache.len-1]
 	pp.mspancache.len--
 	return s
 }

 // allocMSpanLocked allocates an mspan object.
 //
 // h must be locked.
 //
 // allocMSpanLocked must be called on the system stack because
 // its caller holds the heap lock. See mheap for details.
 // Running on the system stack also ensures that we won't
 // switch Ps during this function. See tryAllocMSpan for details.
 //
 //go:systemstack
 func (h *mheap) allocMSpanLocked() *mspan {
 	pp := getg().m.p.ptr()
 	if pp == nil {
 		// We don't have a p so just do the normal thing.
 		return (*mspan)(h.spanalloc.alloc())
 	}
 	// Refill the cache if necessary.
 	if pp.mspancache.len == 0 {
 		const refillCount = len(pp.mspancache.buf) / 2
 		for i := 0; i < refillCount; i++ {
 			pp.mspancache.buf[i] = (*mspan)(h.spanalloc.alloc())
 		}
 		pp.mspancache.len = refillCount
 	}
 	// Pull off the last entry in the cache.
 	s := pp.mspancache.buf[pp.mspancache.len-1]
 	pp.mspancache.len--
 	return s
 }

 // freeMSpanLocked free an mspan object.
 //
 // h must be locked.
 //
 // freeMSpanLocked must be called on the system stack because
 // its caller holds the heap lock. See mheap for details.
 // Running on the system stack also ensures that we won't
 // switch Ps during this function. See tryAllocMSpan for details.
 //
 //go:systemstack
 func (h *mheap) freeMSpanLocked(s *mspan) {
 	pp := getg().m.p.ptr()
 	// First try to free the mspan directly to the cache.
 	if pp != nil && pp.mspancache.len < len(pp.mspancache.buf) {
 		pp.mspancache.buf[pp.mspancache.len] = s
 		pp.mspancache.len++
 		return
 	}
 	// Failing that (or if we don't have a p), just free it to
 	// the heap.
 	h.spanalloc.free(unsafe.Pointer(s))
 }

 // allocSpan allocates an mspan which owns npages worth of memory.
 //
 // If manual == false, allocSpan allocates a heap span of class spanclass
 // and updates heap accounting. If manual == true, allocSpan allocates a
 // manually-managed span (spanclass is ignored), and the caller is
 // responsible for any accounting related to its use of the span. Either
 // way, allocSpan will atomically add the bytes in the newly allocated
 // span to *sysStat.
 //
 // The returned span is fully initialized.
 //
 // h must not be locked.
 //
 // allocSpan must be called on the system stack both because it acquires
 // the heap lock and because it must block GC transitions.
 //
 //go:systemstack
 func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysStat *uint64) (s *mspan) {
 	// Function-global state.
 	gp := getg()
 	base, scav := uintptr(0), uintptr(0)

 	// If the allocation is small enough, try the page cache!
 	pp := gp.m.p.ptr()
 	if pp != nil && npages < pageCachePages/4 {
 		c := &pp.pcache

 		// If the cache is empty, refill it.
 		if c.empty() {
 			lock(&h.lock)
 			*c = h.pages.allocToCache()
 			unlock(&h.lock)
 		}

 		// Try to allocate from the cache.
 		base, scav = c.alloc(npages)
 		if base != 0 {
 			s = h.tryAllocMSpan()

 			if s != nil && gcBlackenEnabled == 0 && (manual || spanclass.sizeclass() != 0) {
 				goto HaveSpan
 			}
 			// We're either running duing GC, failed to acquire a mspan,
 			// or the allocation is for a large object. This means we
 			// have to lock the heap and do a bunch of extra work,
 			// so go down the HaveBaseLocked path.
 			//
 			// We must do this during GC to avoid skew with heap_scan
 			// since we flush mcache stats whenever we lock.
 			//
 			// TODO(mknyszek): It would be nice to not have to
 			// lock the heap if it's a large allocation, but
 			// it's fine for now. The critical section here is
 			// short and large object allocations are relatively
 			// infrequent.
 		}
 	}

 	// For one reason or another, we couldn't get the
 	// whole job done without the heap lock.
 	lock(&h.lock)

 	if base == 0 {
 		// Try to acquire a base address.
 		base, scav = h.pages.alloc(npages)
 		if base == 0 {
 			if !h.grow(npages) {
 				unlock(&h.lock)
 				return nil
 			}
 			base, scav = h.pages.alloc(npages)
 			if base == 0 {
 				throw("grew heap, but no adequate free space found")
 			}
 		}
 	}
 	if s == nil {
 		// We failed to get an mspan earlier, so grab
 		// one now that we have the heap lock.
 		s = h.allocMSpanLocked()
 	}
 	if !manual {
 		// This is a heap span, so we should do some additional accounting
 		// which may only be done with the heap locked.

 		// Transfer stats from mcache to global.
 		var c *mcache
 		if gp.m.p != 0 {
 			c = gp.m.p.ptr().mcache
 		} else {
 			// This case occurs while bootstrapping.
 			// See the similar code in mallocgc.
 			c = mcache0
 			if c == nil {
 				throw("mheap.allocSpan called with no P")
 			}
 		}
 		memstats.heap_scan += uint64(c.local_scan)
 		c.local_scan = 0
 		memstats.tinyallocs += uint64(c.local_tinyallocs)
 		c.local_tinyallocs = 0

 		// Do some additional accounting if it's a large allocation.
 		if spanclass.sizeclass() == 0 {
 			mheap_.largealloc += uint64(npages * pageSize)
 			mheap_.nlargealloc++
 			atomic.Xadd64(&memstats.heap_live, int64(npages*pageSize))
 		}

 		// Either heap_live or heap_scan could have been updated.
 		if gcBlackenEnabled != 0 {
 			gcController.revise()
 		}
 	}
 	unlock(&h.lock)

 HaveSpan:
 	// At this point, both s != nil and base != 0, and the heap
 	// lock is no longer held. Initialize the span.
 	s.init(base, npages)
 	if h.allocNeedsZero(base, npages) {
 		s.needzero = 1
 	}
 	nbytes := npages * pageSize
 	if manual {
 		s.manualFreeList = 0
 		s.nelems = 0
 		s.limit = s.base() + s.npages*pageSize
 		// Manually managed memory doesn't count toward heap_sys.
 		mSysStatDec(&memstats.heap_sys, s.npages*pageSize)
 		s.state.set(mSpanManual)
 	} else {
 		// We must set span properties before the span is published anywhere
 		// since we're not holding the heap lock.
 		s.spanclass = spanclass
 		if sizeclass := spanclass.sizeclass(); sizeclass == 0 {
 			s.elemsize = nbytes
 			s.nelems = 1

 			s.divShift = 0
 			s.divMul = 0
 			s.divShift2 = 0
 			s.baseMask = 0
 		} else {
 			s.elemsize = uintptr(class_to_size[sizeclass])
 			s.nelems = nbytes / s.elemsize

 			m := &class_to_divmagic[sizeclass]
 			s.divShift = m.shift
 			s.divMul = m.mul
 			s.divShift2 = m.shift2
 			s.baseMask = m.baseMask
 		}

 		// Initialize mark and allocation structures.
 		s.freeindex = 0
 		s.allocCache = ^uint64(0) // all 1s indicating all free.
 		s.gcmarkBits = newMarkBits(s.nelems)
 		s.allocBits = newAllocBits(s.nelems)

 		// It's safe to access h.sweepgen without the heap lock because it's
 		// only ever updated with the world stopped and we run on the
 		// systemstack which blocks a STW transition.
 		atomic.Store(&s.sweepgen, h.sweepgen)

 		// Now that the span is filled in, set its state. This
 		// is a publication barrier for the other fields in
 		// the span. While valid pointers into this span
 		// should never be visible until the span is returned,
 		// if the garbage collector finds an invalid pointer,
 		// access to the span may race with initialization of
 		// the span. We resolve this race by atomically
 		// setting the state after the span is fully
 		// initialized, and atomically checking the state in
 		// any situation where a pointer is suspect.
 		s.state.set(mSpanInUse)
 	}

 	// Commit and account for any scavenged memory that the span now owns.
 	if scav != 0 {
 		// sysUsed all the pages that are actually available
 		// in the span since some of them might be scavenged.
 		sysUsed(unsafe.Pointer(base), nbytes)
 		mSysStatDec(&memstats.heap_released, scav)
 	}
 	// Update stats.
 	mSysStatInc(sysStat, nbytes)
 	mSysStatDec(&memstats.heap_idle, nbytes)

 	// Publish the span in various locations.

 	// This is safe to call without the lock held because the slots
 	// related to this span will only ever be read or modified by
 	// this thread until pointers into the span are published (and
 	// we execute a publication barrier at the end of this function
 	// before that happens) or pageInUse is updated.
 	h.setSpans(s.base(), npages, s)

 	if !manual {
 		if !go115NewMCentralImpl {
 			// Add to swept in-use list.
 			//
 			// This publishes the span to root marking.
 			//
 			// h.sweepgen is guaranteed to only change during STW,
 			// and preemption is disabled in the page allocator.
 			h.sweepSpans[h.sweepgen/2%2].push(s)
 		}

 		// Mark in-use span in arena page bitmap.
 		//
 		// This publishes the span to the page sweeper, so
 		// it's imperative that the span be completely initialized
 		// prior to this line.
 		arena, pageIdx, pageMask := pageIndexOf(s.base())
 		atomic.Or8(&arena.pageInUse[pageIdx], pageMask)

 		// Update related page sweeper stats.
 		atomic.Xadd64(&h.pagesInUse, int64(npages))

 		if trace.enabled {
 			// Trace that a heap alloc occurred.
 			traceHeapAlloc()
 		}
 	}

 	// Make sure the newly allocated span will be observed
 	// by the GC before pointers into the span are published.
 	publicationBarrier()

 	return s
 }

 // Try to add at least npage pages of memory to the heap,
 // returning whether it worked.
 //
 // h must be locked.
 func (h *mheap) grow(npage uintptr) bool {
 	// We must grow the heap in whole palloc chunks.
 	ask := alignUp(npage, pallocChunkPages) * pageSize

 	totalGrowth := uintptr(0)
 	// This may overflow because ask could be very large
 	// and is otherwise unrelated to h.curArena.base.
 	end := h.curArena.base + ask
 	nBase := alignUp(end, physPageSize)
 	if nBase > h.curArena.end || /* overflow */ end < h.curArena.base {
 		// Not enough room in the current arena. Allocate more
 		// arena space. This may not be contiguous with the
 		// current arena, so we have to request the full ask.
 		av, asize := h.sysAlloc(ask)
 		if av == nil {
 			print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n")
 			return false
 		}

 		if uintptr(av) == h.curArena.end {
 			// The new space is contiguous with the old
 			// space, so just extend the current space.
 			h.curArena.end = uintptr(av) + asize
 		} else {
 			// The new space is discontiguous. Track what
 			// remains of the current space and switch to
 			// the new space. This should be rare.
 			if size := h.curArena.end - h.curArena.base; size != 0 {
 				h.pages.grow(h.curArena.base, size)
 				totalGrowth += size
 			}
 			// Switch to the new space.
 			h.curArena.base = uintptr(av)
 			h.curArena.end = uintptr(av) + asize
 		}

 		// The memory just allocated counts as both released
 		// and idle, even though it's not yet backed by spans.
 		//
 		// The allocation is always aligned to the heap arena
 		// size which is always > physPageSize, so its safe to
 		// just add directly to heap_released.
 		mSysStatInc(&memstats.heap_released, asize)
 		mSysStatInc(&memstats.heap_idle, asize)

 		// Recalculate nBase.
 		// We know this won't overflow, because sysAlloc returned
 		// a valid region starting at h.curArena.base which is at
 		// least ask bytes in size.
 		nBase = alignUp(h.curArena.base+ask, physPageSize)
 	}

 	// Grow into the current arena.
 	v := h.curArena.base
 	h.curArena.base = nBase
 	h.pages.grow(v, nBase-v)
 	totalGrowth += nBase - v

 	// We just caused a heap growth, so scavenge down what will soon be used.
 	// By scavenging inline we deal with the failure to allocate out of
 	// memory fragments by scavenging the memory fragments that are least
 	// likely to be re-used.
 	if retained := heapRetained(); retained+uint64(totalGrowth) > h.scavengeGoal {
 		todo := totalGrowth
 		if overage := uintptr(retained + uint64(totalGrowth) - h.scavengeGoal); todo > overage {
 			todo = overage
 		}
 		h.pages.scavenge(todo, false)
 	}
 	return true
 }

 // Free the span back into the heap.
 func (h *mheap) freeSpan(s *mspan) {
 	systemstack(func() {
 		c := getg().m.p.ptr().mcache
 		lock(&h.lock)
 		memstats.heap_scan += uint64(c.local_scan)
 		c.local_scan = 0
 		memstats.tinyallocs += uint64(c.local_tinyallocs)
 		c.local_tinyallocs = 0
 		if msanenabled {
 			// Tell msan that this entire span is no longer in use.
 			base := unsafe.Pointer(s.base())
 			bytes := s.npages << _PageShift
 			msanfree(base, bytes)
 		}
 		if gcBlackenEnabled != 0 {
 			// heap_scan changed.
 			gcController.revise()
 		}
 		h.freeSpanLocked(s, true, true)
 		unlock(&h.lock)
 	})
 }

 // freeManual frees a manually-managed span returned by allocManual.
 // stat must be the same as the stat passed to the allocManual that
 // allocated s.
 //
 // This must only be called when gcphase == _GCoff. See mSpanState for
 // an explanation.
 //
 // freeManual must be called on the system stack because it acquires
 // the heap lock. See mheap for details.
 //
 //go:systemstack
 func (h *mheap) freeManual(s *mspan, stat *uint64) {
 	s.needzero = 1
 	lock(&h.lock)
 	mSysStatDec(stat, s.npages*pageSize)
 	mSysStatInc(&memstats.heap_sys, s.npages*pageSize)
 	h.freeSpanLocked(s, false, true)
 	unlock(&h.lock)
 }

 func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool) {
 	switch s.state.get() {
 	case mSpanManual:
 		if s.allocCount != 0 {
 			throw("mheap.freeSpanLocked - invalid stack free")
 		}
 	case mSpanInUse:
 		if s.allocCount != 0 || s.sweepgen != h.sweepgen {
 			print("mheap.freeSpanLocked - span ", s, " ptr ", hex(s.base()), " allocCount ", s.allocCount, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n")
 			throw("mheap.freeSpanLocked - invalid free")
 		}
 		atomic.Xadd64(&h.pagesInUse, -int64(s.npages))

 		// Clear in-use bit in arena page bitmap.
 		arena, pageIdx, pageMask := pageIndexOf(s.base())
 		atomic.And8(&arena.pageInUse[pageIdx], ^pageMask)
 	default:
 		throw("mheap.freeSpanLocked - invalid span state")
 	}

 	if acctinuse {
 		mSysStatDec(&memstats.heap_inuse, s.npages*pageSize)
 	}
 	if acctidle {
 		mSysStatInc(&memstats.heap_idle, s.npages*pageSize)
 	}

 	// Mark the space as free.
 	h.pages.free(s.base(), s.npages)

 	// Free the span structure. We no longer have a use for it.
 	s.state.set(mSpanDead)
 	h.freeMSpanLocked(s)
 }

 // scavengeAll acquires the heap lock (blocking any additional
 // manipulation of the page allocator) and iterates over the whole
 // heap, scavenging every free page available.
 func (h *mheap) scavengeAll() {
 	// Disallow malloc or panic while holding the heap lock. We do
 	// this here because this is a non-mallocgc entry-point to
 	// the mheap API.
 	gp := getg()
 	gp.m.mallocing++
 	lock(&h.lock)
 	// Start a new scavenge generation so we have a chance to walk
 	// over the whole heap.
 	h.pages.scavengeStartGen()
 	released := h.pages.scavenge(^uintptr(0), false)
 	gen := h.pages.scav.gen
 	unlock(&h.lock)
 	gp.m.mallocing--

 	if debug.scavtrace > 0 {
 		printScavTrace(gen, released, true)
 	}
 }

 //go:linkname runtime_debug_freeOSMemory runtime/debug.freeOSMemory
 func runtime_debug_freeOSMemory() {
 	GC()
 	systemstack(func() { mheap_.scavengeAll() })
 }

 // Initialize a new span with the given start and npages.
 func (span *mspan) init(base uintptr, npages uintptr) {
 	// span is *not* zeroed.
 	span.next = nil
 	span.prev = nil
 	span.list = nil
 	span.startAddr = base
 	span.npages = npages
 	span.allocCount = 0
 	span.spanclass = 0
 	span.elemsize = 0
 	span.speciallock.key = 0
 	span.specials = nil
 	span.needzero = 0
 	span.freeindex = 0
 	span.allocBits = nil
 	span.gcmarkBits = nil
 	span.state.set(mSpanDead)
 	lockInit(&span.speciallock, lockRankMspanSpecial)
 }

 func (span *mspan) inList() bool {
 	return span.list != nil
 }

 // Initialize an empty doubly-linked list.
 func (list *mSpanList) init() {
 	list.first = nil
 	list.last = nil
 }

 func (list *mSpanList) remove(span *mspan) {
 	if span.list != list {
 		print("runtime: failed mSpanList.remove span.npages=", span.npages,
 			" span=", span, " prev=", span.prev, " span.list=", span.list, " list=", list, "\n")
 		throw("mSpanList.remove")
 	}
 	if list.first == span {
 		list.first = span.next
 	} else {
 		span.prev.next = span.next
 	}
 	if list.last == span {
 		list.last = span.prev
 	} else {
 		span.next.prev = span.prev
 	}
 	span.next = nil
 	span.prev = nil
 	span.list = nil
 }

 func (list *mSpanList) isEmpty() bool {
 	return list.first == nil
 }

 func (list *mSpanList) insert(span *mspan) {
 	if span.next != nil || span.prev != nil || span.list != nil {
 		println("runtime: failed mSpanList.insert", span, span.next, span.prev, span.list)
 		throw("mSpanList.insert")
 	}
 	span.next = list.first
 	if list.first != nil {
 		// The list contains at least one span; link it in.
 		// The last span in the list doesn't change.
 		list.first.prev = span
 	} else {
 		// The list contains no spans, so this is also the last span.
 		list.last = span
 	}
 	list.first = span
 	span.list = list
 }

 func (list *mSpanList) insertBack(span *mspan) {
 	if span.next != nil || span.prev != nil || span.list != nil {
 		println("runtime: failed mSpanList.insertBack", span, span.next, span.prev, span.list)
 		throw("mSpanList.insertBack")
 	}
 	span.prev = list.last
 	if list.last != nil {
 		// The list contains at least one span.
 		list.last.next = span
 	} else {
 		// The list contains no spans, so this is also the first span.
 		list.first = span
 	}
 	list.last = span
 	span.list = list
 }

 // takeAll removes all spans from other and inserts them at the front
 // of list.
 func (list *mSpanList) takeAll(other *mSpanList) {
 	if other.isEmpty() {
 		return
 	}

 	// Reparent everything in other to list.
 	for s := other.first; s != nil; s = s.next {
 		s.list = list
 	}

 	// Concatenate the lists.
 	if list.isEmpty() {
 		*list = *other
 	} else {
 		// Neither list is empty. Put other before list.
 		other.last.next = list.first
 		list.first.prev = other.last
 		list.first = other.first
 	}

 	other.first, other.last = nil, nil
 }

 const (
 	_KindSpecialFinalizer = 1
 	_KindSpecialProfile   = 2
 	// Note: The finalizer special must be first because if we're freeing
 	// an object, a finalizer special will cause the freeing operation
 	// to abort, and we want to keep the other special records around
 	// if that happens.
 )

 //go:notinheap
 type special struct {
 	next   *special // linked list in span
 	offset uint16   // span offset of object
 	kind   byte     // kind of special
 }

 // spanHasSpecials marks a span as having specials in the arena bitmap.
 func spanHasSpecials(s *mspan) {
 	arenaPage := (s.base() / pageSize) % pagesPerArena
 	ai := arenaIndex(s.base())
 	ha := mheap_.arenas[ai.l1()][ai.l2()]
 	atomic.Or8(&ha.pageSpecials[arenaPage/8], uint8(1)<<(arenaPage%8))
 }

 // spanHasNoSpecials marks a span as having no specials in the arena bitmap.
 func spanHasNoSpecials(s *mspan) {
 	arenaPage := (s.base() / pageSize) % pagesPerArena
 	ai := arenaIndex(s.base())
 	ha := mheap_.arenas[ai.l1()][ai.l2()]
 	atomic.And8(&ha.pageSpecials[arenaPage/8], ^(uint8(1) << (arenaPage % 8)))
 }

 // Adds the special record s to the list of special records for
 // the object p. All fields of s should be filled in except for
 // offset & next, which this routine will fill in.
 // Returns true if the special was successfully added, false otherwise.
 // (The add will fail only if a record with the same p and s->kind
 //  already exists.)
 func addspecial(p unsafe.Pointer, s *special) bool {
 	span := spanOfHeap(uintptr(p))
 	if span == nil {
 		throw("addspecial on invalid pointer")
 	}

 	// Ensure that the span is swept.
 	// Sweeping accesses the specials list w/o locks, so we have
 	// to synchronize with it. And it's just much safer.
 	mp := acquirem()
 	span.ensureSwept()

 	offset := uintptr(p) - span.base()
 	kind := s.kind

 	lock(&span.speciallock)

 	// Find splice point, check for existing record.
 	t := &span.specials
 	for {
 		x := *t
 		if x == nil {
 			break
 		}
 		if offset == uintptr(x.offset) && kind == x.kind {
 			unlock(&span.speciallock)
 			releasem(mp)
 			return false // already exists
 		}
 		if offset < uintptr(x.offset) || (offset == uintptr(x.offset) && kind < x.kind) {
 			break
 		}
 		t = &x.next
 	}

 	// Splice in record, fill in offset.
 	s.offset = uint16(offset)
 	s.next = *t
 	*t = s
 	if go115NewMarkrootSpans {
 		spanHasSpecials(span)
 	}
 	unlock(&span.speciallock)
 	releasem(mp)

 	return true
 }

 // Removes the Special record of the given kind for the object p.
 // Returns the record if the record existed, nil otherwise.
 // The caller must FixAlloc_Free the result.
 func removespecial(p unsafe.Pointer, kind uint8) *special {
 	span := spanOfHeap(uintptr(p))
 	if span == nil {
 		throw("removespecial on invalid pointer")
 	}

 	// Ensure that the span is swept.
 	// Sweeping accesses the specials list w/o locks, so we have
 	// to synchronize with it. And it's just much safer.
 	mp := acquirem()
 	span.ensureSwept()

 	offset := uintptr(p) - span.base()

 	var result *special
 	lock(&span.speciallock)
 	t := &span.specials
 	for {
 		s := *t
 		if s == nil {
 			break
 		}
 		// This function is used for finalizers only, so we don't check for
 		// "interior" specials (p must be exactly equal to s->offset).
 		if offset == uintptr(s.offset) && kind == s.kind {
 			*t = s.next
 			result = s
 			break
 		}
 		t = &s.next
 	}
 	if go115NewMarkrootSpans && span.specials == nil {
 		spanHasNoSpecials(span)
 	}
 	unlock(&span.speciallock)
 	releasem(mp)
 	return result
 }

 // The described object has a finalizer set for it.
 //
 // specialfinalizer is allocated from non-GC'd memory, so any heap
 // pointers must be specially handled.
 //
 //go:notinheap
 type specialfinalizer struct {
 	special special
 	fn      *funcval // May be a heap pointer.
 	nret    uintptr
 	fint    *_type   // May be a heap pointer, but always live.
 	ot      *ptrtype // May be a heap pointer, but always live.
 }

 // Adds a finalizer to the object p. Returns true if it succeeded.
 func addfinalizer(p unsafe.Pointer, f *funcval, nret uintptr, fint *_type, ot *ptrtype) bool {
 	lock(&mheap_.speciallock)
 	s := (*specialfinalizer)(mheap_.specialfinalizeralloc.alloc())
 	unlock(&mheap_.speciallock)
 	s.special.kind = _KindSpecialFinalizer
 	s.fn = f
 	s.nret = nret
 	s.fint = fint
 	s.ot = ot
 	if addspecial(p, &s.special) {
 		// This is responsible for maintaining the same
 		// GC-related invariants as markrootSpans in any
 		// situation where it's possible that markrootSpans
 		// has already run but mark termination hasn't yet.
 		if gcphase != _GCoff {
 			base, _, _ := findObject(uintptr(p), 0, 0)
 			mp := acquirem()
 			gcw := &mp.p.ptr().gcw
 			// Mark everything reachable from the object
 			// so it's retained for the finalizer.
 			scanobject(base, gcw)
 			// Mark the finalizer itself, since the
 			// special isn't part of the GC'd heap.
 			scanblock(uintptr(unsafe.Pointer(&s.fn)), sys.PtrSize, &oneptrmask[0], gcw, nil)
 			releasem(mp)
 		}
 		return true
 	}

 	// There was an old finalizer
 	lock(&mheap_.speciallock)
 	mheap_.specialfinalizeralloc.free(unsafe.Pointer(s))
 	unlock(&mheap_.speciallock)
 	return false
 }

 // Removes the finalizer (if any) from the object p.
 func removefinalizer(p unsafe.Pointer) {
 	s := (*specialfinalizer)(unsafe.Pointer(removespecial(p, _KindSpecialFinalizer)))
 	if s == nil {
 		return // there wasn't a finalizer to remove
 	}
 	lock(&mheap_.speciallock)
 	mheap_.specialfinalizeralloc.free(unsafe.Pointer(s))
 	unlock(&mheap_.speciallock)
 }

 // The described object is being heap profiled.
 //
 //go:notinheap
 type specialprofile struct {
 	special special
 	b       *bucket
 }

 // Set the heap profile bucket associated with addr to b.
 func setprofilebucket(p unsafe.Pointer, b *bucket) {
 	lock(&mheap_.speciallock)
 	s := (*specialprofile)(mheap_.specialprofilealloc.alloc())
 	unlock(&mheap_.speciallock)
 	s.special.kind = _KindSpecialProfile
 	s.b = b
 	if !addspecial(p, &s.special) {
 		throw("setprofilebucket: profile already set")
 	}
 }

 // Do whatever cleanup needs to be done to deallocate s. It has
 // already been unlinked from the mspan specials list.
 func freespecial(s *special, p unsafe.Pointer, size uintptr) {
 	switch s.kind {
 	case _KindSpecialFinalizer:
 		sf := (*specialfinalizer)(unsafe.Pointer(s))
 		queuefinalizer(p, sf.fn, sf.nret, sf.fint, sf.ot)
 		lock(&mheap_.speciallock)
 		mheap_.specialfinalizeralloc.free(unsafe.Pointer(sf))
 		unlock(&mheap_.speciallock)
 	case _KindSpecialProfile:
 		sp := (*specialprofile)(unsafe.Pointer(s))
 		mProf_Free(sp.b, size)
 		lock(&mheap_.speciallock)
 		mheap_.specialprofilealloc.free(unsafe.Pointer(sp))
 		unlock(&mheap_.speciallock)
 	default:
 		throw("bad special kind")
 		panic("not reached")
 	}
 }

 // gcBits is an alloc/mark bitmap. This is always used as *gcBits.
 //
 //go:notinheap
 type gcBits uint8

 // bytep returns a pointer to the n'th byte of b.
 func (b *gcBits) bytep(n uintptr) *uint8 {
 	return addb((*uint8)(b), n)
 }

 // bitp returns a pointer to the byte containing bit n and a mask for
 // selecting that bit from *bytep.
 func (b *gcBits) bitp(n uintptr) (bytep *uint8, mask uint8) {
 	return b.bytep(n / 8), 1 << (n % 8)
 }

 const gcBitsChunkBytes = uintptr(64 << 10)
 const gcBitsHeaderBytes = unsafe.Sizeof(gcBitsHeader{})

 type gcBitsHeader struct {
 	free uintptr // free is the index into bits of the next free byte.
 	next uintptr // *gcBits triggers recursive type bug. (issue 14620)
 }

 //go:notinheap
 type gcBitsArena struct {
 	// gcBitsHeader // side step recursive type bug (issue 14620) by including fields by hand.
 	free uintptr // free is the index into bits of the next free byte; read/write atomically
 	next *gcBitsArena
 	bits [gcBitsChunkBytes - gcBitsHeaderBytes]gcBits
 }

 var gcBitsArenas struct {
 	lock     mutex
 	free     *gcBitsArena
 	next     *gcBitsArena // Read atomically. Write atomically under lock.
 	current  *gcBitsArena
 	previous *gcBitsArena
 }

 // tryAlloc allocates from b or returns nil if b does not have enough room.
 // This is safe to call concurrently.
 func (b *gcBitsArena) tryAlloc(bytes uintptr) *gcBits {
 	if b == nil || atomic.Loaduintptr(&b.free)+bytes > uintptr(len(b.bits)) {
 		return nil
 	}
 	// Try to allocate from this block.
 	end := atomic.Xadduintptr(&b.free, bytes)
 	if end > uintptr(len(b.bits)) {
 		return nil
 	}
 	// There was enough room.
 	start := end - bytes
 	return &b.bits[start]
 }

 // newMarkBits returns a pointer to 8 byte aligned bytes
 // to be used for a span's mark bits.
 func newMarkBits(nelems uintptr) *gcBits {
 	blocksNeeded := uintptr((nelems + 63) / 64)
 	bytesNeeded := blocksNeeded * 8

 	// Try directly allocating from the current head arena.
 	head := (*gcBitsArena)(atomic.Loadp(unsafe.Pointer(&gcBitsArenas.next)))
 	if p := head.tryAlloc(bytesNeeded); p != nil {
 		return p
 	}

 	// There's not enough room in the head arena. We may need to
 	// allocate a new arena.
 	lock(&gcBitsArenas.lock)
 	// Try the head arena again, since it may have changed. Now
 	// that we hold the lock, the list head can't change, but its
 	// free position still can.
 	if p := gcBitsArenas.next.tryAlloc(bytesNeeded); p != nil {
 		unlock(&gcBitsArenas.lock)
 		return p
 	}

 	// Allocate a new arena. This may temporarily drop the lock.
 	fresh := newArenaMayUnlock()
 	// If newArenaMayUnlock dropped the lock, another thread may
 	// have put a fresh arena on the "next" list. Try allocating
 	// from next again.
 	if p := gcBitsArenas.next.tryAlloc(bytesNeeded); p != nil {
 		// Put fresh back on the free list.
 		// TODO: Mark it "already zeroed"
 		fresh.next = gcBitsArenas.free
 		gcBitsArenas.free = fresh
 		unlock(&gcBitsArenas.lock)
 		return p
 	}

 	// Allocate from the fresh arena. We haven't linked it in yet, so
 	// this cannot race and is guaranteed to succeed.
 	p := fresh.tryAlloc(bytesNeeded)
 	if p == nil {
 		throw("markBits overflow")
 	}

 	// Add the fresh arena to the "next" list.
 	fresh.next = gcBitsArenas.next
 	atomic.StorepNoWB(unsafe.Pointer(&gcBitsArenas.next), unsafe.Pointer(fresh))

 	unlock(&gcBitsArenas.lock)
 	return p
 }

 // newAllocBits returns a pointer to 8 byte aligned bytes
 // to be used for this span's alloc bits.
 // newAllocBits is used to provide newly initialized spans
 // allocation bits. For spans not being initialized the
 // mark bits are repurposed as allocation bits when
 // the span is swept.
 func newAllocBits(nelems uintptr) *gcBits {
 	return newMarkBits(nelems)
 }

 // nextMarkBitArenaEpoch establishes a new epoch for the arenas
 // holding the mark bits. The arenas are named relative to the
 // current GC cycle which is demarcated by the call to finishweep_m.
 //
 // All current spans have been swept.
 // During that sweep each span allocated room for its gcmarkBits in
 // gcBitsArenas.next block. gcBitsArenas.next becomes the gcBitsArenas.current
 // where the GC will mark objects and after each span is swept these bits
 // will be used to allocate objects.
 // gcBitsArenas.current becomes gcBitsArenas.previous where the span's
 // gcAllocBits live until all the spans have been swept during this GC cycle.
 // The span's sweep extinguishes all the references to gcBitsArenas.previous
 // by pointing gcAllocBits into the gcBitsArenas.current.
 // The gcBitsArenas.previous is released to the gcBitsArenas.free list.
 func nextMarkBitArenaEpoch() {
 	lock(&gcBitsArenas.lock)
 	if gcBitsArenas.previous != nil {
 		if gcBitsArenas.free == nil {
 			gcBitsArenas.free = gcBitsArenas.previous
 		} else {
 			// Find end of previous arenas.
 			last := gcBitsArenas.previous
 			for last = gcBitsArenas.previous; last.next != nil; last = last.next {
 			}
 			last.next = gcBitsArenas.free
 			gcBitsArenas.free = gcBitsArenas.previous
 		}
 	}
 	gcBitsArenas.previous = gcBitsArenas.current
 	gcBitsArenas.current = gcBitsArenas.next
 	atomic.StorepNoWB(unsafe.Pointer(&gcBitsArenas.next), nil) // newMarkBits calls newArena when needed
 	unlock(&gcBitsArenas.lock)
 }

 // newArenaMayUnlock allocates and zeroes a gcBits arena.
 // The caller must hold gcBitsArena.lock. This may temporarily release it.
 func newArenaMayUnlock() *gcBitsArena {
 	var result *gcBitsArena
 	if gcBitsArenas.free == nil {
 		unlock(&gcBitsArenas.lock)
 		result = (*gcBitsArena)(sysAlloc(gcBitsChunkBytes, &memstats.gc_sys))
 		if result == nil {
 			throw("runtime: cannot allocate memory")
 		}
 		lock(&gcBitsArenas.lock)
 	} else {
 		result = gcBitsArenas.free
 		gcBitsArenas.free = gcBitsArenas.free.next
 		memclrNoHeapPointers(unsafe.Pointer(result), gcBitsChunkBytes)
 	}
 	result.next = nil
 	// If result.bits is not 8 byte aligned adjust index so
 	// that &result.bits[result.free] is 8 byte aligned.
 	if uintptr(unsafe.Offsetof(gcBitsArena{}.bits))&7 == 0 {
 		result.free = 0
 	} else {
 		result.free = 8 - (uintptr(unsafe.Pointer(&result.bits[0])) & 7)
 	}
 	return result
 }