runtime: reorganize memory code
Move code from malloc1.go, malloc2.go, mem.go, mgc0.go into
appropriate locations.
Factor mgc.go into mgc.go, mgcmark.go, mgcsweep.go, mstats.go.
A lot of this code ended up in its current files because its natural home
was a C file but it was written in Go, or vice versa. This is one step toward
making things actually well-organized again.
Change-Id: I6741deb88a7cfb1c17ffe0bcca3989e10207968f
Reviewed-on: https://go-review.googlesource.com/5300
Reviewed-by: Austin Clements <austin@google.com>
Reviewed-by: Rick Hudson <rlh@golang.org>
diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go
index 7b4a846..f577095 100644
--- a/src/runtime/heapdump.go
+++ b/src/runtime/heapdump.go
@@ -13,6 +13,24 @@
import "unsafe"
+//go:linkname runtime_debug_WriteHeapDump runtime/debug.WriteHeapDump
+func runtime_debug_WriteHeapDump(fd uintptr) {
+ semacquire(&worldsema, false)
+ gp := getg()
+ gp.m.preemptoff = "write heap dump"
+ systemstack(stoptheworld)
+
+ systemstack(func() {
+ writeheapdump_m(fd)
+ })
+
+ gp.m.preemptoff = ""
+ gp.m.locks++
+ semrelease(&worldsema)
+ systemstack(starttheworld)
+ gp.m.locks--
+}
+
const (
fieldKindEol = 0
fieldKindPtr = 1
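
For reference, the runtime_debug_WriteHeapDump hook added above is the runtime half of the
exported runtime/debug.WriteHeapDump API, wired up via go:linkname. A minimal caller looks
like the sketch below; the output file name is arbitrary and chosen only for illustration.

package main

import (
	"log"
	"os"
	"runtime/debug"
)

func main() {
	// Create a file to receive the dump; any writable fd works.
	f, err := os.Create("heap.dump")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// WriteHeapDump stops the world while the dump is written,
	// as the runtime-side implementation above shows.
	debug.WriteHeapDump(f.Fd())
}
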
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 06ba124..b65bf70 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -2,6 +2,84 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+// Memory allocator, based on tcmalloc.
+// http://goog-perftools.sourceforge.net/doc/tcmalloc.html
+
+// The main allocator works in runs of pages.
+// Small allocation sizes (up to and including 32 kB) are
+// rounded to one of about 100 size classes, each of which
+// has its own free list of objects of exactly that size.
+// Any free page of memory can be split into a set of objects
+// of one size class, which are then managed using free list
+// allocators.
+//
+// The allocator's data structures are:
+//
+// FixAlloc: a free-list allocator for fixed-size objects,
+// used to manage storage used by the allocator.
+// MHeap: the malloc heap, managed at page (4096-byte) granularity.
+// MSpan: a run of pages managed by the MHeap.
+// MCentral: a shared free list for a given size class.
+// MCache: a per-thread (in Go, per-P) cache for small objects.
+// MStats: allocation statistics.
+//
+// Allocating a small object proceeds up a hierarchy of caches:
+//
+// 1. Round the size up to one of the small size classes
+// and look in the corresponding MCache free list.
+// If the list is not empty, allocate an object from it.
+// This can all be done without acquiring a lock.
+//
+// 2. If the MCache free list is empty, replenish it by
+// taking a bunch of objects from the MCentral free list.
+// Moving a bunch amortizes the cost of acquiring the MCentral lock.
+//
+// 3. If the MCentral free list is empty, replenish it by
+// allocating a run of pages from the MHeap and then
+// chopping that memory into objects of the given size.
+// Allocating many objects amortizes the cost of locking
+// the heap.
+//
+// 4. If the MHeap is empty or has no page runs large enough,
+// allocate a new group of pages (at least 1MB) from the
+// operating system. Allocating a large run of pages
+// amortizes the cost of talking to the operating system.
+//
+// Freeing a small object proceeds up the same hierarchy:
+//
+// 1. Look up the size class for the object and add it to
+// the MCache free list.
+//
+// 2. If the MCache free list is too long or the MCache has
+// too much memory, return some to the MCentral free lists.
+//
+// 3. If all the objects in a given span have returned to
+// the MCentral list, return that span to the page heap.
+//
+// 4. If the heap has too much memory, return some to the
+// operating system.
+//
+// TODO(rsc): Step 4 is not implemented.
+//
+// Allocating and freeing a large object uses the page heap
+// directly, bypassing the MCache and MCentral free lists.
+//
+// The small objects on the MCache and MCentral free lists
+// may or may not be zeroed. They are zeroed if and only if
+// the second word of the object is zero. A span in the
+// page heap is zeroed unless s->needzero is set. When a span
+// is allocated to break into small objects, it is zeroed if needed
+// and s->needzero is set. There are two main benefits to delaying the
+// zeroing this way:
+//
+// 1. stack frames allocated from the small object lists
+// or the page heap can avoid zeroing altogether.
+// 2. the cost of zeroing when reusing a small object is
+// charged to the mutator, not the garbage collector.
+//
+// This code was written with an eye toward translating to Go
+// in the future. Methods have the form Type_Method(Type *t, ...).
+
package runtime
import "unsafe"
@@ -25,29 +103,369 @@
concurrentSweep = _ConcurrentSweep
)
+const (
+ _PageShift = 13
+ _PageSize = 1 << _PageShift
+ _PageMask = _PageSize - 1
+)
+
+const (
+ // _64bit = 1 on 64-bit systems, 0 on 32-bit systems
+ _64bit = 1 << (^uintptr(0) >> 63) / 2
+
+ // Computed constant. The definition of MaxSmallSize and the
+ // algorithm in msize.c produce some number of different allocation
+ // size classes. NumSizeClasses is that number. It's needed here
+ // because there are static arrays of this length; when msize runs its
+ // size choosing algorithm it double-checks that NumSizeClasses agrees.
+ _NumSizeClasses = 67
+
+ // Tunable constants.
+ _MaxSmallSize = 32 << 10
+
+ // Tiny allocator parameters, see "Tiny allocator" comment in malloc.go.
+ _TinySize = 16
+ _TinySizeClass = 2
+
+ _FixAllocChunk = 16 << 10 // Chunk size for FixAlloc
+ _MaxMHeapList = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
+ _HeapAllocChunk = 1 << 20 // Chunk size for heap growth
+
+ // Per-P, per order stack segment cache size.
+ _StackCacheSize = 32 * 1024
+
+ // Number of orders that get caching. Order 0 is FixedStack
+ // and each successive order is twice as large.
+ // We want to cache 2KB, 4KB, 8KB, and 16KB stacks. Larger stacks
+ // will be allocated directly.
+ // Since FixedStack is different on different systems, we
+ // must vary NumStackOrders to keep the same maximum cached size.
+ // OS | FixedStack | NumStackOrders
+ // -----------------+------------+---------------
+ // linux/darwin/bsd | 2KB | 4
+ // windows/32 | 4KB | 3
+ // windows/64 | 8KB | 2
+ // plan9 | 4KB | 3
+ _NumStackOrders = 4 - ptrSize/4*goos_windows - 1*goos_plan9
+
+ // Number of bits in page to span calculations (4k pages).
+ // On Windows 64-bit we limit the arena to 32GB or 35 bits.
+ // Windows counts memory used by page table into committed memory
+ // of the process, so we can't reserve too much memory.
+ // See http://golang.org/issue/5402 and http://golang.org/issue/5236.
+ // On other 64-bit platforms, we limit the arena to 128GB, or 37 bits.
+ // On 32-bit, we don't bother limiting anything, so we use the full 32-bit address.
+ _MHeapMap_TotalBits = (_64bit*goos_windows)*35 + (_64bit*(1-goos_windows))*37 + (1-_64bit)*32
+ _MHeapMap_Bits = _MHeapMap_TotalBits - _PageShift
+
+ _MaxMem = uintptr(1<<_MHeapMap_TotalBits - 1)
+
+ // Max number of threads to run garbage collection.
+ // 2, 3, and 4 are all plausible maximums depending
+ // on the hardware details of the machine. The garbage
+ // collector scales well to 32 cpus.
+ _MaxGcproc = 32
+)
+
// Page number (address>>pageShift)
type pageID uintptr
+const _MaxArena32 = 2 << 30
+
+// OS-defined helpers:
+//
+// sysAlloc obtains a large chunk of zeroed memory from the
+// operating system, typically on the order of a hundred kilobytes
+// or a megabyte.
+// NOTE: sysAlloc returns OS-aligned memory, but the heap allocator
+// may use larger alignment, so the caller must be careful to realign the
+// memory obtained by sysAlloc.
+//
+// SysUnused notifies the operating system that the contents
+// of the memory region are no longer needed and can be reused
+// for other purposes.
+// SysUsed notifies the operating system that the contents
+// of the memory region are needed again.
+//
+// SysFree returns it unconditionally; this is only used if
+// an out-of-memory error has been detected midway through
+// an allocation. It is okay if SysFree is a no-op.
+//
+// SysReserve reserves address space without allocating memory.
+// If the pointer passed to it is non-nil, the caller wants the
+// reservation there, but SysReserve can still choose another
+// location if that one is unavailable. On some systems and in some
+// cases SysReserve will simply check that the address space is
+// available and not actually reserve it. If SysReserve returns
+// non-nil, it sets *reserved to true if the address space is
+// reserved, false if it has merely been checked.
+// NOTE: SysReserve returns OS-aligned memory, but the heap allocator
+// may use larger alignment, so the caller must be careful to realign the
+// memory obtained by sysAlloc.
+//
+// SysMap maps previously reserved address space for use.
+// The reserved argument is true if the address space was really
+// reserved, not merely checked.
+//
+// SysFault marks a (already sysAlloc'd) region to fault
+// if accessed. Used only for debugging the runtime.
+
+func mallocinit() {
+ initSizes()
+
+ if class_to_size[_TinySizeClass] != _TinySize {
+ throw("bad TinySizeClass")
+ }
+
+ var p, bitmapSize, spansSize, pSize, limit uintptr
+ var reserved bool
+
+ // limit = runtime.memlimit();
+ // See https://golang.org/issue/5049
+ // TODO(rsc): Fix after 1.1.
+ limit = 0
+
+ // Set up the allocation arena, a contiguous area of memory where
+ // allocated data will be found. The arena begins with a bitmap large
+ // enough to hold 4 bits per allocated word.
+ if ptrSize == 8 && (limit == 0 || limit > 1<<30) {
+ // On a 64-bit machine, allocate from a single contiguous reservation.
+ // 128 GB (MaxMem) should be big enough for now.
+ //
+ // The code will work with the reservation at any address, but ask
+ // SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f).
+ // Allocating a 128 GB region takes away 37 bits, and the amd64
+ // doesn't let us choose the top 17 bits, so that leaves the 11 bits
+ // in the middle of 0x00c0 for us to choose. Choosing 0x00c0 means
+ // that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df.
+ // In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid
+ // UTF-8 sequences, and they are otherwise as far away from
+ // ff (likely a common byte) as possible. If that fails, we try other 0xXXc0
+ // addresses. An earlier attempt to use 0x11f8 caused out of memory errors
+ // on OS X during thread allocations. 0x00c0 causes conflicts with
+ // AddressSanitizer which reserves all memory up to 0x0100.
+ // These choices are both for debuggability and to reduce the
+ // odds of the conservative garbage collector not collecting memory
+ // because some non-pointer block of memory had a bit pattern
+ // that matched a memory address.
+ //
+ // Actually we reserve 136 GB (because the bitmap ends up being 8 GB)
+ // but it hardly matters: e0 00 is not valid UTF-8 either.
+ //
+ // If this fails we fall back to the 32 bit memory mechanism
+ arenaSize := round(_MaxMem, _PageSize)
+ bitmapSize = arenaSize / (ptrSize * 8 / 4)
+ spansSize = arenaSize / _PageSize * ptrSize
+ spansSize = round(spansSize, _PageSize)
+ for i := 0; i <= 0x7f; i++ {
+ p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
+ pSize = bitmapSize + spansSize + arenaSize + _PageSize
+ p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
+ if p != 0 {
+ break
+ }
+ }
+ }
+
+ if p == 0 {
+ // On a 32-bit machine, we can't typically get away
+ // with a giant virtual address space reservation.
+ // Instead we map the memory information bitmap
+ // immediately after the data segment, large enough
+ // to handle another 2GB of mappings (256 MB),
+ // along with a reservation for an initial arena.
+ // When that gets used up, we'll start asking the kernel
+ // for any memory anywhere and hope it's in the 2GB
+ // following the bitmap (presumably the executable begins
+ // near the bottom of memory, so we'll have to use up
+ // most of memory before the kernel resorts to giving out
+ // memory before the beginning of the text segment).
+ //
+ // Alternatively we could reserve 512 MB bitmap, enough
+ // for 4GB of mappings, and then accept any memory the
+ // kernel threw at us, but normally that's a waste of 512 MB
+ // of address space, which is probably too much in a 32-bit world.
+
+ // If we fail to allocate, try again with a smaller arena.
+ // This is necessary on Android L where we share a process
+ // with ART, which reserves virtual memory aggressively.
+ arenaSizes := []uintptr{
+ 512 << 20,
+ 256 << 20,
+ }
+
+ for _, arenaSize := range arenaSizes {
+ bitmapSize = _MaxArena32 / (ptrSize * 8 / 4)
+ spansSize = _MaxArena32 / _PageSize * ptrSize
+ if limit > 0 && arenaSize+bitmapSize+spansSize > limit {
+ bitmapSize = (limit / 9) &^ ((1 << _PageShift) - 1)
+ arenaSize = bitmapSize * 8
+ spansSize = arenaSize / _PageSize * ptrSize
+ }
+ spansSize = round(spansSize, _PageSize)
+
+ // SysReserve treats the address we ask for, end, as a hint,
+ // not as an absolute requirement. If we ask for the end
+ // of the data segment but the operating system requires
+ // a little more space before we can start allocating, it will
+ // give out a slightly higher pointer. Except QEMU, which
+ // is buggy, as usual: it won't adjust the pointer upward.
+ // So adjust it upward a little bit ourselves: 1/4 MB to get
+ // away from the running binary image and then round up
+ // to a MB boundary.
+ p = round(uintptr(unsafe.Pointer(&end))+(1<<18), 1<<20)
+ pSize = bitmapSize + spansSize + arenaSize + _PageSize
+ p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
+ if p != 0 {
+ break
+ }
+ }
+ if p == 0 {
+ throw("runtime: cannot reserve arena virtual address space")
+ }
+ }
+
+ // PageSize can be larger than OS definition of page size,
+ // so SysReserve can give us a PageSize-unaligned pointer.
+ // To overcome this we ask for PageSize more and round up the pointer.
+ p1 := round(p, _PageSize)
+
+ mheap_.spans = (**mspan)(unsafe.Pointer(p1))
+ mheap_.bitmap = p1 + spansSize
+ mheap_.arena_start = p1 + (spansSize + bitmapSize)
+ mheap_.arena_used = mheap_.arena_start
+ mheap_.arena_end = p + pSize
+ mheap_.arena_reserved = reserved
+
+ if mheap_.arena_start&(_PageSize-1) != 0 {
+ println("bad pagesize", hex(p), hex(p1), hex(spansSize), hex(bitmapSize), hex(_PageSize), "start", hex(mheap_.arena_start))
+ throw("misrounded allocation in mallocinit")
+ }
+
+ // Initialize the rest of the allocator.
+ mHeap_Init(&mheap_, spansSize)
+ _g_ := getg()
+ _g_.m.mcache = allocmcache()
+}
+
+// sysReserveHigh reserves space somewhere high in the address space.
+// sysReserve doesn't actually reserve the full amount requested on
+// 64-bit systems, because of problems with ulimit. Instead it checks
+// that it can get the first 64 kB and assumes it can grab the rest as
+// needed. This doesn't work well with the "let the kernel pick an address"
+// mode, so don't do that. Pick a high address instead.
+func sysReserveHigh(n uintptr, reserved *bool) unsafe.Pointer {
+ if ptrSize == 4 {
+ return sysReserve(nil, n, reserved)
+ }
+
+ for i := 0; i <= 0x7f; i++ {
+ p := uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
+ *reserved = false
+ p = uintptr(sysReserve(unsafe.Pointer(p), n, reserved))
+ if p != 0 {
+ return unsafe.Pointer(p)
+ }
+ }
+
+ return sysReserve(nil, n, reserved)
+}
+
+func mHeap_SysAlloc(h *mheap, n uintptr) unsafe.Pointer {
+ if n > uintptr(h.arena_end)-uintptr(h.arena_used) {
+ // We are in 32-bit mode, maybe we didn't use all possible address space yet.
+ // Reserve some more space.
+ p_size := round(n+_PageSize, 256<<20)
+ new_end := h.arena_end + p_size
+ if new_end <= h.arena_start+_MaxArena32 {
+ // TODO: It would be bad if part of the arena
+ // is reserved and part is not.
+ var reserved bool
+ p := uintptr(sysReserve((unsafe.Pointer)(h.arena_end), p_size, &reserved))
+ if p == h.arena_end {
+ h.arena_end = new_end
+ h.arena_reserved = reserved
+ } else if p+p_size <= h.arena_start+_MaxArena32 {
+ // Keep everything page-aligned.
+ // Our pages are bigger than hardware pages.
+ h.arena_end = p + p_size
+ h.arena_used = p + (-uintptr(p) & (_PageSize - 1))
+ h.arena_reserved = reserved
+ } else {
+ var stat uint64
+ sysFree((unsafe.Pointer)(p), p_size, &stat)
+ }
+ }
+ }
+
+ if n <= uintptr(h.arena_end)-uintptr(h.arena_used) {
+ // Keep taking from our reservation.
+ p := h.arena_used
+ sysMap((unsafe.Pointer)(p), n, h.arena_reserved, &memstats.heap_sys)
+ h.arena_used += n
+ mHeap_MapBits(h)
+ mHeap_MapSpans(h)
+ if raceenabled {
+ racemapshadow((unsafe.Pointer)(p), n)
+ }
+ if mheap_.shadow_enabled {
+ sysMap(unsafe.Pointer(p+mheap_.shadow_heap), n, h.shadow_reserved, &memstats.other_sys)
+ }
+
+ if uintptr(p)&(_PageSize-1) != 0 {
+ throw("misrounded allocation in MHeap_SysAlloc")
+ }
+ return (unsafe.Pointer)(p)
+ }
+
+ // If using 64-bit, our reservation is all we have.
+ if uintptr(h.arena_end)-uintptr(h.arena_start) >= _MaxArena32 {
+ return nil
+ }
+
+ // On 32-bit, once the reservation is gone we can
+ // try to get memory at a location chosen by the OS
+ // and hope that it is in the range we allocated bitmap for.
+ p_size := round(n, _PageSize) + _PageSize
+ p := uintptr(sysAlloc(p_size, &memstats.heap_sys))
+ if p == 0 {
+ return nil
+ }
+
+ if p < h.arena_start || uintptr(p)+p_size-uintptr(h.arena_start) >= _MaxArena32 {
+ print("runtime: memory allocated by OS (", p, ") not in usable range [", hex(h.arena_start), ",", hex(h.arena_start+_MaxArena32), ")\n")
+ sysFree((unsafe.Pointer)(p), p_size, &memstats.heap_sys)
+ return nil
+ }
+
+ p_end := p + p_size
+ p += -p & (_PageSize - 1)
+ if uintptr(p)+n > uintptr(h.arena_used) {
+ h.arena_used = p + n
+ if p_end > h.arena_end {
+ h.arena_end = p_end
+ }
+ mHeap_MapBits(h)
+ mHeap_MapSpans(h)
+ if raceenabled {
+ racemapshadow((unsafe.Pointer)(p), n)
+ }
+ }
+
+ if uintptr(p)&(_PageSize-1) != 0 {
+ throw("misrounded allocation in MHeap_SysAlloc")
+ }
+ return (unsafe.Pointer)(p)
+}
+
// base address for all 0-byte allocations
var zerobase uintptr
-// Trigger the concurrent GC when 1/triggerratio memory is available to allocate.
-// Adjust this ratio as part of a scheme to ensure that mutators have enough
-// memory to allocate in durring a concurrent GC cycle.
-var triggerratio = int64(8)
-
-// Determine whether to initiate a GC.
-// If the GC is already working no need to trigger another one.
-// This should establish a feedback loop where if the GC does not
-// have sufficient time to complete then more memory will be
-// requested from the OS increasing heap size thus allow future
-// GCs more time to complete.
-// memstat.heap_alloc and memstat.next_gc reads have benign races
-// A false negative simple does not start a GC, a false positive
-// will start a GC needlessly. Neither have correctness issues.
-func shouldtriggergc() bool {
- return triggerratio*(int64(memstats.next_gc)-int64(memstats.heap_alloc)) <= int64(memstats.next_gc) && atomicloaduint(&bggc.working) == 0
-}
+const (
+ // flags to malloc
+ _FlagNoScan = 1 << 0 // GC doesn't have to scan object
+ _FlagNoZero = 1 << 1 // don't zero memory
+)
// Allocate an object of size bytes.
// Small objects are allocated from the per-P cache's free lists.
@@ -250,6 +668,25 @@
return x
}
+func largeAlloc(size uintptr, flag uint32) *mspan {
+ // print("largeAlloc size=", size, "\n")
+
+ if size+_PageSize < size {
+ throw("out of memory")
+ }
+ npages := size >> _PageShift
+ if size&_PageMask != 0 {
+ npages++
+ }
+ s := mHeap_Alloc(&mheap_, npages, 0, true, flag&_FlagNoZero == 0)
+ if s == nil {
+ throw("out of memory")
+ }
+ s.limit = uintptr(s.start)<<_PageShift + size
+ heapBitsForSpan(s.base()).initSpan(s.layout())
+ return s
+}
+
// implementation of new builtin
func newobject(typ *_type) unsafe.Pointer {
flags := uint32(0)
@@ -310,289 +747,6 @@
mProf_Malloc(x, size)
}
-// For now this must be bracketed with a stoptheworld and a starttheworld to ensure
-// all go routines see the new barrier.
-//go:nowritebarrier
-func gcinstallmarkwb() {
- gcphase = _GCmark
-}
-
-// force = 0 - start concurrent GC
-// force = 1 - do STW GC regardless of current heap usage
-// force = 2 - go STW GC and eager sweep
-func gogc(force int32) {
- // The gc is turned off (via enablegc) until the bootstrap has completed.
- // Also, malloc gets called in the guts of a number of libraries that might be
- // holding locks. To avoid deadlocks during stoptheworld, don't bother
- // trying to run gc while holding a lock. The next mallocgc without a lock
- // will do the gc instead.
-
- mp := acquirem()
- if gp := getg(); gp == mp.g0 || mp.locks > 1 || !memstats.enablegc || panicking != 0 || gcpercent < 0 {
- releasem(mp)
- return
- }
- releasem(mp)
- mp = nil
-
- if force == 0 {
- lock(&bggc.lock)
- if !bggc.started {
- bggc.working = 1
- bggc.started = true
- go backgroundgc()
- } else if bggc.working == 0 {
- bggc.working = 1
- ready(bggc.g)
- }
- unlock(&bggc.lock)
- } else {
- gcwork(force)
- }
-}
-
-func gcwork(force int32) {
-
- semacquire(&worldsema, false)
-
- // Pick up the remaining unswept/not being swept spans concurrently
- for gosweepone() != ^uintptr(0) {
- sweep.nbgsweep++
- }
-
- // Ok, we're doing it! Stop everybody else
-
- mp := acquirem()
- mp.preemptoff = "gcing"
- releasem(mp)
- gctimer.count++
- if force == 0 {
- gctimer.cycle.sweepterm = nanotime()
- }
-
- if trace.enabled {
- traceGoSched()
- traceGCStart()
- }
-
- // Pick up the remaining unswept/not being swept spans before we STW
- for gosweepone() != ^uintptr(0) {
- sweep.nbgsweep++
- }
- systemstack(stoptheworld)
- systemstack(finishsweep_m) // finish sweep before we start concurrent scan.
- if force == 0 { // Do as much work concurrently as possible
- gcphase = _GCscan
- systemstack(starttheworld)
- gctimer.cycle.scan = nanotime()
- // Do a concurrent heap scan before we stop the world.
- systemstack(gcscan_m)
- gctimer.cycle.installmarkwb = nanotime()
- systemstack(stoptheworld)
- systemstack(gcinstallmarkwb)
- systemstack(harvestwbufs)
- systemstack(starttheworld)
- gctimer.cycle.mark = nanotime()
- systemstack(gcmark_m)
- gctimer.cycle.markterm = nanotime()
- systemstack(stoptheworld)
- systemstack(gcinstalloffwb_m)
- } else {
- // For non-concurrent GC (force != 0) g stack have not been scanned so
- // set gcscanvalid such that mark termination scans all stacks.
- // No races here since we are in a STW phase.
- for _, gp := range allgs {
- gp.gcworkdone = false // set to true in gcphasework
- gp.gcscanvalid = false // stack has not been scanned
- }
- }
-
- startTime := nanotime()
- if mp != acquirem() {
- throw("gogc: rescheduled")
- }
-
- clearpools()
-
- // Run gc on the g0 stack. We do this so that the g stack
- // we're currently running on will no longer change. Cuts
- // the root set down a bit (g0 stacks are not scanned, and
- // we don't need to scan gc's internal state). We also
- // need to switch to g0 so we can shrink the stack.
- n := 1
- if debug.gctrace > 1 {
- n = 2
- }
- eagersweep := force >= 2
- for i := 0; i < n; i++ {
- if i > 0 {
- // refresh start time if doing a second GC
- startTime = nanotime()
- }
- // switch to g0, call gc, then switch back
- systemstack(func() {
- gc_m(startTime, eagersweep)
- })
- }
-
- systemstack(func() {
- gccheckmark_m(startTime, eagersweep)
- })
-
- if trace.enabled {
- traceGCDone()
- traceGoStart()
- }
-
- // all done
- mp.preemptoff = ""
-
- if force == 0 {
- gctimer.cycle.sweep = nanotime()
- }
-
- semrelease(&worldsema)
-
- if force == 0 {
- if gctimer.verbose > 1 {
- GCprinttimes()
- } else if gctimer.verbose > 0 {
- calctimes() // ignore result
- }
- }
-
- systemstack(starttheworld)
-
- releasem(mp)
- mp = nil
-
- // now that gc is done, kick off finalizer thread if needed
- if !concurrentSweep {
- // give the queued finalizers, if any, a chance to run
- Gosched()
- }
-}
-
-// gctimes records the time in nanoseconds of each phase of the concurrent GC.
-type gctimes struct {
- sweepterm int64 // stw
- scan int64
- installmarkwb int64 // stw
- mark int64
- markterm int64 // stw
- sweep int64
-}
-
-// gcchronograph holds timer information related to GC phases
-// max records the maximum time spent in each GC phase since GCstarttimes.
-// total records the total time spent in each GC phase since GCstarttimes.
-// cycle records the absolute time (as returned by nanoseconds()) that each GC phase last started at.
-type gcchronograph struct {
- count int64
- verbose int64
- maxpause int64
- max gctimes
- total gctimes
- cycle gctimes
-}
-
-var gctimer gcchronograph
-
-// GCstarttimes initializes the gc times. All previous times are lost.
-func GCstarttimes(verbose int64) {
- gctimer = gcchronograph{verbose: verbose}
-}
-
-// GCendtimes stops the gc timers.
-func GCendtimes() {
- gctimer.verbose = 0
-}
-
-// calctimes converts gctimer.cycle into the elapsed times, updates gctimer.total
-// and updates gctimer.max with the max pause time.
-func calctimes() gctimes {
- var times gctimes
-
- var max = func(a, b int64) int64 {
- if a > b {
- return a
- }
- return b
- }
-
- times.sweepterm = gctimer.cycle.scan - gctimer.cycle.sweepterm
- gctimer.total.sweepterm += times.sweepterm
- gctimer.max.sweepterm = max(gctimer.max.sweepterm, times.sweepterm)
- gctimer.maxpause = max(gctimer.maxpause, gctimer.max.sweepterm)
-
- times.scan = gctimer.cycle.installmarkwb - gctimer.cycle.scan
- gctimer.total.scan += times.scan
- gctimer.max.scan = max(gctimer.max.scan, times.scan)
-
- times.installmarkwb = gctimer.cycle.mark - gctimer.cycle.installmarkwb
- gctimer.total.installmarkwb += times.installmarkwb
- gctimer.max.installmarkwb = max(gctimer.max.installmarkwb, times.installmarkwb)
- gctimer.maxpause = max(gctimer.maxpause, gctimer.max.installmarkwb)
-
- times.mark = gctimer.cycle.markterm - gctimer.cycle.mark
- gctimer.total.mark += times.mark
- gctimer.max.mark = max(gctimer.max.mark, times.mark)
-
- times.markterm = gctimer.cycle.sweep - gctimer.cycle.markterm
- gctimer.total.markterm += times.markterm
- gctimer.max.markterm = max(gctimer.max.markterm, times.markterm)
- gctimer.maxpause = max(gctimer.maxpause, gctimer.max.markterm)
-
- return times
-}
-
-// GCprinttimes prints latency information in nanoseconds about various
-// phases in the GC. The information for each phase includes the maximum pause
-// and total time since the most recent call to GCstarttimes as well as
-// the information from the most recent Concurent GC cycle. Calls from the
-// application to runtime.GC() are ignored.
-func GCprinttimes() {
- if gctimer.verbose == 0 {
- println("GC timers not enabled")
- return
- }
-
- // Explicitly put times on the heap so printPhase can use it.
- times := new(gctimes)
- *times = calctimes()
- cycletime := gctimer.cycle.sweep - gctimer.cycle.sweepterm
- pause := times.sweepterm + times.installmarkwb + times.markterm
- gomaxprocs := GOMAXPROCS(-1)
-
- printlock()
- print("GC: #", gctimer.count, " ", cycletime, "ns @", gctimer.cycle.sweepterm, " pause=", pause, " maxpause=", gctimer.maxpause, " goroutines=", allglen, " gomaxprocs=", gomaxprocs, "\n")
- printPhase := func(label string, get func(*gctimes) int64, procs int) {
- print("GC: ", label, " ", get(times), "ns\tmax=", get(&gctimer.max), "\ttotal=", get(&gctimer.total), "\tprocs=", procs, "\n")
- }
- printPhase("sweep term:", func(t *gctimes) int64 { return t.sweepterm }, gomaxprocs)
- printPhase("scan: ", func(t *gctimes) int64 { return t.scan }, 1)
- printPhase("install wb:", func(t *gctimes) int64 { return t.installmarkwb }, gomaxprocs)
- printPhase("mark: ", func(t *gctimes) int64 { return t.mark }, 1)
- printPhase("mark term: ", func(t *gctimes) int64 { return t.markterm }, gomaxprocs)
- printunlock()
-}
-
-// GC runs a garbage collection.
-func GC() {
- gogc(2)
-}
-
-// linker-provided
-var noptrdata struct{}
-var enoptrdata struct{}
-var noptrbss struct{}
-var enoptrbss struct{}
-
-// round n up to a multiple of a. a must be a power of 2.
-func round(n, a uintptr) uintptr {
- return (n + a - 1) &^ (a - 1)
-}
-
var persistent struct {
lock mutex
base unsafe.Pointer
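
The mallocinit and mHeap_SysAlloc code added above, and the round helper removed from this
file, lean on power-of-two rounding to keep arena addresses page-aligned. A self-contained
sketch of that arithmetic, using the same formulas with a hypothetical 100 KB request:

package main

import "fmt"

const (
	pageShift = 13 // _PageShift
	pageSize  = 1 << pageShift
)

// round rounds n up to a multiple of a; a must be a power of 2.
// Same formula as the helper removed from malloc.go above.
func round(n, a uintptr) uintptr {
	return (n + a - 1) &^ (a - 1)
}

func main() {
	// Hypothetical large allocation of 100 KB, converted to a page
	// count the way largeAlloc above does it.
	size := uintptr(100 << 10)
	npages := size >> pageShift
	if size&(pageSize-1) != 0 {
		npages++
	}
	fmt.Println(npages)                // 13 pages
	fmt.Println(round(size, pageSize)) // 106496 bytes = 13 * 8192
}
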
diff --git a/src/runtime/malloc1.go b/src/runtime/malloc1.go
deleted file mode 100644
index 18d998b..0000000
--- a/src/runtime/malloc1.go
+++ /dev/null
@@ -1,358 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// See malloc.h for overview.
-//
-// TODO(rsc): double-check stats.
-
-package runtime
-
-import "unsafe"
-
-const _MaxArena32 = 2 << 30
-
-// For use by Go. If it were a C enum it would be made available automatically,
-// but the value of MaxMem is too large for enum.
-// XXX - uintptr runtime·maxmem = MaxMem;
-
-func mlookup(v uintptr, base *uintptr, size *uintptr, sp **mspan) int32 {
- _g_ := getg()
-
- _g_.m.mcache.local_nlookup++
- if ptrSize == 4 && _g_.m.mcache.local_nlookup >= 1<<30 {
- // purge cache stats to prevent overflow
- lock(&mheap_.lock)
- purgecachedstats(_g_.m.mcache)
- unlock(&mheap_.lock)
- }
-
- s := mHeap_LookupMaybe(&mheap_, unsafe.Pointer(v))
- if sp != nil {
- *sp = s
- }
- if s == nil {
- if base != nil {
- *base = 0
- }
- if size != nil {
- *size = 0
- }
- return 0
- }
-
- p := uintptr(s.start) << _PageShift
- if s.sizeclass == 0 {
- // Large object.
- if base != nil {
- *base = p
- }
- if size != nil {
- *size = s.npages << _PageShift
- }
- return 1
- }
-
- n := s.elemsize
- if base != nil {
- i := (uintptr(v) - uintptr(p)) / n
- *base = p + i*n
- }
- if size != nil {
- *size = n
- }
-
- return 1
-}
-
-//go:nosplit
-func purgecachedstats(c *mcache) {
- // Protected by either heap or GC lock.
- h := &mheap_
- memstats.heap_alloc += uint64(c.local_cachealloc)
- c.local_cachealloc = 0
- if trace.enabled {
- traceHeapAlloc()
- }
- memstats.tinyallocs += uint64(c.local_tinyallocs)
- c.local_tinyallocs = 0
- memstats.nlookup += uint64(c.local_nlookup)
- c.local_nlookup = 0
- h.largefree += uint64(c.local_largefree)
- c.local_largefree = 0
- h.nlargefree += uint64(c.local_nlargefree)
- c.local_nlargefree = 0
- for i := 0; i < len(c.local_nsmallfree); i++ {
- h.nsmallfree[i] += uint64(c.local_nsmallfree[i])
- c.local_nsmallfree[i] = 0
- }
-}
-
-func mallocinit() {
- initSizes()
-
- if class_to_size[_TinySizeClass] != _TinySize {
- throw("bad TinySizeClass")
- }
-
- var p, bitmapSize, spansSize, pSize, limit uintptr
- var reserved bool
-
- // limit = runtime.memlimit();
- // See https://golang.org/issue/5049
- // TODO(rsc): Fix after 1.1.
- limit = 0
-
- // Set up the allocation arena, a contiguous area of memory where
- // allocated data will be found. The arena begins with a bitmap large
- // enough to hold 4 bits per allocated word.
- if ptrSize == 8 && (limit == 0 || limit > 1<<30) {
- // On a 64-bit machine, allocate from a single contiguous reservation.
- // 128 GB (MaxMem) should be big enough for now.
- //
- // The code will work with the reservation at any address, but ask
- // SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f).
- // Allocating a 128 GB region takes away 37 bits, and the amd64
- // doesn't let us choose the top 17 bits, so that leaves the 11 bits
- // in the middle of 0x00c0 for us to choose. Choosing 0x00c0 means
- // that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df.
- // In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid
- // UTF-8 sequences, and they are otherwise as far away from
- // ff (likely a common byte) as possible. If that fails, we try other 0xXXc0
- // addresses. An earlier attempt to use 0x11f8 caused out of memory errors
- // on OS X during thread allocations. 0x00c0 causes conflicts with
- // AddressSanitizer which reserves all memory up to 0x0100.
- // These choices are both for debuggability and to reduce the
- // odds of the conservative garbage collector not collecting memory
- // because some non-pointer block of memory had a bit pattern
- // that matched a memory address.
- //
- // Actually we reserve 136 GB (because the bitmap ends up being 8 GB)
- // but it hardly matters: e0 00 is not valid UTF-8 either.
- //
- // If this fails we fall back to the 32 bit memory mechanism
- arenaSize := round(_MaxMem, _PageSize)
- bitmapSize = arenaSize / (ptrSize * 8 / 4)
- spansSize = arenaSize / _PageSize * ptrSize
- spansSize = round(spansSize, _PageSize)
- for i := 0; i <= 0x7f; i++ {
- p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
- pSize = bitmapSize + spansSize + arenaSize + _PageSize
- p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
- if p != 0 {
- break
- }
- }
- }
-
- if p == 0 {
- // On a 32-bit machine, we can't typically get away
- // with a giant virtual address space reservation.
- // Instead we map the memory information bitmap
- // immediately after the data segment, large enough
- // to handle another 2GB of mappings (256 MB),
- // along with a reservation for an initial arena.
- // When that gets used up, we'll start asking the kernel
- // for any memory anywhere and hope it's in the 2GB
- // following the bitmap (presumably the executable begins
- // near the bottom of memory, so we'll have to use up
- // most of memory before the kernel resorts to giving out
- // memory before the beginning of the text segment).
- //
- // Alternatively we could reserve 512 MB bitmap, enough
- // for 4GB of mappings, and then accept any memory the
- // kernel threw at us, but normally that's a waste of 512 MB
- // of address space, which is probably too much in a 32-bit world.
-
- // If we fail to allocate, try again with a smaller arena.
- // This is necessary on Android L where we share a process
- // with ART, which reserves virtual memory aggressively.
- arenaSizes := []uintptr{
- 512 << 20,
- 256 << 20,
- }
-
- for _, arenaSize := range arenaSizes {
- bitmapSize = _MaxArena32 / (ptrSize * 8 / 4)
- spansSize = _MaxArena32 / _PageSize * ptrSize
- if limit > 0 && arenaSize+bitmapSize+spansSize > limit {
- bitmapSize = (limit / 9) &^ ((1 << _PageShift) - 1)
- arenaSize = bitmapSize * 8
- spansSize = arenaSize / _PageSize * ptrSize
- }
- spansSize = round(spansSize, _PageSize)
-
- // SysReserve treats the address we ask for, end, as a hint,
- // not as an absolute requirement. If we ask for the end
- // of the data segment but the operating system requires
- // a little more space before we can start allocating, it will
- // give out a slightly higher pointer. Except QEMU, which
- // is buggy, as usual: it won't adjust the pointer upward.
- // So adjust it upward a little bit ourselves: 1/4 MB to get
- // away from the running binary image and then round up
- // to a MB boundary.
- p = round(uintptr(unsafe.Pointer(&end))+(1<<18), 1<<20)
- pSize = bitmapSize + spansSize + arenaSize + _PageSize
- p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
- if p != 0 {
- break
- }
- }
- if p == 0 {
- throw("runtime: cannot reserve arena virtual address space")
- }
- }
-
- // PageSize can be larger than OS definition of page size,
- // so SysReserve can give us a PageSize-unaligned pointer.
- // To overcome this we ask for PageSize more and round up the pointer.
- p1 := round(p, _PageSize)
-
- mheap_.spans = (**mspan)(unsafe.Pointer(p1))
- mheap_.bitmap = p1 + spansSize
- mheap_.arena_start = p1 + (spansSize + bitmapSize)
- mheap_.arena_used = mheap_.arena_start
- mheap_.arena_end = p + pSize
- mheap_.arena_reserved = reserved
-
- if mheap_.arena_start&(_PageSize-1) != 0 {
- println("bad pagesize", hex(p), hex(p1), hex(spansSize), hex(bitmapSize), hex(_PageSize), "start", hex(mheap_.arena_start))
- throw("misrounded allocation in mallocinit")
- }
-
- // Initialize the rest of the allocator.
- mHeap_Init(&mheap_, spansSize)
- _g_ := getg()
- _g_.m.mcache = allocmcache()
-}
-
-// sysReserveHigh reserves space somewhere high in the address space.
-// sysReserve doesn't actually reserve the full amount requested on
-// 64-bit systems, because of problems with ulimit. Instead it checks
-// that it can get the first 64 kB and assumes it can grab the rest as
-// needed. This doesn't work well with the "let the kernel pick an address"
-// mode, so don't do that. Pick a high address instead.
-func sysReserveHigh(n uintptr, reserved *bool) unsafe.Pointer {
- if ptrSize == 4 {
- return sysReserve(nil, n, reserved)
- }
-
- for i := 0; i <= 0x7f; i++ {
- p := uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
- *reserved = false
- p = uintptr(sysReserve(unsafe.Pointer(p), n, reserved))
- if p != 0 {
- return unsafe.Pointer(p)
- }
- }
-
- return sysReserve(nil, n, reserved)
-}
-
-func mHeap_SysAlloc(h *mheap, n uintptr) unsafe.Pointer {
- if n > uintptr(h.arena_end)-uintptr(h.arena_used) {
- // We are in 32-bit mode, maybe we didn't use all possible address space yet.
- // Reserve some more space.
- p_size := round(n+_PageSize, 256<<20)
- new_end := h.arena_end + p_size
- if new_end <= h.arena_start+_MaxArena32 {
- // TODO: It would be bad if part of the arena
- // is reserved and part is not.
- var reserved bool
- p := uintptr(sysReserve((unsafe.Pointer)(h.arena_end), p_size, &reserved))
- if p == h.arena_end {
- h.arena_end = new_end
- h.arena_reserved = reserved
- } else if p+p_size <= h.arena_start+_MaxArena32 {
- // Keep everything page-aligned.
- // Our pages are bigger than hardware pages.
- h.arena_end = p + p_size
- h.arena_used = p + (-uintptr(p) & (_PageSize - 1))
- h.arena_reserved = reserved
- } else {
- var stat uint64
- sysFree((unsafe.Pointer)(p), p_size, &stat)
- }
- }
- }
-
- if n <= uintptr(h.arena_end)-uintptr(h.arena_used) {
- // Keep taking from our reservation.
- p := h.arena_used
- sysMap((unsafe.Pointer)(p), n, h.arena_reserved, &memstats.heap_sys)
- h.arena_used += n
- mHeap_MapBits(h)
- mHeap_MapSpans(h)
- if raceenabled {
- racemapshadow((unsafe.Pointer)(p), n)
- }
- if mheap_.shadow_enabled {
- sysMap(unsafe.Pointer(p+mheap_.shadow_heap), n, h.shadow_reserved, &memstats.other_sys)
- }
-
- if uintptr(p)&(_PageSize-1) != 0 {
- throw("misrounded allocation in MHeap_SysAlloc")
- }
- return (unsafe.Pointer)(p)
- }
-
- // If using 64-bit, our reservation is all we have.
- if uintptr(h.arena_end)-uintptr(h.arena_start) >= _MaxArena32 {
- return nil
- }
-
- // On 32-bit, once the reservation is gone we can
- // try to get memory at a location chosen by the OS
- // and hope that it is in the range we allocated bitmap for.
- p_size := round(n, _PageSize) + _PageSize
- p := uintptr(sysAlloc(p_size, &memstats.heap_sys))
- if p == 0 {
- return nil
- }
-
- if p < h.arena_start || uintptr(p)+p_size-uintptr(h.arena_start) >= _MaxArena32 {
- print("runtime: memory allocated by OS (", p, ") not in usable range [", hex(h.arena_start), ",", hex(h.arena_start+_MaxArena32), ")\n")
- sysFree((unsafe.Pointer)(p), p_size, &memstats.heap_sys)
- return nil
- }
-
- p_end := p + p_size
- p += -p & (_PageSize - 1)
- if uintptr(p)+n > uintptr(h.arena_used) {
- h.arena_used = p + n
- if p_end > h.arena_end {
- h.arena_end = p_end
- }
- mHeap_MapBits(h)
- mHeap_MapSpans(h)
- if raceenabled {
- racemapshadow((unsafe.Pointer)(p), n)
- }
- }
-
- if uintptr(p)&(_PageSize-1) != 0 {
- throw("misrounded allocation in MHeap_SysAlloc")
- }
- return (unsafe.Pointer)(p)
-}
-
-var end struct{}
-
-func largeAlloc(size uintptr, flag uint32) *mspan {
- // print("largeAlloc size=", size, "\n")
-
- if size+_PageSize < size {
- throw("out of memory")
- }
- npages := size >> _PageShift
- if size&_PageMask != 0 {
- npages++
- }
- s := mHeap_Alloc(&mheap_, npages, 0, true, flag&_FlagNoZero == 0)
- if s == nil {
- throw("out of memory")
- }
- s.limit = uintptr(s.start)<<_PageShift + size
- heapBitsForSpan(s.base()).initSpan(s.layout())
- return s
-}
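
The 64-bit branch of mallocinit, deleted here and re-added to malloc.go above, reserves
arena + bitmap + spans as one contiguous block; this is why the comment says roughly
136 GB is reserved for a 128 GB arena. A small sketch that reproduces just the size
arithmetic from the constants above (uint64 is used so the sketch also runs on 32-bit
hosts; no memory is actually reserved):

package main

import "fmt"

func main() {
	const (
		ptrSize   = 8  // 64-bit pointers
		pageShift = 13 // _PageShift
		pageSize  = 1 << pageShift
		totalBits = 37 // _MHeapMap_TotalBits on non-Windows 64-bit
	)

	// _MaxMem = 1<<totalBits - 1, rounded up to a page boundary.
	maxMem := uint64(1)<<totalBits - 1
	arenaSize := (maxMem + pageSize - 1) &^ uint64(pageSize-1)

	// 4 bits of heap bitmap per pointer-sized word.
	bitmapSize := arenaSize / (ptrSize * 8 / 4)

	// One span pointer per page.
	spansSize := arenaSize / pageSize * ptrSize

	fmt.Println("arena: ", arenaSize>>30, "GB")  // 128 GB
	fmt.Println("bitmap:", bitmapSize>>30, "GB") // 8 GB
	fmt.Println("spans: ", spansSize>>20, "MB")  // 128 MB
}
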
diff --git a/src/runtime/malloc2.go b/src/runtime/malloc2.go
deleted file mode 100644
index 14ffbdb..0000000
--- a/src/runtime/malloc2.go
+++ /dev/null
@@ -1,525 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import "unsafe"
-
-// Memory allocator, based on tcmalloc.
-// http://goog-perftools.sourceforge.net/doc/tcmalloc.html
-
-// The main allocator works in runs of pages.
-// Small allocation sizes (up to and including 32 kB) are
-// rounded to one of about 100 size classes, each of which
-// has its own free list of objects of exactly that size.
-// Any free page of memory can be split into a set of objects
-// of one size class, which are then managed using free list
-// allocators.
-//
-// The allocator's data structures are:
-//
-// FixAlloc: a free-list allocator for fixed-size objects,
-// used to manage storage used by the allocator.
-// MHeap: the malloc heap, managed at page (4096-byte) granularity.
-// MSpan: a run of pages managed by the MHeap.
-// MCentral: a shared free list for a given size class.
-// MCache: a per-thread (in Go, per-P) cache for small objects.
-// MStats: allocation statistics.
-//
-// Allocating a small object proceeds up a hierarchy of caches:
-//
-// 1. Round the size up to one of the small size classes
-// and look in the corresponding MCache free list.
-// If the list is not empty, allocate an object from it.
-// This can all be done without acquiring a lock.
-//
-// 2. If the MCache free list is empty, replenish it by
-// taking a bunch of objects from the MCentral free list.
-// Moving a bunch amortizes the cost of acquiring the MCentral lock.
-//
-// 3. If the MCentral free list is empty, replenish it by
-// allocating a run of pages from the MHeap and then
-// chopping that memory into objects of the given size.
-// Allocating many objects amortizes the cost of locking
-// the heap.
-//
-// 4. If the MHeap is empty or has no page runs large enough,
-// allocate a new group of pages (at least 1MB) from the
-// operating system. Allocating a large run of pages
-// amortizes the cost of talking to the operating system.
-//
-// Freeing a small object proceeds up the same hierarchy:
-//
-// 1. Look up the size class for the object and add it to
-// the MCache free list.
-//
-// 2. If the MCache free list is too long or the MCache has
-// too much memory, return some to the MCentral free lists.
-//
-// 3. If all the objects in a given span have returned to
-// the MCentral list, return that span to the page heap.
-//
-// 4. If the heap has too much memory, return some to the
-// operating system.
-//
-// TODO(rsc): Step 4 is not implemented.
-//
-// Allocating and freeing a large object uses the page heap
-// directly, bypassing the MCache and MCentral free lists.
-//
-// The small objects on the MCache and MCentral free lists
-// may or may not be zeroed. They are zeroed if and only if
-// the second word of the object is zero. A span in the
-// page heap is zeroed unless s->needzero is set. When a span
-// is allocated to break into small objects, it is zeroed if needed
-// and s->needzero is set. There are two main benefits to delaying the
-// zeroing this way:
-//
-// 1. stack frames allocated from the small object lists
-// or the page heap can avoid zeroing altogether.
-// 2. the cost of zeroing when reusing a small object is
-// charged to the mutator, not the garbage collector.
-//
-// This C code was written with an eye toward translating to Go
-// in the future. Methods have the form Type_Method(Type *t, ...).
-
-const (
- _PageShift = 13
- _PageSize = 1 << _PageShift
- _PageMask = _PageSize - 1
-)
-
-const (
- // _64bit = 1 on 64-bit systems, 0 on 32-bit systems
- _64bit = 1 << (^uintptr(0) >> 63) / 2
-
- // Computed constant. The definition of MaxSmallSize and the
- // algorithm in msize.c produce some number of different allocation
- // size classes. NumSizeClasses is that number. It's needed here
- // because there are static arrays of this length; when msize runs its
- // size choosing algorithm it double-checks that NumSizeClasses agrees.
- _NumSizeClasses = 67
-
- // Tunable constants.
- _MaxSmallSize = 32 << 10
-
- // Tiny allocator parameters, see "Tiny allocator" comment in malloc.go.
- _TinySize = 16
- _TinySizeClass = 2
-
- _FixAllocChunk = 16 << 10 // Chunk size for FixAlloc
- _MaxMHeapList = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
- _HeapAllocChunk = 1 << 20 // Chunk size for heap growth
-
- // Per-P, per order stack segment cache size.
- _StackCacheSize = 32 * 1024
-
- // Number of orders that get caching. Order 0 is FixedStack
- // and each successive order is twice as large.
- // We want to cache 2KB, 4KB, 8KB, and 16KB stacks. Larger stacks
- // will be allocated directly.
- // Since FixedStack is different on different systems, we
- // must vary NumStackOrders to keep the same maximum cached size.
- // OS | FixedStack | NumStackOrders
- // -----------------+------------+---------------
- // linux/darwin/bsd | 2KB | 4
- // windows/32 | 4KB | 3
- // windows/64 | 8KB | 2
- // plan9 | 4KB | 3
- _NumStackOrders = 4 - ptrSize/4*goos_windows - 1*goos_plan9
-
- // Number of bits in page to span calculations (4k pages).
- // On Windows 64-bit we limit the arena to 32GB or 35 bits.
- // Windows counts memory used by page table into committed memory
- // of the process, so we can't reserve too much memory.
- // See http://golang.org/issue/5402 and http://golang.org/issue/5236.
- // On other 64-bit platforms, we limit the arena to 128GB, or 37 bits.
- // On 32-bit, we don't bother limiting anything, so we use the full 32-bit address.
- _MHeapMap_TotalBits = (_64bit*goos_windows)*35 + (_64bit*(1-goos_windows))*37 + (1-_64bit)*32
- _MHeapMap_Bits = _MHeapMap_TotalBits - _PageShift
-
- _MaxMem = uintptr(1<<_MHeapMap_TotalBits - 1)
-
- // Max number of threads to run garbage collection.
- // 2, 3, and 4 are all plausible maximums depending
- // on the hardware details of the machine. The garbage
- // collector scales well to 32 cpus.
- _MaxGcproc = 32
-)
-
-// A generic linked list of blocks. (Typically the block is bigger than sizeof(MLink).)
-// Since assignments to mlink.next will result in a write barrier being preformed
-// this can not be used by some of the internal GC structures. For example when
-// the sweeper is placing an unmarked object on the free list it does not want the
-// write barrier to be called since that could result in the object being reachable.
-type mlink struct {
- next *mlink
-}
-
-// A gclink is a node in a linked list of blocks, like mlink,
-// but it is opaque to the garbage collector.
-// The GC does not trace the pointers during collection,
-// and the compiler does not emit write barriers for assignments
-// of gclinkptr values. Code should store references to gclinks
-// as gclinkptr, not as *gclink.
-type gclink struct {
- next gclinkptr
-}
-
-// A gclinkptr is a pointer to a gclink, but it is opaque
-// to the garbage collector.
-type gclinkptr uintptr
-
-// ptr returns the *gclink form of p.
-// The result should be used for accessing fields, not stored
-// in other data structures.
-func (p gclinkptr) ptr() *gclink {
- return (*gclink)(unsafe.Pointer(p))
-}
-
-// sysAlloc obtains a large chunk of zeroed memory from the
-// operating system, typically on the order of a hundred kilobytes
-// or a megabyte.
-// NOTE: sysAlloc returns OS-aligned memory, but the heap allocator
-// may use larger alignment, so the caller must be careful to realign the
-// memory obtained by sysAlloc.
-//
-// SysUnused notifies the operating system that the contents
-// of the memory region are no longer needed and can be reused
-// for other purposes.
-// SysUsed notifies the operating system that the contents
-// of the memory region are needed again.
-//
-// SysFree returns it unconditionally; this is only used if
-// an out-of-memory error has been detected midway through
-// an allocation. It is okay if SysFree is a no-op.
-//
-// SysReserve reserves address space without allocating memory.
-// If the pointer passed to it is non-nil, the caller wants the
-// reservation there, but SysReserve can still choose another
-// location if that one is unavailable. On some systems and in some
-// cases SysReserve will simply check that the address space is
-// available and not actually reserve it. If SysReserve returns
-// non-nil, it sets *reserved to true if the address space is
-// reserved, false if it has merely been checked.
-// NOTE: SysReserve returns OS-aligned memory, but the heap allocator
-// may use larger alignment, so the caller must be careful to realign the
-// memory obtained by sysAlloc.
-//
-// SysMap maps previously reserved address space for use.
-// The reserved argument is true if the address space was really
-// reserved, not merely checked.
-//
-// SysFault marks a (already sysAlloc'd) region to fault
-// if accessed. Used only for debugging the runtime.
-
-// FixAlloc is a simple free-list allocator for fixed size objects.
-// Malloc uses a FixAlloc wrapped around sysAlloc to manages its
-// MCache and MSpan objects.
-//
-// Memory returned by FixAlloc_Alloc is not zeroed.
-// The caller is responsible for locking around FixAlloc calls.
-// Callers can keep state in the object but the first word is
-// smashed by freeing and reallocating.
-type fixalloc struct {
- size uintptr
- first unsafe.Pointer // go func(unsafe.pointer, unsafe.pointer); f(arg, p) called first time p is returned
- arg unsafe.Pointer
- list *mlink
- chunk *byte
- nchunk uint32
- inuse uintptr // in-use bytes now
- stat *uint64
-}
-
-// Statistics.
-// Shared with Go: if you edit this structure, also edit type MemStats in mem.go.
-type mstats struct {
- // General statistics.
- alloc uint64 // bytes allocated and still in use
- total_alloc uint64 // bytes allocated (even if freed)
- sys uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
- nlookup uint64 // number of pointer lookups
- nmalloc uint64 // number of mallocs
- nfree uint64 // number of frees
-
- // Statistics about malloc heap.
- // protected by mheap.lock
- heap_alloc uint64 // bytes allocated and still in use
- heap_sys uint64 // bytes obtained from system
- heap_idle uint64 // bytes in idle spans
- heap_inuse uint64 // bytes in non-idle spans
- heap_released uint64 // bytes released to the os
- heap_objects uint64 // total number of allocated objects
-
- // Statistics about allocation of low-level fixed-size structures.
- // Protected by FixAlloc locks.
- stacks_inuse uint64 // this number is included in heap_inuse above
- stacks_sys uint64 // always 0 in mstats
- mspan_inuse uint64 // mspan structures
- mspan_sys uint64
- mcache_inuse uint64 // mcache structures
- mcache_sys uint64
- buckhash_sys uint64 // profiling bucket hash table
- gc_sys uint64
- other_sys uint64
-
- // Statistics about garbage collector.
- // Protected by mheap or stopping the world during GC.
- next_gc uint64 // next gc (in heap_alloc time)
- last_gc uint64 // last gc (in absolute time)
- pause_total_ns uint64
- pause_ns [256]uint64 // circular buffer of recent gc pause lengths
- pause_end [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970)
- numgc uint32
- enablegc bool
- debuggc bool
-
- // Statistics about allocation size classes.
-
- by_size [_NumSizeClasses]struct {
- size uint32
- nmalloc uint64
- nfree uint64
- }
-
- tinyallocs uint64 // number of tiny allocations that didn't cause actual allocation; not exported to go directly
-}
-
-var memstats mstats
-
-// Size classes. Computed and initialized by InitSizes.
-//
-// SizeToClass(0 <= n <= MaxSmallSize) returns the size class,
-// 1 <= sizeclass < NumSizeClasses, for n.
-// Size class 0 is reserved to mean "not small".
-//
-// class_to_size[i] = largest size in class i
-// class_to_allocnpages[i] = number of pages to allocate when
-// making new objects in class i
-
-var class_to_size [_NumSizeClasses]int32
-var class_to_allocnpages [_NumSizeClasses]int32
-var size_to_class8 [1024/8 + 1]int8
-var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8
-
-type mcachelist struct {
- list *mlink
- nlist uint32
-}
-
-type stackfreelist struct {
- list gclinkptr // linked list of free stacks
- size uintptr // total size of stacks in list
-}
-
-// Per-thread (in Go, per-P) cache for small objects.
-// No locking needed because it is per-thread (per-P).
-type mcache struct {
- // The following members are accessed on every malloc,
- // so they are grouped here for better caching.
- next_sample int32 // trigger heap sample after allocating this many bytes
- local_cachealloc intptr // bytes allocated (or freed) from cache since last lock of heap
- // Allocator cache for tiny objects w/o pointers.
- // See "Tiny allocator" comment in malloc.go.
- tiny unsafe.Pointer
- tinyoffset uintptr
- local_tinyallocs uintptr // number of tiny allocs not counted in other stats
-
- // The rest is not accessed on every malloc.
- alloc [_NumSizeClasses]*mspan // spans to allocate from
-
- stackcache [_NumStackOrders]stackfreelist
-
- sudogcache *sudog
-
- // Local allocator stats, flushed during GC.
- local_nlookup uintptr // number of pointer lookups
- local_largefree uintptr // bytes freed for large objects (>maxsmallsize)
- local_nlargefree uintptr // number of frees for large objects (>maxsmallsize)
- local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize)
-}
-
-const (
- _KindSpecialFinalizer = 1
- _KindSpecialProfile = 2
- // Note: The finalizer special must be first because if we're freeing
- // an object, a finalizer special will cause the freeing operation
- // to abort, and we want to keep the other special records around
- // if that happens.
-)
-
-type special struct {
- next *special // linked list in span
- offset uint16 // span offset of object
- kind byte // kind of special
-}
-
-// The described object has a finalizer set for it.
-type specialfinalizer struct {
- special special
- fn *funcval
- nret uintptr
- fint *_type
- ot *ptrtype
-}
-
-// The described object is being heap profiled.
-type specialprofile struct {
- special special
- b *bucket
-}
-
-// An MSpan is a run of pages.
-const (
- _MSpanInUse = iota // allocated for garbage collected heap
- _MSpanStack // allocated for use by stack allocator
- _MSpanFree
- _MSpanListHead
- _MSpanDead
-)
-
-type mspan struct {
- next *mspan // in a span linked list
- prev *mspan // in a span linked list
- start pageID // starting page number
- npages uintptr // number of pages in span
- freelist gclinkptr // list of free objects
- // sweep generation:
- // if sweepgen == h->sweepgen - 2, the span needs sweeping
- // if sweepgen == h->sweepgen - 1, the span is currently being swept
- // if sweepgen == h->sweepgen, the span is swept and ready to use
- // h->sweepgen is incremented by 2 after every GC
- sweepgen uint32
- ref uint16 // capacity - number of objects in freelist
- sizeclass uint8 // size class
- incache bool // being used by an mcache
- state uint8 // mspaninuse etc
- needzero uint8 // needs to be zeroed before allocation
- elemsize uintptr // computed from sizeclass or from npages
- unusedsince int64 // first time spotted by gc in mspanfree state
- npreleased uintptr // number of pages released to the os
- limit uintptr // end of data in span
- speciallock mutex // guards specials list
- specials *special // linked list of special records sorted by offset.
-}
-
-func (s *mspan) base() uintptr {
- return uintptr(s.start << _PageShift)
-}
-
-func (s *mspan) layout() (size, n, total uintptr) {
- total = s.npages << _PageShift
- size = s.elemsize
- if size > 0 {
- n = total / size
- }
- return
-}
-
-// Every MSpan is in one doubly-linked list,
-// either one of the MHeap's free lists or one of the
-// MCentral's span lists. We use empty MSpan structures as list heads.
-
-// Central list of free objects of a given size.
-type mcentral struct {
- lock mutex
- sizeclass int32
- nonempty mspan // list of spans with a free object
- empty mspan // list of spans with no free objects (or cached in an mcache)
-}
-
-// Main malloc heap.
-// The heap itself is the "free[]" and "large" arrays,
-// but all the other global data is here too.
-type mheap struct {
- lock mutex
- free [_MaxMHeapList]mspan // free lists of given length
- freelarge mspan // free lists length >= _MaxMHeapList
- busy [_MaxMHeapList]mspan // busy lists of large objects of given length
- busylarge mspan // busy lists of large objects length >= _MaxMHeapList
- allspans **mspan // all spans out there
- gcspans **mspan // copy of allspans referenced by gc marker or sweeper
- nspan uint32
- sweepgen uint32 // sweep generation, see comment in mspan
- sweepdone uint32 // all spans are swept
-
- // span lookup
- spans **mspan
- spans_mapped uintptr
-
- // range of addresses we might see in the heap
- bitmap uintptr
- bitmap_mapped uintptr
- arena_start uintptr
- arena_used uintptr
- arena_end uintptr
- arena_reserved bool
-
- // write barrier shadow data+heap.
- // 64-bit systems only, enabled by GODEBUG=wbshadow=1.
- shadow_enabled bool // shadow should be updated and checked
- shadow_reserved bool // shadow memory is reserved
- shadow_heap uintptr // heap-addr + shadow_heap = shadow heap addr
- shadow_data uintptr // data-addr + shadow_data = shadow data addr
- data_start uintptr // start of shadowed data addresses
- data_end uintptr // end of shadowed data addresses
-
- // central free lists for small size classes.
- // the padding makes sure that the MCentrals are
- // spaced CacheLineSize bytes apart, so that each MCentral.lock
- // gets its own cache line.
- central [_NumSizeClasses]struct {
- mcentral mcentral
- pad [_CacheLineSize]byte
- }
-
- spanalloc fixalloc // allocator for span*
- cachealloc fixalloc // allocator for mcache*
- specialfinalizeralloc fixalloc // allocator for specialfinalizer*
- specialprofilealloc fixalloc // allocator for specialprofile*
- speciallock mutex // lock for sepcial record allocators.
-
- // Malloc stats.
- largefree uint64 // bytes freed for large objects (>maxsmallsize)
- nlargefree uint64 // number of frees for large objects (>maxsmallsize)
- nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)
-}
-
-var mheap_ mheap
-
-const (
- // flags to malloc
- _FlagNoScan = 1 << 0 // GC doesn't have to scan object
- _FlagNoZero = 1 << 1 // don't zero memory
-)
-
-// NOTE: Layout known to queuefinalizer.
-type finalizer struct {
- fn *funcval // function to call
- arg unsafe.Pointer // ptr to object
- nret uintptr // bytes of return values from fn
- fint *_type // type of first argument of fn
- ot *ptrtype // type of ptr to object
-}
-
-type finblock struct {
- alllink *finblock
- next *finblock
- cnt int32
- _ int32
- fin [(_FinBlockSize - 2*ptrSize - 2*4) / unsafe.Sizeof(finalizer{})]finalizer
-}
-
-// Information from the compiler about the layout of stack frames.
-type bitvector struct {
- n int32 // # of bits
- bytedata *uint8
-}
-
-type stackmap struct {
- n int32 // number of bitmaps
- nbit int32 // number of bits in each bitmap
- bytedata [1]byte // bitmaps, each starting on a 32-bit boundary
-}
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index d73fe60..dce0bd5 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -82,6 +82,12 @@
typeShift = 2
)
+// Information from the compiler about the layout of stack frames.
+type bitvector struct {
+ n int32 // # of bits
+ bytedata *uint8
+}
+
// addb returns the byte pointer p+n.
//go:nowritebarrier
func addb(p *byte, n uintptr) *byte {
diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go
index d93a50d..ec9ccb4 100644
--- a/src/runtime/mcache.go
+++ b/src/runtime/mcache.go
@@ -2,14 +2,63 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// Per-P malloc cache for small objects.
-//
-// See malloc.h for an overview.
-
package runtime
import "unsafe"
+// Per-thread (in Go, per-P) cache for small objects.
+// No locking needed because it is per-thread (per-P).
+type mcache struct {
+ // The following members are accessed on every malloc,
+ // so they are grouped here for better caching.
+ next_sample int32 // trigger heap sample after allocating this many bytes
+ local_cachealloc intptr // bytes allocated (or freed) from cache since last lock of heap
+ // Allocator cache for tiny objects w/o pointers.
+ // See "Tiny allocator" comment in malloc.go.
+ tiny unsafe.Pointer
+ tinyoffset uintptr
+ local_tinyallocs uintptr // number of tiny allocs not counted in other stats
+
+ // The rest is not accessed on every malloc.
+ alloc [_NumSizeClasses]*mspan // spans to allocate from
+
+ stackcache [_NumStackOrders]stackfreelist
+
+ sudogcache *sudog
+
+ // Local allocator stats, flushed during GC.
+ local_nlookup uintptr // number of pointer lookups
+ local_largefree uintptr // bytes freed for large objects (>maxsmallsize)
+ local_nlargefree uintptr // number of frees for large objects (>maxsmallsize)
+ local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize)
+}
+
+// A gclink is a node in a linked list of blocks, like mlink,
+// but it is opaque to the garbage collector.
+// The GC does not trace the pointers during collection,
+// and the compiler does not emit write barriers for assignments
+// of gclinkptr values. Code should store references to gclinks
+// as gclinkptr, not as *gclink.
+type gclink struct {
+ next gclinkptr
+}
+
+// A gclinkptr is a pointer to a gclink, but it is opaque
+// to the garbage collector.
+type gclinkptr uintptr
+
+// ptr returns the *gclink form of p.
+// The result should be used for accessing fields, not stored
+// in other data structures.
+func (p gclinkptr) ptr() *gclink {
+ return (*gclink)(unsafe.Pointer(p))
+}
+
+type stackfreelist struct {
+ list gclinkptr // linked list of free stacks
+ size uintptr // total size of stacks in list
+}
+
// dummy MSpan that contains no free objects.
var emptymspan mspan
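
The gclinkptr pattern added above is subtle enough to deserve a standalone illustration: because the link is stored as a uintptr-typed value, stores to it emit no write barriers and the GC does not trace it, and the real pointer is materialized only at the point of field access via ptr(). A hedged sketch with made-up names (barrierFreeLink, linkPtr are illustrative, not the runtime's types; the nodes slice is the real reference that keeps the objects alive):

package main

import (
	"fmt"
	"unsafe"
)

// barrierFreeLink mimics gclink: a list node reached only through an
// opaque, integer-typed pointer.
type barrierFreeLink struct {
	next linkPtr
}

// linkPtr mimics gclinkptr: its underlying type is uintptr, so
// assignments to it generate no write barriers.
type linkPtr uintptr

// ptr materializes the *barrierFreeLink form only at the point of use.
func (p linkPtr) ptr() *barrierFreeLink {
	return (*barrierFreeLink)(unsafe.Pointer(p))
}

func main() {
	// The nodes slice keeps the objects alive; the linkPtr values are
	// invisible to the collector, just like a runtime free list.
	nodes := make([]barrierFreeLink, 3)
	var head linkPtr
	for i := range nodes {
		nodes[i].next = head
		head = linkPtr(uintptr(unsafe.Pointer(&nodes[i])))
	}
	for p := head; p != 0; p = p.ptr().next {
		fmt.Printf("node at %#x\n", uintptr(p))
	}
}
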
diff --git a/src/runtime/mcentral.go b/src/runtime/mcentral.go
index 45e7553..965cd10 100644
--- a/src/runtime/mcentral.go
+++ b/src/runtime/mcentral.go
@@ -12,6 +12,14 @@
package runtime
+// Central list of free objects of a given size.
+type mcentral struct {
+ lock mutex
+ sizeclass int32
+ nonempty mspan // list of spans with a free object
+ empty mspan // list of spans with no free objects (or cached in an mcache)
+}
+
// Initialize a single central free list.
func mCentral_Init(c *mcentral, sizeclass int32) {
c.sizeclass = sizeclass
diff --git a/src/runtime/mem.go b/src/runtime/mem.go
deleted file mode 100644
index c145886..0000000
--- a/src/runtime/mem.go
+++ /dev/null
@@ -1,114 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import "unsafe"
-
-// Note: the MemStats struct should be kept in sync with
-// struct MStats in malloc.h
-
-// A MemStats records statistics about the memory allocator.
-type MemStats struct {
- // General statistics.
- Alloc uint64 // bytes allocated and still in use
- TotalAlloc uint64 // bytes allocated (even if freed)
- Sys uint64 // bytes obtained from system (sum of XxxSys below)
- Lookups uint64 // number of pointer lookups
- Mallocs uint64 // number of mallocs
- Frees uint64 // number of frees
-
- // Main allocation heap statistics.
- HeapAlloc uint64 // bytes allocated and still in use
- HeapSys uint64 // bytes obtained from system
- HeapIdle uint64 // bytes in idle spans
- HeapInuse uint64 // bytes in non-idle span
- HeapReleased uint64 // bytes released to the OS
- HeapObjects uint64 // total number of allocated objects
-
- // Low-level fixed-size structure allocator statistics.
- // Inuse is bytes used now.
- // Sys is bytes obtained from system.
- StackInuse uint64 // bytes used by stack allocator
- StackSys uint64
- MSpanInuse uint64 // mspan structures
- MSpanSys uint64
- MCacheInuse uint64 // mcache structures
- MCacheSys uint64
- BuckHashSys uint64 // profiling bucket hash table
- GCSys uint64 // GC metadata
- OtherSys uint64 // other system allocations
-
- // Garbage collector statistics.
- NextGC uint64 // next collection will happen when HeapAlloc ≥ this amount
- LastGC uint64 // end time of last collection (nanoseconds since 1970)
- PauseTotalNs uint64
- PauseNs [256]uint64 // circular buffer of recent GC pause durations, most recent at [(NumGC+255)%256]
- PauseEnd [256]uint64 // circular buffer of recent GC pause end times
- NumGC uint32
- EnableGC bool
- DebugGC bool
-
- // Per-size allocation statistics.
- // 61 is NumSizeClasses in the C code.
- BySize [61]struct {
- Size uint32
- Mallocs uint64
- Frees uint64
- }
-}
-
-// Size of the trailing by_size array differs between Go and C,
-// and all data after by_size is local to runtime, not exported.
-// NumSizeClasses was changed, but we can not change Go struct because of backward compatibility.
-// sizeof_C_MStats is what C thinks about size of Go struct.
-var sizeof_C_MStats = unsafe.Offsetof(memstats.by_size) + 61*unsafe.Sizeof(memstats.by_size[0])
-
-func init() {
- var memStats MemStats
- if sizeof_C_MStats != unsafe.Sizeof(memStats) {
- println(sizeof_C_MStats, unsafe.Sizeof(memStats))
- throw("MStats vs MemStatsType size mismatch")
- }
-}
-
-// ReadMemStats populates m with memory allocator statistics.
-func ReadMemStats(m *MemStats) {
- // Have to acquire worldsema to stop the world,
- // because stoptheworld can only be used by
- // one goroutine at a time, and there might be
- // a pending garbage collection already calling it.
- semacquire(&worldsema, false)
- gp := getg()
- gp.m.preemptoff = "read mem stats"
- systemstack(stoptheworld)
-
- systemstack(func() {
- readmemstats_m(m)
- })
-
- gp.m.preemptoff = ""
- gp.m.locks++
- semrelease(&worldsema)
- systemstack(starttheworld)
- gp.m.locks--
-}
-
-//go:linkname runtime_debug_WriteHeapDump runtime/debug.WriteHeapDump
-func runtime_debug_WriteHeapDump(fd uintptr) {
- semacquire(&worldsema, false)
- gp := getg()
- gp.m.preemptoff = "write heap dump"
- systemstack(stoptheworld)
-
- systemstack(func() {
- writeheapdump_m(fd)
- })
-
- gp.m.preemptoff = ""
- gp.m.locks++
- semrelease(&worldsema)
- systemstack(starttheworld)
- gp.m.locks--
-}
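
The stop-the-world plumbing deleted from mem.go here is moved, not removed (ReadMemStats goes to mstats.go, WriteHeapDump to heapdump.go), so the public entry points behave the same. A small usage sketch of those entry points (the output path is arbitrary):

package main

import (
	"fmt"
	"os"
	"runtime"
	"runtime/debug"
)

func main() {
	// ReadMemStats briefly stops the world, snapshots the allocator
	// statistics, and restarts it, as the deleted code above shows.
	var ms runtime.MemStats
	runtime.ReadMemStats(&ms)
	fmt.Printf("heap: %d bytes live in %d objects\n", ms.HeapAlloc, ms.HeapObjects)

	// WriteHeapDump follows the same stop-the-world pattern and writes
	// a dump of the heap to the given file descriptor.
	f, err := os.Create("heap.dump") // arbitrary output path
	if err != nil {
		panic(err)
	}
	defer f.Close()
	debug.WriteHeapDump(f.Fd())
}
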
diff --git a/src/runtime/mfinal.go b/src/runtime/mfinal.go
index 525aa09..d066b60 100644
--- a/src/runtime/mfinal.go
+++ b/src/runtime/mfinal.go
@@ -8,6 +8,14 @@
import "unsafe"
+type finblock struct {
+ alllink *finblock
+ next *finblock
+ cnt int32
+ _ int32
+ fin [(_FinBlockSize - 2*ptrSize - 2*4) / unsafe.Sizeof(finalizer{})]finalizer
+}
+
var finlock mutex // protects the following variables
var fing *g // goroutine that runs finalizers
var finq *finblock // list of finalizers that are to be executed
@@ -17,6 +25,15 @@
var fingwake bool
var allfin *finblock // list of all blocks
+// NOTE: Layout known to queuefinalizer.
+type finalizer struct {
+ fn *funcval // function to call
+ arg unsafe.Pointer // ptr to object
+ nret uintptr // bytes of return values from fn
+ fint *_type // type of first argument of fn
+ ot *ptrtype // type of ptr to object
+}
+
var finalizer1 = [...]byte{
// Each Finalizer is 5 words, ptr ptr uintptr ptr ptr.
// Each byte describes 4 words.
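
The finblock/finalizer layout consolidated into mfinal.go is what backs the public runtime.SetFinalizer API; a minimal, hedged usage sketch (finalizer timing is best-effort, so the print may need an extra GC cycle):

package main

import (
	"fmt"
	"runtime"
	"time"
)

type resource struct{ name string }

func main() {
	r := &resource{name: "demo"}
	// The runtime records this as a finalizer special on r's span; once r
	// becomes unreachable, a GC queues the finalizer onto finq and the
	// finalizer goroutine eventually runs it.
	runtime.SetFinalizer(r, func(r *resource) {
		fmt.Println("finalizing", r.name)
	})

	r = nil
	runtime.GC()                       // may take more than one cycle in practice
	time.Sleep(100 * time.Millisecond) // give the finalizer goroutine a chance to run
}
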
diff --git a/src/runtime/mfixalloc.go b/src/runtime/mfixalloc.go
index 3934a9e..c1106b6 100644
--- a/src/runtime/mfixalloc.go
+++ b/src/runtime/mfixalloc.go
@@ -10,6 +10,34 @@
import "unsafe"
+// FixAlloc is a simple free-list allocator for fixed size objects.
+// Malloc uses a FixAlloc wrapped around sysAlloc to manage its
+// MCache and MSpan objects.
+//
+// Memory returned by FixAlloc_Alloc is not zeroed.
+// The caller is responsible for locking around FixAlloc calls.
+// Callers can keep state in the object but the first word is
+// smashed by freeing and reallocating.
+type fixalloc struct {
+ size uintptr
+ first unsafe.Pointer // go func(unsafe.Pointer, unsafe.Pointer); f(arg, p) called first time p is returned
+ arg unsafe.Pointer
+ list *mlink
+ chunk *byte
+ nchunk uint32
+ inuse uintptr // in-use bytes now
+ stat *uint64
+}
+
+// A generic linked list of blocks. (Typically the block is bigger than sizeof(MLink).)
+// Since assignments to mlink.next will result in a write barrier being performed,
+// this cannot be used by some of the internal GC structures. For example, when
+// the sweeper is placing an unmarked object on the free list it does not want the
+// write barrier to be called since that could result in the object being reachable.
+type mlink struct {
+ next *mlink
+}
+
// Initialize f to allocate objects of the given size,
// using the allocator to obtain chunks of memory.
func fixAlloc_Init(f *fixalloc, size uintptr, first func(unsafe.Pointer, unsafe.Pointer), arg unsafe.Pointer, stat *uint64) {
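
The fixalloc comment describes the classic fixed-size free-list pattern: hand out blocks carved from a larger chunk, thread freed blocks onto a list, and accept that the first word of a freed block is smashed by the link. A simplified, type-safe standalone sketch of that idea (freeListAlloc, node, and chunkLen are illustrative names, not the runtime's fixalloc):

package main

import "fmt"

const chunkLen = 64 // blocks obtained from the backing store at a time

// node is the fixed-size object; while a node sits on the free list,
// its next field doubles as the free-list link (the "smashed" word).
type node struct {
	next *node
	val  int
}

type freeListAlloc struct {
	free  *node
	chunk []node
	inuse int
}

func (a *freeListAlloc) alloc() *node {
	if a.free != nil {
		n := a.free
		a.free = n.next
		n.next = nil // this word was overwritten while on the free list
		a.inuse++
		return n
	}
	if len(a.chunk) == 0 {
		a.chunk = make([]node, chunkLen) // refill from the backing store
	}
	n := &a.chunk[0]
	a.chunk = a.chunk[1:]
	a.inuse++
	return n
}

func (a *freeListAlloc) freeNode(n *node) {
	n.next = a.free
	a.free = n
	a.inuse--
}

func main() {
	var a freeListAlloc
	x := a.alloc()
	x.val = 42
	a.freeNode(x)
	y := a.alloc() // reuses x's storage from the free list
	fmt.Println(x == y, a.inuse)
}
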
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 0e9756c..497695c 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -131,36 +131,82 @@
_RootCount = 5
)
-// ptrmask for an allocation containing a single pointer.
-var oneptr = [...]uint8{typePointer}
+// linker-provided
+var data, edata, bss, ebss, gcdata, gcbss, noptrdata, enoptrdata, noptrbss, enoptrbss, end struct{}
-// Initialized from $GOGC. GOGC=off means no GC.
-var gcpercent int32
+//go:linkname weak_cgo_allocate go.weak.runtime._cgo_allocate_internal
+var weak_cgo_allocate byte
-// Holding worldsema grants an M the right to try to stop the world.
-// The procedure is:
-//
-// semacquire(&worldsema);
-// m.preemptoff = "reason";
-// stoptheworld();
-//
-// ... do stuff ...
-//
-// m.preemptoff = "";
-// semrelease(&worldsema);
-// starttheworld();
-//
-var worldsema uint32 = 1
+// Is _cgo_allocate linked into the binary?
+//go:nowritebarrier
+func have_cgo_allocate() bool {
+ return &weak_cgo_allocate != nil
+}
-var data, edata, bss, ebss, gcdata, gcbss struct{}
+// Slow for now as we serialize this; since this is on a debug path,
+// speed is not critical at this point.
+var andlock mutex
+
+//go:nowritebarrier
+func atomicand8(src *byte, val byte) {
+ lock(&andlock)
+ *src &= val
+ unlock(&andlock)
+}
var gcdatamask bitvector
var gcbssmask bitvector
-var gclock mutex
+// heapminimum is the minimum number of bytes in the heap.
+// This cleans up the corner case where we have a very small live set but a lot
+// of allocations, and collecting every GOGC * live set is expensive.
+var heapminimum = uint64(4 << 20)
-var badblock [1024]uintptr
-var nbadblock int32
+// Initialized from $GOGC. GOGC=off means no GC.
+var gcpercent int32
+
+func gcinit() {
+ if unsafe.Sizeof(workbuf{}) != _WorkbufSize {
+ throw("size of Workbuf is suboptimal")
+ }
+
+ work.markfor = parforalloc(_MaxGcproc)
+ gcpercent = readgogc()
+ gcdatamask = unrollglobgcprog((*byte)(unsafe.Pointer(&gcdata)), uintptr(unsafe.Pointer(&edata))-uintptr(unsafe.Pointer(&data)))
+ gcbssmask = unrollglobgcprog((*byte)(unsafe.Pointer(&gcbss)), uintptr(unsafe.Pointer(&ebss))-uintptr(unsafe.Pointer(&bss)))
+ memstats.next_gc = heapminimum
+}
+
+func setGCPercent(in int32) (out int32) {
+ lock(&mheap_.lock)
+ out = gcpercent
+ if in < 0 {
+ in = -1
+ }
+ gcpercent = in
+ unlock(&mheap_.lock)
+ return out
+}
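
setGCPercent above is the runtime side of the public runtime/debug.SetGCPercent knob; both return the previous value. For example:

package main

import (
	"fmt"
	"runtime/debug"
)

func main() {
	// SetGCPercent returns the previous value, mirroring setGCPercent above.
	old := debug.SetGCPercent(50) // collect once the heap grows 50% past the live set
	fmt.Println("previous GOGC:", old)

	debug.SetGCPercent(-1) // any negative value disables GC, like GOGC=off
	debug.SetGCPercent(old)
}
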
+
+// Trigger the concurrent GC when 1/triggerratio memory is available to allocate.
+// Adjust this ratio as part of a scheme to ensure that mutators have enough
+// memory to allocate during a concurrent GC cycle.
+var triggerratio = int64(8)
+
+// Determine whether to initiate a GC.
+// If the GC is already working, there is no need to trigger another one.
+// This should establish a feedback loop: if the GC does not
+// have sufficient time to complete, then more memory will be
+// requested from the OS, increasing the heap size and thus allowing
+// future GCs more time to complete.
+// memstats.heap_alloc and memstats.next_gc reads have benign races:
+// a false negative simply does not start a GC, and a false positive
+// will start a GC needlessly. Neither has correctness issues.
+func shouldtriggergc() bool {
+ return triggerratio*(int64(memstats.next_gc)-int64(memstats.heap_alloc)) <= int64(memstats.next_gc) && atomicloaduint(&bggc.working) == 0
+}
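
To make the trigger condition concrete: with triggerratio = 8, the concurrent GC starts once the remaining headroom (next_gc - heap_alloc) has shrunk to 1/8 of next_gc, i.e. once the heap is 7/8 of the way to its goal. A small arithmetic sketch of just that inequality (shouldTrigger, nextGC, and heapAlloc are illustrative names; the bggc.working check is omitted):

package main

import "fmt"

const triggerRatio = 8

// shouldTrigger mirrors the inequality in shouldtriggergc above.
func shouldTrigger(nextGC, heapAlloc int64) bool {
	return triggerRatio*(nextGC-heapAlloc) <= nextGC
}

func main() {
	const nextGC = 8 << 20                    // 8 MB goal
	fmt.Println(shouldTrigger(nextGC, 6<<20)) // false: 2 MB headroom > 1 MB
	fmt.Println(shouldTrigger(nextGC, 7<<20)) // true: headroom is down to 1/8 of the goal
}
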
+
+var work workdata
type workdata struct {
full uint64 // lock-free list of full blocks workbuf
@@ -178,847 +224,45 @@
spans []*mspan
}
-var work workdata
-
-//go:linkname weak_cgo_allocate go.weak.runtime._cgo_allocate_internal
-var weak_cgo_allocate byte
-
-// Is _cgo_allocate linked into the binary?
-//go:nowritebarrier
-func have_cgo_allocate() bool {
- return &weak_cgo_allocate != nil
+// GC runs a garbage collection.
+func GC() {
+ gogc(2)
}
-// To help debug the concurrent GC we remark with the world
-// stopped ensuring that any object encountered has their normal
-// mark bit set. To do this we use an orthogonal bit
-// pattern to indicate the object is marked. The following pattern
-// uses the upper two bits in the object's bounday nibble.
-// 01: scalar not marked
-// 10: pointer not marked
-// 11: pointer marked
-// 00: scalar marked
-// Xoring with 01 will flip the pattern from marked to unmarked and vica versa.
-// The higher bit is 1 for pointers and 0 for scalars, whether the object
-// is marked or not.
-// The first nibble no longer holds the typeDead pattern indicating that the
-// there are no more pointers in the object. This information is held
-// in the second nibble.
+// force = 0 - start concurrent GC
+// force = 1 - do STW GC regardless of current heap usage
+// force = 2 - do STW GC and eager sweep
+func gogc(force int32) {
+ // The gc is turned off (via enablegc) until the bootstrap has completed.
+ // Also, malloc gets called in the guts of a number of libraries that might be
+ // holding locks. To avoid deadlocks during stoptheworld, don't bother
+ // trying to run gc while holding a lock. The next mallocgc without a lock
+ // will do the gc instead.
-// When marking an object if the bool checkmarkphase is true one uses the above
-// encoding, otherwise one uses the bitMarked bit in the lower two bits
-// of the nibble.
-var checkmarkphase = false
-
-// inheap reports whether b is a pointer into a (potentially dead) heap object.
-// It returns false for pointers into stack spans.
-//go:nowritebarrier
-func inheap(b uintptr) bool {
- if b == 0 || b < mheap_.arena_start || b >= mheap_.arena_used {
- return false
+ mp := acquirem()
+ if gp := getg(); gp == mp.g0 || mp.locks > 1 || !memstats.enablegc || panicking != 0 || gcpercent < 0 {
+ releasem(mp)
+ return
}
- // Not a beginning of a block, consult span table to find the block beginning.
- k := b >> _PageShift
- x := k
- x -= mheap_.arena_start >> _PageShift
- s := h_spans[x]
- if s == nil || pageID(k) < s.start || b >= s.limit || s.state != mSpanInUse {
- return false
- }
- return true
-}
+ releasem(mp)
+ mp = nil
-// Slow for now as we serialize this, since this is on a debug path
-// speed is not critical at this point.
-var andlock mutex
-
-//go:nowritebarrier
-func atomicand8(src *byte, val byte) {
- lock(&andlock)
- *src &= val
- unlock(&andlock)
-}
-
-// When in GCmarkterminate phase we allocate black.
-//go:nowritebarrier
-func gcmarknewobject_m(obj uintptr) {
- if gcphase != _GCmarktermination {
- throw("marking new object while not in mark termination phase")
- }
- if checkmarkphase { // The world should be stopped so this should not happen.
- throw("gcmarknewobject called while doing checkmark")
- }
-
- heapBitsForAddr(obj).setMarked()
-}
-
-// obj is the start of an object with mark mbits.
-// If it isn't already marked, mark it and enqueue into workbuf.
-// Return possibly new workbuf to use.
-// base and off are for debugging only and could be removed.
-//go:nowritebarrier
-func greyobject(obj, base, off uintptr, hbits heapBits, gcw *gcWorkProducer) {
- // obj should be start of allocation, and so must be at least pointer-aligned.
- if obj&(ptrSize-1) != 0 {
- throw("greyobject: obj not pointer-aligned")
- }
-
- if checkmarkphase {
- if !hbits.isMarked() {
- print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n")
- print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")
-
- // Dump the source (base) object
-
- kb := base >> _PageShift
- xb := kb
- xb -= mheap_.arena_start >> _PageShift
- sb := h_spans[xb]
- printlock()
- print("runtime:greyobject Span: base=", hex(base), " kb=", hex(kb))
- if sb == nil {
- print(" sb=nil\n")
- } else {
- print(" sb.start*_PageSize=", hex(sb.start*_PageSize), " sb.limit=", hex(sb.limit), " sb.sizeclass=", sb.sizeclass, " sb.elemsize=", sb.elemsize, "\n")
- // base is (a pointer to) the source object holding the reference to object. Create a pointer to each of the fields
- // fields in base and print them out as hex values.
- for i := 0; i < int(sb.elemsize/ptrSize); i++ {
- print(" *(base+", i*ptrSize, ") = ", hex(*(*uintptr)(unsafe.Pointer(base + uintptr(i)*ptrSize))), "\n")
- }
- }
-
- // Dump the object
-
- k := obj >> _PageShift
- x := k
- x -= mheap_.arena_start >> _PageShift
- s := h_spans[x]
- print("runtime:greyobject Span: obj=", hex(obj), " k=", hex(k))
- if s == nil {
- print(" s=nil\n")
- } else {
- print(" s.start=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), " s.sizeclass=", s.sizeclass, " s.elemsize=", s.elemsize, "\n")
- // NOTE(rsc): This code is using s.sizeclass as an approximation of the
- // number of pointer-sized words in an object. Perhaps not what was intended.
- for i := 0; i < int(s.sizeclass); i++ {
- print(" *(obj+", i*ptrSize, ") = ", hex(*(*uintptr)(unsafe.Pointer(obj + uintptr(i)*ptrSize))), "\n")
- }
- }
- throw("checkmark found unmarked object")
+ if force == 0 {
+ lock(&bggc.lock)
+ if !bggc.started {
+ bggc.working = 1
+ bggc.started = true
+ go backgroundgc()
+ } else if bggc.working == 0 {
+ bggc.working = 1
+ ready(bggc.g)
}
- if !hbits.isCheckmarked() {
- return
- }
- hbits.setCheckmarked()
- if !hbits.isCheckmarked() {
- throw("setCheckmarked and isCheckmarked disagree")
- }
+ unlock(&bggc.lock)
} else {
- // If marked we have nothing to do.
- if hbits.isMarked() {
- return
- }
-
- // Each byte of GC bitmap holds info for two words.
- // Might be racing with other updates, so use atomic update always.
- // We used to be clever here and use a non-atomic update in certain
- // cases, but it's not worth the risk.
- hbits.setMarked()
- }
-
- if !checkmarkphase && hbits.typeBits() == typeDead {
- return // noscan object
- }
-
- // Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
- // seems like a nice optimization that can be added back in.
- // There needs to be time between the PREFETCH and the use.
- // Previously we put the obj in an 8 element buffer that is drained at a rate
- // to give the PREFETCH time to do its work.
- // Use of PREFETCHNTA might be more appropriate than PREFETCH
-
- gcw.put(obj)
-}
-
-// Scan the object b of size n, adding pointers to wbuf.
-// Return possibly new wbuf to use.
-// If ptrmask != nil, it specifies where pointers are in b.
-// If ptrmask == nil, the GC bitmap should be consulted.
-// In this case, n may be an overestimate of the size; the GC bitmap
-// must also be used to make sure the scan stops at the end of b.
-//go:nowritebarrier
-func scanobject(b, n uintptr, ptrmask *uint8, gcw *gcWorkProducer) {
- arena_start := mheap_.arena_start
- arena_used := mheap_.arena_used
-
- // Find bits of the beginning of the object.
- var hbits heapBits
- if ptrmask == nil {
- b, hbits = heapBitsForObject(b)
- if b == 0 {
- return
- }
- if n == 0 {
- n = mheap_.arena_used - b
- }
- }
- for i := uintptr(0); i < n; i += ptrSize {
- // Find bits for this word.
- var bits uintptr
- if ptrmask != nil {
- // dense mask (stack or data)
- bits = (uintptr(*(*byte)(add(unsafe.Pointer(ptrmask), (i/ptrSize)/4))) >> (((i / ptrSize) % 4) * typeBitsWidth)) & typeMask
- } else {
- // Check if we have reached end of span.
- // n is an overestimate of the size of the object.
- if (b+i)%_PageSize == 0 && h_spans[(b-arena_start)>>_PageShift] != h_spans[(b+i-arena_start)>>_PageShift] {
- break
- }
-
- bits = uintptr(hbits.typeBits())
- if i > 0 && (hbits.isBoundary() || bits == typeDead) {
- break // reached beginning of the next object
- }
- hbits = hbits.next()
- }
-
- if bits <= typeScalar { // typeScalar, typeDead, typeScalarMarked
- continue
- }
-
- if bits&typePointer != typePointer {
- print("gc checkmarkphase=", checkmarkphase, " b=", hex(b), " ptrmask=", ptrmask, "\n")
- throw("unexpected garbage collection bits")
- }
-
- obj := *(*uintptr)(unsafe.Pointer(b + i))
-
- // At this point we have extracted the next potential pointer.
- // Check if it points into heap.
- if obj == 0 || obj < arena_start || obj >= arena_used {
- continue
- }
-
- if mheap_.shadow_enabled && debug.wbshadow >= 2 && debug.gccheckmark > 0 && checkmarkphase {
- checkwbshadow((*uintptr)(unsafe.Pointer(b + i)))
- }
-
- // Mark the object.
- if obj, hbits := heapBitsForObject(obj); obj != 0 {
- greyobject(obj, b, i, hbits, gcw)
- }
+ gcwork(force)
}
}
-// scanblock scans b as scanobject would.
-// If the gcphase is GCscan, scanblock performs additional checks.
-//go:nowritebarrier
-func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWorkProducer) {
- // Use local copies of original parameters, so that a stack trace
- // due to one of the throws below shows the original block
- // base and extent.
- b := b0
- n := n0
-
- // ptrmask can have 2 possible values:
- // 1. nil - obtain pointer mask from GC bitmap.
- // 2. pointer to a compact mask (for stacks and data).
-
- scanobject(b, n, ptrmask, gcw)
- if gcphase == _GCscan {
- if inheap(b) && ptrmask == nil {
- // b is in heap, we are in GCscan so there should be a ptrmask.
- throw("scanblock: In GCscan phase and inheap is true.")
- }
- }
-}
-
-// gcDrain scans objects in work buffers, blackening grey
-// objects until all work has been drained.
-//go:nowritebarrier
-func gcDrain(gcw *gcWork) {
- if gcphase != _GCmark && gcphase != _GCmarktermination {
- throw("scanblock phase incorrect")
- }
-
- for {
- // If another proc wants a pointer, give it some.
- if work.nwait > 0 && work.full == 0 {
- gcw.balance()
- }
-
- b := gcw.get()
- if b == 0 {
- // work barrier reached
- break
- }
- // If the current wbuf is filled by the scan a new wbuf might be
- // returned that could possibly hold only a single object. This
- // could result in each iteration draining only a single object
- // out of the wbuf passed in + a single object placed
- // into an empty wbuf in scanobject so there could be
- // a performance hit as we keep fetching fresh wbufs.
- scanobject(b, 0, nil, &gcw.gcWorkProducer)
- }
- checknocurrentwbuf()
-}
-
-// gcDrainN scans n objects, blackening grey objects.
-//go:nowritebarrier
-func gcDrainN(gcw *gcWork, n int) {
- checknocurrentwbuf()
- for i := 0; i < n; i++ {
- // This might be a good place to add prefetch code...
- // if(wbuf.nobj > 4) {
- // PREFETCH(wbuf->obj[wbuf.nobj - 3];
- // }
- b := gcw.tryGet()
- if b == 0 {
- return
- }
- scanobject(b, 0, nil, &gcw.gcWorkProducer)
- }
-}
-
-//go:nowritebarrier
-func markroot(desc *parfor, i uint32) {
- var gcw gcWorkProducer
- gcw.initFromCache()
-
- // Note: if you add a case here, please also update heapdump.c:dumproots.
- switch i {
- case _RootData:
- scanblock(uintptr(unsafe.Pointer(&data)), uintptr(unsafe.Pointer(&edata))-uintptr(unsafe.Pointer(&data)), gcdatamask.bytedata, &gcw)
-
- case _RootBss:
- scanblock(uintptr(unsafe.Pointer(&bss)), uintptr(unsafe.Pointer(&ebss))-uintptr(unsafe.Pointer(&bss)), gcbssmask.bytedata, &gcw)
-
- case _RootFinalizers:
- for fb := allfin; fb != nil; fb = fb.alllink {
- scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), uintptr(fb.cnt)*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], &gcw)
- }
-
- case _RootSpans:
- // mark MSpan.specials
- sg := mheap_.sweepgen
- for spanidx := uint32(0); spanidx < uint32(len(work.spans)); spanidx++ {
- s := work.spans[spanidx]
- if s.state != mSpanInUse {
- continue
- }
- if !checkmarkphase && s.sweepgen != sg {
- // sweepgen was updated (+2) during non-checkmark GC pass
- print("sweep ", s.sweepgen, " ", sg, "\n")
- throw("gc: unswept span")
- }
- for sp := s.specials; sp != nil; sp = sp.next {
- if sp.kind != _KindSpecialFinalizer {
- continue
- }
- // don't mark finalized object, but scan it so we
- // retain everything it points to.
- spf := (*specialfinalizer)(unsafe.Pointer(sp))
- // A finalizer can be set for an inner byte of an object, find object beginning.
- p := uintptr(s.start<<_PageShift) + uintptr(spf.special.offset)/s.elemsize*s.elemsize
- if gcphase != _GCscan {
- scanblock(p, s.elemsize, nil, &gcw) // scanned during mark phase
- }
- scanblock(uintptr(unsafe.Pointer(&spf.fn)), ptrSize, &oneptr[0], &gcw)
- }
- }
-
- case _RootFlushCaches:
- if gcphase != _GCscan { // Do not flush mcaches during GCscan phase.
- flushallmcaches()
- }
-
- default:
- // the rest is scanning goroutine stacks
- if uintptr(i-_RootCount) >= allglen {
- throw("markroot: bad index")
- }
- gp := allgs[i-_RootCount]
-
- // remember when we've first observed the G blocked
- // needed only to output in traceback
- status := readgstatus(gp) // We are not in a scan state
- if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 {
- gp.waitsince = work.tstart
- }
-
- // Shrink a stack if not much of it is being used but not in the scan phase.
- if gcphase == _GCmarktermination {
- // Shrink during STW GCmarktermination phase thus avoiding
- // complications introduced by shrinking during
- // non-STW phases.
- shrinkstack(gp)
- }
- if readgstatus(gp) == _Gdead {
- gp.gcworkdone = true
- } else {
- gp.gcworkdone = false
- }
- restart := stopg(gp)
-
- // goroutine will scan its own stack when it stops running.
- // Wait until it has.
- for readgstatus(gp) == _Grunning && !gp.gcworkdone {
- }
-
- // scanstack(gp) is done as part of gcphasework
- // But to make sure we finished we need to make sure that
- // the stack traps have all responded so drop into
- // this while loop until they respond.
- for !gp.gcworkdone {
- status = readgstatus(gp)
- if status == _Gdead {
- gp.gcworkdone = true // scan is a noop
- break
- }
- if status == _Gwaiting || status == _Grunnable {
- restart = stopg(gp)
- }
- }
- if restart {
- restartg(gp)
- }
- }
- gcw.dispose()
-}
-
-//go:nowritebarrier
-func stackmapdata(stkmap *stackmap, n int32) bitvector {
- if n < 0 || n >= stkmap.n {
- throw("stackmapdata: index out of range")
- }
- return bitvector{stkmap.nbit, (*byte)(add(unsafe.Pointer(&stkmap.bytedata), uintptr(n*((stkmap.nbit+31)/32*4))))}
-}
-
-// Scan a stack frame: local variables and function arguments/results.
-//go:nowritebarrier
-func scanframeworker(frame *stkframe, unused unsafe.Pointer, gcw *gcWorkProducer) {
-
- f := frame.fn
- targetpc := frame.continpc
- if targetpc == 0 {
- // Frame is dead.
- return
- }
- if _DebugGC > 1 {
- print("scanframe ", funcname(f), "\n")
- }
- if targetpc != f.entry {
- targetpc--
- }
- pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc)
- if pcdata == -1 {
- // We do not have a valid pcdata value but there might be a
- // stackmap for this function. It is likely that we are looking
- // at the function prologue, assume so and hope for the best.
- pcdata = 0
- }
-
- // Scan local variables if stack frame has been allocated.
- size := frame.varp - frame.sp
- var minsize uintptr
- if thechar != '6' && thechar != '8' {
- minsize = ptrSize
- } else {
- minsize = 0
- }
- if size > minsize {
- stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
- if stkmap == nil || stkmap.n <= 0 {
- print("runtime: frame ", funcname(f), " untyped locals ", hex(frame.varp-size), "+", hex(size), "\n")
- throw("missing stackmap")
- }
-
- // Locals bitmap information, scan just the pointers in locals.
- if pcdata < 0 || pcdata >= stkmap.n {
- // don't know where we are
- print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " locals stack map entries for ", funcname(f), " (targetpc=", targetpc, ")\n")
- throw("scanframe: bad symbol table")
- }
- bv := stackmapdata(stkmap, pcdata)
- size = (uintptr(bv.n) / typeBitsWidth) * ptrSize
- scanblock(frame.varp-size, size, bv.bytedata, gcw)
- }
-
- // Scan arguments.
- if frame.arglen > 0 {
- var bv bitvector
- if frame.argmap != nil {
- bv = *frame.argmap
- } else {
- stkmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
- if stkmap == nil || stkmap.n <= 0 {
- print("runtime: frame ", funcname(f), " untyped args ", hex(frame.argp), "+", hex(frame.arglen), "\n")
- throw("missing stackmap")
- }
- if pcdata < 0 || pcdata >= stkmap.n {
- // don't know where we are
- print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " args stack map entries for ", funcname(f), " (targetpc=", targetpc, ")\n")
- throw("scanframe: bad symbol table")
- }
- bv = stackmapdata(stkmap, pcdata)
- }
- scanblock(frame.argp, uintptr(bv.n)/typeBitsWidth*ptrSize, bv.bytedata, gcw)
- }
-}
-
-//go:nowritebarrier
-func scanstack(gp *g) {
- if gp.gcscanvalid {
- return
- }
-
- if readgstatus(gp)&_Gscan == 0 {
- print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n")
- throw("scanstack - bad status")
- }
-
- switch readgstatus(gp) &^ _Gscan {
- default:
- print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
- throw("mark - bad status")
- case _Gdead:
- return
- case _Grunning:
- print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
- throw("scanstack: goroutine not stopped")
- case _Grunnable, _Gsyscall, _Gwaiting:
- // ok
- }
-
- if gp == getg() {
- throw("can't scan our own stack")
- }
- mp := gp.m
- if mp != nil && mp.helpgc != 0 {
- throw("can't scan gchelper stack")
- }
-
- var gcw gcWorkProducer
- gcw.initFromCache()
- scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
- // Pick up gcw as free variable so gentraceback and friends can
- // keep the same signature.
- scanframeworker(frame, unused, &gcw)
- return true
- }
- gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
- tracebackdefers(gp, scanframe, nil)
- gcw.disposeToCache()
- gp.gcscanvalid = true
-}
-
-// Shade the object if it isn't already.
-// The object is not nil and known to be in the heap.
-//go:nowritebarrier
-func shade(b uintptr) {
- if !inheap(b) {
- throw("shade: passed an address not in the heap")
- }
- if obj, hbits := heapBitsForObject(b); obj != 0 {
- // TODO: this would be a great place to put a check to see
- // if we are harvesting and if we are then we should
- // figure out why there is a call to shade when the
- // harvester thinks we are in a STW.
- // if atomicload(&harvestingwbufs) == uint32(1) {
- // // Throw here to discover write barriers
- // // being executed during a STW.
- // throw("shade during harvest")
- // }
-
- var gcw gcWorkProducer
- greyobject(obj, 0, 0, hbits, &gcw)
- // This is part of the write barrier so put the wbuf back.
- if gcphase == _GCmarktermination {
- gcw.dispose()
- } else {
- // If we added any pointers to the gcw, then
- // currentwbuf must be nil because 1)
- // greyobject got its wbuf from currentwbuf
- // and 2) shade runs on the systemstack, so
- // we're still on the same M. If either of
- // these becomes no longer true, we need to
- // rethink this.
- gcw.disposeToCache()
- }
- }
-}
-
-// gchelpwork does a small bounded amount of gc work. The purpose is to
-// shorten the time (as measured by allocations) spent doing a concurrent GC.
-// The number of mutator calls is roughly propotional to the number of allocations
-// made by that mutator. This slows down the allocation while speeding up the GC.
-//go:nowritebarrier
-func gchelpwork() {
- switch gcphase {
- default:
- throw("gcphasework in bad gcphase")
- case _GCoff, _GCquiesce, _GCstw:
- // No work.
- case _GCsweep:
- // We could help by calling sweepone to sweep a single span.
- // _ = sweepone()
- case _GCscan:
- // scan the stack, mark the objects, put pointers in work buffers
- // hanging off the P where this is being run.
- // scanstack(gp)
- case _GCmark:
- // Get a full work buffer and empty it.
- // drain your own currentwbuf first in the hopes that it will
- // be more cache friendly.
- var gcw gcWork
- gcw.initFromCache()
- const n = len(workbuf{}.obj)
- gcDrainN(&gcw, n) // drain upto one buffer's worth of objects
- gcw.dispose()
- case _GCmarktermination:
- // We should never be here since the world is stopped.
- // All available mark work will be emptied before returning.
- throw("gcphasework in bad gcphase")
- }
-}
-
-// The gp has been moved to a GC safepoint. GC phase specific
-// work is done here.
-//go:nowritebarrier
-func gcphasework(gp *g) {
- switch gcphase {
- default:
- throw("gcphasework in bad gcphase")
- case _GCoff, _GCquiesce, _GCstw, _GCsweep:
- // No work.
- case _GCscan:
- // scan the stack, mark the objects, put pointers in work buffers
- // hanging off the P where this is being run.
- // Indicate that the scan is valid until the goroutine runs again
- scanstack(gp)
- case _GCmark:
- // No work.
- case _GCmarktermination:
- scanstack(gp)
- // All available mark work will be emptied before returning.
- }
- gp.gcworkdone = true
-}
-
-// Returns only when span s has been swept.
-//go:nowritebarrier
-func mSpan_EnsureSwept(s *mspan) {
- // Caller must disable preemption.
- // Otherwise when this function returns the span can become unswept again
- // (if GC is triggered on another goroutine).
- _g_ := getg()
- if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 {
- throw("MSpan_EnsureSwept: m is not locked")
- }
-
- sg := mheap_.sweepgen
- if atomicload(&s.sweepgen) == sg {
- return
- }
- // The caller must be sure that the span is a MSpanInUse span.
- if cas(&s.sweepgen, sg-2, sg-1) {
- mSpan_Sweep(s, false)
- return
- }
- // unfortunate condition, and we don't have efficient means to wait
- for atomicload(&s.sweepgen) != sg {
- osyield()
- }
-}
-
-// Sweep frees or collects finalizers for blocks not marked in the mark phase.
-// It clears the mark bits in preparation for the next GC round.
-// Returns true if the span was returned to heap.
-// If preserve=true, don't return it to heap nor relink in MCentral lists;
-// caller takes care of it.
-//TODO go:nowritebarrier
-func mSpan_Sweep(s *mspan, preserve bool) bool {
- if checkmarkphase {
- throw("MSpan_Sweep: checkmark only runs in STW and after the sweep")
- }
-
- // It's critical that we enter this function with preemption disabled,
- // GC must not start while we are in the middle of this function.
- _g_ := getg()
- if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 {
- throw("MSpan_Sweep: m is not locked")
- }
- sweepgen := mheap_.sweepgen
- if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
- print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
- throw("MSpan_Sweep: bad span state")
- }
-
- if trace.enabled {
- traceGCSweepStart()
- }
-
- cl := s.sizeclass
- size := s.elemsize
- res := false
- nfree := 0
-
- var head, end gclinkptr
-
- c := _g_.m.mcache
- sweepgenset := false
-
- // Mark any free objects in this span so we don't collect them.
- for link := s.freelist; link.ptr() != nil; link = link.ptr().next {
- heapBitsForAddr(uintptr(link)).setMarkedNonAtomic()
- }
-
- // Unlink & free special records for any objects we're about to free.
- specialp := &s.specials
- special := *specialp
- for special != nil {
- // A finalizer can be set for an inner byte of an object, find object beginning.
- p := uintptr(s.start<<_PageShift) + uintptr(special.offset)/size*size
- hbits := heapBitsForAddr(p)
- if !hbits.isMarked() {
- // Find the exact byte for which the special was setup
- // (as opposed to object beginning).
- p := uintptr(s.start<<_PageShift) + uintptr(special.offset)
- // about to free object: splice out special record
- y := special
- special = special.next
- *specialp = special
- if !freespecial(y, unsafe.Pointer(p), size, false) {
- // stop freeing of object if it has a finalizer
- hbits.setMarkedNonAtomic()
- }
- } else {
- // object is still live: keep special record
- specialp = &special.next
- special = *specialp
- }
- }
-
- // Sweep through n objects of given size starting at p.
- // This thread owns the span now, so it can manipulate
- // the block bitmap without atomic operations.
-
- size, n, _ := s.layout()
- heapBitsSweepSpan(s.base(), size, n, func(p uintptr) {
- // At this point we know that we are looking at garbage object
- // that needs to be collected.
- if debug.allocfreetrace != 0 {
- tracefree(unsafe.Pointer(p), size)
- }
-
- // Reset to allocated+noscan.
- if cl == 0 {
- // Free large span.
- if preserve {
- throw("can't preserve large span")
- }
- heapBitsForSpan(p).clearSpan(s.layout())
- s.needzero = 1
-
- // important to set sweepgen before returning it to heap
- atomicstore(&s.sweepgen, sweepgen)
- sweepgenset = true
-
- // NOTE(rsc,dvyukov): The original implementation of efence
- // in CL 22060046 used SysFree instead of SysFault, so that
- // the operating system would eventually give the memory
- // back to us again, so that an efence program could run
- // longer without running out of memory. Unfortunately,
- // calling SysFree here without any kind of adjustment of the
- // heap data structures means that when the memory does
- // come back to us, we have the wrong metadata for it, either in
- // the MSpan structures or in the garbage collection bitmap.
- // Using SysFault here means that the program will run out of
- // memory fairly quickly in efence mode, but at least it won't
- // have mysterious crashes due to confused memory reuse.
- // It should be possible to switch back to SysFree if we also
- // implement and then call some kind of MHeap_DeleteSpan.
- if debug.efence > 0 {
- s.limit = 0 // prevent mlookup from finding this span
- sysFault(unsafe.Pointer(p), size)
- } else {
- mHeap_Free(&mheap_, s, 1)
- }
- c.local_nlargefree++
- c.local_largefree += size
- reduction := int64(size) * int64(gcpercent+100) / 100
- if int64(memstats.next_gc)-reduction > int64(heapminimum) {
- xadd64(&memstats.next_gc, -reduction)
- } else {
- atomicstore64(&memstats.next_gc, heapminimum)
- }
- res = true
- } else {
- // Free small object.
- if size > 2*ptrSize {
- *(*uintptr)(unsafe.Pointer(p + ptrSize)) = uintptrMask & 0xdeaddeaddeaddead // mark as "needs to be zeroed"
- } else if size > ptrSize {
- *(*uintptr)(unsafe.Pointer(p + ptrSize)) = 0
- }
- if head.ptr() == nil {
- head = gclinkptr(p)
- } else {
- end.ptr().next = gclinkptr(p)
- }
- end = gclinkptr(p)
- end.ptr().next = gclinkptr(0x0bade5)
- nfree++
- }
- })
-
- // We need to set s.sweepgen = h.sweepgen only when all blocks are swept,
- // because of the potential for a concurrent free/SetFinalizer.
- // But we need to set it before we make the span available for allocation
- // (return it to heap or mcentral), because allocation code assumes that a
- // span is already swept if available for allocation.
- if !sweepgenset && nfree == 0 {
- // The span must be in our exclusive ownership until we update sweepgen,
- // check for potential races.
- if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
- print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
- throw("MSpan_Sweep: bad span state after sweep")
- }
- atomicstore(&s.sweepgen, sweepgen)
- }
- if nfree > 0 {
- c.local_nsmallfree[cl] += uintptr(nfree)
- c.local_cachealloc -= intptr(uintptr(nfree) * size)
- reduction := int64(nfree) * int64(size) * int64(gcpercent+100) / 100
- if int64(memstats.next_gc)-reduction > int64(heapminimum) {
- xadd64(&memstats.next_gc, -reduction)
- } else {
- atomicstore64(&memstats.next_gc, heapminimum)
- }
- res = mCentral_FreeSpan(&mheap_.central[cl].mcentral, s, int32(nfree), head, end, preserve)
- // MCentral_FreeSpan updates sweepgen
- }
- if trace.enabled {
- traceGCSweepDone()
- traceNextGC()
- }
- return res
-}
-
-// State of background sweep.
-// Protected by gclock.
-type sweepdata struct {
- g *g
- parked bool
- started bool
-
- spanidx uint32 // background sweeper position
-
- nbgsweep uint32
- npausesweep uint32
-}
-
-var sweep sweepdata
-
// State of the background concurrent GC goroutine.
var bggc struct {
lock mutex
@@ -1027,237 +271,161 @@
started bool
}
-// sweeps one span
-// returns number of pages returned to heap, or ^uintptr(0) if there is nothing to sweep
-//go:nowritebarrier
-func sweepone() uintptr {
- _g_ := getg()
-
- // increment locks to ensure that the goroutine is not preempted
- // in the middle of sweep thus leaving the span in an inconsistent state for next GC
- _g_.m.locks++
- sg := mheap_.sweepgen
+// backgroundgc runs in a goroutine and does the concurrent GC work.
+// bggc holds the state of the backgroundgc.
+func backgroundgc() {
+ bggc.g = getg()
for {
- idx := xadd(&sweep.spanidx, 1) - 1
- if idx >= uint32(len(work.spans)) {
- mheap_.sweepdone = 1
- _g_.m.locks--
- return ^uintptr(0)
- }
- s := work.spans[idx]
- if s.state != mSpanInUse {
- s.sweepgen = sg
- continue
- }
- if s.sweepgen != sg-2 || !cas(&s.sweepgen, sg-2, sg-1) {
- continue
- }
- npages := s.npages
- if !mSpan_Sweep(s, false) {
- npages = 0
- }
- _g_.m.locks--
- return npages
+ gcwork(0)
+ lock(&bggc.lock)
+ bggc.working = 0
+ goparkunlock(&bggc.lock, "Concurrent GC wait", traceEvGoBlock)
}
}
-//go:nowritebarrier
-func gosweepone() uintptr {
- var ret uintptr
+func gcwork(force int32) {
+
+ semacquire(&worldsema, false)
+
+ // Pick up the remaining unswept/not being swept spans concurrently
+ for gosweepone() != ^uintptr(0) {
+ sweep.nbgsweep++
+ }
+
+ // Ok, we're doing it! Stop everybody else
+
+ mp := acquirem()
+ mp.preemptoff = "gcing"
+ releasem(mp)
+ gctimer.count++
+ if force == 0 {
+ gctimer.cycle.sweepterm = nanotime()
+ }
+
+ if trace.enabled {
+ traceGoSched()
+ traceGCStart()
+ }
+
+ // Pick up the remaining unswept/not being swept spans before we STW
+ for gosweepone() != ^uintptr(0) {
+ sweep.nbgsweep++
+ }
+ systemstack(stoptheworld)
+ systemstack(finishsweep_m) // finish sweep before we start concurrent scan.
+ if force == 0 { // Do as much work concurrently as possible
+ gcphase = _GCscan
+ systemstack(starttheworld)
+ gctimer.cycle.scan = nanotime()
+ // Do a concurrent heap scan before we stop the world.
+ systemstack(gcscan_m)
+ gctimer.cycle.installmarkwb = nanotime()
+ systemstack(stoptheworld)
+ systemstack(gcinstallmarkwb)
+ systemstack(harvestwbufs)
+ systemstack(starttheworld)
+ gctimer.cycle.mark = nanotime()
+ systemstack(gcmark_m)
+ gctimer.cycle.markterm = nanotime()
+ systemstack(stoptheworld)
+ systemstack(gcinstalloffwb_m)
+ } else {
+ // For non-concurrent GC (force != 0) g stacks have not been scanned, so
+ // set gcscanvalid such that mark termination scans all stacks.
+ // No races here since we are in a STW phase.
+ for _, gp := range allgs {
+ gp.gcworkdone = false // set to true in gcphasework
+ gp.gcscanvalid = false // stack has not been scanned
+ }
+ }
+
+ startTime := nanotime()
+ if mp != acquirem() {
+ throw("gogc: rescheduled")
+ }
+
+ clearpools()
+
+ // Run gc on the g0 stack. We do this so that the g stack
+ // we're currently running on will no longer change. Cuts
+ // the root set down a bit (g0 stacks are not scanned, and
+ // we don't need to scan gc's internal state). We also
+ // need to switch to g0 so we can shrink the stack.
+ n := 1
+ if debug.gctrace > 1 {
+ n = 2
+ }
+ eagersweep := force >= 2
+ for i := 0; i < n; i++ {
+ if i > 0 {
+ // refresh start time if doing a second GC
+ startTime = nanotime()
+ }
+ // switch to g0, call gc, then switch back
+ systemstack(func() {
+ gc_m(startTime, eagersweep)
+ })
+ }
+
systemstack(func() {
- ret = sweepone()
+ gccheckmark_m(startTime, eagersweep)
})
- return ret
-}
-
-//go:nowritebarrier
-func gosweepdone() bool {
- return mheap_.sweepdone != 0
-}
-
-//go:nowritebarrier
-func gchelper() {
- _g_ := getg()
- _g_.m.traceback = 2
- gchelperstart()
if trace.enabled {
- traceGCScanStart()
+ traceGCDone()
+ traceGoStart()
}
- // parallel mark for over GC roots
- parfordo(work.markfor)
- if gcphase != _GCscan {
- var gcw gcWork
- gcDrain(&gcw) // blocks in getfull
- gcw.dispose()
+ // all done
+ mp.preemptoff = ""
+
+ if force == 0 {
+ gctimer.cycle.sweep = nanotime()
}
- if trace.enabled {
- traceGCScanDone()
+ semrelease(&worldsema)
+
+ if force == 0 {
+ if gctimer.verbose > 1 {
+ GCprinttimes()
+ } else if gctimer.verbose > 0 {
+ calctimes() // ignore result
+ }
}
- nproc := work.nproc // work.nproc can change right after we increment work.ndone
- if xadd(&work.ndone, +1) == nproc-1 {
- notewakeup(&work.alldone)
+ systemstack(starttheworld)
+
+ releasem(mp)
+ mp = nil
+
+ // now that gc is done, kick off finalizer thread if needed
+ if !concurrentSweep {
+ // give the queued finalizers, if any, a chance to run
+ Gosched()
}
- _g_.m.traceback = 0
}
+// For now this must be bracketed with a stoptheworld and a starttheworld to ensure
+// all goroutines see the new barrier.
//go:nowritebarrier
-func cachestats() {
- for i := 0; ; i++ {
- p := allp[i]
- if p == nil {
- break
- }
- c := p.mcache
- if c == nil {
- continue
- }
- purgecachedstats(c)
- }
+func gcinstalloffwb_m() {
+ gcphase = _GCoff
}
+// For now this must be bracketed with a stoptheworld and a starttheworld to ensure
+// all goroutines see the new barrier.
//go:nowritebarrier
-func flushallmcaches() {
- for i := 0; ; i++ {
- p := allp[i]
- if p == nil {
- break
- }
- c := p.mcache
- if c == nil {
- continue
- }
- mCache_ReleaseAll(c)
- stackcache_clear(c)
- }
+func gcinstallmarkwb() {
+ gcphase = _GCmark
}
+// Mark all objects that are known about.
+// This is the concurrent mark phase.
//go:nowritebarrier
-func updatememstats(stats *gcstats) {
- if stats != nil {
- *stats = gcstats{}
- }
- for mp := allm; mp != nil; mp = mp.alllink {
- if stats != nil {
- src := (*[unsafe.Sizeof(gcstats{}) / 8]uint64)(unsafe.Pointer(&mp.gcstats))
- dst := (*[unsafe.Sizeof(gcstats{}) / 8]uint64)(unsafe.Pointer(stats))
- for i, v := range src {
- dst[i] += v
- }
- mp.gcstats = gcstats{}
- }
- }
-
- memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse)
- memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse)
- memstats.sys = memstats.heap_sys + memstats.stacks_sys + memstats.mspan_sys +
- memstats.mcache_sys + memstats.buckhash_sys + memstats.gc_sys + memstats.other_sys
-
- // Calculate memory allocator stats.
- // During program execution we only count number of frees and amount of freed memory.
- // Current number of alive object in the heap and amount of alive heap memory
- // are calculated by scanning all spans.
- // Total number of mallocs is calculated as number of frees plus number of alive objects.
- // Similarly, total amount of allocated memory is calculated as amount of freed memory
- // plus amount of alive heap memory.
- memstats.alloc = 0
- memstats.total_alloc = 0
- memstats.nmalloc = 0
- memstats.nfree = 0
- for i := 0; i < len(memstats.by_size); i++ {
- memstats.by_size[i].nmalloc = 0
- memstats.by_size[i].nfree = 0
- }
-
- // Flush MCache's to MCentral.
- systemstack(flushallmcaches)
-
- // Aggregate local stats.
- cachestats()
-
- // Scan all spans and count number of alive objects.
- lock(&mheap_.lock)
- for i := uint32(0); i < mheap_.nspan; i++ {
- s := h_allspans[i]
- if s.state != mSpanInUse {
- continue
- }
- if s.sizeclass == 0 {
- memstats.nmalloc++
- memstats.alloc += uint64(s.elemsize)
- } else {
- memstats.nmalloc += uint64(s.ref)
- memstats.by_size[s.sizeclass].nmalloc += uint64(s.ref)
- memstats.alloc += uint64(s.ref) * uint64(s.elemsize)
- }
- }
- unlock(&mheap_.lock)
-
- // Aggregate by size class.
- smallfree := uint64(0)
- memstats.nfree = mheap_.nlargefree
- for i := 0; i < len(memstats.by_size); i++ {
- memstats.nfree += mheap_.nsmallfree[i]
- memstats.by_size[i].nfree = mheap_.nsmallfree[i]
- memstats.by_size[i].nmalloc += mheap_.nsmallfree[i]
- smallfree += uint64(mheap_.nsmallfree[i]) * uint64(class_to_size[i])
- }
- memstats.nfree += memstats.tinyallocs
- memstats.nmalloc += memstats.nfree
-
- // Calculate derived stats.
- memstats.total_alloc = uint64(memstats.alloc) + uint64(mheap_.largefree) + smallfree
- memstats.heap_alloc = memstats.alloc
- memstats.heap_objects = memstats.nmalloc - memstats.nfree
-}
-
-// heapminimum is the minimum number of bytes in the heap.
-// This cleans up the corner case of where we have a very small live set but a lot
-// of allocations and collecting every GOGC * live set is expensive.
-var heapminimum = uint64(4 << 20)
-
-func gcinit() {
- if unsafe.Sizeof(workbuf{}) != _WorkbufSize {
- throw("size of Workbuf is suboptimal")
- }
-
- work.markfor = parforalloc(_MaxGcproc)
- gcpercent = readgogc()
- gcdatamask = unrollglobgcprog((*byte)(unsafe.Pointer(&gcdata)), uintptr(unsafe.Pointer(&edata))-uintptr(unsafe.Pointer(&data)))
- gcbssmask = unrollglobgcprog((*byte)(unsafe.Pointer(&gcbss)), uintptr(unsafe.Pointer(&ebss))-uintptr(unsafe.Pointer(&bss)))
- memstats.next_gc = heapminimum
-}
-
-// Called from malloc.go using systemstack, stopping and starting the world handled in caller.
-//go:nowritebarrier
-func gc_m(start_time int64, eagersweep bool) {
- _g_ := getg()
- gp := _g_.m.curg
- casgstatus(gp, _Grunning, _Gwaiting)
- gp.waitreason = "garbage collection"
-
- gc(start_time, eagersweep)
- casgstatus(gp, _Gwaiting, _Grunning)
-}
-
-//go:nowritebarrier
-func initCheckmarks() {
- for _, s := range work.spans {
- if s.state == _MSpanInUse {
- heapBitsForSpan(s.base()).initCheckmarkSpan(s.layout())
- }
- }
-}
-
-func clearCheckmarks() {
- for _, s := range work.spans {
- if s.state == _MSpanInUse {
- heapBitsForSpan(s.base()).clearCheckmarkSpan(s.layout())
- }
- }
+func gcmark_m() {
+ gcDrain(nil)
+ // TODO add another harvestwbuf and reset work.nwait=0, work.ndone=0, and work.nproc=1
+ // and repeat the above gcDrain.
}
// Called from malloc.go using systemstack.
@@ -1280,90 +448,16 @@
gc_m(startTime, eagersweep) // turns off checkmarkphase + calls clearcheckmarkbits
}
+// Called from malloc.go using systemstack; stopping and starting the world are handled in the caller.
//go:nowritebarrier
-func finishsweep_m() {
- // The world is stopped so we should be able to complete the sweeps
- // quickly.
- for sweepone() != ^uintptr(0) {
- sweep.npausesweep++
- }
-
- // There may be some other spans being swept concurrently that
- // we need to wait for. If finishsweep_m is done with the world stopped
- // this code is not required.
- sg := mheap_.sweepgen
- for _, s := range work.spans {
- if s.sweepgen != sg && s.state == _MSpanInUse {
- mSpan_EnsureSwept(s)
- }
- }
-}
-
-// Scan all of the stacks, greying (or graying if in America) the referents
-// but not blackening them since the mark write barrier isn't installed.
-//go:nowritebarrier
-func gcscan_m() {
+func gc_m(start_time int64, eagersweep bool) {
_g_ := getg()
+ gp := _g_.m.curg
+ casgstatus(gp, _Grunning, _Gwaiting)
+ gp.waitreason = "garbage collection"
- // Grab the g that called us and potentially allow rescheduling.
- // This allows it to be scanned like other goroutines.
- mastergp := _g_.m.curg
- casgstatus(mastergp, _Grunning, _Gwaiting)
- mastergp.waitreason = "garbage collection scan"
-
- // Span sweeping has been done by finishsweep_m.
- // Long term we will want to make this goroutine runnable
- // by placing it onto a scanenqueue state and then calling
- // runtime·restartg(mastergp) to make it Grunnable.
- // At the bottom we will want to return this p back to the scheduler.
-
- // Prepare flag indicating that the scan has not been completed.
- lock(&allglock)
- local_allglen := allglen
- for i := uintptr(0); i < local_allglen; i++ {
- gp := allgs[i]
- gp.gcworkdone = false // set to true in gcphasework
- gp.gcscanvalid = false // stack has not been scanned
- }
- unlock(&allglock)
-
- work.nwait = 0
- work.ndone = 0
- work.nproc = 1 // For now do not do this in parallel.
- // ackgcphase is not needed since we are not scanning running goroutines.
- parforsetup(work.markfor, work.nproc, uint32(_RootCount+local_allglen), false, markroot)
- parfordo(work.markfor)
-
- lock(&allglock)
- // Check that gc work is done.
- for i := uintptr(0); i < local_allglen; i++ {
- gp := allgs[i]
- if !gp.gcworkdone {
- throw("scan missed a g")
- }
- }
- unlock(&allglock)
-
- casgstatus(mastergp, _Gwaiting, _Grunning)
- // Let the g that called us continue to run.
-}
-
-// Mark all objects that are known about.
-// This is the concurrent mark phase.
-//go:nowritebarrier
-func gcmark_m() {
- var gcw gcWork
- gcDrain(&gcw)
- gcw.dispose()
- // TODO add another harvestwbuf and reset work.nwait=0, work.ndone=0, and work.nproc=1
- // and repeat the above gcDrain.
-}
-
-// For now this must be bracketed with a stoptheworld and a starttheworld to ensure
-// all go routines see the new barrier.
-//go:nowritebarrier
-func gcinstalloffwb_m() {
- gcphase = _GCoff
+ gc(start_time, eagersweep)
+ casgstatus(gp, _Gwaiting, _Grunning)
}
// STW is in effect at this point.
@@ -1573,68 +667,89 @@
}
}
-func readmemstats_m(stats *MemStats) {
- updatememstats(nil)
+// Hooks for other packages
- // Size of the trailing by_size array differs between Go and C,
- // NumSizeClasses was changed, but we can not change Go struct because of backward compatibility.
- memmove(unsafe.Pointer(stats), unsafe.Pointer(&memstats), sizeof_C_MStats)
-
- // Stack numbers are part of the heap numbers, separate those out for user consumption
- stats.StackSys = stats.StackInuse
- stats.HeapInuse -= stats.StackInuse
- stats.HeapSys -= stats.StackInuse
+//go:linkname runtime_debug_freeOSMemory runtime/debug.freeOSMemory
+func runtime_debug_freeOSMemory() {
+ gogc(2) // force GC and do eager sweep
+ systemstack(scavenge_m)
}
-//go:linkname readGCStats runtime/debug.readGCStats
-func readGCStats(pauses *[]uint64) {
- systemstack(func() {
- readGCStats_m(pauses)
- })
+var poolcleanup func()
+
+//go:linkname sync_runtime_registerPoolCleanup sync.runtime_registerPoolCleanup
+func sync_runtime_registerPoolCleanup(f func()) {
+ poolcleanup = f
}
-func readGCStats_m(pauses *[]uint64) {
- p := *pauses
- // Calling code in runtime/debug should make the slice large enough.
- if cap(p) < len(memstats.pause_ns)+3 {
- throw("short slice passed to readGCStats")
+func clearpools() {
+ // clear sync.Pools
+ if poolcleanup != nil {
+ poolcleanup()
}
- // Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
- lock(&mheap_.lock)
+ for _, p := range &allp {
+ if p == nil {
+ break
+ }
+ // clear tinyalloc pool
+ if c := p.mcache; c != nil {
+ c.tiny = nil
+ c.tinyoffset = 0
- n := memstats.numgc
- if n > uint32(len(memstats.pause_ns)) {
- n = uint32(len(memstats.pause_ns))
+ // disconnect cached list before dropping it on the floor,
+ // so that a dangling ref to one entry does not pin all of them.
+ var sg, sgnext *sudog
+ for sg = c.sudogcache; sg != nil; sg = sgnext {
+ sgnext = sg.next
+ sg.next = nil
+ }
+ c.sudogcache = nil
+ }
+
+ // clear defer pools
+ for i := range p.deferpool {
+ // disconnect cached list before dropping it on the floor,
+ // so that a dangling ref to one entry does not pin all of them.
+ var d, dlink *_defer
+ for d = p.deferpool[i]; d != nil; d = dlink {
+ dlink = d.link
+ d.link = nil
+ }
+ p.deferpool[i] = nil
+ }
}
-
- // The pause buffer is circular. The most recent pause is at
- // pause_ns[(numgc-1)%len(pause_ns)], and then backward
- // from there to go back farther in time. We deliver the times
- // most recent first (in p[0]).
- p = p[:cap(p)]
- for i := uint32(0); i < n; i++ {
- j := (memstats.numgc - 1 - i) % uint32(len(memstats.pause_ns))
- p[i] = memstats.pause_ns[j]
- p[n+i] = memstats.pause_end[j]
- }
-
- p[n+n] = memstats.last_gc
- p[n+n+1] = uint64(memstats.numgc)
- p[n+n+2] = memstats.pause_total_ns
- unlock(&mheap_.lock)
- *pauses = p[:n+n+3]
}
-func setGCPercent(in int32) (out int32) {
- lock(&mheap_.lock)
- out = gcpercent
- if in < 0 {
- in = -1
+// Timing
+
+//go:nowritebarrier
+func gchelper() {
+ _g_ := getg()
+ _g_.m.traceback = 2
+ gchelperstart()
+
+ if trace.enabled {
+ traceGCScanStart()
}
- gcpercent = in
- unlock(&mheap_.lock)
- return out
+
+ // parallel mark over the GC roots
+ parfordo(work.markfor)
+ if gcphase != _GCscan {
+ var gcw gcWork
+ gcDrain(&gcw) // blocks in getfull
+ gcw.dispose()
+ }
+
+ if trace.enabled {
+ traceGCScanDone()
+ }
+
+ nproc := work.nproc // work.nproc can change right after we increment work.ndone
+ if xadd(&work.ndone, +1) == nproc-1 {
+ notewakeup(&work.alldone)
+ }
+ _g_.m.traceback = 0
}
func gchelperstart() {
@@ -1648,7 +763,106 @@
}
}
-func unixnanotime() int64 {
- sec, nsec := time_now()
- return sec*1e9 + int64(nsec)
+// gcchronograph holds timer information related to GC phases.
+// max records the maximum time spent in each GC phase since GCstarttimes.
+// total records the total time spent in each GC phase since GCstarttimes.
+// cycle records the absolute time (as returned by nanoseconds()) that each GC phase last started at.
+type gcchronograph struct {
+ count int64
+ verbose int64
+ maxpause int64
+ max gctimes
+ total gctimes
+ cycle gctimes
+}
+
+// gctimes records the time in nanoseconds of each phase of the concurrent GC.
+type gctimes struct {
+ sweepterm int64 // stw
+ scan int64
+ installmarkwb int64 // stw
+ mark int64
+ markterm int64 // stw
+ sweep int64
+}
+
+var gctimer gcchronograph
+
+// GCstarttimes initializes the gc times. All previous times are lost.
+func GCstarttimes(verbose int64) {
+ gctimer = gcchronograph{verbose: verbose}
+}
+
+// GCendtimes stops the gc timers.
+func GCendtimes() {
+ gctimer.verbose = 0
+}
+
+// calctimes converts gctimer.cycle into the elapsed times, updates gctimer.total
+// and updates gctimer.max with the max pause time.
+func calctimes() gctimes {
+ var times gctimes
+
+ var max = func(a, b int64) int64 {
+ if a > b {
+ return a
+ }
+ return b
+ }
+
+ times.sweepterm = gctimer.cycle.scan - gctimer.cycle.sweepterm
+ gctimer.total.sweepterm += times.sweepterm
+ gctimer.max.sweepterm = max(gctimer.max.sweepterm, times.sweepterm)
+ gctimer.maxpause = max(gctimer.maxpause, gctimer.max.sweepterm)
+
+ times.scan = gctimer.cycle.installmarkwb - gctimer.cycle.scan
+ gctimer.total.scan += times.scan
+ gctimer.max.scan = max(gctimer.max.scan, times.scan)
+
+ times.installmarkwb = gctimer.cycle.mark - gctimer.cycle.installmarkwb
+ gctimer.total.installmarkwb += times.installmarkwb
+ gctimer.max.installmarkwb = max(gctimer.max.installmarkwb, times.installmarkwb)
+ gctimer.maxpause = max(gctimer.maxpause, gctimer.max.installmarkwb)
+
+ times.mark = gctimer.cycle.markterm - gctimer.cycle.mark
+ gctimer.total.mark += times.mark
+ gctimer.max.mark = max(gctimer.max.mark, times.mark)
+
+ times.markterm = gctimer.cycle.sweep - gctimer.cycle.markterm
+ gctimer.total.markterm += times.markterm
+ gctimer.max.markterm = max(gctimer.max.markterm, times.markterm)
+ gctimer.maxpause = max(gctimer.maxpause, gctimer.max.markterm)
+
+ return times
+}
+
+// GCprinttimes prints latency information in nanoseconds about various
+// phases in the GC. The information for each phase includes the maximum pause
+// and total time since the most recent call to GCstarttimes as well as
+// the information from the most recent Concurrent GC cycle. Calls from the
+// application to runtime.GC() are ignored.
+func GCprinttimes() {
+ if gctimer.verbose == 0 {
+ println("GC timers not enabled")
+ return
+ }
+
+ // Explicitly put times on the heap so printPhase can use it.
+ times := new(gctimes)
+ *times = calctimes()
+ cycletime := gctimer.cycle.sweep - gctimer.cycle.sweepterm
+ pause := times.sweepterm + times.installmarkwb + times.markterm
+ gomaxprocs := GOMAXPROCS(-1)
+
+ printlock()
+ print("GC: #", gctimer.count, " ", cycletime, "ns @", gctimer.cycle.sweepterm, " pause=", pause, " maxpause=", gctimer.maxpause, " goroutines=", allglen, " gomaxprocs=", gomaxprocs, "\n")
+ printPhase := func(label string, get func(*gctimes) int64, procs int) {
+ print("GC: ", label, " ", get(times), "ns\tmax=", get(&gctimer.max), "\ttotal=", get(&gctimer.total), "\tprocs=", procs, "\n")
+ }
+ printPhase("sweep term:", func(t *gctimes) int64 { return t.sweepterm }, gomaxprocs)
+ printPhase("scan: ", func(t *gctimes) int64 { return t.scan }, 1)
+ printPhase("install wb:", func(t *gctimes) int64 { return t.installmarkwb }, gomaxprocs)
+ printPhase("mark: ", func(t *gctimes) int64 { return t.mark }, 1)
+ printPhase("mark term: ", func(t *gctimes) int64 { return t.markterm }, gomaxprocs)
+ printunlock()
}
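
A minimal standalone sketch (not runtime code; the type and function names here are made up) of the "disconnect the cached list before dropping it on the floor" idea that clearpools above applies to the sudog and defer caches: nil out each next pointer so a dangling reference to one cached entry cannot pin the whole chain for the collector.

	package main

	type node struct{ next *node }

	// dropCache discards a cached singly linked list. Unlinking each entry
	// before dropping the head means a dangling reference to any one entry
	// cannot keep the rest of the chain reachable.
	func dropCache(head *node) {
		var next *node
		for n := head; n != nil; n = next {
			next = n.next
			n.next = nil
		}
	}

	func main() {
		c := &node{}
		b := &node{next: c}
		a := &node{next: b}
		dropCache(a) // a, b and c are now individually collectible
	}
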
diff --git a/src/runtime/mgc0.go b/src/runtime/mgc0.go
deleted file mode 100644
index f54d933..0000000
--- a/src/runtime/mgc0.go
+++ /dev/null
@@ -1,91 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-import _ "unsafe" // for go:linkname
-
-//go:linkname runtime_debug_freeOSMemory runtime/debug.freeOSMemory
-func runtime_debug_freeOSMemory() {
- gogc(2) // force GC and do eager sweep
- systemstack(scavenge_m)
-}
-
-var poolcleanup func()
-
-//go:linkname sync_runtime_registerPoolCleanup sync.runtime_registerPoolCleanup
-func sync_runtime_registerPoolCleanup(f func()) {
- poolcleanup = f
-}
-
-func clearpools() {
- // clear sync.Pools
- if poolcleanup != nil {
- poolcleanup()
- }
-
- for _, p := range &allp {
- if p == nil {
- break
- }
- // clear tinyalloc pool
- if c := p.mcache; c != nil {
- c.tiny = nil
- c.tinyoffset = 0
-
- // disconnect cached list before dropping it on the floor,
- // so that a dangling ref to one entry does not pin all of them.
- var sg, sgnext *sudog
- for sg = c.sudogcache; sg != nil; sg = sgnext {
- sgnext = sg.next
- sg.next = nil
- }
- c.sudogcache = nil
- }
-
- // clear defer pools
- for i := range p.deferpool {
- // disconnect cached list before dropping it on the floor,
- // so that a dangling ref to one entry does not pin all of them.
- var d, dlink *_defer
- for d = p.deferpool[i]; d != nil; d = dlink {
- dlink = d.link
- d.link = nil
- }
- p.deferpool[i] = nil
- }
- }
-}
-
-// backgroundgc is running in a goroutine and does the concurrent GC work.
-// bggc holds the state of the backgroundgc.
-func backgroundgc() {
- bggc.g = getg()
- for {
- gcwork(0)
- lock(&bggc.lock)
- bggc.working = 0
- goparkunlock(&bggc.lock, "Concurrent GC wait", traceEvGoBlock)
- }
-}
-
-func bgsweep() {
- sweep.g = getg()
- for {
- for gosweepone() != ^uintptr(0) {
- sweep.nbgsweep++
- Gosched()
- }
- lock(&gclock)
- if !gosweepdone() {
- // This can happen if a GC runs between
- // gosweepone returning ^0 above
- // and the lock being acquired.
- unlock(&gclock)
- continue
- }
- sweep.parked = true
- goparkunlock(&gclock, "GC sweep wait", traceEvGoBlock)
- }
-}
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
new file mode 100644
index 0000000..d790af3
--- /dev/null
+++ b/src/runtime/mgcmark.go
@@ -0,0 +1,659 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Garbage collector: marking and scanning
+
+package runtime
+
+import "unsafe"
+
+// Scan all of the stacks, greying (or graying if in America) the referents
+// but not blackening them since the mark write barrier isn't installed.
+//go:nowritebarrier
+func gcscan_m() {
+ _g_ := getg()
+
+ // Grab the g that called us and potentially allow rescheduling.
+ // This allows it to be scanned like other goroutines.
+ mastergp := _g_.m.curg
+ casgstatus(mastergp, _Grunning, _Gwaiting)
+ mastergp.waitreason = "garbage collection scan"
+
+ // Span sweeping has been done by finishsweep_m.
+ // Long term we will want to make this goroutine runnable
+ // by placing it onto a scanenqueue state and then calling
+ // runtime·restartg(mastergp) to make it Grunnable.
+ // At the bottom we will want to return this p back to the scheduler.
+
+ // Prepare flag indicating that the scan has not been completed.
+ lock(&allglock)
+ local_allglen := allglen
+ for i := uintptr(0); i < local_allglen; i++ {
+ gp := allgs[i]
+ gp.gcworkdone = false // set to true in gcphasework
+ gp.gcscanvalid = false // stack has not been scanned
+ }
+ unlock(&allglock)
+
+ work.nwait = 0
+ work.ndone = 0
+ work.nproc = 1 // For now do not do this in parallel.
+ // ackgcphase is not needed since we are not scanning running goroutines.
+ parforsetup(work.markfor, work.nproc, uint32(_RootCount+local_allglen), false, markroot)
+ parfordo(work.markfor)
+
+ lock(&allglock)
+ // Check that gc work is done.
+ for i := uintptr(0); i < local_allglen; i++ {
+ gp := allgs[i]
+ if !gp.gcworkdone {
+ throw("scan missed a g")
+ }
+ }
+ unlock(&allglock)
+
+ casgstatus(mastergp, _Gwaiting, _Grunning)
+ // Let the g that called us continue to run.
+}
+
+// ptrmask for an allocation containing a single pointer.
+var oneptr = [...]uint8{typePointer}
+
+//go:nowritebarrier
+func markroot(desc *parfor, i uint32) {
+ var gcw gcWorkProducer
+ gcw.initFromCache()
+
+ // Note: if you add a case here, please also update heapdump.c:dumproots.
+ switch i {
+ case _RootData:
+ scanblock(uintptr(unsafe.Pointer(&data)), uintptr(unsafe.Pointer(&edata))-uintptr(unsafe.Pointer(&data)), gcdatamask.bytedata, &gcw)
+
+ case _RootBss:
+ scanblock(uintptr(unsafe.Pointer(&bss)), uintptr(unsafe.Pointer(&ebss))-uintptr(unsafe.Pointer(&bss)), gcbssmask.bytedata, &gcw)
+
+ case _RootFinalizers:
+ for fb := allfin; fb != nil; fb = fb.alllink {
+ scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), uintptr(fb.cnt)*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], &gcw)
+ }
+
+ case _RootSpans:
+ // mark MSpan.specials
+ sg := mheap_.sweepgen
+ for spanidx := uint32(0); spanidx < uint32(len(work.spans)); spanidx++ {
+ s := work.spans[spanidx]
+ if s.state != mSpanInUse {
+ continue
+ }
+ if !checkmarkphase && s.sweepgen != sg {
+ // sweepgen was updated (+2) during non-checkmark GC pass
+ print("sweep ", s.sweepgen, " ", sg, "\n")
+ throw("gc: unswept span")
+ }
+ for sp := s.specials; sp != nil; sp = sp.next {
+ if sp.kind != _KindSpecialFinalizer {
+ continue
+ }
+ // don't mark finalized object, but scan it so we
+ // retain everything it points to.
+ spf := (*specialfinalizer)(unsafe.Pointer(sp))
+ // A finalizer can be set for an inner byte of an object, find object beginning.
+ p := uintptr(s.start<<_PageShift) + uintptr(spf.special.offset)/s.elemsize*s.elemsize
+ if gcphase != _GCscan {
+ scanblock(p, s.elemsize, nil, &gcw) // scanned during mark phase
+ }
+ scanblock(uintptr(unsafe.Pointer(&spf.fn)), ptrSize, &oneptr[0], &gcw)
+ }
+ }
+
+ case _RootFlushCaches:
+ if gcphase != _GCscan { // Do not flush mcaches during GCscan phase.
+ flushallmcaches()
+ }
+
+ default:
+ // the rest is scanning goroutine stacks
+ if uintptr(i-_RootCount) >= allglen {
+ throw("markroot: bad index")
+ }
+ gp := allgs[i-_RootCount]
+
+ // remember when we've first observed the G blocked
+ // needed only to output in traceback
+ status := readgstatus(gp) // We are not in a scan state
+ if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 {
+ gp.waitsince = work.tstart
+ }
+
+ // Shrink a stack if not much of it is being used but not in the scan phase.
+ if gcphase == _GCmarktermination {
+ // Shrink during STW GCmarktermination phase thus avoiding
+ // complications introduced by shrinking during
+ // non-STW phases.
+ shrinkstack(gp)
+ }
+ if readgstatus(gp) == _Gdead {
+ gp.gcworkdone = true
+ } else {
+ gp.gcworkdone = false
+ }
+ restart := stopg(gp)
+
+ // goroutine will scan its own stack when it stops running.
+ // Wait until it has.
+ for readgstatus(gp) == _Grunning && !gp.gcworkdone {
+ }
+
+ // scanstack(gp) is done as part of gcphasework
+ // But to make sure we finished we need to make sure that
+ // the stack traps have all responded so drop into
+ // this while loop until they respond.
+ for !gp.gcworkdone {
+ status = readgstatus(gp)
+ if status == _Gdead {
+ gp.gcworkdone = true // scan is a noop
+ break
+ }
+ if status == _Gwaiting || status == _Grunnable {
+ restart = stopg(gp)
+ }
+ }
+ if restart {
+ restartg(gp)
+ }
+ }
+ gcw.dispose()
+}
+
+// gchelpwork does a small bounded amount of gc work. The purpose is to
+// shorten the time (as measured by allocations) spent doing a concurrent GC.
+// The number of mutator calls is roughly proportional to the number of allocations
+// made by that mutator. This slows down the allocation while speeding up the GC.
+//go:nowritebarrier
+func gchelpwork() {
+ switch gcphase {
+ default:
+ throw("gcphasework in bad gcphase")
+ case _GCoff, _GCquiesce, _GCstw:
+ // No work.
+ case _GCsweep:
+ // We could help by calling sweepone to sweep a single span.
+ // _ = sweepone()
+ case _GCscan:
+ // scan the stack, mark the objects, put pointers in work buffers
+ // hanging off the P where this is being run.
+ // scanstack(gp)
+ case _GCmark:
+ // Get a full work buffer and empty it.
+ // drain your own currentwbuf first in the hopes that it will
+ // be more cache friendly.
+ var gcw gcWork
+ gcw.initFromCache()
+ const n = len(workbuf{}.obj)
+ gcDrainN(&gcw, n) // drain up to one buffer's worth of objects
+ gcw.dispose()
+ case _GCmarktermination:
+ // We should never be here since the world is stopped.
+ // All available mark work will be emptied before returning.
+ throw("gcphasework in bad gcphase")
+ }
+}
+
+// The gp has been moved to a GC safepoint. GC phase specific
+// work is done here.
+//go:nowritebarrier
+func gcphasework(gp *g) {
+ switch gcphase {
+ default:
+ throw("gcphasework in bad gcphase")
+ case _GCoff, _GCquiesce, _GCstw, _GCsweep:
+ // No work.
+ case _GCscan:
+ // scan the stack, mark the objects, put pointers in work buffers
+ // hanging off the P where this is being run.
+ // Indicate that the scan is valid until the goroutine runs again
+ scanstack(gp)
+ case _GCmark:
+ // No work.
+ case _GCmarktermination:
+ scanstack(gp)
+ // All available mark work will be emptied before returning.
+ }
+ gp.gcworkdone = true
+}
+
+//go:nowritebarrier
+func scanstack(gp *g) {
+ if gp.gcscanvalid {
+ return
+ }
+
+ if readgstatus(gp)&_Gscan == 0 {
+ print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n")
+ throw("scanstack - bad status")
+ }
+
+ switch readgstatus(gp) &^ _Gscan {
+ default:
+ print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
+ throw("mark - bad status")
+ case _Gdead:
+ return
+ case _Grunning:
+ print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
+ throw("scanstack: goroutine not stopped")
+ case _Grunnable, _Gsyscall, _Gwaiting:
+ // ok
+ }
+
+ if gp == getg() {
+ throw("can't scan our own stack")
+ }
+ mp := gp.m
+ if mp != nil && mp.helpgc != 0 {
+ throw("can't scan gchelper stack")
+ }
+
+ var gcw gcWorkProducer
+ gcw.initFromCache()
+ scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
+ // Pick up gcw as free variable so gentraceback and friends can
+ // keep the same signature.
+ scanframeworker(frame, unused, &gcw)
+ return true
+ }
+ gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
+ tracebackdefers(gp, scanframe, nil)
+ gcw.disposeToCache()
+ gp.gcscanvalid = true
+}
+
+// Scan a stack frame: local variables and function arguments/results.
+//go:nowritebarrier
+func scanframeworker(frame *stkframe, unused unsafe.Pointer, gcw *gcWorkProducer) {
+
+ f := frame.fn
+ targetpc := frame.continpc
+ if targetpc == 0 {
+ // Frame is dead.
+ return
+ }
+ if _DebugGC > 1 {
+ print("scanframe ", funcname(f), "\n")
+ }
+ if targetpc != f.entry {
+ targetpc--
+ }
+ pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc)
+ if pcdata == -1 {
+ // We do not have a valid pcdata value but there might be a
+ // stackmap for this function. It is likely that we are looking
+ // at the function prologue, assume so and hope for the best.
+ pcdata = 0
+ }
+
+ // Scan local variables if stack frame has been allocated.
+ size := frame.varp - frame.sp
+ var minsize uintptr
+ if thechar != '6' && thechar != '8' {
+ minsize = ptrSize
+ } else {
+ minsize = 0
+ }
+ if size > minsize {
+ stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
+ if stkmap == nil || stkmap.n <= 0 {
+ print("runtime: frame ", funcname(f), " untyped locals ", hex(frame.varp-size), "+", hex(size), "\n")
+ throw("missing stackmap")
+ }
+
+ // Locals bitmap information, scan just the pointers in locals.
+ if pcdata < 0 || pcdata >= stkmap.n {
+ // don't know where we are
+ print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " locals stack map entries for ", funcname(f), " (targetpc=", targetpc, ")\n")
+ throw("scanframe: bad symbol table")
+ }
+ bv := stackmapdata(stkmap, pcdata)
+ size = (uintptr(bv.n) / typeBitsWidth) * ptrSize
+ scanblock(frame.varp-size, size, bv.bytedata, gcw)
+ }
+
+ // Scan arguments.
+ if frame.arglen > 0 {
+ var bv bitvector
+ if frame.argmap != nil {
+ bv = *frame.argmap
+ } else {
+ stkmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
+ if stkmap == nil || stkmap.n <= 0 {
+ print("runtime: frame ", funcname(f), " untyped args ", hex(frame.argp), "+", hex(frame.arglen), "\n")
+ throw("missing stackmap")
+ }
+ if pcdata < 0 || pcdata >= stkmap.n {
+ // don't know where we are
+ print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " args stack map entries for ", funcname(f), " (targetpc=", targetpc, ")\n")
+ throw("scanframe: bad symbol table")
+ }
+ bv = stackmapdata(stkmap, pcdata)
+ }
+ scanblock(frame.argp, uintptr(bv.n)/typeBitsWidth*ptrSize, bv.bytedata, gcw)
+ }
+}
+
+// gcDrain scans objects in work buffers (starting with wbuf), blackening grey
+// objects until all work buffers have been drained.
+//go:nowritebarrier
+func gcDrain(gcw *gcWork) {
+ if gcphase != _GCmark && gcphase != _GCmarktermination {
+ throw("scanblock phase incorrect")
+ }
+
+ for {
+ // If another proc wants a pointer, give it some.
+ if work.nwait > 0 && work.full == 0 {
+ gcw.balance()
+ }
+
+ b := gcw.get()
+ if b == 0 {
+ // work barrier reached
+ break
+ }
+ // If the current wbuf is filled by the scan a new wbuf might be
+ // returned that could possibly hold only a single object. This
+ // could result in each iteration draining only a single object
+ // out of the wbuf passed in + a single object placed
+ // into an empty wbuf in scanobject so there could be
+ // a performance hit as we keep fetching fresh wbufs.
+ scanobject(b, 0, nil, &gcw.gcWorkProducer)
+ }
+ checknocurrentwbuf()
+}
+
+// gcDrainN scans n objects, blackening grey objects.
+//go:nowritebarrier
+func gcDrainN(gcw *gcWork, n int) {
+ checknocurrentwbuf()
+ for i := 0; i < n; i++ {
+ // This might be a good place to add prefetch code...
+ // if(wbuf.nobj > 4) {
+ // PREFETCH(wbuf->obj[wbuf.nobj - 3]);
+ // }
+ b := gcw.tryGet()
+ if b == 0 {
+ return
+ }
+ scanobject(b, 0, nil, &gcw.gcWorkProducer)
+ }
+}
+
+// scanblock scans b as scanobject would.
+// If the gcphase is GCscan, scanblock performs additional checks.
+//go:nowritebarrier
+func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWorkProducer) {
+ // Use local copies of original parameters, so that a stack trace
+ // due to one of the throws below shows the original block
+ // base and extent.
+ b := b0
+ n := n0
+
+ // ptrmask can have 2 possible values:
+ // 1. nil - obtain pointer mask from GC bitmap.
+ // 2. pointer to a compact mask (for stacks and data).
+
+ scanobject(b, n, ptrmask, gcw)
+ if gcphase == _GCscan {
+ if inheap(b) && ptrmask == nil {
+ // b is in heap, we are in GCscan so there should be a ptrmask.
+ throw("scanblock: In GCscan phase and inheap is true.")
+ }
+ }
+}
+
+// Scan the object b of size n, adding pointers to wbuf.
+// Return possibly new wbuf to use.
+// If ptrmask != nil, it specifies where pointers are in b.
+// If ptrmask == nil, the GC bitmap should be consulted.
+// In this case, n may be an overestimate of the size; the GC bitmap
+// must also be used to make sure the scan stops at the end of b.
+//go:nowritebarrier
+func scanobject(b, n uintptr, ptrmask *uint8, gcw *gcWorkProducer) {
+ arena_start := mheap_.arena_start
+ arena_used := mheap_.arena_used
+
+ // Find bits of the beginning of the object.
+ var hbits heapBits
+ if ptrmask == nil {
+ b, hbits = heapBitsForObject(b)
+ if b == 0 {
+ return
+ }
+ if n == 0 {
+ n = mheap_.arena_used - b
+ }
+ }
+ for i := uintptr(0); i < n; i += ptrSize {
+ // Find bits for this word.
+ var bits uintptr
+ if ptrmask != nil {
+ // dense mask (stack or data)
+ bits = (uintptr(*(*byte)(add(unsafe.Pointer(ptrmask), (i/ptrSize)/4))) >> (((i / ptrSize) % 4) * typeBitsWidth)) & typeMask
+ } else {
+ // Check if we have reached end of span.
+ // n is an overestimate of the size of the object.
+ if (b+i)%_PageSize == 0 && h_spans[(b-arena_start)>>_PageShift] != h_spans[(b+i-arena_start)>>_PageShift] {
+ break
+ }
+
+ bits = uintptr(hbits.typeBits())
+ if i > 0 && (hbits.isBoundary() || bits == typeDead) {
+ break // reached beginning of the next object
+ }
+ hbits = hbits.next()
+ }
+
+ if bits <= typeScalar { // typeScalar, typeDead, typeScalarMarked
+ continue
+ }
+
+ if bits&typePointer != typePointer {
+ print("gc checkmarkphase=", checkmarkphase, " b=", hex(b), " ptrmask=", ptrmask, "\n")
+ throw("unexpected garbage collection bits")
+ }
+
+ obj := *(*uintptr)(unsafe.Pointer(b + i))
+
+ // At this point we have extracted the next potential pointer.
+ // Check if it points into heap.
+ if obj == 0 || obj < arena_start || obj >= arena_used {
+ continue
+ }
+
+ if mheap_.shadow_enabled && debug.wbshadow >= 2 && debug.gccheckmark > 0 && checkmarkphase {
+ checkwbshadow((*uintptr)(unsafe.Pointer(b + i)))
+ }
+
+ // Mark the object.
+ if obj, hbits := heapBitsForObject(obj); obj != 0 {
+ greyobject(obj, b, i, hbits, gcw)
+ }
+ }
+}
+
+// Shade the object if it isn't already.
+// The object is not nil and known to be in the heap.
+//go:nowritebarrier
+func shade(b uintptr) {
+ if !inheap(b) {
+ throw("shade: passed an address not in the heap")
+ }
+ if obj, hbits := heapBitsForObject(b); obj != 0 {
+ // TODO: this would be a great place to put a check to see
+ // if we are harvesting and if we are then we should
+ // figure out why there is a call to shade when the
+ // harvester thinks we are in a STW.
+ // if atomicload(&harvestingwbufs) == uint32(1) {
+ // // Throw here to discover write barriers
+ // // being executed during a STW.
+ // throw("shade during harvest")
+ // }
+
+ var gcw gcWorkProducer
+ greyobject(obj, 0, 0, hbits, &gcw)
+ // This is part of the write barrier so put the wbuf back.
+ if gcphase == _GCmarktermination {
+ gcw.dispose()
+ } else {
+ // If we added any pointers to the gcw, then
+ // currentwbuf must be nil because 1)
+ // greyobject got its wbuf from currentwbuf
+ // and 2) shade runs on the systemstack, so
+ // we're still on the same M. If either of
+ // these becomes no longer true, we need to
+ // rethink this.
+ gcw.disposeToCache()
+ }
+ }
+}
+
+// obj is the start of an object with mark mbits.
+// If it isn't already marked, mark it and enqueue into workbuf.
+// Return possibly new workbuf to use.
+// base and off are for debugging only and could be removed.
+//go:nowritebarrier
+func greyobject(obj, base, off uintptr, hbits heapBits, gcw *gcWorkProducer) {
+ // obj should be start of allocation, and so must be at least pointer-aligned.
+ if obj&(ptrSize-1) != 0 {
+ throw("greyobject: obj not pointer-aligned")
+ }
+
+ if checkmarkphase {
+ if !hbits.isMarked() {
+ print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n")
+ print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")
+
+ // Dump the source (base) object
+
+ kb := base >> _PageShift
+ xb := kb
+ xb -= mheap_.arena_start >> _PageShift
+ sb := h_spans[xb]
+ printlock()
+ print("runtime:greyobject Span: base=", hex(base), " kb=", hex(kb))
+ if sb == nil {
+ print(" sb=nil\n")
+ } else {
+ print(" sb.start*_PageSize=", hex(sb.start*_PageSize), " sb.limit=", hex(sb.limit), " sb.sizeclass=", sb.sizeclass, " sb.elemsize=", sb.elemsize, "\n")
+ // base is (a pointer to) the source object holding the reference to object. Create a pointer to each of the
+ // fields in base and print them out as hex values.
+ for i := 0; i < int(sb.elemsize/ptrSize); i++ {
+ print(" *(base+", i*ptrSize, ") = ", hex(*(*uintptr)(unsafe.Pointer(base + uintptr(i)*ptrSize))), "\n")
+ }
+ }
+
+ // Dump the object
+
+ k := obj >> _PageShift
+ x := k
+ x -= mheap_.arena_start >> _PageShift
+ s := h_spans[x]
+ print("runtime:greyobject Span: obj=", hex(obj), " k=", hex(k))
+ if s == nil {
+ print(" s=nil\n")
+ } else {
+ print(" s.start=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), " s.sizeclass=", s.sizeclass, " s.elemsize=", s.elemsize, "\n")
+ // NOTE(rsc): This code is using s.sizeclass as an approximation of the
+ // number of pointer-sized words in an object. Perhaps not what was intended.
+ for i := 0; i < int(s.sizeclass); i++ {
+ print(" *(obj+", i*ptrSize, ") = ", hex(*(*uintptr)(unsafe.Pointer(obj + uintptr(i)*ptrSize))), "\n")
+ }
+ }
+ throw("checkmark found unmarked object")
+ }
+ if !hbits.isCheckmarked() {
+ return
+ }
+ hbits.setCheckmarked()
+ if !hbits.isCheckmarked() {
+ throw("setCheckmarked and isCheckmarked disagree")
+ }
+ } else {
+ // If marked we have nothing to do.
+ if hbits.isMarked() {
+ return
+ }
+
+ // Each byte of GC bitmap holds info for two words.
+ // Might be racing with other updates, so use atomic update always.
+ // We used to be clever here and use a non-atomic update in certain
+ // cases, but it's not worth the risk.
+ hbits.setMarked()
+ }
+
+ if !checkmarkphase && hbits.typeBits() == typeDead {
+ return // noscan object
+ }
+
+ // Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
+ // seems like a nice optimization that can be added back in.
+ // There needs to be time between the PREFETCH and the use.
+ // Previously we put the obj in an 8 element buffer that is drained at a rate
+ // to give the PREFETCH time to do its work.
+ // Use of PREFETCHNTA might be more appropriate than PREFETCH
+
+ gcw.put(obj)
+}
+
+// When in GCmarkterminate phase we allocate black.
+//go:nowritebarrier
+func gcmarknewobject_m(obj uintptr) {
+ if gcphase != _GCmarktermination {
+ throw("marking new object while not in mark termination phase")
+ }
+ if checkmarkphase { // The world should be stopped so this should not happen.
+ throw("gcmarknewobject called while doing checkmark")
+ }
+
+ heapBitsForAddr(obj).setMarked()
+}
+
+// Checkmarking
+
+// To help debug the concurrent GC we remark with the world
+// stopped, ensuring that any object encountered has its normal
+// mark bit set. To do this we use an orthogonal bit
+// pattern to indicate the object is marked. The following pattern
+// uses the upper two bits in the object's boundary nibble.
+// 01: scalar not marked
+// 10: pointer not marked
+// 11: pointer marked
+// 00: scalar marked
+// XORing with 01 flips the pattern from marked to unmarked and vice versa.
+// The higher bit is 1 for pointers and 0 for scalars, whether the object
+// is marked or not.
+// The first nibble no longer holds the typeDead pattern indicating that
+// there are no more pointers in the object. This information is held
+// in the second nibble.
+
+// When marking an object if the bool checkmarkphase is true one uses the above
+// encoding, otherwise one uses the bitMarked bit in the lower two bits
+// of the nibble.
+var checkmarkphase = false
+
+//go:nowritebarrier
+func initCheckmarks() {
+ for _, s := range work.spans {
+ if s.state == _MSpanInUse {
+ heapBitsForSpan(s.base()).initCheckmarkSpan(s.layout())
+ }
+ }
+}
+
+func clearCheckmarks() {
+ for _, s := range work.spans {
+ if s.state == _MSpanInUse {
+ heapBitsForSpan(s.base()).clearCheckmarkSpan(s.layout())
+ }
+ }
+}
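
A toy, self-contained illustration (not runtime code) of the checkmark encoding described above: XORing the two-bit pattern with 01 toggles marked/unmarked while the high "pointer" bit survives the flip.

	package main

	import "fmt"

	func main() {
		// Checkmark patterns from the comment above:
		// 01 scalar unmarked, 10 pointer unmarked,
		// 11 pointer marked,  00 scalar marked.
		const scalarUnmarked, ptrUnmarked = 1, 2

		fmt.Printf("%02b\n", scalarUnmarked^1)   // 00: scalar, now marked
		fmt.Printf("%02b\n", ptrUnmarked^1)      // 11: pointer, now marked
		fmt.Println((ptrUnmarked^1)&2 != 0)      // true: the pointer bit is preserved
	}
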
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
new file mode 100644
index 0000000..92ddc51
--- /dev/null
+++ b/src/runtime/mgcsweep.go
@@ -0,0 +1,312 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Garbage collector: sweeping
+
+package runtime
+
+import "unsafe"
+
+var sweep sweepdata
+
+// State of background sweep.
+// Protected by gclock.
+type sweepdata struct {
+ g *g
+ parked bool
+ started bool
+
+ spanidx uint32 // background sweeper position
+
+ nbgsweep uint32
+ npausesweep uint32
+}
+
+var gclock mutex
+
+//go:nowritebarrier
+func finishsweep_m() {
+ // The world is stopped so we should be able to complete the sweeps
+ // quickly.
+ for sweepone() != ^uintptr(0) {
+ sweep.npausesweep++
+ }
+
+ // There may be some other spans being swept concurrently that
+ // we need to wait for. If finishsweep_m is done with the world stopped
+ // this code is not required.
+ sg := mheap_.sweepgen
+ for _, s := range work.spans {
+ if s.sweepgen != sg && s.state == _MSpanInUse {
+ mSpan_EnsureSwept(s)
+ }
+ }
+}
+
+func bgsweep() {
+ sweep.g = getg()
+ for {
+ for gosweepone() != ^uintptr(0) {
+ sweep.nbgsweep++
+ Gosched()
+ }
+ lock(&gclock)
+ if !gosweepdone() {
+ // This can happen if a GC runs between
+ // gosweepone returning ^0 above
+ // and the lock being acquired.
+ unlock(&gclock)
+ continue
+ }
+ sweep.parked = true
+ goparkunlock(&gclock, "GC sweep wait", traceEvGoBlock)
+ }
+}
+
+// sweeps one span
+// returns number of pages returned to heap, or ^uintptr(0) if there is nothing to sweep
+//go:nowritebarrier
+func sweepone() uintptr {
+ _g_ := getg()
+
+ // increment locks to ensure that the goroutine is not preempted
+ // in the middle of sweep thus leaving the span in an inconsistent state for next GC
+ _g_.m.locks++
+ sg := mheap_.sweepgen
+ for {
+ idx := xadd(&sweep.spanidx, 1) - 1
+ if idx >= uint32(len(work.spans)) {
+ mheap_.sweepdone = 1
+ _g_.m.locks--
+ return ^uintptr(0)
+ }
+ s := work.spans[idx]
+ if s.state != mSpanInUse {
+ s.sweepgen = sg
+ continue
+ }
+ if s.sweepgen != sg-2 || !cas(&s.sweepgen, sg-2, sg-1) {
+ continue
+ }
+ npages := s.npages
+ if !mSpan_Sweep(s, false) {
+ npages = 0
+ }
+ _g_.m.locks--
+ return npages
+ }
+}
+
+//go:nowritebarrier
+func gosweepone() uintptr {
+ var ret uintptr
+ systemstack(func() {
+ ret = sweepone()
+ })
+ return ret
+}
+
+//go:nowritebarrier
+func gosweepdone() bool {
+ return mheap_.sweepdone != 0
+}
+
+// Returns only when span s has been swept.
+//go:nowritebarrier
+func mSpan_EnsureSwept(s *mspan) {
+ // Caller must disable preemption.
+ // Otherwise when this function returns the span can become unswept again
+ // (if GC is triggered on another goroutine).
+ _g_ := getg()
+ if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 {
+ throw("MSpan_EnsureSwept: m is not locked")
+ }
+
+ sg := mheap_.sweepgen
+ if atomicload(&s.sweepgen) == sg {
+ return
+ }
+ // The caller must be sure that the span is a MSpanInUse span.
+ if cas(&s.sweepgen, sg-2, sg-1) {
+ mSpan_Sweep(s, false)
+ return
+ }
+ // unfortunate condition, and we don't have efficient means to wait
+ for atomicload(&s.sweepgen) != sg {
+ osyield()
+ }
+}
+
+// Sweep frees or collects finalizers for blocks not marked in the mark phase.
+// It clears the mark bits in preparation for the next GC round.
+// Returns true if the span was returned to heap.
+// If preserve=true, don't return it to heap nor relink in MCentral lists;
+// caller takes care of it.
+//TODO go:nowritebarrier
+func mSpan_Sweep(s *mspan, preserve bool) bool {
+ if checkmarkphase {
+ throw("MSpan_Sweep: checkmark only runs in STW and after the sweep")
+ }
+
+ // It's critical that we enter this function with preemption disabled;
+ // GC must not start while we are in the middle of this function.
+ _g_ := getg()
+ if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 {
+ throw("MSpan_Sweep: m is not locked")
+ }
+ sweepgen := mheap_.sweepgen
+ if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
+ print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
+ throw("MSpan_Sweep: bad span state")
+ }
+
+ if trace.enabled {
+ traceGCSweepStart()
+ }
+
+ cl := s.sizeclass
+ size := s.elemsize
+ res := false
+ nfree := 0
+
+ var head, end gclinkptr
+
+ c := _g_.m.mcache
+ sweepgenset := false
+
+ // Mark any free objects in this span so we don't collect them.
+ for link := s.freelist; link.ptr() != nil; link = link.ptr().next {
+ heapBitsForAddr(uintptr(link)).setMarkedNonAtomic()
+ }
+
+ // Unlink & free special records for any objects we're about to free.
+ specialp := &s.specials
+ special := *specialp
+ for special != nil {
+ // A finalizer can be set for an inner byte of an object, find object beginning.
+ p := uintptr(s.start<<_PageShift) + uintptr(special.offset)/size*size
+ hbits := heapBitsForAddr(p)
+ if !hbits.isMarked() {
+ // Find the exact byte for which the special was setup
+ // (as opposed to object beginning).
+ p := uintptr(s.start<<_PageShift) + uintptr(special.offset)
+ // about to free object: splice out special record
+ y := special
+ special = special.next
+ *specialp = special
+ if !freespecial(y, unsafe.Pointer(p), size, false) {
+ // stop freeing of object if it has a finalizer
+ hbits.setMarkedNonAtomic()
+ }
+ } else {
+ // object is still live: keep special record
+ specialp = &special.next
+ special = *specialp
+ }
+ }
+
+ // Sweep through n objects of given size starting at p.
+ // This thread owns the span now, so it can manipulate
+ // the block bitmap without atomic operations.
+
+ size, n, _ := s.layout()
+ heapBitsSweepSpan(s.base(), size, n, func(p uintptr) {
+ // At this point we know that we are looking at garbage object
+ // that needs to be collected.
+ if debug.allocfreetrace != 0 {
+ tracefree(unsafe.Pointer(p), size)
+ }
+
+ // Reset to allocated+noscan.
+ if cl == 0 {
+ // Free large span.
+ if preserve {
+ throw("can't preserve large span")
+ }
+ heapBitsForSpan(p).clearSpan(s.layout())
+ s.needzero = 1
+
+ // important to set sweepgen before returning it to heap
+ atomicstore(&s.sweepgen, sweepgen)
+ sweepgenset = true
+
+ // NOTE(rsc,dvyukov): The original implementation of efence
+ // in CL 22060046 used SysFree instead of SysFault, so that
+ // the operating system would eventually give the memory
+ // back to us again, so that an efence program could run
+ // longer without running out of memory. Unfortunately,
+ // calling SysFree here without any kind of adjustment of the
+ // heap data structures means that when the memory does
+ // come back to us, we have the wrong metadata for it, either in
+ // the MSpan structures or in the garbage collection bitmap.
+ // Using SysFault here means that the program will run out of
+ // memory fairly quickly in efence mode, but at least it won't
+ // have mysterious crashes due to confused memory reuse.
+ // It should be possible to switch back to SysFree if we also
+ // implement and then call some kind of MHeap_DeleteSpan.
+ if debug.efence > 0 {
+ s.limit = 0 // prevent mlookup from finding this span
+ sysFault(unsafe.Pointer(p), size)
+ } else {
+ mHeap_Free(&mheap_, s, 1)
+ }
+ c.local_nlargefree++
+ c.local_largefree += size
+ reduction := int64(size) * int64(gcpercent+100) / 100
+ if int64(memstats.next_gc)-reduction > int64(heapminimum) {
+ xadd64(&memstats.next_gc, -reduction)
+ } else {
+ atomicstore64(&memstats.next_gc, heapminimum)
+ }
+ res = true
+ } else {
+ // Free small object.
+ if size > 2*ptrSize {
+ *(*uintptr)(unsafe.Pointer(p + ptrSize)) = uintptrMask & 0xdeaddeaddeaddead // mark as "needs to be zeroed"
+ } else if size > ptrSize {
+ *(*uintptr)(unsafe.Pointer(p + ptrSize)) = 0
+ }
+ if head.ptr() == nil {
+ head = gclinkptr(p)
+ } else {
+ end.ptr().next = gclinkptr(p)
+ }
+ end = gclinkptr(p)
+ end.ptr().next = gclinkptr(0x0bade5)
+ nfree++
+ }
+ })
+
+ // We need to set s.sweepgen = h.sweepgen only when all blocks are swept,
+ // because of the potential for a concurrent free/SetFinalizer.
+ // But we need to set it before we make the span available for allocation
+ // (return it to heap or mcentral), because allocation code assumes that a
+ // span is already swept if available for allocation.
+ if !sweepgenset && nfree == 0 {
+ // The span must be in our exclusive ownership until we update sweepgen,
+ // check for potential races.
+ if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
+ print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
+ throw("MSpan_Sweep: bad span state after sweep")
+ }
+ atomicstore(&s.sweepgen, sweepgen)
+ }
+ if nfree > 0 {
+ c.local_nsmallfree[cl] += uintptr(nfree)
+ c.local_cachealloc -= intptr(uintptr(nfree) * size)
+ reduction := int64(nfree) * int64(size) * int64(gcpercent+100) / 100
+ if int64(memstats.next_gc)-reduction > int64(heapminimum) {
+ xadd64(&memstats.next_gc, -reduction)
+ } else {
+ atomicstore64(&memstats.next_gc, heapminimum)
+ }
+ res = mCentral_FreeSpan(&mheap_.central[cl].mcentral, s, int32(nfree), head, end, preserve)
+ // MCentral_FreeSpan updates sweepgen
+ }
+ if trace.enabled {
+ traceGCSweepDone()
+ traceNextGC()
+ }
+ return res
+}
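
A standalone sketch (hypothetical variable names, not runtime code) of the sweepgen handshake that sweepone and mSpan_EnsureSwept above rely on: relative to the heap's generation h, a span at h-2 still needs sweeping, h-1 means it is being swept, and h means swept and ready. A sweeper claims a span with a CAS from h-2 to h-1 and publishes completion by storing h.

	package main

	import (
		"fmt"
		"sync/atomic"
	)

	func main() {
		var heapGen uint32 = 4 // the heap's current sweep generation
		spanGen := heapGen - 2 // this span still needs sweeping

		// Claim the span for sweeping: h-2 -> h-1.
		if atomic.CompareAndSwapUint32(&spanGen, heapGen-2, heapGen-1) {
			// ... sweep the span's objects here ...

			// Publish the result: the span is swept and ready to use.
			atomic.StoreUint32(&spanGen, heapGen)
		}
		fmt.Println(atomic.LoadUint32(&spanGen) == heapGen) // true
	}
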
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 13f1b36..d082f8e 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -4,7 +4,72 @@
// Page heap.
//
-// See malloc.h for overview.
+// See malloc.go for overview.
+
+package runtime
+
+import "unsafe"
+
+// Main malloc heap.
+// The heap itself is the "free[]" and "large" arrays,
+// but all the other global data is here too.
+type mheap struct {
+ lock mutex
+ free [_MaxMHeapList]mspan // free lists of given length
+ freelarge mspan // free lists length >= _MaxMHeapList
+ busy [_MaxMHeapList]mspan // busy lists of large objects of given length
+ busylarge mspan // busy lists of large objects length >= _MaxMHeapList
+ allspans **mspan // all spans out there
+ gcspans **mspan // copy of allspans referenced by gc marker or sweeper
+ nspan uint32
+ sweepgen uint32 // sweep generation, see comment in mspan
+ sweepdone uint32 // all spans are swept
+
+ // span lookup
+ spans **mspan
+ spans_mapped uintptr
+
+ // range of addresses we might see in the heap
+ bitmap uintptr
+ bitmap_mapped uintptr
+ arena_start uintptr
+ arena_used uintptr
+ arena_end uintptr
+ arena_reserved bool
+
+ // write barrier shadow data+heap.
+ // 64-bit systems only, enabled by GODEBUG=wbshadow=1.
+ shadow_enabled bool // shadow should be updated and checked
+ shadow_reserved bool // shadow memory is reserved
+ shadow_heap uintptr // heap-addr + shadow_heap = shadow heap addr
+ shadow_data uintptr // data-addr + shadow_data = shadow data addr
+ data_start uintptr // start of shadowed data addresses
+ data_end uintptr // end of shadowed data addresses
+
+ // central free lists for small size classes.
+ // the padding makes sure that the MCentrals are
+ // spaced CacheLineSize bytes apart, so that each MCentral.lock
+ // gets its own cache line.
+ central [_NumSizeClasses]struct {
+ mcentral mcentral
+ pad [_CacheLineSize]byte
+ }
+
+ spanalloc fixalloc // allocator for span*
+ cachealloc fixalloc // allocator for mcache*
+ specialfinalizeralloc fixalloc // allocator for specialfinalizer*
+ specialprofilealloc fixalloc // allocator for specialprofile*
+ speciallock mutex // lock for special record allocators.
+
+ // Malloc stats.
+ largefree uint64 // bytes freed for large objects (>maxsmallsize)
+ nlargefree uint64 // number of frees for large objects (>maxsmallsize)
+ nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)
+}
+
+var mheap_ mheap
+
+// An MSpan is a run of pages.
//
// When a MSpan is in the heap free list, state == MSpanFree
// and heapmap(s->start) == span, heapmap(s->start+s->npages-1) == span.
@@ -12,9 +77,55 @@
// When a MSpan is allocated, state == MSpanInUse or MSpanStack
// and heapmap(i) == span for all s->start <= i < s->start+s->npages.
-package runtime
+// Every MSpan is in one doubly-linked list,
+// either one of the MHeap's free lists or one of the
+// MCentral's span lists. We use empty MSpan structures as list heads.
-import "unsafe"
+const (
+ _MSpanInUse = iota // allocated for garbage collected heap
+ _MSpanStack // allocated for use by stack allocator
+ _MSpanFree
+ _MSpanListHead
+ _MSpanDead
+)
+
+type mspan struct {
+ next *mspan // in a span linked list
+ prev *mspan // in a span linked list
+ start pageID // starting page number
+ npages uintptr // number of pages in span
+ freelist gclinkptr // list of free objects
+ // sweep generation:
+ // if sweepgen == h->sweepgen - 2, the span needs sweeping
+ // if sweepgen == h->sweepgen - 1, the span is currently being swept
+ // if sweepgen == h->sweepgen, the span is swept and ready to use
+ // h->sweepgen is incremented by 2 after every GC
+ sweepgen uint32
+ ref uint16 // capacity - number of objects in freelist
+ sizeclass uint8 // size class
+ incache bool // being used by an mcache
+ state uint8 // mspaninuse etc
+ needzero uint8 // needs to be zeroed before allocation
+ elemsize uintptr // computed from sizeclass or from npages
+ unusedsince int64 // first time spotted by gc in mspanfree state
+ npreleased uintptr // number of pages released to the os
+ limit uintptr // end of data in span
+ speciallock mutex // guards specials list
+ specials *special // linked list of special records sorted by offset.
+}
+
+func (s *mspan) base() uintptr {
+ return uintptr(s.start << _PageShift)
+}
+
+func (s *mspan) layout() (size, n, total uintptr) {
+ total = s.npages << _PageShift
+ size = s.elemsize
+ if size > 0 {
+ n = total / size
+ }
+ return
+}
var h_allspans []*mspan // TODO: make this h.allspans once mheap can be defined in Go
var h_spans []*mspan // TODO: make this h.spans once mheap can be defined in Go
@@ -50,6 +161,73 @@
h.nspan = uint32(len(h_allspans))
}
+// inheap reports whether b is a pointer into a (potentially dead) heap object.
+// It returns false for pointers into stack spans.
+//go:nowritebarrier
+func inheap(b uintptr) bool {
+ if b == 0 || b < mheap_.arena_start || b >= mheap_.arena_used {
+ return false
+ }
+ // Not a beginning of a block, consult span table to find the block beginning.
+ k := b >> _PageShift
+ x := k
+ x -= mheap_.arena_start >> _PageShift
+ s := h_spans[x]
+ if s == nil || pageID(k) < s.start || b >= s.limit || s.state != mSpanInUse {
+ return false
+ }
+ return true
+}
+
+func mlookup(v uintptr, base *uintptr, size *uintptr, sp **mspan) int32 {
+ _g_ := getg()
+
+ _g_.m.mcache.local_nlookup++
+ if ptrSize == 4 && _g_.m.mcache.local_nlookup >= 1<<30 {
+ // purge cache stats to prevent overflow
+ lock(&mheap_.lock)
+ purgecachedstats(_g_.m.mcache)
+ unlock(&mheap_.lock)
+ }
+
+ s := mHeap_LookupMaybe(&mheap_, unsafe.Pointer(v))
+ if sp != nil {
+ *sp = s
+ }
+ if s == nil {
+ if base != nil {
+ *base = 0
+ }
+ if size != nil {
+ *size = 0
+ }
+ return 0
+ }
+
+ p := uintptr(s.start) << _PageShift
+ if s.sizeclass == 0 {
+ // Large object.
+ if base != nil {
+ *base = p
+ }
+ if size != nil {
+ *size = s.npages << _PageShift
+ }
+ return 1
+ }
+
+ n := s.elemsize
+ if base != nil {
+ i := (uintptr(v) - uintptr(p)) / n
+ *base = p + i*n
+ }
+ if size != nil {
+ *size = n
+ }
+
+ return 1
+}
+
// Initialize the heap.
func mHeap_Init(h *mheap, spans_size uintptr) {
fixAlloc_Init(&h.spanalloc, unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys)
@@ -635,6 +813,21 @@
span.prev.next = span
}
+const (
+ _KindSpecialFinalizer = 1
+ _KindSpecialProfile = 2
+ // Note: The finalizer special must be first because if we're freeing
+ // an object, a finalizer special will cause the freeing operation
+ // to abort, and we want to keep the other special records around
+ // if that happens.
+)
+
+type special struct {
+ next *special // linked list in span
+ offset uint16 // span offset of object
+ kind byte // kind of special
+}
+
// Adds the special record s to the list of special records for
// the object p. All fields of s should be filled in except for
// offset & next, which this routine will fill in.
@@ -723,6 +916,15 @@
return nil
}
+// The described object has a finalizer set for it.
+type specialfinalizer struct {
+ special special
+ fn *funcval
+ nret uintptr
+ fint *_type
+ ot *ptrtype
+}
+
// Adds a finalizer to the object p. Returns true if it succeeded.
func addfinalizer(p unsafe.Pointer, f *funcval, nret uintptr, fint *_type, ot *ptrtype) bool {
lock(&mheap_.speciallock)
@@ -755,6 +957,12 @@
unlock(&mheap_.speciallock)
}
+// The described object is being heap profiled.
+type specialprofile struct {
+ special special
+ b *bucket
+}
+
// Set the heap profile bucket associated with addr to b.
func setprofilebucket(p unsafe.Pointer, b *bucket) {
lock(&mheap_.speciallock)
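
A minimal sketch (illustrative names and page size, not the runtime's) of the address-to-span lookup that inheap and mlookup above perform: the page index of an address relative to arena_start indexes a per-page table that records each heap page's owning span.

	package main

	import "fmt"

	const pageShift = 12 // 4 KB pages for this example; the runtime uses _PageShift

	type span struct{ start, limit uintptr }

	// findSpan maps a heap address to its owning span via the per-page table,
	// mirroring the h_spans lookup in inheap. arenaStart is assumed page-aligned.
	func findSpan(pages []*span, arenaStart, p uintptr) *span {
		idx := (p - arenaStart) >> pageShift
		if p < arenaStart || idx >= uintptr(len(pages)) {
			return nil
		}
		return pages[idx]
	}

	func main() {
		arena := uintptr(1 << 20)
		s := &span{start: arena, limit: arena + 2*(1<<pageShift)}
		pages := []*span{s, s} // two pages owned by the same span
		fmt.Println(findSpan(pages, arena, arena+100) == s) // true
	}
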
diff --git a/src/runtime/msize.go b/src/runtime/msize.go
index a113b0d..370cae6 100644
--- a/src/runtime/msize.go
+++ b/src/runtime/msize.go
@@ -27,8 +27,15 @@
package runtime
-//var class_to_size [_NumSizeClasses]int32
-//var class_to_allocnpages [_NumSizeClasses]int32
+// Size classes. Computed and initialized by InitSizes.
+//
+// SizeToClass(0 <= n <= MaxSmallSize) returns the size class,
+// 1 <= sizeclass < NumSizeClasses, for n.
+// Size class 0 is reserved to mean "not small".
+//
+// class_to_size[i] = largest size in class i
+// class_to_allocnpages[i] = number of pages to allocate when
+// making new objects in class i
// The SizeToClass lookup is implemented using two arrays,
// one mapping sizes <= 1024 to their class and one mapping
@@ -38,8 +45,11 @@
// are 128-aligned, so the second array is indexed by the
// size divided by 128 (rounded up). The arrays are filled in
// by InitSizes.
-//var size_to_class8 [1024/8 + 1]int8
-//var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8
+
+var class_to_size [_NumSizeClasses]int32
+var class_to_allocnpages [_NumSizeClasses]int32
+var size_to_class8 [1024/8 + 1]int8
+var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8
func sizeToClass(size int32) int32 {
if size > _MaxSmallSize {
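
A self-contained sketch of the two-array lookup described in the comment above, assuming a 32 kB small-object limit and made-up table contents (the real tables are filled in by InitSizes): sizes up to 1024 bytes index one table by size/8 rounded up, and larger small sizes index a second table by (size-1024)/128 rounded up.

	package main

	import "fmt"

	var sizeToClass8 [1024/8 + 1]int8
	var sizeToClass128 [(32768-1024)/128 + 1]int8

	func sizeToClass(size int32) int32 {
		if size <= 1024 {
			return int32(sizeToClass8[(size+7)>>3])
		}
		return int32(sizeToClass128[(size-1024+127)>>7])
	}

	func main() {
		// Pretend one size class (call it 5) covers sizes 57..64 bytes.
		sizeToClass8[(64+7)>>3] = 5
		fmt.Println(sizeToClass(60)) // 5: 60 rounds up into the 57..64 bucket
	}
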
diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go
new file mode 100644
index 0000000..19b6833
--- /dev/null
+++ b/src/runtime/mstats.go
@@ -0,0 +1,340 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Memory statistics
+
+package runtime
+
+import "unsafe"
+
+// Statistics.
+// If you edit this structure, also edit type MemStats below.
+type mstats struct {
+ // General statistics.
+ alloc uint64 // bytes allocated and still in use
+ total_alloc uint64 // bytes allocated (even if freed)
+ sys uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
+ nlookup uint64 // number of pointer lookups
+ nmalloc uint64 // number of mallocs
+ nfree uint64 // number of frees
+
+ // Statistics about malloc heap.
+ // protected by mheap.lock
+ heap_alloc uint64 // bytes allocated and still in use
+ heap_sys uint64 // bytes obtained from system
+ heap_idle uint64 // bytes in idle spans
+ heap_inuse uint64 // bytes in non-idle spans
+ heap_released uint64 // bytes released to the os
+ heap_objects uint64 // total number of allocated objects
+
+ // Statistics about allocation of low-level fixed-size structures.
+ // Protected by FixAlloc locks.
+ stacks_inuse uint64 // this number is included in heap_inuse above
+ stacks_sys uint64 // always 0 in mstats
+ mspan_inuse uint64 // mspan structures
+ mspan_sys uint64
+ mcache_inuse uint64 // mcache structures
+ mcache_sys uint64
+ buckhash_sys uint64 // profiling bucket hash table
+ gc_sys uint64
+ other_sys uint64
+
+ // Statistics about garbage collector.
+ // Protected by mheap or stopping the world during GC.
+ next_gc uint64 // next gc (in heap_alloc time)
+ last_gc uint64 // last gc (in absolute time)
+ pause_total_ns uint64
+ pause_ns [256]uint64 // circular buffer of recent gc pause lengths
+ pause_end [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970)
+ numgc uint32
+ enablegc bool
+ debuggc bool
+
+ // Statistics about allocation size classes.
+
+ by_size [_NumSizeClasses]struct {
+ size uint32
+ nmalloc uint64
+ nfree uint64
+ }
+
+ tinyallocs uint64 // number of tiny allocations that didn't cause actual allocation; not exported to go directly
+}
+
+var memstats mstats
+
+// Note: the MemStats struct should be kept in sync with
+// the mstats struct above.
+
+// A MemStats records statistics about the memory allocator.
+type MemStats struct {
+ // General statistics.
+ Alloc uint64 // bytes allocated and still in use
+ TotalAlloc uint64 // bytes allocated (even if freed)
+ Sys uint64 // bytes obtained from system (sum of XxxSys below)
+ Lookups uint64 // number of pointer lookups
+ Mallocs uint64 // number of mallocs
+ Frees uint64 // number of frees
+
+ // Main allocation heap statistics.
+ HeapAlloc uint64 // bytes allocated and still in use
+ HeapSys uint64 // bytes obtained from system
+ HeapIdle uint64 // bytes in idle spans
+ HeapInuse uint64 // bytes in non-idle span
+ HeapReleased uint64 // bytes released to the OS
+ HeapObjects uint64 // total number of allocated objects
+
+ // Low-level fixed-size structure allocator statistics.
+ // Inuse is bytes used now.
+ // Sys is bytes obtained from system.
+ StackInuse uint64 // bytes used by stack allocator
+ StackSys uint64
+ MSpanInuse uint64 // mspan structures
+ MSpanSys uint64
+ MCacheInuse uint64 // mcache structures
+ MCacheSys uint64
+ BuckHashSys uint64 // profiling bucket hash table
+ GCSys uint64 // GC metadata
+ OtherSys uint64 // other system allocations
+
+ // Garbage collector statistics.
+ NextGC uint64 // next collection will happen when HeapAlloc ≥ this amount
+ LastGC uint64 // end time of last collection (nanoseconds since 1970)
+ PauseTotalNs uint64
+ PauseNs [256]uint64 // circular buffer of recent GC pause durations, most recent at [(NumGC+255)%256]
+ PauseEnd [256]uint64 // circular buffer of recent GC pause end times
+ NumGC uint32
+ EnableGC bool
+ DebugGC bool
+
+ // Per-size allocation statistics.
+ // 61 is NumSizeClasses in the C code.
+ BySize [61]struct {
+ Size uint32
+ Mallocs uint64
+ Frees uint64
+ }
+}
+
+// Size of the trailing by_size array differs between Go and C,
+// and all data after by_size is local to runtime, not exported.
+// NumSizeClasses was changed, but we can not change Go struct because of backward compatibility.
+// sizeof_C_MStats is what C thinks about size of Go struct.
+var sizeof_C_MStats = unsafe.Offsetof(memstats.by_size) + 61*unsafe.Sizeof(memstats.by_size[0])
+
+func init() {
+ var memStats MemStats
+ if sizeof_C_MStats != unsafe.Sizeof(memStats) {
+ println(sizeof_C_MStats, unsafe.Sizeof(memStats))
+ throw("MStats vs MemStatsType size mismatch")
+ }
+}
+
+// ReadMemStats populates m with memory allocator statistics.
+func ReadMemStats(m *MemStats) {
+ // Have to acquire worldsema to stop the world,
+ // because stoptheworld can only be used by
+ // one goroutine at a time, and there might be
+ // a pending garbage collection already calling it.
+ semacquire(&worldsema, false)
+ gp := getg()
+ gp.m.preemptoff = "read mem stats"
+ systemstack(stoptheworld)
+
+ systemstack(func() {
+ readmemstats_m(m)
+ })
+
+ gp.m.preemptoff = ""
+ gp.m.locks++
+ semrelease(&worldsema)
+ systemstack(starttheworld)
+ gp.m.locks--
+}
+
+func readmemstats_m(stats *MemStats) {
+ updatememstats(nil)
+
+ // Size of the trailing by_size array differs between Go and C,
+ // NumSizeClasses was changed, but we can not change Go struct because of backward compatibility.
+ memmove(unsafe.Pointer(stats), unsafe.Pointer(&memstats), sizeof_C_MStats)
+
+ // Stack numbers are part of the heap numbers, separate those out for user consumption
+ stats.StackSys = stats.StackInuse
+ stats.HeapInuse -= stats.StackInuse
+ stats.HeapSys -= stats.StackInuse
+}
+
+//go:linkname readGCStats runtime/debug.readGCStats
+func readGCStats(pauses *[]uint64) {
+ systemstack(func() {
+ readGCStats_m(pauses)
+ })
+}
+
+func readGCStats_m(pauses *[]uint64) {
+ p := *pauses
+ // Calling code in runtime/debug should make the slice large enough.
+ if cap(p) < len(memstats.pause_ns)+3 {
+ throw("short slice passed to readGCStats")
+ }
+
+ // Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
+ lock(&mheap_.lock)
+
+ n := memstats.numgc
+ if n > uint32(len(memstats.pause_ns)) {
+ n = uint32(len(memstats.pause_ns))
+ }
+
+ // The pause buffer is circular. The most recent pause is at
+ // pause_ns[(numgc-1)%len(pause_ns)], and then backward
+ // from there to go back farther in time. We deliver the times
+ // most recent first (in p[0]).
+ p = p[:cap(p)]
+ for i := uint32(0); i < n; i++ {
+ j := (memstats.numgc - 1 - i) % uint32(len(memstats.pause_ns))
+ p[i] = memstats.pause_ns[j]
+ p[n+i] = memstats.pause_end[j]
+ }
+
+ p[n+n] = memstats.last_gc
+ p[n+n+1] = uint64(memstats.numgc)
+ p[n+n+2] = memstats.pause_total_ns
+ unlock(&mheap_.lock)
+ *pauses = p[:n+n+3]
+}
+
+//go:nowritebarrier
+func updatememstats(stats *gcstats) {
+ if stats != nil {
+ *stats = gcstats{}
+ }
+ for mp := allm; mp != nil; mp = mp.alllink {
+ if stats != nil {
+ src := (*[unsafe.Sizeof(gcstats{}) / 8]uint64)(unsafe.Pointer(&mp.gcstats))
+ dst := (*[unsafe.Sizeof(gcstats{}) / 8]uint64)(unsafe.Pointer(stats))
+ for i, v := range src {
+ dst[i] += v
+ }
+ mp.gcstats = gcstats{}
+ }
+ }
+
+ memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse)
+ memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse)
+ memstats.sys = memstats.heap_sys + memstats.stacks_sys + memstats.mspan_sys +
+ memstats.mcache_sys + memstats.buckhash_sys + memstats.gc_sys + memstats.other_sys
+
+ // Calculate memory allocator stats.
+ // During program execution we only count number of frees and amount of freed memory.
+ // Current number of alive object in the heap and amount of alive heap memory
+ // are calculated by scanning all spans.
+ // Total number of mallocs is calculated as number of frees plus number of alive objects.
+ // Similarly, total amount of allocated memory is calculated as amount of freed memory
+ // plus amount of alive heap memory.
+ memstats.alloc = 0
+ memstats.total_alloc = 0
+ memstats.nmalloc = 0
+ memstats.nfree = 0
+ for i := 0; i < len(memstats.by_size); i++ {
+ memstats.by_size[i].nmalloc = 0
+ memstats.by_size[i].nfree = 0
+ }
+
+ // Flush MCache's to MCentral.
+ systemstack(flushallmcaches)
+
+ // Aggregate local stats.
+ cachestats()
+
+ // Scan all spans and count number of alive objects.
+ lock(&mheap_.lock)
+ for i := uint32(0); i < mheap_.nspan; i++ {
+ s := h_allspans[i]
+ if s.state != mSpanInUse {
+ continue
+ }
+ if s.sizeclass == 0 {
+ memstats.nmalloc++
+ memstats.alloc += uint64(s.elemsize)
+ } else {
+ memstats.nmalloc += uint64(s.ref)
+ memstats.by_size[s.sizeclass].nmalloc += uint64(s.ref)
+ memstats.alloc += uint64(s.ref) * uint64(s.elemsize)
+ }
+ }
+ unlock(&mheap_.lock)
+
+ // Aggregate by size class.
+ smallfree := uint64(0)
+ memstats.nfree = mheap_.nlargefree
+ for i := 0; i < len(memstats.by_size); i++ {
+ memstats.nfree += mheap_.nsmallfree[i]
+ memstats.by_size[i].nfree = mheap_.nsmallfree[i]
+ memstats.by_size[i].nmalloc += mheap_.nsmallfree[i]
+ smallfree += uint64(mheap_.nsmallfree[i]) * uint64(class_to_size[i])
+ }
+ memstats.nfree += memstats.tinyallocs
+ memstats.nmalloc += memstats.nfree
+
+ // Calculate derived stats.
+ memstats.total_alloc = uint64(memstats.alloc) + uint64(mheap_.largefree) + smallfree
+ memstats.heap_alloc = memstats.alloc
+ memstats.heap_objects = memstats.nmalloc - memstats.nfree
+}
+
+//go:nowritebarrier
+func cachestats() {
+ for i := 0; ; i++ {
+ p := allp[i]
+ if p == nil {
+ break
+ }
+ c := p.mcache
+ if c == nil {
+ continue
+ }
+ purgecachedstats(c)
+ }
+}
+
+//go:nowritebarrier
+func flushallmcaches() {
+ for i := 0; ; i++ {
+ p := allp[i]
+ if p == nil {
+ break
+ }
+ c := p.mcache
+ if c == nil {
+ continue
+ }
+ mCache_ReleaseAll(c)
+ stackcache_clear(c)
+ }
+}
+
+//go:nosplit
+func purgecachedstats(c *mcache) {
+ // Protected by either heap or GC lock.
+ h := &mheap_
+ memstats.heap_alloc += uint64(c.local_cachealloc)
+ c.local_cachealloc = 0
+ if trace.enabled {
+ traceHeapAlloc()
+ }
+ memstats.tinyallocs += uint64(c.local_tinyallocs)
+ c.local_tinyallocs = 0
+ memstats.nlookup += uint64(c.local_nlookup)
+ c.local_nlookup = 0
+ h.largefree += uint64(c.local_largefree)
+ c.local_largefree = 0
+ h.nlargefree += uint64(c.local_nlargefree)
+ c.local_nlargefree = 0
+ for i := 0; i < len(c.local_nsmallfree); i++ {
+ h.nsmallfree[i] += uint64(c.local_nsmallfree[i])
+ c.local_nsmallfree[i] = 0
+ }
+}
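
For orientation (not part of this change), here is a minimal, self-contained sketch of the exported side of the code above. It uses only the public runtime API, and the comments point back at the internal fields each value comes from:

	package main

	import (
		"fmt"
		"runtime"
	)

	func main() {
		var m runtime.MemStats
		runtime.ReadMemStats(&m) // briefly stops the world, then runs readmemstats_m

		fmt.Println("heap alloc:   ", m.HeapAlloc)
		fmt.Println("heap objects: ", m.HeapObjects)
		// heap_objects is computed as nmalloc - nfree in updatememstats,
		// so the exported counters satisfy the same identity.
		fmt.Println("mallocs-frees:", m.Mallocs-m.Frees)

		// PauseNs is the same circular buffer the GC fills; the most
		// recent pause, if any, sits at index (NumGC+255)%256.
		if m.NumGC > 0 {
			fmt.Println("last GC pause:", m.PauseNs[(m.NumGC+255)%256])
		}
	}
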
diff --git a/src/runtime/proc1.go b/src/runtime/proc1.go
index 471ffc8..a47df13 100644
--- a/src/runtime/proc1.go
+++ b/src/runtime/proc1.go
@@ -528,6 +528,21 @@
mcall(mquiesce)
}
+// Holding worldsema grants an M the right to try to stop the world.
+// The procedure is:
+//
+// semacquire(&worldsema);
+// m.preemptoff = "reason";
+// stoptheworld();
+//
+// ... do stuff ...
+//
+// m.preemptoff = "";
+// semrelease(&worldsema);
+// starttheworld();
+//
+var worldsema uint32 = 1
+
// This is used by the GC as well as the routines that do stack dumps. In the case
// of GC all the routines can be reliably stopped. This is not always the case
// when the system is in panic or being exited.
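
Aside (not part of the diff): the protocol above is exactly what ReadMemStats and runtime_debug_WriteHeapDump do. Packaged as a helper it would look like the sketch below; the name stopTheWorldAndRun is invented here for illustration and does not exist in the runtime:

	// Hypothetical helper, for illustration only: it repackages the sequence
	// already used by ReadMemStats above and follows the worldsema protocol.
	func stopTheWorldAndRun(reason string, fn func()) {
		semacquire(&worldsema, false)
		gp := getg()
		gp.m.preemptoff = reason
		systemstack(stoptheworld)

		systemstack(fn)

		gp.m.preemptoff = ""
		gp.m.locks++ // keep this M from being preempted until the world is restarted
		semrelease(&worldsema)
		systemstack(starttheworld)
		gp.m.locks--
	}
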
diff --git a/src/runtime/stubs.go b/src/runtime/stubs.go
index 9aa83ef..00f74f8 100644
--- a/src/runtime/stubs.go
+++ b/src/runtime/stubs.go
@@ -239,3 +239,13 @@
func prefetcht1(addr uintptr)
func prefetcht2(addr uintptr)
func prefetchnta(addr uintptr)
+
+func unixnanotime() int64 {
+ sec, nsec := time_now()
+ return sec*1e9 + int64(nsec)
+}
+
+// round n up to a multiple of a. a must be a power of 2.
+func round(n, a uintptr) uintptr {
+ return (n + a - 1) &^ (a - 1)
+}
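
A quick check of the bit trick in round (not part of the change): adding a-1 and then clearing the low bits with &^ rounds n up to the next multiple of the power of two a:

	package main

	import "fmt"

	func round(n, a uintptr) uintptr {
		return (n + a - 1) &^ (a - 1) // same expression as in stubs.go
	}

	func main() {
		fmt.Println(round(13, 8))   // 16: (13+7)&^7 clears the low three bits of 20
		fmt.Println(round(16, 8))   // 16: already a multiple, unchanged
		fmt.Println(round(1, 4096)) // 4096: a single byte rounds up to a full page
	}
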
diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go
index 3e46d42..4f3111d 100644
--- a/src/runtime/symtab.go
+++ b/src/runtime/symtab.go
@@ -299,3 +299,17 @@
}
return p, v
}
+
+type stackmap struct {
+ n int32 // number of bitmaps
+ nbit int32 // number of bits in each bitmap
+ bytedata [1]byte // bitmaps, each starting on a 32-bit boundary
+}
+
+//go:nowritebarrier
+func stackmapdata(stkmap *stackmap, n int32) bitvector {
+ if n < 0 || n >= stkmap.n {
+ throw("stackmapdata: index out of range")
+ }
+ return bitvector{stkmap.nbit, (*byte)(add(unsafe.Pointer(&stkmap.bytedata), uintptr(n*((stkmap.nbit+31)/32*4))))}
+}
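
The offset expression in stackmapdata encodes the layout: each bitmap holds nbit bits padded out to whole 32-bit words. A standalone sketch of the same arithmetic (illustrative only, not runtime code):

	package main

	import "fmt"

	// bitmapBytes returns the storage used by one bitmap of nbit bits when each
	// bitmap is padded to a 32-bit boundary, matching stackmapdata's offset math.
	func bitmapBytes(nbit int32) int32 {
		return (nbit + 31) / 32 * 4
	}

	func main() {
		// A stackmap with 3 bitmaps of 20 bits each: each bitmap occupies 4 bytes,
		// so bitmap n starts at byte offset n*4 from bytedata.
		nbit := int32(20)
		for n := int32(0); n < 3; n++ {
			fmt.Printf("bitmap %d starts at byte offset %d\n", n, n*bitmapBytes(nbit))
		}
	}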