| // Copyright 2009 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Garbage collector: sweeping |
| |
| package runtime |
| |
| import ( |
| "runtime/internal/atomic" |
| "unsafe" |
| ) |
| |
| var sweep sweepdata |
| |
| // State of background sweep. |
| type sweepdata struct { |
| lock mutex |
| g *g |
| parked bool |
| started bool |
| |
| spanidx uint32 // background sweeper position |
| |
| nbgsweep uint32 |
| npausesweep uint32 |
| } |
| |
| //go:nowritebarrier |
| func finishsweep_m(stw bool) { |
| // Sweeping must be complete before marking commences, so |
| // sweep any unswept spans. If this is a concurrent GC, there |
| // shouldn't be any spans left to sweep, so this should finish |
| // instantly. If GC was forced before the concurrent sweep |
| // finished, there may be spans to sweep. |
| for sweepone() != ^uintptr(0) { |
| sweep.npausesweep++ |
| } |
| |
| // There may be some other spans being swept concurrently that |
| // we need to wait for. If finishsweep_m is done with the world stopped |
| // this is not required because the STW must have waited for sweeps. |
| // |
| // TODO(austin): As of this writing, we always pass true for stw. |
| // Consider removing this code. |
| if !stw { |
| sg := mheap_.sweepgen |
| for _, s := range work.spans { |
| if s.sweepgen != sg && s.state == _MSpanInUse { |
| s.ensureSwept() |
| } |
| } |
| } |
| nextMarkBitArenaEpoch() |
| } |
| |
| func bgsweep(c chan int) { |
| sweep.g = getg() |
| |
| lock(&sweep.lock) |
| sweep.parked = true |
| c <- 1 |
| goparkunlock(&sweep.lock, "GC sweep wait", traceEvGoBlock, 1) |
| |
| for { |
| for gosweepone() != ^uintptr(0) { |
| sweep.nbgsweep++ |
| Gosched() |
| } |
| lock(&sweep.lock) |
| if !gosweepdone() { |
| // This can happen if a GC runs between |
| // gosweepone returning ^0 above |
| // and the lock being acquired. |
| unlock(&sweep.lock) |
| continue |
| } |
| sweep.parked = true |
| goparkunlock(&sweep.lock, "GC sweep wait", traceEvGoBlock, 1) |
| } |
| } |
| |
| // sweeps one span |
| // returns number of pages returned to heap, or ^uintptr(0) if there is nothing to sweep |
| //go:nowritebarrier |
| func sweepone() uintptr { |
| _g_ := getg() |
| |
| // increment locks to ensure that the goroutine is not preempted |
| // in the middle of sweep thus leaving the span in an inconsistent state for next GC |
| _g_.m.locks++ |
| sg := mheap_.sweepgen |
| for { |
| idx := atomic.Xadd(&sweep.spanidx, 1) - 1 |
| if idx >= uint32(len(work.spans)) { |
| mheap_.sweepdone = 1 |
| _g_.m.locks-- |
| if debug.gcpacertrace > 0 && idx == uint32(len(work.spans)) { |
| print("pacer: sweep done at heap size ", memstats.heap_live>>20, "MB; allocated ", mheap_.spanBytesAlloc>>20, "MB of spans; swept ", mheap_.pagesSwept, " pages at ", mheap_.sweepPagesPerByte, " pages/byte\n") |
| } |
| return ^uintptr(0) |
| } |
| s := work.spans[idx] |
| if s.state != mSpanInUse { |
| s.sweepgen = sg |
| continue |
| } |
| if s.sweepgen != sg-2 || !atomic.Cas(&s.sweepgen, sg-2, sg-1) { |
| continue |
| } |
| npages := s.npages |
| if !s.sweep(false) { |
| npages = 0 |
| } |
| _g_.m.locks-- |
| return npages |
| } |
| } |
| |
| //go:nowritebarrier |
| func gosweepone() uintptr { |
| var ret uintptr |
| systemstack(func() { |
| ret = sweepone() |
| }) |
| return ret |
| } |
| |
| //go:nowritebarrier |
| func gosweepdone() bool { |
| return mheap_.sweepdone != 0 |
| } |
| |
| // Returns only when span s has been swept. |
| //go:nowritebarrier |
| func (s *mspan) ensureSwept() { |
| // Caller must disable preemption. |
| // Otherwise when this function returns the span can become unswept again |
| // (if GC is triggered on another goroutine). |
| _g_ := getg() |
| if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 { |
| throw("MSpan_EnsureSwept: m is not locked") |
| } |
| |
| sg := mheap_.sweepgen |
| if atomic.Load(&s.sweepgen) == sg { |
| return |
| } |
| // The caller must be sure that the span is a MSpanInUse span. |
| if atomic.Cas(&s.sweepgen, sg-2, sg-1) { |
| s.sweep(false) |
| return |
| } |
| // unfortunate condition, and we don't have efficient means to wait |
| for atomic.Load(&s.sweepgen) != sg { |
| osyield() |
| } |
| } |
| |
| // Sweep frees or collects finalizers for blocks not marked in the mark phase. |
| // It clears the mark bits in preparation for the next GC round. |
| // Returns true if the span was returned to heap. |
| // If preserve=true, don't return it to heap nor relink in MCentral lists; |
| // caller takes care of it. |
| //TODO go:nowritebarrier |
| func (s *mspan) sweep(preserve bool) bool { |
| // It's critical that we enter this function with preemption disabled, |
| // GC must not start while we are in the middle of this function. |
| _g_ := getg() |
| if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 { |
| throw("MSpan_Sweep: m is not locked") |
| } |
| sweepgen := mheap_.sweepgen |
| if s.state != mSpanInUse || s.sweepgen != sweepgen-1 { |
| print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n") |
| throw("MSpan_Sweep: bad span state") |
| } |
| |
| if trace.enabled { |
| traceGCSweepStart() |
| } |
| |
| atomic.Xadd64(&mheap_.pagesSwept, int64(s.npages)) |
| |
| cl := s.sizeclass |
| size := s.elemsize |
| res := false |
| nfree := 0 |
| |
| c := _g_.m.mcache |
| freeToHeap := false |
| |
| // The allocBits indicate which unmarked objects don't need to be |
| // processed since they were free at the end of the last GC cycle |
| // and were not allocated since then. |
| // If the allocBits index is >= s.freeindex and the bit |
| // is not marked then the object remains unallocated |
| // since the last GC. |
| // This situation is analogous to being on a freelist. |
| |
| // Unlink & free special records for any objects we're about to free. |
| // Two complications here: |
| // 1. An object can have both finalizer and profile special records. |
| // In such case we need to queue finalizer for execution, |
| // mark the object as live and preserve the profile special. |
| // 2. A tiny object can have several finalizers setup for different offsets. |
| // If such object is not marked, we need to queue all finalizers at once. |
| // Both 1 and 2 are possible at the same time. |
| specialp := &s.specials |
| special := *specialp |
| for special != nil { |
| // A finalizer can be set for an inner byte of an object, find object beginning. |
| objIndex := uintptr(special.offset) / size |
| p := s.base() + objIndex*size |
| mbits := s.markBitsForIndex(objIndex) |
| if !mbits.isMarked() { |
| // This object is not marked and has at least one special record. |
| // Pass 1: see if it has at least one finalizer. |
| hasFin := false |
| endOffset := p - s.base() + size |
| for tmp := special; tmp != nil && uintptr(tmp.offset) < endOffset; tmp = tmp.next { |
| if tmp.kind == _KindSpecialFinalizer { |
| // Stop freeing of object if it has a finalizer. |
| mbits.setMarkedNonAtomic() |
| hasFin = true |
| break |
| } |
| } |
| // Pass 2: queue all finalizers _or_ handle profile record. |
| for special != nil && uintptr(special.offset) < endOffset { |
| // Find the exact byte for which the special was setup |
| // (as opposed to object beginning). |
| p := s.base() + uintptr(special.offset) |
| if special.kind == _KindSpecialFinalizer || !hasFin { |
| // Splice out special record. |
| y := special |
| special = special.next |
| *specialp = special |
| freespecial(y, unsafe.Pointer(p), size) |
| } else { |
| // This is profile record, but the object has finalizers (so kept alive). |
| // Keep special record. |
| specialp = &special.next |
| special = *specialp |
| } |
| } |
| } else { |
| // object is still live: keep special record |
| specialp = &special.next |
| special = *specialp |
| } |
| } |
| |
| if debug.allocfreetrace != 0 || raceenabled || msanenabled { |
| // Find all newly freed objects. This doesn't have to |
| // efficient; allocfreetrace has massive overhead. |
| mbits := s.markBitsForBase() |
| abits := s.allocBitsForIndex(0) |
| for i := uintptr(0); i < s.nelems; i++ { |
| if !mbits.isMarked() && (abits.index < s.freeindex || abits.isMarked()) { |
| x := s.base() + i*s.elemsize |
| if debug.allocfreetrace != 0 { |
| tracefree(unsafe.Pointer(x), size) |
| } |
| if raceenabled { |
| racefree(unsafe.Pointer(x), size) |
| } |
| if msanenabled { |
| msanfree(unsafe.Pointer(x), size) |
| } |
| } |
| mbits.advance() |
| abits.advance() |
| } |
| } |
| |
| // Count the number of free objects in this span. |
| nfree = s.countFree() |
| if cl == 0 && nfree != 0 { |
| s.needzero = 1 |
| freeToHeap = true |
| } |
| nalloc := uint16(s.nelems) - uint16(nfree) |
| nfreed := s.allocCount - nalloc |
| if nalloc > s.allocCount { |
| print("runtime: nelems=", s.nelems, " nfree=", nfree, " nalloc=", nalloc, " previous allocCount=", s.allocCount, " nfreed=", nfreed, "\n") |
| throw("sweep increased allocation count") |
| } |
| |
| s.allocCount = nalloc |
| wasempty := s.nextFreeIndex() == s.nelems |
| s.freeindex = 0 // reset allocation index to start of span. |
| |
| // gcmarkBits becomes the allocBits. |
| // get a fresh cleared gcmarkBits in preparation for next GC |
| s.allocBits = s.gcmarkBits |
| s.gcmarkBits = newMarkBits(s.nelems) |
| |
| // Initialize alloc bits cache. |
| s.refillAllocCache(0) |
| |
| // We need to set s.sweepgen = h.sweepgen only when all blocks are swept, |
| // because of the potential for a concurrent free/SetFinalizer. |
| // But we need to set it before we make the span available for allocation |
| // (return it to heap or mcentral), because allocation code assumes that a |
| // span is already swept if available for allocation. |
| if freeToHeap || nfreed == 0 { |
| // The span must be in our exclusive ownership until we update sweepgen, |
| // check for potential races. |
| if s.state != mSpanInUse || s.sweepgen != sweepgen-1 { |
| print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n") |
| throw("MSpan_Sweep: bad span state after sweep") |
| } |
| // Serialization point. |
| // At this point the mark bits are cleared and allocation ready |
| // to go so release the span. |
| atomic.Store(&s.sweepgen, sweepgen) |
| } |
| |
| if nfreed > 0 && cl != 0 { |
| c.local_nsmallfree[cl] += uintptr(nfreed) |
| res = mheap_.central[cl].mcentral.freeSpan(s, preserve, wasempty) |
| // MCentral_FreeSpan updates sweepgen |
| } else if freeToHeap { |
| // Free large span to heap |
| |
| // NOTE(rsc,dvyukov): The original implementation of efence |
| // in CL 22060046 used SysFree instead of SysFault, so that |
| // the operating system would eventually give the memory |
| // back to us again, so that an efence program could run |
| // longer without running out of memory. Unfortunately, |
| // calling SysFree here without any kind of adjustment of the |
| // heap data structures means that when the memory does |
| // come back to us, we have the wrong metadata for it, either in |
| // the MSpan structures or in the garbage collection bitmap. |
| // Using SysFault here means that the program will run out of |
| // memory fairly quickly in efence mode, but at least it won't |
| // have mysterious crashes due to confused memory reuse. |
| // It should be possible to switch back to SysFree if we also |
| // implement and then call some kind of MHeap_DeleteSpan. |
| if debug.efence > 0 { |
| s.limit = 0 // prevent mlookup from finding this span |
| sysFault(unsafe.Pointer(s.base()), size) |
| } else { |
| mheap_.freeSpan(s, 1) |
| } |
| c.local_nlargefree++ |
| c.local_largefree += size |
| res = true |
| } |
| if trace.enabled { |
| traceGCSweepDone() |
| } |
| return res |
| } |
| |
| // deductSweepCredit deducts sweep credit for allocating a span of |
| // size spanBytes. This must be performed *before* the span is |
| // allocated to ensure the system has enough credit. If necessary, it |
| // performs sweeping to prevent going in to debt. If the caller will |
| // also sweep pages (e.g., for a large allocation), it can pass a |
| // non-zero callerSweepPages to leave that many pages unswept. |
| // |
| // deductSweepCredit makes a worst-case assumption that all spanBytes |
| // bytes of the ultimately allocated span will be available for object |
| // allocation. The caller should call reimburseSweepCredit if that |
| // turns out not to be the case once the span is allocated. |
| // |
| // deductSweepCredit is the core of the "proportional sweep" system. |
| // It uses statistics gathered by the garbage collector to perform |
| // enough sweeping so that all pages are swept during the concurrent |
| // sweep phase between GC cycles. |
| // |
| // mheap_ must NOT be locked. |
| func deductSweepCredit(spanBytes uintptr, callerSweepPages uintptr) { |
| if mheap_.sweepPagesPerByte == 0 { |
| // Proportional sweep is done or disabled. |
| return |
| } |
| |
| // Account for this span allocation. |
| spanBytesAlloc := atomic.Xadd64(&mheap_.spanBytesAlloc, int64(spanBytes)) |
| |
| // Fix debt if necessary. |
| pagesOwed := int64(mheap_.sweepPagesPerByte * float64(spanBytesAlloc)) |
| for pagesOwed-int64(atomic.Load64(&mheap_.pagesSwept)) > int64(callerSweepPages) { |
| if gosweepone() == ^uintptr(0) { |
| mheap_.sweepPagesPerByte = 0 |
| break |
| } |
| } |
| } |
| |
| // reimburseSweepCredit records that unusableBytes bytes of a |
| // just-allocated span are not available for object allocation. This |
| // offsets the worst-case charge performed by deductSweepCredit. |
| func reimburseSweepCredit(unusableBytes uintptr) { |
| if mheap_.sweepPagesPerByte == 0 { |
| // Nobody cares about the credit. Avoid the atomic. |
| return |
| } |
| if int64(atomic.Xadd64(&mheap_.spanBytesAlloc, -int64(unusableBytes))) < 0 { |
| throw("spanBytesAlloc underflow") |
| } |
| } |