| // Copyright 2014 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package runtime |
| |
| import ( |
| "unsafe" |
| ) |
| |
| const ( |
| debugMalloc = false |
| |
| flagNoScan = 1 << 0 // GC doesn't have to scan object |
| flagNoZero = 1 << 1 // don't zero memory |
| |
| kindArray = 17 |
| kindFunc = 19 |
| kindInterface = 20 |
| kindPtr = 22 |
| kindStruct = 25 |
| kindMask = 1<<6 - 1 |
| kindGCProg = 1 << 6 |
| kindNoPointers = 1 << 7 |
| |
| maxTinySize = 16 |
| tinySizeClass = 2 |
| maxSmallSize = 32 << 10 |
| |
| pageShift = 13 |
| pageSize = 1 << pageShift |
| pageMask = pageSize - 1 |
| |
| wordsPerBitmapWord = ptrSize * 8 / gcBits |
| gcBits = 4 |
| bitsPerPointer = 2 |
| bitsMask = 1<<bitsPerPointer - 1 |
| pointersPerByte = 8 / bitsPerPointer |
| bitPtrMask = bitsMask << 2 |
| maxGCMask = 0 // disabled because it wastes several bytes of memory |
| bitsDead = 0 |
| bitsPointer = 2 |
| |
| bitMiddle = 0 |
| bitBoundary = 1 |
| bitAllocated = 2 |
| bitMarked = 3 |
| bitMask = bitMiddle | bitBoundary | bitAllocated | bitMarked |
| ) |
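| |
| // The heap bitmap lives just before mheap_.arena_start and grows toward |
| // lower addresses. Each heap word is described by gcBits (4) bits: the |
| // low 2 bits hold the allocation state (bitMiddle, bitBoundary, |
| // bitAllocated, bitMarked) and the high 2 bits hold pointer information |
| // (bitsDead, bitsPointer). For example, a 2-word object whose words are |
| // both pointers is described by the byte |
| // (bitsPointer<<2) | (bitsPointer<<6) | bitAllocated, |
| // as in the size == 2*ptrSize case of gomallocgc below. |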
| |
| // All zero-sized allocations return a pointer to this byte. |
| var zeroObject byte |
| |
| // Maximum possible heap size. |
| var maxMem uintptr |
| |
| // Allocate an object of size bytes. |
| // Small objects are allocated from the per-P cache's free lists. |
| // Large objects (> 32 kB) are allocated straight from the heap. |
| func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer { |
| if size == 0 { |
| return unsafe.Pointer(&zeroObject) |
| } |
| mp := acquirem() |
| if mp.mallocing != 0 { |
| gothrow("malloc/free - deadlock") |
| } |
| mp.mallocing = 1 |
| size0 := size |
| |
| c := mp.mcache |
| var s *mspan |
| var x unsafe.Pointer |
| if size <= maxSmallSize { |
| if flags&flagNoScan != 0 && size < maxTinySize { |
| // Tiny allocator. |
| // |
| // The tiny allocator combines several tiny allocation requests |
| // into a single memory block. The resulting memory block |
| // is freed when all subobjects are unreachable. The subobjects |
| // must be flagNoScan (have no pointers); this ensures that |
| // the amount of potentially wasted memory is bounded. |
| // |
| // The size of the memory block used for combining (maxTinySize) is tunable. |
| // The current setting is 16 bytes, which bounds worst-case memory |
| // wastage at 2x (when all but one of the subobjects are unreachable). |
| // 8 bytes would result in no wastage at all, but would provide fewer |
| // opportunities for combining. |
| // 32 bytes provides more opportunities for combining, |
| // but can lead to 4x worst-case wastage. |
| // The best-case win is 8x regardless of block size. |
| // |
| // Objects obtained from the tiny allocator must not be freed explicitly, |
| // so when an object is going to be freed explicitly, we ensure that |
| // its size >= maxTinySize. |
| // |
| // SetFinalizer has a special case for objects potentially coming |
| // from the tiny allocator; in that case it allows setting finalizers |
| // for an inner byte of a memory block. |
| // |
| // The main targets of the tiny allocator are small strings and |
| // standalone escaping variables. On a json benchmark |
| // the allocator reduces the number of allocations by ~12% and |
| // reduces the heap size by ~20%. |
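| // |
| // For illustration, a sketch (assuming the cache's tiny block starts |
| // empty and ignoring refills): |
| // |
| //	p1 := gomallocgc(4, nil, flagNoScan) // grabs a fresh 16-byte block |
| //	p2 := gomallocgc(4, nil, flagNoScan) // placed at p1+4 in the same block |
| //	p3 := gomallocgc(8, nil, flagNoScan) // placed at p1+8, filling the block |
| // |
| // The block is freed only once p1, p2, and p3 are all unreachable. |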
| |
| tinysize := uintptr(c.tinysize) |
| if size <= tinysize { |
| tiny := unsafe.Pointer(c.tiny) |
| // Align tiny pointer for required (conservative) alignment. |
| if size&7 == 0 { |
| tiny = roundup(tiny, 8) |
| } else if size&3 == 0 { |
| tiny = roundup(tiny, 4) |
| } else if size&1 == 0 { |
| tiny = roundup(tiny, 2) |
| } |
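| // E.g., if c.tiny points 10 bytes into its block and size is 4, |
| // tiny is rounded up by 2 bytes so the object is 4-byte aligned. |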
| size1 := size + (uintptr(tiny) - uintptr(unsafe.Pointer(c.tiny))) |
| if size1 <= tinysize { |
| // The object fits into existing tiny block. |
| x = tiny |
| c.tiny = (*byte)(add(x, size)) |
| c.tinysize -= uint(size1) |
| mp.mallocing = 0 |
| releasem(mp) |
| return x |
| } |
| } |
| // Allocate a new maxTinySize block. |
| s = c.alloc[tinySizeClass] |
| v := s.freelist |
| if v == nil { |
| mp.scalararg[0] = tinySizeClass |
| onM(&mcacheRefill_m) |
| s = c.alloc[tinySizeClass] |
| v = s.freelist |
| } |
| s.freelist = v.next |
| s.ref++ |
| //TODO: prefetch v.next |
| x = unsafe.Pointer(v) |
| (*[2]uint64)(x)[0] = 0 |
| (*[2]uint64)(x)[1] = 0 |
| // See if we need to replace the existing tiny block with the new one |
| // based on amount of remaining free space. |
| if maxTinySize-size > tinysize { |
| c.tiny = (*byte)(add(x, size)) |
| c.tinysize = uint(maxTinySize - size) |
| } |
| size = maxTinySize |
| } else { |
| var sizeclass int8 |
| if size <= 1024-8 { |
| sizeclass = size_to_class8[(size+7)>>3] |
| } else { |
| sizeclass = size_to_class128[(size-1024+127)>>7] |
| } |
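| // For example, with the standard class tables a 100-byte request |
| // looks up size_to_class8[(100+7)>>3] = size_to_class8[13] and is |
| // rounded up to the 112-byte class below (illustrative; the exact |
| // classes come from the generated tables). |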
| size = uintptr(class_to_size[sizeclass]) |
| s = c.alloc[sizeclass] |
| v := s.freelist |
| if v == nil { |
| mp.scalararg[0] = uint(sizeclass) |
| onM(&mcacheRefill_m) |
| s = c.alloc[sizeclass] |
| v = s.freelist |
| } |
| s.freelist = v.next |
| s.ref++ |
| //TODO: prefetch |
| x = unsafe.Pointer(v) |
| if flags&flagNoZero == 0 { |
| v.next = nil |
| if size > 2*ptrSize && ((*[2]uintptr)(x))[1] != 0 { |
| memclr(unsafe.Pointer(v), size) |
| } |
| } |
| } |
| c.local_cachealloc += int(size) |
| } else { |
| mp.scalararg[0] = uint(size) |
| mp.scalararg[1] = uint(flags) |
| onM(&largeAlloc_m) |
| s = (*mspan)(mp.ptrarg[0]) |
| mp.ptrarg[0] = nil |
| x = unsafe.Pointer(uintptr(s.start << pageShift)) |
| size = uintptr(s.elemsize) |
| } |
| |
| // From here until the marked label, we mark the object as allocated |
| // and store type info in the GC bitmap. |
| arena_start := uintptr(unsafe.Pointer(mheap_.arena_start)) |
| off := (uintptr(x) - arena_start) / ptrSize |
| xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize)) |
| shift := (off % wordsPerBitmapWord) * gcBits |
| if debugMalloc && (((*xbits)>>shift)&bitMask) != bitBoundary { |
| gothrow("bad bits in markallocated") |
| } |
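| // E.g., on a 64-bit system (ptrSize = 8, wordsPerBitmapWord = 16), |
| // the word at off = 20 is described by the nibble at |
| // shift = (20%16)*gcBits = 16 within the bitmap word at |
| // arena_start - (20/16)*ptrSize - ptrSize = arena_start - 16. |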
| |
| var ti, te uintptr |
| var ptrmask *uint8 |
| if flags&flagNoScan != 0 { |
| // bitsDead in the first quadruple means don't scan. |
| if size == ptrSize { |
| *xbits = (*xbits & ^((bitBoundary | bitPtrMask) << shift)) | ((bitAllocated + (bitsDead << 2)) << shift) |
| } else { |
| xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8)) |
| *xbitsb = bitAllocated + (bitsDead << 2) |
| } |
| goto marked |
| } |
| if size == ptrSize { |
| // The object is one word long and contains pointers, |
| // so it must itself be a pointer. |
| *xbits = (*xbits & ^((bitBoundary | bitPtrMask) << shift)) | ((bitAllocated | (bitsPointer << 2)) << shift) |
| goto marked |
| } |
| if typ != nil && (uintptr(typ.gc[0])|uintptr(typ.gc[1])) != 0 && uintptr(typ.size) > ptrSize { |
| if typ.kind&kindGCProg != 0 { |
| nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize |
| masksize := nptr |
| if masksize%2 != 0 { |
| masksize *= 2 // mask for a type with an odd number of words is repeated |
| } |
| masksize = masksize * pointersPerByte / 8 // 4 bits per word |
| masksize++ // unroll flag in the beginning |
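| // Note: with maxGCMask = 0 (disabled above), this branch is taken |
| // whenever typ.gc[1] != 0. |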
| if masksize > maxGCMask && typ.gc[1] != 0 { |
| // If the mask is too large, unroll the program directly |
| // into the GC bitmap. It's 7 times slower than copying |
| // from the pre-unrolled mask, but saves 1/16 of type size |
| // memory for the mask. |
| mp.ptrarg[0] = x |
| mp.ptrarg[1] = unsafe.Pointer(typ) |
| mp.scalararg[0] = uint(size) |
| mp.scalararg[1] = uint(size0) |
| onM(&unrollgcproginplace_m) |
| goto marked |
| } |
| ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0]))) |
| // Check whether the program is already unrolled. |
| if uintptr(goatomicloadp(unsafe.Pointer(ptrmask)))&0xff == 0 { |
| mp.ptrarg[0] = unsafe.Pointer(typ) |
| onM(&unrollgcprog_m) |
| } |
| ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte |
| } else { |
| ptrmask = (*uint8)(unsafe.Pointer(&typ.gc[0])) // embed mask |
| } |
| if size == 2*ptrSize { |
| xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8)) |
| *xbitsb = *ptrmask | bitAllocated |
| goto marked |
| } |
| te = uintptr(typ.size) / ptrSize |
| // If the type occupies an odd number of words, its mask is repeated. |
| if te%2 == 0 { |
| te /= 2 |
| } |
| } |
| if size == 2*ptrSize { |
| xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8)) |
| *xbitsb = (bitsPointer << 2) | (bitsPointer << 6) | bitAllocated |
| goto marked |
| } |
| // Copy pointer bitmask into the bitmap. |
| for i := uintptr(0); i < size0; i += 2 * ptrSize { |
| v := uint8((bitsPointer << 2) | (bitsPointer << 6)) |
| if ptrmask != nil { |
| v = *(*uint8)(add(unsafe.Pointer(ptrmask), ti)) |
| ti++ |
| if ti == te { |
| ti = 0 |
| } |
| } |
| if i == 0 { |
| v |= bitAllocated |
| } |
| if i+ptrSize == size0 { |
| v &= ^uint8(bitPtrMask << 4) |
| } |
| |
| off := (uintptr(x) + i - arena_start) / ptrSize |
| xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize)) |
| shift := (off % wordsPerBitmapWord) * gcBits |
| xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8)) |
| *xbitsb = v |
| } |
| if size0%(2*ptrSize) == 0 && size0 < size { |
| // Mark the word after the object's last word as bitsDead. |
| off := (uintptr(x) + size0 - arena_start) / ptrSize |
| xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize)) |
| shift := (off % wordsPerBitmapWord) * gcBits |
| xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8)) |
| *xbitsb = bitsDead << 2 |
| } |
| marked: |
| mp.mallocing = 0 |
| |
| if raceenabled { |
| racemalloc(x, size) |
| } |
| if debug.allocfreetrace != 0 { |
| tracealloc(x, size, typ) |
| } |
| |
| if rate := MemProfileRate; rate > 0 { |
| if size < uintptr(rate) && int32(size) < c.next_sample { |
| c.next_sample -= int32(size) |
| } else { |
| profilealloc(mp, x, size) |
| } |
| } |
| |
| releasem(mp) |
| |
| if memstats.heap_alloc >= memstats.next_gc { |
| gogc(0) |
| } |
| |
| return x |
| } |
| |
| // cmallocgc is a trampoline used to call the Go malloc from C. |
| func cmallocgc(size uintptr, typ *_type, flags int, ret *unsafe.Pointer) { |
| *ret = gomallocgc(size, typ, flags) |
| } |
| |
| // implementation of new builtin |
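| // (new(T) is compiled into a call of this function, |
| // passing T's type descriptor) |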
| func newobject(typ *_type) unsafe.Pointer { |
| flags := 0 |
| if typ.kind&kindNoPointers != 0 { |
| flags |= flagNoScan |
| } |
| return gomallocgc(uintptr(typ.size), typ, flags) |
| } |
| |
| // implementation of make builtin for slices |
| func newarray(typ *_type, n uintptr) unsafe.Pointer { |
| flags := 0 |
| if typ.kind&kindNoPointers != 0 { |
| flags |= flagNoScan |
| } |
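| // Check that n*typ.size does not overflow or exceed the maximum heap size. |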
| if int(n) < 0 || (typ.size > 0 && n > maxMem/uintptr(typ.size)) { |
| panic("runtime: allocation size out of range") |
| } |
| return gomallocgc(uintptr(typ.size)*n, typ, flags) |
| } |
| |
| // rawmem returns a chunk of pointerless memory. It is |
| // not zeroed. |
| func rawmem(size uintptr) unsafe.Pointer { |
| return gomallocgc(size, nil, flagNoScan|flagNoZero) |
| } |
| |
| // round size up to next size class |
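| // (e.g., with the standard class tables goroundupsize(100) returns 112; |
| // larger sizes round up to a multiple of pageSize) |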
| func goroundupsize(size uintptr) uintptr { |
| if size < maxSmallSize { |
| if size <= 1024-8 { |
| return uintptr(class_to_size[size_to_class8[(size+7)>>3]]) |
| } |
| return uintptr(class_to_size[size_to_class128[(size-1024+127)>>7]]) |
| } |
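| // If rounding up to a page would overflow, return size unrounded. |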
| if size+pageSize < size { |
| return size |
| } |
| return (size + pageSize - 1) &^ pageMask |
| } |
| |
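| // profilealloc records the allocation (x, size) in the memory profile. |
| // For allocations smaller than the sampling rate it also picks the |
| // byte count at which the next sample will be taken, drawn uniformly |
| // from [0, 2*rate) so that on average one sample occurs per rate bytes. |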
| func profilealloc(mp *m, x unsafe.Pointer, size uintptr) { |
| c := mp.mcache |
| rate := MemProfileRate |
| if size < uintptr(rate) { |
| // pick next profile time |
| // If you change this, also change allocmcache. |
| if rate > 0x3fffffff { // make 2*rate not overflow |
| rate = 0x3fffffff |
| } |
| next := int32(fastrand2()) % (2 * int32(rate)) |
| // Subtract the "remainder" of the current allocation. |
| // Otherwise objects that are close in size to sampling rate |
| // will be under-sampled, because we consistently discard this remainder. |
| next -= (int32(size) - c.next_sample) |
| if next < 0 { |
| next = 0 |
| } |
| c.next_sample = next |
| } |
| mp.scalararg[0] = uint(size) |
| mp.ptrarg[0] = x |
| onM(&mprofMalloc_m) |
| } |
| |
| // force = 0 - collect only if the heap has grown past next_gc |
| // force = 1 - do GC regardless of current heap usage |
| // force = 2 - do GC and eager sweep |
| func gogc(force int32) { |
| if memstats.enablegc == 0 { |
| return |
| } |
| |
| // TODO: should never happen? Only C calls malloc while holding a lock? |
| mp := acquirem() |
| if mp.locks > 1 { |
| releasem(mp) |
| return |
| } |
| releasem(mp) |
| |
| if panicking != 0 { |
| return |
| } |
| if gcpercent == gcpercentUnknown { |
| golock(&mheap_.lock) |
| if gcpercent == gcpercentUnknown { |
| gcpercent = goreadgogc() |
| } |
| gounlock(&mheap_.lock) |
| } |
| if gcpercent < 0 { |
| return |
| } |
| |
| semacquire(&worldsema, false) |
| |
| if force == 0 && memstats.heap_alloc < memstats.next_gc { |
| // Typically threads which lost the race to grab |
| // worldsema exit here when gc is done. |
| semrelease(&worldsema) |
| return |
| } |
| |
| // Ok, we're doing it! Stop everybody else |
| startTime := gonanotime() |
| mp = acquirem() |
| mp.gcing = 1 |
| stoptheworld() |
| |
| clearpools() |
| |
| // Run gc on the g0 stack. We do this so that the g stack |
| // we're currently running on will no longer change. Cuts |
| // the root set down a bit (g0 stacks are not scanned, and |
| // we don't need to scan gc's internal state). We also |
| // need to switch to g0 so we can shrink the stack. |
| n := 1 |
| if debug.gctrace > 1 { |
| n = 2 |
| } |
| for i := 0; i < n; i++ { |
| if i > 0 { |
| startTime = gonanotime() |
| } |
| // switch to g0, call gc, then switch back |
| mp.scalararg[0] = uint(startTime) |
| if force >= 2 { |
| mp.scalararg[1] = 1 // eagersweep |
| } else { |
| mp.scalararg[1] = 0 |
| } |
| onM(&gc_m) |
| } |
| |
| // all done |
| mp.gcing = 0 |
| semrelease(&worldsema) |
| starttheworld() |
| releasem(mp) |
| |
| // now that gc is done, kick off finalizer thread if needed |
| if !concurrentSweep { |
| // give the queued finalizers, if any, a chance to run |
| gosched() |
| } |
| } |
| |
| // GC runs a garbage collection. |
| func GC() { |
| gogc(2) |
| } |
| |
| // SetFinalizer sets the finalizer associated with x to f. |
| // When the garbage collector finds an unreachable block |
| // with an associated finalizer, it clears the association and runs |
| // f(x) in a separate goroutine. This makes x reachable again, but |
| // now without an associated finalizer. Assuming that SetFinalizer |
| // is not called again, the next time the garbage collector sees |
| // that x is unreachable, it will free x. |
| // |
| // SetFinalizer(x, nil) clears any finalizer associated with x. |
| // |
| // The argument x must be a pointer to an object allocated by |
| // calling new or by taking the address of a composite literal. |
| // The argument f must be a function that takes a single argument |
| // to which x's type can be assigned, and can have arbitrary ignored return |
| // values. If either of these is not true, SetFinalizer aborts the |
| // program. |
| // |
| // Finalizers are run in dependency order: if A points at B, both have |
| // finalizers, and they are otherwise unreachable, only the finalizer |
| // for A runs; once A is freed, the finalizer for B can run. |
| // If a cyclic structure includes a block with a finalizer, that |
| // cycle is not guaranteed to be garbage collected and the finalizer |
| // is not guaranteed to run, because there is no ordering that |
| // respects the dependencies. |
| // |
| // The finalizer for x is scheduled to run at some arbitrary time after |
| // x becomes unreachable. |
| // There is no guarantee that finalizers will run before a program exits, |
| // so typically they are useful only for releasing non-memory resources |
| // associated with an object during a long-running program. |
| // For example, an os.File object could use a finalizer to close the |
| // associated operating system file descriptor when a program discards |
| // an os.File without calling Close, but it would be a mistake |
| // to depend on a finalizer to flush an in-memory I/O buffer such as a |
| // bufio.Writer, because the buffer would not be flushed at program exit. |
| // |
| // It is not guaranteed that a finalizer will run if the size of *x is |
| // zero bytes. |
| // |
| // A single goroutine runs all finalizers for a program, sequentially. |
| // If a finalizer must run for a long time, it should do so by starting |
| // a new goroutine. |
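| // |
| // A sketch of typical use (illustrative; error handling elided): |
| // |
| //	f, _ := os.Open(name) |
| //	runtime.SetFinalizer(f, func(f *os.File) { f.Close() }) |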
| func SetFinalizer(obj interface{}, finalizer interface{}) { |
| // We do just enough work here to make the mcall type safe. |
| // The rest is done on the M stack. |
| e := (*eface)(unsafe.Pointer(&obj)) |
| typ := e._type |
| if typ == nil { |
| gothrow("runtime.SetFinalizer: first argument is nil") |
| } |
| if typ.kind&kindMask != kindPtr { |
| gothrow("runtime.SetFinalizer: first argument is " + *typ._string + ", not pointer") |
| } |
| |
| f := (*eface)(unsafe.Pointer(&finalizer)) |
| ftyp := f._type |
| if ftyp != nil && ftyp.kind&kindMask != kindFunc { |
| gothrow("runtime.SetFinalizer: second argument is " + *ftyp._string + ", not a function") |
| } |
| mp := acquirem() |
| mp.ptrarg[0] = unsafe.Pointer(typ) |
| mp.ptrarg[1] = e.data |
| mp.ptrarg[2] = unsafe.Pointer(ftyp) |
| mp.ptrarg[3] = f.data |
| onM(&setFinalizer_m) |
| releasem(mp) |
| } |