| // Copyright 2009 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Malloc small size classes. |
| // |
| // See malloc.go for overview. |
| // |
| // The size classes are chosen so that rounding an allocation |
| // request up to the next size class wastes at most 12.5% (1.125x). |
| // |
| // Each size class has its own page count that gets allocated |
| // and chopped up when new objects of the size class are needed. |
| // That page count is chosen so that chopping up the run of |
| // pages into objects of the given size wastes at most 12.5% (1.125x) |
| // of the memory. It is not necessary that the cutoff here be |
| // the same as above. |
| // |
| // The two sources of waste multiply, so the worst possible case |
| // for the above constraints would be that allocations of some |
| // size might have a 26.6% (1.266x) overhead. |
| // In practice, only one of the wastes comes into play for a |
| // given size (sizes < 512 waste mainly on the round-up, |
| // sizes > 512 waste mainly on the page chopping). |
| // |
| // TODO(rsc): Compute max waste for any given size. |
| |
| package runtime |
| |
| // Size classes. Computed and initialized by InitSizes. |
| // |
| // SizeToClass(0 <= n <= MaxSmallSize) returns the size class, |
| // 1 <= sizeclass < NumSizeClasses, for n. |
| // Size class 0 is reserved to mean "not small". |
| // |
| // class_to_size[i] = largest size in class i |
| // class_to_allocnpages[i] = number of pages to allocate when |
| // making new objects in class i |
| |
| // The SizeToClass lookup is implemented using two arrays, |
| // one mapping sizes <= 1024 to their class and one mapping |
| // sizes >= 1024 and <= MaxSmallSize to their class. |
| // All objects are 8-aligned, so the first array is indexed by |
| // the size divided by 8 (rounded up). Objects >= 1024 bytes |
| // are 128-aligned, so the second array is indexed by the |
| // size divided by 128 (rounded up). The arrays are filled in |
| // by InitSizes. |
| |
| const ( |
| smallSizeDiv = 8 |
| smallSizeMax = 1024 |
| largeSizeDiv = 128 |
| ) |
| |
| var class_to_size [_NumSizeClasses]uint32 |
| var class_to_allocnpages [_NumSizeClasses]uint32 |
| var class_to_divmagic [_NumSizeClasses]divMagic |
| |
| var size_to_class8 [smallSizeMax/smallSizeDiv + 1]uint8 |
| var size_to_class128 [(_MaxSmallSize-smallSizeMax)/largeSizeDiv + 1]uint8 |
| |
| func sizeToClass(size uint32) uint32 { |
| if size > _MaxSmallSize { |
| throw("invalid size") |
| } |
| if size > smallSizeMax-8 { |
| return uint32(size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv]) |
| } |
| return uint32(size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv]) |
| } |
| |
| func initSizes() { |
| // Initialize the runtime·class_to_size table (and choose class sizes in the process). |
| class_to_size[0] = 0 |
| sizeclass := 1 // 0 means no class |
| align := 8 |
| for size := align; size <= _MaxSmallSize; size += align { |
| if size&(size-1) == 0 { // bump alignment once in a while |
| if size >= 2048 { |
| align = 256 |
| } else if size >= 128 { |
| align = size / 8 |
| } else if size >= 16 { |
| align = 16 // required for x86 SSE instructions, if we want to use them |
| } |
| } |
| if align&(align-1) != 0 { |
| throw("incorrect alignment") |
| } |
| |
| // Make the allocnpages big enough that |
| // the leftover is less than 1/8 of the total, |
| // so wasted space is at most 12.5%. |
| allocsize := _PageSize |
| for allocsize%size > allocsize/8 { |
| allocsize += _PageSize |
| } |
| npages := allocsize >> _PageShift |
| |
| // If the previous sizeclass chose the same |
| // allocation size and fit the same number of |
| // objects into the page, we might as well |
| // use just this size instead of having two |
| // different sizes. |
| if sizeclass > 1 && npages == int(class_to_allocnpages[sizeclass-1]) && allocsize/size == allocsize/int(class_to_size[sizeclass-1]) { |
| class_to_size[sizeclass-1] = uint32(size) |
| continue |
| } |
| |
| class_to_allocnpages[sizeclass] = uint32(npages) |
| class_to_size[sizeclass] = uint32(size) |
| sizeclass++ |
| } |
| if sizeclass != _NumSizeClasses { |
| print("runtime: sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n") |
| throw("bad NumSizeClasses") |
| } |
| |
| // Increase object sizes if we can fit the same number of larger objects |
| // into the same number of pages. For example, we choose size 8448 above |
| // with 6 objects in 7 pages. But we can well use object size 9472, |
| // which is also 6 objects in 7 pages but +1024 bytes (+12.12%). |
| // We need to preserve at least largeSizeDiv alignment otherwise |
| // sizeToClass won't work. |
| for i := 1; i < _NumSizeClasses; i++ { |
| npages := class_to_allocnpages[i] |
| psize := npages * _PageSize |
| size := class_to_size[i] |
| new_size := (psize / (psize / size)) &^ (largeSizeDiv - 1) |
| if new_size > size { |
| class_to_size[i] = new_size |
| } |
| } |
| |
| // Check maxObjsPerSpan => number of objects invariant. |
| for i, size := range class_to_size { |
| if i != 0 && class_to_size[i-1] >= size { |
| throw("non-monotonic size classes") |
| } |
| |
| if size != 0 && class_to_allocnpages[i]*pageSize/size > maxObjsPerSpan { |
| throw("span contains too many objects") |
| } |
| if size == 0 && i != 0 { |
| throw("size is 0 but class is not 0") |
| } |
| } |
| // Initialize the size_to_class tables. |
| nextsize := 0 |
| for sizeclass = 1; sizeclass < _NumSizeClasses; sizeclass++ { |
| for ; nextsize < 1024 && nextsize <= int(class_to_size[sizeclass]); nextsize += 8 { |
| size_to_class8[nextsize/8] = uint8(sizeclass) |
| } |
| if nextsize >= 1024 { |
| for ; nextsize <= int(class_to_size[sizeclass]); nextsize += 128 { |
| size_to_class128[(nextsize-1024)/128] = uint8(sizeclass) |
| } |
| } |
| } |
| |
| // Double-check SizeToClass. |
| if false { |
| for n := uint32(0); n < _MaxSmallSize; n++ { |
| sizeclass := sizeToClass(n) |
| if sizeclass < 1 || sizeclass >= _NumSizeClasses || class_to_size[sizeclass] < n { |
| print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n") |
| print("incorrect SizeToClass\n") |
| goto dump |
| } |
| if sizeclass > 1 && class_to_size[sizeclass-1] >= n { |
| print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n") |
| print("SizeToClass too big\n") |
| goto dump |
| } |
| } |
| } |
| |
| testdefersizes() |
| |
| // Copy out for statistics table. |
| for i := 0; i < len(class_to_size); i++ { |
| memstats.by_size[i].size = uint32(class_to_size[i]) |
| } |
| |
| for i := 1; i < len(class_to_size); i++ { |
| class_to_divmagic[i] = computeDivMagic(uint32(class_to_size[i])) |
| } |
| |
| return |
| |
| dump: |
| if true { |
| print("runtime: NumSizeClasses=", _NumSizeClasses, "\n") |
| print("runtime·class_to_size:") |
| for sizeclass = 0; sizeclass < _NumSizeClasses; sizeclass++ { |
| print(" ", class_to_size[sizeclass], "") |
| } |
| print("\n\n") |
| print("runtime: size_to_class8:") |
| for i := 0; i < len(size_to_class8); i++ { |
| print(" ", i*8, "=>", size_to_class8[i], "(", class_to_size[size_to_class8[i]], ")\n") |
| } |
| print("\n") |
| print("runtime: size_to_class128:") |
| for i := 0; i < len(size_to_class128); i++ { |
| print(" ", i*128, "=>", size_to_class128[i], "(", class_to_size[size_to_class128[i]], ")\n") |
| } |
| print("\n") |
| } |
| throw("InitSizes failed") |
| } |
| |
| // Returns size of the memory block that mallocgc will allocate if you ask for the size. |
| func roundupsize(size uintptr) uintptr { |
| if size < _MaxSmallSize { |
| if size <= smallSizeMax-8 { |
| return uintptr(class_to_size[size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv]]) |
| } else { |
| return uintptr(class_to_size[size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv]]) |
| } |
| } |
| if size+_PageSize < size { |
| return size |
| } |
| return round(size, _PageSize) |
| } |
| |
| // divMagic holds magic constants to implement division |
| // by a particular constant as a shift, multiply, and shift. |
| // That is, given |
| // m = computeMagic(d) |
| // then |
| // n/d == ((n>>m.shift) * m.mul) >> m.shift2 |
| // |
| // The magic computation picks m such that |
| // d = d₁*d₂ |
| // d₂= 2^m.shift |
| // m.mul = ⌈2^m.shift2 / d₁⌉ |
| // |
| // The magic computation here is tailored for malloc block sizes |
| // and does not handle arbitrary d correctly. Malloc block sizes d are |
| // always even, so the first shift implements the factors of 2 in d |
| // and then the mul and second shift implement the odd factor |
| // that remains. Because the first shift divides n by at least 2 (actually 8) |
| // before the multiply gets involved, the huge corner cases that |
| // require additional adjustment are impossible, so the usual |
| // fixup is not needed. |
| // |
| // For more details see Hacker's Delight, Chapter 10, and |
| // http://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html |
| // http://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html |
| type divMagic struct { |
| shift uint8 |
| mul uint32 |
| shift2 uint8 |
| baseMask uintptr |
| } |
| |
| func computeDivMagic(d uint32) divMagic { |
| var m divMagic |
| |
| // If the size is a power of two, heapBitsForObject can divide even faster by masking. |
| // Compute this mask. |
| if d&(d-1) == 0 { |
| // It is a power of 2 (assuming dinptr != 1) |
| m.baseMask = ^(uintptr(d) - 1) |
| } else { |
| m.baseMask = 0 |
| } |
| |
| // Compute pre-shift by factoring power of 2 out of d. |
| for d&1 == 0 { |
| m.shift++ |
| d >>= 1 |
| } |
| |
| // Compute largest k such that ⌈2^k / d⌉ fits in a 32-bit int. |
| // This is always a good enough approximation. |
| // We could use smaller k for some divisors but there's no point. |
| k := uint8(63) |
| d64 := uint64(d) |
| for ((1<<k)+d64-1)/d64 >= 1<<32 { |
| k-- |
| } |
| m.mul = uint32(((1 << k) + d64 - 1) / d64) // ⌈2^k / d⌉ |
| m.shift2 = k |
| |
| return m |
| } |