Russ Cox | 1e2d2f0 | 2014-11-11 17:05:02 -0500 | [diff] [blame] | 1 | // Copyright 2009 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | // Malloc small size classes. |
| 6 | // |
Keith Randall | cd5b144 | 2015-03-11 12:58:47 -0700 | [diff] [blame] | 7 | // See malloc.go for overview. |
Russ Cox | 1e2d2f0 | 2014-11-11 17:05:02 -0500 | [diff] [blame] | 8 | // |
| 9 | // The size classes are chosen so that rounding an allocation |
| 10 | // request up to the next size class wastes at most 12.5% (1.125x). |
| 11 | // |
| 12 | // Each size class has its own page count that gets allocated |
| 13 | // and chopped up when new objects of the size class are needed. |
| 14 | // That page count is chosen so that chopping up the run of |
| 15 | // pages into objects of the given size wastes at most 12.5% (1.125x) |
| 16 | // of the memory. It is not necessary that the cutoff here be |
| 17 | // the same as above. |
| 18 | // |
| 19 | // The two sources of waste multiply, so the worst possible case |
| 20 | // for the above constraints would be that allocations of some |
| 21 | // size might have a 26.6% (1.266x) overhead. |
| 22 | // In practice, only one of the wastes comes into play for a |
| 23 | // given size (sizes < 512 waste mainly on the round-up, |
| 24 | // sizes > 512 waste mainly on the page chopping). |
| 25 | // |
| 26 | // TODO(rsc): Compute max waste for any given size. |
| 27 | |
| 28 | package runtime |
| 29 | |
Russ Cox | 484f801 | 2015-02-19 13:38:46 -0500 | [diff] [blame] | 30 | // Size classes. Computed and initialized by InitSizes. |
| 31 | // |
| 32 | // SizeToClass(0 <= n <= MaxSmallSize) returns the size class, |
| 33 | // 1 <= sizeclass < NumSizeClasses, for n. |
| 34 | // Size class 0 is reserved to mean "not small". |
| 35 | // |
| 36 | // class_to_size[i] = largest size in class i |
| 37 | // class_to_allocnpages[i] = number of pages to allocate when |
| 38 | // making new objects in class i |
Russ Cox | 1e2d2f0 | 2014-11-11 17:05:02 -0500 | [diff] [blame] | 39 | |
| 40 | // The SizeToClass lookup is implemented using two arrays, |
| 41 | // one mapping sizes <= 1024 to their class and one mapping |
| 42 | // sizes >= 1024 and <= MaxSmallSize to their class. |
| 43 | // All objects are 8-aligned, so the first array is indexed by |
| 44 | // the size divided by 8 (rounded up). Objects >= 1024 bytes |
| 45 | // are 128-aligned, so the second array is indexed by the |
| 46 | // size divided by 128 (rounded up). The arrays are filled in |
| 47 | // by InitSizes. |
Russ Cox | 484f801 | 2015-02-19 13:38:46 -0500 | [diff] [blame] | 48 | |
| 49 | var class_to_size [_NumSizeClasses]int32 |
| 50 | var class_to_allocnpages [_NumSizeClasses]int32 |
Russ Cox | 9feb24f | 2015-03-04 11:34:50 -0500 | [diff] [blame] | 51 | var class_to_divmagic [_NumSizeClasses]divMagic |
| 52 | |
Russ Cox | 484f801 | 2015-02-19 13:38:46 -0500 | [diff] [blame] | 53 | var size_to_class8 [1024/8 + 1]int8 |
| 54 | var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8 |
Russ Cox | 1e2d2f0 | 2014-11-11 17:05:02 -0500 | [diff] [blame] | 55 | |
| 56 | func sizeToClass(size int32) int32 { |
| 57 | if size > _MaxSmallSize { |
Keith Randall | b2a950b | 2014-12-27 20:58:00 -0800 | [diff] [blame] | 58 | throw("SizeToClass - invalid size") |
Russ Cox | 1e2d2f0 | 2014-11-11 17:05:02 -0500 | [diff] [blame] | 59 | } |
| 60 | if size > 1024-8 { |
| 61 | return int32(size_to_class128[(size-1024+127)>>7]) |
| 62 | } |
| 63 | return int32(size_to_class8[(size+7)>>3]) |
| 64 | } |
| 65 | |
| 66 | func initSizes() { |
| 67 | // Initialize the runtime·class_to_size table (and choose class sizes in the process). |
| 68 | class_to_size[0] = 0 |
| 69 | sizeclass := 1 // 0 means no class |
| 70 | align := 8 |
| 71 | for size := align; size <= _MaxSmallSize; size += align { |
| 72 | if size&(size-1) == 0 { // bump alignment once in a while |
| 73 | if size >= 2048 { |
| 74 | align = 256 |
| 75 | } else if size >= 128 { |
| 76 | align = size / 8 |
| 77 | } else if size >= 16 { |
| 78 | align = 16 // required for x86 SSE instructions, if we want to use them |
| 79 | } |
| 80 | } |
| 81 | if align&(align-1) != 0 { |
Keith Randall | b2a950b | 2014-12-27 20:58:00 -0800 | [diff] [blame] | 82 | throw("InitSizes - bug") |
Russ Cox | 1e2d2f0 | 2014-11-11 17:05:02 -0500 | [diff] [blame] | 83 | } |
| 84 | |
| 85 | // Make the allocnpages big enough that |
| 86 | // the leftover is less than 1/8 of the total, |
| 87 | // so wasted space is at most 12.5%. |
| 88 | allocsize := _PageSize |
| 89 | for allocsize%size > allocsize/8 { |
| 90 | allocsize += _PageSize |
| 91 | } |
| 92 | npages := allocsize >> _PageShift |
| 93 | |
| 94 | // If the previous sizeclass chose the same |
| 95 | // allocation size and fit the same number of |
| 96 | // objects into the page, we might as well |
| 97 | // use just this size instead of having two |
| 98 | // different sizes. |
| 99 | if sizeclass > 1 && npages == int(class_to_allocnpages[sizeclass-1]) && allocsize/size == allocsize/int(class_to_size[sizeclass-1]) { |
| 100 | class_to_size[sizeclass-1] = int32(size) |
| 101 | continue |
| 102 | } |
| 103 | |
| 104 | class_to_allocnpages[sizeclass] = int32(npages) |
| 105 | class_to_size[sizeclass] = int32(size) |
| 106 | sizeclass++ |
| 107 | } |
| 108 | if sizeclass != _NumSizeClasses { |
| 109 | print("sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n") |
Keith Randall | b2a950b | 2014-12-27 20:58:00 -0800 | [diff] [blame] | 110 | throw("InitSizes - bad NumSizeClasses") |
Russ Cox | 1e2d2f0 | 2014-11-11 17:05:02 -0500 | [diff] [blame] | 111 | } |
| 112 | |
| 113 | // Initialize the size_to_class tables. |
| 114 | nextsize := 0 |
| 115 | for sizeclass = 1; sizeclass < _NumSizeClasses; sizeclass++ { |
| 116 | for ; nextsize < 1024 && nextsize <= int(class_to_size[sizeclass]); nextsize += 8 { |
| 117 | size_to_class8[nextsize/8] = int8(sizeclass) |
| 118 | } |
| 119 | if nextsize >= 1024 { |
| 120 | for ; nextsize <= int(class_to_size[sizeclass]); nextsize += 128 { |
| 121 | size_to_class128[(nextsize-1024)/128] = int8(sizeclass) |
| 122 | } |
| 123 | } |
| 124 | } |
| 125 | |
| 126 | // Double-check SizeToClass. |
| 127 | if false { |
| 128 | for n := int32(0); n < _MaxSmallSize; n++ { |
| 129 | sizeclass := sizeToClass(n) |
| 130 | if sizeclass < 1 || sizeclass >= _NumSizeClasses || class_to_size[sizeclass] < n { |
| 131 | print("size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n") |
| 132 | print("incorrect SizeToClass\n") |
| 133 | goto dump |
| 134 | } |
| 135 | if sizeclass > 1 && class_to_size[sizeclass-1] >= n { |
| 136 | print("size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n") |
| 137 | print("SizeToClass too big\n") |
| 138 | goto dump |
| 139 | } |
| 140 | } |
| 141 | } |
| 142 | |
| 143 | testdefersizes() |
| 144 | |
| 145 | // Copy out for statistics table. |
| 146 | for i := 0; i < len(class_to_size); i++ { |
| 147 | memstats.by_size[i].size = uint32(class_to_size[i]) |
| 148 | } |
Russ Cox | 9feb24f | 2015-03-04 11:34:50 -0500 | [diff] [blame] | 149 | |
| 150 | for i := 1; i < len(class_to_size); i++ { |
| 151 | class_to_divmagic[i] = computeDivMagic(uint32(class_to_size[i])) |
| 152 | } |
| 153 | |
Russ Cox | 1e2d2f0 | 2014-11-11 17:05:02 -0500 | [diff] [blame] | 154 | return |
| 155 | |
| 156 | dump: |
| 157 | if true { |
| 158 | print("NumSizeClasses=", _NumSizeClasses, "\n") |
| 159 | print("runtime·class_to_size:") |
| 160 | for sizeclass = 0; sizeclass < _NumSizeClasses; sizeclass++ { |
| 161 | print(" ", class_to_size[sizeclass], "") |
| 162 | } |
| 163 | print("\n\n") |
| 164 | print("size_to_class8:") |
| 165 | for i := 0; i < len(size_to_class8); i++ { |
| 166 | print(" ", i*8, "=>", size_to_class8[i], "(", class_to_size[size_to_class8[i]], ")\n") |
| 167 | } |
| 168 | print("\n") |
| 169 | print("size_to_class128:") |
| 170 | for i := 0; i < len(size_to_class128); i++ { |
| 171 | print(" ", i*128, "=>", size_to_class128[i], "(", class_to_size[size_to_class128[i]], ")\n") |
| 172 | } |
| 173 | print("\n") |
| 174 | } |
Keith Randall | b2a950b | 2014-12-27 20:58:00 -0800 | [diff] [blame] | 175 | throw("InitSizes failed") |
Russ Cox | 1e2d2f0 | 2014-11-11 17:05:02 -0500 | [diff] [blame] | 176 | } |
| 177 | |
| 178 | // Returns size of the memory block that mallocgc will allocate if you ask for the size. |
| 179 | func roundupsize(size uintptr) uintptr { |
| 180 | if size < _MaxSmallSize { |
| 181 | if size <= 1024-8 { |
| 182 | return uintptr(class_to_size[size_to_class8[(size+7)>>3]]) |
| 183 | } else { |
| 184 | return uintptr(class_to_size[size_to_class128[(size-1024+127)>>7]]) |
| 185 | } |
| 186 | } |
| 187 | if size+_PageSize < size { |
| 188 | return size |
| 189 | } |
| 190 | return round(size, _PageSize) |
| 191 | } |
Russ Cox | 9feb24f | 2015-03-04 11:34:50 -0500 | [diff] [blame] | 192 | |
// divMagic holds the magic constants that implement division by one
// particular constant as a shift, a multiply, and a second shift.
// That is, given
//
//	m = computeDivMagic(d)
//
// then
//
//	n/d == ((n>>m.shift) * m.mul) >> m.shift2
//
// The magic computation splits d into an odd part and a power of two:
//
//	d      = d₁*d₂
//	d₂     = 2^m.shift
//	m.mul  = ⌈2^m.shift2 / d₁⌉
//
// The computation is tailored for malloc block sizes and does not
// handle arbitrary d correctly. Malloc block sizes d are always even,
// so the first shift implements the factors of 2 in d, and the multiply
// and second shift implement the odd factor that remains. Because the
// first shift divides n by at least 2 (actually 8) before the multiply
// gets involved, the huge corner cases that require additional
// adjustment are impossible, so the usual fixup is not needed.
//
// NOTE(review): m.mul can occupy all 32 bits, so the product above
// presumably has to be formed in 64-bit arithmetic — confirm at the
// call sites.
//
// For more details see Hacker's Delight, Chapter 10, and
// http://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html
// http://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html
type divMagic struct {
	shift  uint8  // pre-shift: number of factors of 2 in d
	mul    uint32 // ⌈2^shift2 / d₁⌉, where d₁ is the odd part of d
	shift2 uint8  // post-shift applied after the multiply
}
| 222 | |
| 223 | func computeDivMagic(d uint32) divMagic { |
| 224 | var m divMagic |
| 225 | |
| 226 | // Compute pre-shift by factoring power of 2 out of d. |
| 227 | for d&1 == 0 { |
| 228 | m.shift++ |
| 229 | d >>= 1 |
| 230 | } |
| 231 | |
| 232 | // Compute largest k such that ⌈2^k / d⌉ fits in a 32-bit int. |
| 233 | // This is always a good enough approximation. |
| 234 | // We could use smaller k for some divisors but there's no point. |
| 235 | k := uint8(63) |
| 236 | d64 := uint64(d) |
| 237 | for ((1<<k)+d64-1)/d64 >= 1<<32 { |
| 238 | k-- |
| 239 | } |
| 240 | m.mul = uint32(((1 << k) + d64 - 1) / d64) // ⌈2^k / d⌉ |
| 241 | m.shift2 = k |
| 242 | return m |
| 243 | } |