// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector: stack barriers
//
// Stack barriers enable the garbage collector to determine how much
// of a goroutine stack has changed between when a stack is scanned
// during the concurrent scan phase and when it is re-scanned during
// the stop-the-world mark termination phase. Mark termination only
// needs to re-scan the changed part, so for deep stacks this can
// significantly reduce GC pause time compared to the alternative of
// re-scanning whole stacks. The deeper the stacks, the more stack
// barriers help.
//
// When stacks are scanned during the concurrent scan phase, the stack
// scan installs stack barriers by selecting stack frames and
// overwriting the saved return PCs (or link registers) of these
// frames with the PC of a "stack barrier trampoline". Later, when a
// selected frame returns, it "returns" to this trampoline instead of
// returning to its actual caller. The trampoline records that the
// stack has unwound past this frame and jumps to the original return
// PC recorded when the stack barrier was installed. Mark termination
// re-scans only as far as the first frame that hasn't hit a stack
// barrier and then removes any un-hit stack barriers.
//
// This scheme is very lightweight. No special code is required in the
// mutator to record stack unwinding and the trampoline is only a few
// assembly instructions.
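//
// For illustration, the trampoline behaves roughly like the following
// Go-flavored sketch (the real trampoline is assembly; jump stands in
// for an assembly-level jump, and stkbar/stkbarPos are the bookkeeping
// fields used throughout this file):
//
//	func stackBarrier() {
//		// A selected frame just "returned" here instead of to
//		// its actual caller.
//		gp := getg()
//		barrier := gp.stkbar[gp.stkbarPos]
//		gp.stkbarPos++           // record that the stack unwound past this barrier
//		jump(barrier.savedLRVal) // resume at the original return PC
//	}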
//
// Book-keeping
// ------------
//
// The primary cost of stack barriers is book-keeping: the runtime has
// to record the locations of all stack barriers and the original
// return PCs in order to return to the correct caller when a stack
// barrier is hit and so it can remove un-hit stack barriers. In order
// to minimize this cost, the Go runtime places stack barriers in
// exponentially-spaced frames, starting 1K past the current frame.
// The book-keeping structure hence grows logarithmically with the
// size of the stack and mark termination re-scans at most twice as
// much stack as necessary.
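//
// For example, suppose the stack unwound past the barrier 8K above
// the scan-time SP but not past the 16K barrier. Everything below
// the 8K mark must be re-scanned, but the runtime knows only that
// unwinding stopped somewhere below 16K, so mark termination
// re-scans 16K of stack: at most twice the necessary amount.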
//
// The runtime reserves space for this book-keeping structure at the
// top of the stack allocation itself (just above the outermost
// frame). This is necessary because the regular memory allocator can
// itself grow the stack, and hence can't be used when allocating
// stack-related structures.
//
// For debugging, the runtime also supports installing stack barriers
// at every frame. However, this requires significantly more
// book-keeping space.
//
// Correctness
// -----------
//
// The runtime and the compiler cooperate to ensure that all objects
// reachable from the stack as of mark termination are marked.
// Anything unchanged since the concurrent scan phase will be marked
// because it is marked by the concurrent scan. After the concurrent
// scan, there are three possible classes of stack modifications that
// must be tracked:
//
// 1) Mutator writes below the lowest un-hit stack barrier. This
// includes all writes performed by an executing function to its own
// stack frame. This part of the stack will be re-scanned by mark
// termination, which will mark any objects made reachable from
// modifications to this part of the stack.
//
// 2) Mutator writes above the lowest un-hit stack barrier. It's
// possible for a mutator to modify the stack above the lowest un-hit
// stack barrier if a higher frame has passed down a pointer to a
// stack variable in its frame. This is called an "up-pointer". The
// compiler ensures that writes through up-pointers have an
// accompanying write barrier (it simply doesn't distinguish between
// writes through up-pointers and writes through heap pointers). This
// write barrier marks any object made reachable from modifications to
// this part of the stack. A concrete illustration of this case
// appears after this list.
//
// 3) Runtime writes to the stack. Various runtime operations such as
// sends to unbuffered channels can write to arbitrary parts of the
// stack, including above the lowest un-hit stack barrier. We solve
// this in two ways. In many cases, the runtime can perform an
// explicit write barrier operation like in case 2. However, in the
// case of bulk memory move (typedmemmove), the runtime doesn't
// necessarily have ready access to a pointer bitmap for the memory
// being copied, so it simply unwinds any stack barriers below the
// destination.
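//
// As a concrete illustration of case 2 (a hypothetical example; T and
// newT are made up, not runtime code):
//
//	func caller() {
//		var x *T
//		callee(&x) // &x becomes an up-pointer in callee's frame
//		// ... x may now point to a newly reachable object ...
//	}
//
//	func callee(p **T) {
//		*p = newT() // compiled with a write barrier, exactly
//		// as if p pointed into the heap
//	}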
//
// Gotchas
// -------
//
// Anything that inspects or manipulates the stack potentially needs
// to understand stack barriers. The most obvious case is that
// gentraceback needs to use the original return PC when it encounters
// the stack barrier trampoline. Anything that unwinds the stack, such
// as panic/recover, must unwind stack barriers in tandem with
// unwinding the stack.
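//
// For example, a stack walk has to do something like the following
// when it reaches a barrier (a sketch of the idea, not gentraceback's
// actual code; stkbarIdx is a hypothetical walk-local index):
//
//	if frame.lr == stackBarrierPC {
//		// Substitute the original return PC saved when the
//		// barrier was installed.
//		frame.lr = gp.stkbar[stkbarIdx].savedLRVal
//		stkbarIdx++
//	}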
//
// Stack barriers require that any goroutine whose stack has been
// scanned must execute write barriers. Go satisfies this by simply
// enabling write barriers globally during the concurrent scan phase.
// Traditionally, however, write barriers would not be enabled during
// this phase, so this is an additional cost of stack barriers.

package runtime

import "unsafe"

const debugStackBarrier = false

// firstStackBarrierOffset is the approximate byte offset at
// which to place the first stack barrier from the current SP.
// This is a lower bound on how much stack will have to be
// re-scanned during mark termination. Subsequent barriers are
// placed at firstStackBarrierOffset * 2^n offsets.
//
// For debugging, this can be set to 0, which will install a
// stack barrier at every frame. If you do this, you may also
// have to raise _StackMin, since the stack barrier
// bookkeeping will use a large amount of each stack.
var firstStackBarrierOffset = 1024

// gcMaxStackBarriers returns the maximum number of stack barriers
// that can be installed in a stack of stackSize bytes.
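//
// For example, with the default firstStackBarrierOffset of 1024,
// gcMaxStackBarriers(32768) returns 6: the loop below counts the
// five offsets 1K, 2K, 4K, 8K, and 16K, and one extra slot is
// reserved on top.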
func gcMaxStackBarriers(stackSize int) (n int) {
	if firstStackBarrierOffset == 0 {
		// Special debugging case for inserting stack barriers
		// at every frame. Steal half of the stack for the
		// []stkbar. Technically, if the stack were to consist
		// solely of return PCs we would need two thirds of
		// the stack, but stealing that much breaks things and
		// this doesn't happen in practice.
		return stackSize / 2 / int(unsafe.Sizeof(stkbar{}))
	}

	offset := firstStackBarrierOffset
	for offset < stackSize {
		n++
		offset *= 2
	}
	return n + 1
}

// gcInstallStackBarrier installs a stack barrier over the return PC of frame.
//go:nowritebarrier
func gcInstallStackBarrier(gp *g, frame *stkframe) bool {
	if frame.lr == 0 {
		if debugStackBarrier {
			print("not installing stack barrier with no LR, goid=", gp.goid, "\n")
		}
		return false
	}

	if frame.fn.entry == cgocallback_gofuncPC {
		// cgocallback_gofunc doesn't return to its LR;
		// instead, its return path puts LR in g.sched.pc and
		// switches back to the system stack on which
		// cgocallback_gofunc was originally called. We can't
		// have a stack barrier in g.sched.pc, so don't
		// install one in this frame.
		if debugStackBarrier {
			print("not installing stack barrier over LR of cgocallback_gofunc, goid=", gp.goid, "\n")
		}
		return false
	}

	// Save the return PC and overwrite it with stackBarrier.
	var lrUintptr uintptr
	if usesLR {
		lrUintptr = frame.sp
	} else {
		lrUintptr = frame.fp - regSize
	}
	lrPtr := (*uintreg)(unsafe.Pointer(lrUintptr))
	if debugStackBarrier {
		print("install stack barrier at ", hex(lrUintptr), " over ", hex(*lrPtr), ", goid=", gp.goid, "\n")
		if uintptr(*lrPtr) != frame.lr {
			print("frame.lr=", hex(frame.lr))
			throw("frame.lr differs from stack LR")
		}
	}

	gp.stkbar = gp.stkbar[:len(gp.stkbar)+1]
	stkbar := &gp.stkbar[len(gp.stkbar)-1]
	stkbar.savedLRPtr = lrUintptr
	stkbar.savedLRVal = uintptr(*lrPtr)
	*lrPtr = uintreg(stackBarrierPC)
	return true
}

// gcRemoveStackBarriers removes all stack barriers installed in gp's stack.
//go:nowritebarrier
func gcRemoveStackBarriers(gp *g) {
	if debugStackBarrier && gp.stkbarPos != 0 {
		print("hit ", gp.stkbarPos, " stack barriers, goid=", gp.goid, "\n")
	}

	// Remove stack barriers that we didn't hit.
	for _, stkbar := range gp.stkbar[gp.stkbarPos:] {
		gcRemoveStackBarrier(gp, stkbar)
	}

	// Clear recorded stack barriers so copystack doesn't try to
	// adjust them.
	gp.stkbarPos = 0
	gp.stkbar = gp.stkbar[:0]
}

// gcRemoveStackBarrier removes a single stack barrier. It is the
// inverse operation of gcInstallStackBarrier.
//
// This is nosplit to ensure gp's stack does not move.
//
//go:nowritebarrier
//go:nosplit
func gcRemoveStackBarrier(gp *g, stkbar stkbar) {
	if debugStackBarrier {
		print("remove stack barrier at ", hex(stkbar.savedLRPtr), " with ", hex(stkbar.savedLRVal), ", goid=", gp.goid, "\n")
	}
	lrPtr := (*uintreg)(unsafe.Pointer(stkbar.savedLRPtr))
	if val := *lrPtr; val != uintreg(stackBarrierPC) {
		printlock()
		print("at *", hex(stkbar.savedLRPtr), " expected stack barrier PC ", hex(stackBarrierPC), ", found ", hex(val), ", goid=", gp.goid, "\n")
		print("gp.stkbar=")
		gcPrintStkbars(gp.stkbar)
		print(", gp.stkbarPos=", gp.stkbarPos, ", gp.stack=[", hex(gp.stack.lo), ",", hex(gp.stack.hi), ")\n")
		throw("stack barrier lost")
	}
	*lrPtr = uintreg(stkbar.savedLRVal)
}

// gcPrintStkbars prints a []stkbar for debugging.
func gcPrintStkbars(stkbar []stkbar) {
	print("[")
	for i, s := range stkbar {
		if i > 0 {
			print(" ")
		}
		print("*", hex(s.savedLRPtr), "=", hex(s.savedLRVal))
	}
	print("]")
}

// gcUnwindBarriers marks all stack barriers up to the frame containing
// sp as hit and removes them. This is used during stack unwinding for
// panic/recover and by heapBitsBulkBarrier to force stack re-scanning
// when its destination is on the stack.
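//
// For example, a bulk copy into a destination dst on gp's stack could
// conservatively force a re-scan of the frames below dst with (a
// sketch of the idea; see heapBitsBulkBarrier for the real caller):
//
//	gcUnwindBarriers(gp, dst)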
//
// This is nosplit to ensure gp's stack does not move.
//
//go:nosplit
func gcUnwindBarriers(gp *g, sp uintptr) {
	// On LR machines, if there is a stack barrier on the return
	// from the frame containing sp, this will mark it as hit even
	// though it isn't, but it's okay to be conservative.
	before := gp.stkbarPos
	for int(gp.stkbarPos) < len(gp.stkbar) && gp.stkbar[gp.stkbarPos].savedLRPtr < sp {
		gcRemoveStackBarrier(gp, gp.stkbar[gp.stkbarPos])
		gp.stkbarPos++
	}
	if debugStackBarrier && gp.stkbarPos != before {
		print("skip barriers below ", hex(sp), " in goid=", gp.goid, ": ")
		gcPrintStkbars(gp.stkbar[before:gp.stkbarPos])
		print("\n")
	}
}

// nextBarrierPC returns the original return PC of the next stack barrier.
// Used by getcallerpc, so it must be nosplit.
//go:nosplit
func nextBarrierPC() uintptr {
	gp := getg()
	return gp.stkbar[gp.stkbarPos].savedLRVal
}

// setNextBarrierPC sets the return PC of the next stack barrier.
// Used by setcallerpc, so it must be nosplit.
//go:nosplit
func setNextBarrierPC(pc uintptr) {
	gp := getg()
	gp.stkbar[gp.stkbarPos].savedLRVal = pc
}