// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector: stack barriers
//
// Stack barriers enable the garbage collector to determine how much
// of a goroutine stack has changed between when a stack is scanned
// during the concurrent scan phase and when it is re-scanned during
// the stop-the-world mark termination phase. Mark termination only
// needs to re-scan the changed part, so for deep stacks this can
// significantly reduce GC pause time compared to the alternative of
// re-scanning whole stacks. The deeper the stacks, the more stack
// barriers help.
//
// When stacks are scanned during the concurrent scan phase, the stack
// scan installs stack barriers by selecting stack frames and
// overwriting the saved return PCs (or link registers) of these
// frames with the PC of a "stack barrier trampoline". Later, when a
// selected frame returns, it "returns" to this trampoline instead of
// returning to its actual caller. The trampoline records that the
// stack has unwound past this frame and jumps to the original return
// PC recorded when the stack barrier was installed. Mark termination
// re-scans only as far as the first frame that hasn't hit a stack
// barrier and then removes any un-hit stack barriers.
//
// This scheme is very lightweight. No special code is required in the
// mutator to record stack unwinding and the trampoline is only a few
// assembly instructions.
//
// Book-keeping
// ------------
//
// The primary cost of stack barriers is book-keeping: the runtime has
// to record the locations of all stack barriers and the original
// return PCs in order to return to the correct caller when a stack
// barrier is hit and so it can remove un-hit stack barriers. In order
// to minimize this cost, the Go runtime places stack barriers in
// exponentially-spaced frames, starting 1K past the current frame.
// The book-keeping structure hence grows logarithmically with the
// size of the stack and mark termination re-scans at most twice as
// much stack as necessary.
//
// The runtime reserves space for this book-keeping structure at the
// top of the stack allocation itself (just above the outermost
// frame). This is necessary because the regular memory allocator can
// itself grow the stack, and hence can't be used when allocating
// stack-related structures.
//
// For debugging, the runtime also supports installing stack barriers
// at every frame. However, this requires significantly more
// book-keeping space.
//
// Correctness
// -----------
//
// The runtime and the compiler cooperate to ensure that all objects
// reachable from the stack as of mark termination are marked.
// Anything unchanged since the concurrent scan phase will be marked
// because it is marked by the concurrent scan. After the concurrent
// scan, there are three possible classes of stack modifications that
// must be tracked:
//
// 1) Mutator writes below the lowest un-hit stack barrier. This
// includes all writes performed by an executing function to its own
// stack frame. This part of the stack will be re-scanned by mark
// termination, which will mark any objects made reachable from
// modifications to this part of the stack.
//
// 2) Mutator writes above the lowest un-hit stack barrier. It's
// possible for a mutator to modify the stack above the lowest un-hit
// stack barrier if a higher frame has passed down a pointer to a
// stack variable in its frame. This is called an "up-pointer". The
// compiler ensures that writes through up-pointers have an
// accompanying write barrier (it simply doesn't distinguish between
// writes through up-pointers and writes through heap pointers). This
// write barrier marks any object made reachable from modifications to
// this part of the stack.
//
// 3) Runtime writes to the stack. Various runtime operations such as
// sends to unbuffered channels can write to arbitrary parts of the
// stack, including above the lowest un-hit stack barrier. We solve
// this in two ways. In many cases, the runtime can perform an
// explicit write barrier operation like in case 2. However, in the
// case of bulk memory move (typedmemmove), the runtime doesn't
// necessarily have ready access to a pointer bitmap for the memory
// being copied, so it simply unwinds any stack barriers below the
// destination.
//
// Gotchas
// -------
//
// Anything that inspects or manipulates the stack potentially needs
// to understand stack barriers. The most obvious case is that
// gentraceback needs to use the original return PC when it encounters
// the stack barrier trampoline. Anything that unwinds the stack such
// as panic/recover must unwind stack barriers in tandem with
// unwinding the stack.
//
// Stack barriers require that any goroutine whose stack has been
// scanned must execute write barriers. Go solves this by simply
// enabling write barriers globally during the concurrent scan phase.
// However, traditionally, write barriers are not enabled during this
// phase.

package runtime

import "unsafe"

// debugStackBarrier enables verbose print output on every stack
// barrier install, remove, and unwind operation.
const debugStackBarrier = false

// firstStackBarrierOffset is the approximate byte offset at
// which to place the first stack barrier from the current SP.
// This is a lower bound on how much stack will have to be
// re-scanned during mark termination. Subsequent barriers are
// placed at firstStackBarrierOffset * 2^n offsets.
//
// For debugging, this can be set to 0, which will install a
// stack barrier at every frame. If you do this, you may also
// have to raise _StackMin, since the stack barrier
// bookkeeping will use a large amount of each stack.
var firstStackBarrierOffset = 1024
123
124// gcMaxStackBarriers returns the maximum number of stack barriers
125// that can be installed in a stack of stackSize bytes.
126func gcMaxStackBarriers(stackSize int) (n int) {
127 if firstStackBarrierOffset == 0 {
128 // Special debugging case for inserting stack barriers
129 // at every frame. Steal half of the stack for the
130 // []stkbar. Technically, if the stack were to consist
131 // solely of return PCs we would need two thirds of
132 // the stack, but stealing that much breaks things and
133 // this doesn't happen in practice.
134 return stackSize / 2 / int(unsafe.Sizeof(stkbar{}))
135 }
136
137 offset := firstStackBarrierOffset
138 for offset < stackSize {
139 n++
140 offset *= 2
141 }
142 return n + 1
143}
144
145// gcInstallStackBarrier installs a stack barrier over the return PC of frame.
146//go:nowritebarrier
147func gcInstallStackBarrier(gp *g, frame *stkframe) bool {
148 if frame.lr == 0 {
149 if debugStackBarrier {
150 print("not installing stack barrier with no LR, goid=", gp.goid, "\n")
151 }
152 return false
153 }
154
155 if frame.fn.entry == cgocallback_gofuncPC {
156 // cgocallback_gofunc doesn't return to its LR;
157 // instead, its return path puts LR in g.sched.pc and
158 // switches back to the system stack on which
159 // cgocallback_gofunc was originally called. We can't
160 // have a stack barrier in g.sched.pc, so don't
161 // install one in this frame.
162 if debugStackBarrier {
163 print("not installing stack barrier over LR of cgocallback_gofunc, goid=", gp.goid, "\n")
164 }
165 return false
166 }
167
168 // Save the return PC and overwrite it with stackBarrier.
169 var lrUintptr uintptr
170 if usesLR {
171 lrUintptr = frame.sp
172 } else {
173 lrUintptr = frame.fp - regSize
174 }
175 lrPtr := (*uintreg)(unsafe.Pointer(lrUintptr))
176 if debugStackBarrier {
177 print("install stack barrier at ", hex(lrUintptr), " over ", hex(*lrPtr), ", goid=", gp.goid, "\n")
178 if uintptr(*lrPtr) != frame.lr {
179 print("frame.lr=", hex(frame.lr))
180 throw("frame.lr differs from stack LR")
181 }
182 }
183
184 gp.stkbar = gp.stkbar[:len(gp.stkbar)+1]
185 stkbar := &gp.stkbar[len(gp.stkbar)-1]
186 stkbar.savedLRPtr = lrUintptr
187 stkbar.savedLRVal = uintptr(*lrPtr)
188 *lrPtr = uintreg(stackBarrierPC)
189 return true
190}
191
192// gcRemoveStackBarriers removes all stack barriers installed in gp's stack.
193//go:nowritebarrier
194func gcRemoveStackBarriers(gp *g) {
195 if debugStackBarrier && gp.stkbarPos != 0 {
196 print("hit ", gp.stkbarPos, " stack barriers, goid=", gp.goid, "\n")
197 }
198
199 // Remove stack barriers that we didn't hit.
200 for _, stkbar := range gp.stkbar[gp.stkbarPos:] {
201 gcRemoveStackBarrier(gp, stkbar)
202 }
203
204 // Clear recorded stack barriers so copystack doesn't try to
205 // adjust them.
206 gp.stkbarPos = 0
207 gp.stkbar = gp.stkbar[:0]
208}
209
210// gcRemoveStackBarrier removes a single stack barrier. It is the
211// inverse operation of gcInstallStackBarrier.
212//
213// This is nosplit to ensure gp's stack does not move.
214//
215//go:nowritebarrier
216//go:nosplit
217func gcRemoveStackBarrier(gp *g, stkbar stkbar) {
218 if debugStackBarrier {
219 print("remove stack barrier at ", hex(stkbar.savedLRPtr), " with ", hex(stkbar.savedLRVal), ", goid=", gp.goid, "\n")
220 }
221 lrPtr := (*uintreg)(unsafe.Pointer(stkbar.savedLRPtr))
222 if val := *lrPtr; val != uintreg(stackBarrierPC) {
223 printlock()
224 print("at *", hex(stkbar.savedLRPtr), " expected stack barrier PC ", hex(stackBarrierPC), ", found ", hex(val), ", goid=", gp.goid, "\n")
225 print("gp.stkbar=")
226 gcPrintStkbars(gp.stkbar)
227 print(", gp.stkbarPos=", gp.stkbarPos, ", gp.stack=[", hex(gp.stack.lo), ",", hex(gp.stack.hi), ")\n")
228 throw("stack barrier lost")
229 }
230 *lrPtr = uintreg(stkbar.savedLRVal)
231}
232
233// gcPrintStkbars prints a []stkbar for debugging.
234func gcPrintStkbars(stkbar []stkbar) {
235 print("[")
236 for i, s := range stkbar {
237 if i > 0 {
238 print(" ")
239 }
240 print("*", hex(s.savedLRPtr), "=", hex(s.savedLRVal))
241 }
242 print("]")
243}
244
245// gcUnwindBarriers marks all stack barriers up the frame containing
246// sp as hit and removes them. This is used during stack unwinding for
247// panic/recover and by heapBitsBulkBarrier to force stack re-scanning
248// when its destination is on the stack.
249//
250// This is nosplit to ensure gp's stack does not move.
251//
252//go:nosplit
253func gcUnwindBarriers(gp *g, sp uintptr) {
254 // On LR machines, if there is a stack barrier on the return
255 // from the frame containing sp, this will mark it as hit even
256 // though it isn't, but it's okay to be conservative.
257 before := gp.stkbarPos
258 for int(gp.stkbarPos) < len(gp.stkbar) && gp.stkbar[gp.stkbarPos].savedLRPtr < sp {
259 gcRemoveStackBarrier(gp, gp.stkbar[gp.stkbarPos])
260 gp.stkbarPos++
261 }
262 if debugStackBarrier && gp.stkbarPos != before {
263 print("skip barriers below ", hex(sp), " in goid=", gp.goid, ": ")
264 gcPrintStkbars(gp.stkbar[before:gp.stkbarPos])
265 print("\n")
266 }
267}
268
269// nextBarrierPC returns the original return PC of the next stack barrier.
270// Used by getcallerpc, so it must be nosplit.
271//go:nosplit
272func nextBarrierPC() uintptr {
273 gp := getg()
274 return gp.stkbar[gp.stkbarPos].savedLRVal
275}
276
277// setNextBarrierPC sets the return PC of the next stack barrier.
278// Used by setcallerpc, so it must be nosplit.
279//go:nosplit
280func setNextBarrierPC(pc uintptr) {
281 gp := getg()
282 gp.stkbar[gp.stkbarPos].savedLRVal = pc
283}