blob: 1bf9d573b70f59bca185b9db62e91feb488b8492 [file] [log] [blame]
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Garbage collector: stack barriers
//
// Stack barriers enable the garbage collector to determine how much
// of a gorountine stack has changed between when a stack is scanned
// during the concurrent scan phase and when it is re-scanned during
// the stop-the-world mark termination phase. Mark termination only
// needs to re-scan the changed part, so for deep stacks this can
// significantly reduce GC pause time compared to the alternative of
// re-scanning whole stacks. The deeper the stacks, the more stack
// barriers help.
//
// When stacks are scanned during the concurrent scan phase, the stack
// scan installs stack barriers by selecting stack frames and
// overwriting the saved return PCs (or link registers) of these
// frames with the PC of a "stack barrier trampoline". Later, when a
// selected frame returns, it "returns" to this trampoline instead of
// returning to its actual caller. The trampoline records that the
// stack has unwound past this frame and jumps to the original return
// PC recorded when the stack barrier was installed. Mark termination
// re-scans only as far as the first frame that hasn't hit a stack
// barrier and then removes and un-hit stack barriers.
//
// This scheme is very lightweight. No special code is required in the
// mutator to record stack unwinding and the trampoline is only a few
// assembly instructions.
//
// Book-keeping
// ------------
//
// The primary cost of stack barriers is book-keeping: the runtime has
// to record the locations of all stack barriers and the original
// return PCs in order to return to the correct caller when a stack
// barrier is hit and so it can remove un-hit stack barriers. In order
// to minimize this cost, the Go runtime places stack barriers in
// exponentially-spaced frames, starting 1K past the current frame.
// The book-keeping structure hence grows logarithmically with the
// size of the stack and mark termination re-scans at most twice as
// much stack as necessary.
//
// The runtime reserves space for this book-keeping structure at the
// top of the stack allocation itself (just above the outermost
// frame). This is necessary because the regular memory allocator can
// itself grow the stack, and hence can't be used when allocating
// stack-related structures.
//
// For debugging, the runtime also supports installing stack barriers
// at every frame. However, this requires significantly more
// book-keeping space.
//
// Correctness
// -----------
//
// The runtime and the compiler cooperate to ensure that all objects
// reachable from the stack as of mark termination are marked.
// Anything unchanged since the concurrent scan phase will be marked
// because it is marked by the concurrent scan. After the concurrent
// scan, there are three possible classes of stack modifications that
// must be tracked:
//
// 1) Mutator writes below the lowest un-hit stack barrier. This
// includes all writes performed by an executing function to its own
// stack frame. This part of the stack will be re-scanned by mark
// termination, which will mark any objects made reachable from
// modifications to this part of the stack.
//
// 2) Mutator writes above the lowest un-hit stack barrier. It's
// possible for a mutator to modify the stack above the lowest un-hit
// stack barrier if a higher frame has passed down a pointer to a
// stack variable in its frame. This is called an "up-pointer". The
// compiler ensures that writes through up-pointers have an
// accompanying write barrier (it simply doesn't distinguish between
// writes through up-pointers and writes through heap pointers). This
// write barrier marks any object made reachable from modifications to
// this part of the stack.
//
// 3) Runtime writes to the stack. Various runtime operations such as
// sends to unbuffered channels can write to arbitrary parts of the
// stack, including above the lowest un-hit stack barrier. We solve
// this in two ways. In many cases, the runtime can perform an
// explicit write barrier operation like in case 2. However, in the
// case of bulk memory move (typedmemmove), the runtime doesn't
// necessary have ready access to a pointer bitmap for the memory
// being copied, so it simply unwinds any stack barriers below the
// destination.
//
// Gotchas
// -------
//
// Anything that inspects or manipulates the stack potentially needs
// to understand stack barriers. The most obvious case is that
// gentraceback needs to use the original return PC when it encounters
// the stack barrier trampoline. Anything that unwinds the stack such
// as panic/recover must unwind stack barriers in tandem with
// unwinding the stack.
//
// Stack barriers require that any goroutine whose stack has been
// scanned must execute write barriers. Go solves this by simply
// enabling write barriers globally during the concurrent scan phase.
// However, traditionally, write barriers are not enabled during this
// phase.
//
// Synchronization
// ---------------
//
// For the most part, accessing and modifying stack barriers is
// synchronized around GC safe points. Installing stack barriers
// forces the G to a safe point, while all other operations that
// modify stack barriers run on the G and prevent it from reaching a
// safe point.
//
// Subtlety arises when a G may be tracebacked when *not* at a safe
// point. This happens during sigprof. For this, each G has a "stack
// barrier lock" (see gcLockStackBarriers, gcUnlockStackBarriers).
// Operations that manipulate stack barriers acquire this lock, while
// sigprof tries to acquire it and simply skips the traceback if it
// can't acquire it. There is one exception for performance and
// complexity reasons: hitting a stack barrier manipulates the stack
// barrier list without acquiring the stack barrier lock. For this,
// gentraceback performs a special fix up if the traceback starts in
// the stack barrier function.
package runtime
import (
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
)
const debugStackBarrier = false
// firstStackBarrierOffset is the approximate byte offset at
// which to place the first stack barrier from the current SP.
// This is a lower bound on how much stack will have to be
// re-scanned during mark termination. Subsequent barriers are
// placed at firstStackBarrierOffset * 2^n offsets.
//
// For debugging, this can be set to 0, which will install a
// stack barrier at every frame. If you do this, you may also
// have to raise _StackMin, since the stack barrier
// bookkeeping will use a large amount of each stack.
var firstStackBarrierOffset = 1024
// gcMaxStackBarriers returns the maximum number of stack barriers
// that can be installed in a stack of stackSize bytes.
func gcMaxStackBarriers(stackSize int) (n int) {
if firstStackBarrierOffset == 0 {
// Special debugging case for inserting stack barriers
// at every frame. Steal half of the stack for the
// []stkbar. Technically, if the stack were to consist
// solely of return PCs we would need two thirds of
// the stack, but stealing that much breaks things and
// this doesn't happen in practice.
return stackSize / 2 / int(unsafe.Sizeof(stkbar{}))
}
offset := firstStackBarrierOffset
for offset < stackSize {
n++
offset *= 2
}
return n + 1
}
// gcInstallStackBarrier installs a stack barrier over the return PC of frame.
//go:nowritebarrier
func gcInstallStackBarrier(gp *g, frame *stkframe) bool {
if frame.lr == 0 {
if debugStackBarrier {
print("not installing stack barrier with no LR, goid=", gp.goid, "\n")
}
return false
}
if frame.fn.entry == cgocallback_gofuncPC {
// cgocallback_gofunc doesn't return to its LR;
// instead, its return path puts LR in g.sched.pc and
// switches back to the system stack on which
// cgocallback_gofunc was originally called. We can't
// have a stack barrier in g.sched.pc, so don't
// install one in this frame.
if debugStackBarrier {
print("not installing stack barrier over LR of cgocallback_gofunc, goid=", gp.goid, "\n")
}
return false
}
// Save the return PC and overwrite it with stackBarrier.
var lrUintptr uintptr
if usesLR {
lrUintptr = frame.sp
} else {
lrUintptr = frame.fp - sys.RegSize
}
lrPtr := (*sys.Uintreg)(unsafe.Pointer(lrUintptr))
if debugStackBarrier {
print("install stack barrier at ", hex(lrUintptr), " over ", hex(*lrPtr), ", goid=", gp.goid, "\n")
if uintptr(*lrPtr) != frame.lr {
print("frame.lr=", hex(frame.lr))
throw("frame.lr differs from stack LR")
}
}
gp.stkbar = gp.stkbar[:len(gp.stkbar)+1]
stkbar := &gp.stkbar[len(gp.stkbar)-1]
stkbar.savedLRPtr = lrUintptr
stkbar.savedLRVal = uintptr(*lrPtr)
*lrPtr = sys.Uintreg(stackBarrierPC)
return true
}
// gcRemoveStackBarriers removes all stack barriers installed in gp's stack.
//
// gp's stack barriers must be locked.
//
//go:nowritebarrier
func gcRemoveStackBarriers(gp *g) {
if debugStackBarrier && gp.stkbarPos != 0 {
print("hit ", gp.stkbarPos, " stack barriers, goid=", gp.goid, "\n")
}
// Remove stack barriers that we didn't hit.
for _, stkbar := range gp.stkbar[gp.stkbarPos:] {
gcRemoveStackBarrier(gp, stkbar)
}
// Clear recorded stack barriers so copystack doesn't try to
// adjust them.
gp.stkbarPos = 0
gp.stkbar = gp.stkbar[:0]
}
// gcRemoveStackBarrier removes a single stack barrier. It is the
// inverse operation of gcInstallStackBarrier.
//
// This is nosplit to ensure gp's stack does not move.
//
//go:nowritebarrier
//go:nosplit
func gcRemoveStackBarrier(gp *g, stkbar stkbar) {
if debugStackBarrier {
print("remove stack barrier at ", hex(stkbar.savedLRPtr), " with ", hex(stkbar.savedLRVal), ", goid=", gp.goid, "\n")
}
lrPtr := (*sys.Uintreg)(unsafe.Pointer(stkbar.savedLRPtr))
if val := *lrPtr; val != sys.Uintreg(stackBarrierPC) {
printlock()
print("at *", hex(stkbar.savedLRPtr), " expected stack barrier PC ", hex(stackBarrierPC), ", found ", hex(val), ", goid=", gp.goid, "\n")
print("gp.stkbar=")
gcPrintStkbars(gp, -1)
print(", gp.stack=[", hex(gp.stack.lo), ",", hex(gp.stack.hi), ")\n")
throw("stack barrier lost")
}
*lrPtr = sys.Uintreg(stkbar.savedLRVal)
}
// gcTryRemoveAllStackBarriers tries to remove stack barriers from all
// Gs in gps. It is best-effort and efficient. If it can't remove
// barriers from a G immediately, it will simply skip it.
func gcTryRemoveAllStackBarriers(gps []*g) {
for _, gp := range gps {
retry:
for {
switch s := readgstatus(gp); s {
default:
break retry
case _Grunnable, _Gsyscall, _Gwaiting:
if !castogscanstatus(gp, s, s|_Gscan) {
continue
}
gcLockStackBarriers(gp)
gcRemoveStackBarriers(gp)
gcUnlockStackBarriers(gp)
restartg(gp)
break retry
}
}
}
}
// gcPrintStkbars prints the stack barriers of gp for debugging. It
// places a "@@@" marker at gp.stkbarPos. If marker >= 0, it will also
// place a "==>" marker before the marker'th entry.
func gcPrintStkbars(gp *g, marker int) {
print("[")
for i, s := range gp.stkbar {
if i > 0 {
print(" ")
}
if i == int(gp.stkbarPos) {
print("@@@ ")
}
if i == marker {
print("==> ")
}
print("*", hex(s.savedLRPtr), "=", hex(s.savedLRVal))
}
if int(gp.stkbarPos) == len(gp.stkbar) {
print(" @@@")
}
if marker == len(gp.stkbar) {
print(" ==>")
}
print("]")
}
// gcUnwindBarriers marks all stack barriers up the frame containing
// sp as hit and removes them. This is used during stack unwinding for
// panic/recover and by heapBitsBulkBarrier to force stack re-scanning
// when its destination is on the stack.
//
// This is nosplit to ensure gp's stack does not move.
//
//go:nosplit
func gcUnwindBarriers(gp *g, sp uintptr) {
gcLockStackBarriers(gp)
// On LR machines, if there is a stack barrier on the return
// from the frame containing sp, this will mark it as hit even
// though it isn't, but it's okay to be conservative.
before := gp.stkbarPos
for int(gp.stkbarPos) < len(gp.stkbar) && gp.stkbar[gp.stkbarPos].savedLRPtr < sp {
gcRemoveStackBarrier(gp, gp.stkbar[gp.stkbarPos])
gp.stkbarPos++
}
gcUnlockStackBarriers(gp)
if debugStackBarrier && gp.stkbarPos != before {
print("skip barriers below ", hex(sp), " in goid=", gp.goid, ": ")
// We skipped barriers between the "==>" marker
// (before) and the "@@@" marker (gp.stkbarPos).
gcPrintStkbars(gp, int(before))
print("\n")
}
}
// nextBarrierPC returns the original return PC of the next stack barrier.
// Used by getcallerpc, so it must be nosplit.
//go:nosplit
func nextBarrierPC() uintptr {
gp := getg()
return gp.stkbar[gp.stkbarPos].savedLRVal
}
// setNextBarrierPC sets the return PC of the next stack barrier.
// Used by setcallerpc, so it must be nosplit.
//go:nosplit
func setNextBarrierPC(pc uintptr) {
gp := getg()
gcLockStackBarriers(gp)
gp.stkbar[gp.stkbarPos].savedLRVal = pc
gcUnlockStackBarriers(gp)
}
// gcLockStackBarriers synchronizes with tracebacks of gp's stack
// during sigprof for installation or removal of stack barriers. It
// blocks until any current sigprof is done tracebacking gp's stack
// and then disallows profiling tracebacks of gp's stack.
//
// This is necessary because a sigprof during barrier installation or
// removal could observe inconsistencies between the stkbar array and
// the stack itself and crash.
//
//go:nosplit
func gcLockStackBarriers(gp *g) {
// Disable preemption so scanstack cannot run while the caller
// is manipulating the stack barriers.
acquirem()
for !atomic.Cas(&gp.stackLock, 0, 1) {
osyield()
}
}
//go:nosplit
func gcTryLockStackBarriers(gp *g) bool {
mp := acquirem()
result := atomic.Cas(&gp.stackLock, 0, 1)
if !result {
releasem(mp)
}
return result
}
func gcUnlockStackBarriers(gp *g) {
atomic.Store(&gp.stackLock, 0)
releasem(getg().m)
}