| // Copyright 2017 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // This implements the write barrier buffer. The write barrier itself |
| // is gcWriteBarrier and is implemented in assembly. |
| // |
| // See mbarrier.go for algorithmic details on the write barrier. This |
| // file deals only with the buffer. |
| // |
| // The write barrier has a fast path and a slow path. The fast path |
| // simply enqueues to a per-P write barrier buffer. It's written in |
| // assembly and doesn't clobber any general purpose registers, so it |
| // doesn't have the usual overheads of a Go call. |
| // |
| // When the buffer fills up, the write barrier invokes the slow path |
| // (wbBufFlush) to flush the buffer to the GC work queues. In this |
| // path, since the compiler didn't spill registers, we spill *all* |
| // registers and disallow any GC safe points that could observe the |
| // stack frame (since we don't know the types of the spilled |
| // registers). |
| |
| package runtime |
| |
| import ( |
| "runtime/internal/atomic" |
| "runtime/internal/sys" |
| "unsafe" |
| ) |
| |
| // testSmallBuf forces a small write barrier buffer to stress write |
| // barrier flushing. |
| const testSmallBuf = false |
| |
| // wbBuf is a per-P buffer of pointers queued by the write barrier. |
| // This buffer is flushed to the GC workbufs when it fills up and on |
| // various GC transitions. |
| // |
| // This is closely related to a "sequential store buffer" (SSB), |
| // except that SSBs are usually used for maintaining remembered sets, |
| // while this is used for marking. |
| type wbBuf struct { |
| // next points to the next slot in buf. It must not be a |
| // pointer type because it can point past the end of buf and |
| // must be updated without write barriers. |
| // |
| // This is a pointer rather than an index to optimize the |
| // write barrier assembly. |
| next uintptr |
| |
| // end points to just past the end of buf. It must not be a |
| // pointer type because it points past the end of buf and must |
| // be updated without write barriers. |
| end uintptr |
| |
| // buf stores a series of pointers to execute write barriers |
| // on. This must be a multiple of wbBufEntryPointers because |
| // the write barrier only checks for overflow once per entry. |
| buf [wbBufEntryPointers * wbBufEntries]uintptr |
| |
| // debugGen causes the write barrier buffer to flush after |
| // every write barrier if equal to gcWorkPauseGen. This is for |
| // debugging #27993. This is only set if debugCachedWork is |
| // set. |
| debugGen uint32 |
| } |
| |
| const ( |
| // wbBufEntries is the number of write barriers between |
| // flushes of the write barrier buffer. |
| // |
| // This trades latency for throughput amortization. Higher |
| // values amortize flushing overhead more, but increase the |
| // latency of flushing. Higher values also increase the cache |
| // footprint of the buffer. |
| // |
| // TODO: What is the latency cost of this? Tune this value. |
| wbBufEntries = 256 |
| |
| // wbBufEntryPointers is the number of pointers added to the |
| // buffer by each write barrier. |
| wbBufEntryPointers = 2 |
| ) |
| |
| // reset empties b by resetting its next and end pointers. |
| func (b *wbBuf) reset() { |
| start := uintptr(unsafe.Pointer(&b.buf[0])) |
| b.next = start |
| if writeBarrier.cgo || (debugCachedWork && (throwOnGCWork || b.debugGen == atomic.Load(&gcWorkPauseGen))) { |
| // Effectively disable the buffer by forcing a flush |
| // on every barrier. |
| b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers])) |
| } else if testSmallBuf { |
| // For testing, allow two barriers in the buffer. If |
| // we only did one, then barriers of non-heap pointers |
| // would be no-ops. This lets us combine a buffered |
| // barrier with a flush at a later time. |
| b.end = uintptr(unsafe.Pointer(&b.buf[2*wbBufEntryPointers])) |
| } else { |
| b.end = start + uintptr(len(b.buf))*unsafe.Sizeof(b.buf[0]) |
| } |
| |
| if (b.end-b.next)%(wbBufEntryPointers*unsafe.Sizeof(b.buf[0])) != 0 { |
| throw("bad write barrier buffer bounds") |
| } |
| } |
| |
| // discard resets b's next pointer, but not its end pointer. |
| // |
| // This must be nosplit because it's called by wbBufFlush. |
| // |
| //go:nosplit |
| func (b *wbBuf) discard() { |
| b.next = uintptr(unsafe.Pointer(&b.buf[0])) |
| } |
| |
| // empty reports whether b contains no pointers. |
| func (b *wbBuf) empty() bool { |
| return b.next == uintptr(unsafe.Pointer(&b.buf[0])) |
| } |
| |
| // putFast adds old and new to the write barrier buffer and returns |
| // false if a flush is necessary. Callers should use this as: |
| // |
| // buf := &getg().m.p.ptr().wbBuf |
| // if !buf.putFast(old, new) { |
| // wbBufFlush(...) |
| // } |
| // ... actual memory write ... |
| // |
| // The arguments to wbBufFlush depend on whether the caller is doing |
| // its own cgo pointer checks. If it is, then this can be |
| // wbBufFlush(nil, 0). Otherwise, it must pass the slot address and |
| // new. |
| // |
| // The caller must ensure there are no preemption points during the |
| // above sequence. There must be no preemption points while buf is in |
| // use because it is a per-P resource. There must be no preemption |
| // points between the buffer put and the write to memory because this |
| // could allow a GC phase change, which could result in missed write |
| // barriers. |
| // |
| // putFast must be nowritebarrierrec to because write barriers here would |
| // corrupt the write barrier buffer. It (and everything it calls, if |
| // it called anything) has to be nosplit to avoid scheduling on to a |
| // different P and a different buffer. |
| // |
| //go:nowritebarrierrec |
| //go:nosplit |
| func (b *wbBuf) putFast(old, new uintptr) bool { |
| p := (*[2]uintptr)(unsafe.Pointer(b.next)) |
| p[0] = old |
| p[1] = new |
| b.next += 2 * sys.PtrSize |
| return b.next != b.end |
| } |
| |
| // wbBufFlush flushes the current P's write barrier buffer to the GC |
| // workbufs. It is passed the slot and value of the write barrier that |
| // caused the flush so that it can implement cgocheck. |
| // |
| // This must not have write barriers because it is part of the write |
| // barrier implementation. |
| // |
| // This and everything it calls must be nosplit because 1) the stack |
| // contains untyped slots from gcWriteBarrier and 2) there must not be |
| // a GC safe point between the write barrier test in the caller and |
| // flushing the buffer. |
| // |
| // TODO: A "go:nosplitrec" annotation would be perfect for this. |
| // |
| //go:nowritebarrierrec |
| //go:nosplit |
| func wbBufFlush(dst *uintptr, src uintptr) { |
| // Note: Every possible return from this function must reset |
| // the buffer's next pointer to prevent buffer overflow. |
| |
| // This *must not* modify its arguments because this |
| // function's argument slots do double duty in gcWriteBarrier |
| // as register spill slots. Currently, not modifying the |
| // arguments is sufficient to keep the spill slots unmodified |
| // (which seems unlikely to change since it costs little and |
| // helps with debugging). |
| |
| if getg().m.dying > 0 { |
| // We're going down. Not much point in write barriers |
| // and this way we can allow write barriers in the |
| // panic path. |
| getg().m.p.ptr().wbBuf.discard() |
| return |
| } |
| |
| if writeBarrier.cgo && dst != nil { |
| // This must be called from the stack that did the |
| // write. It's nosplit all the way down. |
| cgoCheckWriteBarrier(dst, src) |
| if !writeBarrier.needed { |
| // We were only called for cgocheck. |
| getg().m.p.ptr().wbBuf.discard() |
| return |
| } |
| } |
| |
| // Switch to the system stack so we don't have to worry about |
| // the untyped stack slots or safe points. |
| systemstack(func() { |
| if debugCachedWork { |
| // For debugging, include the old value of the |
| // slot and some other data in the traceback. |
| wbBuf := &getg().m.p.ptr().wbBuf |
| var old uintptr |
| if dst != nil { |
| // dst may be nil in direct calls to wbBufFlush. |
| old = *dst |
| } |
| wbBufFlush1Debug(old, wbBuf.buf[0], wbBuf.buf[1], &wbBuf.buf[0], wbBuf.next) |
| } else { |
| wbBufFlush1(getg().m.p.ptr()) |
| } |
| }) |
| } |
| |
| // wbBufFlush1Debug is a temporary function for debugging issue |
| // #27993. It exists solely to add some context to the traceback. |
| // |
| //go:nowritebarrierrec |
| //go:systemstack |
| //go:noinline |
| func wbBufFlush1Debug(old, buf1, buf2 uintptr, start *uintptr, next uintptr) { |
| wbBufFlush1(getg().m.p.ptr()) |
| } |
| |
| // wbBufFlush1 flushes p's write barrier buffer to the GC work queue. |
| // |
| // This must not have write barriers because it is part of the write |
| // barrier implementation, so this may lead to infinite loops or |
| // buffer corruption. |
| // |
| // This must be non-preemptible because it uses the P's workbuf. |
| // |
| //go:nowritebarrierrec |
| //go:systemstack |
| func wbBufFlush1(_p_ *p) { |
| // Get the buffered pointers. |
| start := uintptr(unsafe.Pointer(&_p_.wbBuf.buf[0])) |
| n := (_p_.wbBuf.next - start) / unsafe.Sizeof(_p_.wbBuf.buf[0]) |
| ptrs := _p_.wbBuf.buf[:n] |
| |
| // Poison the buffer to make extra sure nothing is enqueued |
| // while we're processing the buffer. |
| _p_.wbBuf.next = 0 |
| |
| if useCheckmark { |
| // Slow path for checkmark mode. |
| for _, ptr := range ptrs { |
| shade(ptr) |
| } |
| _p_.wbBuf.reset() |
| return |
| } |
| |
| // Mark all of the pointers in the buffer and record only the |
| // pointers we greyed. We use the buffer itself to temporarily |
| // record greyed pointers. |
| // |
| // TODO: Should scanobject/scanblock just stuff pointers into |
| // the wbBuf? Then this would become the sole greying path. |
| // |
| // TODO: We could avoid shading any of the "new" pointers in |
| // the buffer if the stack has been shaded, or even avoid |
| // putting them in the buffer at all (which would double its |
| // capacity). This is slightly complicated with the buffer; we |
| // could track whether any un-shaded goroutine has used the |
| // buffer, or just track globally whether there are any |
| // un-shaded stacks and flush after each stack scan. |
| gcw := &_p_.gcw |
| pos := 0 |
| for _, ptr := range ptrs { |
| if ptr < minLegalPointer { |
| // nil pointers are very common, especially |
| // for the "old" values. Filter out these and |
| // other "obvious" non-heap pointers ASAP. |
| // |
| // TODO: Should we filter out nils in the fast |
| // path to reduce the rate of flushes? |
| continue |
| } |
| obj, span, objIndex := findObject(ptr, 0, 0, !usestackmaps) |
| if obj == 0 { |
| continue |
| } |
| if span.isFree(objIndex) { |
| // For gccgo, it is possible that we have a write barrier |
| // writing to unintialized stack memory. So we could see |
| // a bad pointer in the write barrier buffer. Don't mark |
| // it in this case. |
| continue |
| } |
| // TODO: Consider making two passes where the first |
| // just prefetches the mark bits. |
| mbits := span.markBitsForIndex(objIndex) |
| if mbits.isMarked() { |
| continue |
| } |
| mbits.setMarked() |
| |
| // Mark span. |
| arena, pageIdx, pageMask := pageIndexOf(span.base()) |
| if arena.pageMarks[pageIdx]&pageMask == 0 { |
| atomic.Or8(&arena.pageMarks[pageIdx], pageMask) |
| } |
| |
| if span.spanclass.noscan() { |
| gcw.bytesMarked += uint64(span.elemsize) |
| continue |
| } |
| ptrs[pos] = obj |
| pos++ |
| } |
| |
| // Enqueue the greyed objects. |
| gcw.putBatch(ptrs[:pos]) |
| |
| _p_.wbBuf.reset() |
| } |