// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package scan

import (
	"internal/goarch"
	"internal/runtime/gc"
	"internal/runtime/sys"
	"unsafe"
)

// ScanSpanPackedGo is an optimized pure Go implementation of ScanSpanPacked.
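//
// It visits each object marked in objMarks, filters that object's words
// through ptrMask, writes every plausible (non-nil) pointer value it finds
// into bufp, and returns the number of pointers written.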
func ScanSpanPackedGo(mem unsafe.Pointer, bufp *uintptr, objMarks *gc.ObjMask, sizeClass uintptr, ptrMask *gc.PtrMask) (count int32) {
	buf := newUnsafeBuf(bufp)
	objBytes := uintptr(gc.SizeClassToSize[sizeClass])
	// TODO(austin): Trim objMarks to the number of objects in this size class?
	for markI, markWord := range objMarks {
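		// Iterate once per set bit in markWord (one per marked object),
		// peeling off the lowest set bit on each iteration.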
		for range sys.OnesCount64(uint64(markWord)) {
			bitI := sys.TrailingZeros64(uint64(markWord))
			markWord &^= 1 << bitI

			objIndex := markI*goarch.PtrBits + bitI

			// objStartInSpan is the index of the word from mem where the
			// object starts. objEndInSpan points to the next object, i.e.
			// it's an exclusive upper bound.
			objStartInSpan := objBytes * uintptr(objIndex) / goarch.PtrSize
			objEndInSpan := objStartInSpan + objBytes/goarch.PtrSize

			// TODO: Another way to do this would be to extract the pointer mask
			// for this object (it's at most 64 bits) and do a bit iteration
			// over that.
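			//
			// A rough sketch of that alternative (hypothetical: objPtrBits
			// stands in for a helper, not present here, that would gather
			// this object's pointer bits out of ptrMask into a uint64):
			//
			//	bits := objPtrBits(ptrMask, objStartInSpan, objEndInSpan)
			//	for range sys.OnesCount64(bits) {
			//		i := sys.TrailingZeros64(bits)
			//		bits &^= 1 << i
			//		val := *(*uintptr)(unsafe.Add(mem, (objStartInSpan+uintptr(i))*goarch.PtrSize))
			//		buf.addIf(val, val >= 4096)
			//	}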

			for wordI := objStartInSpan; wordI < objEndInSpan; wordI++ {
				val := *(*uintptr)(unsafe.Add(mem, wordI*goarch.PtrSize))
				// Check if we should enqueue this word.
				//
				// We load the word before the check because, even though this
				// can lead to loading much more than necessary, it's faster.
				// Most likely this is because it warms up the hardware
				// prefetcher much better, and gives us more time before we need
				// the value.
				//
				// We discard values that can't possibly be useful pointers
				// here, too, because this filters out a lot of words and does
				// so with as little processing as possible.
				//
				// TODO: This is close to, but not entirely branchless.
				isPtr := bool2int(ptrMask[wordI/goarch.PtrBits]&(1<<(wordI%goarch.PtrBits)) != 0)
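				// Heap pointers are never this small, so comparing against
				// 4096 cheaply rejects nil and small scalar values.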
				isNonNil := bool2int(val >= 4096)
				pred := isPtr&isNonNil != 0
				buf.addIf(val, pred)
			}
		}
	}
	// We don't know the true size of bufp, but we can at least catch obvious errors
	// in this function by making sure we didn't write more than gc.PageWords pointers
	// into the buffer.
	buf.check(gc.PageWords)
	return int32(buf.n)
}

// unsafeBuf allows for appending to a buffer without bounds checks or branches.
type unsafeBuf[T any] struct {
	base *T
	n    int
}

func newUnsafeBuf[T any](base *T) unsafeBuf[T] {
	return unsafeBuf[T]{base, 0}
}

// addIf appends a value to the buffer if the predicate is true.
//
// addIf speculatively writes to the next index of the buffer, so the caller
// must be certain that such a write will still be in-bounds with respect
// to the buffer's true capacity.
func (b *unsafeBuf[T]) addIf(val T, pred bool) {
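	// Unconditionally store val at index b.n, then advance n only when pred
	// is true, so a rejected value is simply overwritten by the next call.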
	*(*T)(unsafe.Add(unsafe.Pointer(b.base), b.n*int(unsafe.Sizeof(val)))) = val
	b.n += bool2int(pred)
}

// check performs a bounds check on speculative writes into the buffer.
// Calling it shortly after a series of addIf calls is important so that
// any misuse is caught as quickly as possible. Hoisting the bounds check
// out of each append is what makes addIf cheap; one check covering several
// appends keeps most of that benefit while being far more memory safe than
// no check at all.
func (b unsafeBuf[T]) check(cap int) {
	// Fail even when b.n == cap: addIf stores to index b.n before applying
	// the predicate, so any addIf call made after n reached cap has already
	// written one element past the end.
	if b.n >= cap {
		panic("unsafeBuf overflow")
	}
}

func bool2int(x bool) int {
	// This particular pattern gets optimized by the compiler.
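	// It typically lowers to a branchless bool-to-int conversion rather than
	// a conditional jump, which keeps callers like addIf nearly branch-free.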
	var b int
	if x {
		b = 1
	}
	return b
}