// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package scan

import (
	"internal/goarch"
	"internal/runtime/gc"
	"internal/runtime/sys"
	"unsafe"
)

// ScanSpanPackedGo is an optimized pure Go implementation of ScanSpanPacked.
func ScanSpanPackedGo(mem unsafe.Pointer, bufp *uintptr, objMarks *gc.ObjMask, sizeClass uintptr, ptrMask *gc.PtrMask) (count int32) {
	buf := newUnsafeBuf(bufp)
	objBytes := uintptr(gc.SizeClassToSize[sizeClass])
	// TODO(austin): Trim objMarks to the number of objects in this size class?
	for markI, markWord := range objMarks {
		for range sys.OnesCount64(uint64(markWord)) {
			bitI := sys.TrailingZeros64(uint64(markWord))
			markWord &^= 1 << bitI
			objIndex := markI*goarch.PtrBits + bitI
			// objStartInSpan is the index of the word, relative to mem, where
			// the object starts. objEndInSpan points to the next object, i.e.
			// it's an exclusive upper bound.
			objStartInSpan := objBytes * uintptr(objIndex) / goarch.PtrSize
			objEndInSpan := objStartInSpan + objBytes/goarch.PtrSize
			// TODO: Another way to do this would be to extract the pointer mask
			// for this object (it's at most 64 bits) and do a bit iteration
			// over that.
			for wordI := objStartInSpan; wordI < objEndInSpan; wordI++ {
				val := *(*uintptr)(unsafe.Add(mem, wordI*goarch.PtrSize))
				// Check if we should enqueue this word.
				//
				// We load the word before the check because, even though this
				// can lead to loading much more than necessary, it's faster.
				// Most likely this is because it warms up the hardware
				// prefetcher much better, and gives us more time before we need
				// the value.
				//
				// We discard values that can't possibly be useful pointers
				// here, too, because this filters out a lot of words and does
				// so with as little processing as possible.
				//
				// TODO: This is close to, but not entirely, branchless.
				isPtr := bool2int(ptrMask[wordI/goarch.PtrBits]&(1<<(wordI%goarch.PtrBits)) != 0)
				// nil and anything else in the first page can't be a valid pointer.
				isNonNil := bool2int(val >= 4096)
				pred := isPtr&isNonNil != 0
				buf.addIf(val, pred)
			}
		}
	}
	// We don't know the true size of bufp, but we can at least catch obvious
	// errors in this function by making sure we didn't write more than
	// gc.PageWords pointers into the buffer.
	buf.check(gc.PageWords)
	return int32(buf.n)
}
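
// The mark-word loops above visit only set bits: sys.OnesCount64 bounds the
// trip count, sys.TrailingZeros64 finds the lowest set bit, and &^= clears it.
// iterSetBits is a minimal, hypothetical sketch of that pattern in isolation;
// it is not used by this package and exists only to illustrate the technique.
func iterSetBits(word uint64, visit func(bit int)) {
	for range sys.OnesCount64(word) {
		bit := sys.TrailingZeros64(word)
		word &^= 1 << bit // clear the bit we just visited
		visit(bit)
	}
}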

// unsafeBuf allows for appending to a buffer without bounds checks or branches.
type unsafeBuf[T any] struct {
	base *T
	n    int
}

func newUnsafeBuf[T any](base *T) unsafeBuf[T] {
	return unsafeBuf[T]{base, 0}
}

// addIf appends a value to the buffer if the predicate is true.
//
// addIf speculatively writes to the next index of the buffer, so the caller
// must be certain that such a write will still be in-bounds with respect
// to the buffer's true capacity.
func (b *unsafeBuf[T]) addIf(val T, pred bool) {
	*(*T)(unsafe.Add(unsafe.Pointer(b.base), b.n*int(unsafe.Sizeof(val)))) = val
	b.n += bool2int(pred)
}

// check performs a bounds check on speculative writes into the buffer.
// Call it soon after a series of addIf calls so that any misuse is caught
// as quickly as possible. Separating the bounds check from the append is
// what makes addIf cheap, and a single check covering several appends keeps
// most of that benefit while restoring memory safety.
func (b unsafeBuf[T]) check(cap int) {
	// We fail even if b.n == cap because addIf speculatively writes one past b.n.
	if b.n >= cap {
		panic("unsafeBuf overflow")
	}
}
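
// filterNonZero is a hypothetical usage sketch (not part of this package)
// showing the intended unsafeBuf protocol: perform a run of addIf calls,
// each of which may write one element past the accepted length, then call
// check once with the buffer's true capacity to catch any overflow. The
// caller must guarantee dstCap is greater than the number of accepted
// elements.
func filterNonZero(dst *uintptr, dstCap int, src []uintptr) int {
	buf := newUnsafeBuf(dst)
	for _, v := range src {
		// Speculatively store v at the next slot; the length only
		// advances when v is nonzero.
		buf.addIf(v, v != 0)
	}
	buf.check(dstCap)
	return buf.n
}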

func bool2int(x bool) int {
	// This particular pattern gets optimized by the compiler.
	var b int
	if x {
		b = 1
	}
	return b
}
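
// countAtLeast is a hypothetical sketch (not used by this package) of why
// bool2int is useful: folding a comparison into integer arithmetic, as the
// scan loop above does with isPtr and isNonNil, lets hot loops accumulate
// predicates without a data-dependent branch.
func countAtLeast(vals []uintptr, threshold uintptr) int {
	n := 0
	for _, v := range vals {
		n += bool2int(v >= threshold) // 1 if v >= threshold, else 0
	}
	return n
}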