// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package scan

import (
	"internal/goarch"
	"internal/runtime/gc"
	"internal/runtime/sys"
	"unsafe"
)

// ScanSpanPackedGo is an optimized pure Go implementation of ScanSpanPacked.
func ScanSpanPackedGo(mem unsafe.Pointer, bufp *uintptr, objMarks *gc.ObjMask, sizeClass uintptr, ptrMask *gc.PtrMask) (count int32) {
	buf := newUnsafeBuf(bufp)

	objBytes := uintptr(gc.SizeClassToSize[sizeClass])

	// TODO(austin): Trim objMarks to the number of objects in this size class?
	for markI, markWord := range objMarks {
		for range sys.OnesCount64(uint64(markWord)) {
			bitI := sys.TrailingZeros64(uint64(markWord))
			markWord &^= 1 << bitI
			objIndex := markI*goarch.PtrBits + bitI

			// objStartInSpan is the index of the word from mem where the
			// object starts. objEndInSpan points to the next object, i.e.
			// it's an exclusive upper bound.
			objStartInSpan := objBytes * uintptr(objIndex) / goarch.PtrSize
			objEndInSpan := objStartInSpan + objBytes/goarch.PtrSize

			// TODO: Another way to do this would be to extract the pointer mask
			// for this object (it's at most 64 bits) and do a bit iteration
			// over that.
			for wordI := objStartInSpan; wordI < objEndInSpan; wordI++ {
				val := *(*uintptr)(unsafe.Add(mem, wordI*goarch.PtrSize))

				// Check if we should enqueue this word.
				//
				// We load the word before the check because, even though this
				// can lead to loading much more than necessary, it's faster.
				// Most likely this is because it warms up the hardware
				// prefetcher much better, and gives us more time before we need
				// the value.
				//
				// We discard values that can't possibly be useful pointers
				// here, too, because this filters out a lot of words and does
				// so with as little processing as possible.
				//
				// TODO: This is close to, but not entirely branchless.
				isPtr := bool2int(ptrMask[wordI/goarch.PtrBits]&(1<<(wordI%goarch.PtrBits)) != 0)
				isNonNil := bool2int(val >= 4096)
				pred := isPtr&isNonNil != 0
				buf.addIf(val, pred)
			}
		}
	}
	// We don't know the true size of bufp, but we can at least catch obvious errors
	// in this function by making sure we didn't write more than gc.PageWords pointers
	// into the buffer.
	buf.check(gc.PageWords)
	return int32(buf.n)
}

// unsafeBuf allows for appending to a buffer without bounds-checks or branches.
type unsafeBuf[T any] struct {
	base *T
	n    int
}

func newUnsafeBuf[T any](base *T) unsafeBuf[T] {
	return unsafeBuf[T]{base, 0}
}

// addIf appends a value to the buffer if the predicate is true.
//
// addIf speculatively writes to the next index of the buffer, so the caller
// must be certain that such a write will still be in-bounds with respect
// to the buffer's true capacity.
func (b *unsafeBuf[T]) addIf(val T, pred bool) {
	*(*T)(unsafe.Add(unsafe.Pointer(b.base), b.n*int(unsafe.Sizeof(val)))) = val
	b.n += bool2int(pred)
}

// check performs a bounds check on speculative writes into the buffer.
// Calling this shortly after a series of addIf calls is important to
// catch any misuse as fast as possible. Separating the bounds check from
// the append is more efficient, but one check to cover several appends is
// still efficient and much more memory safe.
func (b unsafeBuf[T]) check(cap int) {
	// We fail even if b.n == cap because addIf speculatively writes one past b.n.
	if b.n >= cap {
		panic("unsafeBuf overflow")
	}
}

func bool2int(x bool) int {
	// This particular pattern gets optimized by the compiler.
	var b int
	if x {
		b = 1
	}
	return b
}
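
// The function below is an illustrative sketch, not part of the original file:
// it shows one plausible way a caller could drive ScanSpanPackedGo for a single
// span, assuming the output buffer is sized to gc.PageWords (the same bound
// that buf.check enforces above). The name scanSpanPackedExample and the idea
// of returning the collected pointers as a slice are hypothetical; a real
// caller would more likely reuse a preallocated buffer and feed the results
// to the GC's work queues.
func scanSpanPackedExample(mem unsafe.Pointer, objMarks *gc.ObjMask, sizeClass uintptr, ptrMask *gc.PtrMask) []uintptr {
	// Worst case: every word of the span is a marked, non-nil pointer, so a
	// gc.PageWords-sized buffer is always large enough for addIf's
	// speculative writes.
	var buf [gc.PageWords]uintptr
	n := ScanSpanPackedGo(mem, &buf[0], objMarks, sizeClass, ptrMask)
	return buf[:n]
}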