1
2
3
4
5 package scan_test
6
7 import (
8 "fmt"
9 "internal/cpu"
10 "internal/goarch"
11 "internal/runtime/gc"
12 "internal/runtime/gc/scan"
13 "math/bits"
14 "math/rand/v2"
15 "slices"
16 "sync"
17 "testing"
18 "unsafe"
19 )
20
// scanFunc is the common signature of the ScanSpanPacked implementations
// under test: scan the span memory at mem for the objects set in objMarks,
// write the pointer words selected by ptrMask into the buffer at bufp, and
// return the number of pointers written (count bounds buf[:count] at the
// call sites below).
type scanFunc func(mem unsafe.Pointer, bufp *uintptr, objMarks *gc.ObjMask, sizeClass uintptr, ptrMask *gc.PtrMask) (count int32)
22
23 func testScanSpanPacked(t *testing.T, scanF scanFunc) {
24 scanR := scan.ScanSpanPackedReference
25
26
27 mem, free := makeMem(t, 1)
28 defer free()
29 for i := range mem {
30
31
32 mem[i] = uintptr(int(gc.PageSize) + i + 1)
33 }
34
35
36 rnd := rand.New(rand.NewPCG(42, 42))
37 var ptrs gc.PtrMask
38 for i := range ptrs {
39 ptrs[i] = uintptr(rnd.Uint64())
40 }
41
42 bufF := make([]uintptr, gc.PageWords)
43 bufR := make([]uintptr, gc.PageWords)
44 testObjs(t, func(t *testing.T, sizeClass int, objs *gc.ObjMask) {
45 nF := scanF(unsafe.Pointer(&mem[0]), &bufF[0], objs, uintptr(sizeClass), &ptrs)
46 nR := scanR(unsafe.Pointer(&mem[0]), &bufR[0], objs, uintptr(sizeClass), &ptrs)
47
48 if nR != nF {
49 t.Errorf("want %d count, got %d", nR, nF)
50 } else if !slices.Equal(bufF[:nF], bufR[:nR]) {
51 t.Errorf("want scanned pointers %d, got %d", bufR[:nR], bufF[:nF])
52 }
53 })
54 }
55
56 func testObjs(t *testing.T, f func(t *testing.T, sizeClass int, objMask *gc.ObjMask)) {
57 for sizeClass := range gc.NumSizeClasses {
58 if sizeClass == 0 {
59 continue
60 }
61 size := uintptr(gc.SizeClassToSize[sizeClass])
62 if size > gc.MinSizeForMallocHeader {
63 break
64 }
65 t.Run(fmt.Sprintf("size=%d", size), func(t *testing.T) {
66
67 const objMask = 0x101
68 nObj := uintptr(gc.SizeClassToNPages[sizeClass]) * gc.PageSize / size
69 for i := range nObj - uintptr(bits.Len(objMask)-1) {
70 t.Run(fmt.Sprintf("objs=0x%x<<%d", objMask, i), func(t *testing.T) {
71 var objs gc.ObjMask
72 objs[i/goarch.PtrBits] = objMask << (i % goarch.PtrBits)
73 f(t, sizeClass, &objs)
74 })
75 }
76 })
77 }
78 }
79
80 var dataCacheSizes = sync.OnceValue(func() []uintptr {
81 cs := cpu.DataCacheSizes()
82 for i, c := range cs {
83 fmt.Printf("# L%d cache: %d (%d Go pages)\n", i+1, c, c/gc.PageSize)
84 }
85 return cs
86 })
87
// BenchmarkScanSpanPacked runs the ScanSpanPacked benchmarks for every
// small size class at each cache-hierarchy-sized heap (tiny, per cache
// level, and RAM).
func BenchmarkScanSpanPacked(b *testing.B) {
	benchmarkCacheSizes(b, benchmarkScanSpanPackedAllSizeClasses)
}
91
92 func benchmarkCacheSizes(b *testing.B, fn func(b *testing.B, heapPages int)) {
93 cacheSizes := dataCacheSizes()
94 b.Run("cache=tiny/pages=1", func(b *testing.B) {
95 fn(b, 1)
96 })
97 for i, cacheBytes := range cacheSizes {
98 pages := int(cacheBytes*3/4) / gc.PageSize
99 b.Run(fmt.Sprintf("cache=L%d/pages=%d", i+1, pages), func(b *testing.B) {
100 fn(b, pages)
101 })
102 }
103 if len(cacheSizes) == 0 {
104 return
105 }
106 ramPages := int(cacheSizes[len(cacheSizes)-1]*3/2) / gc.PageSize
107 b.Run(fmt.Sprintf("cache=ram/pages=%d", ramPages), func(b *testing.B) {
108 fn(b, ramPages)
109 })
110 }
111
112 func benchmarkScanSpanPackedAllSizeClasses(b *testing.B, nPages int) {
113 for sc := range gc.NumSizeClasses {
114 if sc == 0 {
115 continue
116 }
117 if sc >= gc.MinSizeForMallocHeader {
118 break
119 }
120 b.Run(fmt.Sprintf("sizeclass=%d", sc), func(b *testing.B) {
121 benchmarkScanSpanPacked(b, nPages, sc)
122 })
123 }
124 }
125
// benchmarkScanSpanPacked benchmarks the Reference, Go, and (when
// available) platform-optimized ScanSpanPacked implementations for one
// size class over a heap of nPages pages, sweeping the fraction of
// marked objects from 0% to 100%.
func benchmarkScanSpanPacked(b *testing.B, nPages int, sizeClass int) {
	// Fixed seed so the masks, mark order, and page order are
	// reproducible across runs and implementations.
	rnd := rand.New(rand.NewPCG(42, 42))

	// Fill the heap with distinct non-zero words.
	mem, free := makeMem(b, nPages)
	defer free()
	for i := range mem {
		mem[i] = uintptr(int(gc.PageSize) + i + 1)
	}

	// Random pointer mask for every page.
	ptrs := make([]gc.PtrMask, nPages)
	for i := range ptrs {
		for j := range ptrs[i] {
			ptrs[i][j] = uintptr(rnd.Uint64())
		}
	}

	// Visit pages in random order so page-to-page hardware prefetching
	// doesn't flatter the results.
	pageOrder := rnd.Perm(nPages)

	// Output buffer; one page's worth of words always suffices.
	buf := make([]uintptr, gc.PageWords)

	// Sweep mark density in `steps` increments. Marks are applied in a
	// fixed random order, so each density step marks a superset of the
	// objects marked at the previous step.
	objBytes := uintptr(gc.SizeClassToSize[sizeClass])
	nObj := gc.PageSize / objBytes
	markOrder := rnd.Perm(int(nObj))
	const steps = 11
	for i := 0; i < steps; i++ {
		frac := float64(i) / float64(steps-1)
		// Mark the first frac of objects in markOrder (rounded).
		nMarks := int(float64(len(markOrder))*frac + 0.5)
		var objMarks gc.ObjMask
		for _, mark := range markOrder[:nMarks] {
			objMarks[mark/goarch.PtrBits] |= 1 << (mark % goarch.PtrBits)
		}
		greyClusters := 0
		for page := range ptrs {
			greyClusters += countGreyClusters(sizeClass, &objMarks, &ptrs[page])
		}

		// Report throughput in terms of memory the scan actually
		// touches: one cache line per grey cluster, averaged over all
		// pages — rather than the raw page size.
		avgBytes := int64(greyClusters) * int64(cpu.CacheLineSize) / int64(len(ptrs))

		b.Run(fmt.Sprintf("pct=%d", int(100*frac)), func(b *testing.B) {
			b.Run("impl=Reference", func(b *testing.B) {
				b.SetBytes(avgBytes)
				for i := range b.N {
					page := pageOrder[i%len(pageOrder)]
					scan.ScanSpanPackedReference(unsafe.Pointer(&mem[gc.PageWords*page]), &buf[0], &objMarks, uintptr(sizeClass), &ptrs[page])
				}
			})
			b.Run("impl=Go", func(b *testing.B) {
				b.SetBytes(avgBytes)
				for i := range b.N {
					page := pageOrder[i%len(pageOrder)]
					scan.ScanSpanPackedGo(unsafe.Pointer(&mem[gc.PageWords*page]), &buf[0], &objMarks, uintptr(sizeClass), &ptrs[page])
				}
			})
			if scan.HasFastScanSpanPacked() {
				b.Run("impl=Platform", func(b *testing.B) {
					b.SetBytes(avgBytes)
					for i := range b.N {
						page := pageOrder[i%len(pageOrder)]
						scan.ScanSpanPacked(unsafe.Pointer(&mem[gc.PageWords*page]), &buf[0], &objMarks, uintptr(sizeClass), &ptrs[page])
					}
				})
			}
		})
	}
}
210
211 func countGreyClusters(sizeClass int, objMarks *gc.ObjMask, ptrMask *gc.PtrMask) int {
212 clusters := 0
213 lastCluster := -1
214
215 expandBy := uintptr(gc.SizeClassToSize[sizeClass]) / goarch.PtrSize
216 for word := range gc.PageWords {
217 objI := uintptr(word) / expandBy
218 if objMarks[objI/goarch.PtrBits]&(1<<(objI%goarch.PtrBits)) == 0 {
219 continue
220 }
221 if ptrMask[word/goarch.PtrBits]&(1<<(word%goarch.PtrBits)) == 0 {
222 continue
223 }
224 c := word * 8 / goarch.PtrBits
225 if c != lastCluster {
226 lastCluster = c
227 clusters++
228 }
229 }
230 return clusters
231 }
232
233 func BenchmarkScanMaxBandwidth(b *testing.B) {
234
235
236
237 benchmarkCacheSizes(b, func(b *testing.B, heapPages int) {
238 mem, free := makeMem(b, heapPages)
239 defer free()
240 for i := range mem {
241 mem[i] = uintptr(int(gc.PageSize) + i + 1)
242 }
243 buf := make([]uintptr, gc.PageWords)
244
245
246 rnd := rand.New(rand.NewPCG(42, 42))
247 pageOrder := rnd.Perm(heapPages)
248
249 b.SetBytes(int64(gc.PageSize))
250
251 b.ResetTimer()
252 for i := range b.N {
253 page := pageOrder[i%len(pageOrder)]
254 copy(buf, mem[gc.PageWords*page:])
255 }
256 })
257 }
258
View as plain text