Source file src/internal/runtime/gc/scan/scan_test.go

// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package scan_test

import (
	"fmt"
	"internal/cpu"
	"internal/goarch"
	"internal/runtime/gc"
	"internal/runtime/gc/scan"
	"math/bits"
	"math/rand/v2"
	"slices"
	"sync"
	"testing"
	"unsafe"
)

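// scanFunc is the signature shared by the ScanSpanPacked implementations
// under test: it scans the page of memory at mem for the marked objects in
// objMarks, writes the pointers it finds to the buffer at bufp, and returns
// how many pointers it wrote.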
type scanFunc func(mem unsafe.Pointer, bufp *uintptr, objMarks *gc.ObjMask, sizeClass uintptr, ptrMask *gc.PtrMask) (count int32)

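// testScanSpanPacked checks scanF against the reference implementation on a
// page of fake memory with a random pointer mask, across all the size
// classes and object mark patterns generated by testObjs.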
func testScanSpanPacked(t *testing.T, scanF scanFunc) {
	scanR := scan.ScanSpanPackedReference

	// Construct a fake memory
	mem, free := makeMem(t, 1)
	defer free()
	for i := range mem {
		// Use values > gc.PageSize because a scan function may discard
		// pointers smaller than this.
		mem[i] = uintptr(int(gc.PageSize) + i + 1)
	}

	// Construct a random pointer mask
	rnd := rand.New(rand.NewPCG(42, 42))
	var ptrs gc.PtrMask
	for i := range ptrs {
		ptrs[i] = uintptr(rnd.Uint64())
	}

	bufF := make([]uintptr, gc.PageWords)
	bufR := make([]uintptr, gc.PageWords)
	testObjs(t, func(t *testing.T, sizeClass int, objs *gc.ObjMask) {
		nF := scanF(unsafe.Pointer(&mem[0]), &bufF[0], objs, uintptr(sizeClass), &ptrs)
		nR := scanR(unsafe.Pointer(&mem[0]), &bufR[0], objs, uintptr(sizeClass), &ptrs)

		if nR != nF {
			t.Errorf("want %d count, got %d", nR, nF)
		} else if !slices.Equal(bufF[:nF], bufR[:nR]) {
			t.Errorf("want scanned pointers %d, got %d", bufR[:nR], bufF[:nF])
		}
	})
}

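// testObjs calls f for every size class with packed pointer/scalar metadata,
// sliding a small multi-object mark pattern across the span to exercise
// boundary conditions.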
func testObjs(t *testing.T, f func(t *testing.T, sizeClass int, objMask *gc.ObjMask)) {
	for sizeClass := range gc.NumSizeClasses {
		if sizeClass == 0 {
			continue
		}
		size := uintptr(gc.SizeClassToSize[sizeClass])
		if size > gc.MinSizeForMallocHeader {
			break // Pointer/scalar metadata is not packed for larger sizes.
		}
		t.Run(fmt.Sprintf("size=%d", size), func(t *testing.T) {
			// Scan a few objects near i to test boundary conditions.
			const objMask = 0x101
			nObj := uintptr(gc.SizeClassToNPages[sizeClass]) * gc.PageSize / size
			for i := range nObj - uintptr(bits.Len(objMask)-1) {
				t.Run(fmt.Sprintf("objs=0x%x<<%d", objMask, i), func(t *testing.T) {
					var objs gc.ObjMask
					objs[i/goarch.PtrBits] = objMask << (i % goarch.PtrBits)
					if i%goarch.PtrBits+uintptr(bits.Len(objMask)) > goarch.PtrBits {
						// The mask straddles a word boundary; carry the high
						// bits into the next word so both objects are marked.
						objs[i/goarch.PtrBits+1] = objMask >> (goarch.PtrBits - i%goarch.PtrBits)
					}
					f(t, sizeClass, &objs)
				})
			}
		})
	}
}

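// dataCacheSizes returns the sizes of the CPU's data caches, printing them
// the first time it is called so benchmark output can be read against the
// cache hierarchy.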
var dataCacheSizes = sync.OnceValue(func() []uintptr {
	cs := cpu.DataCacheSizes()
	for i, c := range cs {
		fmt.Printf("# L%d cache: %d (%d Go pages)\n", i+1, c, c/gc.PageSize)
	}
	return cs
})

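// BenchmarkScanSpanPacked compares the reference, portable Go, and
// platform-specific ScanSpanPacked implementations across heap sizes, size
// classes, and mark densities.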
func BenchmarkScanSpanPacked(b *testing.B) {
	benchmarkCacheSizes(b, benchmarkScanSpanPackedAllSizeClasses)
}

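// benchmarkCacheSizes runs fn with heaps sized against the data cache
// hierarchy: a single-page "tiny" heap, roughly 3/4 of each cache level, and
// a heap 1.5x the last-level cache that spills into RAM.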
func benchmarkCacheSizes(b *testing.B, fn func(b *testing.B, heapPages int)) {
	cacheSizes := dataCacheSizes()
	b.Run("cache=tiny/pages=1", func(b *testing.B) {
		fn(b, 1)
	})
	for i, cacheBytes := range cacheSizes {
		pages := int(cacheBytes*3/4) / gc.PageSize
		b.Run(fmt.Sprintf("cache=L%d/pages=%d", i+1, pages), func(b *testing.B) {
			fn(b, pages)
		})
	}
	if len(cacheSizes) == 0 {
		return
	}
	ramPages := int(cacheSizes[len(cacheSizes)-1]*3/2) / gc.PageSize
	b.Run(fmt.Sprintf("cache=ram/pages=%d", ramPages), func(b *testing.B) {
		fn(b, ramPages)
	})
}

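// benchmarkScanSpanPackedAllSizeClasses benchmarks scanning an nPages heap
// for every size class whose pointer/scalar metadata is packed.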
func benchmarkScanSpanPackedAllSizeClasses(b *testing.B, nPages int) {
	for sc := range gc.NumSizeClasses {
		if sc == 0 {
			continue
		}
		if uintptr(gc.SizeClassToSize[sc]) > gc.MinSizeForMallocHeader {
			break // Pointer/scalar metadata is not packed for larger sizes.
		}
		b.Run(fmt.Sprintf("sizeclass=%d", sc), func(b *testing.B) {
			benchmarkScanSpanPacked(b, nPages, sc)
		})
	}
}

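// benchmarkScanSpanPacked benchmarks each ScanSpanPacked implementation on an
// nPages heap for a single size class, sweeping the fraction of marked
// objects from 0% to 100%.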
func benchmarkScanSpanPacked(b *testing.B, nPages int, sizeClass int) {
	rnd := rand.New(rand.NewPCG(42, 42))

	// Construct a fake memory
	mem, free := makeMem(b, nPages)
	defer free()
	for i := range mem {
		// Use values > gc.PageSize because a scan function may discard
		// pointers smaller than this.
		mem[i] = uintptr(int(gc.PageSize) + i + 1)
	}

	// Construct a random pointer mask
	ptrs := make([]gc.PtrMask, nPages)
	for i := range ptrs {
		for j := range ptrs[i] {
			ptrs[i][j] = uintptr(rnd.Uint64())
		}
	}

	// Visit the pages in a random order
	pageOrder := rnd.Perm(nPages)

	// Create the scan buffer.
	buf := make([]uintptr, gc.PageWords)

	// Sweep from 0 marks to all marks. We'll use the same marks for each page
	// because I don't think that predictability matters.
	objBytes := uintptr(gc.SizeClassToSize[sizeClass])
	nObj := gc.PageSize / objBytes
	markOrder := rnd.Perm(int(nObj))
	const steps = 11
	for i := 0; i < steps; i++ {
		frac := float64(i) / float64(steps-1)
		// Set frac marks.
		nMarks := int(float64(len(markOrder))*frac + 0.5)
		var objMarks gc.ObjMask
		for _, mark := range markOrder[:nMarks] {
			objMarks[mark/goarch.PtrBits] |= 1 << (mark % goarch.PtrBits)
		}
		greyClusters := 0
		for page := range ptrs {
			greyClusters += countGreyClusters(sizeClass, &objMarks, &ptrs[page])
		}

		// Report MB/s of how much memory they're actually hitting. This assumes
		// 64 byte cache lines (TODO: Should it assume 128 byte cache lines?)
		// and expands each access to the whole cache line. This is useful for
		// comparing against memory bandwidth.
		//
		// TODO: Add a benchmark that just measures single core memory bandwidth
		// for comparison. (See runtime memcpy benchmarks.)
		//
		// TODO: Should there be a separate measure where we don't expand to
		// cache lines?
		avgBytes := int64(greyClusters) * int64(cpu.CacheLineSize) / int64(len(ptrs))

		b.Run(fmt.Sprintf("pct=%d", int(100*frac)), func(b *testing.B) {
			b.Run("impl=Reference", func(b *testing.B) {
				b.SetBytes(avgBytes)
				for i := range b.N {
					page := pageOrder[i%len(pageOrder)]
					scan.ScanSpanPackedReference(unsafe.Pointer(&mem[gc.PageWords*page]), &buf[0], &objMarks, uintptr(sizeClass), &ptrs[page])
				}
			})
			b.Run("impl=Go", func(b *testing.B) {
				b.SetBytes(avgBytes)
				for i := range b.N {
					page := pageOrder[i%len(pageOrder)]
					scan.ScanSpanPackedGo(unsafe.Pointer(&mem[gc.PageWords*page]), &buf[0], &objMarks, uintptr(sizeClass), &ptrs[page])
				}
			})
			if scan.HasFastScanSpanPacked() {
				b.Run("impl=Platform", func(b *testing.B) {
					b.SetBytes(avgBytes)
					for i := range b.N {
						page := pageOrder[i%len(pageOrder)]
						scan.ScanSpanPacked(unsafe.Pointer(&mem[gc.PageWords*page]), &buf[0], &objMarks, uintptr(sizeClass), &ptrs[page])
					}
				})
			}
		})
	}
}

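// countGreyClusters counts the distinct clusters (groups of words roughly one
// cache line in size) that contain at least one pointer word of a marked
// object. The benchmarks use it to estimate how much memory a scan actually
// touches.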
func countGreyClusters(sizeClass int, objMarks *gc.ObjMask, ptrMask *gc.PtrMask) int {
	clusters := 0
	lastCluster := -1

	expandBy := uintptr(gc.SizeClassToSize[sizeClass]) / goarch.PtrSize
	for word := range gc.PageWords {
		objI := uintptr(word) / expandBy
		if objMarks[objI/goarch.PtrBits]&(1<<(objI%goarch.PtrBits)) == 0 {
			continue
		}
		if ptrMask[word/goarch.PtrBits]&(1<<(word%goarch.PtrBits)) == 0 {
			continue
		}
		c := word * 8 / goarch.PtrBits
		if c != lastCluster {
			lastCluster = c
			clusters++
		}
	}
	return clusters
}

func BenchmarkScanMaxBandwidth(b *testing.B) {
	// Measure the theoretical "maximum" bandwidth of scanning by reproducing
	// the memory access pattern of a full page scan, but using memcpy as the
	// kernel instead of scanning.
	benchmarkCacheSizes(b, func(b *testing.B, heapPages int) {
		mem, free := makeMem(b, heapPages)
		defer free()
		for i := range mem {
			mem[i] = uintptr(int(gc.PageSize) + i + 1)
		}
		buf := make([]uintptr, gc.PageWords)

		// Visit the pages in a random order
		rnd := rand.New(rand.NewPCG(42, 42))
		pageOrder := rnd.Perm(heapPages)

		b.SetBytes(int64(gc.PageSize))

		b.ResetTimer()
		for i := range b.N {
			page := pageOrder[i%len(pageOrder)]
			copy(buf, mem[gc.PageWords*page:])
		}
	})
}