Source file src/runtime/runtime_test.go

// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime_test

import (
	"flag"
	"internal/asan"
	"internal/cpu"
	"internal/msan"
	"internal/race"
	"internal/runtime/atomic"
	"internal/testenv"
	"io"
	"math/bits"
	. "runtime"
	"runtime/debug"
	"slices"
	"strings"
	"sync"
	"testing"
	"time"
	"unsafe"
)

// flagQuick is set by the -quick option to skip some relatively slow tests.
// This is used by the cmd/dist test runtime:cpu124.
// The cmd/dist test passes both -test.short and -quick;
// there are tests that only check testing.Short, and those tests will
// not be skipped if only -quick is used.
var flagQuick = flag.Bool("quick", false, "skip slow tests, for cmd/dist test runtime:cpu124")
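
// As a rough illustration only (the authoritative invocation lives in
// cmd/dist and may differ), the runtime:cpu124 configuration amounts to
// something like:
//
//	go test runtime -short -quick -cpu=1,2,4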

func init() {
	// We're testing the runtime, so make tracebacks show things
	// in the runtime. This only raises the level, so it won't
	// override GOTRACEBACK=crash from the user.
	SetTracebackEnv("system")
}

var errf error

func errfn() error {
	return errf
}

func errfn1() error {
	return io.EOF
}

func BenchmarkIfaceCmp100(b *testing.B) {
	for i := 0; i < b.N; i++ {
		for j := 0; j < 100; j++ {
			if errfn() == io.EOF {
				b.Fatal("bad comparison")
			}
		}
	}
}

func BenchmarkIfaceCmpNil100(b *testing.B) {
	for i := 0; i < b.N; i++ {
		for j := 0; j < 100; j++ {
			if errfn1() == nil {
				b.Fatal("bad comparison")
			}
		}
	}
}

var efaceCmp1 any
var efaceCmp2 any

func BenchmarkEfaceCmpDiff(b *testing.B) {
	x := 5
	efaceCmp1 = &x
	y := 6
	efaceCmp2 = &y
	for i := 0; i < b.N; i++ {
		for j := 0; j < 100; j++ {
			if efaceCmp1 == efaceCmp2 {
				b.Fatal("bad comparison")
			}
		}
	}
}

func BenchmarkEfaceCmpDiffIndirect(b *testing.B) {
	efaceCmp1 = [2]int{1, 2}
	efaceCmp2 = [2]int{1, 2}
	for i := 0; i < b.N; i++ {
		for j := 0; j < 100; j++ {
			if efaceCmp1 != efaceCmp2 {
				b.Fatal("bad comparison")
			}
		}
	}
}

func BenchmarkDefer(b *testing.B) {
	for i := 0; i < b.N; i++ {
		defer1()
	}
}

func defer1() {
	defer func(x, y, z int) {
		if recover() != nil || x != 1 || y != 2 || z != 3 {
			panic("bad recover")
		}
	}(1, 2, 3)
}

func BenchmarkDefer10(b *testing.B) {
	for i := 0; i < b.N/10; i++ {
		defer2()
	}
}

func defer2() {
	for i := 0; i < 10; i++ {
		defer func(x, y, z int) {
			if recover() != nil || x != 1 || y != 2 || z != 3 {
				panic("bad recover")
			}
		}(1, 2, 3)
	}
}

func BenchmarkDeferMany(b *testing.B) {
	for i := 0; i < b.N; i++ {
		defer func(x, y, z int) {
			if recover() != nil || x != 1 || y != 2 || z != 3 {
				panic("bad recover")
			}
		}(1, 2, 3)
	}
}

func BenchmarkPanicRecover(b *testing.B) {
	for i := 0; i < b.N; i++ {
		defer3()
	}
}

func defer3() {
	defer func(x, y, z int) {
		if recover() == nil {
			panic("failed recover")
		}
	}(1, 2, 3)
	panic("hi")
}

// golang.org/issue/7063
func TestStopCPUProfilingWithProfilerOff(t *testing.T) {
	SetCPUProfileRate(0)
}

// Addresses to test for faulting behavior.
// This is less a test of SetPanicOnFault and more a check that
// the operating system and the runtime can process these faults
// correctly. That is, we're indirectly testing that without SetPanicOnFault
// these would manage to turn into ordinary crashes.
// Note that these are truncated on 32-bit systems, so the bottom 32 bits
// of the larger addresses must themselves be invalid addresses.
// We might get unlucky and the OS might have mapped one of these
// addresses, but probably not: they're all in the first page, very high
// addresses that normally an OS would reserve for itself, or malformed
// addresses. Even so, we might have to remove one or two on different
// systems. We will see.
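//
// As an illustrative example of the truncation note above: on a 32-bit
// system uintptr(0xfffffff000000001) keeps only the low 32 bits, 0x00000001,
// which still falls in the (normally unmapped) first page, while
// 0xfffffffff0000001 becomes 0xf0000001, a high address that an OS would
// typically reserve for the kernel.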

var faultAddrs = []uint64{
	// low addresses
	0,
	1,
	0xfff,
	// high (kernel) addresses
	// or else malformed.
	0xffffffffffffffff,
	0xfffffffffffff001,
	0xffffffffffff0001,
	0xfffffffffff00001,
	0xffffffffff000001,
	0xfffffffff0000001,
	0xffffffff00000001,
	0xfffffff000000001,
	0xffffff0000000001,
	0xfffff00000000001,
	0xffff000000000001,
	0xfff0000000000001,
	0xff00000000000001,
	0xf000000000000001,
	0x8000000000000001,
}

func TestSetPanicOnFault(t *testing.T) {
	old := debug.SetPanicOnFault(true)
	defer debug.SetPanicOnFault(old)

	nfault := 0
	for _, addr := range faultAddrs {
		testSetPanicOnFault(t, uintptr(addr), &nfault)
	}
	if nfault == 0 {
		t.Fatalf("none of the addresses faulted")
	}
}

// testSetPanicOnFault tests one potentially faulting address.
// It deliberately constructs and uses an invalid pointer,
// so mark it as nocheckptr.
//
//go:nocheckptr
func testSetPanicOnFault(t *testing.T, addr uintptr, nfault *int) {
	if GOOS == "js" || GOOS == "wasip1" {
		t.Skip(GOOS + " does not support catching faults")
	}

	defer func() {
		if err := recover(); err != nil {
			*nfault++
		}
	}()

	// The read should fault, except that sometimes we hit
	// addresses that have had C or kernel pages mapped there
	// readable by user code. So just log the content.
	// If no addresses fault, we'll fail the test.
	v := *(*byte)(unsafe.Pointer(addr))
	t.Logf("addr %#x: %#x\n", addr, v)
}

func eqstring_generic(s1, s2 string) bool {
	if len(s1) != len(s2) {
		return false
	}
	// optimization in assembly versions:
	// if s1.str == s2.str { return true }
	for i := 0; i < len(s1); i++ {
		if s1[i] != s2[i] {
			return false
		}
	}
	return true
}

func TestEqString(t *testing.T) {
	// This isn't really an exhaustive test of == on strings, it's
	// just a convenient way of documenting (via eqstring_generic)
	// what == does.
	s := []string{
		"",
		"a",
		"c",
		"aaa",
		"ccc",
		"cccc"[:3], // same contents, different string
		"1234567890",
	}
	for _, s1 := range s {
		for _, s2 := range s {
			x := s1 == s2
			y := eqstring_generic(s1, s2)
			if x != y {
				t.Errorf(`("%s" == "%s") = %t, want %t`, s1, s2, x, y)
			}
		}
	}
}

func TestTrailingZero(t *testing.T) {
	// make sure we add padding for structs with trailing zero-sized fields
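	// (The padding matters because a pointer to the trailing zero-sized
	// field would otherwise point just past the end of the object, where it
	// could be mistaken for a pointer to the next object in memory.)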
	type T1 struct {
		n int32
		z [0]byte
	}
	if unsafe.Sizeof(T1{}) != 8 {
		t.Errorf("sizeof(%#v)==%d, want 8", T1{}, unsafe.Sizeof(T1{}))
	}
	type T2 struct {
		n int64
		z struct{}
	}
	if unsafe.Sizeof(T2{}) != 8+unsafe.Sizeof(uintptr(0)) {
		t.Errorf("sizeof(%#v)==%d, want %d", T2{}, unsafe.Sizeof(T2{}), 8+unsafe.Sizeof(uintptr(0)))
	}
	type T3 struct {
		n byte
		z [4]struct{}
	}
	if unsafe.Sizeof(T3{}) != 2 {
		t.Errorf("sizeof(%#v)==%d, want 2", T3{}, unsafe.Sizeof(T3{}))
	}
	// make sure padding can double for both zerosize and alignment
	type T4 struct {
		a int32
		b int16
		c int8
		z struct{}
	}
	if unsafe.Sizeof(T4{}) != 8 {
		t.Errorf("sizeof(%#v)==%d, want 8", T4{}, unsafe.Sizeof(T4{}))
	}
	// make sure we don't pad a zero-sized thing
	type T5 struct {
	}
	if unsafe.Sizeof(T5{}) != 0 {
		t.Errorf("sizeof(%#v)==%d, want 0", T5{}, unsafe.Sizeof(T5{}))
	}
}

func TestAppendGrowthHeap(t *testing.T) {
	var x []int64
	check := func(want int) {
		if cap(x) != want {
			t.Errorf("len=%d, cap=%d, want cap=%d", len(x), cap(x), want)
		}
	}

	check(0)
	want := 1
	for i := 1; i <= 100; i++ {
		x = append(x, 1)
		check(want)
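		// i&(i-1) == 0 exactly when i is a power of two, which is where this
		// test expects append to have doubled the capacity.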
		if i&(i-1) == 0 {
			want = 2 * i
		}
	}
	Escape(&x[0]) // suppress stack-allocated backing store
}

func TestAppendGrowthStack(t *testing.T) {
	if race.Enabled || asan.Enabled || msan.Enabled {
		t.Skip("instrumentation breaks this optimization")
	}
	var x []int64
	check := func(want int) {
		if cap(x) != want {
			t.Errorf("len=%d, cap=%d, want cap=%d", len(x), cap(x), want)
		}
	}

	check(0)
	want := 32 / 8 // 32 is the default for cmd/compile/internal/base.DebugFlags.VariableMakeThreshold
	if testenv.OptimizationOff() {
		want = 1
	}
	for i := 1; i <= 100; i++ {
		x = append(x, 1)
		check(want)
		if i&(i-1) == 0 {
			want = max(want, 2*i)
		}
	}
}

var One = []int64{1}

func TestAppendSliceGrowth(t *testing.T) {
	var x []int64
	check := func(want int) {
		if cap(x) != want {
			t.Errorf("len=%d, cap=%d, want cap=%d", len(x), cap(x), want)
		}
	}

	check(0)
	want := 1
	for i := 1; i <= 100; i++ {
		x = append(x, One...)
		check(want)
		if i&(i-1) == 0 {
			want = 2 * i
		}
	}
}

func TestGoroutineProfileTrivial(t *testing.T) {
	// Calling GoroutineProfile twice in a row should find the same number of goroutines,
	// but it's possible there are goroutines just about to exit, so we might end up
	// with fewer in the second call. Try a few times; it should converge once those
	// zombies are gone.
	for i := 0; ; i++ {
		n1, ok := GoroutineProfile(nil) // should fail, there's at least 1 goroutine
		if n1 < 1 || ok {
			t.Fatalf("GoroutineProfile(nil) = %d, %v, want >0, false", n1, ok)
		}
		n2, ok := GoroutineProfile(make([]StackRecord, n1))
		if n2 == n1 && ok {
			break
		}
		t.Logf("GoroutineProfile(%d) = %d, %v, want %d, true", n1, n2, ok, n1)
		if i >= 10 {
			t.Fatalf("GoroutineProfile not converging")
		}
	}
}

func BenchmarkGoroutineProfile(b *testing.B) {
	run := func(fn func() bool) func(b *testing.B) {
		runOne := func(b *testing.B) {
			latencies := make([]time.Duration, 0, b.N)

			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				start := time.Now()
				ok := fn()
				if !ok {
					b.Fatal("goroutine profile failed")
				}
				latencies = append(latencies, time.Since(start))
			}
			b.StopTimer()

			// Sort latencies then report percentiles.
			slices.Sort(latencies)
			b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns")
			b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns")
			b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns")
		}
		return func(b *testing.B) {
			b.Run("idle", runOne)

			b.Run("loaded", func(b *testing.B) {
				stop := applyGCLoad(b)
				runOne(b)
				// Make sure to stop the timer before we wait! The load created above
				// is very heavy-weight and not easy to stop, so we could end up
				// confusing the benchmarking framework for small b.N.
				b.StopTimer()
				stop()
			})
		}
	}

	// Measure the cost of counting goroutines
	b.Run("small-nil", run(func() bool {
		GoroutineProfile(nil)
		return true
	}))

	// Measure the cost with a small set of goroutines
	n := NumGoroutine()
	p := make([]StackRecord, 2*n+2*GOMAXPROCS(0))
	b.Run("small", run(func() bool {
		_, ok := GoroutineProfile(p)
		return ok
	}))

	// Measure the cost with a large set of goroutines
	ch := make(chan int)
	var ready, done sync.WaitGroup
	for i := 0; i < 5000; i++ {
		ready.Add(1)
		done.Add(1)
		go func() { ready.Done(); <-ch; done.Done() }()
	}
	ready.Wait()

	// Count goroutines with a large allgs list
	b.Run("large-nil", run(func() bool {
		GoroutineProfile(nil)
		return true
	}))

	n = NumGoroutine()
	p = make([]StackRecord, 2*n+2*GOMAXPROCS(0))
	b.Run("large", run(func() bool {
		_, ok := GoroutineProfile(p)
		return ok
	}))

	close(ch)
	done.Wait()

	// Count goroutines with a large (but unused) allgs list
	b.Run("sparse-nil", run(func() bool {
		GoroutineProfile(nil)
		return true
	}))

	// Measure the cost of a large (but unused) allgs list
	n = NumGoroutine()
	p = make([]StackRecord, 2*n+2*GOMAXPROCS(0))
	b.Run("sparse", run(func() bool {
		_, ok := GoroutineProfile(p)
		return ok
	}))
}

func TestVersion(t *testing.T) {
	// Test that version does not contain \r or \n.
	vers := Version()
	if strings.Contains(vers, "\r") || strings.Contains(vers, "\n") {
		t.Fatalf("cr/nl in version: %q", vers)
	}
}

func BenchmarkProcYield(b *testing.B) {
	benchN := func(n uint32) func(*testing.B) {
		return func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				ProcYield(n)
			}
		}
	}

	b.Run("1", benchN(1))
	b.Run("10", benchN(10))
	b.Run("30", benchN(30)) // active_spin_cnt in lock_sema.go and lock_futex.go
	b.Run("100", benchN(100))
	b.Run("1000", benchN(1000))
}

func BenchmarkOSYield(b *testing.B) {
	for i := 0; i < b.N; i++ {
		OSYield()
	}
}

func BenchmarkMutexContention(b *testing.B) {
	// Measure throughput of a single mutex with all threads contending
	//
	// Share a single counter across all threads. Progress from any thread is
	// progress for the benchmark as a whole. We don't measure or give points
	// for fairness here, arbitrary delay to any given thread's progress is
	// invisible and allowed.
	//
	// The cache line that holds the count value will need to move between
	// processors, but not as often as the cache line that holds the mutex. The
	// mutex protects access to the count value, which limits contention on that
	// cache line. This is a simple design, but it helps to make the behavior of
	// the benchmark clear. Most real uses of mutex will protect some number of
	// cache lines anyway.

	var state struct {
		_     cpu.CacheLinePad
		lock  Mutex
		_     cpu.CacheLinePad
		count atomic.Int64
		_     cpu.CacheLinePad
	}

	procs := GOMAXPROCS(0)
	var wg sync.WaitGroup
	for range procs {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for {
				Lock(&state.lock)
				ours := state.count.Add(1)
				Unlock(&state.lock)
				if ours >= int64(b.N) {
					return
				}
			}
		}()
	}
	wg.Wait()
}

func BenchmarkMutexCapture(b *testing.B) {

	// Measure mutex fairness.
	//
	// Have several threads contend for a single mutex value. Measure how
	// effectively a single thread is able to capture the lock and report the
	// duration of those "streak" events. Measure how long other individual
	// threads need to wait between their turns with the lock. Report the
	// duration of those "starve" events.
	//
	// Report in terms of wall clock time (assuming a constant time per
	// lock/unlock pair) rather than number of locks/unlocks. This keeps
	// timekeeping overhead out of the critical path, and avoids giving an
	// advantage to lock/unlock implementations that take less time per
	// operation.

	var state struct {
		_     cpu.CacheLinePad
		lock  Mutex
		_     cpu.CacheLinePad
		count atomic.Int64
		_     cpu.CacheLinePad
	}

	procs := GOMAXPROCS(0)
	var wg sync.WaitGroup
	histograms := make(chan [2][65]int)
	for range procs {
		wg.Add(1)
		go func() {
			var (
				prev      int64
				streak    int64
				histogram [2][65]int
			)
			for {
				Lock(&state.lock)
				ours := state.count.Add(1)
				Unlock(&state.lock)
				delta := ours - prev - 1
				prev = ours
				if delta == 0 {
					streak++
				} else {
					histogram[0][bits.LeadingZeros64(uint64(streak))]++
					histogram[1][bits.LeadingZeros64(uint64(delta))]++
					streak = 1
				}
				if ours >= int64(b.N) {
					wg.Done()
					if delta == 0 {
						histogram[0][bits.LeadingZeros64(uint64(streak))]++
						histogram[1][bits.LeadingZeros64(uint64(delta))]++
					}
					histograms <- histogram
					return
				}
			}
		}()
	}

	wg.Wait()
	b.StopTimer()

	var histogram [2][65]int
	for range procs {
		h := <-histograms
		for i := range h {
			for j := range h[i] {
				histogram[i][j] += h[i][j]
			}
		}
	}

	percentile := func(h [65]int, p float64) int {
		sum := 0
		for i, v := range h {
			bound := uint64(1<<63) >> i
			sum += int(bound) * v
		}

		// Imagine that the longest streak / starvation events were instead half
		// as long but twice in number. (Note that we've pre-multiplied by the
		// [lower] "bound" value.) Continue those splits until we meet the
		// percentile target.
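		//
		// Illustrative (made-up) numbers: if one event sits in the bucket with
		// lower bound 64 and a hundred events sit in the bucket with lower
		// bound 8, then sum = 64 + 800 = 864. With p = 0.9, trimming the single
		// long event removes only 64/864 ≈ 7% of the mass, not enough to dip
		// below 90%, so the walk continues and returns 8; with p = 1.0 it stops
		// at the first nonempty bucket and returns 64.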
		part := 0
		for i, v := range h {
			bound := uint64(1<<63) >> i
			part += int(bound) * v
			// have we trimmed off enough at the head to dip below the percentile goal?
			if float64(sum-part) < float64(sum)*p {
				return int(bound)
			}
		}

		return 0
	}

	perOp := float64(b.Elapsed().Nanoseconds()) / float64(b.N)
	b.ReportMetric(perOp*float64(percentile(histogram[0], 1.0)), "ns/streak-p100")
	b.ReportMetric(perOp*float64(percentile(histogram[0], 0.9)), "ns/streak-p90")
	b.ReportMetric(perOp*float64(percentile(histogram[1], 1.0)), "ns/starve-p100")
	b.ReportMetric(perOp*float64(percentile(histogram[1], 0.9)), "ns/starve-p90")
}

func BenchmarkMutexHandoff(b *testing.B) {
	testcase := func(delay func(l *Mutex)) func(b *testing.B) {
		return func(b *testing.B) {
			if workers := 2; GOMAXPROCS(0) < workers {
				b.Skipf("requires GOMAXPROCS >= %d", workers)
			}

			// Measure latency of mutex handoff between threads.
			//
			// Hand off a runtime.mutex between two threads, one running a
			// "coordinator" goroutine and the other running a "worker"
			// goroutine. We don't override the runtime's typical
			// goroutine/thread mapping behavior.
			//
			// Measure the latency, starting when the coordinator enters a call
			// to runtime.unlock and ending when the worker's call to
			// runtime.lock returns. The benchmark can specify a "delay"
			// function to simulate the length of the mutex-holder's critical
			// section, including to arrange for the worker's thread to be in
			// either the "spinning" or "sleeping" portions of the runtime.lock2
			// implementation. Measurement starts after any such "delay".
			//
			// The two threads' goroutines communicate their current position to
			// each other in a non-blocking way via the "turn" state.
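			//
			// To summarize the encoding used below: turn == 0 (mod 4) means it
			// is the coordinator's turn, turn == 2 (mod 4) means it is the
			// worker's turn, and an odd value (set once the coordinator has
			// finished its b.N rounds) tells the worker to record its time and
			// exit.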

			var state struct {
				_    cpu.CacheLinePad
				lock Mutex
				_    cpu.CacheLinePad
				turn atomic.Int64
				_    cpu.CacheLinePad
			}

			var delta atomic.Int64
			var wg sync.WaitGroup

			// coordinator:
			//  - acquire the mutex
			//  - set the turn to 2 mod 4, instructing the worker to begin its Lock call
			//  - wait until the mutex is contended
			//  - wait a bit more so the worker can commit to its sleep
			//  - release the mutex and wait for it to be our turn (0 mod 4) again
			wg.Add(1)
			go func() {
				defer wg.Done()
				var t int64
				for range b.N {
					Lock(&state.lock)
					state.turn.Add(2)
					delay(&state.lock)
					t -= Nanotime() // start the timer
					Unlock(&state.lock)
					for state.turn.Load()&0x2 != 0 {
					}
				}
				state.turn.Add(1)
				delta.Add(t)
			}()

			// worker:
			//  - wait until it's our turn (2 mod 4)
			//  - acquire and release the mutex
			//  - switch the turn counter back to the coordinator (0 mod 4)
			wg.Add(1)
			go func() {
				defer wg.Done()
				var t int64
				for {
					switch state.turn.Load() & 0x3 {
					case 0:
					case 1, 3:
						delta.Add(t)
						return
					case 2:
						Lock(&state.lock)
						t += Nanotime() // stop the timer
						Unlock(&state.lock)
						state.turn.Add(2)
					}
				}
			}()

			wg.Wait()
			b.ReportMetric(float64(delta.Load())/float64(b.N), "ns/op")
		}
	}

	b.Run("Solo", func(b *testing.B) {
		var lock Mutex
		for range b.N {
			Lock(&lock)
			Unlock(&lock)
		}
	})

	b.Run("FastPingPong", testcase(func(l *Mutex) {}))
	b.Run("SlowPingPong", testcase(func(l *Mutex) {
		// Wait for the worker to stop spinning and prepare to sleep
		for !MutexContended(l) {
		}
		// Wait a bit longer so the OS can finish committing the worker to its
		// sleep. Balance consistency against getting enough iterations.
		const extraNs = 10e3
		for t0 := Nanotime(); Nanotime()-t0 < extraNs; {
		}
	}))
}
