Source file src/runtime/runtime_test.go

// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime_test

import (
	"flag"
	"fmt"
	"internal/asan"
	"internal/cpu"
	"internal/msan"
	"internal/race"
	"internal/runtime/atomic"
	"internal/testenv"
	"io"
	"math/bits"
	. "runtime"
	"runtime/debug"
	"slices"
	"strings"
	"sync"
	"testing"
	"time"
	"unsafe"
)

// flagQuick is set by the -quick option to skip some relatively slow tests.
// This is used by the cmd/dist test runtime:cpu124.
// The cmd/dist test passes both -test.short and -quick;
// there are tests that only check testing.Short, and those tests will
// not be skipped if only -quick is used.
var flagQuick = flag.Bool("quick", false, "skip slow tests, for cmd/dist test runtime:cpu124")

func init() {
	// We're testing the runtime, so make tracebacks show things
	// in the runtime. This only raises the level, so it won't
	// override GOTRACEBACK=crash from the user.
	SetTracebackEnv("system")
}

var errf error

func errfn() error {
	return errf
}

func errfn1() error {
	return io.EOF
}

func BenchmarkIfaceCmp100(b *testing.B) {
	for i := 0; i < b.N; i++ {
		for j := 0; j < 100; j++ {
			if errfn() == io.EOF {
				b.Fatal("bad comparison")
			}
		}
	}
}

func BenchmarkIfaceCmpNil100(b *testing.B) {
	for i := 0; i < b.N; i++ {
		for j := 0; j < 100; j++ {
			if errfn1() == nil {
				b.Fatal("bad comparison")
			}
		}
	}
}

var efaceCmp1 any
var efaceCmp2 any

func BenchmarkEfaceCmpDiff(b *testing.B) {
	x := 5
	efaceCmp1 = &x
	y := 6
	efaceCmp2 = &y
	for i := 0; i < b.N; i++ {
		for j := 0; j < 100; j++ {
			if efaceCmp1 == efaceCmp2 {
				b.Fatal("bad comparison")
			}
		}
	}
}

func BenchmarkEfaceCmpDiffIndirect(b *testing.B) {
	efaceCmp1 = [2]int{1, 2}
	efaceCmp2 = [2]int{1, 2}
	for i := 0; i < b.N; i++ {
		for j := 0; j < 100; j++ {
			if efaceCmp1 != efaceCmp2 {
				b.Fatal("bad comparison")
			}
		}
	}
}

func BenchmarkDefer(b *testing.B) {
	for i := 0; i < b.N; i++ {
		defer1()
	}
}

func defer1() {
	defer func(x, y, z int) {
		if recover() != nil || x != 1 || y != 2 || z != 3 {
			panic("bad recover")
		}
	}(1, 2, 3)
}

func BenchmarkDefer10(b *testing.B) {
	for i := 0; i < b.N/10; i++ {
		defer2()
	}
}

func defer2() {
	for i := 0; i < 10; i++ {
		defer func(x, y, z int) {
			if recover() != nil || x != 1 || y != 2 || z != 3 {
				panic("bad recover")
			}
		}(1, 2, 3)
	}
}

func BenchmarkDeferMany(b *testing.B) {
	for i := 0; i < b.N; i++ {
		defer func(x, y, z int) {
			if recover() != nil || x != 1 || y != 2 || z != 3 {
				panic("bad recover")
			}
		}(1, 2, 3)
	}
}

func BenchmarkPanicRecover(b *testing.B) {
	for i := 0; i < b.N; i++ {
		defer3()
	}
}

func defer3() {
	defer func(x, y, z int) {
		if recover() == nil {
			panic("failed recover")
		}
	}(1, 2, 3)
	panic("hi")
}

// golang.org/issue/7063
func TestStopCPUProfilingWithProfilerOff(t *testing.T) {
	SetCPUProfileRate(0)
}

// Addresses to test for faulting behavior.
// This is less a test of SetPanicOnFault and more a check that
// the operating system and the runtime can process these faults
// correctly. That is, we're indirectly testing that without SetPanicOnFault
// these would manage to turn into ordinary crashes.
// Note that these are truncated on 32-bit systems, so the bottom 32 bits
// of the larger addresses must themselves be invalid addresses.
// We might get unlucky and the OS might have mapped one of these
// addresses, but probably not: they're all in the first page, very high
// addresses that normally an OS would reserve for itself, or malformed
// addresses. Even so, we might have to remove one or two on different
// systems. We will see.

var faultAddrs = []uint64{
	// low addresses
	0,
	1,
	0xfff,
	// high (kernel) addresses
	// or else malformed.
	0xffffffffffffffff,
	0xfffffffffffff001,
	0xffffffffffff0001,
	0xfffffffffff00001,
	0xffffffffff000001,
	0xfffffffff0000001,
	0xffffffff00000001,
	0xfffffff000000001,
	0xffffff0000000001,
	0xfffff00000000001,
	0xffff000000000001,
	0xfff0000000000001,
	0xff00000000000001,
	0xf000000000000001,
	0x8000000000000001,
}

func TestSetPanicOnFault(t *testing.T) {
	old := debug.SetPanicOnFault(true)
	defer debug.SetPanicOnFault(old)

	nfault := 0
	for _, addr := range faultAddrs {
		testSetPanicOnFault(t, uintptr(addr), &nfault)
	}
	if nfault == 0 {
		t.Fatalf("none of the addresses faulted")
	}
}

// testSetPanicOnFault tests one potentially faulting address.
// It deliberately constructs and uses an invalid pointer,
// so mark it as nocheckptr.
//
//go:nocheckptr
func testSetPanicOnFault(t *testing.T, addr uintptr, nfault *int) {
	if GOOS == "js" || GOOS == "wasip1" {
		t.Skip(GOOS + " does not support catching faults")
	}

	defer func() {
		if err := recover(); err != nil {
			*nfault++
		}
	}()

	// The read should fault, except that sometimes we hit
	// addresses that have had C or kernel pages mapped there
	// readable by user code. So just log the content.
	// If no addresses fault, we'll fail the test.
	v := *(*byte)(unsafe.Pointer(addr))
	t.Logf("addr %#x: %#x\n", addr, v)
}

func eqstring_generic(s1, s2 string) bool {
	if len(s1) != len(s2) {
		return false
	}
	// optimization in assembly versions:
	// if s1.str == s2.str { return true }
	for i := 0; i < len(s1); i++ {
		if s1[i] != s2[i] {
			return false
		}
	}
	return true
}

func TestEqString(t *testing.T) {
	// This isn't really an exhaustive test of == on strings, it's
	// just a convenient way of documenting (via eqstring_generic)
	// what == does.
	s := []string{
		"",
		"a",
		"c",
		"aaa",
		"ccc",
		"cccc"[:3], // same contents, different string
		"1234567890",
	}
	for _, s1 := range s {
		for _, s2 := range s {
			x := s1 == s2
			y := eqstring_generic(s1, s2)
			if x != y {
				t.Errorf(`("%s" == "%s") = %t, want %t`, s1, s2, x, y)
			}
		}
	}
}

func TestTrailingZero(t *testing.T) {
	// make sure we add padding for structs with trailing zero-sized fields
	type T1 struct {
		n int32
		z [0]byte
	}
	if unsafe.Sizeof(T1{}) != 8 {
		t.Errorf("sizeof(%#v)==%d, want 8", T1{}, unsafe.Sizeof(T1{}))
	}
	type T2 struct {
		n int64
		z struct{}
	}
	if unsafe.Sizeof(T2{}) != 8+unsafe.Sizeof(uintptr(0)) {
		t.Errorf("sizeof(%#v)==%d, want %d", T2{}, unsafe.Sizeof(T2{}), 8+unsafe.Sizeof(uintptr(0)))
	}
	type T3 struct {
		n byte
		z [4]struct{}
	}
	if unsafe.Sizeof(T3{}) != 2 {
		t.Errorf("sizeof(%#v)==%d, want 2", T3{}, unsafe.Sizeof(T3{}))
	}
	// make sure one padding byte can do double duty, covering both the trailing zero-sized field and alignment
	type T4 struct {
		a int32
		b int16
		c int8
		z struct{}
	}
	if unsafe.Sizeof(T4{}) != 8 {
		t.Errorf("sizeof(%#v)==%d, want 8", T4{}, unsafe.Sizeof(T4{}))
	}
	// make sure we don't pad a zero-sized thing
	type T5 struct {
	}
	if unsafe.Sizeof(T5{}) != 0 {
		t.Errorf("sizeof(%#v)==%d, want 0", T5{}, unsafe.Sizeof(T5{}))
	}
}

func TestAppendGrowthHeap(t *testing.T) {
	var x []int64
	check := func(want int) {
		if cap(x) != want {
			t.Errorf("len=%d, cap=%d, want cap=%d", len(x), cap(x), want)
		}
	}

	check(0)
	want := 1
	for i := 1; i <= 100; i++ {
		x = append(x, 1)
		check(want)
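		// When i is a power of two the slice is exactly full,
		// so the next append should double the capacity.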
		if i&(i-1) == 0 {
			want = 2 * i
		}
	}
	Escape(&x[0]) // suppress stack-allocated backing store
}

func TestAppendGrowthStack(t *testing.T) {
	if race.Enabled || asan.Enabled || msan.Enabled {
		t.Skip("instrumentation breaks this optimization")
	}
	var x []int64
	check := func(want int) {
		if cap(x) != want {
			t.Errorf("len=%d, cap=%d, want cap=%d", len(x), cap(x), want)
		}
	}

	check(0)
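	// 32 bytes divided by the 8-byte int64 element size gives an expected
	// initial capacity of 4 when the backing store starts out on the stack.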
	want := 32 / 8 // 32 is the default for cmd/compile/internal/base.DebugFlags.VariableMakeThreshold
	if testenv.OptimizationOff() {
		want = 1
	}
	for i := 1; i <= 100; i++ {
		x = append(x, 1)
		check(want)
		if i&(i-1) == 0 {
			want = max(want, 2*i)
		}
	}
}

var One = []int64{1}

func TestAppendSliceGrowth(t *testing.T) {
	var x []int64
	check := func(want int) {
		if cap(x) != want {
			t.Errorf("len=%d, cap=%d, want cap=%d", len(x), cap(x), want)
		}
	}

	check(0)
	want := 1
	for i := 1; i <= 100; i++ {
		x = append(x, One...)
		check(want)
		if i&(i-1) == 0 {
			want = 2 * i
		}
	}
}

func TestGoroutineProfileTrivial(t *testing.T) {
	// Calling GoroutineProfile twice in a row should find the same number of goroutines,
	// but it's possible there are goroutines just about to exit, so we might end up
	// with fewer in the second call. Try a few times; it should converge once those
	// zombies are gone.
	for i := 0; ; i++ {
		n1, ok := GoroutineProfile(nil) // should fail, there's at least 1 goroutine
		if n1 < 1 || ok {
			t.Fatalf("GoroutineProfile(nil) = %d, %v, want >0, false", n1, ok)
		}
		n2, ok := GoroutineProfile(make([]StackRecord, n1))
		if n2 == n1 && ok {
			break
		}
		t.Logf("GoroutineProfile(%d) = %d, %v, want %d, true", n1, n2, ok, n1)
		if i >= 10 {
			t.Fatalf("GoroutineProfile not converging")
		}
	}
}

func BenchmarkGoroutineProfile(b *testing.B) {
	run := func(fn func() bool) func(b *testing.B) {
		runOne := func(b *testing.B) {
			latencies := make([]time.Duration, 0, b.N)

			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				start := time.Now()
				ok := fn()
				if !ok {
					b.Fatal("goroutine profile failed")
				}
				latencies = append(latencies, time.Since(start))
			}
			b.StopTimer()

			// Sort latencies then report percentiles.
			slices.Sort(latencies)
			b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns")
			b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns")
			b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns")
		}
		return func(b *testing.B) {
			b.Run("idle", runOne)

			b.Run("loaded", func(b *testing.B) {
				stop := applyGCLoad(b)
				runOne(b)
				// Make sure to stop the timer before we wait! The load created above
				// is very heavy-weight and not easy to stop, so we could end up
				// confusing the benchmarking framework for small b.N.
				b.StopTimer()
				stop()
			})
		}
	}

	// Measure the cost of counting goroutines
	b.Run("small-nil", run(func() bool {
		GoroutineProfile(nil)
		return true
	}))

	// Measure the cost with a small set of goroutines
	n := NumGoroutine()
	p := make([]StackRecord, 2*n+2*GOMAXPROCS(0))
	b.Run("small", run(func() bool {
		_, ok := GoroutineProfile(p)
		return ok
	}))

	// Measure the cost with a large set of goroutines
	ch := make(chan int)
	var ready, done sync.WaitGroup
	for i := 0; i < 5000; i++ {
		ready.Add(1)
		done.Add(1)
		go func() { ready.Done(); <-ch; done.Done() }()
	}
	ready.Wait()

	// Count goroutines with a large allgs list
	b.Run("large-nil", run(func() bool {
		GoroutineProfile(nil)
		return true
	}))

	n = NumGoroutine()
	p = make([]StackRecord, 2*n+2*GOMAXPROCS(0))
	b.Run("large", run(func() bool {
		_, ok := GoroutineProfile(p)
		return ok
	}))

	close(ch)
	done.Wait()

	// Count goroutines with a large (but unused) allgs list
	b.Run("sparse-nil", run(func() bool {
		GoroutineProfile(nil)
		return true
	}))

	// Measure the cost of a large (but unused) allgs list
	n = NumGoroutine()
	p = make([]StackRecord, 2*n+2*GOMAXPROCS(0))
	b.Run("sparse", run(func() bool {
		_, ok := GoroutineProfile(p)
		return ok
	}))
}

func TestVersion(t *testing.T) {
	// Test that version does not contain \r or \n.
	vers := Version()
	if strings.Contains(vers, "\r") || strings.Contains(vers, "\n") {
		t.Fatalf("cr/nl in version: %q", vers)
	}
}

func TestTimediv(t *testing.T) {
	for _, tc := range []struct {
		num int64
		div int32
		ret int32
		rem int32
	}{
		{
			num: 8,
			div: 2,
			ret: 4,
			rem: 0,
		},
		{
			num: 9,
			div: 2,
			ret: 4,
			rem: 1,
		},
		{
			// Used by runtime.check.
			num: 12345*1000000000 + 54321,
			div: 1000000000,
			ret: 12345,
			rem: 54321,
		},
		{
			num: 1<<32 - 1,
			div: 2,
			ret: 1<<31 - 1, // no overflow.
			rem: 1,
		},
		{
			num: 1 << 32,
			div: 2,
			ret: 1<<31 - 1, // overflow.
			rem: 0,
		},
		{
			num: 1 << 40,
			div: 2,
			ret: 1<<31 - 1, // overflow.
			rem: 0,
		},
		{
			num: 1<<40 + 1,
			div: 1 << 10,
			ret: 1 << 30,
			rem: 1,
		},
	} {
		name := fmt.Sprintf("%d div %d", tc.num, tc.div)
		t.Run(name, func(t *testing.T) {
			// Double check that the inputs make sense using
			// standard 64-bit division.
			ret64 := tc.num / int64(tc.div)
			rem64 := tc.num % int64(tc.div)
			if ret64 != int64(int32(ret64)) {
				// Simulate timediv overflow value.
				ret64 = 1<<31 - 1
				rem64 = 0
			}
			if ret64 != int64(tc.ret) {
				t.Errorf("%d / %d got ret %d rem %d want ret %d rem %d", tc.num, tc.div, ret64, rem64, tc.ret, tc.rem)
			}

			var rem int32
			ret := Timediv(tc.num, tc.div, &rem)
			if ret != tc.ret || rem != tc.rem {
				t.Errorf("timediv %d / %d got ret %d rem %d want ret %d rem %d", tc.num, tc.div, ret, rem, tc.ret, tc.rem)
			}
		})
	}
}

func BenchmarkProcYield(b *testing.B) {
	benchN := func(n uint32) func(*testing.B) {
		return func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				ProcYield(n)
			}
		}
	}

	b.Run("1", benchN(1))
	b.Run("10", benchN(10))
	b.Run("30", benchN(30)) // active_spin_cnt in lock_sema.go and lock_futex.go
	b.Run("100", benchN(100))
	b.Run("1000", benchN(1000))
}

func BenchmarkOSYield(b *testing.B) {
	for i := 0; i < b.N; i++ {
		OSYield()
	}
}

func BenchmarkMutexContention(b *testing.B) {
	// Measure throughput of a single mutex with all threads contending
	//
	// Share a single counter across all threads. Progress from any thread is
	// progress for the benchmark as a whole. We don't measure or give points
	// for fairness here, arbitrary delay to any given thread's progress is
	// invisible and allowed.
	//
	// The cache line that holds the count value will need to move between
	// processors, but not as often as the cache line that holds the mutex. The
	// mutex protects access to the count value, which limits contention on that
	// cache line. This is a simple design, but it helps to make the behavior of
	// the benchmark clear. Most real uses of mutex will protect some number of
	// cache lines anyway.

	var state struct {
		_     cpu.CacheLinePad
		lock  Mutex
		_     cpu.CacheLinePad
		count atomic.Int64
		_     cpu.CacheLinePad
	}

	procs := GOMAXPROCS(0)
	var wg sync.WaitGroup
	for range procs {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for {
				Lock(&state.lock)
				ours := state.count.Add(1)
				Unlock(&state.lock)
				if ours >= int64(b.N) {
					return
				}
			}
		}()
	}
	wg.Wait()
}

func BenchmarkMutexCapture(b *testing.B) {

	// Measure mutex fairness.
	//
	// Have several threads contend for a single mutex value. Measure how
	// effectively a single thread is able to capture the lock and report the
	// duration of those "streak" events. Measure how long other individual
	// threads need to wait between their turns with the lock. Report the
	// duration of those "starve" events.
	//
	// Report in terms of wall clock time (assuming a constant time per
	// lock/unlock pair) rather than number of locks/unlocks. This keeps
	// timekeeping overhead out of the critical path, and avoids giving an
	// advantage to lock/unlock implementations that take less time per
	// operation.

	var state struct {
		_     cpu.CacheLinePad
		lock  Mutex
		_     cpu.CacheLinePad
		count atomic.Int64
		_     cpu.CacheLinePad
	}

	procs := GOMAXPROCS(0)
	var wg sync.WaitGroup
	histograms := make(chan [2][65]int)
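	// Each goroutine reports a pair of histograms: [0] buckets streak lengths
	// and [1] buckets starvation gaps, indexed by bits.LeadingZeros64 of the
	// value (hence 65 buckets).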
	for range procs {
		wg.Add(1)
		go func() {
			var (
				prev      int64
				streak    int64
				histogram [2][65]int
			)
			for {
				Lock(&state.lock)
				ours := state.count.Add(1)
				Unlock(&state.lock)
				delta := ours - prev - 1
				prev = ours
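				// delta == 0 means this goroutine also performed the previous
				// acquisition, so its streak continues; otherwise delta counts
				// how many acquisitions by other goroutines happened in between.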
				if delta == 0 {
					streak++
				} else {
					histogram[0][bits.LeadingZeros64(uint64(streak))]++
					histogram[1][bits.LeadingZeros64(uint64(delta))]++
					streak = 1
				}
				if ours >= int64(b.N) {
					wg.Done()
					if delta == 0 {
						histogram[0][bits.LeadingZeros64(uint64(streak))]++
						histogram[1][bits.LeadingZeros64(uint64(delta))]++
					}
					histograms <- histogram
					return
				}
			}
		}()
	}

	wg.Wait()
	b.StopTimer()

	var histogram [2][65]int
	for range procs {
		h := <-histograms
		for i := range h {
			for j := range h[i] {
				histogram[i][j] += h[i][j]
			}
		}
	}

	percentile := func(h [65]int, p float64) int {
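		// h[i] counts events whose value has i leading zero bits, so
		// uint64(1<<63)>>i is that bucket's lower bound. Weight each bucket by
		// its lower bound to approximate the total duration-weighted mass, then
		// walk from the largest buckets down until the events kept so far
		// account for more than a 1-p fraction of that total.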
		sum := 0
		for i, v := range h {
			bound := uint64(1<<63) >> i
			sum += int(bound) * v
		}

		// Imagine that the longest streak / starvation events were instead half
		// as long but twice in number. (Note that we've pre-multiplied by the
		// [lower] "bound" value.) Continue those splits until we meet the
		// percentile target.
		part := 0
		for i, v := range h {
			bound := uint64(1<<63) >> i
			part += int(bound) * v
			// Have we trimmed off enough at the head to dip below the percentile goal?
			if float64(sum-part) < float64(sum)*p {
				return int(bound)
			}
		}

		return 0
	}

	perOp := float64(b.Elapsed().Nanoseconds()) / float64(b.N)
	b.ReportMetric(perOp*float64(percentile(histogram[0], 1.0)), "ns/streak-p100")
	b.ReportMetric(perOp*float64(percentile(histogram[0], 0.9)), "ns/streak-p90")
	b.ReportMetric(perOp*float64(percentile(histogram[1], 1.0)), "ns/starve-p100")
	b.ReportMetric(perOp*float64(percentile(histogram[1], 0.9)), "ns/starve-p90")
}

func BenchmarkMutexHandoff(b *testing.B) {
	testcase := func(delay func(l *Mutex)) func(b *testing.B) {
		return func(b *testing.B) {
			if workers := 2; GOMAXPROCS(0) < workers {
				b.Skipf("requires GOMAXPROCS >= %d", workers)
			}

			// Measure latency of mutex handoff between threads.
			//
			// Hand off a runtime.mutex between two threads, one running a
			// "coordinator" goroutine and the other running a "worker"
			// goroutine. We don't override the runtime's typical
			// goroutine/thread mapping behavior.
			//
			// Measure the latency, starting when the coordinator enters a call
			// to runtime.unlock and ending when the worker's call to
			// runtime.lock returns. The benchmark can specify a "delay"
			// function to simulate the length of the mutex-holder's critical
			// section, including to arrange for the worker's thread to be in
			// either the "spinning" or "sleeping" portions of the runtime.lock2
			// implementation. Measurement starts after any such "delay".
			//
			// The two threads' goroutines communicate their current position to
			// each other in a non-blocking way via the "turn" state.

			var state struct {
				_    cpu.CacheLinePad
				lock Mutex
				_    cpu.CacheLinePad
				turn atomic.Int64
				_    cpu.CacheLinePad
			}

			var delta atomic.Int64
			var wg sync.WaitGroup

			// coordinator:
			//  - acquire the mutex
			//  - set the turn to 2 mod 4, instructing the worker to begin its Lock call
			//  - wait until the mutex is contended
			//  - wait a bit more so the worker can commit to its sleep
			//  - release the mutex and wait for it to be our turn (0 mod 4) again
			wg.Add(1)
			go func() {
				defer wg.Done()
				var t int64
				for range b.N {
					Lock(&state.lock)
					state.turn.Add(2)
					delay(&state.lock)
					t -= Nanotime() // start the timer
					Unlock(&state.lock)
					for state.turn.Load()&0x2 != 0 {
					}
				}
				state.turn.Add(1)
				delta.Add(t)
			}()

			// worker:
			//  - wait until it's our turn (2 mod 4)
			//  - acquire and release the mutex
			//  - switch the turn counter back to the coordinator (0 mod 4)
			wg.Add(1)
			go func() {
				defer wg.Done()
				var t int64
				for {
					switch state.turn.Load() & 0x3 {
					case 0:
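						// Not our turn yet; keep spinning.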
					case 1, 3:
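						// The coordinator has finished its b.N iterations and
						// bumped the turn counter to an odd value: record our
						// accumulated time and exit.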
						delta.Add(t)
						return
					case 2:
						Lock(&state.lock)
						t += Nanotime() // stop the timer
						Unlock(&state.lock)
						state.turn.Add(2)
					}
				}
			}()

			wg.Wait()
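			// delta holds the sum over all iterations of (worker lock-acquire
			// time - coordinator unlock-entry time), i.e. the total handoff latency.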
			b.ReportMetric(float64(delta.Load())/float64(b.N), "ns/op")
		}
	}

	b.Run("Solo", func(b *testing.B) {
		var lock Mutex
		for range b.N {
			Lock(&lock)
			Unlock(&lock)
		}
	})

	b.Run("FastPingPong", testcase(func(l *Mutex) {}))
	b.Run("SlowPingPong", testcase(func(l *Mutex) {
		// Wait for the worker to stop spinning and prepare to sleep
		for !MutexContended(l) {
		}
		// Wait a bit longer so the OS can finish committing the worker to its
		// sleep. Balance consistency against getting enough iterations.
		const extraNs = 10e3
		for t0 := Nanotime(); Nanotime()-t0 < extraNs; {
		}
	}))
}
