Source file src/runtime/metrics_test.go

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runtime_test
     6  
     7  import (
     8  	"bytes"
     9  	"internal/abi"
    10  	"internal/goexperiment"
    11  	"internal/profile"
    12  	"internal/testenv"
    13  	"os"
    14  	"reflect"
    15  	"runtime"
    16  	"runtime/debug"
    17  	"runtime/metrics"
    18  	"runtime/pprof"
    19  	"runtime/trace"
    20  	"slices"
    21  	"sort"
    22  	"strings"
    23  	"sync"
    24  	"sync/atomic"
    25  	"testing"
    26  	"time"
    27  	"unsafe"
    28  )
    29  
    30  func prepareAllMetricsSamples() (map[string]metrics.Description, []metrics.Sample) {
    31  	all := metrics.All()
    32  	samples := make([]metrics.Sample, len(all))
    33  	descs := make(map[string]metrics.Description)
    34  	for i := range all {
    35  		samples[i].Name = all[i].Name
    36  		descs[all[i].Name] = all[i]
    37  	}
    38  	return descs, samples
    39  }
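
        // A hedged sketch (not exercised by the tests below) of the read-all
        // pattern that prepareAllMetricsSamples supports: populate the Sample
        // slice once, call metrics.Read, then interpret each value according
        // to its kind. The name readAllUint64 is illustrative only.
        func readAllUint64() map[string]uint64 {
        	descs, samples := prepareAllMetricsSamples()
        	metrics.Read(samples)
        	out := make(map[string]uint64)
        	for _, s := range samples {
        		// Only KindUint64 values may be read with Uint64; KindFloat64
        		// and KindFloat64Histogram values have their own accessors.
        		if descs[s.Name].Kind == metrics.KindUint64 {
        			out[s.Name] = s.Value.Uint64()
        		}
        	}
        	return out
        }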
    40  
    41  func TestReadMetrics(t *testing.T) {
    42  	// Run a GC cycle to get some of the stats to be non-zero.
    43  	runtime.GC()
    44  
    45  	// Set an arbitrary memory limit to check the metric for it
    46  	limit := int64(512 * 1024 * 1024)
    47  	oldLimit := debug.SetMemoryLimit(limit)
    48  	defer debug.SetMemoryLimit(oldLimit)
    49  
    50  	// Set a GC percent to check the metric for it
    51  	gcPercent := 99
    52  	oldGCPercent := debug.SetGCPercent(gcPercent)
    53  	defer debug.SetGCPercent(oldGCPercent)
    54  
    55  	// Tests whether readMetrics produces values aligning
    56  	// with ReadMemStats while the world is stopped.
    57  	var mstats runtime.MemStats
    58  	_, samples := prepareAllMetricsSamples()
    59  	runtime.ReadMetricsSlow(&mstats, unsafe.Pointer(&samples[0]), len(samples), cap(samples))
    60  
    61  	checkUint64 := func(t *testing.T, m string, got, want uint64) {
    62  		t.Helper()
    63  		if got != want {
    64  			t.Errorf("metric %q: got %d, want %d", m, got, want)
    65  		}
    66  	}
    67  
    68  	// Check to make sure the values we read line up with other values we read.
    69  	var allocsBySize, gcPauses, schedPausesTotalGC *metrics.Float64Histogram
    70  	var tinyAllocs uint64
    71  	var mallocs, frees uint64
    72  	for i := range samples {
    73  		switch name := samples[i].Name; name {
    74  		case "/cgo/go-to-c-calls:calls":
    75  			checkUint64(t, name, samples[i].Value.Uint64(), uint64(runtime.NumCgoCall()))
    76  		case "/memory/classes/heap/free:bytes":
    77  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapIdle-mstats.HeapReleased)
    78  		case "/memory/classes/heap/released:bytes":
    79  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapReleased)
    80  		case "/memory/classes/heap/objects:bytes":
    81  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapAlloc)
    82  		case "/memory/classes/heap/unused:bytes":
    83  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapInuse-mstats.HeapAlloc)
    84  		case "/memory/classes/heap/stacks:bytes":
    85  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackInuse)
    86  		case "/memory/classes/metadata/mcache/free:bytes":
    87  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheSys-mstats.MCacheInuse)
    88  		case "/memory/classes/metadata/mcache/inuse:bytes":
    89  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheInuse)
    90  		case "/memory/classes/metadata/mspan/free:bytes":
    91  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanSys-mstats.MSpanInuse)
    92  		case "/memory/classes/metadata/mspan/inuse:bytes":
    93  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanInuse)
    94  		case "/memory/classes/metadata/other:bytes":
    95  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.GCSys)
    96  		case "/memory/classes/os-stacks:bytes":
    97  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackSys-mstats.StackInuse)
    98  		case "/memory/classes/other:bytes":
    99  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.OtherSys)
   100  		case "/memory/classes/profiling/buckets:bytes":
   101  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.BuckHashSys)
   102  		case "/memory/classes/total:bytes":
   103  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.Sys)
   104  		case "/gc/heap/allocs-by-size:bytes":
   105  			hist := samples[i].Value.Float64Histogram()
   106  			// Skip size class 0 in BySize, because it's always empty and not represented
   107  			// in the histogram.
   108  			for i, sc := range mstats.BySize[1:] {
   109  				if b, s := hist.Buckets[i+1], float64(sc.Size+1); b != s {
   110  					t.Errorf("bucket does not match size class: got %f, want %f", b, s)
   111  					// The rest of the checks aren't expected to work anyway.
   112  					continue
   113  				}
   114  				if c, m := hist.Counts[i], sc.Mallocs; c != m {
   115  				t.Errorf("histogram counts do not match BySize for class %d: got %d, want %d", i, c, m)
   116  				}
   117  			}
   118  			allocsBySize = hist
   119  		case "/gc/heap/allocs:bytes":
   120  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.TotalAlloc)
   121  		case "/gc/heap/frees-by-size:bytes":
   122  			hist := samples[i].Value.Float64Histogram()
   123  			// Skip size class 0 in BySize, because it's always empty and not represented
   124  			// in the histogram.
   125  			for i, sc := range mstats.BySize[1:] {
   126  				if b, s := hist.Buckets[i+1], float64(sc.Size+1); b != s {
   127  					t.Errorf("bucket does not match size class: got %f, want %f", b, s)
   128  					// The rest of the checks aren't expected to work anyway.
   129  					continue
   130  				}
   131  				if c, f := hist.Counts[i], sc.Frees; c != f {
   132  					t.Errorf("histogram counts do not match BySize for class %d: got %d, want %d", i, c, f)
   133  				}
   134  			}
   135  		case "/gc/heap/frees:bytes":
   136  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.TotalAlloc-mstats.HeapAlloc)
   137  		case "/gc/heap/tiny/allocs:objects":
   138  			// Currently, MemStats adds tiny alloc count to both Mallocs AND Frees.
   139  			// This is because MemStats couldn't be extended at the time,
   140  			// but there was a desire to have Mallocs at least be a little more representative,
   141  			// while having Mallocs - Frees still represent a live object count.
   142  			// Unfortunately, MemStats doesn't actually export a large allocation count,
   143  			// so it's impossible to pull this number out directly.
   144  			//
   145  			// Check tiny allocation count outside of this loop, by using the allocs-by-size
   146  			// histogram in order to figure out how many large objects there are.
   147  			tinyAllocs = samples[i].Value.Uint64()
   148  			// The next two metrics are checked against Mallocs and Frees, so
   149  			// for the same reason they can't be checked directly either: the
   150  			// tiny allocations included in Mallocs and Frees must be accounted for.
   151  		case "/gc/heap/allocs:objects":
   152  			mallocs = samples[i].Value.Uint64()
   153  		case "/gc/heap/frees:objects":
   154  			frees = samples[i].Value.Uint64()
   155  		case "/gc/heap/live:bytes":
   156  			// Check for "obviously wrong" values. We can't check a stronger invariant,
   157  			// such as live <= HeapAlloc, because live is not 100% accurate. It's computed
   158  			// under racy conditions, and some objects may be double-counted (this is
   159  			// intentional and necessary for GC performance).
   160  			//
   161  			// Instead, check against a much more reasonable upper-bound: the amount of
   162  			// mapped heap memory. We can't possibly overcount to the point of exceeding
   163  			// total mapped heap memory, except if there's an accounting bug.
   164  			if live := samples[i].Value.Uint64(); live > mstats.HeapSys {
   165  				t.Errorf("live bytes: %d > heap sys: %d", live, mstats.HeapSys)
   166  			} else if live == 0 {
   167  				// Might happen if we don't call runtime.GC() above.
   168  				t.Error("live bytes is 0")
   169  			}
   170  		case "/gc/gomemlimit:bytes":
   171  			checkUint64(t, name, samples[i].Value.Uint64(), uint64(limit))
   172  		case "/gc/heap/objects:objects":
   173  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapObjects)
   174  		case "/gc/heap/goal:bytes":
   175  			checkUint64(t, name, samples[i].Value.Uint64(), mstats.NextGC)
   176  		case "/gc/gogc:percent":
   177  			checkUint64(t, name, samples[i].Value.Uint64(), uint64(gcPercent))
   178  		case "/gc/cycles/automatic:gc-cycles":
   179  			checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC-mstats.NumForcedGC))
   180  		case "/gc/cycles/forced:gc-cycles":
   181  			checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumForcedGC))
   182  		case "/gc/cycles/total:gc-cycles":
   183  			checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC))
   184  		case "/gc/pauses:seconds":
   185  			gcPauses = samples[i].Value.Float64Histogram()
   186  		case "/sched/pauses/total/gc:seconds":
   187  			schedPausesTotalGC = samples[i].Value.Float64Histogram()
   188  		}
   189  	}
   190  
   191  	// Check tinyAllocs.
   192  	nonTinyAllocs := uint64(0)
   193  	for _, c := range allocsBySize.Counts {
   194  		nonTinyAllocs += c
   195  	}
   196  	checkUint64(t, "/gc/heap/tiny/allocs:objects", tinyAllocs, mstats.Mallocs-nonTinyAllocs)
   197  
   198  	// Check allocation and free counts.
   199  	checkUint64(t, "/gc/heap/allocs:objects", mallocs, mstats.Mallocs-tinyAllocs)
   200  	checkUint64(t, "/gc/heap/frees:objects", frees, mstats.Frees-tinyAllocs)
   201  
   202  	// Verify that /gc/pauses:seconds is a copy of /sched/pauses/total/gc:seconds
   203  	if !slices.Equal(gcPauses.Buckets, schedPausesTotalGC.Buckets) {
   204  		t.Errorf("/gc/pauses:seconds buckets %v do not match /sched/pauses/total/gc:seconds buckets %v", gcPauses.Buckets, schedPausesTotalGC.Buckets)
   205  	}
   206  	if !slices.Equal(gcPauses.Counts, schedPausesTotalGC.Counts) {
   207  		t.Errorf("/gc/pauses:seconds counts %v do not match /sched/pauses/total/gc:seconds counts %v", gcPauses.Counts, schedPausesTotalGC.Counts)
   208  	}
   209  }
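
        // In hedged summary, the tiny-allocation bookkeeping above relies on
        // the identities (valid here because the world is stopped during the
        // read):
        //
        //	mstats.Mallocs = (non-tiny allocs) + (tiny allocs)
        //	mstats.Frees   = (non-tiny frees)  + (tiny allocs)
        //
        // so tiny allocs = Mallocs - sum(allocs-by-size histogram counts), and
        // the allocs:objects and frees:objects metrics are Mallocs and Frees
        // with the tiny-allocation count subtracted back out.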
   210  
   211  func TestReadMetricsConsistency(t *testing.T) {
   212  	// Tests whether readMetrics produces consistent, sensible values.
   213  	// The values are read concurrently with the runtime doing other
   214  	// things (e.g. allocating), so what we read can't reasonably be compared
   215  	// to other runtime values (e.g. MemStats).
   216  
   217  	// Run a few GC cycles to get some of the stats to be non-zero.
   218  	runtime.GC()
   219  	runtime.GC()
   220  	runtime.GC()
   221  
   222  	// Set GOMAXPROCS high then sleep briefly to ensure we generate
   223  	// some idle time.
   224  	oldmaxprocs := runtime.GOMAXPROCS(10)
   225  	time.Sleep(time.Millisecond)
   226  	runtime.GOMAXPROCS(oldmaxprocs)
   227  
   228  	// Read all the supported metrics through the metrics package.
   229  	descs, samples := prepareAllMetricsSamples()
   230  	metrics.Read(samples)
   231  
   232  	// Check to make sure the values we read make sense.
   233  	var totalVirtual struct {
   234  		got, want uint64
   235  	}
   236  	var objects struct {
   237  		alloc, free             *metrics.Float64Histogram
   238  		allocs, frees           uint64
   239  		allocdBytes, freedBytes uint64
   240  		total, totalBytes       uint64
   241  	}
   242  	var gc struct {
   243  		numGC  uint64
   244  		pauses uint64
   245  	}
   246  	var totalScan struct {
   247  		got, want uint64
   248  	}
   249  	var cpu struct {
   250  		gcAssist    float64
   251  		gcDedicated float64
   252  		gcIdle      float64
   253  		gcPause     float64
   254  		gcTotal     float64
   255  
   256  		idle float64
   257  		user float64
   258  
   259  		scavengeAssist float64
   260  		scavengeBg     float64
   261  		scavengeTotal  float64
   262  
   263  		total float64
   264  	}
   265  	for i := range samples {
   266  		kind := samples[i].Value.Kind()
   267  		if want := descs[samples[i].Name].Kind; kind != want {
   268  			t.Errorf("supported metric %q has unexpected kind: got %d, want %d", samples[i].Name, kind, want)
   269  			continue
   270  		}
   271  		if samples[i].Name != "/memory/classes/total:bytes" && strings.HasPrefix(samples[i].Name, "/memory/classes") {
   272  			v := samples[i].Value.Uint64()
   273  			totalVirtual.want += v
   274  
   275  			// None of these stats should ever get this big.
   276  			// If they do, there's probably overflow involved,
   277  			// usually due to bad accounting.
   278  			if int64(v) < 0 {
   279  				t.Errorf("%q has high/negative value: %d", samples[i].Name, v)
   280  			}
   281  		}
   282  		switch samples[i].Name {
   283  		case "/cpu/classes/gc/mark/assist:cpu-seconds":
   284  			cpu.gcAssist = samples[i].Value.Float64()
   285  		case "/cpu/classes/gc/mark/dedicated:cpu-seconds":
   286  			cpu.gcDedicated = samples[i].Value.Float64()
   287  		case "/cpu/classes/gc/mark/idle:cpu-seconds":
   288  			cpu.gcIdle = samples[i].Value.Float64()
   289  		case "/cpu/classes/gc/pause:cpu-seconds":
   290  			cpu.gcPause = samples[i].Value.Float64()
   291  		case "/cpu/classes/gc/total:cpu-seconds":
   292  			cpu.gcTotal = samples[i].Value.Float64()
   293  		case "/cpu/classes/idle:cpu-seconds":
   294  			cpu.idle = samples[i].Value.Float64()
   295  		case "/cpu/classes/scavenge/assist:cpu-seconds":
   296  			cpu.scavengeAssist = samples[i].Value.Float64()
   297  		case "/cpu/classes/scavenge/background:cpu-seconds":
   298  			cpu.scavengeBg = samples[i].Value.Float64()
   299  		case "/cpu/classes/scavenge/total:cpu-seconds":
   300  			cpu.scavengeTotal = samples[i].Value.Float64()
   301  		case "/cpu/classes/total:cpu-seconds":
   302  			cpu.total = samples[i].Value.Float64()
   303  		case "/cpu/classes/user:cpu-seconds":
   304  			cpu.user = samples[i].Value.Float64()
   305  		case "/memory/classes/total:bytes":
   306  			totalVirtual.got = samples[i].Value.Uint64()
   307  		case "/memory/classes/heap/objects:bytes":
   308  			objects.totalBytes = samples[i].Value.Uint64()
   309  		case "/gc/heap/objects:objects":
   310  			objects.total = samples[i].Value.Uint64()
   311  		case "/gc/heap/allocs:bytes":
   312  			objects.allocdBytes = samples[i].Value.Uint64()
   313  		case "/gc/heap/allocs:objects":
   314  			objects.allocs = samples[i].Value.Uint64()
   315  		case "/gc/heap/allocs-by-size:bytes":
   316  			objects.alloc = samples[i].Value.Float64Histogram()
   317  		case "/gc/heap/frees:bytes":
   318  			objects.freedBytes = samples[i].Value.Uint64()
   319  		case "/gc/heap/frees:objects":
   320  			objects.frees = samples[i].Value.Uint64()
   321  		case "/gc/heap/frees-by-size:bytes":
   322  			objects.free = samples[i].Value.Float64Histogram()
   323  		case "/gc/cycles:gc-cycles":
   324  			gc.numGC = samples[i].Value.Uint64()
   325  		case "/gc/pauses:seconds":
   326  			h := samples[i].Value.Float64Histogram()
   327  			gc.pauses = 0
   328  			for i := range h.Counts {
   329  				gc.pauses += h.Counts[i]
   330  			}
   331  		case "/gc/scan/heap:bytes":
   332  			totalScan.want += samples[i].Value.Uint64()
   333  		case "/gc/scan/globals:bytes":
   334  			totalScan.want += samples[i].Value.Uint64()
   335  		case "/gc/scan/stack:bytes":
   336  			totalScan.want += samples[i].Value.Uint64()
   337  		case "/gc/scan/total:bytes":
   338  			totalScan.got = samples[i].Value.Uint64()
   339  		case "/sched/gomaxprocs:threads":
   340  			if got, want := samples[i].Value.Uint64(), uint64(runtime.GOMAXPROCS(-1)); got != want {
   341  				t.Errorf("gomaxprocs doesn't match runtime.GOMAXPROCS: got %d, want %d", got, want)
   342  			}
   343  		case "/sched/goroutines:goroutines":
   344  			if samples[i].Value.Uint64() < 1 {
   345  				t.Error("number of goroutines is less than one")
   346  			}
   347  		}
   348  	}
   349  	// Only check this on Linux where we can be reasonably sure we have a high-resolution timer.
   350  	if runtime.GOOS == "linux" {
   351  		if cpu.gcDedicated <= 0 && cpu.gcAssist <= 0 && cpu.gcIdle <= 0 {
   352  			t.Errorf("found no time spent on GC work: %#v", cpu)
   353  		}
   354  		if cpu.gcPause <= 0 {
   355  			t.Errorf("found no GC pauses: %f", cpu.gcPause)
   356  		}
   357  		if cpu.idle <= 0 {
   358  			t.Errorf("found no idle time: %f", cpu.idle)
   359  		}
   360  		if total := cpu.gcDedicated + cpu.gcAssist + cpu.gcIdle + cpu.gcPause; !withinEpsilon(cpu.gcTotal, total, 0.001) {
   361  			t.Errorf("calculated total GC CPU time not within 0.1%% of total: %f vs. %f", total, cpu.gcTotal)
   362  		}
   363  		if total := cpu.scavengeAssist + cpu.scavengeBg; !withinEpsilon(cpu.scavengeTotal, total, 0.001) {
   364  			t.Errorf("calculated total scavenge CPU time not within 0.1%% of total: %f vs. %f", total, cpu.scavengeTotal)
   365  		}
   366  		if cpu.total <= 0 {
   367  			t.Errorf("found no total CPU time passed")
   368  		}
   369  		if cpu.user <= 0 {
   370  			t.Errorf("found no user time passed")
   371  		}
   372  		if total := cpu.gcTotal + cpu.scavengeTotal + cpu.user + cpu.idle; !withinEpsilon(cpu.total, total, 0.001) {
   373  			t.Errorf("calculated total CPU time not within 0.1%% of total: %f vs. %f", total, cpu.total)
   374  		}
   375  	}
   376  	if totalVirtual.got != totalVirtual.want {
   377  		t.Errorf(`"/memory/classes/total:bytes" does not match sum of /memory/classes/**: got %d, want %d`, totalVirtual.got, totalVirtual.want)
   378  	}
   379  	if got, want := objects.allocs-objects.frees, objects.total; got != want {
   380  		t.Errorf("mismatch between object alloc/free tallies and total: got %d, want %d", got, want)
   381  	}
   382  	if got, want := objects.allocdBytes-objects.freedBytes, objects.totalBytes; got != want {
   383  		t.Errorf("mismatch between object alloc/free tallies and total: got %d, want %d", got, want)
   384  	}
   385  	if b, c := len(objects.alloc.Buckets), len(objects.alloc.Counts); b != c+1 {
   386  		t.Errorf("allocs-by-size has wrong bucket or counts length: %d buckets, %d counts", b, c)
   387  	}
   388  	if b, c := len(objects.free.Buckets), len(objects.free.Counts); b != c+1 {
   389  		t.Errorf("frees-by-size has wrong bucket or counts length: %d buckets, %d counts", b, c)
   390  	}
   391  	if len(objects.alloc.Buckets) != len(objects.free.Buckets) {
   392  		t.Error("allocs-by-size and frees-by-size buckets don't match in length")
   393  	} else if len(objects.alloc.Counts) != len(objects.free.Counts) {
   394  		t.Error("allocs-by-size and frees-by-size counts don't match in length")
   395  	} else {
   396  		for i := range objects.alloc.Buckets {
   397  			ba := objects.alloc.Buckets[i]
   398  			bf := objects.free.Buckets[i]
   399  			if ba != bf {
   400  				t.Errorf("bucket %d is different for alloc and free hists: %f != %f", i, ba, bf)
   401  			}
   402  		}
   403  		if !t.Failed() {
   404  			var gotAlloc, gotFree uint64
   405  			want := objects.total
   406  			for i := range objects.alloc.Counts {
   407  				if objects.alloc.Counts[i] < objects.free.Counts[i] {
   408  					t.Errorf("found more frees than allocs in object dist bucket %d", i)
   409  					continue
   410  				}
   411  				gotAlloc += objects.alloc.Counts[i]
   412  				gotFree += objects.free.Counts[i]
   413  			}
   414  			if got := gotAlloc - gotFree; got != want {
   415  				t.Errorf("object distribution counts don't match count of live objects: got %d, want %d", got, want)
   416  			}
   417  			if gotAlloc != objects.allocs {
   418  				t.Errorf("object distribution counts don't match total allocs: got %d, want %d", gotAlloc, objects.allocs)
   419  			}
   420  			if gotFree != objects.frees {
   421  				t.Errorf("object distribution counts don't match total frees: got %d, want %d", gotFree, objects.frees)
   422  			}
   423  		}
   424  	}
   425  	// The current GC has at least 2 pauses per GC cycle.
   426  	// Check that the observed pause count is consistent with that.
   427  	if gc.pauses < gc.numGC*2 {
   428  		t.Errorf("fewer pauses than expected: got %d, want at least %d", gc.pauses, gc.numGC*2)
   429  	}
   430  	if totalScan.got <= 0 {
   431  		t.Errorf("scannable GC space is empty: %d", totalScan.got)
   432  	}
   433  	if totalScan.got != totalScan.want {
   434  		t.Errorf("/gc/scan/total:bytes doesn't line up with sum of /gc/scan*: total %d vs. sum %d", totalScan.got, totalScan.want)
   435  	}
   436  }
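
        // histogramMedian is a hedged sketch (not used by the tests) showing how
        // a consumer might interpret a metrics.Float64Histogram: Buckets has one
        // more element than Counts, and Counts[i] covers the half-open interval
        // [Buckets[i], Buckets[i+1]).
        func histogramMedian(h *metrics.Float64Histogram) float64 {
        	var total uint64
        	for _, c := range h.Counts {
        		total += c
        	}
        	if total == 0 {
        		return 0 // empty histogram
        	}
        	var cum uint64
        	for i, c := range h.Counts {
        		cum += c
        		if cum >= (total+1)/2 {
        			// Lower bound of the median bucket; note that it can be
        			// -Inf for histograms whose first bucket is unbounded.
        			return h.Buckets[i]
        		}
        	}
        	return h.Buckets[len(h.Buckets)-1]
        }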
   437  
   438  func BenchmarkReadMetricsLatency(b *testing.B) {
   439  	stop := applyGCLoad(b)
   440  
   441  	// Collect latency measurements here.
   442  	latencies := make([]time.Duration, 0, 1024)
   443  	_, samples := prepareAllMetricsSamples()
   444  
   445  	// Hit metrics.Read continuously and measure.
   446  	b.ResetTimer()
   447  	for i := 0; i < b.N; i++ {
   448  		start := time.Now()
   449  		metrics.Read(samples)
   450  		latencies = append(latencies, time.Since(start))
   451  	}
   452  	// Make sure to stop the timer before we wait! The load created above
   453  	// is very heavy-weight and not easy to stop, so we could end up
   454  	// confusing the benchmarking framework for small b.N.
   455  	b.StopTimer()
   456  	stop()
   457  
   458  	// Disable the default */op metrics.
   459  	// ns/op doesn't mean anything here because it's an average; the
   460  	// latency percentiles reported below are what actually matter.
   461  	b.ReportMetric(0, "ns/op")
   462  	b.ReportMetric(0, "B/op")
   463  	b.ReportMetric(0, "allocs/op")
   464  
   465  	// Sort latencies then report percentiles.
   466  	sort.Slice(latencies, func(i, j int) bool {
   467  		return latencies[i] < latencies[j]
   468  	})
   469  	b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns")
   470  	b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns")
   471  	b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns")
   472  }
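
        // Indexing the sorted slice at len*p/100 above is the usual quick sketch
        // for a p-th percentile: with fewer than 100 samples p99 coincides with
        // the maximum, but the estimate is stable enough for benchmark reporting.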
   473  
   474  var readMetricsSink [1024]interface{}
   475  
   476  func TestReadMetricsCumulative(t *testing.T) {
   477  	// Set up the set of metrics marked cumulative.
   478  	descs := metrics.All()
   479  	var samples [2][]metrics.Sample
   480  	samples[0] = make([]metrics.Sample, len(descs))
   481  	samples[1] = make([]metrics.Sample, len(descs))
   482  	total := 0
   483  	for i := range samples[0] {
   484  		if !descs[i].Cumulative {
   485  			continue
   486  		}
   487  		samples[0][total].Name = descs[i].Name
   488  		total++
   489  	}
   490  	samples[0] = samples[0][:total]
   491  	samples[1] = samples[1][:total]
   492  	copy(samples[1], samples[0])
   493  
   494  	// Start some noise in the background.
   495  	var wg sync.WaitGroup
   496  	wg.Add(1)
   497  	done := make(chan struct{})
   498  	go func() {
   499  		defer wg.Done()
   500  		for {
   501  			// Add more things here that could influence metrics.
   502  			for i := 0; i < len(readMetricsSink); i++ {
   503  				readMetricsSink[i] = make([]byte, 1024)
   504  				select {
   505  				case <-done:
   506  					return
   507  				default:
   508  				}
   509  			}
   510  			runtime.GC()
   511  		}
   512  	}()
   513  
   514  	sum := func(us []uint64) uint64 {
   515  		total := uint64(0)
   516  		for _, u := range us {
   517  			total += u
   518  		}
   519  		return total
   520  	}
   521  
   522  	// Populate the first generation.
   523  	metrics.Read(samples[0])
   524  
   525  	// Check to make sure that these metrics only grow monotonically.
   526  	for gen := 1; gen < 10; gen++ {
   527  		metrics.Read(samples[gen%2])
   528  		for i := range samples[gen%2] {
   529  			name := samples[gen%2][i].Name
   530  			vNew, vOld := samples[gen%2][i].Value, samples[1-(gen%2)][i].Value
   531  
   532  			switch vNew.Kind() {
   533  			case metrics.KindUint64:
   534  				new := vNew.Uint64()
   535  				old := vOld.Uint64()
   536  				if new < old {
   537  					t.Errorf("%s decreased: %d < %d", name, new, old)
   538  				}
   539  			case metrics.KindFloat64:
   540  				new := vNew.Float64()
   541  				old := vOld.Float64()
   542  				if new < old {
   543  					t.Errorf("%s decreased: %f < %f", name, new, old)
   544  				}
   545  			case metrics.KindFloat64Histogram:
   546  				new := sum(vNew.Float64Histogram().Counts)
   547  				old := sum(vOld.Float64Histogram().Counts)
   548  				if new < old {
   549  					t.Errorf("%s counts decreased: %d < %d", name, new, old)
   550  				}
   551  			}
   552  		}
   553  	}
   554  	close(done)
   555  
   556  	wg.Wait()
   557  }
   558  
   559  func withinEpsilon(v1, v2, e float64) bool {
   560  	return v2-v2*e <= v1 && v1 <= v2+v2*e
   561  }
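
        // The bounds are scaled by v2, so withinEpsilon is not symmetric in its
        // arguments. For example, withinEpsilon(1.0005, 1.0, 0.001) is true
        // because 1.0005 lies within [1.0-0.001, 1.0+0.001].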
   562  
   563  func TestMutexWaitTimeMetric(t *testing.T) {
   564  	var sample [1]metrics.Sample
   565  	sample[0].Name = "/sync/mutex/wait/total:seconds"
   566  
   567  	locks := []locker2{
   568  		new(mutex),
   569  		new(rwmutexWrite),
   570  		new(rwmutexReadWrite),
   571  		new(rwmutexWriteRead),
   572  	}
   573  	for _, lock := range locks {
   574  		t.Run(reflect.TypeOf(lock).Elem().Name(), func(t *testing.T) {
   575  			metrics.Read(sample[:])
   576  			before := time.Duration(sample[0].Value.Float64() * 1e9)
   577  
   578  			minMutexWaitTime := generateMutexWaitTime(lock)
   579  
   580  			metrics.Read(sample[:])
   581  			after := time.Duration(sample[0].Value.Float64() * 1e9)
   582  
   583  			if wt := after - before; wt < minMutexWaitTime {
   584  				t.Errorf("too little mutex wait time: got %s, want %s", wt, minMutexWaitTime)
   585  			}
   586  		})
   587  	}
   588  }
   589  
   590  // locker2 represents an API surface of two concurrent goroutines
   591  // locking the same resource, but through different APIs. It's intended
   592  // to abstract over the relationship of two Lock calls or an RLock
   593  // and a Lock call.
   594  type locker2 interface {
   595  	Lock1()
   596  	Unlock1()
   597  	Lock2()
   598  	Unlock2()
   599  }
   600  
   601  type mutex struct {
   602  	mu sync.Mutex
   603  }
   604  
   605  func (m *mutex) Lock1()   { m.mu.Lock() }
   606  func (m *mutex) Unlock1() { m.mu.Unlock() }
   607  func (m *mutex) Lock2()   { m.mu.Lock() }
   608  func (m *mutex) Unlock2() { m.mu.Unlock() }
   609  
   610  type rwmutexWrite struct {
   611  	mu sync.RWMutex
   612  }
   613  
   614  func (m *rwmutexWrite) Lock1()   { m.mu.Lock() }
   615  func (m *rwmutexWrite) Unlock1() { m.mu.Unlock() }
   616  func (m *rwmutexWrite) Lock2()   { m.mu.Lock() }
   617  func (m *rwmutexWrite) Unlock2() { m.mu.Unlock() }
   618  
   619  type rwmutexReadWrite struct {
   620  	mu sync.RWMutex
   621  }
   622  
   623  func (m *rwmutexReadWrite) Lock1()   { m.mu.RLock() }
   624  func (m *rwmutexReadWrite) Unlock1() { m.mu.RUnlock() }
   625  func (m *rwmutexReadWrite) Lock2()   { m.mu.Lock() }
   626  func (m *rwmutexReadWrite) Unlock2() { m.mu.Unlock() }
   627  
   628  type rwmutexWriteRead struct {
   629  	mu sync.RWMutex
   630  }
   631  
   632  func (m *rwmutexWriteRead) Lock1()   { m.mu.Lock() }
   633  func (m *rwmutexWriteRead) Unlock1() { m.mu.Unlock() }
   634  func (m *rwmutexWriteRead) Lock2()   { m.mu.RLock() }
   635  func (m *rwmutexWriteRead) Unlock2() { m.mu.RUnlock() }
   636  
   637  // generateMutexWaitTime causes a goroutine to block on the
   638  // given locker2, returning the minimum amount of time that
   639  // should become visible in the
   640  // /sync/mutex/wait/total:seconds metric.
   641  func generateMutexWaitTime(mu locker2) time.Duration {
   642  	// Set up the runtime to always track casgstatus transitions for metrics.
   643  	*runtime.CasGStatusAlwaysTrack = true
   644  
   645  	mu.Lock1()
   646  
   647  	// Start up a goroutine to wait on the lock.
   648  	gc := make(chan *runtime.G)
   649  	done := make(chan bool)
   650  	go func() {
   651  		gc <- runtime.Getg()
   652  
   653  		for {
   654  			mu.Lock2()
   655  			mu.Unlock2()
   656  			if <-done {
   657  				return
   658  			}
   659  		}
   660  	}()
   661  	gp := <-gc
   662  
   663  	// Set the block time high enough so that it will always show up, even
   664  	// on systems with coarse timer granularity.
   665  	const blockTime = 100 * time.Millisecond
   666  
   667  	// Make sure the goroutine spawned above actually blocks on the lock.
   668  	for {
   669  		if runtime.GIsWaitingOnMutex(gp) {
   670  			break
   671  		}
   672  		runtime.Gosched()
   673  	}
   674  
   675  	// Let some amount of time pass.
   676  	time.Sleep(blockTime)
   677  
   678  	// Let the other goroutine acquire the lock.
   679  	mu.Unlock1()
   680  	done <- true
   681  
   682  	// Reset flag.
   683  	*runtime.CasGStatusAlwaysTrack = false
   684  	return blockTime
   685  }
   686  
   687  // See issue #60276.
   688  func TestCPUMetricsSleep(t *testing.T) {
   689  	if runtime.GOOS == "wasip1" {
   690  		// Since wasip1 busy-waits in the scheduler, there's no meaningful idle
   691  		// time. This is accurately reflected in the metrics, but it means this
   692  		// test is basically meaningless on this platform.
   693  		t.Skip("wasip1 currently busy-waits in idle time; test not applicable")
   694  	}
   695  
   696  	names := []string{
   697  		"/cpu/classes/idle:cpu-seconds",
   698  
   699  		"/cpu/classes/gc/mark/assist:cpu-seconds",
   700  		"/cpu/classes/gc/mark/dedicated:cpu-seconds",
   701  		"/cpu/classes/gc/mark/idle:cpu-seconds",
   702  		"/cpu/classes/gc/pause:cpu-seconds",
   703  		"/cpu/classes/gc/total:cpu-seconds",
   704  		"/cpu/classes/scavenge/assist:cpu-seconds",
   705  		"/cpu/classes/scavenge/background:cpu-seconds",
   706  		"/cpu/classes/scavenge/total:cpu-seconds",
   707  		"/cpu/classes/total:cpu-seconds",
   708  		"/cpu/classes/user:cpu-seconds",
   709  	}
   710  	prep := func() []metrics.Sample {
   711  		mm := make([]metrics.Sample, len(names))
   712  		for i := range names {
   713  			mm[i].Name = names[i]
   714  		}
   715  		return mm
   716  	}
   717  	m1, m2 := prep(), prep()
   718  
   719  	const (
   720  		// Expected time spent idle.
   721  		dur = 100 * time.Millisecond
   722  
   723  		// maxFailures is the number of consecutive failures required to cause the test to fail.
   724  		maxFailures = 10
   725  	)
   726  
   727  	failureIdleTimes := make([]float64, 0, maxFailures)
   728  
   729  	// If the bug we expect is happening, then the Sleep CPU time will be accounted for
   730  	// as user time rather than idle time. In an ideal world we'd expect the whole application
   731  	// to go instantly idle the moment this goroutine goes to sleep, and stay asleep for that
   732  	// to go instantly idle the moment this goroutine goes to sleep, and stay idle for that
   733  	// blocked in a sleep. For example, slow platforms might spend more time expected in the
   734  	// blocked in a sleep. For example, slow platforms might spend more time than expected in the
   735  	// everything else is idle. Lastly, if a running goroutine is descheduled by the OS, enough
   736  	// time may pass such that the goroutine is ready to wake, even though the runtime couldn't
   737  	// observe itself as idle with nanotime.
   738  	//
   739  	// To deal with all this, we give a half-proc's worth of leniency.
   740  	//
   741  	// We also retry multiple times to deal with the fact that the OS might deschedule us before
   742  	// we yield and go idle. That has a rare enough chance that retries should resolve it.
   743  	// If the issue we expect is happening, it should be persistent.
   744  	minIdleCPUSeconds := dur.Seconds() * (float64(runtime.GOMAXPROCS(-1)) - 0.5)
   745  
   746  	// Let's make sure there's no background scavenge work to do.
   747  	//
   748  	// The runtime.GC calls below ensure the background sweeper
   749  	// will not run during the idle period.
   750  	debug.FreeOSMemory()
   751  
   752  	for retries := 0; retries < maxFailures; retries++ {
   753  		// Read 1.
   754  		runtime.GC() // Update /cpu/classes metrics.
   755  		metrics.Read(m1)
   756  
   757  		// Sleep.
   758  		time.Sleep(dur)
   759  
   760  		// Read 2.
   761  		runtime.GC() // Update /cpu/classes metrics.
   762  		metrics.Read(m2)
   763  
   764  		dt := m2[0].Value.Float64() - m1[0].Value.Float64()
   765  		if dt >= minIdleCPUSeconds {
   766  			// All is well. Test passed.
   767  			return
   768  		}
   769  		failureIdleTimes = append(failureIdleTimes, dt)
   770  		// Try again.
   771  	}
   772  
   773  	// We couldn't observe the expected idle time even once.
   774  	for i, dt := range failureIdleTimes {
   775  		t.Logf("try %2d: idle time = %.5fs\n", i+1, dt)
   776  	}
   777  	t.Logf("try %d breakdown:\n", len(failureIdleTimes))
   778  	for i := range names {
   779  		if m1[i].Value.Kind() == metrics.KindBad {
   780  			continue
   781  		}
   782  		t.Logf("\t%s %0.3f\n", names[i], m2[i].Value.Float64()-m1[i].Value.Float64())
   783  	}
   784  	t.Errorf(`time.Sleep did not contribute enough to "idle" class: minimum idle time = %.5fs`, minIdleCPUSeconds)
   785  }
   786  
   787  // Call fn() and verify that the correct STW metrics increment. If isGC is true,
   788  // fn triggers a GC STW. Otherwise, fn triggers a non-GC ("other") STW.
   789  func testSchedPauseMetrics(t *testing.T, fn func(t *testing.T), isGC bool) {
   790  	m := []metrics.Sample{
   791  		{Name: "/sched/pauses/stopping/gc:seconds"},
   792  		{Name: "/sched/pauses/stopping/other:seconds"},
   793  		{Name: "/sched/pauses/total/gc:seconds"},
   794  		{Name: "/sched/pauses/total/other:seconds"},
   795  	}
   796  
   797  	stoppingGC := &m[0]
   798  	stoppingOther := &m[1]
   799  	totalGC := &m[2]
   800  	totalOther := &m[3]
   801  
   802  	sampleCount := func(s *metrics.Sample) uint64 {
   803  		h := s.Value.Float64Histogram()
   804  
   805  		var n uint64
   806  		for _, c := range h.Counts {
   807  			n += c
   808  		}
   809  		return n
   810  	}
   811  
   812  	// Read baseline.
   813  	metrics.Read(m)
   814  
   815  	baselineStartGC := sampleCount(stoppingGC)
   816  	baselineStartOther := sampleCount(stoppingOther)
   817  	baselineTotalGC := sampleCount(totalGC)
   818  	baselineTotalOther := sampleCount(totalOther)
   819  
   820  	fn(t)
   821  
   822  	metrics.Read(m)
   823  
   824  	if isGC {
   825  		if got := sampleCount(stoppingGC); got <= baselineStartGC {
   826  			t.Errorf("/sched/pauses/stopping/gc:seconds sample count %d did not increase from baseline of %d", got, baselineStartGC)
   827  		}
   828  		if got := sampleCount(totalGC); got <= baselineTotalGC {
   829  			t.Errorf("/sched/pauses/total/gc:seconds sample count %d did not increase from baseline of %d", got, baselineTotalGC)
   830  		}
   831  
   832  		if got := sampleCount(stoppingOther); got != baselineStartOther {
   833  			t.Errorf("/sched/pauses/stopping/other:seconds sample count %d changed from baseline of %d", got, baselineStartOther)
   834  		}
   835  		if got := sampleCount(totalOther); got != baselineTotalOther {
   836  			t.Errorf("/sched/pauses/total/other:seconds sample count %d changed from baseline of %d", got, baselineTotalOther)
   837  		}
   838  	} else {
   839  		if got := sampleCount(stoppingGC); got != baselineStartGC {
   840  			t.Errorf("/sched/pauses/stopping/gc:seconds sample count %d changed from baseline of %d", got, baselineStartGC)
   841  		}
   842  		if got := sampleCount(totalGC); got != baselineTotalGC {
   843  			t.Errorf("/sched/pauses/total/gc:seconds sample count %d changed from baseline of %d", got, baselineTotalGC)
   844  		}
   845  
   846  		if got := sampleCount(stoppingOther); got <= baselineStartOther {
   847  			t.Errorf("/sched/pauses/stopping/other:seconds sample count %d did not increase from baseline of %d", got, baselineStartOther)
   848  		}
   849  		if got := sampleCount(totalOther); got <= baselineTotalOther {
   850  			t.Errorf("/sched/pauses/total/other:seconds sample count %d did not increase from baseline of %d", got, baselineTotalOther)
   851  		}
   852  	}
   853  }
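
        // In hedged summary, testSchedPauseMetrics expects:
        //
        //	isGC == true:  stopping/gc and total/gc counts increase;
        //	               stopping/other and total/other stay at baseline.
        //	isGC == false: stopping/other and total/other counts increase;
        //	               stopping/gc and total/gc stay at baseline.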
   854  
   855  func TestSchedPauseMetrics(t *testing.T) {
   856  	tests := []struct {
   857  		name string
   858  		isGC bool
   859  		fn   func(t *testing.T)
   860  	}{
   861  		{
   862  			name: "runtime.GC",
   863  			isGC: true,
   864  			fn: func(t *testing.T) {
   865  				runtime.GC()
   866  			},
   867  		},
   868  		{
   869  			name: "runtime.GOMAXPROCS",
   870  			fn: func(t *testing.T) {
   871  				if runtime.GOARCH == "wasm" {
   872  					t.Skip("GOMAXPROCS >1 not supported on wasm")
   873  				}
   874  
   875  				n := runtime.GOMAXPROCS(0)
   876  				defer runtime.GOMAXPROCS(n)
   877  
   878  				runtime.GOMAXPROCS(n + 1)
   879  			},
   880  		},
   881  		{
   882  			name: "runtime.GoroutineProfile",
   883  			fn: func(t *testing.T) {
   884  				var s [1]runtime.StackRecord
   885  				runtime.GoroutineProfile(s[:])
   886  			},
   887  		},
   888  		{
   889  			name: "runtime.ReadMemStats",
   890  			fn: func(t *testing.T) {
   891  				var mstats runtime.MemStats
   892  				runtime.ReadMemStats(&mstats)
   893  			},
   894  		},
   895  		{
   896  			name: "runtime.Stack",
   897  			fn: func(t *testing.T) {
   898  				var b [64]byte
   899  				runtime.Stack(b[:], true)
   900  			},
   901  		},
   902  		{
   903  			name: "runtime/debug.WriteHeapDump",
   904  			fn: func(t *testing.T) {
   905  				if runtime.GOOS == "js" {
   906  					t.Skip("WriteHeapDump not supported on js")
   907  				}
   908  
   909  				f, err := os.CreateTemp(t.TempDir(), "heapdumptest")
   910  				if err != nil {
   911  					t.Fatalf("os.CreateTemp failed: %v", err)
   912  				}
   913  				defer os.Remove(f.Name())
   914  				defer f.Close()
   915  				debug.WriteHeapDump(f.Fd())
   916  			},
   917  		},
   918  		{
   919  			name: "runtime/trace.Start",
   920  			fn: func(t *testing.T) {
   921  				if trace.IsEnabled() {
   922  					t.Skip("tracing already enabled")
   923  				}
   924  
   925  				var buf bytes.Buffer
   926  				if err := trace.Start(&buf); err != nil {
   927  					t.Errorf("trace.Start err got %v want nil", err)
   928  				}
   929  				trace.Stop()
   930  			},
   931  		},
   932  	}
   933  
   934  	// These tests count STW pauses, classified based on whether they're related
   935  	// to the GC or not. Disable automatic GC cycles during the test so we don't
   936  	// have an incidental GC pause when we're trying to observe only
   937  	// non-GC-related pauses. This is especially important for the
   938  	// runtime/trace.Start test, since (as of this writing) that will block
   939  	// until any active GC mark phase completes.
   940  	defer debug.SetGCPercent(debug.SetGCPercent(-1))
   941  	runtime.GC()
   942  
   943  	for _, tc := range tests {
   944  		t.Run(tc.name, func(t *testing.T) {
   945  			testSchedPauseMetrics(t, tc.fn, tc.isGC)
   946  		})
   947  	}
   948  }
   949  
   950  func TestRuntimeLockMetricsAndProfile(t *testing.T) {
   951  	old := runtime.SetMutexProfileFraction(0) // enabled during sub-tests
   952  	defer runtime.SetMutexProfileFraction(old)
   953  	if old != 0 {
   954  		t.Fatalf("need mutex profile fraction 0, got %d", old)
   955  	}
   956  
   957  	t.Logf("NumCPU %d", runtime.NumCPU())
   958  	t.Logf("GOMAXPROCS %d", runtime.GOMAXPROCS(0))
   959  	if minCPU := 2; runtime.NumCPU() < minCPU {
   960  		t.Skipf("creating and observing contention on runtime-internal locks requires NumCPU >= %d", minCPU)
   961  	}
   962  
   963  	loadProfile := func(t *testing.T) *profile.Profile {
   964  		var w bytes.Buffer
   965  		pprof.Lookup("mutex").WriteTo(&w, 0)
   966  		p, err := profile.Parse(&w)
   967  		if err != nil {
   968  			t.Fatalf("failed to parse profile: %v", err)
   969  		}
   970  		if err := p.CheckValid(); err != nil {
   971  			t.Fatalf("invalid profile: %v", err)
   972  		}
   973  		return p
   974  	}
   975  
   976  	measureDelta := func(t *testing.T, fn func()) (metricGrowth, profileGrowth float64, p *profile.Profile) {
   977  		beforeProfile := loadProfile(t)
   978  		beforeMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
   979  		metrics.Read(beforeMetrics)
   980  
   981  		fn()
   982  
   983  		afterProfile := loadProfile(t)
   984  		afterMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
   985  		metrics.Read(afterMetrics)
   986  
   987  		sumSamples := func(p *profile.Profile, i int) int64 {
   988  			var sum int64
   989  			for _, s := range p.Sample {
   990  				sum += s.Value[i]
   991  			}
   992  			return sum
   993  		}
   994  
   995  		metricGrowth = afterMetrics[0].Value.Float64() - beforeMetrics[0].Value.Float64()
   996  		profileGrowth = float64(sumSamples(afterProfile, 1)-sumSamples(beforeProfile, 1)) * time.Nanosecond.Seconds()
   997  
   998  		// The internal/profile package does not support compaction; this delta
   999  		// profile will include separate positive and negative entries.
  1000  		p = afterProfile.Copy()
  1001  		if len(beforeProfile.Sample) > 0 {
  1002  			err := p.Merge(beforeProfile, -1)
  1003  			if err != nil {
  1004  				t.Fatalf("Merge profiles: %v", err)
  1005  			}
  1006  		}
  1007  
  1008  		return metricGrowth, profileGrowth, p
  1009  	}
  1010  
  1011  	testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth float64, profileGrowth []int64, n, value int64, explain func()) {
  1012  		return func(t *testing.T) (metricGrowth float64, profileGrowth []int64, n, value int64, explain func()) {
  1013  			metricGrowth, totalProfileGrowth, p := measureDelta(t, func() {
  1014  				var started, stopped sync.WaitGroup
  1015  				started.Add(workers)
  1016  				stopped.Add(workers)
  1017  				for i := 0; i < workers; i++ {
  1018  					w := &contentionWorker{
  1019  						before: func() {
  1020  							started.Done()
  1021  							started.Wait()
  1022  						},
  1023  						after: func() {
  1024  							stopped.Done()
  1025  						},
  1026  						fn: fn,
  1027  					}
  1028  					go w.run()
  1029  				}
  1030  				stopped.Wait()
  1031  			})
  1032  
  1033  			if totalProfileGrowth == 0 {
  1034  				t.Errorf("no increase in mutex profile")
  1035  			}
  1036  			if metricGrowth == 0 && strictTiming {
  1037  				// If the critical section is very short, systems with low timer
  1038  				// resolution may be unable to measure it via nanotime.
  1039  				//
  1040  				// This is sampled at 1 per gTrackingPeriod, but the explicit
  1041  				// runtime.mutex tests create 200 contention events. Observing
  1042  				// zero of those has a probability of (7/8)^200 = 2.5e-12 which
  1043  				// is acceptably low (though the calculation has a tenuous
  1044  				// dependency on cheaprandn being a good-enough source of
  1045  				// entropy).
  1046  				t.Errorf("no increase in /sync/mutex/wait/total:seconds metric")
  1047  			}
  1048  			// This comparison is possible because the time measurements in support of
  1049  			// runtime/pprof and runtime/metrics for runtime-internal locks are so close
  1050  			// together. It doesn't work as well for user-space contention, where the
  1051  			// involved goroutines are not _Grunnable the whole time and so need to pass
  1052  			// through the scheduler.
  1053  			t.Logf("lock contention growth in runtime/pprof's view  (%fs)", totalProfileGrowth)
  1054  			t.Logf("lock contention growth in runtime/metrics' view (%fs)", metricGrowth)
  1055  
  1056  			acceptStacks = append([][]string(nil), acceptStacks...)
  1057  			for i, stk := range acceptStacks {
  1058  				if goexperiment.StaticLockRanking {
  1059  					if !slices.ContainsFunc(stk, func(s string) bool {
  1060  						return s == "runtime.systemstack" || s == "runtime.mcall" || s == "runtime.mstart"
  1061  					}) {
  1062  						// stk is a call stack that is still on the user stack when
  1063  						// it calls runtime.unlock. Add the extra function that
  1064  						// we'll see, when the static lock ranking implementation of
  1065  						// runtime.unlockWithRank switches to the system stack.
  1066  						stk = append([]string{"runtime.unlockWithRank"}, stk...)
  1067  					}
  1068  				}
  1069  				acceptStacks[i] = stk
  1070  			}
  1071  
  1072  			var stks [][]string
  1073  			values := make([][2]int64, len(acceptStacks)+1)
  1074  			for _, s := range p.Sample {
  1075  				var have []string
  1076  				for _, loc := range s.Location {
  1077  					for _, line := range loc.Line {
  1078  						have = append(have, line.Function.Name)
  1079  					}
  1080  				}
  1081  				stks = append(stks, have)
  1082  				found := false
  1083  				for i, stk := range acceptStacks {
  1084  					if slices.Equal(have, stk) {
  1085  						values[i][0] += s.Value[0]
  1086  						values[i][1] += s.Value[1]
  1087  						found = true
  1088  						break
  1089  					}
  1090  				}
  1091  				if !found {
  1092  					values[len(values)-1][0] += s.Value[0]
  1093  					values[len(values)-1][1] += s.Value[1]
  1094  				}
  1095  			}
  1096  			profileGrowth = make([]int64, len(acceptStacks)+1)
  1097  			profileGrowth[len(profileGrowth)-1] = values[len(values)-1][1]
  1098  			for i, stk := range acceptStacks {
  1099  				n += values[i][0]
  1100  				value += values[i][1]
  1101  				profileGrowth[i] = values[i][1]
  1102  				t.Logf("stack %v has samples totaling n=%d value=%d", stk, values[i][0], values[i][1])
  1103  			}
  1104  			if n == 0 && value == 0 {
  1105  				t.Logf("profile:\n%s", p)
  1106  				for _, have := range stks {
  1107  					t.Logf("have stack %v", have)
  1108  				}
  1109  				for _, stk := range acceptStacks {
  1110  					t.Errorf("want stack %v", stk)
  1111  				}
  1112  			}
  1113  
  1114  			return metricGrowth, profileGrowth, n, value, func() {
  1115  				t.Logf("profile:\n%s", p)
  1116  			}
  1117  		}
  1118  	}
  1119  
  1120  	name := t.Name()
  1121  
  1122  	t.Run("runtime.lock", func(t *testing.T) {
  1123  		// The goroutine that acquires the lock will only proceed when it
  1124  		// detects that its partner is contended for the lock. That will lead to
  1125  		// live-lock if anything (such as a STW) prevents the partner goroutine
  1126  		// from running. Allowing the contention workers to pause and restart
  1127  		// (to allow a STW to proceed) makes it harder to confirm that we're
  1128  		// counting the correct number of contention events, since some locks
  1129  		// will end up contended twice. Instead, disable the GC.
  1130  		defer debug.SetGCPercent(debug.SetGCPercent(-1))
  1131  
  1132  		mus := make([]runtime.Mutex, 200)
  1133  		var needContention atomic.Int64
  1134  
  1135  		baseDelay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks
  1136  		fastDelayMicros := baseDelay.Microseconds()
  1137  		slowDelayMicros := baseDelay.Microseconds() * 4
  1138  
  1139  		const (
  1140  			fastRole = iota
  1141  			slowRole
  1142  			workerCount
  1143  		)
  1144  		if runtime.GOMAXPROCS(0) < workerCount {
  1145  			t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workerCount)
  1146  		}
  1147  
  1148  		minTicks := make([][]int64, workerCount) // lower bound, known-contended time, measured by cputicks
  1149  		maxTicks := make([][]int64, workerCount) // upper bound, total lock() duration, measured by cputicks
  1150  		for i := range minTicks {
  1151  			minTicks[i] = make([]int64, len(mus))
  1152  			maxTicks[i] = make([]int64, len(mus))
  1153  		}
  1154  		var id atomic.Int32
  1155  		fn := func() bool {
  1156  			n := int(needContention.Load())
  1157  			if n < 0 {
  1158  				return false
  1159  			}
  1160  			mu := &mus[n]
  1161  
  1162  			// Each worker has a role: to have a fast or slow critical section.
  1163  			// Rotate the role assignments as we step through the mutex slice so
  1164  			// we don't end up with one M always claiming the same kind of work.
  1165  			id := int(id.Add(1))
  1166  			role := (id + n) % workerCount
  1167  
  1168  			marker, delayMicros := fastMarkerFrame, fastDelayMicros
  1169  			if role == slowRole {
  1170  				marker, delayMicros = slowMarkerFrame, slowDelayMicros
  1171  			}
  1172  
  1173  			// Each lock is used by two different critical sections, one fast
  1174  			// and one slow, identified in profiles by their different "marker"
  1175  			// functions. We expect the profile to blame each for the amount of
  1176  			// delay it inflicts on other users of the lock. We run one worker
  1177  			// of each kind, so any contention in one would be due to the other.
  1178  			//
  1179  			// We measure how long our runtime.lock call takes, which sets an
  1180  			// upper bound on how much blame to expect for the other worker type
  1181  			// in the profile. And if we acquire the lock first, we wait for the
  1182  			// other worker to announce its contention. We measure the
  1183  			// known-contended time, to use as a lower bound on how much blame
  1184  			// we expect of ourselves in the profile. Then we stall for a little
  1185  			// while (different amounts for "fast" versus "slow") before
  1186  			// unlocking the mutex.
  1187  
  1188  			marker(func() {
  1189  				t0 := runtime.Cputicks()
  1190  				runtime.Lock(mu)
  1191  				maxTicks[role][n] = runtime.Cputicks() - t0
  1192  				minTicks[role][n] = 0
  1193  				for int(needContention.Load()) == n {
  1194  					if runtime.MutexContended(mu) {
  1195  						t1 := runtime.Cputicks()
  1196  						// make them wait a little while
  1197  						for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
  1198  							runtime.Usleep(uint32(1 + delayMicros/8))
  1199  						}
  1200  						minTicks[role][n] = runtime.Cputicks() - t1
  1201  						break
  1202  					}
  1203  				}
  1204  				runtime.Unlock(mu)
  1205  				needContention.Store(int64(n - 1))
  1206  			})
  1207  
  1208  			return true
  1209  		}
  1210  
  1211  		stks := make([][]string, 2)
  1212  		for i := range stks {
  1213  			marker := "runtime_test.fastMarkerFrame"
  1214  			if i == slowRole {
  1215  				marker = "runtime_test.slowMarkerFrame"
  1216  			}
  1217  
  1218  			stks[i] = []string{
  1219  				"runtime.unlock",
  1220  				"runtime_test." + name + ".func4.1.1",
  1221  				marker,
  1222  				"runtime_test." + name + ".func4.1",
  1223  				"runtime_test.(*contentionWorker).run",
  1224  			}
  1225  		}
  1226  
  1227  		t.Run("sample-1", func(t *testing.T) {
  1228  			old := runtime.SetMutexProfileFraction(1)
  1229  			defer runtime.SetMutexProfileFraction(old)
  1230  
  1231  			needContention.Store(int64(len(mus) - 1))
  1232  			metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workerCount, fn)(t)
  1233  			defer func() {
  1234  				if t.Failed() {
  1235  					explain()
  1236  				}
  1237  			}()
  1238  
  1239  			t.Run("metric", func(t *testing.T) {
  1240  				// The runtime/metrics view may be sampled at 1 per
  1241  				// gTrackingPeriod, so we don't have a hard lower bound here.
  1242  				testenv.SkipFlaky(t, 64253)
  1243  
  1244  				if have, want := metricGrowth, baseDelay.Seconds()*float64(len(mus)); have < want {
  1245  					// The test imposes a delay with usleep, verified with calls to
  1246  					// nanotime. Compare against the runtime/metrics package's view
  1247  					// (based on nanotime) rather than runtime/pprof's view (based
  1248  					// on cputicks).
  1249  					t.Errorf("runtime/metrics reported less than the known minimum contention duration (%fs < %fs)", have, want)
  1250  				}
  1251  			})
  1252  			if have, want := n, int64(len(mus)); have != want {
  1253  				t.Errorf("mutex profile reported contention count different from the known true count (%d != %d)", have, want)
  1254  			}
  1255  
  1256  			var slowMinTicks, fastMinTicks int64
  1257  			for role, ticks := range minTicks {
  1258  				for _, delta := range ticks {
  1259  					if role == slowRole {
  1260  						slowMinTicks += delta
  1261  					} else {
  1262  						fastMinTicks += delta
  1263  					}
  1264  				}
  1265  			}
  1266  			var slowMaxTicks, fastMaxTicks int64
  1267  			for role, ticks := range maxTicks {
  1268  				for _, delta := range ticks {
  1269  					if role == slowRole {
  1270  						slowMaxTicks += delta
  1271  					} else {
  1272  						fastMaxTicks += delta
  1273  					}
  1274  				}
  1275  			}
  1276  
  1277  			cpuGHz := float64(runtime.CyclesPerSecond()) / 1e9
  1278  			for _, set := range []struct {
  1279  				name     string
  1280  				profTime int64
  1281  				minTime  int64
  1282  				maxTime  int64
  1283  			}{
  1284  				{
  1285  					name:     "slow",
  1286  					profTime: profileGrowth[slowRole],
  1287  					minTime:  int64(float64(slowMinTicks) / cpuGHz),
  1288  					maxTime:  int64(float64(fastMaxTicks) / cpuGHz),
  1289  				},
  1290  				{
  1291  					name:     "fast",
  1292  					profTime: profileGrowth[fastRole],
  1293  					minTime:  int64(float64(fastMinTicks) / cpuGHz),
  1294  					maxTime:  int64(float64(slowMaxTicks) / cpuGHz),
  1295  				},
  1296  			} {
  1297  				t.Logf("profile's view of delays due to %q critical section:                 %dns", set.name, set.profTime)
  1298  				t.Logf("test's view of known-contended time within %q critical section:      %dns", set.name, set.minTime)
  1299  				t.Logf("test's view of lock duration before critical sections other than %q: %dns", set.name, set.maxTime)
  1300  
  1301  				if set.profTime < set.minTime {
  1302  					t.Errorf("profile undercounted %q critical section", set.name)
  1303  				}
  1304  				if set.profTime > set.maxTime {
  1305  					t.Errorf("profile overcounted %q critical section", set.name)
  1306  				}
  1307  			}
  1308  
  1309  			var totalProfileGrowth float64
  1310  			for _, growth := range profileGrowth {
  1311  				totalProfileGrowth += float64(growth) * time.Nanosecond.Seconds()
  1312  			}
  1313  
  1314  			const slop = 1.5 // account for nanotime vs cputicks
  1315  			t.Run("compare timers", func(t *testing.T) {
  1316  				testenv.SkipFlaky(t, 64253)
  1317  				if totalProfileGrowth > slop*metricGrowth || metricGrowth > slop*totalProfileGrowth {
  1318  					t.Errorf("views differ by more than %fx", slop)
  1319  				}
  1320  			})
  1321  		})
  1322  
  1323  		t.Run("sample-2", func(t *testing.T) {
  1324  			testenv.SkipFlaky(t, 64253)
  1325  
  1326  			old := runtime.SetMutexProfileFraction(2)
  1327  			defer runtime.SetMutexProfileFraction(old)
  1328  
  1329  			needContention.Store(int64(len(mus) - 1))
  1330  			metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workerCount, fn)(t)
  1331  			defer func() {
  1332  				if t.Failed() {
  1333  					explain()
  1334  				}
  1335  			}()
  1336  
  1337  			// With 100 trials and profile fraction of 2, we expect to capture
  1338  			// 50 samples. Allow the test to pass if we get at least 20 samples;
  1339  			// the CDF of the binomial distribution says there's less than a
  1340  			// 1e-9 chance of that, which is an acceptably low flakiness rate.
  1341  			const samplingSlop = 2.5
  1342  
  1343  			if have, want := metricGrowth, baseDelay.Seconds()*float64(len(mus)); samplingSlop*have < want {
  1344  				// The test imposes a delay with usleep, verified with calls to
  1345  				// nanotime. Compare against the runtime/metrics package's view
  1346  				// (based on nanotime) rather than runtime/pprof's view (based
  1347  				// on cputicks).
  1348  				t.Errorf("runtime/metrics reported less than the known minimum contention duration (%f * %fs < %fs)", samplingSlop, have, want)
  1349  			}
  1350  			if have, want := n, int64(len(mus)); float64(have) > float64(want)*samplingSlop || float64(want) > float64(have)*samplingSlop {
  1351  				t.Errorf("mutex profile reported contention count too different from the expected count (%d far from %d)", have, want)
  1352  			}
  1353  
  1354  			var totalProfileGrowth float64
  1355  			for _, growth := range profileGrowth {
  1356  				totalProfileGrowth += float64(growth) * time.Nanosecond.Seconds()
  1357  			}
  1358  
  1359  			const timerSlop = 1.5 * samplingSlop // account for nanotime vs cputicks, plus the two views' independent sampling
  1360  			if totalProfileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*totalProfileGrowth {
  1361  				t.Errorf("views differ by more than %fx", timerSlop)
  1362  			}
  1363  		})
  1364  	})
  1365  
  1366  	t.Run("runtime.semrelease", func(t *testing.T) {
  1367  		testenv.SkipFlaky(t, 64253)
  1368  
  1369  		old := runtime.SetMutexProfileFraction(1)
  1370  		defer runtime.SetMutexProfileFraction(old)
  1371  
  1372  		const workers = 3
  1373  		if runtime.GOMAXPROCS(0) < workers {
  1374  			t.Skipf("creating and observing contention on runtime-internal semaphores requires GOMAXPROCS >= %d", workers)
  1375  		}
  1376  
  1377  		var sem uint32 = 1
  1378  		var tries atomic.Int32
  1379  		tries.Store(10_000_000) // prefer controlled failure to timeout
  1380  		var sawContention atomic.Int32
  1381  		var need int32 = 1
  1382  		fn := func() bool {
  1383  			if sawContention.Load() >= need {
  1384  				return false
  1385  			}
  1386  			if tries.Add(-1) < 0 {
  1387  				return false
  1388  			}
  1389  
  1390  			runtime.Semacquire(&sem)
  1391  			runtime.Semrelease1(&sem, false, 0)
  1392  			if runtime.MutexContended(runtime.SemRootLock(&sem)) {
  1393  				sawContention.Add(1)
  1394  			}
  1395  			return true
  1396  		}
  1397  
  1398  		stks := [][]string{
  1399  			{
  1400  				"runtime.unlock",
  1401  				"runtime.semrelease1",
  1402  				"runtime_test.TestRuntimeLockMetricsAndProfile.func5.1",
  1403  				"runtime_test.(*contentionWorker).run",
  1404  			},
  1405  			{
  1406  				"runtime.unlock",
  1407  				"runtime.semacquire1",
  1408  				"runtime.semacquire",
  1409  				"runtime_test.TestRuntimeLockMetricsAndProfile.func5.1",
  1410  				"runtime_test.(*contentionWorker).run",
  1411  			},
  1412  		}
  1413  
  1414  		// Verify that we get the call stacks we expect, with anything more than zero
  1415  		// cycles / zero samples. The duration of each contention event is too
  1416  		// small relative to the expected overhead for us to verify its value
  1417  		// more directly. Leave that to the explicit lock/unlock test.
  1418  
  1419  		testcase(false, stks, workers, fn)(t)
  1420  
  1421  		if remaining := tries.Load(); remaining >= 0 {
  1422  			t.Logf("finished test early (%d tries remaining)", remaining)
  1423  		}
  1424  	})
  1425  }
  1426  
  1427  func slowMarkerFrame(fn func()) { fn() }
  1428  func fastMarkerFrame(fn func()) { fn() }
  1429  
  1430  // contentionWorker provides cleaner call stacks for lock contention profile tests
  1431  type contentionWorker struct {
  1432  	before func()
  1433  	fn     func() bool
  1434  	after  func()
  1435  }
  1436  
  1437  func (w *contentionWorker) run() {
  1438  	defer w.after()
  1439  	w.before()
  1440  
  1441  	for w.fn() {
  1442  	}
  1443  }
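
        // A hedged usage sketch of contentionWorker, mirroring the testcase
        // helper in TestRuntimeLockMetricsAndProfile (doOneEvent is a
        // hypothetical callback, not defined in this file):
        //
        //	w := &contentionWorker{
        //		before: func() { started.Done(); started.Wait() },
        //		fn:     func() bool { return doOneEvent() },
        //		after:  stopped.Done,
        //	}
        //	go w.run() // loops calling fn until it returns false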
  1444  
  1445  func TestCPUStats(t *testing.T) {
  1446  	// Run a few GC cycles to get some of the stats to be non-zero.
  1447  	runtime.GC()
  1448  	runtime.GC()
  1449  	runtime.GC()
  1450  
  1451  	// Set GOMAXPROCS high then sleep briefly to ensure we generate
  1452  	// some idle time.
  1453  	oldmaxprocs := runtime.GOMAXPROCS(10)
  1454  	time.Sleep(time.Millisecond)
  1455  	runtime.GOMAXPROCS(oldmaxprocs)
  1456  
  1457  	stats := runtime.ReadCPUStats()
  1458  	gcTotal := stats.GCAssistTime + stats.GCDedicatedTime + stats.GCIdleTime + stats.GCPauseTime
  1459  	if gcTotal != stats.GCTotalTime {
  1460  		t.Errorf("manually computed total does not match GCTotalTime: %d cpu-ns vs. %d cpu-ns", gcTotal, stats.GCTotalTime)
  1461  	}
  1462  	scavTotal := stats.ScavengeAssistTime + stats.ScavengeBgTime
  1463  	if scavTotal != stats.ScavengeTotalTime {
  1464  		t.Errorf("manually computed total does not match ScavengeTotalTime: %d cpu-ns vs. %d cpu-ns", scavTotal, stats.ScavengeTotalTime)
  1465  	}
  1466  	total := gcTotal + scavTotal + stats.IdleTime + stats.UserTime
  1467  	if total != stats.TotalTime {
  1468  		t.Errorf("manually computed overall total does not match TotalTime: %d cpu-ns vs. %d cpu-ns", total, stats.TotalTime)
  1469  	}
  1470  	if total == 0 {
  1471  		t.Error("total time is zero")
  1472  	}
  1473  	if gcTotal == 0 {
  1474  		t.Error("GC total time is zero")
  1475  	}
  1476  	if stats.IdleTime == 0 {
  1477  		t.Error("idle time is zero")
  1478  	}
  1479  }
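
        // The identities checked above, restated for reference:
        //
        //	GCTotalTime       = GCAssistTime + GCDedicatedTime + GCIdleTime + GCPauseTime
        //	ScavengeTotalTime = ScavengeAssistTime + ScavengeBgTime
        //	TotalTime         = GCTotalTime + ScavengeTotalTime + IdleTime + UserTime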
  1480  
  1481  func TestMetricHeapUnusedLargeObjectOverflow(t *testing.T) {
  1482  	// This test makes sure /memory/classes/heap/unused:bytes
  1483  	// doesn't overflow when allocating and deallocating large
  1484  	// objects. It is a regression test for #67019.
  1485  	done := make(chan struct{})
  1486  	var wg sync.WaitGroup
  1487  	wg.Add(1)
  1488  	go func() {
  1489  		defer wg.Done()
  1490  		for {
  1491  			for range 10 {
  1492  				abi.Escape(make([]byte, 1<<20))
  1493  			}
  1494  			runtime.GC()
  1495  			select {
  1496  			case <-done:
  1497  				return
  1498  			default:
  1499  			}
  1500  		}
  1501  	}()
  1502  	s := []metrics.Sample{
  1503  		{Name: "/memory/classes/heap/unused:bytes"},
  1504  	}
  1505  	for range 1000 {
  1506  		metrics.Read(s)
  1507  		if s[0].Value.Uint64() > 1<<40 {
  1508  			t.Errorf("overflow")
  1509  			break
  1510  		}
  1511  	}
  1512  	done <- struct{}{}
  1513  	wg.Wait()
  1514  }
  1515  
