Source file src/runtime/trace.go

// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// # Go execution tracer
//
// The tracer captures a wide range of execution events like goroutine
// creation/blocking/unblocking, syscall enter/exit/block, GC-related events,
// changes of heap size, processor start/stop, etc., and writes them to a buffer
// in a compact form. A precise nanosecond-precision timestamp and a stack
// trace are captured for most events.
//
// ## Design
//
// The basic idea behind the execution tracer is to have per-M buffers that
// trace data may be written into. Each M maintains a seqlock indicating whether
// its trace buffer is currently in use.
//
// Tracing is initiated by StartTrace, and proceeds in "generations," with each
// generation being marked by a call to traceAdvance, to advance to the next
// generation. Generations are a global synchronization point for trace data,
// and we proceed to a new generation by moving forward trace.gen. Each M reads
// trace.gen under its own seqlock to determine which generation it is writing
// trace data for. To this end, each M has 2 slots for buffers: one slot for the
// previous generation, one slot for the current one. It uses tl.gen to select
// which buffer slot to write to. Simultaneously, traceAdvance uses the seqlock
// to determine whether every thread is guaranteed to observe an updated
// trace.gen. Once it is sure, it may then safely flush any buffers left over
// from the previous generation, since it knows the Ms will not mutate them.
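//
// As a sketch (names simplified; traceAcquire and traceRelease in
// traceruntime.go implement the real protocol):
//
//	seq := mp.trace.seqlock.Add(1) // now odd: this M is writing
//	gen := trace.gen.Load()        // stable until the seqlock is released
//	writeEvent(&mp.trace.buf[gen%2], ...) // writeEvent stands in for the real write path
//	mp.trace.seqlock.Add(1)        // now even: done writing
//
// traceAdvance, after bumping trace.gen, waits until every M's seqlock is
// even before flushing that M's buffer for the previous generation.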
//
// Flushed buffers are processed by the ReadTrace function, which is called by
// the trace reader goroutine. The first goroutine to call ReadTrace is designated
// as the trace reader goroutine until tracing completes. (There may only be one at
// a time.)
//
// Once all buffers are flushed, any extra post-processing is complete, and the
// flushed buffers have been processed by the trace reader goroutine, the trace
// emits an EndOfGeneration event to mark the global synchronization point in
// the trace.
//
// All other trace features, including CPU profile samples, stack information,
// string tables, etc., revolve around this generation system, and typically
// appear in pairs: one for the previous generation, and one for the current one.
// Like the per-M buffers, which of the two is written to is selected using trace.gen,
// and anything managed this way must similarly be mutated only in traceAdvance or
// under the M's seqlock.
// Trace events themselves are simple. They consist of a single byte for the event type,
// followed by zero or more LEB128-encoded unsigned varints. They are decoded using
// a pre-determined table for each trace version: internal/trace/tracev2.specs.
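//
// For example, the varints are standard unsigned LEB128. A sketch of an
// encoder for a single argument (the real encoder lives in the traceBuf
// write path):
//
//	func appendVarint(buf []byte, v uint64) []byte {
//		for ; v >= 0x80; v >>= 7 {
//			buf = append(buf, 0x80|byte(v))
//		}
//		return append(buf, byte(v))
//	}
//
// A full event is then one byte for the event type followed by one such
// varint per argument.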
//
// To avoid relying on timestamps for correctness and validation, each G and P
// has sequence counters that are written into trace events to encode a partial
// order. The sequence counters reset on each generation. Ms do not need sequence
// counters because they are the source of truth for execution: trace events, and
// even whole buffers, are guaranteed to appear in order in the trace data stream,
// simply because that's the order the thread emitted them in.
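//
// For illustration (names simplified and hypothetical), an event that
// unblocks a goroutine might carry the target's ID and its next sequence
// number:
//
//	gp.trace.seq++
//	w.event(goUnblockEv, traceArg(gp.goid), traceArg(gp.trace.seq))
//
// A parser can then order this event against the goroutine's other events
// without consulting timestamps.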
//
// See traceruntime.go for the API the tracer exposes to the runtime for emitting events.
//
// In each generation, we ensure that we enumerate all goroutines, such that each
// generation's data is fully self-contained. This makes features like the flight
// recorder easy to implement. To this end, we guarantee that every live goroutine is
// listed at least once by emitting a status event for the goroutine, indicating its
// starting state. These status events are emitted based on context, generally derived
// from the event that's about to be emitted.
//
// The traceEventWriter type encapsulates these details, and is the backbone of
// the API exposed in traceruntime.go, though there are deviations where necessary.
//
// This is the overall design, but as always, there are many details. Beyond this,
// look to the invariants and select corner cases below and the code itself for the
// source of truth.
//
// See https://go.dev/issue/60773 for a link to a more complete design with rationale,
// though parts of it are out-of-date.
//
// ## Invariants
//
// 1. An m that has a trace buffer MUST be on either the allm or sched.freem lists.
//
// Otherwise, traceAdvance might miss an M with a buffer that needs to be flushed.
//
// 2. Trace buffers MUST only be mutated in traceAdvance or under a traceAcquire/traceRelease.
//
// Otherwise, traceAdvance may race with Ms writing trace data when trying to flush buffers.
//
// 3. traceAdvance MUST NOT return until all of the current generation's buffers are flushed.
//
// Otherwise, callers cannot rely on all the data they need being available (for example, for
// the flight recorder).
//
// 4. P and goroutine state transition events MUST be emitted by an M that owns its ability
//    to transition.
//
// What this means is that the M must either be the owner of the P, the owner of the goroutine,
// or the owner of a non-running goroutine's _Gscan bit. There are a lot of bad things that can
// happen if this invariant isn't maintained, mostly around generating inconsistencies in the
// trace due to racy emission of events.
//
// 5. Acquisition of a P (pidleget or takeP/gcstopP) MUST NOT be performed under a traceAcquire/traceRelease pair.
//
// Notably, it's important that traceAcquire/traceRelease not cover a state in which the
// goroutine or P is not yet owned. For example, if traceAcquire is held across both wirep and
// pidleget, then we could end up emitting an event in the wrong generation. Suppose T1
// traceAcquires in generation 1, a generation transition happens, T2 emits a ProcStop and
// executes pidleput in generation 2, and finally T1 calls pidleget and emits ProcStart.
// The ProcStart must follow the ProcStop in the trace to make any sense, but ProcStop was
// emitted in a later generation.
//
// 6. Goroutine state transitions, with the exception of transitions into _Grunning, MUST be
//    performed under the traceAcquire/traceRelease pair where the event is emitted.
//
// Otherwise, traceAdvance may observe a goroutine state that is inconsistent with the
// events being emitted. traceAdvance inspects all goroutines' states in order to emit
// a status event for any goroutine that did not have an event emitted for it already.
// If the generation then advances in between that observation and the event being emitted,
// then the trace will contain a status that doesn't line up with the event. For example,
// if the event is emitted after the state transition _Gwaiting -> _Grunnable, then
// traceAdvance may observe the goroutine in _Grunnable, emit a status event, advance the
// generation, and the following generation contains a GoUnblock event. The trace parser
// will get confused because it sees that goroutine in _Grunnable in the previous generation
// trying to be transitioned from _Gwaiting into _Grunnable in the following one. Something
// similar happens if the trace event is emitted before the state transition, so that does
// not help either.
//
// Transitions to _Grunning do not have the same problem because traceAdvance is unable to
// observe running goroutines directly. It must stop them, or wait for them to emit an event.
// Note that it cannot even stop them with asynchronous preemption in any "bad" window between
// the state transition to _Grunning and the event emission because async preemption cannot
// stop goroutines in the runtime.
//
// 7. Goroutine state transitions into _Grunning MUST emit an event for the transition after
//    the state transition.
//
// This follows from invariants (4), (5), and the explanation of (6).
// The relevant part of the previous invariant is that in order for the tracer to be unable to
// stop a goroutine, it must be in _Grunning and in the runtime. So to close any windows between
// event emission and the state transition, the event emission must happen *after* the transition
// to _Grunning.
//
// ## Select corner cases
//
// ### CGO calls / system calls
//
// CGO calls and system calls are mostly straightforward, except for P stealing. For historical
// reasons, this introduces a new trace-level P state called ProcSyscall, which used to model
// _Psyscall (now _Psyscall_unused). This state is used to indicate in the trace that a P
// is eligible for stealing as part of the parser's ordering logic.
//
// Another quirk of this corner case is the ProcSyscallAbandoned trace-level P state, which
// is used only in status events to indicate a relaxation of verification requirements. It
// tells the execution trace parser that if it can't find the thread the P was stolen from
// in the state it expects, it should accept the trace anyway. This is also historical.
// When _Psyscall still existed, one would steal the P and then emit a ProcSteal event, and
// there was no ordering between the ProcSteal and the subsequent GoSyscallEndBlocked. One
// clearly happened before the other, but since P stealing was a single atomic, there was
// no way to enforce the order. The GoSyscallEndBlocked thread could move on and end up in
// any state, and the GoSyscallEndBlocked could be in a completely different generation to
// the ProcSteal. Today this is no longer possible as the ProcSteal is always ordered before
// the GoSyscallEndBlocked event in the runtime.
//
// Both ProcSyscall and ProcSyscallAbandoned are likely no longer necessary.
//
// ### CGO callbacks
//
// When a C thread calls into Go, the execution tracer models that as the creation of a new
// goroutine. When the thread exits back into C, that is modeled as the destruction of that
// goroutine. These are the GoCreateSyscall and GoDestroySyscall events, which represent the
// creation and destruction of a goroutine with its starting and ending states being _Gsyscall.
//
// This model is simple to reason about but contradicts the runtime implementation, which
// doesn't do this directly for performance reasons. The runtime implementation instead caches
// a G on the M created for the C thread. On Linux this M is then cached in the thread's TLS,
// and on other systems, the M is put on a global list on exit from Go. We need to do some
// extra work to make sure that this is modeled correctly in the tracer. For example,
// a C thread exiting Go may leave a P hanging off of its M (whether that M is kept in TLS
// or placed back on a list). In order to correctly model goroutine creation and destruction,
// we must behave as if the P was at some point stolen by the runtime, if the C thread
// reenters Go with the same M (and thus, same P) once more.

package runtime

import (
	"internal/runtime/atomic"
	"internal/trace/tracev2"
	"unsafe"
)

// Trace state.

// trace is global tracing context.
var trace struct {
	// trace.lock must only be acquired on the system stack where
	// stack splits cannot happen while it is held.
	lock mutex

	// Trace buffer management.
	//
	// First we check the empty list for any free buffers. If there are none,
	// buffers are allocated directly from the OS. Once they're filled up and/or
	// flushed, they end up on the full queue for trace.gen%2.
	//
	// The trace reader takes buffers off the full list one-by-one and
	// places them into reading until they're finished being read from.
	// Then they're placed onto the empty list.
	//
	// Protected by trace.lock.
	reading       *traceBuf // buffer currently handed off to user
	empty         *traceBuf // stack of empty buffers
	full          [2]traceBufQueue
	workAvailable atomic.Bool

	// State for the trace reader goroutine.
	//
	// Protected by trace.lock.
	readerGen              atomic.Uintptr // the generation the reader is currently reading for
	flushedGen             atomic.Uintptr // the last completed generation
	headerWritten          bool           // whether ReadTrace has emitted trace header
	endOfGenerationWritten bool           // whether ReadTrace has emitted the end-of-generation signal

	// doneSema is used to synchronize the reader and traceAdvance. Specifically,
	// it notifies traceAdvance that the reader is done with a generation.
	// Both semaphores are 0 by default (so, acquires block). traceAdvance
	// attempts to acquire for gen%2 after flushing the last buffers for gen.
	// Meanwhile the reader releases the sema for gen%2 when it has finished
	// processing gen.
	doneSema [2]uint32

	// Trace data tables for deduplicating data going into the trace.
	// There are 2 of each: one for gen%2, one for 1-gen%2.
	stackTab  [2]traceStackTable  // maps stack traces to unique ids
	stringTab [2]traceStringTable // maps strings to unique ids
	typeTab   [2]traceTypeTable   // maps type pointers to unique ids

	// cpuLogRead accepts CPU profile samples from the signal handler where
	// they're generated. There are two profBufs here: one for gen%2, one for
	// 1-gen%2. These profBufs use a three-word header to hold the IDs of the P, G,
	// and M (respectively) that were active at the time of the sample. Because
	// profBuf uses a record with all zeros in its header to indicate overflow,
	// we make sure to make the P field always non-zero: The ID of a real P will
	// start at bit 1, and bit 0 will be set. Samples that arrive while no P is
	// running (such as near syscalls) will set the first header field to 0b10.
	// This careful handling of the first header field allows us to store the ID
	// of the active G directly in the second field, even though that will be 0
	// when sampling g0.
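	//
	// As a sketch of the encoding described above (hdr being the hypothetical
	// three-word header):
	//
	//	hdr[0] = uint64(pid)<<1 | 1 // sample taken while running on P pid
	//	hdr[0] = 0b10               // sample taken with no P (e.g. near a syscall)
	//	hdr[1] = uint64(goid)       // 0 when sampling g0
	//	hdr[2] = uint64(mid)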
	//
	// Initialization and teardown of these fields is protected by traceAdvanceSema.
	cpuLogRead  [2]*profBuf
	signalLock  atomic.Uint32              // protects use of the following member, only usable in signal handlers
	cpuLogWrite [2]atomic.Pointer[profBuf] // copy of cpuLogRead for use in signal handlers, set without signalLock
	cpuSleep    *wakeableSleep
	cpuLogDone  <-chan struct{}
	cpuBuf      [2]*traceBuf

	reader atomic.Pointer[g] // goroutine that called ReadTrace, or nil

	// Fast mappings from enumerations to string IDs that are prepopulated
	// in the trace.
	markWorkerLabels [2][len(gcMarkWorkerModeStrings)]traceArg
	goStopReasons    [2][len(traceGoStopReasonStrings)]traceArg
	goBlockReasons   [2][len(traceBlockReasonStrings)]traceArg

	// enabled indicates whether tracing is enabled, but it is only an optimization,
	// NOT the source of truth on whether tracing is enabled. Tracing is only truly
	// enabled if gen != 0. This is used as an optimistic fast path check.
	//
	// Transitioning this value from true -> false is easy (once gen is 0)
	// because it's OK for enabled to have a stale "true" value. traceAcquire will
	// always double-check gen.
	//
	// Transitioning this value from false -> true is harder. We need to make sure
	// this is observable as true strictly before gen != 0. To maintain this invariant
	// we only make this transition with the world stopped and use the store to gen
	// as a publication barrier.
	enabled bool

	// enabledWithAllocFree is set if debug.traceallocfree is != 0 when tracing begins.
	// It follows the same synchronization protocol as enabled.
	enabledWithAllocFree bool

	// Trace generation counter.
	gen            atomic.Uintptr
	lastNonZeroGen uintptr // last non-zero value of gen

	// shutdown is set when we are waiting for the trace reader to finish after setting gen to 0.
	//
	// Writes protected by trace.lock.
	shutdown atomic.Bool

	// Number of goroutines in syscall exiting slow path.
	exitingSyscall atomic.Int32

	// seqGC is the sequence counter for GC begin/end.
	//
	// Mutated only during stop-the-world.
	seqGC uint64

	// minPageHeapAddr is the minimum address of the page heap when tracing started.
	minPageHeapAddr uint64

	// debugMalloc is the value of debug.malloc before tracing began.
	debugMalloc bool
}

// Trace public API.

var (
	traceAdvanceSema  uint32 = 1
	traceShutdownSema uint32 = 1
)

// StartTrace enables tracing for the current process.
// While tracing, the data will be buffered and available via [ReadTrace].
// StartTrace returns an error if tracing is already enabled.
// Most clients should use the [runtime/trace] package or the [testing] package's
// -test.trace flag instead of calling StartTrace directly.
func StartTrace() error {
	if traceEnabled() || traceShuttingDown() {
		return errorString("tracing is already enabled")
	}
	// Block until cleanup of the last trace is done.
	semacquire(&traceShutdownSema)
	semrelease(&traceShutdownSema)

	// Hold traceAdvanceSema across trace start, since we'll want it on
	// the other side of tracing being enabled globally.
	semacquire(&traceAdvanceSema)

	// Initialize CPU profile -> trace ingestion.
	traceInitReadCPU()

	// Compute the first generation for this StartTrace.
	//
	// Note: we start from the last non-zero generation rather than 1 so we
	// can avoid resetting all the arrays indexed by gen%2 or gen%3. There's
	// more than one of each per m, p, and goroutine.
	firstGen := traceNextGen(trace.lastNonZeroGen)

	// Reset GC sequencer.
	trace.seqGC = 1

	// Reset trace reader state.
	trace.headerWritten = false
	trace.readerGen.Store(firstGen)
	trace.flushedGen.Store(0)

	// Register some basic strings in the string tables.
	traceRegisterLabelsAndReasons(firstGen)

	// N.B. This may block for quite a while to get a frequency estimate. Do it
	// here to minimize the time that the world is stopped.
	frequency := traceClockUnitsPerSecond()

	// Stop the world.
	//
	// What we need to successfully begin tracing is to make sure that the next time
	// *any goroutine* hits a traceAcquire, it sees that the trace is enabled.
	//
	// Stopping the world gets us most of the way there, since it makes sure that goroutines
	// stop executing. There is however one exception: goroutines without Ps concurrently
	// exiting a syscall. We handle this by making sure that, after we update trace.gen,
	// there isn't a single goroutine calling traceAcquire on the syscall slow path by checking
	// trace.exitingSyscall. See the comment on the check below for more details.
	//
	// Note also that stopping the world is necessary to make sure sweep-related events are
	// coherent. Since the world is stopped and sweeps are non-preemptible, we can never start
	// the world and see an unpaired sweep 'end' event. Other parts of the tracer rely on this.
	stw := stopTheWorld(stwStartTrace)

	// Prevent sysmon from running any code that could generate events.
	lock(&sched.sysmonlock)

	// Grab the minimum page heap address. All Ps are stopped, so it's safe to read this since
	// nothing can allocate heap memory.
	trace.minPageHeapAddr = uint64(mheap_.pages.inUse.ranges[0].base.addr())

	// Reset mSyscallID on all Ps while we have them stationary and the trace is disabled.
	for _, pp := range allp {
		pp.trace.mSyscallID = -1
	}

	// Start tracing.
	//
	// Set trace.enabled. This is *very* subtle. We need to maintain the invariant that if
	// trace.gen != 0, then trace.enabled is always observed as true. Simultaneously, for
	// performance, we need trace.enabled to be read without any synchronization.
	//
	// We ensure this is safe by stopping the world, which acts as a global barrier on almost
	// every M, and explicitly synchronize with any other Ms that could be running concurrently
	// with us. Today, there are only two such cases:
	// - sysmon, which we synchronized with by acquiring sysmonlock.
	// - goroutines exiting syscalls, which we synchronize with via trace.exitingSyscall.
	//
	// After trace.gen is updated, other Ms may start creating trace buffers and emitting
	// data into them.
	trace.enabled = true
	if debug.traceallocfree.Load() != 0 {
		// Enable memory events since the GODEBUG is set.
		trace.debugMalloc = debug.malloc
		trace.enabledWithAllocFree = true
		debug.malloc = true
	}
	trace.gen.Store(firstGen)

	// Wait for exitingSyscall to drain.
	//
	// It may not monotonically decrease to zero, but in the limit it will always become
	// zero because the world is stopped and there are no available Ps for syscall-exited
	// goroutines to run on.
	//
	// Because we set gen before checking this, and because exitingSyscall is always incremented
	// *before* traceAcquire (which checks gen), we can be certain that once exitingSyscall is
	// zero, any goroutine that goes to exit a syscall from then on *must* observe the new gen
	// as well as trace.enabled being set to true.
	//
	// The critical section on each goroutine here is going to be quite short, so the likelihood
	// that we observe a zero value is high.
	for trace.exitingSyscall.Load() != 0 {
		osyield()
	}

	// Record some initial pieces of information.
	//
	// N.B. This will also emit a status event for this goroutine.
	tl := traceAcquire()
	traceSyncBatch(firstGen, frequency) // Get this as early in the trace as possible. See comment in traceAdvance.
	tl.Gomaxprocs(gomaxprocs)           // Get this as early in the trace as possible. See comment in traceAdvance.
	tl.STWStart(stwStartTrace)          // We didn't trace this above, so trace it now.

	// Record the fact that a GC is active, if applicable.
	if gcphase == _GCmark || gcphase == _GCmarktermination {
		tl.GCActive()
	}

	// Dump a snapshot of memory, if enabled.
	if trace.enabledWithAllocFree {
		traceSnapshotMemory(firstGen)
	}

	// Record the heap goal so we have it at the very beginning of the trace.
	tl.HeapGoal()

	// Make sure a ProcStatus is emitted for every P, while we're here.
	for _, pp := range allp {
		tl.writer().writeProcStatusForP(pp, pp == tl.mp.p.ptr()).end()
	}
	traceRelease(tl)

	unlock(&sched.sysmonlock)
	startTheWorld(stw)

	traceStartReadCPU()
	traceAdvancer.start()

	semrelease(&traceAdvanceSema)
	return nil
}

// StopTrace stops tracing, if it was previously enabled.
// StopTrace only returns after all the reads for the trace have completed.
func StopTrace() {
	traceAdvance(true)
}

// traceAdvance moves tracing to the next generation, and cleans up the current generation,
// ensuring that it's flushed out before returning. If stopTrace is true, it disables tracing
// altogether instead of advancing to the next generation.
//
// traceAdvanceSema must not be held.
//
// traceAdvance is called by runtime/trace and golang.org/x/exp/trace using linkname.
//
//go:linkname traceAdvance
func traceAdvance(stopTrace bool) {
	semacquire(&traceAdvanceSema)

	// Get the gen that we're advancing from. In this function we don't really care much
	// about the generation we're advancing _into_ since we'll do all the cleanup in this
	// generation for the next advancement.
	gen := trace.gen.Load()
	if gen == 0 {
		// We may end up here if traceAdvance is called concurrently with StopTrace.
		semrelease(&traceAdvanceSema)
		return
	}

	// Collect all the untraced Gs.
	type untracedG struct {
		gp           *g
		goid         uint64
		mid          int64
		stackID      uint64
		status       uint32
		waitreason   waitReason
		inMarkAssist bool
	}
	var untracedGs []untracedG
	forEachGRace(func(gp *g) {
		// Make absolutely sure all Gs are ready for the next
		// generation. We need to do this even for dead Gs because
		// they may come alive with a new identity, and their
		// status-traced bookkeeping might end up being stale.
		// We may miss totally new goroutines, but they'll always
		// have clean bookkeeping.
		gp.trace.readyNextGen(gen)
		// If the status was traced, nothing else to do.
		if gp.trace.statusWasTraced(gen) {
			return
		}
		// Scribble down information about this goroutine.
		ug := untracedG{gp: gp, mid: -1}
		systemstack(func() {
			me := getg().m.curg
			// We don't have to handle this G status transition because we
			// already eliminated ourselves from consideration above.
			casGToWaitingForSuspendG(me, _Grunning, waitReasonTraceGoroutineStatus)
			// We need to suspend and take ownership of the G to safely read its
			// goid. Note that we can't actually emit the event at this point
			// because we might stop the G in a window where it's unsafe to write
			// events based on the G's status. We need the global trace buffer flush
			// coming up to make sure we're not racing with the G.
			//
			// It should be very unlikely that we try to preempt a running G here.
			// The only situation in which we might is if we're racing with a G
			// that's running for the first time in this generation. Therefore,
			// this should be relatively fast.
			s := suspendG(gp)
			if !s.dead {
				ug.goid = s.g.goid
				if s.g.m != nil {
					ug.mid = int64(s.g.m.procid)
				}
				ug.status = readgstatus(s.g) &^ _Gscan
				ug.waitreason = s.g.waitreason
				ug.inMarkAssist = s.g.inMarkAssist
				ug.stackID = traceStack(0, gp, &trace.stackTab[gen%2])
			}
			resumeG(s)
			casgstatus(me, _Gwaiting, _Grunning)
		})
		if ug.goid != 0 {
			untracedGs = append(untracedGs, ug)
		}
	})

	if !stopTrace {
		// Re-register runtime goroutine labels and stop/block reasons.
		traceRegisterLabelsAndReasons(traceNextGen(gen))
	}

	// N.B. This may block for quite a while to get a frequency estimate. Do it
	// here to minimize the time that we prevent the world from stopping.
	frequency := traceClockUnitsPerSecond()

	// Now that we've done some of the heavy stuff, prevent the world from stopping.
	// This is necessary to ensure the consistency of the STW events. If we're feeling
	// adventurous we could lift this restriction and add a STWActive event, but the
	// cost of maintaining this consistency is low. We're not going to hold this semaphore
	// for very long and most STW periods are very short.
	// Once we hold worldsema, prevent preemption as well so we're not interrupted partway
	// through this. We want to get this done as soon as possible.
	semacquire(&worldsema)
	mp := acquirem()

	// Advance the generation or stop the trace.
	trace.lastNonZeroGen = gen
	if stopTrace {
		systemstack(func() {
			// Ordering is important here. Set shutdown first, then disable tracing,
			// so that conditions like (traceEnabled() || traceShuttingDown()) have
			// no opportunity to be false. Hold the trace lock so this update appears
			// atomic to the trace reader.
			lock(&trace.lock)
			trace.shutdown.Store(true)
			trace.gen.Store(0)
			unlock(&trace.lock)

			// Clear trace.enabled. It is totally OK for this value to be stale,
			// because traceAcquire will always double-check gen.
			trace.enabled = false
		})
	} else {
		trace.gen.Store(traceNextGen(gen))
	}

	// Emit a sync batch which contains a ClockSnapshot. Also emit a ProcsChange
	// event so we have one on record for each generation. Let's emit it as soon
	// as possible so that downstream tools can rely on the value being there
	// fairly soon in a generation.
	//
	// It's important that we do this before allowing stop-the-worlds again,
	// because the procs count could change.
	if !stopTrace {
		tl := traceAcquire()
		traceSyncBatch(tl.gen, frequency)
		tl.Gomaxprocs(gomaxprocs)
		traceRelease(tl)
	}

	// Emit a GCActive event in the new generation if necessary.
	//
	// It's important that we do this before allowing stop-the-worlds again,
	// because that could emit global GC-related events.
	if !stopTrace && (gcphase == _GCmark || gcphase == _GCmarktermination) {
		tl := traceAcquire()
		tl.GCActive()
		traceRelease(tl)
	}

	// Preemption is OK again after this; if the world stops again, that's fine.
	// We're just cleaning up the last generation after this point.
	//
	// We also don't care if the GC starts again after this, for the same reasons.
	releasem(mp)
	semrelease(&worldsema)

	// Snapshot allm and freem.
	//
	// Snapshotting after the generation counter update is sufficient.
	// Because an m must be on either allm or sched.freem if it has an active trace
	// buffer, new threads added to allm after this point must necessarily observe
	// the new generation number (sched.lock acts as a barrier).
	//
	// Threads that exit before this point and are on neither list explicitly
	// flush their own buffers in traceThreadDestroy.
	//
	// Snapshotting freem is necessary because Ms can continue to emit events
	// while they're still on that list. Removal from sched.freem is serialized with
	// this snapshot, so either we'll capture an m on sched.freem and race with
	// the removal to flush its buffers (resolved by traceThreadDestroy acquiring
	// the thread's seqlock, which one of us must win, so at least its old gen buffer
	// will be flushed in time for the new generation) or it will have flushed its
	// buffers before we snapshotted it to begin with.
	lock(&sched.lock)
	mToFlush := allm
	for mp := mToFlush; mp != nil; mp = mp.alllink {
		mp.trace.link = mp.alllink
	}
	for mp := sched.freem; mp != nil; mp = mp.freelink {
		mp.trace.link = mToFlush
		mToFlush = mp
	}
	unlock(&sched.lock)

	// Iterate over our snapshot, flushing every buffer until we're done.
	//
	// Because trace writers read the generation while the seqlock is
	// held, we can be certain that when there are no writers there are
	// also no stale generation values left. Therefore, it's safe to flush
	// any buffers that remain in that generation's slot.
	const debugDeadlock = false
	systemstack(func() {
		// Track iterations for some rudimentary deadlock detection.
		i := 0
		detectedDeadlock := false

		for mToFlush != nil {
			prev := &mToFlush
			for mp := *prev; mp != nil; {
				if mp.trace.seqlock.Load()%2 != 0 {
					// The M is writing. Come back to it later.
					prev = &mp.trace.link
					mp = mp.trace.link
					continue
				}
				// Flush the trace buffer.
				//
				// trace.lock needed for traceBufFlush, but also to synchronize
				// with traceThreadDestroy, which flushes both buffers unconditionally.
				lock(&trace.lock)
				for exp, buf := range mp.trace.buf[gen%2] {
					if buf != nil {
						traceBufFlush(buf, gen)
						mp.trace.buf[gen%2][exp] = nil
					}
				}
				unlock(&trace.lock)

				// Remove the m from the flush list.
				*prev = mp.trace.link
				mp.trace.link = nil
				mp = *prev
			}
			// Yield only if we're going to be going around the loop again.
			if mToFlush != nil {
				osyield()
			}

			if debugDeadlock {
				// Try to detect a deadlock. We probably shouldn't loop here
				// this many times.
				if i > 100000 && !detectedDeadlock {
					detectedDeadlock = true
					println("runtime: failing to flush")
					for mp := mToFlush; mp != nil; mp = mp.trace.link {
						print("runtime: m=", mp.id, "\n")
					}
				}
				i++
			}
		}
	})

	// At this point, the old generation is fully flushed minus stack and string
	// tables, CPU samples, and goroutines that haven't run at all during the last
	// generation.

	// Check to see if any Gs still haven't had events written out for them.
	statusWriter := unsafeTraceWriter(gen, nil)
	for _, ug := range untracedGs {
		if ug.gp.trace.statusWasTraced(gen) {
			// It was traced, we don't need to do anything.
			continue
		}
		// It still wasn't traced. Because we ensured all Ms stopped writing trace
		// events to the last generation, that must mean the G never had its status
		// traced in gen between when we recorded it and now. If that's true, the goid
		// and status we recorded then are exactly what we want right now.
		status := goStatusToTraceGoStatus(ug.status, ug.waitreason)
		statusWriter = statusWriter.writeGoStatus(ug.goid, ug.mid, status, ug.inMarkAssist, ug.stackID)
	}
	statusWriter.flush().end()

	// Read everything out of the last gen's CPU profile buffer.
	traceReadCPU(gen)

	// Flush CPU samples, stacks, and strings for the last generation. This is safe,
	// because we're now certain no M is writing to the last generation.
	//
	// Ordering is important here. traceCPUFlush may generate new stacks and dumping
	// stacks may generate new strings.
	traceCPUFlush(gen)
	trace.stackTab[gen%2].dump(gen)
	trace.typeTab[gen%2].dump(gen)
	trace.stringTab[gen%2].reset(gen)

	// That's it. This generation is done producing buffers.
	systemstack(func() {
		lock(&trace.lock)
		trace.flushedGen.Store(gen)
		unlock(&trace.lock)
	})

	// Perform status reset on dead Ps because they just appear as idle.
	//
	// Preventing preemption is sufficient to access allp safely. allp is only
	// mutated by GOMAXPROCS calls, which require a STW.
	//
	// TODO(mknyszek): Consider explicitly emitting ProcCreate and ProcDestroy
	// events to indicate whether a P exists, rather than just making its
	// existence implicit.
	mp = acquirem()
	for _, pp := range allp[len(allp):cap(allp)] {
		pp.trace.readyNextGen(traceNextGen(gen))
	}
	releasem(mp)

	if stopTrace {
		// Acquire the shutdown sema to begin the shutdown process.
		semacquire(&traceShutdownSema)

		// Finish off CPU profile reading.
		traceStopReadCPU()

		// Reset debug.malloc if necessary. Note that this is set in a racy
		// way; that's OK. Some mallocs may still enter into the debug.malloc
		// block, but they won't generate events because tracing is disabled.
		// That is, it's OK if mallocs read a stale debug.malloc or
		// trace.enabledWithAllocFree value.
		if trace.enabledWithAllocFree {
			trace.enabledWithAllocFree = false
			debug.malloc = trace.debugMalloc
		}
	} else {
		// Go over each P and emit a status event for it if necessary.
		//
		// We do this at the beginning of the new generation instead of the
		// end like we do for goroutines because forEachP doesn't give us a
		// hook to skip Ps that have already been traced. Since we have to
		// preempt all Ps anyway, might as well stay consistent with StartTrace
		// which does this during the STW.
		semacquire(&worldsema)
		forEachP(waitReasonTraceProcStatus, func(pp *p) {
			tl := traceAcquire()
			if !pp.trace.statusWasTraced(tl.gen) {
				tl.writer().writeProcStatusForP(pp, false).end()
			}
			traceRelease(tl)
		})
		semrelease(&worldsema)
	}

	// Block until the trace reader has finished processing the last generation.
	semacquire(&trace.doneSema[gen%2])
	if raceenabled {
		raceacquire(unsafe.Pointer(&trace.doneSema[gen%2]))
	}

	// Double-check that things look as we expect after advancing and perform some
	// final cleanup if the trace has fully stopped.
	systemstack(func() {
		lock(&trace.lock)
		if !trace.full[gen%2].empty() {
			throw("trace: non-empty full trace buffer for done generation")
		}
		if stopTrace {
			if !trace.full[1-(gen%2)].empty() {
				throw("trace: non-empty full trace buffer for next generation")
			}
			if trace.reading != nil || trace.reader.Load() != nil {
				throw("trace: reading after shutdown")
			}
			// Free all the empty buffers.
			for trace.empty != nil {
				buf := trace.empty
				trace.empty = buf.link
				sysFree(unsafe.Pointer(buf), unsafe.Sizeof(*buf), &memstats.other_sys)
			}
			// Clear trace.shutdown and other flags.
			trace.headerWritten = false
			trace.shutdown.Store(false)
		}
		unlock(&trace.lock)
	})

	if stopTrace {
		// Clear the sweep state on every P for the next time tracing is enabled.
		//
		// It may be stale in the next trace because we may have ended tracing in
		// the middle of a sweep on a P.
		//
		// It's fine not to call forEachP here because tracing is disabled and we
		// know at this point that nothing is calling into the tracer, but we do
		// need to look at dead Ps too just because GOMAXPROCS could have been called
		// at any point since we stopped tracing, and we have to ensure there's no
		// bad state on dead Ps too. Prevent a STW and a concurrent GOMAXPROCS that
		// might mutate allp by making ourselves briefly non-preemptible.
		mp := acquirem()
		for _, pp := range allp[:cap(allp)] {
			pp.trace.inSweep = false
			pp.trace.maySweep = false
			pp.trace.swept = 0
			pp.trace.reclaimed = 0
		}
		releasem(mp)
	}

	// Release the advance semaphore. If stopTrace is true we're still holding onto
	// traceShutdownSema.
	//
	// Do a direct handoff. Don't let one caller of traceAdvance starve
	// other calls to traceAdvance.
	semrelease1(&traceAdvanceSema, true, 0)

	if stopTrace {
		// Stop the traceAdvancer. We can't be holding traceAdvanceSema here because
		// we'll deadlock (we're blocked on the advancer goroutine exiting, but it
		// may be currently trying to acquire traceAdvanceSema).
		traceAdvancer.stop()
		semrelease(&traceShutdownSema)
	}
}

func traceNextGen(gen uintptr) uintptr {
	if gen == ^uintptr(0) {
		// gen is used both %2 and %3 and we want both patterns to continue when we loop around.
		// ^uint32(0) and ^uint64(0) are both odd and multiples of 3. Therefore the next generation
		// we want is even and one more than a multiple of 3. The smallest such number is 4.
		return 4
	}
	return gen + 1
}
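// For example, working through the comment above:
//
//	traceNextGen(5)           == 6
//	traceNextGen(^uintptr(0)) == 4 // even and one more than a multiple of 3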

// traceRegisterLabelsAndReasons re-registers mark worker labels and
// goroutine stop/block reasons in the string table for the provided
// generation. Note: the provided generation must not have started yet.
func traceRegisterLabelsAndReasons(gen uintptr) {
	for i, label := range gcMarkWorkerModeStrings[:] {
		trace.markWorkerLabels[gen%2][i] = traceArg(trace.stringTab[gen%2].put(gen, label))
	}
	for i, str := range traceBlockReasonStrings[:] {
		trace.goBlockReasons[gen%2][i] = traceArg(trace.stringTab[gen%2].put(gen, str))
	}
	for i, str := range traceGoStopReasonStrings[:] {
		trace.goStopReasons[gen%2][i] = traceArg(trace.stringTab[gen%2].put(gen, str))
	}
}

// ReadTrace returns the next chunk of binary tracing data, blocking until data
// is available. If tracing is turned off and all the data accumulated while it
// was on has been returned, ReadTrace returns nil. The caller must copy the
// returned data before calling ReadTrace again.
// ReadTrace must be called from one goroutine at a time.
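//
// A minimal consumer, as a sketch (w is any io.Writer; most programs should
// use the runtime/trace package instead):
//
//	if err := runtime.StartTrace(); err != nil {
//		panic(err)
//	}
//	go func() {
//		for {
//			data := runtime.ReadTrace()
//			if data == nil {
//				return
//			}
//			w.Write(data) // consume the chunk before the next ReadTrace call
//		}
//	}()
//	// ... run the workload of interest ...
//	runtime.StopTrace()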
func ReadTrace() (buf []byte) {
top:
	var park bool
	systemstack(func() {
		buf, park = readTrace0()
	})
	if park {
		gopark(func(gp *g, _ unsafe.Pointer) bool {
			if !trace.reader.CompareAndSwapNoWB(nil, gp) {
				// We're racing with another reader.
				// Wake up and handle this case.
				return false
			}

			if g2 := traceReader(); gp == g2 {
				// New data arrived between unlocking
				// and the CAS and we won the wake-up
				// race, so wake up directly.
				return false
			} else if g2 != nil {
				printlock()
				println("runtime: got trace reader", g2, g2.goid)
				throw("unexpected trace reader")
			}

			return true
		}, nil, waitReasonTraceReaderBlocked, traceBlockSystemGoroutine, 2)
		goto top
	}
	return buf
}

// readTrace0 is ReadTrace's continuation on g0. This must run on the
// system stack because it acquires trace.lock.
//
//go:systemstack
func readTrace0() (buf []byte, park bool) {
	if raceenabled {
		// g0 doesn't have a race context. Borrow the user G's.
		if getg().racectx != 0 {
			throw("expected racectx == 0")
		}
		getg().racectx = getg().m.curg.racectx
		// (This defer should get open-coded, which is safe on
		// the system stack.)
		defer func() { getg().racectx = 0 }()
	}

	// This function must not allocate while holding trace.lock:
	// allocation can call heap allocate, which will try to emit a trace
	// event while holding heap lock.
	lock(&trace.lock)

	if trace.reader.Load() != nil {
		// More than one goroutine reads trace. This is bad.
		// But we would rather not crash the program because of tracing,
		// because tracing can be enabled at runtime on prod servers.
		unlock(&trace.lock)
		println("runtime: ReadTrace called from multiple goroutines simultaneously")
		return nil, false
	}
	// Recycle the old buffer.
	if buf := trace.reading; buf != nil {
		buf.link = trace.empty
		trace.empty = buf
		trace.reading = nil
	}
	// Write trace header.
	if !trace.headerWritten {
		trace.headerWritten = true
		unlock(&trace.lock)
		return []byte("go 1.26 trace\x00\x00\x00"), false
	}

	// Read the next buffer.

	if trace.readerGen.Load() == 0 {
		trace.readerGen.Store(1)
	}
	var gen uintptr
	for {
		assertLockHeld(&trace.lock)
		gen = trace.readerGen.Load()

		// Check to see if we need to block for more data in this generation
		// or if we need to move our generation forward.
		if !trace.full[gen%2].empty() {
			break
		}
		// Most of the time readerGen is one generation ahead of flushedGen, as the
		// current generation is being read from. Then, once the last buffer is flushed
		// into readerGen, flushedGen will rise to meet it. At this point, the tracer
		// is waiting on the reader to finish flushing the last generation so that it
		// can continue to advance.
		if trace.flushedGen.Load() == gen {
			// Write out the internal in-band end-of-generation signal.
			if !trace.endOfGenerationWritten {
				trace.endOfGenerationWritten = true
				unlock(&trace.lock)
				return []byte{byte(tracev2.EvEndOfGeneration)}, false
			}

			// Reset the flag.
			trace.endOfGenerationWritten = false

			// Handle shutdown.
			if trace.shutdown.Load() {
				unlock(&trace.lock)

				// Wake up anyone waiting for us to be done with this generation.
				//
				// Do this after reading trace.shutdown, because the thread we're
				// waking up is going to clear trace.shutdown.
				if raceenabled {
					// Model synchronization on trace.doneSema, which the race
					// detector does not see. This is required to avoid false
					// race reports on the writer passed to trace.Start.
					racerelease(unsafe.Pointer(&trace.doneSema[gen%2]))
				}
				semrelease(&trace.doneSema[gen%2])

				// We're shutting down, and the last generation is fully
				// read. We're done.
				return nil, false
			}
			// Handle advancing to the next generation.

			// The previous gen has had all of its buffers flushed, and
			// there's nothing else for us to read. Advance the generation
			// we're reading from and try again.
			trace.readerGen.Store(trace.gen.Load())
			unlock(&trace.lock)

			// Wake up anyone waiting for us to be done with this generation.
			//
			// Do this after reading gen to make sure we can't have the trace
			// advance until we've read it.
			if raceenabled {
				// See comment above in the shutdown case.
				racerelease(unsafe.Pointer(&trace.doneSema[gen%2]))
			}
			semrelease(&trace.doneSema[gen%2])

			// Reacquire the lock and go back to the top of the loop.
			lock(&trace.lock)
			continue
		}
		// Wait for new data.
		//
		// We don't simply use a note because the scheduler
		// executes this goroutine directly when it wakes up
		// (also a note would consume an M).
		//
		// Before we drop the lock, clear the workAvailable flag. Work can
		// only be queued with trace.lock held, so this is at least true until
		// we drop the lock.
		trace.workAvailable.Store(false)
		unlock(&trace.lock)
		return nil, true
	}
	// Pull a buffer.
	tbuf := trace.full[gen%2].pop()
	trace.reading = tbuf
	unlock(&trace.lock)
	return tbuf.arr[:tbuf.pos], false
}

// traceReader returns the trace reader that should be woken up, if any.
// Callers should first check (traceEnabled() || traceShuttingDown()).
//
// This must run on the system stack because it acquires trace.lock.
//
//go:systemstack
func traceReader() *g {
	gp := traceReaderAvailable()
	if gp == nil || !trace.reader.CompareAndSwapNoWB(gp, nil) {
		return nil
	}
	return gp
}

// traceReaderAvailable returns the trace reader if it is not currently
// scheduled and should be. Callers should first check that
// (traceEnabled() || traceShuttingDown()) is true.
func traceReaderAvailable() *g {
	// There are two conditions under which we definitely want to schedule
	// the reader:
	// - The reader is lagging behind in finishing off the last generation.
	//   In this case, trace buffers could even be empty, but the trace
	//   advancer will be waiting on the reader, so we have to make sure
	//   to schedule the reader ASAP.
	// - The reader has pending work to process for its reader generation
	//   (assuming readerGen is not lagging behind).
	//
	// We also want to be careful *not* to schedule the reader if there's
	// no work to do.
	if trace.flushedGen.Load() == trace.readerGen.Load() || trace.workAvailable.Load() {
		return trace.reader.Load()
	}
	return nil
}

// Trace advancer goroutine.
var traceAdvancer traceAdvancerState

type traceAdvancerState struct {
	timer *wakeableSleep
	done  chan struct{}
}

// start starts a new traceAdvancer.
func (s *traceAdvancerState) start() {
	// Start a goroutine to periodically advance the trace generation.
	s.done = make(chan struct{})
	s.timer = newWakeableSleep()
	go func() {
		for traceEnabled() {
			// Set a timer to wake us up.
			s.timer.sleep(int64(debug.traceadvanceperiod))

			// Try to advance the trace.
			traceAdvance(false)
		}
		s.done <- struct{}{}
	}()
}

// stop stops a traceAdvancer and blocks until it exits.
func (s *traceAdvancerState) stop() {
	s.timer.wake()
	<-s.done
	close(s.done)
	s.timer.close()
}

// defaultTraceAdvancePeriod is the approximate default period between
// new generations.
const defaultTraceAdvancePeriod = 1e9 // 1 second.

// wakeableSleep manages a wakeable goroutine sleep.
//
// Users of this type must call newWakeableSleep to initialize it before
// first use and close to free up resources. Once close is called, a new
// wakeableSleep must be created before another use.
type wakeableSleep struct {
	timer *timer

	// lock protects access to wakeup, but not send/recv on it.
	lock   mutex
	wakeup chan struct{}
}

// newWakeableSleep initializes a new wakeableSleep and returns it.
func newWakeableSleep() *wakeableSleep {
	s := new(wakeableSleep)
	lockInit(&s.lock, lockRankWakeableSleep)
	s.wakeup = make(chan struct{}, 1)
	s.timer = new(timer)
	f := func(s any, _ uintptr, _ int64) {
		s.(*wakeableSleep).wake()
	}
	s.timer.init(f, s)
	return s
}
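// A typical use, as a sketch (stopping and done are hypothetical;
// traceAdvancerState above follows this pattern):
//
//	s := newWakeableSleep()
//	done := make(chan struct{})
//	go func() {
//		for !stopping.Load() {
//			s.sleep(int64(1e9)) // ~1s, or until woken early
//			// ... periodic work ...
//		}
//		done <- struct{}{}
//	}()
//	// Elsewhere: cut the sleep short, wait for the sleeper to exit, and
//	// only then close (nothing may call wake concurrently with close).
//	stopping.Store(true)
//	s.wake()
//	<-done
//	s.close()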

// sleep sleeps for the provided duration in nanoseconds or until
// another goroutine calls wake.
//
// Must not be called by more than one goroutine at a time and
// must not be called concurrently with close.
func (s *wakeableSleep) sleep(ns int64) {
	s.timer.reset(nanotime()+ns, 0)
	lock(&s.lock)
	if raceenabled {
		raceacquire(unsafe.Pointer(&s.lock))
	}
	wakeup := s.wakeup
	if raceenabled {
		racerelease(unsafe.Pointer(&s.lock))
	}
	unlock(&s.lock)
	<-wakeup
	s.timer.stop()
}

// wake awakens any goroutine sleeping on the timer.
//
// Safe for concurrent use with all other methods.
func (s *wakeableSleep) wake() {
	// Grab the wakeup channel, which may be nil if we're
	// racing with close.
	lock(&s.lock)
	if raceenabled {
		raceacquire(unsafe.Pointer(&s.lock))
	}
	if s.wakeup != nil {
		// Non-blocking send.
		//
		// Others may also write to this channel and we don't
		// want to block on the receiver waking up. This also
		// effectively batches together wakeup notifications.
		select {
		case s.wakeup <- struct{}{}:
		default:
		}
	}
	if raceenabled {
		racerelease(unsafe.Pointer(&s.lock))
	}
	unlock(&s.lock)
}

// close wakes any goroutine sleeping on the timer and prevents
// further sleeping on it.
//
// Once close is called, the wakeableSleep must no longer be used.
//
// It must only be called once no goroutine is sleeping on the
// timer *and* nothing else will call wake concurrently.
func (s *wakeableSleep) close() {
	// Set wakeup to nil so that a late timer ends up being a no-op.
	lock(&s.lock)
	if raceenabled {
		raceacquire(unsafe.Pointer(&s.lock))
	}
	wakeup := s.wakeup
	s.wakeup = nil

	// Close the channel.
	close(wakeup)

	if raceenabled {
		racerelease(unsafe.Pointer(&s.lock))
	}
	unlock(&s.lock)
}