Source file src/runtime/mgcmark.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Garbage collector: marking and scanning
     6  
     7  package runtime
     8  
     9  import (
    10  	"internal/abi"
    11  	"internal/goarch"
    12  	"internal/goexperiment"
    13  	"internal/runtime/atomic"
    14  	"runtime/internal/sys"
    15  	"unsafe"
    16  )
    17  
    18  const (
    19  	fixedRootFinalizers = iota
    20  	fixedRootFreeGStacks
    21  	fixedRootCount
    22  
    23  	// rootBlockBytes is the number of bytes to scan per data or
    24  	// BSS root.
    25  	rootBlockBytes = 256 << 10
    26  
    27  	// maxObletBytes is the maximum bytes of an object to scan at
    28  	// once. Larger objects will be split up into "oblets" of at
    29  	// most this size. Since we can scan 1–2 MB/ms, 128 KB bounds
    30  	// scan preemption at ~100 µs.
    31  	//
    32  	// This must be > _MaxSmallSize so that the object base is the
    33  	// span base.
    34  	maxObletBytes = 128 << 10
    35  
    36  	// drainCheckThreshold specifies how many units of work to do
    37  	// between self-preemption checks in gcDrain. Assuming a scan
    38  	// rate of 1 MB/ms, this is ~100 µs. Lower values have higher
    39  	// overhead in the scan loop (the scheduler check may perform
    40  	// a syscall, so its overhead is nontrivial). Higher values
    41  	// make the system less responsive to incoming work.
    42  	drainCheckThreshold = 100000
    43  
    44  	// pagesPerSpanRoot indicates how many pages to scan from a span root
    45  	// at a time. Used by special root marking.
    46  	//
    47  	// Higher values improve throughput by increasing locality, but
    48  	// increase the minimum latency of a marking operation.
    49  	//
    50  	// Must be a multiple of the pageInUse bitmap element size and
    51  	// must also evenly divide pagesPerArena.
    52  	pagesPerSpanRoot = 512
    53  )
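
// As a rough check on the constants above (an illustrative sketch; the
// 1 MB/ms scan rate and the 64 MiB arena / 8 KiB page sizes are assumptions
// about a typical 64-bit configuration, not guarantees):
//
//	128 KiB maxObletBytes      / (1 MiB/ms) ≈ 125 µs of scan work per oblet, i.e. ~100 µs
//	100000 B drainCheckThreshold / (1 MiB/ms) ≈ 100 µs between self-preemption checks
//	pagesPerArena = 64 MiB / 8 KiB = 8192, so each arena yields 8192/512 = 16 span-root
//	jobs, and 512 is a multiple of the (8-bit) pageInUse bitmap element size.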
    54  
    55  // gcMarkRootPrepare queues root scanning jobs (stacks, globals, and
    56  // some miscellany) and initializes scanning-related state.
    57  //
    58  // The world must be stopped.
    59  func gcMarkRootPrepare() {
    60  	assertWorldStopped()
    61  
    62  	// Compute how many data and BSS root blocks there are.
    63  	nBlocks := func(bytes uintptr) int {
    64  		return int(divRoundUp(bytes, rootBlockBytes))
    65  	}
    66  
    67  	work.nDataRoots = 0
    68  	work.nBSSRoots = 0
    69  
    70  	// Scan globals.
    71  	for _, datap := range activeModules() {
    72  		nDataRoots := nBlocks(datap.edata - datap.data)
    73  		if nDataRoots > work.nDataRoots {
    74  			work.nDataRoots = nDataRoots
    75  		}
    76  
    77  		nBSSRoots := nBlocks(datap.ebss - datap.bss)
    78  		if nBSSRoots > work.nBSSRoots {
    79  			work.nBSSRoots = nBSSRoots
    80  		}
    81  	}
    82  
    83  	// Scan span roots for finalizer specials.
    84  	//
    85  	// We depend on addfinalizer to mark objects that get
    86  	// finalizers after root marking.
    87  	//
    88  	// We're going to scan the whole heap (that was available at the time the
    89  	// mark phase started, i.e. markArenas) for in-use spans which have specials.
    90  	//
    91  	// Break up the work into arenas, and further into chunks.
    92  	//
    93  	// Snapshot allArenas as markArenas. This snapshot is safe because allArenas
    94  	// is append-only.
    95  	mheap_.markArenas = mheap_.allArenas[:len(mheap_.allArenas):len(mheap_.allArenas)]
    96  	work.nSpanRoots = len(mheap_.markArenas) * (pagesPerArena / pagesPerSpanRoot)
    97  
    98  	// Scan stacks.
    99  	//
   100  	// Gs may be created after this point, but it's okay that we
   101  	// ignore them because they begin life without any roots, so
   102  	// there's nothing to scan, and any roots they create during
   103  	// the concurrent phase will be caught by the write barrier.
   104  	work.stackRoots = allGsSnapshot()
   105  	work.nStackRoots = len(work.stackRoots)
   106  
   107  	work.markrootNext = 0
   108  	work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots)
   109  
   110  	// Calculate base indexes of each root type
   111  	work.baseData = uint32(fixedRootCount)
   112  	work.baseBSS = work.baseData + uint32(work.nDataRoots)
   113  	work.baseSpans = work.baseBSS + uint32(work.nBSSRoots)
   114  	work.baseStacks = work.baseSpans + uint32(work.nSpanRoots)
   115  	work.baseEnd = work.baseStacks + uint32(work.nStackRoots)
   116  }
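
// For illustration only (a sketch of the index space computed above, not an
// additional API): markroot job indices are dense and partitioned as
//
//	[0, fixedRootCount)       fixed roots (finalizers, free G stacks)
//	[baseData, baseBSS)       data-segment blocks, rootBlockBytes each
//	[baseBSS, baseSpans)      BSS blocks, rootBlockBytes each
//	[baseSpans, baseStacks)   span shards for special root marking
//	[baseStacks, baseEnd)     goroutine stacks, one per G in stackRoots
//
// so markroot can dispatch on a single uint32 job index without locking.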
   117  
   118  // gcMarkRootCheck checks that all roots have been scanned. It is
   119  // purely for debugging.
   120  func gcMarkRootCheck() {
   121  	if work.markrootNext < work.markrootJobs {
   122  		print(work.markrootNext, " of ", work.markrootJobs, " markroot jobs done\n")
   123  		throw("left over markroot jobs")
   124  	}
   125  
   126  	// Check that stacks have been scanned.
   127  	//
   128  	// We only check the first nStackRoots Gs that we should have scanned.
   129  	// Since we don't care about newer Gs (see comment in
   130  	// gcMarkRootPrepare), no locking is required.
   131  	i := 0
   132  	forEachGRace(func(gp *g) {
   133  		if i >= work.nStackRoots {
   134  			return
   135  		}
   136  
   137  		if !gp.gcscandone {
   138  			println("gp", gp, "goid", gp.goid,
   139  				"status", readgstatus(gp),
   140  				"gcscandone", gp.gcscandone)
   141  			throw("scan missed a g")
   142  		}
   143  
   144  		i++
   145  	})
   146  }
   147  
   148  // ptrmask for an allocation containing a single pointer.
   149  var oneptrmask = [...]uint8{1}
   150  
   151  // markroot scans the i'th root.
   152  //
   153  // Preemption must be disabled (because this uses a gcWork).
   154  //
   155  // Returns the amount of GC work credit produced by the operation.
   156  // If flushBgCredit is true, then that credit is also flushed
   157  // to the background credit pool.
   158  //
   159  // nowritebarrier is only advisory here.
   160  //
   161  //go:nowritebarrier
   162  func markroot(gcw *gcWork, i uint32, flushBgCredit bool) int64 {
   163  	// Note: if you add a case here, please also update heapdump.go:dumproots.
   164  	var workDone int64
   165  	var workCounter *atomic.Int64
   166  	switch {
   167  	case work.baseData <= i && i < work.baseBSS:
   168  		workCounter = &gcController.globalsScanWork
   169  		for _, datap := range activeModules() {
   170  			workDone += markrootBlock(datap.data, datap.edata-datap.data, datap.gcdatamask.bytedata, gcw, int(i-work.baseData))
   171  		}
   172  
   173  	case work.baseBSS <= i && i < work.baseSpans:
   174  		workCounter = &gcController.globalsScanWork
   175  		for _, datap := range activeModules() {
   176  			workDone += markrootBlock(datap.bss, datap.ebss-datap.bss, datap.gcbssmask.bytedata, gcw, int(i-work.baseBSS))
   177  		}
   178  
   179  	case i == fixedRootFinalizers:
   180  		for fb := allfin; fb != nil; fb = fb.alllink {
   181  			cnt := uintptr(atomic.Load(&fb.cnt))
   182  			scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), cnt*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], gcw, nil)
   183  		}
   184  
   185  	case i == fixedRootFreeGStacks:
   186  		// Switch to the system stack so we can call
   187  		// stackfree.
   188  		systemstack(markrootFreeGStacks)
   189  
   190  	case work.baseSpans <= i && i < work.baseStacks:
   191  		// mark mspan.specials
   192  		markrootSpans(gcw, int(i-work.baseSpans))
   193  
   194  	default:
   195  		// the rest is scanning goroutine stacks
   196  		workCounter = &gcController.stackScanWork
   197  		if i < work.baseStacks || work.baseEnd <= i {
   198  			printlock()
   199  			print("runtime: markroot index ", i, " not in stack roots range [", work.baseStacks, ", ", work.baseEnd, ")\n")
   200  			throw("markroot: bad index")
   201  		}
   202  		gp := work.stackRoots[i-work.baseStacks]
   203  
   204  		// Remember when we first observed the G blocked.
   205  		// This is needed only for traceback output.
   206  		status := readgstatus(gp) // We are not in a scan state
   207  		if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 {
   208  			gp.waitsince = work.tstart
   209  		}
   210  
   211  		// scanstack must be done on the system stack in case
   212  		// we're trying to scan our own stack.
   213  		systemstack(func() {
   214  			// If this is a self-scan, put the user G in
   215  			// _Gwaiting to prevent self-deadlock. It may
   216  			// already be in _Gwaiting if this is a mark
   217  			// worker or we're in mark termination.
   218  			userG := getg().m.curg
   219  			selfScan := gp == userG && readgstatus(userG) == _Grunning
   220  			if selfScan {
   221  				casGToWaiting(userG, _Grunning, waitReasonGarbageCollectionScan)
   222  			}
   223  
   224  			// TODO: suspendG blocks (and spins) until gp
   225  			// stops, which may take a while for
   226  			// running goroutines. Consider doing this in
   227  			// two phases where the first is non-blocking:
   228  			// we scan the stacks we can and ask running
   229  			// goroutines to scan themselves; and the
   230  			// second blocks.
   231  			stopped := suspendG(gp)
   232  			if stopped.dead {
   233  				gp.gcscandone = true
   234  				return
   235  			}
   236  			if gp.gcscandone {
   237  				throw("g already scanned")
   238  			}
   239  			workDone += scanstack(gp, gcw)
   240  			gp.gcscandone = true
   241  			resumeG(stopped)
   242  
   243  			if selfScan {
   244  				casgstatus(userG, _Gwaiting, _Grunning)
   245  			}
   246  		})
   247  	}
   248  	if workCounter != nil && workDone != 0 {
   249  		workCounter.Add(workDone)
   250  		if flushBgCredit {
   251  			gcFlushBgCredit(workDone)
   252  		}
   253  	}
   254  	return workDone
   255  }
   256  
   257  // markrootBlock scans the shard'th shard of the block of memory [b0,
   258  // b0+n0), with the given pointer mask.
   259  //
   260  // Returns the amount of work done.
   261  //
   262  //go:nowritebarrier
   263  func markrootBlock(b0, n0 uintptr, ptrmask0 *uint8, gcw *gcWork, shard int) int64 {
   264  	if rootBlockBytes%(8*goarch.PtrSize) != 0 {
   265  		// This is necessary to pick byte offsets in ptrmask0.
   266  		throw("rootBlockBytes must be a multiple of 8*ptrSize")
   267  	}
   268  
   269  	// Note that if b0 is toward the end of the address space,
   270  	// then b0 + rootBlockBytes might wrap around.
   271  	// These tests are written to avoid any possible overflow.
   272  	off := uintptr(shard) * rootBlockBytes
   273  	if off >= n0 {
   274  		return 0
   275  	}
   276  	b := b0 + off
   277  	ptrmask := (*uint8)(add(unsafe.Pointer(ptrmask0), uintptr(shard)*(rootBlockBytes/(8*goarch.PtrSize))))
   278  	n := uintptr(rootBlockBytes)
   279  	if off+n > n0 {
   280  		n = n0 - off
   281  	}
   282  
   283  	// Scan this shard.
   284  	scanblock(b, n, ptrmask, gcw, nil)
   285  	return int64(n)
   286  }
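
// As a worked example with hypothetical sizes: a 600 KiB data segment has
// nBlocks(600 KiB) = 3 shards. Shard 0 scans [b0, b0+256 KiB), shard 1 scans
// [b0+256 KiB, b0+512 KiB), and shard 2 scans only the remaining 88 KiB; any
// higher shard index returns 0 work immediately.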
   287  
   288  // markrootFreeGStacks frees stacks of dead Gs.
   289  //
   290  // This does not free stacks of dead Gs cached on Ps, but having a few
   291  // cached stacks around isn't a problem.
   292  func markrootFreeGStacks() {
   293  	// Take list of dead Gs with stacks.
   294  	lock(&sched.gFree.lock)
   295  	list := sched.gFree.stack
   296  	sched.gFree.stack = gList{}
   297  	unlock(&sched.gFree.lock)
   298  	if list.empty() {
   299  		return
   300  	}
   301  
   302  	// Free stacks.
   303  	q := gQueue{list.head, list.head}
   304  	for gp := list.head.ptr(); gp != nil; gp = gp.schedlink.ptr() {
   305  		stackfree(gp.stack)
   306  		gp.stack.lo = 0
   307  		gp.stack.hi = 0
   308  		// Manipulate the queue directly since the Gs are
   309  		// already all linked the right way.
   310  		q.tail.set(gp)
   311  	}
   312  
   313  	// Put Gs back on the free list.
   314  	lock(&sched.gFree.lock)
   315  	sched.gFree.noStack.pushAll(q)
   316  	unlock(&sched.gFree.lock)
   317  }
   318  
   319  // markrootSpans marks roots for one shard of markArenas.
   320  //
   321  //go:nowritebarrier
   322  func markrootSpans(gcw *gcWork, shard int) {
   323  	// Objects with finalizers have two GC-related invariants:
   324  	//
   325  	// 1) Everything reachable from the object must be marked.
   326  	// This ensures that when we pass the object to its finalizer,
   327  	// everything the finalizer can reach will be retained.
   328  	//
   329  	// 2) Finalizer specials (which are not in the garbage
   330  	// collected heap) are roots. In practice, this means the fn
   331  	// field must be scanned.
   332  	sg := mheap_.sweepgen
   333  
   334  	// Find the arena and page index into that arena for this shard.
   335  	ai := mheap_.markArenas[shard/(pagesPerArena/pagesPerSpanRoot)]
   336  	ha := mheap_.arenas[ai.l1()][ai.l2()]
   337  	arenaPage := uint(uintptr(shard) * pagesPerSpanRoot % pagesPerArena)
   338  
   339  	// Construct slice of bitmap which we'll iterate over.
   340  	specialsbits := ha.pageSpecials[arenaPage/8:]
   341  	specialsbits = specialsbits[:pagesPerSpanRoot/8]
   342  	for i := range specialsbits {
   343  		// Find set bits, which correspond to spans with specials.
   344  		specials := atomic.Load8(&specialsbits[i])
   345  		if specials == 0 {
   346  			continue
   347  		}
   348  		for j := uint(0); j < 8; j++ {
   349  			if specials&(1<<j) == 0 {
   350  				continue
   351  			}
   352  			// Find the span for this bit.
   353  			//
   354  			// This value is guaranteed to be non-nil because having
   355  			// specials implies that the span is in-use, and since we're
   356  			// currently marking we can be sure that we don't have to worry
   357  			// about the span being freed and re-used.
   358  			s := ha.spans[arenaPage+uint(i)*8+j]
   359  
   360  			// The state must be mSpanInUse if the specials bit is set, so
   361  			// sanity check that.
   362  			if state := s.state.get(); state != mSpanInUse {
   363  				print("s.state = ", state, "\n")
   364  				throw("non in-use span found with specials bit set")
   365  			}
   366  			// Check that this span was swept (it may be cached or uncached).
   367  			if !useCheckmark && !(s.sweepgen == sg || s.sweepgen == sg+3) {
   368  				// sweepgen was updated (+2) during non-checkmark GC pass
   369  				print("sweep ", s.sweepgen, " ", sg, "\n")
   370  				throw("gc: unswept span")
   371  			}
   372  
   373  			// Lock the specials to prevent a special from being
   374  			// removed from the list while we're traversing it.
   375  			lock(&s.speciallock)
   376  			for sp := s.specials; sp != nil; sp = sp.next {
   377  				if sp.kind != _KindSpecialFinalizer {
   378  					continue
   379  				}
   380  				// Don't mark the finalized object itself, but scan
   381  				// it so we retain everything it points to.
   382  				spf := (*specialfinalizer)(unsafe.Pointer(sp))
   383  				// A finalizer can be set for an inner byte of an object; find the object's beginning.
   384  				p := s.base() + uintptr(spf.special.offset)/s.elemsize*s.elemsize
   385  
   386  				// Mark everything that can be reached from
   387  				// the object (but *not* the object itself or
   388  				// we'll never collect it).
   389  				if !s.spanclass.noscan() {
   390  					scanobject(p, gcw)
   391  				}
   392  
   393  				// The special itself is a root.
   394  				scanblock(uintptr(unsafe.Pointer(&spf.fn)), goarch.PtrSize, &oneptrmask[0], gcw, nil)
   395  			}
   396  			unlock(&s.speciallock)
   397  		}
   398  	}
   399  }
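
// The offset rounding above, with hypothetical numbers: in a span with
// elemsize = 48 whose special records offset = 112 (a finalizer set on an
// inner byte), 112/48*48 = 96, so p = s.base()+96, the base of the object
// that owns the finalizer.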
   400  
   401  // gcAssistAlloc performs GC work to make gp's assist debt positive.
   402  // gp must be the calling user goroutine.
   403  //
   404  // This must be called with preemption enabled.
   405  func gcAssistAlloc(gp *g) {
   406  	// Don't assist in non-preemptible contexts. These are
   407  	// generally fragile and won't allow the assist to block.
   408  	if getg() == gp.m.g0 {
   409  		return
   410  	}
   411  	if mp := getg().m; mp.locks > 0 || mp.preemptoff != "" {
   412  		return
   413  	}
   414  
   415  	// This extremely verbose boolean indicates whether we've
   416  	// entered mark assist from the perspective of the tracer.
   417  	//
   418  	// In the old tracer, this is just before we call gcAssistAlloc1
   419  	// *and* tracing is enabled. Because the old tracer doesn't
   420  	// do any extra tracking, we need to be careful to not emit an
   421  	// "end" event if there was no corresponding "begin" for the
   422  	// mark assist.
   423  	//
   424  	// In the new tracer, this is just before we call gcAssistAlloc1
   425  	// *regardless* of whether tracing is enabled. This is because
   426  	// the new tracer allows for tracing to begin (and advance
   427  	// generations) in the middle of a GC mark phase, so we need to
   428  	// record some state so that the tracer can pick it up to ensure
   429  	// a consistent trace result.
   430  	//
   431  	// TODO(mknyszek): Hide the details of inMarkAssist in tracer
   432  	// functions and simplify all the state tracking. This is a lot.
   433  	enteredMarkAssistForTracing := false
   434  retry:
   435  	if gcCPULimiter.limiting() {
   436  		// If the CPU limiter is enabled, intentionally don't
   437  		// assist to reduce the amount of CPU time spent in the GC.
   438  		if enteredMarkAssistForTracing {
   439  			trace := traceAcquire()
   440  			if trace.ok() {
   441  				trace.GCMarkAssistDone()
   442  				// Set this *after* we trace the end to make sure
   443  				// that we emit an in-progress event if this is
   444  				// the first event for the goroutine in the trace
   445  				// or trace generation. Also, do this between
   446  				// acquire/release because this is part of the
   447  				// goroutine's trace state, and it must be atomic
   448  				// with respect to the tracer.
   449  				gp.inMarkAssist = false
   450  				traceRelease(trace)
   451  			} else {
   452  				// This state is tracked even if tracing isn't enabled.
   453  				// It's only used by the new tracer.
   454  				// See the comment on enteredMarkAssistForTracing.
   455  				gp.inMarkAssist = false
   456  			}
   457  		}
   458  		return
   459  	}
   460  	// Compute the amount of scan work we need to do to make the
   461  	// balance positive. When the required amount of work is low,
   462  	// we over-assist to build up credit for future allocations
   463  	// and amortize the cost of assisting.
   464  	assistWorkPerByte := gcController.assistWorkPerByte.Load()
   465  	assistBytesPerWork := gcController.assistBytesPerWork.Load()
   466  	debtBytes := -gp.gcAssistBytes
   467  	scanWork := int64(assistWorkPerByte * float64(debtBytes))
   468  	if scanWork < gcOverAssistWork {
   469  		scanWork = gcOverAssistWork
   470  		debtBytes = int64(assistBytesPerWork * float64(scanWork))
   471  	}
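
	// For example (hypothetical numbers; gcOverAssistWork's actual value is
	// defined elsewhere in the runtime): with assistWorkPerByte = 0.5 and a
	// 4 KiB debt, scanWork = 2048. If gcOverAssistWork were 64 KiB, the
	// assist would be rounded up to scanWork = 65536 and debtBytes recomputed
	// as assistBytesPerWork*65536 = 131072, banking credit for future
	// allocations.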
   472  
   473  	// Steal as much credit as we can from the background GC's
   474  	// scan credit. This is racy and may drop the background
   475  	// credit below 0 if two mutators steal at the same time. This
   476  	// will just cause steals to fail until credit is accumulated
   477  	// again, so in the long run it doesn't really matter, but we
   478  	// do have to handle the negative credit case.
   479  	bgScanCredit := gcController.bgScanCredit.Load()
   480  	stolen := int64(0)
   481  	if bgScanCredit > 0 {
   482  		if bgScanCredit < scanWork {
   483  			stolen = bgScanCredit
   484  			gp.gcAssistBytes += 1 + int64(assistBytesPerWork*float64(stolen))
   485  		} else {
   486  			stolen = scanWork
   487  			gp.gcAssistBytes += debtBytes
   488  		}
   489  		gcController.bgScanCredit.Add(-stolen)
   490  
   491  		scanWork -= stolen
   492  
   493  		if scanWork == 0 {
   494  			// We were able to steal all of the credit we
   495  			// needed.
   496  			if enteredMarkAssistForTracing {
   497  				trace := traceAcquire()
   498  				if trace.ok() {
   499  					trace.GCMarkAssistDone()
   500  					// Set this *after* we trace the end to make sure
   501  					// that we emit an in-progress event if this is
   502  					// the first event for the goroutine in the trace
   503  					// or trace generation. Also, do this between
   504  					// acquire/release because this is part of the
   505  					// goroutine's trace state, and it must be atomic
   506  					// with respect to the tracer.
   507  					gp.inMarkAssist = false
   508  					traceRelease(trace)
   509  				} else {
   510  					// This state is tracked even if tracing isn't enabled.
   511  					// It's only used by the new tracer.
   512  					// See the comment on enteredMarkAssistForTracing.
   513  					gp.inMarkAssist = false
   514  				}
   515  			}
   516  			return
   517  		}
   518  	}
   519  	if !enteredMarkAssistForTracing {
   520  		trace := traceAcquire()
   521  		if trace.ok() {
   522  			if !goexperiment.ExecTracer2 {
   523  				// In the old tracer, enter mark assist tracing only
   524  				// if we actually traced an event. Otherwise a goroutine
   525  				// waking up from mark assist post-GC might end up
   526  				// writing a stray "end" event.
   527  				//
   528  				// This means inMarkAssist will not be meaningful
   529  				// in the old tracer; that's OK, it's unused.
   530  				//
   531  				// See the comment on enteredMarkAssistForTracing.
   532  				enteredMarkAssistForTracing = true
   533  			}
   534  			trace.GCMarkAssistStart()
   535  			// Set this *after* we trace the start, otherwise we may
   536  			// emit an in-progress event for an assist we're about to start.
   537  			gp.inMarkAssist = true
   538  			traceRelease(trace)
   539  		} else {
   540  			gp.inMarkAssist = true
   541  		}
   542  		if goexperiment.ExecTracer2 {
   543  			// In the new tracer, set enter mark assist tracing if we
   544  			// ever pass this point, because we must manage inMarkAssist
   545  			// correctly.
   546  			//
   547  			// See the comment on enteredMarkAssistForTracing.
   548  			enteredMarkAssistForTracing = true
   549  		}
   550  	}
   551  
   552  	// Perform assist work
   553  	systemstack(func() {
   554  		gcAssistAlloc1(gp, scanWork)
   555  		// The user stack may have moved, so this can't touch
   556  		// anything on it until it returns from systemstack.
   557  	})
   558  
   559  	completed := gp.param != nil
   560  	gp.param = nil
   561  	if completed {
   562  		gcMarkDone()
   563  	}
   564  
   565  	if gp.gcAssistBytes < 0 {
   566  		// We were unable to steal enough credit or perform
   567  		// enough work to pay off the assist debt. We need to
   568  		// do one of these before letting the mutator allocate
   569  		// more to prevent over-allocation.
   570  		//
   571  		// If this is because we were preempted, reschedule
   572  		// and try some more.
   573  		if gp.preempt {
   574  			Gosched()
   575  			goto retry
   576  		}
   577  
   578  		// Add this G to an assist queue and park. When the GC
   579  		// has more background credit, it will satisfy queued
   580  		// assists before flushing to the global credit pool.
   581  		//
   582  		// Note that this does *not* get woken up when more
   583  		// work is added to the work list. The theory is that
   584  		// there wasn't enough work to do anyway, so we might
   585  		// as well let background marking take care of the
   586  		// work that is available.
   587  		if !gcParkAssist() {
   588  			goto retry
   589  		}
   590  
   591  		// At this point either background GC has satisfied
   592  		// this G's assist debt, or the GC cycle is over.
   593  	}
   594  	if enteredMarkAssistForTracing {
   595  		trace := traceAcquire()
   596  		if trace.ok() {
   597  			trace.GCMarkAssistDone()
   598  			// Set this *after* we trace the end to make sure
   599  			// that we emit an in-progress event if this is
   600  			// the first event for the goroutine in the trace
   601  			// or trace generation. Also, do this between
   602  			// acquire/release because this is part of the
   603  			// goroutine's trace state, and it must be atomic
   604  			// with respect to the tracer.
   605  			gp.inMarkAssist = false
   606  			traceRelease(trace)
   607  		} else {
   608  			// This state is tracked even if tracing isn't enabled.
   609  			// It's only used by the new tracer.
   610  			// See the comment on enteredMarkAssistForTracing.
   611  			gp.inMarkAssist = false
   612  		}
   613  	}
   614  }
   615  
   616  // gcAssistAlloc1 is the part of gcAssistAlloc that runs on the system
   617  // stack. This is a separate function to make it easier to see that
   618  // we're not capturing anything from the user stack, since the user
   619  // stack may move while we're in this function.
   620  //
   621  // gcAssistAlloc1 indicates whether this assist completed the mark
   622  // phase by setting gp.param to non-nil. This can't be communicated on
   623  // the stack since it may move.
   624  //
   625  //go:systemstack
   626  func gcAssistAlloc1(gp *g, scanWork int64) {
   627  	// Clear the flag indicating that this assist completed the
   628  	// mark phase.
   629  	gp.param = nil
   630  
   631  	if atomic.Load(&gcBlackenEnabled) == 0 {
   632  		// The gcBlackenEnabled check in malloc races with the
   633  	// store that clears it, but an atomic check in every malloc
   634  		// would be a performance hit.
   635  		// Instead we recheck it here on the non-preemptible system
   636  		// stack to determine if we should perform an assist.
   637  
   638  		// GC is done, so ignore any remaining debt.
   639  		gp.gcAssistBytes = 0
   640  		return
   641  	}
   642  	// Track time spent in this assist. Since we're on the
   643  	// system stack, this is non-preemptible, so we can
   644  	// just measure start and end time.
   645  	//
   646  	// Limiter event tracking might be disabled if we end up here
   647  	// while on a mark worker.
   648  	startTime := nanotime()
   649  	trackLimiterEvent := gp.m.p.ptr().limiterEvent.start(limiterEventMarkAssist, startTime)
   650  
   651  	decnwait := atomic.Xadd(&work.nwait, -1)
   652  	if decnwait == work.nproc {
   653  		println("runtime: work.nwait =", decnwait, "work.nproc=", work.nproc)
   654  		throw("nwait > work.nprocs")
   655  	}
   656  
   657  	// gcDrainN requires the caller to be preemptible.
   658  	casGToWaiting(gp, _Grunning, waitReasonGCAssistMarking)
   659  
   660  	// Drain our own cached work first in the hope that it
   661  	// will be more cache friendly.
   662  	gcw := &getg().m.p.ptr().gcw
   663  	workDone := gcDrainN(gcw, scanWork)
   664  
   665  	casgstatus(gp, _Gwaiting, _Grunning)
   666  
   667  	// Record that we did this much scan work.
   668  	//
   669  	// Back out the number of bytes of assist credit that
   670  	// this scan work counts for. The "1+" is a poor man's
   671  	// round-up, to ensure this adds credit even if
   672  	// assistBytesPerWork is very low.
   673  	assistBytesPerWork := gcController.assistBytesPerWork.Load()
   674  	gp.gcAssistBytes += 1 + int64(assistBytesPerWork*float64(workDone))
   675  
   676  	// If this is the last worker and we ran out of work,
   677  	// signal a completion point.
   678  	incnwait := atomic.Xadd(&work.nwait, +1)
   679  	if incnwait > work.nproc {
   680  		println("runtime: work.nwait=", incnwait,
   681  			"work.nproc=", work.nproc)
   682  		throw("work.nwait > work.nproc")
   683  	}
   684  
   685  	if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
   686  		// This has reached a background completion point. Set
   687  		// gp.param to a non-nil value to indicate this. It
   688  		// doesn't matter what we set it to (it just has to be
   689  		// a valid pointer).
   690  		gp.param = unsafe.Pointer(gp)
   691  	}
   692  	now := nanotime()
   693  	duration := now - startTime
   694  	pp := gp.m.p.ptr()
   695  	pp.gcAssistTime += duration
   696  	if trackLimiterEvent {
   697  		pp.limiterEvent.stop(limiterEventMarkAssist, now)
   698  	}
   699  	if pp.gcAssistTime > gcAssistTimeSlack {
   700  		gcController.assistTime.Add(pp.gcAssistTime)
   701  		gcCPULimiter.update(now)
   702  		pp.gcAssistTime = 0
   703  	}
   704  }
   705  
   706  // gcWakeAllAssists wakes all currently blocked assists. This is used
   707  // at the end of a GC cycle. gcBlackenEnabled must be false to prevent
   708  // new assists from going to sleep after this point.
   709  func gcWakeAllAssists() {
   710  	lock(&work.assistQueue.lock)
   711  	list := work.assistQueue.q.popList()
   712  	injectglist(&list)
   713  	unlock(&work.assistQueue.lock)
   714  }
   715  
   716  // gcParkAssist puts the current goroutine on the assist queue and parks.
   717  //
   718  // gcParkAssist reports whether the assist is now satisfied. If it
   719  // returns false, the caller must retry the assist.
   720  func gcParkAssist() bool {
   721  	lock(&work.assistQueue.lock)
   722  	// If the GC cycle finished while we were getting the lock,
   723  	// exit the assist. The cycle can't finish while we hold the
   724  	// lock.
   725  	if atomic.Load(&gcBlackenEnabled) == 0 {
   726  		unlock(&work.assistQueue.lock)
   727  		return true
   728  	}
   729  
   730  	gp := getg()
   731  	oldList := work.assistQueue.q
   732  	work.assistQueue.q.pushBack(gp)
   733  
   734  	// Recheck for background credit now that this G is in
   735  	// the queue, while it can still back out. This avoids a
   736  	// race in case background marking has flushed more
   737  	// credit since we checked above.
   738  	if gcController.bgScanCredit.Load() > 0 {
   739  		work.assistQueue.q = oldList
   740  		if oldList.tail != 0 {
   741  			oldList.tail.ptr().schedlink.set(nil)
   742  		}
   743  		unlock(&work.assistQueue.lock)
   744  		return false
   745  	}
   746  	// Park.
   747  	goparkunlock(&work.assistQueue.lock, waitReasonGCAssistWait, traceBlockGCMarkAssist, 2)
   748  	return true
   749  }
   750  
   751  // gcFlushBgCredit flushes scanWork units of background scan work
   752  // credit. This first satisfies blocked assists on the
   753  // work.assistQueue and then flushes any remaining credit to
   754  // gcController.bgScanCredit.
   755  //
   756  // Write barriers are disallowed because this is used by gcDrain after
   757  // it has ensured that all work is drained and this must preserve that
   758  // condition.
   759  //
   760  //go:nowritebarrierrec
   761  func gcFlushBgCredit(scanWork int64) {
   762  	if work.assistQueue.q.empty() {
   763  		// Fast path; there are no blocked assists. There's a
   764  		// small window here where an assist may add itself to
   765  		// the blocked queue and park. If that happens, we'll
   766  		// just get it on the next flush.
   767  		gcController.bgScanCredit.Add(scanWork)
   768  		return
   769  	}
   770  
   771  	assistBytesPerWork := gcController.assistBytesPerWork.Load()
   772  	scanBytes := int64(float64(scanWork) * assistBytesPerWork)
   773  
   774  	lock(&work.assistQueue.lock)
   775  	for !work.assistQueue.q.empty() && scanBytes > 0 {
   776  		gp := work.assistQueue.q.pop()
   777  		// Note that gp.gcAssistBytes is negative because gp
   778  		// is in debt. Think carefully about the signs below.
   779  		if scanBytes+gp.gcAssistBytes >= 0 {
   780  			// Satisfy this entire assist debt.
   781  			scanBytes += gp.gcAssistBytes
   782  			gp.gcAssistBytes = 0
   783  			// It's important that we *not* put gp in
   784  			// runnext. Otherwise, it's possible for user
   785  			// code to exploit the GC worker's high
   786  			// scheduler priority to get itself always run
   787  			// before other goroutines and always in the
   788  			// fresh quantum started by GC.
   789  			ready(gp, 0, false)
   790  		} else {
   791  			// Partially satisfy this assist.
   792  			gp.gcAssistBytes += scanBytes
   793  			scanBytes = 0
   794  			// As a heuristic, we move this assist to the
   795  			// back of the queue so that large assists
   796  			// can't clog up the assist queue and
   797  			// substantially delay small assists.
   798  			work.assistQueue.q.pushBack(gp)
   799  			break
   800  		}
   801  	}
   802  
   803  	if scanBytes > 0 {
   804  		// Convert from scan bytes back to work.
   805  		assistWorkPerByte := gcController.assistWorkPerByte.Load()
   806  		scanWork = int64(float64(scanBytes) * assistWorkPerByte)
   807  		gcController.bgScanCredit.Add(scanWork)
   808  	}
   809  	unlock(&work.assistQueue.lock)
   810  }
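
// A sketch of the distribution above, with hypothetical numbers: flushing
// scanWork = 1000 at assistBytesPerWork = 2 gives scanBytes = 2000. If the
// queue holds G1 (gcAssistBytes = -1500) and G2 (-3000), then G1 is fully
// paid off and readied (scanBytes drops to 500), G2 is partially paid (its
// debt becomes -2500) and moved to the back of the queue, and no credit is
// left over to flush to bgScanCredit.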
   811  
   812  // scanstack scans gp's stack, greying all pointers found on the stack.
   813  //
   814  // Returns the amount of scan work performed, but doesn't update
   815  // gcController.stackScanWork or flush any credit. Any background credit produced
   816  // by this function should be flushed by its caller. scanstack itself can't
   817  // safely flush because it may result in trying to wake up a goroutine that
   818  // was just scanned, resulting in a self-deadlock.
   819  //
   820  // scanstack will also shrink the stack if it is safe to do so. If it
   821  // is not, it schedules a stack shrink for the next synchronous safe
   822  // point.
   823  //
   824  // scanstack is marked go:systemstack because it must not be preempted
   825  // while using a workbuf.
   826  //
   827  //go:nowritebarrier
   828  //go:systemstack
   829  func scanstack(gp *g, gcw *gcWork) int64 {
   830  	if readgstatus(gp)&_Gscan == 0 {
   831  		print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n")
   832  		throw("scanstack - bad status")
   833  	}
   834  
   835  	switch readgstatus(gp) &^ _Gscan {
   836  	default:
   837  		print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
   838  		throw("mark - bad status")
   839  	case _Gdead:
   840  		return 0
   841  	case _Grunning:
   842  		print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
   843  		throw("scanstack: goroutine not stopped")
   844  	case _Grunnable, _Gsyscall, _Gwaiting:
   845  		// ok
   846  	}
   847  
   848  	if gp == getg() {
   849  		throw("can't scan our own stack")
   850  	}
   851  
   852  	// scannedSize is the amount of work we'll be reporting.
   853  	//
   854  	// It is less than the allocated size (which is hi-lo).
   855  	var sp uintptr
   856  	if gp.syscallsp != 0 {
   857  		sp = gp.syscallsp // If in a system call this is the stack pointer (gp.sched.sp can be 0 in this case on Windows).
   858  	} else {
   859  		sp = gp.sched.sp
   860  	}
   861  	scannedSize := gp.stack.hi - sp
   862  
   863  	// Keep statistics for initial stack size calculation.
   864  	// Note that this accumulates the scanned size, not the allocated size.
   865  	p := getg().m.p.ptr()
   866  	p.scannedStackSize += uint64(scannedSize)
   867  	p.scannedStacks++
   868  
   869  	if isShrinkStackSafe(gp) {
   870  		// Shrink the stack if not much of it is being used.
   871  		shrinkstack(gp)
   872  	} else {
   873  		// Otherwise, shrink the stack at the next sync safe point.
   874  		gp.preemptShrink = true
   875  	}
   876  
   877  	var state stackScanState
   878  	state.stack = gp.stack
   879  
   880  	if stackTraceDebug {
   881  		println("stack trace goroutine", gp.goid)
   882  	}
   883  
   884  	if debugScanConservative && gp.asyncSafePoint {
   885  		print("scanning async preempted goroutine ", gp.goid, " stack [", hex(gp.stack.lo), ",", hex(gp.stack.hi), ")\n")
   886  	}
   887  
   888  	// Scan the saved context register. This is effectively a live
   889  	// register that gets moved back and forth between the
   890  	// register and sched.ctxt without a write barrier.
   891  	if gp.sched.ctxt != nil {
   892  		scanblock(uintptr(unsafe.Pointer(&gp.sched.ctxt)), goarch.PtrSize, &oneptrmask[0], gcw, &state)
   893  	}
   894  
   895  	// Scan the stack. Accumulate a list of stack objects.
   896  	var u unwinder
   897  	for u.init(gp, 0); u.valid(); u.next() {
   898  		scanframeworker(&u.frame, &state, gcw)
   899  	}
   900  
   901  	// Find additional pointers that point into the stack from the heap.
   902  	// Currently this includes defers and panics. See also function copystack.
   903  
   904  	// Find and trace other pointers in defer records.
   905  	for d := gp._defer; d != nil; d = d.link {
   906  		if d.fn != nil {
   907  			// Scan the func value, which could be a stack allocated closure.
   908  			// See issue 30453.
   909  			scanblock(uintptr(unsafe.Pointer(&d.fn)), goarch.PtrSize, &oneptrmask[0], gcw, &state)
   910  		}
   911  		if d.link != nil {
   912  			// The link field of a stack-allocated defer record might point
   913  			// to a heap-allocated defer record. Keep that heap record live.
   914  			scanblock(uintptr(unsafe.Pointer(&d.link)), goarch.PtrSize, &oneptrmask[0], gcw, &state)
   915  		}
   916  		// Retain defer records themselves.
   917  		// Defer records might not be reachable from the G through regular heap
   918  		// tracing because the defer linked list might weave between the stack and the heap.
   919  		if d.heap {
   920  			scanblock(uintptr(unsafe.Pointer(&d)), goarch.PtrSize, &oneptrmask[0], gcw, &state)
   921  		}
   922  	}
   923  	if gp._panic != nil {
   924  		// Panics are always stack allocated.
   925  		state.putPtr(uintptr(unsafe.Pointer(gp._panic)), false)
   926  	}
   927  
   928  	// Find and scan all reachable stack objects.
   929  	//
   930  	// The state's pointer queue prioritizes precise pointers over
   931  	// conservative pointers so that we'll prefer scanning stack
   932  	// objects precisely.
   933  	state.buildIndex()
   934  	for {
   935  		p, conservative := state.getPtr()
   936  		if p == 0 {
   937  			break
   938  		}
   939  		obj := state.findObject(p)
   940  		if obj == nil {
   941  			continue
   942  		}
   943  		r := obj.r
   944  		if r == nil {
   945  			// We've already scanned this object.
   946  			continue
   947  		}
   948  		obj.setRecord(nil) // Don't scan it again.
   949  		if stackTraceDebug {
   950  			printlock()
   951  			print("  live stkobj at", hex(state.stack.lo+uintptr(obj.off)), "of size", obj.size)
   952  			if conservative {
   953  				print(" (conservative)")
   954  			}
   955  			println()
   956  			printunlock()
   957  		}
   958  		gcdata := r.gcdata()
   959  		var s *mspan
   960  		if r.useGCProg() {
   961  			// This path is pretty unlikely, an object large enough
   962  			// to have a GC program allocated on the stack.
   963  			// We need some space to unpack the program into a straight
   964  			// bitmask, which we allocate/free here.
   965  			// TODO: it would be nice if there were a way to run a GC
   966  			// program without having to store all its bits. We'd have
   967  			// to change from a Lempel-Ziv style program to something else.
   968  			// Or we can forbid putting objects on stacks if they require
   969  			// a gc program (see issue 27447).
   970  			s = materializeGCProg(r.ptrdata(), gcdata)
   971  			gcdata = (*byte)(unsafe.Pointer(s.startAddr))
   972  		}
   973  
   974  		b := state.stack.lo + uintptr(obj.off)
   975  		if conservative {
   976  			scanConservative(b, r.ptrdata(), gcdata, gcw, &state)
   977  		} else {
   978  			scanblock(b, r.ptrdata(), gcdata, gcw, &state)
   979  		}
   980  
   981  		if s != nil {
   982  			dematerializeGCProg(s)
   983  		}
   984  	}
   985  
   986  	// Deallocate object buffers.
   987  	// (Pointer buffers were all deallocated in the loop above.)
   988  	for state.head != nil {
   989  		x := state.head
   990  		state.head = x.next
   991  		if stackTraceDebug {
   992  			for i := 0; i < x.nobj; i++ {
   993  				obj := &x.obj[i]
   994  				if obj.r == nil { // reachable
   995  					continue
   996  				}
   997  				println("  dead stkobj at", hex(gp.stack.lo+uintptr(obj.off)), "of size", obj.r.size)
   998  				// Note: not necessarily really dead - only reachable-from-ptr dead.
   999  			}
  1000  		}
  1001  		x.nobj = 0
  1002  		putempty((*workbuf)(unsafe.Pointer(x)))
  1003  	}
  1004  	if state.buf != nil || state.cbuf != nil || state.freeBuf != nil {
  1005  		throw("remaining pointer buffers")
  1006  	}
  1007  	return int64(scannedSize)
  1008  }
  1009  
  1010  // Scan a stack frame: local variables and function arguments/results.
  1011  //
  1012  //go:nowritebarrier
  1013  func scanframeworker(frame *stkframe, state *stackScanState, gcw *gcWork) {
  1014  	if _DebugGC > 1 && frame.continpc != 0 {
  1015  		print("scanframe ", funcname(frame.fn), "\n")
  1016  	}
  1017  
  1018  	isAsyncPreempt := frame.fn.valid() && frame.fn.funcID == abi.FuncID_asyncPreempt
  1019  	isDebugCall := frame.fn.valid() && frame.fn.funcID == abi.FuncID_debugCallV2
  1020  	if state.conservative || isAsyncPreempt || isDebugCall {
  1021  		if debugScanConservative {
  1022  			println("conservatively scanning function", funcname(frame.fn), "at PC", hex(frame.continpc))
  1023  		}
  1024  
  1025  		// Conservatively scan the frame. Unlike the precise
  1026  		// case, this includes the outgoing argument space
  1027  		// since we may have stopped while this function was
  1028  		// setting up a call.
  1029  		//
  1030  		// TODO: We could narrow this down if the compiler
  1031  		// produced a single map per function of stack slots
  1032  		// and registers that ever contain a pointer.
  1033  		if frame.varp != 0 {
  1034  			size := frame.varp - frame.sp
  1035  			if size > 0 {
  1036  				scanConservative(frame.sp, size, nil, gcw, state)
  1037  			}
  1038  		}
  1039  
  1040  		// Scan arguments to this frame.
  1041  		if n := frame.argBytes(); n != 0 {
  1042  			// TODO: We could pass the entry argument map
  1043  			// to narrow this down further.
  1044  			scanConservative(frame.argp, n, nil, gcw, state)
  1045  		}
  1046  
  1047  		if isAsyncPreempt || isDebugCall {
  1048  			// This function's frame contained the
  1049  			// registers for the asynchronously stopped
  1050  			// parent frame. Scan the parent
  1051  			// conservatively.
  1052  			state.conservative = true
  1053  		} else {
  1054  			// We only wanted to scan those two frames
  1055  			// conservatively. Clear the flag for future
  1056  			// frames.
  1057  			state.conservative = false
  1058  		}
  1059  		return
  1060  	}
  1061  
  1062  	locals, args, objs := frame.getStackMap(false)
  1063  
  1064  	// Scan local variables if stack frame has been allocated.
  1065  	if locals.n > 0 {
  1066  		size := uintptr(locals.n) * goarch.PtrSize
  1067  		scanblock(frame.varp-size, size, locals.bytedata, gcw, state)
  1068  	}
  1069  
  1070  	// Scan arguments.
  1071  	if args.n > 0 {
  1072  		scanblock(frame.argp, uintptr(args.n)*goarch.PtrSize, args.bytedata, gcw, state)
  1073  	}
  1074  
  1075  	// Add all stack objects to the stack object list.
  1076  	if frame.varp != 0 {
  1077  		// varp is 0 for defers, where there are no locals.
  1078  		// In that case, there can't be a pointer to its args, either.
  1079  		// (And all args would be scanned above anyway.)
  1080  		for i := range objs {
  1081  			obj := &objs[i]
  1082  			off := obj.off
  1083  			base := frame.varp // locals base pointer
  1084  			if off >= 0 {
  1085  				base = frame.argp // arguments and return values base pointer
  1086  			}
  1087  			ptr := base + uintptr(off)
  1088  			if ptr < frame.sp {
  1089  				// object hasn't been allocated in the frame yet.
  1090  				continue
  1091  			}
  1092  			if stackTraceDebug {
  1093  				println("stkobj at", hex(ptr), "of size", obj.size)
  1094  			}
  1095  			state.addObject(ptr, obj)
  1096  		}
  1097  	}
  1098  }
  1099  
  1100  type gcDrainFlags int
  1101  
  1102  const (
  1103  	gcDrainUntilPreempt gcDrainFlags = 1 << iota
  1104  	gcDrainFlushBgCredit
  1105  	gcDrainIdle
  1106  	gcDrainFractional
  1107  )
  1108  
  1109  // gcDrainMarkWorkerIdle is a wrapper for gcDrain that exists to better account
  1110  // mark time in profiles.
  1111  func gcDrainMarkWorkerIdle(gcw *gcWork) {
  1112  	gcDrain(gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit)
  1113  }
  1114  
  1115  // gcDrainMarkWorkerDedicated is a wrapper for gcDrain that exists to better account
  1116  // mark time in profiles.
  1117  func gcDrainMarkWorkerDedicated(gcw *gcWork, untilPreempt bool) {
  1118  	flags := gcDrainFlushBgCredit
  1119  	if untilPreempt {
  1120  		flags |= gcDrainUntilPreempt
  1121  	}
  1122  	gcDrain(gcw, flags)
  1123  }
  1124  
  1125  // gcDrainMarkWorkerFractional is a wrapper for gcDrain that exists to better account
  1126  // mark time in profiles.
  1127  func gcDrainMarkWorkerFractional(gcw *gcWork) {
  1128  	gcDrain(gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit)
  1129  }
  1130  
  1131  // gcDrain scans roots and objects in work buffers, blackening grey
  1132  // objects until it is unable to get more work. It may return before
  1133  // GC is done; it's the caller's responsibility to balance work from
  1134  // other Ps.
  1135  //
  1136  // If flags&gcDrainUntilPreempt != 0, gcDrain returns when g.preempt
  1137  // is set.
  1138  //
  1139  // If flags&gcDrainIdle != 0, gcDrain returns when there is other work
  1140  // to do.
  1141  //
  1142  // If flags&gcDrainFractional != 0, gcDrain self-preempts when
  1143  // pollFractionalWorkerExit() returns true. This implies
  1144  // gcDrainNoBlock.
  1145  //
  1146  // If flags&gcDrainFlushBgCredit != 0, gcDrain flushes scan work
  1147  // credit to gcController.bgScanCredit every gcCreditSlack units of
  1148  // scan work.
  1149  //
  1150  // gcDrain will always return if there is a pending STW or forEachP.
  1151  //
  1152  // Disabling write barriers is necessary to ensure that after we've
  1153  // confirmed that we've drained gcw, we don't accidentally end
  1154  // up flipping that condition by immediately adding work in the form
  1155  // of a write barrier buffer flush.
  1156  //
  1157  // Don't set nowritebarrierrec because it's safe for some callees to
  1158  // have write barriers enabled.
  1159  //
  1160  //go:nowritebarrier
  1161  func gcDrain(gcw *gcWork, flags gcDrainFlags) {
  1162  	if !writeBarrier.enabled {
  1163  		throw("gcDrain phase incorrect")
  1164  	}
  1165  
  1166  	// N.B. We must be running in a non-preemptible context, so it's
  1167  	// safe to hold a reference to our P here.
  1168  	gp := getg().m.curg
  1169  	pp := gp.m.p.ptr()
  1170  	preemptible := flags&gcDrainUntilPreempt != 0
  1171  	flushBgCredit := flags&gcDrainFlushBgCredit != 0
  1172  	idle := flags&gcDrainIdle != 0
  1173  
  1174  	initScanWork := gcw.heapScanWork
  1175  
  1176  	// checkWork is the scan work before performing the next
  1177  	// self-preempt check.
  1178  	checkWork := int64(1<<63 - 1)
  1179  	var check func() bool
  1180  	if flags&(gcDrainIdle|gcDrainFractional) != 0 {
  1181  		checkWork = initScanWork + drainCheckThreshold
  1182  		if idle {
  1183  			check = pollWork
  1184  		} else if flags&gcDrainFractional != 0 {
  1185  			check = pollFractionalWorkerExit
  1186  		}
  1187  	}
  1188  
  1189  	// Drain root marking jobs.
  1190  	if work.markrootNext < work.markrootJobs {
  1191  		// Stop if we're preemptible, if someone wants to STW, or if
  1192  		// someone is calling forEachP.
  1193  		for !(gp.preempt && (preemptible || sched.gcwaiting.Load() || pp.runSafePointFn != 0)) {
  1194  			job := atomic.Xadd(&work.markrootNext, +1) - 1
  1195  			if job >= work.markrootJobs {
  1196  				break
  1197  			}
  1198  			markroot(gcw, job, flushBgCredit)
  1199  			if check != nil && check() {
  1200  				goto done
  1201  			}
  1202  		}
  1203  	}
  1204  
  1205  	// Drain heap marking jobs.
  1206  	//
  1207  	// Stop if we're preemptible, if someone wants to STW, or if
  1208  	// someone is calling forEachP.
  1209  	//
  1210  	// TODO(mknyszek): Consider always checking gp.preempt instead
  1211  	// of having the preempt flag, and making an exception for certain
  1212  	// mark workers in retake. That might be simpler than trying to
  1213  	// enumerate all the reasons why we might want to preempt, even
  1214  	// if we're supposed to be mostly non-preemptible.
  1215  	for !(gp.preempt && (preemptible || sched.gcwaiting.Load() || pp.runSafePointFn != 0)) {
  1216  		// Try to keep work available on the global queue. We used to
  1217  		// check if there were waiting workers, but it's better to
  1218  		// just keep work available than to make workers wait. In the
  1219  		// worst case, we'll do O(log(_WorkbufSize)) unnecessary
  1220  		// balances.
  1221  		if work.full == 0 {
  1222  			gcw.balance()
  1223  		}
  1224  
  1225  		b := gcw.tryGetFast()
  1226  		if b == 0 {
  1227  			b = gcw.tryGet()
  1228  			if b == 0 {
  1229  				// Flush the write barrier
  1230  				// buffer; this may create
  1231  				// more work.
  1232  				wbBufFlush()
  1233  				b = gcw.tryGet()
  1234  			}
  1235  		}
  1236  		if b == 0 {
  1237  			// Unable to get work.
  1238  			break
  1239  		}
  1240  		scanobject(b, gcw)
  1241  
  1242  		// Flush background scan work credit to the global
  1243  		// account if we've accumulated enough locally so
  1244  		// mutator assists can draw on it.
  1245  		if gcw.heapScanWork >= gcCreditSlack {
  1246  			gcController.heapScanWork.Add(gcw.heapScanWork)
  1247  			if flushBgCredit {
  1248  				gcFlushBgCredit(gcw.heapScanWork - initScanWork)
  1249  				initScanWork = 0
  1250  			}
  1251  			checkWork -= gcw.heapScanWork
  1252  			gcw.heapScanWork = 0
  1253  
  1254  			if checkWork <= 0 {
  1255  				checkWork += drainCheckThreshold
  1256  				if check != nil && check() {
  1257  					break
  1258  				}
  1259  			}
  1260  		}
  1261  	}
  1262  
  1263  done:
  1264  	// Flush remaining scan work credit.
  1265  	if gcw.heapScanWork > 0 {
  1266  		gcController.heapScanWork.Add(gcw.heapScanWork)
  1267  		if flushBgCredit {
  1268  			gcFlushBgCredit(gcw.heapScanWork - initScanWork)
  1269  		}
  1270  		gcw.heapScanWork = 0
  1271  	}
  1272  }
  1273  
  1274  // gcDrainN blackens grey objects until it has performed roughly
  1275  // scanWork units of scan work or the G is preempted. This is
  1276  // best-effort, so it may perform less work if it fails to get a work
  1277  // buffer. Otherwise, it will perform at least scanWork units of work, but
  1278  // may perform more because scanning is always done in whole object
  1279  // increments. It returns the amount of scan work performed.
  1280  //
  1281  // The caller goroutine must be in a preemptible state (e.g.,
  1282  // _Gwaiting) to prevent deadlocks during stack scanning. As a
  1283  // consequence, this must be called on the system stack.
  1284  //
  1285  //go:nowritebarrier
  1286  //go:systemstack
  1287  func gcDrainN(gcw *gcWork, scanWork int64) int64 {
  1288  	if !writeBarrier.enabled {
  1289  		throw("gcDrainN phase incorrect")
  1290  	}
  1291  
  1292  	// There may already be scan work on the gcw, which we don't
  1293  	// want to claim was done by this call.
  1294  	workFlushed := -gcw.heapScanWork
  1295  
  1296  	// In addition to backing out because of a preemption, back out
  1297  	// if the GC CPU limiter is enabled.
  1298  	gp := getg().m.curg
  1299  	for !gp.preempt && !gcCPULimiter.limiting() && workFlushed+gcw.heapScanWork < scanWork {
  1300  		// See gcDrain comment.
  1301  		if work.full == 0 {
  1302  			gcw.balance()
  1303  		}
  1304  
  1305  		b := gcw.tryGetFast()
  1306  		if b == 0 {
  1307  			b = gcw.tryGet()
  1308  			if b == 0 {
  1309  				// Flush the write barrier buffer;
  1310  				// this may create more work.
  1311  				wbBufFlush()
  1312  				b = gcw.tryGet()
  1313  			}
  1314  		}
  1315  
  1316  		if b == 0 {
  1317  			// Try to do a root job.
  1318  			if work.markrootNext < work.markrootJobs {
  1319  				job := atomic.Xadd(&work.markrootNext, +1) - 1
  1320  				if job < work.markrootJobs {
  1321  					workFlushed += markroot(gcw, job, false)
  1322  					continue
  1323  				}
  1324  			}
  1325  			// No heap or root jobs.
  1326  			break
  1327  		}
  1328  
  1329  		scanobject(b, gcw)
  1330  
  1331  		// Flush background scan work credit.
  1332  		if gcw.heapScanWork >= gcCreditSlack {
  1333  			gcController.heapScanWork.Add(gcw.heapScanWork)
  1334  			workFlushed += gcw.heapScanWork
  1335  			gcw.heapScanWork = 0
  1336  		}
  1337  	}
  1338  
  1339  	// Unlike gcDrain, there's no need to flush remaining work
  1340  	// here because this never flushes to bgScanCredit and
  1341  	// gcw.dispose will flush any remaining work to scanWork.
  1342  
  1343  	return workFlushed + gcw.heapScanWork
  1344  }
  1345  
  1346  // scanblock scans b as scanobject would, but using an explicit
  1347  // pointer bitmap instead of the heap bitmap.
  1348  //
  1349  // This is used to scan non-heap roots, so it does not update
  1350  // gcw.bytesMarked or gcw.heapScanWork.
  1351  //
  1352  // If stk != nil, possible stack pointers are also reported to stk.putPtr.
  1353  //
  1354  //go:nowritebarrier
  1355  func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork, stk *stackScanState) {
  1356  	// Use local copies of original parameters, so that a stack trace
  1357  	// due to one of the throws below shows the original block
  1358  	// base and extent.
  1359  	b := b0
  1360  	n := n0
  1361  
  1362  	for i := uintptr(0); i < n; {
  1363  		// Find bits for the next word.
  1364  		bits := uint32(*addb(ptrmask, i/(goarch.PtrSize*8)))
  1365  		if bits == 0 {
  1366  			i += goarch.PtrSize * 8
  1367  			continue
  1368  		}
  1369  		for j := 0; j < 8 && i < n; j++ {
  1370  			if bits&1 != 0 {
  1371  				// Same work as in scanobject; see comments there.
  1372  				p := *(*uintptr)(unsafe.Pointer(b + i))
  1373  				if p != 0 {
  1374  					if obj, span, objIndex := findObject(p, b, i); obj != 0 {
  1375  						greyobject(obj, b, i, span, gcw, objIndex)
  1376  					} else if stk != nil && p >= stk.stack.lo && p < stk.stack.hi {
  1377  						stk.putPtr(p, false)
  1378  					}
  1379  				}
  1380  			}
  1381  			bits >>= 1
  1382  			i += goarch.PtrSize
  1383  		}
  1384  	}
  1385  }
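
// For example, on a 64-bit platform each ptrmask byte covers 8 words
// (64 bytes) of the block: a 3-word root in which only words 0 and 2 hold
// pointers uses the mask byte 0b101, so the loop above loads the words at
// b+0 and b+16 and greys whatever heap objects they point to.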
  1386  
  1387  // scanobject scans the object starting at b, adding pointers to gcw.
  1388  // b must point to the beginning of a heap object or an oblet.
  1389  // scanobject consults the GC bitmap for the pointer mask and the
  1390  // spans for the size of the object.
  1391  //
  1392  //go:nowritebarrier
  1393  func scanobject(b uintptr, gcw *gcWork) {
  1394  	// Prefetch object before we scan it.
  1395  	//
  1396  	// This will overlap fetching the beginning of the object with initial
  1397  	// setup before we start scanning the object.
  1398  	sys.Prefetch(b)
  1399  
  1400  	// Find the bits for b and the size of the object at b.
  1401  	//
  1402  	// b is either the beginning of an object, in which case this
  1403  	// is the size of the object to scan, or it points to an
  1404  	// oblet, in which case we compute the size to scan below.
  1405  	s := spanOfUnchecked(b)
  1406  	n := s.elemsize
  1407  	if n == 0 {
  1408  		throw("scanobject n == 0")
  1409  	}
  1410  	if s.spanclass.noscan() {
  1411  		// Correctness-wise this is ok, but it's inefficient
  1412  		// if noscan objects reach here.
  1413  		throw("scanobject of a noscan object")
  1414  	}
  1415  
  1416  	var tp typePointers
  1417  	if n > maxObletBytes {
  1418  		// Large object. Break into oblets for better
  1419  		// parallelism and lower latency.
  1420  		if b == s.base() {
  1421  			// Enqueue the other oblets to scan later.
  1422  			// Some oblets may be in b's scalar tail, but
  1423  			// these will be marked as "no more pointers",
  1424  			// so we'll drop out immediately when we go to
  1425  			// scan those.
  1426  			for oblet := b + maxObletBytes; oblet < s.base()+s.elemsize; oblet += maxObletBytes {
  1427  				if !gcw.putFast(oblet) {
  1428  					gcw.put(oblet)
  1429  				}
  1430  			}
  1431  		}
  1432  
  1433  		// Compute the size of the oblet. Since this object
  1434  		// must be a large object, s.base() is the beginning
  1435  		// of the object.
  1436  		n = s.base() + s.elemsize - b
  1437  		n = min(n, maxObletBytes)
  1438  		if goexperiment.AllocHeaders {
  1439  			tp = s.typePointersOfUnchecked(s.base())
  1440  			tp = tp.fastForward(b-tp.addr, b+n)
  1441  		}
  1442  	} else {
  1443  		if goexperiment.AllocHeaders {
  1444  			tp = s.typePointersOfUnchecked(b)
  1445  		}
  1446  	}
  1447  
  1448  	var hbits heapBits
  1449  	if !goexperiment.AllocHeaders {
  1450  		hbits = heapBitsForAddr(b, n)
  1451  	}
  1452  	var scanSize uintptr
  1453  	for {
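        		// Find the address of the next pointer slot in [b, b+n), trying
        		// the inlineable fast path first and falling back to the general
        		// iterator. A zero address means there are no more pointers.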
  1454  		var addr uintptr
  1455  		if goexperiment.AllocHeaders {
  1456  			if tp, addr = tp.nextFast(); addr == 0 {
  1457  				if tp, addr = tp.next(b + n); addr == 0 {
  1458  					break
  1459  				}
  1460  			}
  1461  		} else {
  1462  			if hbits, addr = hbits.nextFast(); addr == 0 {
  1463  				if hbits, addr = hbits.next(); addr == 0 {
  1464  					break
  1465  				}
  1466  			}
  1467  		}
  1468  
  1469  		// Keep track of the farthest pointer we found, so we can
  1470  		// update heapScanWork. TODO: is there a better metric,
  1471  		// now that we can skip scalar portions pretty efficiently?
  1472  		scanSize = addr - b + goarch.PtrSize
  1473  
  1474  		// Work here is duplicated in scanblock and above.
  1475  		// If you make changes here, make changes there too.
  1476  		obj := *(*uintptr)(unsafe.Pointer(addr))
  1477  
  1478  		// At this point we have extracted the next potential pointer.
  1479  		// Quickly filter out nil and pointers back to the current object.
  1480  		if obj != 0 && obj-b >= n {
  1481  			// Test if obj points into the Go heap and, if so,
  1482  			// mark the object.
  1483  			//
  1484  			// Note that it's possible for findObject to
  1485  			// fail if obj points to a just-allocated heap
  1486  			// object because of a race with growing the
  1487  			// heap. In this case, we know the object was
  1488  			// just allocated and hence will be marked by
  1489  			// allocation itself.
  1490  			if obj, span, objIndex := findObject(obj, b, addr-b); obj != 0 {
  1491  				greyobject(obj, b, addr-b, span, gcw, objIndex)
  1492  			}
  1493  		}
  1494  	}
  1495  	gcw.bytesMarked += uint64(n)
  1496  	gcw.heapScanWork += int64(scanSize)
  1497  }
  1498  
  1499  // scanConservative scans block [b, b+n) conservatively, treating any
  1500  // pointer-like value in the block as a pointer.
  1501  //
  1502  // If ptrmask != nil, only words that are marked in ptrmask are
  1503  // considered as potential pointers.
  1504  //
  1505  // If state != nil, it's assumed that [b, b+n) is a block in the stack
  1506  // and may contain pointers to stack objects.
  1507  func scanConservative(b, n uintptr, ptrmask *uint8, gcw *gcWork, state *stackScanState) {
  1508  	if debugScanConservative {
  1509  		printlock()
  1510  		print("conservatively scanning [", hex(b), ",", hex(b+n), ")\n")
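        		// Legend for the word dump below:
        		//   '$' - word is not marked as a pointer in ptrmask
        		//   '@' - value points into the stack being scanned
        		//   '*' - value points to an allocated heap object
        		//   ' ' - anything else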
  1511  		hexdumpWords(b, b+n, func(p uintptr) byte {
  1512  			if ptrmask != nil {
  1513  				word := (p - b) / goarch.PtrSize
  1514  				bits := *addb(ptrmask, word/8)
  1515  				if (bits>>(word%8))&1 == 0 {
  1516  					return '$'
  1517  				}
  1518  			}
  1519  
  1520  			val := *(*uintptr)(unsafe.Pointer(p))
  1521  			if state != nil && state.stack.lo <= val && val < state.stack.hi {
  1522  				return '@'
  1523  			}
  1524  
  1525  			span := spanOfHeap(val)
  1526  			if span == nil {
  1527  				return ' '
  1528  			}
  1529  			idx := span.objIndex(val)
  1530  			if span.isFree(idx) {
  1531  				return ' '
  1532  			}
  1533  			return '*'
  1534  		})
  1535  		printunlock()
  1536  	}
  1537  
  1538  	for i := uintptr(0); i < n; i += goarch.PtrSize {
  1539  		if ptrmask != nil {
  1540  			word := i / goarch.PtrSize
  1541  			bits := *addb(ptrmask, word/8)
  1542  			if bits == 0 {
  1543  				// Skip 8 words (the loop increment will do the 8th)
  1544  				//
  1545  				// This must be the first time we've
  1546  				// seen this word of ptrmask, so i
  1547  				// must be 8-word-aligned, but check
  1548  				// our reasoning just in case.
  1549  				if i%(goarch.PtrSize*8) != 0 {
  1550  					throw("misaligned mask")
  1551  				}
  1552  				i += goarch.PtrSize*8 - goarch.PtrSize
  1553  				continue
  1554  			}
  1555  			if (bits>>(word%8))&1 == 0 {
  1556  				continue
  1557  			}
  1558  		}
  1559  
  1560  		val := *(*uintptr)(unsafe.Pointer(b + i))
  1561  
  1562  		// Check if val points into the stack.
  1563  		if state != nil && state.stack.lo <= val && val < state.stack.hi {
  1564  			// val may point to a stack object. This
  1565  			// object may be dead from last cycle and
  1566  			// hence may contain pointers to unallocated
  1567  			// objects, but unlike heap objects we can't
  1568  			// tell if it's already dead. Hence, if all
  1569  			// pointers to this object are from
  1570  			// conservative scanning, we have to scan it
  1571  			// defensively, too.
  1572  			state.putPtr(val, true)
  1573  			continue
  1574  		}
  1575  
  1576  		// Check if val points to a heap span.
  1577  		span := spanOfHeap(val)
  1578  		if span == nil {
  1579  			continue
  1580  		}
  1581  
  1582  		// Check if val points to an allocated object.
  1583  		idx := span.objIndex(val)
  1584  		if span.isFree(idx) {
  1585  			continue
  1586  		}
  1587  
  1588  		// val points to an allocated object. Mark it.
  1589  		obj := span.base() + idx*span.elemsize
  1590  		greyobject(obj, b, i, span, gcw, idx)
  1591  	}
  1592  }
  1593  
  1594  // Shade the object if it isn't already.
  1595  // The object is not nil and known to be in the heap.
  1596  // Preemption must be disabled.
  1597  //
  1598  //go:nowritebarrier
  1599  func shade(b uintptr) {
  1600  	if obj, span, objIndex := findObject(b, 0, 0); obj != 0 {
  1601  		gcw := &getg().m.p.ptr().gcw
  1602  		greyobject(obj, 0, 0, span, gcw, objIndex)
  1603  	}
  1604  }
  1605  
  1606  // obj is the start of an object with mark mbits.
  1607  // If it isn't already marked, mark it and enqueue into gcw.
  1608  // base and off are for debugging only and could be removed.
  1609  //
  1610  // See also wbBufFlush1, which partially duplicates this logic.
  1611  //
  1612  //go:nowritebarrierrec
  1613  func greyobject(obj, base, off uintptr, span *mspan, gcw *gcWork, objIndex uintptr) {
  1614  	// obj should be start of allocation, and so must be at least pointer-aligned.
  1615  	if obj&(goarch.PtrSize-1) != 0 {
  1616  		throw("greyobject: obj not pointer-aligned")
  1617  	}
  1618  	mbits := span.markBitsForIndex(objIndex)
  1619  
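        	// useCheckmark is set during the debug.gccheckmark verification
        	// pass, which re-marks the heap with the world stopped, using
        	// separate checkmark bits, to validate the concurrent mark phase.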
  1620  	if useCheckmark {
  1621  		if setCheckmark(obj, base, off, mbits) {
  1622  			// Already marked.
  1623  			return
  1624  		}
  1625  	} else {
  1626  		if debug.gccheckmark > 0 && span.isFree(objIndex) {
  1627  			print("runtime: marking free object ", hex(obj), " found at *(", hex(base), "+", hex(off), ")\n")
  1628  			gcDumpObject("base", base, off)
  1629  			gcDumpObject("obj", obj, ^uintptr(0))
  1630  			getg().m.traceback = 2
  1631  			throw("marking free object")
  1632  		}
  1633  
  1634  		// If marked we have nothing to do.
  1635  		if mbits.isMarked() {
  1636  			return
  1637  		}
  1638  		mbits.setMarked()
  1639  
  1640  		// Mark span.
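        		// pageMarks records which pages contain marked objects, so that
        		// sweeping can quickly find spans that are entirely free.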
  1641  		arena, pageIdx, pageMask := pageIndexOf(span.base())
  1642  		if arena.pageMarks[pageIdx]&pageMask == 0 {
  1643  			atomic.Or8(&arena.pageMarks[pageIdx], pageMask)
  1644  		}
  1645  
  1646  		// If this is a noscan object, fast-track it to black
  1647  		// instead of greying it.
  1648  		if span.spanclass.noscan() {
  1649  			gcw.bytesMarked += uint64(span.elemsize)
  1650  			return
  1651  		}
  1652  	}
  1653  
  1654  	// We're adding obj to P's local workbuf, so it's likely
  1655  	// this object will be processed soon by the same P.
  1656  	// Even if the workbuf gets flushed, there will likely still be
  1657  	// some benefit on platforms with inclusive shared caches.
  1658  	sys.Prefetch(obj)
  1659  	// Queue the obj for scanning.
  1660  	if !gcw.putFast(obj) {
  1661  		gcw.put(obj)
  1662  	}
  1663  }
  1664  
  1665  // gcDumpObject dumps the contents of obj for debugging and marks the
  1666  // field at byte offset off in obj.
  1667  func gcDumpObject(label string, obj, off uintptr) {
  1668  	s := spanOf(obj)
  1669  	print(label, "=", hex(obj))
  1670  	if s == nil {
  1671  		print(" s=nil\n")
  1672  		return
  1673  	}
  1674  	print(" s.base()=", hex(s.base()), " s.limit=", hex(s.limit), " s.spanclass=", s.spanclass, " s.elemsize=", s.elemsize, " s.state=")
  1675  	if state := s.state.get(); 0 <= state && int(state) < len(mSpanStateNames) {
  1676  		print(mSpanStateNames[state], "\n")
  1677  	} else {
  1678  		print("unknown(", state, ")\n")
  1679  	}
  1680  
  1681  	skipped := false
  1682  	size := s.elemsize
  1683  	if s.state.get() == mSpanManual && size == 0 {
  1684  		// We're printing something from a stack frame. We
  1685  		// don't know how big it is, so just show up to and
  1686  		// including off.
  1687  		size = off + goarch.PtrSize
  1688  	}
  1689  	for i := uintptr(0); i < size; i += goarch.PtrSize {
  1690  		// For big objects, just print the beginning (because
  1691  		// that usually hints at the object's type) and the
  1692  		// fields around off.
  1693  		if !(i < 128*goarch.PtrSize || off-16*goarch.PtrSize < i && i < off+16*goarch.PtrSize) {
  1694  			skipped = true
  1695  			continue
  1696  		}
  1697  		if skipped {
  1698  			print(" ...\n")
  1699  			skipped = false
  1700  		}
  1701  		print(" *(", label, "+", i, ") = ", hex(*(*uintptr)(unsafe.Pointer(obj + i))))
  1702  		if i == off {
  1703  			print(" <==")
  1704  		}
  1705  		print("\n")
  1706  	}
  1707  	if skipped {
  1708  		print(" ...\n")
  1709  	}
  1710  }
  1711  
  1712  // gcmarknewobject marks a newly allocated object black. obj must
  1713  // not contain any non-nil pointers.
  1714  //
  1715  // This is nosplit so it can manipulate a gcWork without preemption.
  1716  //
  1717  //go:nowritebarrier
  1718  //go:nosplit
  1719  func gcmarknewobject(span *mspan, obj uintptr) {
  1720  	if useCheckmark { // The world should be stopped so this should not happen.
  1721  		throw("gcmarknewobject called while doing checkmark")
  1722  	}
  1723  
  1724  	// Mark object.
  1725  	objIndex := span.objIndex(obj)
  1726  	span.markBitsForIndex(objIndex).setMarked()
  1727  
  1728  	// Mark span.
  1729  	arena, pageIdx, pageMask := pageIndexOf(span.base())
  1730  	if arena.pageMarks[pageIdx]&pageMask == 0 {
  1731  		atomic.Or8(&arena.pageMarks[pageIdx], pageMask)
  1732  	}
  1733  
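        	// Credit the object's bytes to this P's gcWork so they are
        	// included in this cycle's marked-heap accounting.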
  1734  	gcw := &getg().m.p.ptr().gcw
  1735  	gcw.bytesMarked += uint64(span.elemsize)
  1736  }
  1737  
  1738  // gcMarkTinyAllocs greys all active tiny alloc blocks.
  1739  //
  1740  // The world must be stopped.
  1741  func gcMarkTinyAllocs() {
  1742  	assertWorldStopped()
  1743  
  1744  	for _, p := range allp {
  1745  		c := p.mcache
  1746  		if c == nil || c.tiny == 0 {
  1747  			continue
  1748  		}
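        		// The current tiny alloc block is reachable through the mcache,
        		// which lives in non-GC'd memory and is not scanned, so grey the
        		// block explicitly to keep it (and the tiny objects packed in
        		// it) alive.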
  1749  		_, span, objIndex := findObject(c.tiny, 0, 0)
  1750  		gcw := &p.gcw
  1751  		greyobject(c.tiny, 0, 0, span, gcw, objIndex)
  1752  	}
  1753  }
  1754  
