Source file src/runtime/mgcwork.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runtime
     6  
     7  import (
     8  	"internal/goarch"
     9  	"internal/goexperiment"
    10  	"internal/runtime/atomic"
    11  	"internal/runtime/gc"
    12  	"internal/runtime/sys"
    13  	"unsafe"
    14  )
    15  
    16  const (
    17  	_WorkbufSize = 2048 // in bytes; larger values result in less contention
    18  
    19  	// workbufAlloc is the number of bytes to allocate at a time
    20  	// for new workbufs. This must be a multiple of pageSize and
    21  	// should be a multiple of _WorkbufSize.
    22  	//
    23  	// Larger values reduce workbuf allocation overhead. Smaller
    24  	// values reduce heap fragmentation.
    25  	workbufAlloc = 32 << 10
    26  )
    27  
    28  func init() {
    29  	if workbufAlloc%pageSize != 0 || workbufAlloc%_WorkbufSize != 0 {
    30  		throw("bad workbufAlloc")
    31  	}
    32  }
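
        // As a worked example with the defaults (and the usual 8 KiB runtime
        // page size), workbufAlloc = 32<<10 = 32768 bytes passes both checks:
        // it is exactly 4 pages and exactly 16 workbufs.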
    33  
    34  // Garbage collector work pool abstraction.
    35  //
    36  // This implements a producer/consumer model for pointers to grey
    37  // objects.
    38  //
    39  // For objects in workbufs, a grey object is one that is marked and
    40  // on a work queue. A black object is marked and not on a work queue.
    41  //
    42  // For objects in the span queue, a grey object is one that is marked
    43  // and has an unset scan bit. A black object is marked and has its scan
    44  // bit set. (Green Tea GC only.)
    45  //
    46  // Write barriers, root discovery, stack scanning, and object scanning
    47  // produce pointers to grey objects. Scanning consumes pointers to
    48  // grey objects, thus blackening them, and then scans them,
    49  // potentially producing new pointers to grey objects.
    50  //
    51  // Work queues must be prioritized in the following order wherever work
    52  // is processed.
    53  //
    54  // +----------------------------------------------------------+
    55  // | Priority | Work queue | Restrictions | Function          |
    56  // |----------------------------------------------------------|
    57  // | 1        | Workbufs   | P-local      | tryGetObjFast     |
    58  // | 2        | Span queue | P-local      | tryGetSpanFast    | [greenteagc]
    59  // | 3        | Workbufs   | None         | tryGetObj         |
    60  // | 4        | Span queue | None         | tryGetSpan        | [greenteagc]
    61  // | 5        | Span queue | None         | tryStealSpan      | [greenteagc]
    62  // +----------------------------------------------------------+
    63  //
    64  // The rationale behind this ordering comes from two insights:
    65  // 1. It's always preferable to look for P-local work first to avoid hammering on
    66  //    global lists.
    67  // 2. It's always preferable to scan individual objects first to increase the
    68  //    likelihood that spans will accumulate more objects to scan.
    69  
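        // A schematic sketch of a drain loop honoring this order. This is not
        // the real implementation: the actual drain loops are gcDrain and
        // friends in mgcmark.go, which also handle preemption, scan credit,
        // and termination checks, and the span-queue steps exist only under
        // goexperiment.GreenTeaGC, with their helpers defined elsewhere.
        //
        //	for {
        //		if obj := gcw.tryGetObjFast(); obj != 0 { // 1: P-local workbuf
        //			// ... scan obj ...
        //			continue
        //		}
        //		// 2: try the P-local span queue (tryGetSpanFast).
        //		if obj := gcw.tryGetObj(); obj != 0 { // 3: global workbufs
        //			// ... scan obj ...
        //			continue
        //		}
        //		// 4: try the span queue with no restriction (tryGetSpan),
        //		// then 5: try stealing spans from other Ps (tryStealSpan).
        //		break // no mark work available to this worker right now
        //	}
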
    70  // A gcWork provides the interface to produce and consume work for the
    71  // garbage collector.
    72  //
    73  // A gcWork can be used on the stack as follows:
    74  //
    75  //	(preemption must be disabled)
    76  //	gcw := &getg().m.p.ptr().gcw
    77  //	.. call gcw.putObj() to produce and gcw.tryGetObj() to consume ..
    78  //
    79  // It's important that any use of gcWork during the mark phase prevent
    80  // the garbage collector from transitioning to mark termination since
    81  // gcWork may locally hold GC work buffers. This can be done by
    82  // disabling preemption (systemstack or acquirem).
    83  type gcWork struct {
    84  	id int32 // same ID as the parent P
    85  
    86  	// wbuf1 and wbuf2 are the primary and secondary work buffers.
    87  	//
    88  	// This can be thought of as a stack of both work buffers'
    89  	// pointers concatenated. When we pop the last pointer, we
    90  	// shift the stack up by one work buffer by bringing in a new
    91  	// full buffer and discarding an empty one. When we fill both
    92  	// buffers, we shift the stack down by one work buffer by
    93  	// bringing in a new empty buffer and discarding a full one.
    94  	// This way we have one buffer's worth of hysteresis, which
    95  	// amortizes the cost of getting or putting a work buffer over
    96  	// at least one buffer of work and reduces contention on the
    97  	// global work lists.
    98  	//
    99  	// wbuf1 is always the buffer we're currently pushing to and
   100  	// popping from and wbuf2 is the buffer that will be discarded
   101  	// next.
   102  	//
   103  	// Invariant: Both wbuf1 and wbuf2 are nil or neither are.
   104  	wbuf1, wbuf2 *workbuf
   105  
   106  	// spanq is a queue of spans to process.
   107  	//
   108  	// Only used if goexperiment.GreenTeaGC.
   109  	spanq spanQueue
   110  
   111  	// ptrBuf is a temporary buffer used by span scanning.
   112  	ptrBuf *[pageSize / goarch.PtrSize]uintptr
   113  
   114  	// Bytes marked (blackened) on this gcWork. This is aggregated
   115  	// into work.bytesMarked by dispose.
   116  	bytesMarked uint64
   117  
   118  	// Heap scan work performed on this gcWork. This is aggregated into
   119  	// gcController by dispose and may also be flushed by callers.
   120  	// Other types of scan work are flushed immediately.
   121  	heapScanWork int64
   122  
   123  	// flushedWork indicates that a non-empty work buffer was
   124  	// flushed to the global work list since the last gcMarkDone
   125  	// termination check. Specifically, this indicates that this
   126  	// gcWork may have communicated work to another gcWork.
   127  	flushedWork bool
   128  
   129  	// mayNeedWorker is a hint that we may need to spin up a new
   130  	// worker, and that gcDrain* should call enlistWorker. This flag
   131  	// is set only if goexperiment.GreenTeaGC. If !goexperiment.GreenTeaGC,
   132  	// enlistWorker is called directly instead.
   133  	mayNeedWorker bool
   134  
   135  	// stats are scan stats broken down by size class.
   136  	stats [gc.NumSizeClasses]sizeClassScanStats
   137  }
   138  
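        // A minimal usage sketch expanding on the example in the doc comment
        // above. This is a hypothetical caller, shown only to make the
        // preemption requirement concrete; real producers such as the write
        // barrier and the drain loops have additional constraints.
        //
        //	mp := acquirem() // disable preemption so our P, and its gcw, can't change
        //	gcw := &mp.p.ptr().gcw
        //	if !gcw.putObjFast(obj) {
        //		gcw.putObj(obj) // slow path; may flush a full workbuf to the global list
        //	}
        //	releasem(mp)
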
   139  // Most of the methods of gcWork are go:nowritebarrierrec because the
   140  // write barrier itself can invoke gcWork methods but the methods are
   141  // not generally re-entrant. Hence, if a gcWork method invoked the
   142  // write barrier while the gcWork was in an inconsistent state, and
   143  // the write barrier in turn invoked a gcWork method, it could
   144  // permanently corrupt the gcWork.
   145  
   146  func (w *gcWork) init() {
   147  	w.wbuf1 = getempty()
   148  	wbuf2 := trygetfull()
   149  	if wbuf2 == nil {
   150  		wbuf2 = getempty()
   151  	}
   152  	w.wbuf2 = wbuf2
   153  }
   154  
   155  // putObj enqueues a pointer for the garbage collector to trace.
   156  // obj must point to the beginning of a heap object or an oblet.
   157  //
   158  //go:nowritebarrierrec
   159  func (w *gcWork) putObj(obj uintptr) {
   160  	flushed := false
   161  	wbuf := w.wbuf1
   162  	// Record that this may acquire the wbufSpans or heap lock to
   163  	// allocate a workbuf.
   164  	lockWithRankMayAcquire(&work.wbufSpans.lock, lockRankWbufSpans)
   165  	lockWithRankMayAcquire(&mheap_.lock, lockRankMheap)
   166  	if wbuf == nil {
   167  		w.init()
   168  		wbuf = w.wbuf1
   169  		// wbuf is empty at this point.
   170  	} else if wbuf.nobj == len(wbuf.obj) {
   171  		w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1
   172  		wbuf = w.wbuf1
   173  		if wbuf.nobj == len(wbuf.obj) {
   174  			putfull(wbuf)
   175  			w.flushedWork = true
   176  			wbuf = getempty()
   177  			w.wbuf1 = wbuf
   178  			flushed = true
   179  		}
   180  	}
   181  
   182  	wbuf.obj[wbuf.nobj] = obj
   183  	wbuf.nobj++
   184  
   185  	// If we put a buffer on full, let the GC controller know so
   186  	// it can encourage more workers to run. We delay this until
   187  	// the end of put so that w is in a consistent state, since
   188  	// enlistWorker may itself manipulate w.
   189  	if flushed && gcphase == _GCmark {
   190  		if goexperiment.GreenTeaGC {
   191  			w.mayNeedWorker = true
   192  		} else {
   193  			gcController.enlistWorker()
   194  		}
   195  	}
   196  }
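
        // To make the wbuf1/wbuf2 hysteresis concrete: immediately after the
        // flush above, wbuf1 is a freshly obtained empty buffer (holding only
        // the pointer just pushed) and wbuf2 is full. From that state this
        // gcWork can absorb roughly one buffer's worth of further puts, or
        // serve roughly one buffer's worth of gets, before it has to touch
        // the global full/empty lists again.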
   197  
   198  // putObjFast does a put and reports whether it can be done quickly;
   199  // otherwise it returns false and the caller needs to call putObj.
   200  //
   201  //go:nowritebarrierrec
   202  func (w *gcWork) putObjFast(obj uintptr) bool {
   203  	wbuf := w.wbuf1
   204  	if wbuf == nil || wbuf.nobj == len(wbuf.obj) {
   205  		return false
   206  	}
   207  
   208  	wbuf.obj[wbuf.nobj] = obj
   209  	wbuf.nobj++
   210  	return true
   211  }
   212  
   213  // putObjBatch performs a put on every pointer in obj. See putObj for
   214  // constraints on these pointers.
   215  //
   216  //go:nowritebarrierrec
   217  func (w *gcWork) putObjBatch(obj []uintptr) {
   218  	if len(obj) == 0 {
   219  		return
   220  	}
   221  
   222  	flushed := false
   223  	wbuf := w.wbuf1
   224  	if wbuf == nil {
   225  		w.init()
   226  		wbuf = w.wbuf1
   227  	}
   228  
   229  	for len(obj) > 0 {
   230  		for wbuf.nobj == len(wbuf.obj) {
   231  			putfull(wbuf)
   232  			w.flushedWork = true
   233  			w.wbuf1, w.wbuf2 = w.wbuf2, getempty()
   234  			wbuf = w.wbuf1
   235  			flushed = true
   236  		}
   237  		n := copy(wbuf.obj[wbuf.nobj:], obj)
   238  		wbuf.nobj += n
   239  		obj = obj[n:]
   240  	}
   241  
   242  	if flushed && gcphase == _GCmark {
   243  		if goexperiment.GreenTeaGC {
   244  			w.mayNeedWorker = true
   245  		} else {
   246  			gcController.enlistWorker()
   247  		}
   248  	}
   249  }
   250  
   251  // tryGetObj dequeues a pointer for the garbage collector to trace.
   252  //
   253  // If there are no pointers remaining in this gcWork or in the global
   254  // queue, tryGetObj returns 0. Note that there may still be pointers in
   255  // other gcWork instances or other caches.
   256  //
   257  //go:nowritebarrierrec
   258  func (w *gcWork) tryGetObj() uintptr {
   259  	wbuf := w.wbuf1
   260  	if wbuf == nil {
   261  		w.init()
   262  		wbuf = w.wbuf1
   263  		// wbuf is empty at this point.
   264  	}
   265  	if wbuf.nobj == 0 {
   266  		w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1
   267  		wbuf = w.wbuf1
   268  		if wbuf.nobj == 0 {
   269  			owbuf := wbuf
   270  			wbuf = trygetfull()
   271  			if wbuf == nil {
   272  				return 0
   273  			}
   274  			putempty(owbuf)
   275  			w.wbuf1 = wbuf
   276  		}
   277  	}
   278  
   279  	wbuf.nobj--
   280  	return wbuf.obj[wbuf.nobj]
   281  }
   282  
   283  // tryGetObjFast dequeues a pointer for the garbage collector to trace
   284  // if one is readily available. Otherwise it returns 0 and
   285  // the caller is expected to call tryGetObj().
   286  //
   287  //go:nowritebarrierrec
   288  func (w *gcWork) tryGetObjFast() uintptr {
   289  	wbuf := w.wbuf1
   290  	if wbuf == nil || wbuf.nobj == 0 {
   291  		return 0
   292  	}
   293  
   294  	wbuf.nobj--
   295  	return wbuf.obj[wbuf.nobj]
   296  }
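
        // A typical consumer pairs the fast and slow paths. This is a
        // hypothetical caller, for illustration only:
        //
        //	obj := gcw.tryGetObjFast()
        //	if obj == 0 {
        //		obj = gcw.tryGetObj()
        //	}
        //	if obj == 0 {
        //		// No object work is available; fall back to span work
        //		// (Green Tea GC) or check whether marking is done.
        //	}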
   297  
   298  // dispose returns any cached pointers to the global queue.
   299  // The buffers are being put on the full queue so that the
   300  // write barriers will not simply reacquire them before the
   301  // GC can inspect them. This helps reduce the mutator's
   302  // ability to hide pointers during the concurrent mark phase.
   303  //
   304  //go:nowritebarrierrec
   305  func (w *gcWork) dispose() {
   306  	if wbuf := w.wbuf1; wbuf != nil {
   307  		if wbuf.nobj == 0 {
   308  			putempty(wbuf)
   309  		} else {
   310  			putfull(wbuf)
   311  			w.flushedWork = true
   312  		}
   313  		w.wbuf1 = nil
   314  
   315  		wbuf = w.wbuf2
   316  		if wbuf.nobj == 0 {
   317  			putempty(wbuf)
   318  		} else {
   319  			putfull(wbuf)
   320  			w.flushedWork = true
   321  		}
   322  		w.wbuf2 = nil
   323  	}
   324  	if !w.spanq.empty() {
   325  		w.spanq.flush() // Flush any local work.
   326  
   327  		// There's globally-visible work now, so make everyone aware of it.
   328  		//
   329  		// Note that we need to make everyone aware even if flush didn't
   330  		// flush any local work. The global work was always visible, but
   331  		// the bitmap bit may have been unset.
   332  		//
   333  		// See the comment in tryStealSpan, which explains how it relies
   334  		// on this behavior.
   335  		work.spanqMask.set(w.id)
   336  		w.flushedWork = true
   337  	}
   338  	if w.bytesMarked != 0 {
   339  		// dispose happens relatively infrequently. If this
   340  		// atomic becomes a problem, we should first try to
   341  		// dispose less and if necessary aggregate in a per-P
   342  		// counter.
   343  		atomic.Xadd64(&work.bytesMarked, int64(w.bytesMarked))
   344  		w.bytesMarked = 0
   345  	}
   346  	if w.heapScanWork != 0 {
   347  		gcController.heapScanWork.Add(w.heapScanWork)
   348  		w.heapScanWork = 0
   349  	}
   350  }
   351  
   352  // balance moves some work that's cached in this gcWork back on the
   353  // global queue.
   354  //
   355  //go:nowritebarrierrec
   356  func (w *gcWork) balance() {
   357  	if w.wbuf1 == nil {
   358  		return
   359  	}
   360  	if wbuf := w.wbuf2; wbuf.nobj != 0 {
   361  		putfull(wbuf)
   362  		w.flushedWork = true
   363  		w.wbuf2 = getempty()
   364  	} else if wbuf := w.wbuf1; wbuf.nobj > 4 {
   365  		w.wbuf1 = handoff(wbuf)
   366  		w.flushedWork = true // handoff did putfull
   367  	} else {
   368  		return
   369  	}
   370  	// We flushed a buffer to the full list, so wake a worker.
   371  	if gcphase == _GCmark {
   372  		if goexperiment.GreenTeaGC {
   373  			w.mayNeedWorker = true
   374  		} else {
   375  			gcController.enlistWorker()
   376  		}
   377  	}
   378  }
   379  
   380  // empty reports whether w has no mark work available.
   381  //
   382  //go:nowritebarrierrec
   383  func (w *gcWork) empty() bool {
   384  	return (w.wbuf1 == nil || (w.wbuf1.nobj == 0 && w.wbuf2.nobj == 0)) && w.spanq.empty()
   385  }
   386  
   387  // Internally, the GC work pool is kept in arrays in work buffers.
   388  // The gcWork interface caches a work buffer until full (or empty) to
   389  // avoid contending on the global work buffer lists.
   390  
   391  type workbufhdr struct {
   392  	node lfnode // must be first
   393  	nobj int
   394  }
   395  
   396  type workbuf struct {
   397  	_ sys.NotInHeap
   398  	workbufhdr
   399  	// account for the above fields
   400  	obj [(_WorkbufSize - unsafe.Sizeof(workbufhdr{})) / goarch.PtrSize]uintptr
   401  }
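
        // For a sense of scale: on 64-bit platforms the header above is 24
        // bytes (a 16-byte lfnode plus an 8-byte nobj), so each 2048-byte
        // workbuf holds (2048-24)/8 = 253 object pointers. These figures
        // follow from the definitions above and differ on 32-bit platforms.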
   402  
   403  // workbuf factory routines. These functions are used to manage the
   404  // workbufs.
   405  // When the GC asks for work, these are the only routines that
   406  // make wbufs available to it.
   407  
   408  func (b *workbuf) checknonempty() {
   409  	if b.nobj == 0 {
   410  		throw("workbuf is empty")
   411  	}
   412  }
   413  
   414  func (b *workbuf) checkempty() {
   415  	if b.nobj != 0 {
   416  		throw("workbuf is not empty")
   417  	}
   418  }
   419  
   420  // getempty pops an empty work buffer off the work.empty list,
   421  // allocating new buffers if none are available.
   422  //
   423  //go:nowritebarrier
   424  func getempty() *workbuf {
   425  	var b *workbuf
   426  	if work.empty != 0 {
   427  		b = (*workbuf)(work.empty.pop())
   428  		if b != nil {
   429  			b.checkempty()
   430  		}
   431  	}
   432  	// Record that this may acquire the wbufSpans or heap lock to
   433  	// allocate a workbuf.
   434  	lockWithRankMayAcquire(&work.wbufSpans.lock, lockRankWbufSpans)
   435  	lockWithRankMayAcquire(&mheap_.lock, lockRankMheap)
   436  	if b == nil {
   437  		// Allocate more workbufs.
   438  		var s *mspan
   439  		if work.wbufSpans.free.first != nil {
   440  			lock(&work.wbufSpans.lock)
   441  			s = work.wbufSpans.free.first
   442  			if s != nil {
   443  				work.wbufSpans.free.remove(s)
   444  				work.wbufSpans.busy.insert(s)
   445  			}
   446  			unlock(&work.wbufSpans.lock)
   447  		}
   448  		if s == nil {
   449  			systemstack(func() {
   450  				s = mheap_.allocManual(workbufAlloc/pageSize, spanAllocWorkBuf)
   451  			})
   452  			if s == nil {
   453  				throw("out of memory")
   454  			}
   455  			// Record the new span in the busy list.
   456  			lock(&work.wbufSpans.lock)
   457  			work.wbufSpans.busy.insert(s)
   458  			unlock(&work.wbufSpans.lock)
   459  		}
   460  		// Slice up the span into new workbufs. Return one and
   461  		// put the rest on the empty list.
   462  		for i := uintptr(0); i+_WorkbufSize <= workbufAlloc; i += _WorkbufSize {
   463  			newb := (*workbuf)(unsafe.Pointer(s.base() + i))
   464  			newb.nobj = 0
   465  			lfnodeValidate(&newb.node)
   466  			if i == 0 {
   467  				b = newb
   468  			} else {
   469  				putempty(newb)
   470  			}
   471  		}
   472  	}
   473  	return b
   474  }
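
        // With the constants above, each 32 KiB manual allocation is sliced
        // into 32768/2048 = 16 workbufs: the first is returned to the caller
        // and the remaining 15 go onto the empty list.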
   475  
   476  // putempty puts a workbuf onto the work.empty list.
   477  // Upon entry this goroutine owns b. The lfstack.push relinquishes ownership.
   478  //
   479  //go:nowritebarrier
   480  func putempty(b *workbuf) {
   481  	b.checkempty()
   482  	work.empty.push(&b.node)
   483  }
   484  
   485  // putfull puts the workbuf on the work.full list for the GC.
   486  // putfull accepts partially full buffers so the GC can avoid competing
   487  // with the mutators for ownership of partially full buffers.
   488  //
   489  //go:nowritebarrier
   490  func putfull(b *workbuf) {
   491  	b.checknonempty()
   492  	work.full.push(&b.node)
   493  }
   494  
   495  // trygetfull tries to get a full or partially filled workbuf.
   496  // If one is not immediately available, it returns nil.
   497  //
   498  //go:nowritebarrier
   499  func trygetfull() *workbuf {
   500  	b := (*workbuf)(work.full.pop())
   501  	if b != nil {
   502  		b.checknonempty()
   503  		return b
   504  	}
   505  	return b
   506  }
   507  
   508  //go:nowritebarrier
   509  func handoff(b *workbuf) *workbuf {
   510  	// Make new buffer with half of b's pointers.
   511  	b1 := getempty()
   512  	n := b.nobj / 2
   513  	b.nobj -= n
   514  	b1.nobj = n
   515  	memmove(unsafe.Pointer(&b1.obj[0]), unsafe.Pointer(&b.obj[b.nobj]), uintptr(n)*unsafe.Sizeof(b1.obj[0]))
   516  
   517  	// Put b on full list - let first half of b get stolen.
   518  	putfull(b)
   519  	return b1
   520  }
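
        // For example, if b holds 253 pointers (a full buffer on 64-bit),
        // handoff moves the top 253/2 = 126 of them into a fresh buffer b1
        // that stays with this worker, and pushes b, now holding the bottom
        // 127, onto the full list where another worker can pick it up.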
   521  
   522  // prepareFreeWorkbufs moves busy workbuf spans to free list so they
   523  // can be freed to the heap. This must only be called when all
   524  // workbufs are on the empty list.
   525  func prepareFreeWorkbufs() {
   526  	lock(&work.wbufSpans.lock)
   527  	if work.full != 0 {
   528  		throw("cannot free workbufs when work.full != 0")
   529  	}
   530  	// Since all workbufs are on the empty list, we don't care
   531  	// which ones are in which spans. We can wipe the entire empty
   532  	// list and move all workbuf spans to the free list.
   533  	work.empty = 0
   534  	work.wbufSpans.free.takeAll(&work.wbufSpans.busy)
   535  	unlock(&work.wbufSpans.lock)
   536  }
   537  
   538  // freeSomeWbufs frees some workbufs back to the heap and returns
   539  // true if it should be called again to free more.
   540  func freeSomeWbufs(preemptible bool) bool {
   541  	const batchSize = 64 // ~1–2 µs per span.
   542  	lock(&work.wbufSpans.lock)
   543  	if gcphase != _GCoff || work.wbufSpans.free.isEmpty() {
   544  		unlock(&work.wbufSpans.lock)
   545  		return false
   546  	}
   547  	systemstack(func() {
   548  		gp := getg().m.curg
   549  		for i := 0; i < batchSize && !(preemptible && gp.preempt); i++ {
   550  			span := work.wbufSpans.free.first
   551  			if span == nil {
   552  				break
   553  			}
   554  			work.wbufSpans.free.remove(span)
   555  			mheap_.freeManual(span, spanAllocWorkBuf)
   556  		}
   557  	})
   558  	more := !work.wbufSpans.free.isEmpty()
   559  	unlock(&work.wbufSpans.lock)
   560  	return more
   561  }
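
        // With batchSize = 64 and roughly 1–2 µs per span, a single call does
        // at most on the order of 64–128 µs of freeing work while holding
        // work.wbufSpans.lock, then reports whether more spans remain so the
        // caller can yield between batches.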
   562  
