Source file src/runtime/mgcwork.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runtime
     6  
     7  import (
     8  	"internal/goarch"
     9  	"internal/goexperiment"
    10  	"internal/runtime/atomic"
    11  	"internal/runtime/gc"
    12  	"internal/runtime/sys"
    13  	"unsafe"
    14  )
    15  
    16  const (
    17  	_WorkbufSize = 2048 // in bytes; larger values result in less contention
    18  
    19  	// workbufAlloc is the number of bytes to allocate at a time
    20  	// for new workbufs. This must be a multiple of pageSize and
    21  	// should be a multiple of _WorkbufSize.
    22  	//
    23  	// Larger values reduce workbuf allocation overhead. Smaller
    24  	// values reduce heap fragmentation.
    25  	workbufAlloc = 32 << 10
    26  )
    27  
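        // For illustration only, a hedged check of the constants above:
        //
        //	workbufAlloc / _WorkbufSize = 32768 / 2048 = 16 // workbufs carved from each allocation
        //	workbufAlloc / pageSize     = 32768 / 8192 = 4  // pages per allocation (8 KiB pages)
        //
        // so the init check below passes for these values.
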
    28  func init() {
    29  	if workbufAlloc%pageSize != 0 || workbufAlloc%_WorkbufSize != 0 {
    30  		throw("bad workbufAlloc")
    31  	}
    32  }
    33  
    34  // Garbage collector work pool abstraction.
    35  //
    36  // This implements a producer/consumer model for pointers to grey
    37  // objects.
    38  //
    39  // For objects in workbufs, a grey object is one that is marked and
    40  // on a work queue. A black object is marked and not on a work queue.
    41  //
    42  // For objects in the span queue, a grey object is one that is marked
    43  // and has an unset scan bit. A black object is marked and has its scan
    44  // bit set. (Green Tea GC only.)
    45  //
    46  // Write barriers, root discovery, stack scanning, and object scanning
    47  // produce pointers to grey objects. Scanning consumes pointers to
    48  // grey objects, thus blackening them, and then scans them,
    49  // potentially producing new pointers to grey objects.
    50  //
    51  // Work queues must be prioritized in the following order wherever work
    52  // is processed.
    53  //
    54  // +----------------------------------------------------------+
    55  // | Priority | Work queue | Restrictions | Function          |
    56  // |----------------------------------------------------------|
    57  // | 1        | Workbufs   | P-local      | tryGetObjFast     |
    58  // | 2        | Span queue | P-local      | tryGetSpan(false) | [greenteagc]
    59  // | 3        | Workbufs   | None         | tryGetObj         |
    60  // | 4        | Span queue | None         | tryGetSpan(true)  | [greenteagc]
    61  // +----------------------------------------------------------+
    62  //
    63  // The rationale behind this ordering comes from two insights:
    64  // 1. It's always preferable to look for P-local work first to avoid hammering on
    65  //    global lists.
    66  // 2. It's always preferable to scan individual objects first to increase the
    67  //    likelihood that spans will accumulate more objects to scan.
    68  
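        // For illustration only, a hedged sketch of a drain loop that honors
        // the ordering above. The real loops live in gcDrain and its variants;
        // tryGetSpan exists only under goexperiment.GreenTeaGC, and the
        // scanSpan call and the "non-zero result means work was found"
        // convention for spans are assumptions here, not the exact runtime API:
        //
        //	for {
        //		if obj := gcw.tryGetObjFast(); obj != 0 { // 1: P-local workbufs
        //			scanobject(obj, gcw)
        //			continue
        //		}
        //		if s := gcw.tryGetSpan(false); s != 0 { // 2: P-local span queue
        //			scanSpan(s, gcw) // hypothetical span-scanning step
        //			continue
        //		}
        //		if obj := gcw.tryGetObj(); obj != 0 { // 3: global workbufs
        //			scanobject(obj, gcw)
        //			continue
        //		}
        //		if s := gcw.tryGetSpan(true); s != 0 { // 4: global/stolen spans
        //			scanSpan(s, gcw) // hypothetical span-scanning step
        //			continue
        //		}
        //		break // no mark work is visible to this gcWork right now
        //	}
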
    69  // A gcWork provides the interface to produce and consume work for the
    70  // garbage collector.
    71  //
    72  // A gcWork can be used on the stack as follows:
    73  //
    74  //	(preemption must be disabled)
    75  //	gcw := &getg().m.p.ptr().gcw
    76  //	.. call gcw.putObj() to produce and gcw.tryGetObj() to consume ..
    77  //
    78  // It's important that any use of gcWork during the mark phase prevent
    79  // the garbage collector from transitioning to mark termination since
    80  // gcWork may locally hold GC work buffers. This can be done by
    81  // disabling preemption (systemstack or acquirem).
    82  type gcWork struct {
    83  	// wbuf1 and wbuf2 are the primary and secondary work buffers.
    84  	//
    85  	// This can be thought of as a stack of both work buffers'
    86  	// pointers concatenated. When we pop the last pointer, we
    87  	// shift the stack up by one work buffer by bringing in a new
    88  	// full buffer and discarding an empty one. When we fill both
    89  	// buffers, we shift the stack down by one work buffer by
    90  	// bringing in a new empty buffer and discarding a full one.
    91  	// This way we have one buffer's worth of hysteresis, which
    92  	// amortizes the cost of getting or putting a work buffer over
    93  	// at least one buffer of work and reduces contention on the
    94  	// global work lists.
    95  	//
    96  	// wbuf1 is always the buffer we're currently pushing to and
    97  	// popping from and wbuf2 is the buffer that will be discarded
    98  	// next.
    99  	//
   100  	// Invariant: Both wbuf1 and wbuf2 are nil or neither are.
   101  	wbuf1, wbuf2 *workbuf
   102  
   103  	// spanq is a queue of spans to process.
   104  	//
   105  	// Only used if goexperiment.GreenTeaGC.
   106  	spanq localSpanQueue
   107  
   108  	// ptrBuf is a temporary buffer used by span scanning.
   109  	ptrBuf *[pageSize / goarch.PtrSize]uintptr
   110  
   111  	// Bytes marked (blackened) on this gcWork. This is aggregated
   112  	// into work.bytesMarked by dispose.
   113  	bytesMarked uint64
   114  
   115  	// Heap scan work performed on this gcWork. This is aggregated into
   116  	// gcController by dispose and may also be flushed by callers.
   117  	// Other types of scan work are flushed immediately.
   118  	heapScanWork int64
   119  
   120  	// flushedWork indicates that a non-empty work buffer was
   121  	// flushed to the global work list since the last gcMarkDone
   122  	// termination check. Specifically, this indicates that this
   123  	// gcWork may have communicated work to another gcWork.
   124  	flushedWork bool
   125  
   126  	// mayNeedWorker is a hint that we may need to spin up a new
   127  	// worker, and that gcDrain* should call enlistWorker. This flag
   128  	// is set only if goexperiment.GreenTeaGC. If !goexperiment.GreenTeaGC,
   129  	// enlistWorker is called directly instead.
   130  	mayNeedWorker bool
   131  
   132  	// stats are scan stats broken down by size class.
   133  	stats [gc.NumSizeClasses]sizeClassScanStats
   134  }
   135  
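        // For illustration only, a hedged sketch of the lifecycle described
        // above. The gcw field and the helpers used here (acquirem, releasem,
        // scanobject) are real, but the loop is a simplification of what the
        // mark workers actually do:
        //
        //	mp := acquirem() // keep the P, and therefore gcw, from moving under us
        //	gcw := &mp.p.ptr().gcw
        //	for obj := gcw.tryGetObj(); obj != 0; obj = gcw.tryGetObj() {
        //		scanobject(obj, gcw) // may putObj newly greyed objects back into gcw
        //	}
        //	gcw.dispose() // flush cached buffers, marked bytes, and scan work
        //	releasem(mp)
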
   136  // Most of the methods of gcWork are go:nowritebarrierrec because the
   137  // write barrier itself can invoke gcWork methods but the methods are
   138  // not generally re-entrant. Hence, if a gcWork method invoked the
   139  // write barrier while the gcWork was in an inconsistent state, and
   140  // the write barrier in turn invoked a gcWork method, it could
   141  // permanently corrupt the gcWork.
   142  
   143  func (w *gcWork) init() {
   144  	w.wbuf1 = getempty()
   145  	wbuf2 := trygetfull()
   146  	if wbuf2 == nil {
   147  		wbuf2 = getempty()
   148  	}
   149  	w.wbuf2 = wbuf2
   150  }
   151  
   152  // putObj enqueues a pointer for the garbage collector to trace.
   153  // obj must point to the beginning of a heap object or an oblet.
   154  //
   155  //go:nowritebarrierrec
   156  func (w *gcWork) putObj(obj uintptr) {
   157  	flushed := false
   158  	wbuf := w.wbuf1
   159  	// Record that this may acquire the wbufSpans or heap lock to
   160  	// allocate a workbuf.
   161  	lockWithRankMayAcquire(&work.wbufSpans.lock, lockRankWbufSpans)
   162  	lockWithRankMayAcquire(&mheap_.lock, lockRankMheap)
   163  	if wbuf == nil {
   164  		w.init()
   165  		wbuf = w.wbuf1
   166  		// wbuf is empty at this point.
   167  	} else if wbuf.nobj == len(wbuf.obj) {
   168  		w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1
   169  		wbuf = w.wbuf1
   170  		if wbuf.nobj == len(wbuf.obj) {
   171  			putfull(wbuf)
   172  			w.flushedWork = true
   173  			wbuf = getempty()
   174  			w.wbuf1 = wbuf
   175  			flushed = true
   176  		}
   177  	}
   178  
   179  	wbuf.obj[wbuf.nobj] = obj
   180  	wbuf.nobj++
   181  
   182  	// If we put a buffer on full, let the GC controller know so
   183  	// it can encourage more workers to run. We delay this until
   184  	// the end of put so that w is in a consistent state, since
   185  	// enlistWorker may itself manipulate w.
   186  	if flushed && gcphase == _GCmark {
   187  		if goexperiment.GreenTeaGC {
   188  			w.mayNeedWorker = true
   189  		} else {
   190  			gcController.enlistWorker()
   191  		}
   192  	}
   193  }
   194  
   195  // putObjFast does a put and reports whether it can be done quickly.
   196  // Otherwise it returns false and the caller needs to call putObj.
   197  //
   198  //go:nowritebarrierrec
   199  func (w *gcWork) putObjFast(obj uintptr) bool {
   200  	wbuf := w.wbuf1
   201  	if wbuf == nil || wbuf.nobj == len(wbuf.obj) {
   202  		return false
   203  	}
   204  
   205  	wbuf.obj[wbuf.nobj] = obj
   206  	wbuf.nobj++
   207  	return true
   208  }
   209  
   210  // putObjBatch performs a put on every pointer in obj. See putObj for
   211  // constraints on these pointers.
   212  //
   213  //go:nowritebarrierrec
   214  func (w *gcWork) putObjBatch(obj []uintptr) {
   215  	if len(obj) == 0 {
   216  		return
   217  	}
   218  
   219  	flushed := false
   220  	wbuf := w.wbuf1
   221  	if wbuf == nil {
   222  		w.init()
   223  		wbuf = w.wbuf1
   224  	}
   225  
   226  	for len(obj) > 0 {
   227  		for wbuf.nobj == len(wbuf.obj) {
   228  			putfull(wbuf)
   229  			w.flushedWork = true
   230  			w.wbuf1, w.wbuf2 = w.wbuf2, getempty()
   231  			wbuf = w.wbuf1
   232  			flushed = true
   233  		}
   234  		n := copy(wbuf.obj[wbuf.nobj:], obj)
   235  		wbuf.nobj += n
   236  		obj = obj[n:]
   237  	}
   238  
   239  	if flushed && gcphase == _GCmark {
   240  		if goexperiment.GreenTeaGC {
   241  			w.mayNeedWorker = true
   242  		} else {
   243  			gcController.enlistWorker()
   244  		}
   245  	}
   246  }
   247  
   248  // tryGetObj dequeues a pointer for the garbage collector to trace.
   249  //
   250  // If there are no pointers remaining in this gcWork or in the global
   251  // queue, tryGetObj returns 0. Note that there may still be pointers in
   252  // other gcWork instances or other caches.
   253  //
   254  //go:nowritebarrierrec
   255  func (w *gcWork) tryGetObj() uintptr {
   256  	wbuf := w.wbuf1
   257  	if wbuf == nil {
   258  		w.init()
   259  		wbuf = w.wbuf1
   260  		// wbuf is empty at this point.
   261  	}
   262  	if wbuf.nobj == 0 {
   263  		w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1
   264  		wbuf = w.wbuf1
   265  		if wbuf.nobj == 0 {
   266  			owbuf := wbuf
   267  			wbuf = trygetfull()
   268  			if wbuf == nil {
   269  				return 0
   270  			}
   271  			putempty(owbuf)
   272  			w.wbuf1 = wbuf
   273  		}
   274  	}
   275  
   276  	wbuf.nobj--
   277  	return wbuf.obj[wbuf.nobj]
   278  }
   279  
   280  // tryGetObjFast dequeues a pointer for the garbage collector to trace
   281  // if one is readily available. Otherwise it returns 0 and
   282  // the caller is expected to call tryGetObj().
   283  //
   284  //go:nowritebarrierrec
   285  func (w *gcWork) tryGetObjFast() uintptr {
   286  	wbuf := w.wbuf1
   287  	if wbuf == nil || wbuf.nobj == 0 {
   288  		return 0
   289  	}
   290  
   291  	wbuf.nobj--
   292  	return wbuf.obj[wbuf.nobj]
   293  }
   294  
   295  // dispose returns any cached pointers to the global queue.
   296  // The buffers are being put on the full queue so that the
   297  // write barriers will not simply reacquire them before the
   298  // GC can inspect them. This helps reduce the mutator's
   299  // ability to hide pointers during the concurrent mark phase.
   300  //
   301  //go:nowritebarrierrec
   302  func (w *gcWork) dispose() {
   303  	if wbuf := w.wbuf1; wbuf != nil {
   304  		if wbuf.nobj == 0 {
   305  			putempty(wbuf)
   306  		} else {
   307  			putfull(wbuf)
   308  			w.flushedWork = true
   309  		}
   310  		w.wbuf1 = nil
   311  
   312  		wbuf = w.wbuf2
   313  		if wbuf.nobj == 0 {
   314  			putempty(wbuf)
   315  		} else {
   316  			putfull(wbuf)
   317  			w.flushedWork = true
   318  		}
   319  		w.wbuf2 = nil
   320  	}
   321  	if w.spanq.drain() {
   322  		w.flushedWork = true
   323  	}
   324  	if w.bytesMarked != 0 {
   325  		// dispose happens relatively infrequently. If this
   326  		// atomic becomes a problem, we should first try to
   327  		// dispose less and if necessary aggregate in a per-P
   328  		// counter.
   329  		atomic.Xadd64(&work.bytesMarked, int64(w.bytesMarked))
   330  		w.bytesMarked = 0
   331  	}
   332  	if w.heapScanWork != 0 {
   333  		gcController.heapScanWork.Add(w.heapScanWork)
   334  		w.heapScanWork = 0
   335  	}
   336  }
   337  
   338  // balance moves some work that's cached in this gcWork back on the
   339  // global queue.
   340  //
   341  //go:nowritebarrierrec
   342  func (w *gcWork) balance() {
   343  	if w.wbuf1 == nil {
   344  		return
   345  	}
   346  	if wbuf := w.wbuf2; wbuf.nobj != 0 {
   347  		putfull(wbuf)
   348  		w.flushedWork = true
   349  		w.wbuf2 = getempty()
   350  	} else if wbuf := w.wbuf1; wbuf.nobj > 4 {
   351  		w.wbuf1 = handoff(wbuf)
   352  		w.flushedWork = true // handoff did putfull
   353  	} else {
   354  		return
   355  	}
   356  	// We flushed a buffer to the full list, so wake a worker.
   357  	if gcphase == _GCmark {
   358  		if goexperiment.GreenTeaGC {
   359  			w.mayNeedWorker = true
   360  		} else {
   361  			gcController.enlistWorker()
   362  		}
   363  	}
   364  }
   365  
   366  // empty reports whether w has no mark work available.
   367  //
   368  //go:nowritebarrierrec
   369  func (w *gcWork) empty() bool {
   370  	return (w.wbuf1 == nil || (w.wbuf1.nobj == 0 && w.wbuf2.nobj == 0)) && w.spanq.empty()
   371  }
   372  
   373  // Internally, the GC work pool is kept in arrays in work buffers.
   374  // The gcWork interface caches a work buffer until full (or empty) to
   375  // avoid contending on the global work buffer lists.
   376  
   377  type workbufhdr struct {
   378  	node lfnode // must be first
   379  	nobj int
   380  }
   381  
   382  type workbuf struct {
   383  	_ sys.NotInHeap
   384  	workbufhdr
   385  	// account for the above fields
   386  	obj [(_WorkbufSize - unsafe.Sizeof(workbufhdr{})) / goarch.PtrSize]uintptr
   387  }
   388  
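        // For illustration only, a hedged size check of the layout above on a
        // 64-bit platform: assuming workbufhdr is 24 bytes (a 16-byte lfnode
        // plus an 8-byte int), obj has (2048-24)/8 = 253 entries, so a full
        // workbuf carries 253 grey-object pointers in one 2 KiB chunk.
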
   389  // Workbuf factory routines. These funcs are used to manage the
   390  // workbufs.
   391  // If the GC asks for some work, these are the only routines that
   392  // make wbufs available to the GC.
   393  
   394  func (b *workbuf) checknonempty() {
   395  	if b.nobj == 0 {
   396  		throw("workbuf is empty")
   397  	}
   398  }
   399  
   400  func (b *workbuf) checkempty() {
   401  	if b.nobj != 0 {
   402  		throw("workbuf is not empty")
   403  	}
   404  }
   405  
   406  // getempty pops an empty work buffer off the work.empty list,
   407  // allocating new buffers if none are available.
   408  //
   409  //go:nowritebarrier
   410  func getempty() *workbuf {
   411  	var b *workbuf
   412  	if work.empty != 0 {
   413  		b = (*workbuf)(work.empty.pop())
   414  		if b != nil {
   415  			b.checkempty()
   416  		}
   417  	}
   418  	// Record that this may acquire the wbufSpans or heap lock to
   419  	// allocate a workbuf.
   420  	lockWithRankMayAcquire(&work.wbufSpans.lock, lockRankWbufSpans)
   421  	lockWithRankMayAcquire(&mheap_.lock, lockRankMheap)
   422  	if b == nil {
   423  		// Allocate more workbufs.
   424  		var s *mspan
   425  		if work.wbufSpans.free.first != nil {
   426  			lock(&work.wbufSpans.lock)
   427  			s = work.wbufSpans.free.first
   428  			if s != nil {
   429  				work.wbufSpans.free.remove(s)
   430  				work.wbufSpans.busy.insert(s)
   431  			}
   432  			unlock(&work.wbufSpans.lock)
   433  		}
   434  		if s == nil {
   435  			systemstack(func() {
   436  				s = mheap_.allocManual(workbufAlloc/pageSize, spanAllocWorkBuf)
   437  			})
   438  			if s == nil {
   439  				throw("out of memory")
   440  			}
   441  			// Record the new span in the busy list.
   442  			lock(&work.wbufSpans.lock)
   443  			work.wbufSpans.busy.insert(s)
   444  			unlock(&work.wbufSpans.lock)
   445  		}
   446  		// Slice up the span into new workbufs. Return one and
   447  		// put the rest on the empty list.
   448  		for i := uintptr(0); i+_WorkbufSize <= workbufAlloc; i += _WorkbufSize {
   449  			newb := (*workbuf)(unsafe.Pointer(s.base() + i))
   450  			newb.nobj = 0
   451  			lfnodeValidate(&newb.node)
   452  			if i == 0 {
   453  				b = newb
   454  			} else {
   455  				putempty(newb)
   456  			}
   457  		}
   458  	}
   459  	return b
   460  }
   461  
   462  // putempty puts a workbuf onto the work.empty list.
   463  // Upon entry this goroutine owns b. The lfstack.push relinquishes ownership.
   464  //
   465  //go:nowritebarrier
   466  func putempty(b *workbuf) {
   467  	b.checkempty()
   468  	work.empty.push(&b.node)
   469  }
   470  
   471  // putfull puts the workbuf on the work.full list for the GC.
   472  // putfull accepts partially full buffers so the GC can avoid competing
   473  // with the mutators for ownership of partially full buffers.
   474  //
   475  //go:nowritebarrier
   476  func putfull(b *workbuf) {
   477  	b.checknonempty()
   478  	work.full.push(&b.node)
   479  }
   480  
   481  // trygetfull tries to get a full or partially filled workbuf.
   482  // If one is not immediately available, it returns nil.
   483  //
   484  //go:nowritebarrier
   485  func trygetfull() *workbuf {
   486  	b := (*workbuf)(work.full.pop())
   487  	if b != nil {
   488  		b.checknonempty()
   489  		return b
   490  	}
   491  	return b
   492  }
   493  
   494  //go:nowritebarrier
   495  func handoff(b *workbuf) *workbuf {
   496  	// Make new buffer with half of b's pointers.
   497  	b1 := getempty()
   498  	n := b.nobj / 2
   499  	b.nobj -= n
   500  	b1.nobj = n
   501  	memmove(unsafe.Pointer(&b1.obj[0]), unsafe.Pointer(&b.obj[b.nobj]), uintptr(n)*unsafe.Sizeof(b1.obj[0]))
   502  
   503  	// Put b on full list - let first half of b get stolen.
   504  	putfull(b)
   505  	return b1
   506  }
   507  
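        // For illustration only, continuing the 253-entry estimate above: a
        // completely full buffer passed to handoff keeps 253-126 = 127
        // pointers in b (published on work.full for other workers to take)
        // and returns a fresh buffer holding the other n = 253/2 = 126.
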
   508  // prepareFreeWorkbufs moves busy workbuf spans to the free list so they
   509  // can be freed to the heap. This must only be called when all
   510  // workbufs are on the empty list.
   511  func prepareFreeWorkbufs() {
   512  	lock(&work.wbufSpans.lock)
   513  	if work.full != 0 {
   514  		throw("cannot free workbufs when work.full != 0")
   515  	}
   516  	// Since all workbufs are on the empty list, we don't care
   517  	// which ones are in which spans. We can wipe the entire empty
   518  	// list and move all workbuf spans to the free list.
   519  	work.empty = 0
   520  	work.wbufSpans.free.takeAll(&work.wbufSpans.busy)
   521  	unlock(&work.wbufSpans.lock)
   522  }
   523  
   524  // freeSomeWbufs frees some workbufs back to the heap and returns
   525  // true if it should be called again to free more.
   526  func freeSomeWbufs(preemptible bool) bool {
   527  	const batchSize = 64 // ~1–2 µs per span.
   528  	lock(&work.wbufSpans.lock)
   529  	if gcphase != _GCoff || work.wbufSpans.free.isEmpty() {
   530  		unlock(&work.wbufSpans.lock)
   531  		return false
   532  	}
   533  	systemstack(func() {
   534  		gp := getg().m.curg
   535  		for i := 0; i < batchSize && !(preemptible && gp.preempt); i++ {
   536  			span := work.wbufSpans.free.first
   537  			if span == nil {
   538  				break
   539  			}
   540  			work.wbufSpans.free.remove(span)
   541  			mheap_.freeManual(span, spanAllocWorkBuf)
   542  		}
   543  	})
   544  	more := !work.wbufSpans.free.isEmpty()
   545  	unlock(&work.wbufSpans.lock)
   546  	return more
   547  }
   548  
