Source file src/runtime/mgcwork.go
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"internal/goarch"
	"internal/goexperiment"
	"internal/runtime/atomic"
	"internal/runtime/gc"
	"internal/runtime/sys"
	"unsafe"
)

const (
	_WorkbufSize = 2048 // in bytes; larger values result in less contention

	// workbufAlloc is the number of bytes to allocate at a time
	// for new workbufs. This must be a multiple of pageSize and
	// should be a multiple of _WorkbufSize.
	//
	// Larger values reduce workbuf allocation overhead. Smaller
	// values reduce heap fragmentation.
	workbufAlloc = 32 << 10
)

func init() {
	if workbufAlloc%pageSize != 0 || workbufAlloc%_WorkbufSize != 0 {
		throw("bad workbufAlloc")
	}
}

// Garbage collector work pool abstraction.
//
// This implements a producer/consumer model for pointers to grey
// objects.
//
// For objects in workbufs, a grey object is one that is marked and
// on a work queue. A black object is marked and not on a work queue.
//
// For objects in the span queue, a grey object is one that is marked
// and has an unset scan bit. A black object is marked and has its scan
// bit set. (Green Tea GC only.)
//
// Write barriers, root discovery, stack scanning, and object scanning
// produce pointers to grey objects. Scanning consumes pointers to
// grey objects, thus blackening them, and then scans them,
// potentially producing new pointers to grey objects.
//
// Work queues must be prioritized in the following order wherever work
// is processed.
//
//	+----------------------------------------------------------+
//	| Priority | Work queue | Restrictions | Function          |
//	|----------------------------------------------------------|
//	| 1        | Workbufs   | P-local      | tryGetObjFast     |
//	| 2        | Span queue | P-local      | tryGetSpan(false) | [greenteagc]
//	| 3        | Workbufs   | None         | tryGetObj         |
//	| 4        | Span queue | None         | tryGetSpan(true)  | [greenteagc]
//	+----------------------------------------------------------+
//
// The rationale behind this ordering comes from two insights:
//  1. It's always preferable to look for P-local work first to avoid hammering on
//     global lists.
//  2. It's always preferable to scan individual objects first to increase the
//     likelihood that spans will accumulate more objects to scan.

// A gcWork provides the interface to produce and consume work for the
// garbage collector.
//
// A gcWork can be used on the stack as follows:
//
//	(preemption must be disabled)
//	gcw := &getg().m.p.ptr().gcw
//	.. call gcw.putObj() to produce and gcw.tryGetObj() to consume ..
//
// It's important that any use of gcWork during the mark phase prevent
// the garbage collector from transitioning to mark termination since
// gcWork may locally hold GC work buffers. This can be done by
// disabling preemption (systemstack or acquirem).
type gcWork struct {
	// wbuf1 and wbuf2 are the primary and secondary work buffers.
	//
	// This can be thought of as a stack of both work buffers'
	// pointers concatenated. When we pop the last pointer, we
	// shift the stack up by one work buffer by bringing in a new
	// full buffer and discarding an empty one. When we fill both
	// buffers, we shift the stack down by one work buffer by
	// bringing in a new empty buffer and discarding a full one.
	// This way we have one buffer's worth of hysteresis, which
	// amortizes the cost of getting or putting a work buffer over
	// at least one buffer of work and reduces contention on the
	// global work lists.
	//
	// wbuf1 is always the buffer we're currently pushing to and
	// popping from and wbuf2 is the buffer that will be discarded
	// next.
	//
	// Invariant: Both wbuf1 and wbuf2 are nil or neither are.
	wbuf1, wbuf2 *workbuf

	// spanq is a queue of spans to process.
	//
	// Only used if goexperiment.GreenTeaGC.
	spanq localSpanQueue

	// ptrBuf is a temporary buffer used by span scanning.
	ptrBuf *[pageSize / goarch.PtrSize]uintptr

	// Bytes marked (blackened) on this gcWork. This is aggregated
	// into work.bytesMarked by dispose.
	bytesMarked uint64

	// Heap scan work performed on this gcWork. This is aggregated into
	// gcController by dispose and may also be flushed by callers.
	// Other types of scan work are flushed immediately.
	heapScanWork int64

	// flushedWork indicates that a non-empty work buffer was
	// flushed to the global work list since the last gcMarkDone
	// termination check. Specifically, this indicates that this
	// gcWork may have communicated work to another gcWork.
	flushedWork bool

	// mayNeedWorker is a hint that we may need to spin up a new
	// worker, and that gcDrain* should call enlistWorker. This flag
	// is set only if goexperiment.GreenTeaGC. If !goexperiment.GreenTeaGC,
	// enlistWorker is called directly instead.
	mayNeedWorker bool

	// stats are scan stats broken down by size class.
	stats [gc.NumSizeClasses]sizeClassScanStats
}

// Most of the methods of gcWork are go:nowritebarrierrec because the
// write barrier itself can invoke gcWork methods but the methods are
// not generally re-entrant. Hence, if a gcWork method invoked the
// write barrier while the gcWork was in an inconsistent state, and
// the write barrier in turn invoked a gcWork method, it could
// permanently corrupt the gcWork.

func (w *gcWork) init() {
	w.wbuf1 = getempty()
	wbuf2 := trygetfull()
	if wbuf2 == nil {
		wbuf2 = getempty()
	}
	w.wbuf2 = wbuf2
}

// putObj enqueues a pointer for the garbage collector to trace.
// obj must point to the beginning of a heap object or an oblet.
//
//go:nowritebarrierrec
func (w *gcWork) putObj(obj uintptr) {
	flushed := false
	wbuf := w.wbuf1
	// Record that this may acquire the wbufSpans or heap lock to
	// allocate a workbuf.
	lockWithRankMayAcquire(&work.wbufSpans.lock, lockRankWbufSpans)
	lockWithRankMayAcquire(&mheap_.lock, lockRankMheap)
	if wbuf == nil {
		w.init()
		wbuf = w.wbuf1
		// wbuf is empty at this point.
	} else if wbuf.nobj == len(wbuf.obj) {
		w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1
		wbuf = w.wbuf1
		if wbuf.nobj == len(wbuf.obj) {
			putfull(wbuf)
			w.flushedWork = true
			wbuf = getempty()
			w.wbuf1 = wbuf
			flushed = true
		}
	}

	wbuf.obj[wbuf.nobj] = obj
	wbuf.nobj++

	// If we put a buffer on full, let the GC controller know so
	// it can encourage more workers to run. We delay this until
	// the end of put so that w is in a consistent state, since
	// enlistWorker may itself manipulate w.
	if flushed && gcphase == _GCmark {
		if goexperiment.GreenTeaGC {
			w.mayNeedWorker = true
		} else {
			gcController.enlistWorker()
		}
	}
}

// putObjFast does a put and reports whether it can be done quickly;
// otherwise it returns false and the caller needs to call putObj.
//
//go:nowritebarrierrec
func (w *gcWork) putObjFast(obj uintptr) bool {
	wbuf := w.wbuf1
	if wbuf == nil || wbuf.nobj == len(wbuf.obj) {
		return false
	}

	wbuf.obj[wbuf.nobj] = obj
	wbuf.nobj++
	return true
}

// putObjBatch performs a put on every pointer in obj. See putObj for
// constraints on these pointers.
//
//go:nowritebarrierrec
func (w *gcWork) putObjBatch(obj []uintptr) {
	if len(obj) == 0 {
		return
	}

	flushed := false
	wbuf := w.wbuf1
	if wbuf == nil {
		w.init()
		wbuf = w.wbuf1
	}

	for len(obj) > 0 {
		for wbuf.nobj == len(wbuf.obj) {
			putfull(wbuf)
			w.flushedWork = true
			w.wbuf1, w.wbuf2 = w.wbuf2, getempty()
			wbuf = w.wbuf1
			flushed = true
		}
		n := copy(wbuf.obj[wbuf.nobj:], obj)
		wbuf.nobj += n
		obj = obj[n:]
	}

	if flushed && gcphase == _GCmark {
		if goexperiment.GreenTeaGC {
			w.mayNeedWorker = true
		} else {
			gcController.enlistWorker()
		}
	}
}

// tryGetObj dequeues a pointer for the garbage collector to trace.
//
// If there are no pointers remaining in this gcWork or in the global
// queue, tryGetObj returns 0. Note that there may still be pointers in
// other gcWork instances or other caches.
//
//go:nowritebarrierrec
func (w *gcWork) tryGetObj() uintptr {
	wbuf := w.wbuf1
	if wbuf == nil {
		w.init()
		wbuf = w.wbuf1
		// wbuf is empty at this point.
	}
	if wbuf.nobj == 0 {
		w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1
		wbuf = w.wbuf1
		if wbuf.nobj == 0 {
			owbuf := wbuf
			wbuf = trygetfull()
			if wbuf == nil {
				return 0
			}
			putempty(owbuf)
			w.wbuf1 = wbuf
		}
	}

	wbuf.nobj--
	return wbuf.obj[wbuf.nobj]
}

// tryGetObjFast dequeues a pointer for the garbage collector to trace
// if one is readily available. Otherwise it returns 0 and
// the caller is expected to call tryGetObj().
//
//go:nowritebarrierrec
func (w *gcWork) tryGetObjFast() uintptr {
	wbuf := w.wbuf1
	if wbuf == nil || wbuf.nobj == 0 {
		return 0
	}

	wbuf.nobj--
	return wbuf.obj[wbuf.nobj]
}

// dispose returns any cached pointers to the global queue.
// The buffers are being put on the full queue so that the
// write barriers will not simply reacquire them before the
// GC can inspect them. This helps reduce the mutator's
// ability to hide pointers during the concurrent mark phase.
//
//go:nowritebarrierrec
func (w *gcWork) dispose() {
	if wbuf := w.wbuf1; wbuf != nil {
		if wbuf.nobj == 0 {
			putempty(wbuf)
		} else {
			putfull(wbuf)
			w.flushedWork = true
		}
		w.wbuf1 = nil

		wbuf = w.wbuf2
		if wbuf.nobj == 0 {
			putempty(wbuf)
		} else {
			putfull(wbuf)
			w.flushedWork = true
		}
		w.wbuf2 = nil
	}
	if w.spanq.drain() {
		w.flushedWork = true
	}
	if w.bytesMarked != 0 {
		// dispose happens relatively infrequently. If this
		// atomic becomes a problem, we should first try to
		// dispose less and if necessary aggregate in a per-P
		// counter.
		atomic.Xadd64(&work.bytesMarked, int64(w.bytesMarked))
		w.bytesMarked = 0
	}
	if w.heapScanWork != 0 {
		gcController.heapScanWork.Add(w.heapScanWork)
		w.heapScanWork = 0
	}
}

// balance moves some work that's cached in this gcWork back on the
// global queue.
//
//go:nowritebarrierrec
func (w *gcWork) balance() {
	if w.wbuf1 == nil {
		return
	}
	if wbuf := w.wbuf2; wbuf.nobj != 0 {
		putfull(wbuf)
		w.flushedWork = true
		w.wbuf2 = getempty()
	} else if wbuf := w.wbuf1; wbuf.nobj > 4 {
		w.wbuf1 = handoff(wbuf)
		w.flushedWork = true // handoff did putfull
	} else {
		return
	}
	// We flushed a buffer to the full list, so wake a worker.
	if gcphase == _GCmark {
		if goexperiment.GreenTeaGC {
			w.mayNeedWorker = true
		} else {
			gcController.enlistWorker()
		}
	}
}

// empty reports whether w has no mark work available.
//
//go:nowritebarrierrec
func (w *gcWork) empty() bool {
	return (w.wbuf1 == nil || (w.wbuf1.nobj == 0 && w.wbuf2.nobj == 0)) && w.spanq.empty()
}

// Internally, the GC work pool is kept in arrays in work buffers.
// The gcWork interface caches a work buffer until full (or empty) to
// avoid contending on the global work buffer lists.

type workbufhdr struct {
	node lfnode // must be first
	nobj int
}

type workbuf struct {
	_ sys.NotInHeap
	workbufhdr
	// account for the above fields
	obj [(_WorkbufSize - unsafe.Sizeof(workbufhdr{})) / goarch.PtrSize]uintptr
}

// workbuf factory routines. These funcs are used to manage the
// workbufs.
// If the GC asks for some work these are the only routines that
// make wbufs available to the GC.

func (b *workbuf) checknonempty() {
	if b.nobj == 0 {
		throw("workbuf is empty")
	}
}

func (b *workbuf) checkempty() {
	if b.nobj != 0 {
		throw("workbuf is not empty")
	}
}

// getempty pops an empty work buffer off the work.empty list,
// allocating new buffers if none are available.
//
//go:nowritebarrier
func getempty() *workbuf {
	var b *workbuf
	if work.empty != 0 {
		b = (*workbuf)(work.empty.pop())
		if b != nil {
			b.checkempty()
		}
	}
	// Record that this may acquire the wbufSpans or heap lock to
	// allocate a workbuf.
	lockWithRankMayAcquire(&work.wbufSpans.lock, lockRankWbufSpans)
	lockWithRankMayAcquire(&mheap_.lock, lockRankMheap)
	if b == nil {
		// Allocate more workbufs.
		var s *mspan
		if work.wbufSpans.free.first != nil {
			lock(&work.wbufSpans.lock)
			s = work.wbufSpans.free.first
			if s != nil {
				work.wbufSpans.free.remove(s)
				work.wbufSpans.busy.insert(s)
			}
			unlock(&work.wbufSpans.lock)
		}
		if s == nil {
			systemstack(func() {
				s = mheap_.allocManual(workbufAlloc/pageSize, spanAllocWorkBuf)
			})
			if s == nil {
				throw("out of memory")
			}
			// Record the new span in the busy list.
			lock(&work.wbufSpans.lock)
			work.wbufSpans.busy.insert(s)
			unlock(&work.wbufSpans.lock)
		}
		// Slice up the span into new workbufs. Return one and
		// put the rest on the empty list.
		for i := uintptr(0); i+_WorkbufSize <= workbufAlloc; i += _WorkbufSize {
			newb := (*workbuf)(unsafe.Pointer(s.base() + i))
			newb.nobj = 0
			lfnodeValidate(&newb.node)
			if i == 0 {
				b = newb
			} else {
				putempty(newb)
			}
		}
	}
	return b
}

// putempty puts a workbuf onto the work.empty list.
// Upon entry this goroutine owns b. The lfstack.push relinquishes ownership.
//
//go:nowritebarrier
func putempty(b *workbuf) {
	b.checkempty()
	work.empty.push(&b.node)
}

// putfull puts the workbuf on the work.full list for the GC.
// putfull accepts partially full buffers so the GC can avoid competing
// with the mutators for ownership of partially full buffers.
//
//go:nowritebarrier
func putfull(b *workbuf) {
	b.checknonempty()
	work.full.push(&b.node)
}

// trygetfull tries to get a full or partially full work buffer.
// If one is not immediately available, it returns nil.
//
//go:nowritebarrier
func trygetfull() *workbuf {
	b := (*workbuf)(work.full.pop())
	if b != nil {
		b.checknonempty()
		return b
	}
	return b
}

//go:nowritebarrier
func handoff(b *workbuf) *workbuf {
	// Make new buffer with half of b's pointers.
	b1 := getempty()
	n := b.nobj / 2
	b.nobj -= n
	b1.nobj = n
	memmove(unsafe.Pointer(&b1.obj[0]), unsafe.Pointer(&b.obj[b.nobj]), uintptr(n)*unsafe.Sizeof(b1.obj[0]))

	// Put b on full list - let first half of b get stolen.
	putfull(b)
	return b1
}

// prepareFreeWorkbufs moves busy workbuf spans to the free list so they
// can be freed to the heap. This must only be called when all
// workbufs are on the empty list.
func prepareFreeWorkbufs() {
	lock(&work.wbufSpans.lock)
	if work.full != 0 {
		throw("cannot free workbufs when work.full != 0")
	}
	// Since all workbufs are on the empty list, we don't care
	// which ones are in which spans. We can wipe the entire empty
	// list and move all workbuf spans to the free list.
	work.empty = 0
	work.wbufSpans.free.takeAll(&work.wbufSpans.busy)
	unlock(&work.wbufSpans.lock)
}

// freeSomeWbufs frees some workbufs back to the heap and returns
// true if it should be called again to free more.
func freeSomeWbufs(preemptible bool) bool {
	const batchSize = 64 // ~1–2 µs per span.
	lock(&work.wbufSpans.lock)
	if gcphase != _GCoff || work.wbufSpans.free.isEmpty() {
		unlock(&work.wbufSpans.lock)
		return false
	}
	systemstack(func() {
		gp := getg().m.curg
		for i := 0; i < batchSize && !(preemptible && gp.preempt); i++ {
			span := work.wbufSpans.free.first
			if span == nil {
				break
			}
			work.wbufSpans.free.remove(span)
			mheap_.freeManual(span, spanAllocWorkBuf)
		}
	})
	more := !work.wbufSpans.free.isEmpty()
	unlock(&work.wbufSpans.lock)
	return more
}
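
Below the listing, a minimal sketch (not runtime code) of the producer-side usage pattern documented in the gcWork doc comment above: pin the goroutine to its M so the P and its cached gcWork cannot be taken away, try the fast path first, and fall back to the slow path. greyHypothetical is a made-up name; acquirem/releasem, putObjFast, and putObj are the real routines it leans on.

// greyHypothetical sketches how marking code (e.g. a write barrier flush
// path) might enqueue a heap object during the mark phase. This is an
// illustration under the stated assumptions, not part of mgcwork.go.
func greyHypothetical(obj uintptr) {
	mp := acquirem() // disable preemption so the P's gcWork stays ours
	gcw := &mp.p.ptr().gcw
	if !gcw.putObjFast(obj) { // common case: room left in wbuf1
		gcw.putObj(obj) // slow path; may flush a full buffer to work.full
	}
	releasem(mp)
}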
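A companion sketch of the consumer side, following the priority table at the top of the file. The span-queue steps (gcw.tryGetSpan) are defined outside this file, so they appear only as comments, and scanHypothetical is a stand-in for the runtime's actual object-scanning code, not a real function.

// drainSketch consumes work in the documented priority order. It assumes the
// caller has already disabled preemption and obtained gcw as shown above.
func drainSketch(gcw *gcWork) {
	for {
		// 1. P-local workbuf objects (cheapest; no global contention).
		obj := gcw.tryGetObjFast()
		if obj == 0 {
			// 2. (Green Tea GC) the P-local span queue, via
			//    gcw.tryGetSpan(false), would be tried here.
			// 3. Workbufs from the global work.full list.
			obj = gcw.tryGetObj()
		}
		if obj == 0 {
			// 4. (Green Tea GC) the global span queue, via
			//    gcw.tryGetSpan(true), would be the last resort.
			return // no object work available
		}
		scanHypothetical(obj, gcw) // stand-in for the runtime's object scan
	}
}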