Source file
src/runtime/mgcmark_greenteagc.go
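
// This file contains the span-scanning ("Green Tea") marking code used by
// the garbage collector for small objects: marks are recorded in per-span
// inline mark bits, and whole spans are queued and scanned in bulk rather
// than queueing and scanning individual objects.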

package runtime

import (
	"internal/cpu"
	"internal/goarch"
	"internal/runtime/atomic"
	"internal/runtime/gc"
	"internal/runtime/sys"
	"unsafe"
)
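
// doubleCheckGreenTea enables extra consistency checks in the
// span-scanning code paths below.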
const doubleCheckGreenTea = false
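
// spanInlineMarkBits is the metadata stored inline at the end of each
// small-object span: scan bits for objects queued for scanning, the span's
// scan ownership word, the span's mark bits, and its span class.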
type spanInlineMarkBits struct {
	scans [63]uint8
	owned spanScanOwnership
	marks [63]uint8
	class spanClass
}
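
// spanScanOwnership describes whether a span has been acquired for
// scanning and, coarsely, how many objects have been marked in it since
// it was queued: none, exactly one, or more than one.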
type spanScanOwnership uint8

const (
	spanScanUnowned  spanScanOwnership = 0
	spanScanOneMark                    = 1 << iota
	spanScanManyMark
)

func (o *spanScanOwnership) load() spanScanOwnership {
	return spanScanOwnership(atomic.Load8((*uint8)(unsafe.Pointer(o))))
}

func (o *spanScanOwnership) or(v spanScanOwnership) spanScanOwnership {
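	// Use a 32-bit atomic Or on the aligned word containing o so the old
	// value can be recovered; the byte offset (endianness-adjusted) selects
	// o's byte within that word.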
	o32 := (*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(o)) &^ 0b11))
	off := (uintptr(unsafe.Pointer(o)) & 0b11) * 8
	if goarch.BigEndian {
		off = 32 - off - 8
	}
	return spanScanOwnership(atomic.Or32(o32, uint32(v)<<off) >> off)
}

func (imb *spanInlineMarkBits) init(class spanClass) {
	*imb = spanInlineMarkBits{}
	imb.class = class
}
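
// tryAcquire attempts to take scan ownership of the span by advancing its
// ownership state, and reports whether the caller observed the span as
// unowned and is therefore responsible for queueing it for scanning.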
func (imb *spanInlineMarkBits) tryAcquire() bool {
	switch imb.owned.load() {
	case spanScanUnowned:
		if imb.owned.or(spanScanOneMark) == spanScanUnowned {
			return true
		}
		fallthrough
	case spanScanOneMark:
		return imb.owned.or(spanScanManyMark) == spanScanUnowned
	}
	return false
}
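
// release drops scan ownership of the span and returns the ownership state
// observed at the moment of release.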
func (imb *spanInlineMarkBits) release() spanScanOwnership {
	return spanScanOwnership(atomic.Xchg8((*uint8)(unsafe.Pointer(&imb.owned)), uint8(spanScanUnowned)))
}
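
// spanInlineMarkBitsFromBase returns the spanInlineMarkBits stored at the
// end of the span whose base address is base.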
func spanInlineMarkBitsFromBase(base uintptr) *spanInlineMarkBits {
	return (*spanInlineMarkBits)(unsafe.Pointer(base + gc.PageSize - unsafe.Sizeof(spanInlineMarkBits{})))
}

func (s *mspan) initInlineMarkBits() {
	if doubleCheckGreenTea && !gcUsesSpanInlineMarkBits(s.elemsize) {
		throw("expected span with inline mark bits")
	}
	s.inlineMarkBits().init(s.spanclass)
}
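
// mergeInlineMarks ORs the span's inline mark bits into dst.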
func (s *mspan) mergeInlineMarks(dst *gcBits) {
	if doubleCheckGreenTea && !gcUsesSpanInlineMarkBits(s.elemsize) {
		throw("expected span with inline mark bits")
	}
	bytes := divRoundUp(uintptr(s.nelems), 8)
	imb := s.inlineMarkBits()
	_ = imb.marks[bytes-1]
	for i := uintptr(0); i < bytes; i++ {
		*dst.bytep(i) |= imb.marks[i]
	}
	if doubleCheckGreenTea && !s.spanclass.noscan() && imb.marks != imb.scans {
		throw("marks don't match scans for span with pointer")
	}
}

func (s *mspan) inlineMarkBits() *spanInlineMarkBits {
	if doubleCheckGreenTea && !gcUsesSpanInlineMarkBits(s.elemsize) {
		throw("expected span with inline mark bits")
	}
	return spanInlineMarkBitsFromBase(s.base())
}

func (s *mspan) markBitsForIndex(objIndex uintptr) (bits markBits) {
	if gcUsesSpanInlineMarkBits(s.elemsize) {
		bits.bytep = &s.inlineMarkBits().marks[objIndex/8]
	} else {
		bits.bytep = s.gcmarkBits.bytep(objIndex / 8)
	}
	bits.mask = uint8(1) << (objIndex % 8)
	bits.index = objIndex
	return
}

func (s *mspan) markBitsForBase() markBits {
	if gcUsesSpanInlineMarkBits(s.elemsize) {
		return markBits{&s.inlineMarkBits().marks[0], uint8(1), 0}
	}
	return markBits{&s.gcmarkBits.x, uint8(1), 0}
}

func (s *mspan) scannedBitsForIndex(objIndex uintptr) markBits {
	return markBits{&s.inlineMarkBits().scans[objIndex/8], uint8(1) << (objIndex % 8), objIndex}
}
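
// gcUsesSpanInlineMarkBits reports whether spans with the given element
// size carry inline mark bits: the heap bits must fit in the span and
// objects must be at least 16 bytes.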
func gcUsesSpanInlineMarkBits(size uintptr) bool {
	return heapBitsInSpan(size) && size >= 16
}
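
// tryDeferToSpanScan marks the object at p if it lives in a span that uses
// inline mark bits, queueing the span for scanning when necessary, and
// reports whether it handled the pointer. If it returns false, the caller
// must fall back to the regular per-object marking path.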
func tryDeferToSpanScan(p uintptr, gcw *gcWork) bool {
	if useCheckmark {
		return false
	}

	ha := heapArenaOf(p)
	if ha == nil {
		return false
	}
	pageIdx := ((p / pageSize) / 8) % uintptr(len(ha.pageInUse))
	pageMask := byte(1 << ((p / pageSize) % 8))
	if ha.pageUseSpanInlineMarkBits[pageIdx]&pageMask == 0 {
		return false
	}

	base := alignDown(p, gc.PageSize)
	q := spanInlineMarkBitsFromBase(base)
	objIndex := uint16((uint64(p-base) * uint64(gc.SizeClassToDivMagic[q.class.sizeclass()])) >> 32)

	idx, mask := objIndex/8, uint8(1)<<(objIndex%8)
	if atomic.Load8(&q.marks[idx])&mask != 0 {
		return true
	}
	atomic.Or8(&q.marks[idx], mask)

	if q.class.noscan() {
		gcw.bytesMarked += uint64(gc.SizeClassToSize[q.class.sizeclass()])
		return true
	}

	if q.tryAcquire() {
		if gcw.spanq.put(makeObjPtr(base, objIndex)) {
			if gcphase == _GCmark {
				gcw.mayNeedWorker = true
			}
			gcw.flushedWork = true
		}
	}
	return true
}
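
// tryGetSpan fetches a span to scan, trying the local span queue first
// and, if slow is set, also the global span queue and other Ps' queues.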
func (w *gcWork) tryGetSpan(slow bool) objptr {
	if s := w.spanq.get(); s != 0 {
		return s
	}

	if slow {
		if s := work.spanq.get(w); s != 0 {
			return s
		}

		return spanQueueSteal(w)
	}
	return 0
}
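
// spanQueue is the global, lock-protected queue of spans to scan. The
// avail flag lets workers check for emptiness without taking the lock.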
type spanQueue struct {
	avail atomic.Bool
	_     cpu.CacheLinePad
	lock  mutex
	q     mSpanQueue
}

func (q *spanQueue) empty() bool {
	return !q.avail.Load()
}

func (q *spanQueue) size() int {
	return q.q.n
}

func (q *spanQueue) putBatch(batch []objptr) {
	var list mSpanQueue
	for _, p := range batch {
		s := spanOfUnchecked(p.spanBase())
		s.scanIdx = p.objIndex()
		list.push(s)
	}

	lock(&q.lock)
	if q.q.n == 0 {
		q.avail.Store(true)
	}
	q.q.takeAll(&list)
	unlock(&q.lock)
}
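
// get takes a proportional share of spans off the global queue, moves all
// but one of them into gcw's local span queue, and returns the remaining
// span, or 0 if the queue is empty.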
func (q *spanQueue) get(gcw *gcWork) objptr {
	if q.empty() {
		return 0
	}
	lock(&q.lock)
	if q.q.n == 0 {
		unlock(&q.lock)
		return 0
	}
	n := q.q.n/int(gomaxprocs) + 1
	if n > q.q.n {
		n = q.q.n
	}
	if max := len(gcw.spanq.ring) / 2; n > max {
		n = max
	}
	newQ := q.q.popN(n)
	if q.q.n == 0 {
		q.avail.Store(false)
	}
	unlock(&q.lock)

	s := newQ.pop()
	for newQ.n > 0 {
		s := newQ.pop()
		gcw.spanq.put(makeObjPtr(s.base(), s.scanIdx))
	}
	return makeObjPtr(s.base(), s.scanIdx)
}
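
// localSpanQueue is a P-local, fixed-size queue of spans to scan, in the
// style of the scheduler's run queues: the owner pushes at the tail, while
// the owner and stealing Ps pop from the head with CAS. Overflow spills
// into the global spanQueue.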
type localSpanQueue struct {
	head atomic.Uint32
	tail atomic.Uint32
	ring [256]objptr
}

func (q *localSpanQueue) put(s objptr) (flushed bool) {
	for {
		h := q.head.Load()
		t := q.tail.Load()
		if t-h < uint32(len(q.ring)) {
			q.ring[t%uint32(len(q.ring))] = s
			q.tail.Store(t + 1)
			return false
		}
		if q.putSlow(s, h, t) {
			return true
		}
	}
}
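
// putSlow moves half of the local queue, plus s, onto the global span
// queue. It reports whether it succeeded; on failure the caller retries
// the fast path.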
func (q *localSpanQueue) putSlow(s objptr, h, t uint32) bool {
	var batch [len(q.ring)/2 + 1]objptr

	n := t - h
	n = n / 2
	if n != uint32(len(q.ring)/2) {
		throw("localSpanQueue.putSlow: queue is not full")
	}
	for i := uint32(0); i < n; i++ {
		batch[i] = q.ring[(h+i)%uint32(len(q.ring))]
	}
	if !q.head.CompareAndSwap(h, h+n) {
		return false
	}
	batch[n] = s

	work.spanq.putBatch(batch[:])
	return true
}

func (q *localSpanQueue) get() objptr {
	for {
		h := q.head.Load()
		t := q.tail.Load()
		if t == h {
			return 0
		}
		s := q.ring[h%uint32(len(q.ring))]
		if q.head.CompareAndSwap(h, h+1) {
			return s
		}
	}
}

func (q *localSpanQueue) empty() bool {
	h := q.head.Load()
	t := q.tail.Load()
	return t == h
}
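
// stealFrom moves roughly half of q2's spans into q1 and returns one of
// the stolen spans, or 0 if q2 was empty.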
func (q1 *localSpanQueue) stealFrom(q2 *localSpanQueue) objptr {
	writeHead := q1.tail.Load()

	var n uint32
	for {
		h := q2.head.Load()
		t := q2.tail.Load()
		n = t - h
		n = n - n/2
		if n == 0 {
			return 0
		}
		if n > uint32(len(q2.ring)/2) {
			continue
		}
		for i := uint32(0); i < n; i++ {
			c := q2.ring[(h+i)%uint32(len(q2.ring))]
			q1.ring[(writeHead+i)%uint32(len(q1.ring))] = c
		}
		if q2.head.CompareAndSwap(h, h+n) {
			break
		}
	}
	n--
	c := q1.ring[(writeHead+n)%uint32(len(q1.ring))]
	if n == 0 {
		return c
	}
	h := q1.head.Load()
	if writeHead-h+n >= uint32(len(q1.ring)) {
		throw("localSpanQueue.stealFrom: queue overflow")
	}
	q1.tail.Store(writeHead + n)
	return c
}
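
// drain moves the entire contents of the local queue onto the global span
// queue and reports whether it moved anything.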
func (q *localSpanQueue) drain() bool {
	var batch [len(q.ring)]objptr

	var n uint32
	for {
		var h uint32
		for {
			h = q.head.Load()
			t := q.tail.Load()
			n = t - h
			if n == 0 {
				return false
			}
			if n <= uint32(len(q.ring)) {
				break
			}
		}
		for i := uint32(0); i < n; i++ {
			batch[i] = q.ring[(h+i)%uint32(len(q.ring))]
		}
		if q.head.CompareAndSwap(h, h+n) {
			break
		}
	}
	if !q.empty() {
		throw("drained local span queue, but not empty")
	}

	work.spanq.putBatch(batch[:n])
	return true
}
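
// spanQueueSteal attempts to steal a span to scan from another P's local
// span queue.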
func spanQueueSteal(gcw *gcWork) objptr {
	pp := getg().m.p.ptr()

	for enum := stealOrder.start(cheaprand()); !enum.done(); enum.next() {
		p2 := allp[enum.position()]
		if pp == p2 {
			continue
		}
		if s := gcw.spanq.stealFrom(&p2.gcw.spanq); s != 0 {
			return s
		}
	}
	return 0
}
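
// objptr packs a span's base address together with an object's index
// within that span: the bits below the page size hold the object index,
// and the page-aligned high bits hold the span base.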
type objptr uintptr

func makeObjPtr(spanBase uintptr, objIndex uint16) objptr {
	if doubleCheckGreenTea && spanBase&((1<<gc.PageShift)-1) != 0 {
		throw("created objptr with address that is incorrectly aligned")
	}
	return objptr(spanBase | uintptr(objIndex))
}

func (p objptr) spanBase() uintptr {
	return uintptr(p) &^ ((1 << gc.PageShift) - 1)
}

func (p objptr) objIndex() uint16 {
	return uint16(p) & ((1 << gc.PageShift) - 1)
}
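
// scanSpan scans objects in the span identified by p. If only one object
// was marked since the span was queued, just that object is scanned;
// otherwise the span's scan bits are updated in bulk and every newly
// marked object is scanned.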
func scanSpan(p objptr, gcw *gcWork) {
	spanBase := p.spanBase()
	imb := spanInlineMarkBitsFromBase(spanBase)
	spanclass := imb.class
	if spanclass.noscan() {
		throw("noscan object in scanSpan")
	}
	elemsize := uintptr(gc.SizeClassToSize[spanclass.sizeclass()])

	if imb.release() == spanScanOneMark {
		objIndex := p.objIndex()
		bytep := &imb.scans[objIndex/8]
		mask := uint8(1) << (objIndex % 8)
		if atomic.Load8(bytep)&mask != 0 {
			return
		}
		atomic.Or8(bytep, mask)
		gcw.bytesMarked += uint64(elemsize)
		if debug.gctrace > 1 {
			gcw.stats[spanclass.sizeclass()].spansSparseScanned++
			gcw.stats[spanclass.sizeclass()].spanObjsSparseScanned++
		}
		b := spanBase + uintptr(objIndex)*elemsize
		scanObjectSmall(spanBase, b, elemsize, gcw)
		return
	}

	divMagic := uint64(gc.SizeClassToDivMagic[spanclass.sizeclass()])
	usableSpanSize := uint64(gc.PageSize - unsafe.Sizeof(spanInlineMarkBits{}))
	if !spanclass.noscan() {
		usableSpanSize -= gc.PageSize / goarch.PtrSize / 8
	}
	nelems := uint16((usableSpanSize * divMagic) >> 32)

	var toScan gc.ObjMask
	objsMarked := spanSetScans(spanBase, nelems, imb, &toScan)
	if objsMarked == 0 {
		return
	}
	gcw.bytesMarked += uint64(objsMarked) * uint64(elemsize)
	if debug.gctrace > 1 {
		gcw.stats[spanclass.sizeclass()].spansDenseScanned++
		gcw.stats[spanclass.sizeclass()].spanObjsDenseScanned += uint64(objsMarked)
	}
	scanObjectsSmall(spanBase, elemsize, nelems, gcw, &toScan)
}
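
// spanSetScans sets the scan bits for all marked-but-not-yet-scanned
// objects in the span, records them in toScan, and returns how many
// objects were newly set to be scanned. It also sets the span's page mark
// bit.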
func spanSetScans(spanBase uintptr, nelems uint16, imb *spanInlineMarkBits, toScan *gc.ObjMask) int {
	arena, pageIdx, pageMask := pageIndexOf(spanBase)
	if arena.pageMarks[pageIdx]&pageMask == 0 {
		atomic.Or8(&arena.pageMarks[pageIdx], pageMask)
	}

	bytes := divRoundUp(uintptr(nelems), 8)
	objsMarked := 0

	imbMarks := (*gc.ObjMask)(unsafe.Pointer(&imb.marks))
	imbScans := (*gc.ObjMask)(unsafe.Pointer(&imb.scans))

	for i := uintptr(0); i < bytes; i += goarch.PtrSize {
		scans := atomic.Loaduintptr(&imbScans[i/goarch.PtrSize])
		marks := imbMarks[i/goarch.PtrSize]
		scans = bswapIfBigEndian(scans)
		marks = bswapIfBigEndian(marks)
		if i/goarch.PtrSize == 64/goarch.PtrSize-1 {
			scans &^= 0xff << ((goarch.PtrSize - 1) * 8)
			marks &^= 0xff << ((goarch.PtrSize - 1) * 8)
		}
		toGrey := marks &^ scans
		toScan[i/goarch.PtrSize] = toGrey

		if toGrey != 0 {
			toGrey = bswapIfBigEndian(toGrey)
			if goarch.PtrSize == 4 {
				atomic.Or32((*uint32)(unsafe.Pointer(&imbScans[i/goarch.PtrSize])), uint32(toGrey))
			} else {
				atomic.Or64((*uint64)(unsafe.Pointer(&imbScans[i/goarch.PtrSize])), uint64(toGrey))
			}
		}
		objsMarked += sys.OnesCount64(uint64(toGrey))
	}
	return objsMarked
}
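
// scanObjectSmall scans the single object at b in the small-object span
// starting at spanBase: it reads the object's pointer bits, batches and
// prefetches the pointer slots, then marks each referent, preferring the
// span-scanning path where possible.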
func scanObjectSmall(spanBase, b, objSize uintptr, gcw *gcWork) {
	ptrBits := heapBitsSmallForAddrInline(spanBase, b, objSize)
	gcw.heapScanWork += int64(sys.Len64(uint64(ptrBits)) * goarch.PtrSize)
	nptrs := 0
	n := sys.OnesCount64(uint64(ptrBits))
	for range n {
		k := sys.TrailingZeros64(uint64(ptrBits))
		ptrBits &^= 1 << k
		addr := b + uintptr(k)*goarch.PtrSize

		sys.Prefetch(addr)

		gcw.ptrBuf[nptrs] = addr
		nptrs++
	}

	for _, p := range gcw.ptrBuf[:nptrs] {
		p = *(*uintptr)(unsafe.Pointer(p))
		if p == 0 {
			continue
		}
		if !tryDeferToSpanScan(p, gcw) {
			if obj, span, objIndex := findObject(p, 0, 0); obj != 0 {
				greyobject(obj, 0, 0, span, gcw, objIndex)
			}
		}
	}
}
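
// scanObjectsSmall scans every object in the span at base whose bit is set
// in scans, batching pointer loads before marking their referents.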
func scanObjectsSmall(base, objSize uintptr, elems uint16, gcw *gcWork, scans *gc.ObjMask) {
	nptrs := 0
	for i, bits := range scans {
		if i*(goarch.PtrSize*8) > int(elems) {
			break
		}
		n := sys.OnesCount64(uint64(bits))
		for range n {
			j := sys.TrailingZeros64(uint64(bits))
			bits &^= 1 << j

			b := base + uintptr(i*(goarch.PtrSize*8)+j)*objSize
			ptrBits := heapBitsSmallForAddrInline(base, b, objSize)
			gcw.heapScanWork += int64(sys.Len64(uint64(ptrBits)) * goarch.PtrSize)

			n := sys.OnesCount64(uint64(ptrBits))
			for range n {
				k := sys.TrailingZeros64(uint64(ptrBits))
				ptrBits &^= 1 << k
				addr := b + uintptr(k)*goarch.PtrSize

				sys.Prefetch(addr)

				gcw.ptrBuf[nptrs] = addr
				nptrs++
			}
		}
	}

	for _, p := range gcw.ptrBuf[:nptrs] {
		p = *(*uintptr)(unsafe.Pointer(p))
		if p == 0 {
			continue
		}
		if !tryDeferToSpanScan(p, gcw) {
			if obj, span, objIndex := findObject(p, 0, 0); obj != 0 {
				greyobject(obj, 0, 0, span, gcw, objIndex)
			}
		}
	}
}
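
// heapBitsSmallForAddrInline returns the pointer/scalar bits for the
// object at addr in the small-object span starting at spanBase. The
// object's bitmap may straddle two words of the span's heap bits, so up to
// two words are read and stitched together.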
func heapBitsSmallForAddrInline(spanBase, addr, elemsize uintptr) uintptr {
	hbitsBase, _ := spanHeapBitsRange(spanBase, gc.PageSize, elemsize)
	hbits := (*byte)(unsafe.Pointer(hbitsBase))

	i := (addr - spanBase) / goarch.PtrSize / ptrBits
	j := (addr - spanBase) / goarch.PtrSize % ptrBits
	bits := elemsize / goarch.PtrSize
	word0 := (*uintptr)(unsafe.Pointer(addb(hbits, goarch.PtrSize*(i+0))))
	word1 := (*uintptr)(unsafe.Pointer(addb(hbits, goarch.PtrSize*(i+1))))

	var read uintptr
	if j+bits > ptrBits {
		bits0 := ptrBits - j
		bits1 := bits - bits0
		read = *word0 >> j
		read |= (*word1 & ((1 << bits1) - 1)) << bits0
	} else {
		read = (*word0 >> j) & ((1 << bits) - 1)
	}
	return read
}