Source file src/runtime/malloc_stubs.go
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This file contains stub functions that are not meant to be called directly,
// but that will be assembled together using the inlining logic in runtime/_mkmalloc
// to produce a full mallocgc function that's specialized for a span class
// or specific size in the case of the tiny allocator.
//
// To assemble a mallocgc function, the mallocStub function is cloned, and the call to
// inlinedMalloc is replaced with the inlined body of smallScanNoHeaderStub,
// smallNoScanStub or tinyStub, depending on the parameters being specialized.
//
// The size_ identifier (in the tiny case) and the elemsize_, sizeclass_, and noscanint_
// identifiers (in all three cases) are replaced with the value of the corresponding
// parameter in the specialized case.
// The nextFreeFastStub, nextFreeFastTiny, heapSetTypeNoHeaderStub, and writeHeapBitsSmallStub
// functions are also inlined by _mkmalloc.

package runtime

import (
	"internal/goarch"
	"internal/runtime/sys"
	"unsafe"
)

// These identifiers will all be replaced by the inliner, so their values don't
// really matter: they just need to be set so that the stub functions, which
// will never be used on their own, can compile. elemsize_ can't be set to
// zero because we divide by it in nextFreeFastTiny, and the compiler would
// complain about a division by zero. Its replaced value will always be greater
// than zero.
const elemsize_ = 8
const sizeclass_ = 0
const noscanint_ = 0
const size_ = 0

func malloc0(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	if doubleCheckMalloc {
		if gcphase == _GCmarktermination {
			throw("mallocgc called with gcphase == _GCmarktermination")
		}
	}

	// Short-circuit zero-sized allocation requests.
	return unsafe.Pointer(&zerobase)
}

func mallocPanic(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	panic("not defined for sizeclass")
}

// WARNING: mallocStub does not do any work for sanitizers, so callers need
// to steer out of this codepath early if sanitizers are enabled.
func mallocStub(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	if doubleCheckMalloc {
		if gcphase == _GCmarktermination {
			throw("mallocgc called with gcphase == _GCmarktermination")
		}
	}

	// It's possible for any malloc to trigger sweeping, which may in
	// turn queue finalizers. Record this dynamic lock edge.
	// N.B. Compiled away if lockrank experiment is not enabled.
	lockRankMayQueueFinalizer()

	// Pre-malloc debug hooks.
	if debug.malloc {
		if x := preMallocgcDebug(size, typ); x != nil {
			return x
		}
	}

	// Assist the GC if needed.
	if gcBlackenEnabled != 0 {
		deductAssistCredit(size)
	}

	// Actually do the allocation.
	x, elemsize := inlinedMalloc(size, typ, needzero)

	// Notify valgrind, if enabled.
	// Unlike the other sanitizers, we do the valgrind instrumentation here
	// so that the compiler doesn't have to know about valgrind.
	if valgrindenabled {
		valgrindMalloc(x, size)
	}

	// Adjust our GC assist debt to account for internal fragmentation.
	if gcBlackenEnabled != 0 && elemsize != 0 {
		if assistG := getg().m.curg; assistG != nil {
			assistG.gcAssistBytes -= int64(elemsize - size)
		}
	}

	// Post-malloc debug hooks.
	if debug.malloc {
		postMallocgcDebug(x, elemsize, typ)
	}
	return x
}
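
// For illustration only: this sketch (not real code in this file) shows roughly
// what _mkmalloc produces when it specializes mallocStub for a small noscan
// size class. The function name and the structure of the comments below are
// hypothetical; the real names and constants come from the generator.
//
//	func mallocgcSmallNoScanExample(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
//		// ... same prologue as mallocStub: double-checks, lock-rank edge,
//		// debug hooks, GC assist credit ...
//
//		// The inlinedMalloc call is replaced by the body of smallNoScanStub,
//		// with sizeclass_, elemsize_, and noscanint_ (1 for noscan spans)
//		// substituted as constants, so spc and elemsize fold to constants too.
//
//		// ... same epilogue: valgrind hook, assist-debt adjustment,
//		// post-malloc debug hooks ...
//		return x
//	}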

// inlinedMalloc will never be called. It is defined just so that the compiler can compile
// the mallocStub function, which will also never be called, but instead used as a template
// to generate a size-specialized malloc function. The call to inlinedMalloc in mallocStub
// will be replaced with the inlined body of smallScanNoHeaderStub, smallNoScanStub, or tinyStub
// when generating the size-specialized malloc function. See the comment at the top of this
// file for more information.
func inlinedMalloc(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
	return unsafe.Pointer(uintptr(0)), 0
}

func doubleCheckSmallScanNoHeader(size uintptr, typ *_type, mp *m) {
	if mp.mallocing != 0 {
		throw("malloc deadlock")
	}
	if mp.gsignal == getg() {
		throw("malloc during signal")
	}
	if typ == nil || !typ.Pointers() {
		throw("noscan allocated in scan-only path")
	}
	if !heapBitsInSpan(size) {
		throw("heap bits not in span for non-header-only path")
	}
}

func smallScanNoHeaderStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
	const sizeclass = sizeclass_
	const elemsize = elemsize_

	// Set mp.mallocing to keep from being preempted by GC.
	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckSmallScanNoHeader(size, typ, mp)
	}
	mp.mallocing = 1

	checkGCTrigger := false
	c := getMCache(mp)
	const spc = spanClass(sizeclass<<1) | spanClass(noscanint_)
	span := c.alloc[spc]
	v := nextFreeFastStub(span)
	if v == 0 {
		v, span, checkGCTrigger = c.nextFree(spc)
	}
	x := unsafe.Pointer(v)
	if span.needzero != 0 {
		memclrNoHeapPointers(x, elemsize)
	}
	if goarch.PtrSize == 8 && sizeclass == 1 {
		// initHeapBits already set the pointer bits for the 8-byte sizeclass
		// on 64-bit platforms.
		c.scanAlloc += 8
	} else {
		dataSize := size // make the inliner happy
		x := uintptr(x)
		scanSize := heapSetTypeNoHeaderStub(x, dataSize, typ, span)
		c.scanAlloc += scanSize
	}

	// Ensure that the stores above that initialize x to
	// type-safe memory and set the heap bits occur before
	// the caller can make x observable to the garbage
	// collector. Otherwise, on weakly ordered machines,
	// the garbage collector could follow a pointer to x,
	// but see uninitialized memory or stale heap bits.
	publicationBarrier()

	if writeBarrier.enabled {
		// Allocate black during GC.
		// All slots hold nil so no scanning is needed.
		// This may be racing with GC so do it atomically if there can be
		// a race marking the bit.
		gcmarknewobject(span, uintptr(x))
	} else {
		// Track the last free index before the mark phase. This field
		// is only used by the garbage collector. During the mark phase
		// this is used by the conservative scanner to filter out objects
		// that are both free and recently-allocated. It's safe to do that
		// because we allocate-black if the GC is enabled. The conservative
		// scanner produces pointers out of thin air, so without additional
		// synchronization it might otherwise observe a partially-initialized
		// object, which could crash the program.
		span.freeIndexForScan = span.freeindex
	}

	// Note cache c only valid while m acquired; see #47302
	//
	// N.B. Use the full size because that matches how the GC
	// will update the mem profile on the "free" side.
	//
	// TODO(mknyszek): We should really count the header as part
	// of gc_sys or something. The code below just pretends it is
	// internal fragmentation and matches the GC's accounting by
	// using the whole allocation slot.
	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}

	return x, elemsize
}

func doubleCheckSmallNoScan(typ *_type, mp *m) {
	if mp.mallocing != 0 {
		throw("malloc deadlock")
	}
	if mp.gsignal == getg() {
		throw("malloc during signal")
	}
	if typ != nil && typ.Pointers() {
		throw("expected noscan type for noscan alloc")
	}
}

func smallNoScanStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
	// TODO(matloob): Add functionality to mkmalloc to allow us to inline non-constant
	// sizeclass_ and elemsize_ values (instead, set them to the expressions that look up
	// the size class and elemsize). We'd also need to teach mkmalloc that values derived
	// from these (specifically spc below) should turn into vars. This would allow us to
	// generate mallocgcSmallNoScan itself, so that its code can't diverge from the
	// generated functions.
	const sizeclass = sizeclass_
	const elemsize = elemsize_

	// Set mp.mallocing to keep from being preempted by GC.
	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckSmallNoScan(typ, mp)
	}
	mp.mallocing = 1

	checkGCTrigger := false
	c := getMCache(mp)
	const spc = spanClass(sizeclass<<1) | spanClass(noscanint_)
	span := c.alloc[spc]
	v := nextFreeFastStub(span)
	if v == 0 {
		v, span, checkGCTrigger = c.nextFree(spc)
	}
	x := unsafe.Pointer(v)
	if needzero && span.needzero != 0 {
		memclrNoHeapPointers(x, elemsize)
	}

	// Ensure that the stores above that initialize x to
	// type-safe memory and set the heap bits occur before
	// the caller can make x observable to the garbage
	// collector. Otherwise, on weakly ordered machines,
	// the garbage collector could follow a pointer to x,
	// but see uninitialized memory or stale heap bits.
	publicationBarrier()

	if writeBarrier.enabled {
		// Allocate black during GC.
		// All slots hold nil so no scanning is needed.
		// This may be racing with GC so do it atomically if there can be
		// a race marking the bit.
		gcmarknewobject(span, uintptr(x))
	} else {
		// Track the last free index before the mark phase. This field
		// is only used by the garbage collector. During the mark phase
		// this is used by the conservative scanner to filter out objects
		// that are both free and recently-allocated. It's safe to do that
		// because we allocate-black if the GC is enabled. The conservative
		// scanner produces pointers out of thin air, so without additional
		// synchronization it might otherwise observe a partially-initialized
		// object, which could crash the program.
		span.freeIndexForScan = span.freeindex
	}

	// Note cache c only valid while m acquired; see #47302
	//
	// N.B. Use the full size because that matches how the GC
	// will update the mem profile on the "free" side.
	//
	// TODO(mknyszek): We should really count the header as part
	// of gc_sys or something. The code below just pretends it is
	// internal fragmentation and matches the GC's accounting by
	// using the whole allocation slot.
	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}
	return x, elemsize
}

func doubleCheckTiny(size uintptr, typ *_type, mp *m) {
	if mp.mallocing != 0 {
		throw("malloc deadlock")
	}
	if mp.gsignal == getg() {
		throw("malloc during signal")
	}
	if typ != nil && typ.Pointers() {
		throw("expected noscan for tiny alloc")
	}
}

func tinyStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
	const constsize = size_
	const elemsize = elemsize_

	// Set mp.mallocing to keep from being preempted by GC.
	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckTiny(constsize, typ, mp)
	}
	mp.mallocing = 1

	// Tiny allocator.
	//
	// The tiny allocator combines several tiny allocation requests
	// into a single memory block. The resulting memory block
	// is freed when all subobjects are unreachable. The subobjects
	// must be noscan (don't have pointers); this ensures that
	// the amount of potentially wasted memory is bounded.
	//
	// The size of the memory block used for combining (maxTinySize) is tunable.
	// The current setting is 16 bytes, which gives 2x worst case memory
	// wastage (when all but one subobject is unreachable).
	// 8 bytes would result in no wastage at all, but provides fewer
	// opportunities for combining.
	// 32 bytes provides more opportunities for combining,
	// but can lead to 4x worst case wastage.
	// The best case winning is 8x regardless of block size.
	//
	// Objects obtained from the tiny allocator must not be freed explicitly.
	// So when an object will be freed explicitly, we ensure that
	// its size >= maxTinySize.
	//
	// SetFinalizer has a special case for objects potentially coming
	// from the tiny allocator; in such a case it allows setting finalizers
	// for an inner byte of a memory block.
	//
	// The main targets of the tiny allocator are small strings and
	// standalone escaping variables. On a json benchmark
	// the allocator reduces the number of allocations by ~12% and
	// reduces heap size by ~20%.
	c := getMCache(mp)
	off := c.tinyoffset
	// Align tiny pointer for required (conservative) alignment.
	if constsize&7 == 0 {
		off = alignUp(off, 8)
	} else if goarch.PtrSize == 4 && constsize == 12 {
		// Conservatively align 12-byte objects to 8 bytes on 32-bit
		// systems so that objects whose first field is a 64-bit
		// value are aligned to 8 bytes and do not cause a fault on
		// atomic access. See issue 37262.
		// TODO(mknyszek): Remove this workaround if/when issue 36606
		// is resolved.
		off = alignUp(off, 8)
	} else if constsize&3 == 0 {
		off = alignUp(off, 4)
	} else if constsize&1 == 0 {
		off = alignUp(off, 2)
	}
	if off+constsize <= maxTinySize && c.tiny != 0 {
		// The object fits into the existing tiny block.
		x := unsafe.Pointer(c.tiny + off)
		c.tinyoffset = off + constsize
		c.tinyAllocs++
		mp.mallocing = 0
		releasem(mp)
		return x, 0
	}
	// Allocate a new maxTinySize block.
	checkGCTrigger := false
	span := c.alloc[tinySpanClass]
	v := nextFreeFastTiny(span)
	if v == 0 {
		v, span, checkGCTrigger = c.nextFree(tinySpanClass)
	}
	x := unsafe.Pointer(v)
	(*[2]uint64)(x)[0] = 0 // Always zero
	(*[2]uint64)(x)[1] = 0
	// See if we need to replace the existing tiny block with the new one
	// based on the amount of remaining free space.
	if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) {
		// Note: disabled when the race detector is on; see the comment near the end of this function.
		c.tiny = uintptr(x)
		c.tinyoffset = constsize
	}

	// Ensure that the stores above that initialize x to
	// type-safe memory and set the heap bits occur before
	// the caller can make x observable to the garbage
	// collector. Otherwise, on weakly ordered machines,
	// the garbage collector could follow a pointer to x,
	// but see uninitialized memory or stale heap bits.
	publicationBarrier()

	if writeBarrier.enabled {
		// Allocate black during GC.
		// All slots hold nil so no scanning is needed.
		// This may be racing with GC so do it atomically if there can be
		// a race marking the bit.
		gcmarknewobject(span, uintptr(x))
	} else {
		// Track the last free index before the mark phase. This field
		// is only used by the garbage collector. During the mark phase
		// this is used by the conservative scanner to filter out objects
		// that are both free and recently-allocated. It's safe to do that
		// because we allocate-black if the GC is enabled. The conservative
		// scanner produces pointers out of thin air, so without additional
		// synchronization it might otherwise observe a partially-initialized
		// object, which could crash the program.
		span.freeIndexForScan = span.freeindex
	}

	// Note cache c only valid while m acquired; see #47302
	//
	// N.B. Use the full size because that matches how the GC
	// will update the mem profile on the "free" side.
	//
	// TODO(mknyszek): We should really count the header as part
	// of gc_sys or something. The code below just pretends it is
	// internal fragmentation and matches the GC's accounting by
	// using the whole allocation slot.
	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}

	if raceenabled {
		// Pad tinysize allocations so they are aligned with the end
		// of the tinyalloc region. This ensures that any arithmetic
		// that goes off the top end of the object will be detectable
		// by checkptr (issue 38872).
		// Note that we disable tinyalloc when raceenabled for this to work.
		// TODO: This padding is only performed when the race detector
		// is enabled. It would be nice to enable it if any package
		// was compiled with checkptr, but there's no easy way to
		// detect that (especially at compile time).
		// TODO: enable this padding for all allocations, not just
		// tinyalloc ones. It's tricky because of pointer maps.
		// Maybe just all noscan objects?
		x = add(x, elemsize-constsize)
	}
	return x, elemsize
}
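
// Illustrative example of the bump allocation above (using the current
// maxTinySize of 16; the sizes are hypothetical): suppose c.tiny is set and
// c.tinyoffset is 3. A specialization with size_ = 4 aligns off up to 4
// (4&3 == 0), returns c.tiny+4, and leaves c.tinyoffset at 8. A later 12-byte
// tiny allocation no longer fits (8+12 > 16), so it takes a fresh 16-byte
// block from tinySpanClass; and because 12 is not less than the old
// c.tinyoffset of 8, the old block (which still has 8 free bytes) is kept
// as c.tiny rather than being replaced.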

// TODO(matloob): Should we let the go compiler inline this instead of using mkmalloc?
// We won't be able to use elemsize_ but that's probably ok.
func nextFreeFastTiny(span *mspan) gclinkptr {
	const nbytes = 8192
	const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / elemsize_)
	var nextFreeFastResult gclinkptr
	if span.allocCache != 0 {
		theBit := sys.TrailingZeros64(span.allocCache) // Is there a free object in the allocCache?
		result := span.freeindex + uint16(theBit)
		if result < nelems {
			freeidx := result + 1
			if !(freeidx%64 == 0 && freeidx != nelems) {
				span.allocCache >>= uint(theBit + 1)
				span.freeindex = freeidx
				span.allocCount++
				nextFreeFastResult = gclinkptr(uintptr(result)*elemsize_ + span.base())
			}
		}
	}
	return nextFreeFastResult
}

func nextFreeFastStub(span *mspan) gclinkptr {
	var nextFreeFastResult gclinkptr
	if span.allocCache != 0 {
		theBit := sys.TrailingZeros64(span.allocCache) // Is there a free object in the allocCache?
		result := span.freeindex + uint16(theBit)
		if result < span.nelems {
			freeidx := result + 1
			if !(freeidx%64 == 0 && freeidx != span.nelems) {
				span.allocCache >>= uint(theBit + 1)
				span.freeindex = freeidx
				span.allocCount++
				nextFreeFastResult = gclinkptr(uintptr(result)*elemsize_ + span.base())
			}
		}
	}
	return nextFreeFastResult
}
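
// Illustrative example of the allocCache fast path above (the numbers are
// hypothetical): allocCache holds a window of the allocation bitmap with bit 0
// corresponding to freeindex and set bits marking free slots. If freeindex is
// 32 and the low bits of allocCache are ...11111000, TrailingZeros64 returns 3,
// so object index 35 is the next free slot: freeindex advances to 36, the
// cache is shifted right by 4 so bit 0 again lines up with freeindex, and the
// result is span.base() + 35*elemsize. The freeidx%64 check bails out to the
// slow path (c.nextFree) when the cache word would need to be refilled.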

func heapSetTypeNoHeaderStub(x, dataSize uintptr, typ *_type, span *mspan) uintptr {
	if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(elemsize_)) {
		throw("tried to write heap bits, but no heap bits in span")
	}
	scanSize := writeHeapBitsSmallStub(span, x, dataSize, typ)
	if doubleCheckHeapSetType {
		doubleCheckHeapType(x, dataSize, typ, nil, span)
	}
	return scanSize
}

// writeHeapBitsSmallStub writes the heap bits for small objects whose ptr/scalar data is
// stored as a bitmap at the end of the span.
//
// Assumes dataSize is <= ptrBits*goarch.PtrSize. x must be a pointer into the span.
// heapBitsInSpan(dataSize) must be true. dataSize must be >= typ.Size_.
//
//go:nosplit
func writeHeapBitsSmallStub(span *mspan, x, dataSize uintptr, typ *_type) uintptr {
	// The objects here are always really small, so a single load is sufficient.
	src0 := readUintptr(getGCMask(typ))

	const elemsize = elemsize_

	// Create repetitions of the bitmap if we have a small slice backing store.
	scanSize := typ.PtrBytes
	src := src0
	if typ.Size_ == goarch.PtrSize {
		src = (1 << (dataSize / goarch.PtrSize)) - 1
	} else {
		// N.B. We rely on dataSize being an exact multiple of the type size.
		// The alternative is to be defensive and mask out src to the length
		// of dataSize. The purpose is to save on one additional masking operation.
		if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 {
			throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_")
		}
		for i := typ.Size_; i < dataSize; i += typ.Size_ {
			src |= src0 << (i / goarch.PtrSize)
			scanSize += typ.Size_
		}
	}

	// Since we're never writing more than one uintptr's worth of bits, we're either going
	// to do one or two writes.
	dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize)
	dst := unsafe.Pointer(dstBase)
	o := (x - span.base()) / goarch.PtrSize
	i := o / ptrBits
	j := o % ptrBits
	const bits uintptr = elemsize / goarch.PtrSize
	// In the if statement below, we have to do two uintptr writes if the bits
	// we need to write straddle two different memory locations. But if
	// the number of bits we're writing divides evenly into the number of bits
	// in the uintptr we're writing, this can never happen. Since bitsIsPowerOfTwo
	// is a compile-time constant in the generated code, in the case where the size is
	// a power of two less than or equal to ptrBits, the compiler can remove the
	// 'two writes' branch of the if statement and always do only one write without
	// the check.
	const bitsIsPowerOfTwo = bits&(bits-1) == 0
	if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) {
		// Two writes.
		bits0 := ptrBits - j
		bits1 := bits - bits0
		dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize))
		dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize))
		*dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j)
		*dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0)
	} else {
		// One write.
		// We take the min so this compiles on 32-bit platforms; if bits > ptrBits
		// we always take the other branch.
		dst := (*uintptr)(add(dst, i*goarch.PtrSize))
		*dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j)
	}

	const doubleCheck = false
	if doubleCheck {
		writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ)
	}
	return scanSize
}
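
// Worked example of the single-write case above (assuming a 64-bit platform,
// so ptrBits = 64, and a hypothetical specialization with elemsize_ = 16):
// bits = 16/8 = 2 and bitsIsPowerOfTwo is true, so the two-write branch is
// dead code and can be removed by the compiler. For a 16-byte type whose
// first word is a pointer (src = 0b01) allocated at word offset o = 70 from
// span.base(), i = 1 and j = 6, so the write clears bit positions 6 and 7 of
// the second bitmap word and ors in 0b01<<6.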

func writeHeapBitsDoubleCheck(span *mspan, x, dataSize, src, src0, i, j, bits uintptr, typ *_type) {
	srcRead := span.heapBitsSmallForAddr(x)
	if srcRead != src {
		print("runtime: x=", hex(x), " i=", i, " j=", j, " bits=", bits, "\n")
		print("runtime: dataSize=", dataSize, " typ.Size_=", typ.Size_, " typ.PtrBytes=", typ.PtrBytes, "\n")
		print("runtime: src0=", hex(src0), " src=", hex(src), " srcRead=", hex(srcRead), "\n")
		throw("bad pointer bits written for small object")
	}
}