Source file src/runtime/malloc_stubs.go

// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This file contains stub functions that are not meant to be called directly,
// but that will be assembled together using the inlining logic in runtime/_mkmalloc
// to produce a full mallocgc function that's specialized for a span class
// or a specific size in the case of the tiny allocator.
//
// To assemble a mallocgc function, the mallocStub function is cloned, and the call to
// inlinedMalloc is replaced with the inlined body of smallScanNoHeaderStub,
// smallNoScanStub, or tinyStub, depending on the parameters being specialized.
//
// The size_ (for the tiny case) and elemsize_, sizeclass_, and noscanint_ (for all three cases)
// identifiers are replaced with the value of the parameter in the specialized case.
// The nextFreeFastStub, nextFreeFastTiny, heapSetTypeNoHeaderStub, and writeHeapBitsSmallStub
// functions are also inlined by _mkmalloc.
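//
// As an illustrative sketch (not the literal generated output): specializing for a
// noscan size class with 32-byte elements would clone mallocStub, substitute the
// inlined body of smallNoScanStub for the inlinedMalloc call, and replace elemsize_
// with 32, sizeclass_ with the corresponding size class, and noscanint_ with 1.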

package runtime

import (
	"internal/goarch"
	"internal/runtime/sys"
	"unsafe"
)

// These identifiers will all be replaced by the inliner, so their values don't
// really matter: they just need to be set so that the stub functions, which
// will never be used on their own, can compile. elemsize_ can't be set to
// zero because we divide by it in nextFreeFastTiny, and the compiler would
// complain about a division by zero. Its replaced value will always be greater
// than zero.
const elemsize_ = 8
const sizeclass_ = 0
const noscanint_ = 0
const size_ = 0

func malloc0(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	if doubleCheckMalloc {
		if gcphase == _GCmarktermination {
			throw("mallocgc called with gcphase == _GCmarktermination")
		}
	}

	// Short-circuit zero-sized allocation requests.
	return unsafe.Pointer(&zerobase)
}

func mallocPanic(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	panic("not defined for sizeclass")
}

// WARNING: mallocStub does not do any work for sanitizers, so callers need
// to steer out of this codepath early if sanitizers are enabled.
func mallocStub(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	if doubleCheckMalloc {
		if gcphase == _GCmarktermination {
			throw("mallocgc called with gcphase == _GCmarktermination")
		}
	}

	// It's possible for any malloc to trigger sweeping, which may in
	// turn queue finalizers. Record this dynamic lock edge.
	// N.B. Compiled away if lockrank experiment is not enabled.
	lockRankMayQueueFinalizer()

	// Pre-malloc debug hooks.
	if debug.malloc {
		if x := preMallocgcDebug(size, typ); x != nil {
			return x
		}
	}

	// Assist the GC if needed.
	if gcBlackenEnabled != 0 {
		deductAssistCredit(size)
	}

	// Actually do the allocation.
	x, elemsize := inlinedMalloc(size, typ, needzero)

	// Notify valgrind, if enabled.
	// To allow the compiler to not know about valgrind, we do the valgrind
	// instrumentation here in the runtime, unlike the other sanitizers.
	if valgrindenabled {
		valgrindMalloc(x, size)
	}

	// Adjust our GC assist debt to account for internal fragmentation.
	if gcBlackenEnabled != 0 && elemsize != 0 {
		if assistG := getg().m.curg; assistG != nil {
			assistG.gcAssistBytes -= int64(elemsize - size)
		}
	}

	// Post-malloc debug hooks.
	if debug.malloc {
		postMallocgcDebug(x, elemsize, typ)
	}
	return x
}

// inlinedMalloc will never be called. It is defined just so that the compiler can compile
// the mallocStub function, which will also never be called, but instead used as a template
// to generate a size-specialized malloc function. The call to inlinedMalloc in mallocStub
// will be replaced with the inlined body of smallScanNoHeaderStub, smallNoScanStub, or tinyStub
// when generating the size-specialized malloc function. See the comment at the top of this
// file for more information.
func inlinedMalloc(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
	return unsafe.Pointer(uintptr(0)), 0
}

func doubleCheckSmallScanNoHeader(size uintptr, typ *_type, mp *m) {
	if mp.mallocing != 0 {
		throw("malloc deadlock")
	}
	if mp.gsignal == getg() {
		throw("malloc during signal")
	}
	if typ == nil || !typ.Pointers() {
		throw("noscan allocated in scan-only path")
	}
	if !heapBitsInSpan(size) {
		throw("heap bits not in span for non-header-only path")
	}
}

func smallScanNoHeaderStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
	const sizeclass = sizeclass_
	const elemsize = elemsize_

	// Set mp.mallocing to keep from being preempted by GC.
	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckSmallScanNoHeader(size, typ, mp)
	}
	mp.mallocing = 1

	checkGCTrigger := false
	c := getMCache(mp)
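	// A spanClass packs the size class and a noscan bit into a single value,
	// (sizeclass << 1) | noscan, so spc below selects the mcache's cached span
	// for exactly this size class and pointer-ness.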
	const spc = spanClass(sizeclass<<1) | spanClass(noscanint_)
	span := c.alloc[spc]
	v := nextFreeFastStub(span)
	if v == 0 {
		v, span, checkGCTrigger = c.nextFree(spc)
	}
	x := unsafe.Pointer(v)
	if span.needzero != 0 {
		memclrNoHeapPointers(x, elemsize)
	}
	if goarch.PtrSize == 8 && sizeclass == 1 {
		// initHeapBits already set the pointer bits for the 8-byte sizeclass
		// on 64-bit platforms.
		c.scanAlloc += 8
	} else {
		dataSize := size // make the inliner happy
		x := uintptr(x)
		scanSize := heapSetTypeNoHeaderStub(x, dataSize, typ, span)
		c.scanAlloc += scanSize
	}

	// Ensure that the stores above that initialize x to
	// type-safe memory and set the heap bits occur before
	// the caller can make x observable to the garbage
	// collector. Otherwise, on weakly ordered machines,
	// the garbage collector could follow a pointer to x,
	// but see uninitialized memory or stale heap bits.
	publicationBarrier()

	if writeBarrier.enabled {
		// Allocate black during GC.
		// All slots hold nil so no scanning is needed.
		// This may be racing with GC so do it atomically if there can be
		// a race marking the bit.
		gcmarknewobject(span, uintptr(x))
	} else {
		// Track the last free index before the mark phase. This field
		// is only used by the garbage collector. During the mark phase
		// this is used by the conservative scanner to filter out objects
		// that are both free and recently-allocated. It's safe to do that
		// because we allocate-black if the GC is enabled. The conservative
		// scanner produces pointers out of thin air, so without additional
		// synchronization it might otherwise observe a partially-initialized
		// object, which could crash the program.
		span.freeIndexForScan = span.freeindex
	}

	// Note cache c only valid while m acquired; see #47302
	//
	// N.B. Use the full size because that matches how the GC
	// will update the mem profile on the "free" side.
	//
	// TODO(mknyszek): We should really count the header as part
	// of gc_sys or something. The code below just pretends it is
	// internal fragmentation and matches the GC's accounting by
	// using the whole allocation slot.
	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}

	return x, elemsize
}

func doubleCheckSmallNoScan(typ *_type, mp *m) {
	if mp.mallocing != 0 {
		throw("malloc deadlock")
	}
	if mp.gsignal == getg() {
		throw("malloc during signal")
	}
	if typ != nil && typ.Pointers() {
		throw("expected noscan type for noscan alloc")
	}
}

func smallNoScanStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
	// TODO(matloob): Add functionality to mkmalloc to allow us to inline a non-constant
	// sizeclass_ and elemsize_ value (instead, just set them to the expressions that look up
	// the size class and elemsize). We'd also need to teach mkmalloc that values derived from
	// these (specifically spc below) should turn into vars. This would allow us to generate
	// mallocgcSmallNoScan itself, so that its code can't diverge from the generated functions.
	const sizeclass = sizeclass_
	const elemsize = elemsize_

	// Set mp.mallocing to keep from being preempted by GC.
	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckSmallNoScan(typ, mp)
	}
	mp.mallocing = 1

	checkGCTrigger := false
	c := getMCache(mp)
	const spc = spanClass(sizeclass<<1) | spanClass(noscanint_)
	span := c.alloc[spc]
	v := nextFreeFastStub(span)
	if v == 0 {
		v, span, checkGCTrigger = c.nextFree(spc)
	}
	x := unsafe.Pointer(v)
	if needzero && span.needzero != 0 {
		memclrNoHeapPointers(x, elemsize)
	}

	// Ensure that the stores above that initialize x to
	// type-safe memory and set the heap bits occur before
	// the caller can make x observable to the garbage
	// collector. Otherwise, on weakly ordered machines,
	// the garbage collector could follow a pointer to x,
	// but see uninitialized memory or stale heap bits.
	publicationBarrier()

	if writeBarrier.enabled {
		// Allocate black during GC.
		// All slots hold nil so no scanning is needed.
		// This may be racing with GC so do it atomically if there can be
		// a race marking the bit.
		gcmarknewobject(span, uintptr(x))
	} else {
		// Track the last free index before the mark phase. This field
		// is only used by the garbage collector. During the mark phase
		// this is used by the conservative scanner to filter out objects
		// that are both free and recently-allocated. It's safe to do that
		// because we allocate-black if the GC is enabled. The conservative
		// scanner produces pointers out of thin air, so without additional
		// synchronization it might otherwise observe a partially-initialized
		// object, which could crash the program.
		span.freeIndexForScan = span.freeindex
	}

	// Note cache c only valid while m acquired; see #47302
	//
	// N.B. Use the full size because that matches how the GC
	// will update the mem profile on the "free" side.
	//
	// TODO(mknyszek): We should really count the header as part
	// of gc_sys or something. The code below just pretends it is
	// internal fragmentation and matches the GC's accounting by
	// using the whole allocation slot.
	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}
	return x, elemsize
}

func doubleCheckTiny(size uintptr, typ *_type, mp *m) {
	if mp.mallocing != 0 {
		throw("malloc deadlock")
	}
	if mp.gsignal == getg() {
		throw("malloc during signal")
	}
	if typ != nil && typ.Pointers() {
		throw("expected noscan for tiny alloc")
	}
}

func tinyStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
	const constsize = size_
	const elemsize = elemsize_

	// Set mp.mallocing to keep from being preempted by GC.
	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckTiny(constsize, typ, mp)
	}
	mp.mallocing = 1

	// Tiny allocator.
	//
	// The tiny allocator combines several tiny allocation requests
	// into a single memory block. The resulting memory block
	// is freed when all subobjects are unreachable. The subobjects
	// must be noscan (have no pointers); this ensures that
	// the amount of potentially wasted memory is bounded.
	//
	// The size of the memory block used for combining (maxTinySize) is tunable.
	// The current setting is 16 bytes, which gives 2x worst-case memory
	// wastage (when all but one subobject is unreachable).
	// 8 bytes would result in no wastage at all, but provides fewer
	// opportunities for combining.
	// 32 bytes provides more opportunities for combining,
	// but can lead to 4x worst-case wastage.
	// The best-case gain is 8x regardless of block size.
	//
	// Objects obtained from the tiny allocator must not be freed explicitly.
	// So when an object will be freed explicitly, we ensure that
	// its size >= maxTinySize.
	//
	// SetFinalizer has a special case for objects potentially coming
	// from the tiny allocator; in that case it allows setting finalizers
	// for an inner byte of a memory block.
	//
	// The main targets of the tiny allocator are small strings and
	// standalone escaping variables. On a json benchmark
	// the allocator reduces the number of allocations by ~12% and
	// reduces heap size by ~20%.
	c := getMCache(mp)
	off := c.tinyoffset
	// Align tiny pointer for required (conservative) alignment.
	if constsize&7 == 0 {
		off = alignUp(off, 8)
	} else if goarch.PtrSize == 4 && constsize == 12 {
		// Conservatively align 12-byte objects to 8 bytes on 32-bit
		// systems so that objects whose first field is a 64-bit
		// value are aligned to 8 bytes and do not cause a fault on
		// atomic access. See issue 37262.
		// TODO(mknyszek): Remove this workaround if/when issue 36606
		// is resolved.
		off = alignUp(off, 8)
	} else if constsize&3 == 0 {
		off = alignUp(off, 4)
	} else if constsize&1 == 0 {
		off = alignUp(off, 2)
	}
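	// For example, with constsize = 12 on a 64-bit system, 12&7 != 0 but 12&3 == 0,
	// so off is rounded up to a multiple of 4; on a 32-bit system the case above
	// rounds it up to 8 instead. An odd constsize leaves off unchanged.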
	if off+constsize <= maxTinySize && c.tiny != 0 {
		// The object fits into existing tiny block.
		x := unsafe.Pointer(c.tiny + off)
		c.tinyoffset = off + constsize
		c.tinyAllocs++
		mp.mallocing = 0
		releasem(mp)
		return x, 0
	}
	// Allocate a new maxTinySize block.
	checkGCTrigger := false
	span := c.alloc[tinySpanClass]
	v := nextFreeFastTiny(span)
	if v == 0 {
		v, span, checkGCTrigger = c.nextFree(tinySpanClass)
	}
	x := unsafe.Pointer(v)
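	// Zero the entire new maxTinySize (16-byte) block up front, regardless of
	// needzero, since later tiny allocations are carved out of it on the fast
	// path above without any further clearing.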
	(*[2]uint64)(x)[0] = 0 // Always zero
	(*[2]uint64)(x)[1] = 0
	// See if we need to replace the existing tiny block with the new one
	// based on amount of remaining free space.
	if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) {
		// Note: disabled when race detector is on, see comment near end of this function.
		c.tiny = uintptr(x)
		c.tinyoffset = constsize
	}

	// Ensure that the stores above that initialize x to
	// type-safe memory and set the heap bits occur before
	// the caller can make x observable to the garbage
	// collector. Otherwise, on weakly ordered machines,
	// the garbage collector could follow a pointer to x,
	// but see uninitialized memory or stale heap bits.
	publicationBarrier()

	if writeBarrier.enabled {
		// Allocate black during GC.
		// All slots hold nil so no scanning is needed.
		// This may be racing with GC so do it atomically if there can be
		// a race marking the bit.
		gcmarknewobject(span, uintptr(x))
	} else {
		// Track the last free index before the mark phase. This field
		// is only used by the garbage collector. During the mark phase
		// this is used by the conservative scanner to filter out objects
		// that are both free and recently-allocated. It's safe to do that
		// because we allocate-black if the GC is enabled. The conservative
		// scanner produces pointers out of thin air, so without additional
		// synchronization it might otherwise observe a partially-initialized
		// object, which could crash the program.
		span.freeIndexForScan = span.freeindex
	}

	// Note cache c only valid while m acquired; see #47302
	//
	// N.B. Use the full size because that matches how the GC
	// will update the mem profile on the "free" side.
	//
	// TODO(mknyszek): We should really count the header as part
	// of gc_sys or something. The code below just pretends it is
	// internal fragmentation and matches the GC's accounting by
	// using the whole allocation slot.
	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}

	if raceenabled {
		// Pad tinysize allocations so they are aligned with the end
		// of the tinyalloc region. This ensures that any arithmetic
		// that goes off the top end of the object will be detectable
		// by checkptr (issue 38872).
		// Note that we disable tinyalloc when raceenabled for this to work.
		// TODO: This padding is only performed when the race detector
		// is enabled. It would be nice to enable it if any package
		// was compiled with checkptr, but there's no easy way to
		// detect that (especially at compile time).
		// TODO: enable this padding for all allocations, not just
		// tinyalloc ones. It's tricky because of pointer maps.
		// Maybe just all noscan objects?
		x = add(x, elemsize-constsize)
	}
	return x, elemsize
}

// TODO(matloob): Should we let the Go compiler inline this instead of using mkmalloc?
// We won't be able to use elemsize_, but that's probably ok.
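//
// nextFreeFastTiny is the tiny-span counterpart of nextFreeFastStub: it returns the
// next free object in the span's allocCache, or 0 if the fast path can't be taken.
// It hard-codes the span size in bytes (nbytes = 8192) and the element size so that
// nelems is a compile-time constant rather than a load of span.nelems.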
func nextFreeFastTiny(span *mspan) gclinkptr {
	const nbytes = 8192
	const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / elemsize_)
	var nextFreeFastResult gclinkptr
	if span.allocCache != 0 {
		theBit := sys.TrailingZeros64(span.allocCache) // Is there a free object in the allocCache?
		result := span.freeindex + uint16(theBit)
		if result < nelems {
			freeidx := result + 1
			if !(freeidx%64 == 0 && freeidx != nelems) {
				span.allocCache >>= uint(theBit + 1)
				span.freeindex = freeidx
				span.allocCount++
				nextFreeFastResult = gclinkptr(uintptr(result)*elemsize_ + span.base())
			}
		}
	}
	return nextFreeFastResult
}

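// nextFreeFastStub returns the next free object in the span's allocCache, or 0 if
// the cache is empty or needs to be refilled, in which case callers fall back to
// mcache.nextFree. It mirrors nextFreeFast in malloc.go, with elemsize_ substituted
// by the inliner.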
func nextFreeFastStub(span *mspan) gclinkptr {
	var nextFreeFastResult gclinkptr
	if span.allocCache != 0 {
		theBit := sys.TrailingZeros64(span.allocCache) // Is there a free object in the allocCache?
		result := span.freeindex + uint16(theBit)
		if result < span.nelems {
			freeidx := result + 1
			if !(freeidx%64 == 0 && freeidx != span.nelems) {
				span.allocCache >>= uint(theBit + 1)
				span.freeindex = freeidx
				span.allocCount++
				nextFreeFastResult = gclinkptr(uintptr(result)*elemsize_ + span.base())
			}
		}
	}
	return nextFreeFastResult
}

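// heapSetTypeNoHeaderStub records the type's pointer bitmap for an object whose
// heap bits are stored at the end of its span (an allocation without a malloc
// header) and returns the number of bytes the GC needs to scan. It is the stub
// counterpart of heapSetTypeNoHeader, with elemsize_ substituted by the inliner.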
func heapSetTypeNoHeaderStub(x, dataSize uintptr, typ *_type, span *mspan) uintptr {
	if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(elemsize_)) {
		throw("tried to write heap bits, but no heap bits in span")
	}
	scanSize := writeHeapBitsSmallStub(span, x, dataSize, typ)
	if doubleCheckHeapSetType {
		doubleCheckHeapType(x, dataSize, typ, nil, span)
	}
	return scanSize
}

// writeHeapBitsSmallStub writes the heap bits for small objects whose ptr/scalar data is
// stored as a bitmap at the end of the span.
//
// Assumes dataSize is <= ptrBits*goarch.PtrSize. x must be a pointer into the span.
// heapBitsInSpan(dataSize) must be true. dataSize must be >= typ.Size_.
//
//go:nosplit
func writeHeapBitsSmallStub(span *mspan, x, dataSize uintptr, typ *_type) uintptr {
	// The objects here are always really small, so a single load is sufficient.
	src0 := readUintptr(getGCMask(typ))

	const elemsize = elemsize_

	// Create repetitions of the bitmap if we have a small slice backing store.
	scanSize := typ.PtrBytes
	src := src0
	if typ.Size_ == goarch.PtrSize {
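		// A pointer-sized element contributes exactly one pointer bit, so the mask
		// is dataSize/PtrSize one-bits: e.g. a 3-word backing store yields 0b111.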
		src = (1 << (dataSize / goarch.PtrSize)) - 1
	} else {
		// N.B. We rely on dataSize being an exact multiple of the type size.
		// The alternative is to be defensive and mask out src to the length
		// of dataSize. The purpose is to save on one additional masking operation.
		if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 {
			throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_")
		}
		for i := typ.Size_; i < dataSize; i += typ.Size_ {
			src |= src0 << (i / goarch.PtrSize)
			scanSize += typ.Size_
		}
	}

	// Since we're never writing more than one uintptr's worth of bits, we're either going
	// to do one or two writes.
	dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize)
	dst := unsafe.Pointer(dstBase)
	o := (x - span.base()) / goarch.PtrSize
	i := o / ptrBits
	j := o % ptrBits
	const bits uintptr = elemsize / goarch.PtrSize
	// In the if statement below, we have to do two uintptr writes if the bits
	// we need to write straddle two different memory words. But if the number
	// of bits we're writing divides evenly into the number of bits in the
	// uintptr we're writing, that can never happen. Since bitsIsPowerOfTwo
	// is a compile-time constant in the generated code, in the case where the size is
	// a power of two less than or equal to ptrBits, the compiler can remove the
	// 'two writes' branch of the if statement and always do only one write without
	// the check.
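	// For example, on a 64-bit system (ptrBits = 64): elemsize = 32 gives bits = 4,
	// a power of two, so the two-write branch below is statically dead; elemsize = 24
	// gives bits = 3, and a write starting at j = 62 would straddle two words, so
	// both branches are kept.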
	const bitsIsPowerOfTwo = bits&(bits-1) == 0
	if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) {
		// Two writes.
		bits0 := ptrBits - j
		bits1 := bits - bits0
		dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize))
		dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize))
		*dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j)
		*dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0)
	} else {
		// One write.
		dst := (*uintptr)(add(dst, i*goarch.PtrSize))
		*dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) // We take the min so this compiles on 32-bit platforms; if bits > ptrBits we always take the other branch.
	}

	const doubleCheck = false
	if doubleCheck {
		writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ)
	}
	return scanSize
}

func writeHeapBitsDoubleCheck(span *mspan, x, dataSize, src, src0, i, j, bits uintptr, typ *_type) {
	srcRead := span.heapBitsSmallForAddr(x)
	if srcRead != src {
		print("runtime: x=", hex(x), " i=", i, " j=", j, " bits=", bits, "\n")
		print("runtime: dataSize=", dataSize, " typ.Size_=", typ.Size_, " typ.PtrBytes=", typ.PtrBytes, "\n")
		print("runtime: src0=", hex(src0), " src=", hex(src), " srcRead=", hex(srcRead), "\n")
		throw("bad pointer bits written for small object")
	}
}