Source file src/runtime/mcache.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"internal/runtime/atomic"
	"internal/runtime/gc"
	"internal/runtime/sys"
	"unsafe"
)

// Per-thread (in Go, per-P) cache for small objects.
// This includes a small object cache and local allocation stats.
// No locking needed because it is per-thread (per-P).
//
// mcaches are allocated from non-GC'd memory, so any heap pointers
// must be specially handled.
type mcache struct {
	_ sys.NotInHeap

	// The following members are accessed on every malloc,
	// so they are grouped here for better caching.
	nextSample  int64   // trigger heap sample after allocating this many bytes
	memProfRate int     // cached mem profile rate, used to detect changes
	scanAlloc   uintptr // bytes of scannable heap allocated

	// Allocator cache for tiny objects w/o pointers.
	// See "Tiny allocator" comment in malloc.go.

	// tiny points to the beginning of the current tiny block, or
	// nil if there is no current tiny block.
	//
	// tiny is a heap pointer. Since mcache is in non-GC'd memory,
	// we handle it by clearing it in releaseAll during mark
	// termination.
	//
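	// tinyoffset is the offset of the next free byte within the
	// current tiny block.
	//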
	// tinyAllocs is the number of tiny allocations performed
	// by the P that owns this mcache.
	tiny       uintptr
	tinyoffset uintptr
	tinyAllocs uintptr

	// The rest is not accessed on every malloc.

	// alloc contains spans to allocate from, indexed by spanClass.
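	// For example, alloc[makeSpanClass(5, true)] is the currently
	// cached span for noscan objects of size class 5.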
	alloc [numSpanClasses]*mspan

	// TODO(thepudds): it would be better to interleave alloc and reusableScan/reusableNoscan so
	// that a single malloc call can often access both in the same cache line for a given spanClass.
	// It is not interleaved right now in part to keep the diff slightly smaller, and the effect on
	// current microbenchmarks might be negligible anyway.

	// reusableNoscan contains linked lists of reusable noscan heap objects, indexed by spanClass.
	// The next pointers are stored in the first word of the heap objects.
	reusableNoscan [numSpanClasses]gclinkptr

	stackcache [_NumStackOrders]stackfreelist

	// flushGen indicates the sweepgen during which this mcache
	// was last flushed. If flushGen != mheap_.sweepgen, the spans
	// in this mcache are stale and need to be flushed so they
	// can be swept. This is done in acquirep.
	flushGen atomic.Uint32
}

// A gclink is a node in a linked list of blocks, like mlink,
// but it is opaque to the garbage collector.
// The GC does not trace the pointers during collection,
// and the compiler does not emit write barriers for assignments
// of gclinkptr values. Code should store references to gclinks
// as gclinkptr, not as *gclink.
type gclink struct {
	next gclinkptr
}

// A gclinkptr is a pointer to a gclink, but it is opaque
// to the garbage collector.
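//
// For example, pushing a free block onto a gclinkptr-headed list looks
// roughly like the following (a sketch; compare addReusableNoscan below):
//
//	v := gclinkptr(ptr)
//	v.ptr().next = head
//	head = v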
type gclinkptr uintptr

// ptr returns the *gclink form of p.
// The result should be used for accessing fields, not stored
// in other data structures.
func (p gclinkptr) ptr() *gclink {
	return (*gclink)(unsafe.Pointer(p))
}

type stackfreelist struct {
	list gclinkptr // linked list of free stacks
	size uintptr   // total size of stacks in list
}

// dummy mspan that contains no free objects.
var emptymspan mspan

func allocmcache() *mcache {
	var c *mcache
	systemstack(func() {
		lock(&mheap_.lock)
		c = (*mcache)(mheap_.cachealloc.alloc())
		c.flushGen.Store(mheap_.sweepgen)
		unlock(&mheap_.lock)
	})
	for i := range c.alloc {
		c.alloc[i] = &emptymspan
	}
	c.nextSample = nextSample()

	return c
}

// freemcache releases resources associated with this
// mcache and puts the object onto a free list.
//
// In some cases there is no way to simply release
// resources, such as statistics, so donate them to
// a different mcache (the recipient).
func freemcache(c *mcache) {
	systemstack(func() {
		c.releaseAll()
		stackcache_clear(c)

		// NOTE(rsc,rlh): If gcworkbuffree comes back, we need to coordinate
		// with the stealing of gcworkbufs during garbage collection to avoid
		// a race where the workbuf is double-freed.
		// gcworkbuffree(c.gcworkbuf)

		lock(&mheap_.lock)
		mheap_.cachealloc.free(unsafe.Pointer(c))
		unlock(&mheap_.lock)
	})
}

// getMCache is a convenience function which tries to obtain an mcache.
//
// Returns nil if we don't have a P and we're not bootstrapping. The caller's
// P must not change, so we must be in a non-preemptible state.
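//
// A typical caller (sketch, using the standard acquirem/releasem helpers)
// pins the M so the P cannot change underneath us:
//
//	mp := acquirem()
//	c := getMCache(mp)
//	// ... allocate using c ...
//	releasem(mp)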
func getMCache(mp *m) *mcache {
	// Grab the mcache, since that's where stats live.
	pp := mp.p.ptr()
	var c *mcache
	if pp == nil {
		// We will be called without a P while bootstrapping,
		// in which case we use mcache0, which is set in mallocinit.
		// mcache0 is cleared when bootstrapping is complete,
		// by procresize.
		c = mcache0
	} else {
		c = pp.mcache
	}
	return c
}

// refill acquires a new span of span class spc for c. This span will
// have at least one free object. The current span in c must be full.
//
// Must run in a non-preemptible context since otherwise the owner of
// c could change.
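//
// A sketch of how a caller such as (*mcache).nextFree in malloc.go uses
// refill once the cached span is exhausted (illustrative, not verbatim):
//
//	s := c.alloc[spc]
//	if s.allocCount == s.nelems {
//		c.refill(spc)
//		s = c.alloc[spc] // refill installed a span with free slots
//	}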
func (c *mcache) refill(spc spanClass) {
	// Return the current cached span to the central lists.
	s := c.alloc[spc]

	if s.allocCount != s.nelems {
		throw("refill of span with free space remaining")
	}

	// TODO(thepudds): we might be able to allow mallocgcTiny to reuse 16-byte objects from spc==5,
	// but for now, just clear our reusable objects for tinySpanClass.
	if spc == tinySpanClass {
		c.reusableNoscan[spc] = 0
	}
	if c.reusableNoscan[spc] != 0 {
		throw("refill of span with reusable pointers remaining on pointer free list")
	}

	if s != &emptymspan {
		// Mark this span as no longer cached.
		if s.sweepgen != mheap_.sweepgen+3 {
			throw("bad sweepgen in refill")
		}
		mheap_.central[spc].mcentral.uncacheSpan(s)

		// Count up how many slots were used and record it.
		stats := memstats.heapStats.acquire()
		slotsUsed := int64(s.allocCount) - int64(s.allocCountBeforeCache)
		atomic.Xadd64(&stats.smallAllocCount[spc.sizeclass()], slotsUsed)

		// Flush tinyAllocs.
		if spc == tinySpanClass {
			atomic.Xadd64(&stats.tinyAllocCount, int64(c.tinyAllocs))
			c.tinyAllocs = 0
		}
		memstats.heapStats.release()

		// Count the allocs in inconsistent, internal stats.
		bytesAllocated := slotsUsed * int64(s.elemsize)
		gcController.totalAlloc.Add(bytesAllocated)

		// Clear the second allocCount just to be safe.
		s.allocCountBeforeCache = 0
	}

	// Get a new cached span from the central lists.
	s = mheap_.central[spc].mcentral.cacheSpan()
	if s == nil {
		throw("out of memory")
	}

	if s.allocCount == s.nelems {
		throw("span has no free space")
	}

	// Indicate that this span is cached and prevent asynchronous
	// sweeping in the next sweep phase.
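	// (In the mspan sweepgen encoding, sweepgen == mheap_.sweepgen+3 means
	// the span was swept and is now cached; see the mspan comment in mheap.go.)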
	s.sweepgen = mheap_.sweepgen + 3

	// Store the current alloc count for accounting later.
	s.allocCountBeforeCache = s.allocCount

	// Update heapLive and flush scanAlloc.
	//
	// We have not yet allocated anything new into the span, but we
	// assume that all of its slots will get used, so this makes
	// heapLive an overestimate.
	//
	// When the span gets uncached, we'll fix up this overestimate
	// if necessary (see releaseAll).
	//
	// We pick an overestimate here because an underestimate leads
	// the pacer to believe that it's in better shape than it is,
	// which appears to lead to more memory used. See #53738 for
	// more details.
	usedBytes := uintptr(s.allocCount) * s.elemsize
	gcController.update(int64(s.npages*pageSize)-int64(usedBytes), int64(c.scanAlloc))
	c.scanAlloc = 0

	c.alloc[spc] = s
}

// allocLarge allocates a span for a large object.
func (c *mcache) allocLarge(size uintptr, noscan bool) *mspan {
	if size+pageSize < size {
		throw("out of memory")
	}
	npages := size >> gc.PageShift
	if size&pageMask != 0 {
		npages++
	}
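	// For example, with 8 KiB pages (pageSize == 8192), a 10 KiB request
	// yields npages == 1 from the shift, and the nonzero remainder bumps
	// it to 2.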

	// Deduct credit for this span allocation and sweep if
	// necessary. mheap_.alloc will also sweep npages, so this only
	// pays the debt down to npages pages.
	deductSweepCredit(npages*pageSize, npages)

	spc := makeSpanClass(0, noscan)
	s := mheap_.alloc(npages, spc)
	if s == nil {
		throw("out of memory")
	}

	// Count the alloc in consistent, external stats.
	stats := memstats.heapStats.acquire()
	atomic.Xadd64(&stats.largeAlloc, int64(npages*pageSize))
	atomic.Xadd64(&stats.largeAllocCount, 1)
	memstats.heapStats.release()

	// Count the alloc in inconsistent, internal stats.
	gcController.totalAlloc.Add(int64(npages * pageSize))

	// Update heapLive.
	gcController.update(int64(s.npages*pageSize), 0)

	// Put the large span in the mcentral swept list so that it's
	// visible to the background sweeper.
	mheap_.central[spc].mcentral.fullSwept(mheap_.sweepgen).push(s)

	// Adjust s.limit down to the object-containing part of the span.
	//
	// This is just to create a slightly tighter bound on the limit.
	// It's totally OK if the garbage collector, in particular
	// conservative scanning, temporarily observes an inflated
	// limit. It will simply mark the whole object or just skip it
	// since we're in the mark phase anyway.
	s.limit = s.base() + size
	s.initHeapBits()
	return s
}

func (c *mcache) releaseAll() {
	// Take this opportunity to flush scanAlloc.
	scanAlloc := int64(c.scanAlloc)
	c.scanAlloc = 0

	sg := mheap_.sweepgen
	dHeapLive := int64(0)
	for i := range c.alloc {
		s := c.alloc[i]
		if s != &emptymspan {
			slotsUsed := int64(s.allocCount) - int64(s.allocCountBeforeCache)
			s.allocCountBeforeCache = 0

			// Adjust smallAllocCount for whatever was allocated.
			stats := memstats.heapStats.acquire()
			atomic.Xadd64(&stats.smallAllocCount[spanClass(i).sizeclass()], slotsUsed)
			memstats.heapStats.release()

			// Adjust the actual allocs in inconsistent, internal stats.
			// We assumed earlier that the full span gets allocated.
			gcController.totalAlloc.Add(slotsUsed * int64(s.elemsize))

			if s.sweepgen != sg+1 {
				// refill conservatively counted unallocated slots in gcController.heapLive.
				// Undo this.
				//
				// If this span was cached before sweep, then gcController.heapLive was totally
				// recomputed since caching this span, so we don't do this for stale spans.
				dHeapLive -= int64(s.nelems-s.allocCount) * int64(s.elemsize)
			}

			// Release the span to the mcentral.
			mheap_.central[i].mcentral.uncacheSpan(s)
			c.alloc[i] = &emptymspan
		}
	}
	// Clear tinyalloc pool.
	c.tiny = 0
	c.tinyoffset = 0

	// Flush tinyAllocs.
	stats := memstats.heapStats.acquire()
	atomic.Xadd64(&stats.tinyAllocCount, int64(c.tinyAllocs))
	c.tinyAllocs = 0
	memstats.heapStats.release()

	// Clear the reusable linked lists.
	// For noscan objects, the nodes of the linked lists are the reusable heap objects themselves,
	// so we can simply clear the linked list head pointers.
	// TODO(thepudds): consider adding debug logging when a non-empty reusable list gets cleared,
	// maybe based on the existing debugReusableLog.
	clear(c.reusableNoscan[:])

	// Update heapLive and heapScan.
	gcController.update(dHeapLive, scanAlloc)
}

// prepareForSweep flushes c if the system has entered a new sweep phase
// since c was populated. This must happen between the sweep phase
// starting and the first allocation from c.
func (c *mcache) prepareForSweep() {
	// Alternatively, instead of making sure we do this on every P
	// between starting the world and allocating on that P, we
	// could leave allocate-black on, allow allocation to continue
	// as usual, use a ragged barrier at the beginning of sweep to
	// ensure all cached spans are swept, and then disable
	// allocate-black. However, with this approach it's difficult
	// to avoid spilling mark bits into the *next* GC cycle.
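	//
	// sweepgen advances by 2 each GC cycle, so flushGen == sg means c was
	// already flushed during this cycle, while flushGen == sg-2 means it
	// was last flushed during the previous cycle and needs flushing now.
	// Anything else is unexpected and throws below.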
	sg := mheap_.sweepgen
	flushGen := c.flushGen.Load()
	if flushGen == sg {
		return
	} else if flushGen != sg-2 {
		println("bad flushGen", flushGen, "in prepareForSweep; sweepgen", sg)
		throw("bad flushGen")
	}
	c.releaseAll()
	stackcache_clear(c)
	c.flushGen.Store(mheap_.sweepgen) // Synchronizes with gcStart

// addReusableNoscan adds a noscan object pointer to the reusable pointer free list
// for a span class.
func (c *mcache) addReusableNoscan(spc spanClass, ptr uintptr) {
	if !runtimeFreegcEnabled {
		return
	}

	// Add to the reusable pointers free list.
	v := gclinkptr(ptr)
	v.ptr().next = c.reusableNoscan[spc]
	c.reusableNoscan[spc] = v
}

// hasReusableNoscan reports whether there is a reusable object available for
// a noscan spc.
func (c *mcache) hasReusableNoscan(spc spanClass) bool {
	if !runtimeFreegcEnabled {
		return false
	}
	return c.reusableNoscan[spc] != 0
}
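
// A consumer on the malloc path might pop a reusable object roughly like
// this (a sketch only; the actual consumer lives outside this file):
//
//	if c.hasReusableNoscan(spc) {
//		v := c.reusableNoscan[spc]
//		c.reusableNoscan[spc] = v.ptr().next
//		// v now refers to a reusable noscan object of span class spc.
//	}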
