Source file src/runtime/mcache.go
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"internal/runtime/atomic"
	"internal/runtime/gc"
	"internal/runtime/sys"
	"unsafe"
)

// Per-thread (in Go, per-P) cache for small objects.
// This includes a small object cache and local allocation stats.
// No locking needed because it is per-thread (per-P).
//
// mcaches are allocated from non-GC'd memory, so any heap pointers
// must be specially handled.
type mcache struct {
	_ sys.NotInHeap

	// The following members are accessed on every malloc,
	// so they are grouped here for better caching.
	nextSample  int64   // trigger heap sample after allocating this many bytes
	memProfRate int     // cached mem profile rate, used to detect changes
	scanAlloc   uintptr // bytes of scannable heap allocated

	// Allocator cache for tiny objects w/o pointers.
	// See "Tiny allocator" comment in malloc.go.

	// tiny points to the beginning of the current tiny block, or
	// nil if there is no current tiny block.
	//
	// tiny is a heap pointer. Since mcache is in non-GC'd memory,
	// we handle it by clearing it in releaseAll during mark
	// termination.
	//
	// tinyAllocs is the number of tiny allocations performed
	// by the P that owns this mcache.
	tiny       uintptr
	tinyoffset uintptr
	tinyAllocs uintptr

	// The rest is not accessed on every malloc.

	// alloc contains spans to allocate from, indexed by spanClass.
	alloc [numSpanClasses]*mspan

	// TODO(thepudds): better to interleave alloc and reusableScan/reusableNoscan so that
	// a single malloc call can often access both in the same cache line for a given spanClass.
	// It's not interleaved right now in part to keep the diff slightly smaller, and the
	// effect on current microbenchmarks might be negligible.

	// reusableNoscan contains linked lists of reusable noscan heap objects, indexed by spanClass.
	// The next pointers are stored in the first word of the heap objects.
	reusableNoscan [numSpanClasses]gclinkptr

	stackcache [_NumStackOrders]stackfreelist

	// flushGen indicates the sweepgen during which this mcache
	// was last flushed. If flushGen != mheap_.sweepgen, the spans
	// in this mcache are stale and need to be flushed so they
	// can be swept. This is done in acquirep.
	flushGen atomic.Uint32
}

// A gclink is a node in a linked list of blocks, like mlink,
// but it is opaque to the garbage collector.
// The GC does not trace the pointers during collection,
// and the compiler does not emit write barriers for assignments
// of gclinkptr values. Code should store references to gclinks
// as gclinkptr, not as *gclink.
type gclink struct {
	next gclinkptr
}

// A gclinkptr is a pointer to a gclink, but it is opaque
// to the garbage collector.
type gclinkptr uintptr

// ptr returns the *gclink form of p.
// The result should be used for accessing fields, not stored
// in other data structures.
func (p gclinkptr) ptr() *gclink {
	return (*gclink)(unsafe.Pointer(p))
}

type stackfreelist struct {
	list gclinkptr // linked list of free stacks
	size uintptr   // total size of stacks in list
}
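
// For illustration only (not part of the runtime API): a gclinkptr-based free
// list keeps its next pointers in the first word of each free block, so
// pushing a free block looks roughly like the following sketch
// (addReusableNoscan below does exactly this for reusable noscan objects):
//
//	v := gclinkptr(ptr) // ptr is the address of the free block
//	v.ptr().next = head
//	head = v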

// dummy mspan that contains no free objects.
var emptymspan mspan

func allocmcache() *mcache {
	var c *mcache
	systemstack(func() {
		lock(&mheap_.lock)
		c = (*mcache)(mheap_.cachealloc.alloc())
		c.flushGen.Store(mheap_.sweepgen)
		unlock(&mheap_.lock)
	})
	for i := range c.alloc {
		c.alloc[i] = &emptymspan
	}
	c.nextSample = nextSample()

	return c
}

// freemcache releases resources associated with this
// mcache and puts the object onto a free list.
//
// In some cases there is no way to simply release
// resources, such as statistics, so donate them to
// a different mcache (the recipient).
func freemcache(c *mcache) {
	systemstack(func() {
		c.releaseAll()
		stackcache_clear(c)

		// NOTE(rsc,rlh): If gcworkbuffree comes back, we need to coordinate
		// with the stealing of gcworkbufs during garbage collection to avoid
		// a race where the workbuf is double-freed.
		// gcworkbuffree(c.gcworkbuf)

		lock(&mheap_.lock)
		mheap_.cachealloc.free(unsafe.Pointer(c))
		unlock(&mheap_.lock)
	})
}

// getMCache is a convenience function which tries to obtain an mcache.
//
// Returns nil if we're not bootstrapping or we don't have a P. The caller's
// P must not change, so we must be in a non-preemptible state.
func getMCache(mp *m) *mcache {
	// Grab the mcache, since that's where stats live.
	pp := mp.p.ptr()
	var c *mcache
	if pp == nil {
		// We will be called without a P while bootstrapping,
		// in which case we use mcache0, which is set in mallocinit.
		// mcache0 is cleared when bootstrapping is complete,
		// by procresize.
		c = mcache0
	} else {
		c = pp.mcache
	}
	return c
}

// refill acquires a new span of span class spc for c. This span will
// have at least one free object. The current span in c must be full.
//
// Must run in a non-preemptible context since otherwise the owner of
// c could change.
func (c *mcache) refill(spc spanClass) {
	// Return the current cached span to the central lists.
	s := c.alloc[spc]

	if s.allocCount != s.nelems {
		throw("refill of span with free space remaining")
	}

	// TODO(thepudds): we might be able to allow mallocgcTiny to reuse 16 byte objects from spc==5,
	// but for now, just clear our reusable objects for tinySpanClass.
	if spc == tinySpanClass {
		c.reusableNoscan[spc] = 0
	}
	if c.reusableNoscan[spc] != 0 {
		throw("refill of span with reusable pointers remaining on pointer free list")
	}

	if s != &emptymspan {
		// Mark this span as no longer cached.
		if s.sweepgen != mheap_.sweepgen+3 {
			throw("bad sweepgen in refill")
		}
		mheap_.central[spc].mcentral.uncacheSpan(s)

		// Count up how many slots were used and record it.
		stats := memstats.heapStats.acquire()
		slotsUsed := int64(s.allocCount) - int64(s.allocCountBeforeCache)
		atomic.Xadd64(&stats.smallAllocCount[spc.sizeclass()], slotsUsed)

		// Flush tinyAllocs.
		if spc == tinySpanClass {
			atomic.Xadd64(&stats.tinyAllocCount, int64(c.tinyAllocs))
			c.tinyAllocs = 0
		}
		memstats.heapStats.release()

		// Count the allocs in inconsistent, internal stats.
		bytesAllocated := slotsUsed * int64(s.elemsize)
		gcController.totalAlloc.Add(bytesAllocated)

		// Clear the second allocCount just to be safe.
		s.allocCountBeforeCache = 0
	}

	// Get a new cached span from the central lists.
	s = mheap_.central[spc].mcentral.cacheSpan()
	if s == nil {
		throw("out of memory")
	}

	if s.allocCount == s.nelems {
		throw("span has no free space")
	}

	// Indicate that this span is cached and prevent asynchronous
	// sweeping in the next sweep phase.
	s.sweepgen = mheap_.sweepgen + 3

	// Store the current alloc count for accounting later.
	s.allocCountBeforeCache = s.allocCount

	// Update heapLive and flush scanAlloc.
	//
	// We have not yet allocated anything new into the span, but we
	// assume that all of its slots will get used, so this makes
	// heapLive an overestimate.
	//
	// When the span gets uncached, we'll fix up this overestimate
	// if necessary (see releaseAll).
	//
	// We pick an overestimate here because an underestimate leads
	// the pacer to believe that it's in better shape than it is,
	// which appears to lead to more memory used. See #53738 for
	// more details.
	usedBytes := uintptr(s.allocCount) * s.elemsize
	gcController.update(int64(s.npages*pageSize)-int64(usedBytes), int64(c.scanAlloc))
	c.scanAlloc = 0

	c.alloc[spc] = s
}
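
// For reference: the sweepgen values used in refill and releaseAll follow the
// encoding documented on mspan.sweepgen in mheap.go. mheap_.sweepgen advances
// by 2 each GC cycle; a cached span with sweepgen == mheap_.sweepgen+3 was
// swept before it was cached, while sweepgen == mheap_.sweepgen+1 means the
// span was cached before sweeping began and still needs sweeping (a "stale"
// span in releaseAll's terms).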

// allocLarge allocates a span for a large object.
func (c *mcache) allocLarge(size uintptr, noscan bool) *mspan {
	if size+pageSize < size {
		throw("out of memory")
	}
	npages := size >> gc.PageShift
	if size&pageMask != 0 {
		npages++
	}

	// Deduct credit for this span allocation and sweep if
	// necessary. mHeap_Alloc will also sweep npages, so this only
	// pays the debt down to npage pages.
	deductSweepCredit(npages*pageSize, npages)

	spc := makeSpanClass(0, noscan)
	s := mheap_.alloc(npages, spc)
	if s == nil {
		throw("out of memory")
	}

	// Count the alloc in consistent, external stats.
	stats := memstats.heapStats.acquire()
	atomic.Xadd64(&stats.largeAlloc, int64(npages*pageSize))
	atomic.Xadd64(&stats.largeAllocCount, 1)
	memstats.heapStats.release()

	// Count the alloc in inconsistent, internal stats.
	gcController.totalAlloc.Add(int64(npages * pageSize))

	// Update heapLive.
	gcController.update(int64(s.npages*pageSize), 0)

	// Put the large span in the mcentral swept list so that it's
	// visible to the background sweeper.
	mheap_.central[spc].mcentral.fullSwept(mheap_.sweepgen).push(s)

	// Adjust s.limit down to the object-containing part of the span.
	//
	// This is just to create a slightly tighter bound on the limit.
	// It's totally OK if the garbage collector, in particular
	// conservative scanning, temporarily observes an inflated
	// limit. It will simply mark the whole object or just skip it
	// since we're in the mark phase anyway.
	s.limit = s.base() + size
	s.initHeapBits()
	return s
}
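
// releaseAll returns all of c's cached spans to their mcentrals, flushes the
// cached allocation statistics, resets the tiny allocator state, clears the
// reusable object lists, and updates gcController's heapLive and heapScan.
// It is called when the mcache is freed (freemcache) and when the mcache must
// be flushed for a new sweep phase (prepareForSweep).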
func (c *mcache) releaseAll() {
	// Take this opportunity to flush scanAlloc.
	scanAlloc := int64(c.scanAlloc)
	c.scanAlloc = 0

	sg := mheap_.sweepgen
	dHeapLive := int64(0)
	for i := range c.alloc {
		s := c.alloc[i]
		if s != &emptymspan {
			slotsUsed := int64(s.allocCount) - int64(s.allocCountBeforeCache)
			s.allocCountBeforeCache = 0

			// Adjust smallAllocCount for whatever was allocated.
			stats := memstats.heapStats.acquire()
			atomic.Xadd64(&stats.smallAllocCount[spanClass(i).sizeclass()], slotsUsed)
			memstats.heapStats.release()

			// Adjust the actual allocs in inconsistent, internal stats.
			// We assumed earlier that the full span gets allocated.
			gcController.totalAlloc.Add(slotsUsed * int64(s.elemsize))

			if s.sweepgen != sg+1 {
				// refill conservatively counted unallocated slots in gcController.heapLive.
				// Undo this.
				//
				// If this span was cached before sweep, then gcController.heapLive was totally
				// recomputed since caching this span, so we don't do this for stale spans.
				dHeapLive -= int64(s.nelems-s.allocCount) * int64(s.elemsize)
			}

			// Release the span to the mcentral.
			mheap_.central[i].mcentral.uncacheSpan(s)
			c.alloc[i] = &emptymspan
		}
	}
	// Clear tinyalloc pool.
	c.tiny = 0
	c.tinyoffset = 0

	// Flush tinyAllocs.
	stats := memstats.heapStats.acquire()
	atomic.Xadd64(&stats.tinyAllocCount, int64(c.tinyAllocs))
	c.tinyAllocs = 0
	memstats.heapStats.release()

	// Clear the reusable linked lists.
	// For noscan objects, the nodes of the linked lists are the reusable heap objects themselves,
	// so we can simply clear the linked list head pointers.
	// TODO(thepudds): consider having debug logging of non-empty reusable lists getting cleared,
	// maybe based on the existing debugReusableLog.
	clear(c.reusableNoscan[:])

	// Update heapLive and heapScan.
	gcController.update(dHeapLive, scanAlloc)
}

// prepareForSweep flushes c if the system has entered a new sweep phase
// since c was populated. This must happen between the sweep phase
// starting and the first allocation from c.
func (c *mcache) prepareForSweep() {
	// Alternatively, instead of making sure we do this on every P
	// between starting the world and allocating on that P, we
	// could leave allocate-black on, allow allocation to continue
	// as usual, use a ragged barrier at the beginning of sweep to
	// ensure all cached spans are swept, and then disable
	// allocate-black. However, with this approach it's difficult
	// to avoid spilling mark bits into the *next* GC cycle.
	sg := mheap_.sweepgen
	flushGen := c.flushGen.Load()
	if flushGen == sg {
		return
	} else if flushGen != sg-2 {
		println("bad flushGen", flushGen, "in prepareForSweep; sweepgen", sg)
		throw("bad flushGen")
	}
	c.releaseAll()
	stackcache_clear(c)
	c.flushGen.Store(mheap_.sweepgen) // Synchronizes with gcStart
}

// addReusableNoscan adds a noscan object pointer to the reusable pointer free list
// for a span class.
func (c *mcache) addReusableNoscan(spc spanClass, ptr uintptr) {
	if !runtimeFreegcEnabled {
		return
	}

	// Add to the reusable pointers free list.
	v := gclinkptr(ptr)
	v.ptr().next = c.reusableNoscan[spc]
	c.reusableNoscan[spc] = v
}

// hasReusableNoscan reports whether there is a reusable object available for
// a noscan spc.
func (c *mcache) hasReusableNoscan(spc spanClass) bool {
	if !runtimeFreegcEnabled {
		return false
	}
	return c.reusableNoscan[spc] != 0
}
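
// For illustration only: the consumer of the reusable lists lives on the
// allocation path rather than in this file. Assuming the caller has already
// checked hasReusableNoscan(spc), popping a reusable object would look
// roughly like this hypothetical sketch:
//
//	v := c.reusableNoscan[spc]
//	c.reusableNoscan[spc] = v.ptr().next
//	x := unsafe.Pointer(v) // x is the reusable heap object to hand back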