Source file src/runtime/mspanset.go
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"internal/cpu"
	"internal/goarch"
	"internal/runtime/atomic"
	"unsafe"
)

// A spanSet is a set of *mspans.
//
// spanSet is safe for concurrent push and pop operations.
type spanSet struct {
	// A spanSet is a two-level data structure consisting of a
	// growable spine that points to fixed-sized blocks. The spine
	// can be accessed without locks, but adding a block or
	// growing it requires taking the spine lock.
	//
	// Because each mspan covers at least 8K of heap and takes at
	// most 8 bytes in the spanSet, the growth of the spine is
	// quite limited.
	//
	// The spine and all blocks are allocated off-heap, which
	// allows this to be used in the memory manager and avoids the
	// need for write barriers on all of these. spanSetBlocks are
	// managed in a pool, though never freed back to the operating
	// system. We never release spine memory because there could be
	// concurrent lock-free access and we're likely to reuse it
	// anyway. (In principle, we could do this during STW.)

	spineLock mutex
	spine     atomicSpanSetSpinePointer // *[N]atomic.Pointer[spanSetBlock]
	spineLen  atomic.Uintptr            // Spine array length
	spineCap  uintptr                   // Spine array cap, accessed under spineLock

	// index is the head and tail of the spanSet in a single field.
	// The head and the tail both represent an index into the logical
	// concatenation of all blocks, with the head always behind or
	// equal to the tail (indicating an empty set). This field is
	// always accessed atomically.
	//
	// The head and the tail are only 32 bits wide, which means we
	// can only support up to 2^32 pushes before a reset. If every
	// span in the heap were stored in this set, and each span were
	// the minimum size (1 runtime page, 8 KiB), then roughly the
	// smallest heap which would be unrepresentable is 32 TiB in size.
	index atomicHeadTailIndex
}

const (
	spanSetBlockEntries = 512 // 4KB on 64-bit
	spanSetInitSpineCap = 256 // Enough for 1GB heap on 64-bit
)

type spanSetBlockHeader struct {
	// Free spanSetBlocks are managed via a lock-free stack.
	lfnode

	// popped is the number of pop operations that have occurred on
	// this block. This number is used to help determine when a block
	// may be safely recycled.
	popped atomic.Uint32
}

type spanSetBlockHeader2 struct {
	spanSetBlockHeader
	pad [tagAlign - unsafe.Sizeof(spanSetBlockHeader{})]byte
}

type spanSetBlock struct {
	spanSetBlockHeader2

	// spans is the set of spans in this block.
	spans [spanSetBlockEntries]atomicMSpanPointer
}

// push adds span s to buffer b. push is safe to call concurrently
// with other push and pop operations.
func (b *spanSet) push(s *mspan) {
	// Obtain our slot.
	cursor := uintptr(b.index.incTail().tail() - 1)
	top, bottom := cursor/spanSetBlockEntries, cursor%spanSetBlockEntries

	// Do we need to add a block?
	spineLen := b.spineLen.Load()
	var block *spanSetBlock
retry:
	if top < spineLen {
		block = b.spine.Load().lookup(top).Load()
	} else {
		// Add a new block to the spine, potentially growing
		// the spine.
		lock(&b.spineLock)
		// spineLen cannot change until we release the lock,
		// but may have changed while we were waiting.
		spineLen = b.spineLen.Load()
		if top < spineLen {
			unlock(&b.spineLock)
			goto retry
		}

		spine := b.spine.Load()
		if spineLen == b.spineCap {
			// Grow the spine.
			newCap := b.spineCap * 2
			if newCap == 0 {
				newCap = spanSetInitSpineCap
			}
			newSpine := persistentalloc(newCap*goarch.PtrSize, cpu.CacheLineSize, &memstats.gcMiscSys)
			if b.spineCap != 0 {
				// Blocks are allocated off-heap, so
				// no write barriers.
				memmove(newSpine, spine.p, b.spineCap*goarch.PtrSize)
			}
			spine = spanSetSpinePointer{newSpine}

			// Spine is allocated off-heap, so no write barrier.
			b.spine.StoreNoWB(spine)
			b.spineCap = newCap
			// We can't immediately free the old spine
			// since a concurrent push with a lower index
			// could still be reading from it. We let it
			// leak because even a 1TB heap would waste
			// less than 2MB of memory on old spines. If
			// this is a problem, we could free old spines
			// during STW.
		}

		// Allocate a new block from the pool.
		block = spanSetBlockPool.alloc()

		// Add it to the spine.
		// Blocks are allocated off-heap, so no write barrier.
		spine.lookup(top).StoreNoWB(block)
		b.spineLen.Store(spineLen + 1)
		unlock(&b.spineLock)
	}

	// We have a block. Insert the span atomically, since there may be
	// concurrent readers via the block API.
	block.spans[bottom].StoreNoWB(s)
}

// pop removes and returns a span from buffer b, or nil if b is empty.
// pop is safe to call concurrently with other pop and push operations.
func (b *spanSet) pop() *mspan {
	var head, tail uint32
	var backoff uint32
	// TODO: tweak backoff parameters on other architectures.
	if GOARCH == "arm64" {
		backoff = 128
	}
claimLoop:
	for {
		headtail := b.index.load()
		head, tail = headtail.split()
		if head >= tail {
			// The buf is empty, as far as we can tell.
			return nil
		}
		// Check if the head position we want to claim is actually
		// backed by a block.
		spineLen := b.spineLen.Load()
		if spineLen <= uintptr(head)/spanSetBlockEntries {
			// We're racing with a spine growth and the allocation of
			// a new block (and maybe a new spine!), and trying to grab
			// the span at the index which is currently being pushed.
			// Instead of spinning, let's just notify the caller that
			// there's nothing currently here. Spinning on this is
			// almost definitely not worth it.
			return nil
		}
		// Try to claim the current head by CASing in an updated head.
		// This may fail transiently due to a push which modifies the
		// tail, so keep trying while the head isn't changing.
		want := head
		for want == head {
			if b.index.cas(headtail, makeHeadTailIndex(want+1, tail)) {
				break claimLoop
			}
			// Use a backoff approach: reducing demand on the shared memory
			// location decreases memory contention and allows other threads
			// to make quicker progress.
			// Read more in this Arm blog post:
			// https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/multi-threaded-applications-arm
			procyield(backoff)
			// Increase backoff time.
			backoff += backoff / 2
			headtail = b.index.load()
			head, tail = headtail.split()
		}
		// We failed to claim the spot we were after and the head changed,
		// meaning a popper got ahead of us. Try again from the top because
		// the buf may not be empty.
	}
	top, bottom := head/spanSetBlockEntries, head%spanSetBlockEntries

	// We may be reading a stale spine pointer, but because the length
	// grows monotonically and we've already verified it, we'll definitely
	// be reading from a valid block.
	blockp := b.spine.Load().lookup(uintptr(top))

	// Given that the spine length is correct, we know we will never
	// see a nil block here, since the length is always updated after
	// the block is set.
	block := blockp.Load()
	s := block.spans[bottom].Load()
	for s == nil {
		// We raced with the span actually being set, but given that we
		// know a block for this span exists, the race window here is
		// extremely small. Try again.
		s = block.spans[bottom].Load()
	}
	// Clear the pointer. This isn't strictly necessary, but defensively
	// avoids accidentally re-using blocks which could lead to memory
	// corruption. This way, we'll get a nil pointer access instead.
	block.spans[bottom].StoreNoWB(nil)

	// Increase the popped count. If we are the last possible popper
	// in the block (note that bottom need not equal spanSetBlockEntries-1
	// due to races), then it's our responsibility to free the block.
	//
	// If we increment popped to spanSetBlockEntries, we can be sure that
	// we're the last popper for this block, and it's thus safe to free it.
	// Every other popper must have crossed this barrier (and thus finished
	// popping its corresponding mspan) by the time we get here. Because
	// we're the last popper, we also don't have to worry about concurrent
	// pushers (there can't be any). Note that we may not be the popper
	// which claimed the last slot in the block; we're just the last one
	// to finish popping.
	if block.popped.Add(1) == spanSetBlockEntries {
		// Clear the block's pointer.
		blockp.StoreNoWB(nil)

		// Return the block to the block pool.
		spanSetBlockPool.free(block)
	}
	return s
}

// reset resets an empty spanSet. It will also clean up
// any leftover blocks.
//
// Throws if the buf is not empty.
//
// reset may not be called concurrently with any other operations
// on the span set.
func (b *spanSet) reset() {
	head, tail := b.index.load().split()
	if head < tail {
		print("head = ", head, ", tail = ", tail, "\n")
		throw("attempt to clear non-empty span set")
	}
	top := head / spanSetBlockEntries
	if uintptr(top) < b.spineLen.Load() {
		// When the head catches up to the tail and the set becomes empty,
		// the block containing the head and tail may not have been cleaned
		// up, since it may be pushed into again. Since we're about to reset
		// the head and tail, clean up such a block now, if it exists, to
		// avoid leaking its memory.
		blockp := b.spine.Load().lookup(uintptr(top))
		block := blockp.Load()
		if block != nil {
			// Check the popped value.
			if block.popped.Load() == 0 {
				// popped should never be zero here: a non-nil block pointer
				// means at least one value was pushed into this block, and
				// because the set is empty, that value must also have been
				// popped.
				throw("span set block with unpopped elements found in reset")
			}
			if block.popped.Load() == spanSetBlockEntries {
				// popped should also never be equal to spanSetBlockEntries
				// because the last popper should have made the block pointer
				// in this slot nil.
279 throw("fully empty unfreed span set block found in reset") 280 } 281 282 // Clear the pointer to the block. 283 blockp.StoreNoWB(nil) 284 285 // Return the block to the block pool. 286 spanSetBlockPool.free(block) 287 } 288 } 289 b.index.reset() 290 b.spineLen.Store(0) 291 } 292 293 // atomicSpanSetSpinePointer is an atomically-accessed spanSetSpinePointer. 294 // 295 // It has the same semantics as atomic.UnsafePointer. 296 type atomicSpanSetSpinePointer struct { 297 a atomic.UnsafePointer 298 } 299 300 // Loads the spanSetSpinePointer and returns it. 301 // 302 // It has the same semantics as atomic.UnsafePointer. 303 func (s *atomicSpanSetSpinePointer) Load() spanSetSpinePointer { 304 return spanSetSpinePointer{s.a.Load()} 305 } 306 307 // Stores the spanSetSpinePointer. 308 // 309 // It has the same semantics as [atomic.UnsafePointer]. 310 func (s *atomicSpanSetSpinePointer) StoreNoWB(p spanSetSpinePointer) { 311 s.a.StoreNoWB(p.p) 312 } 313 314 // spanSetSpinePointer represents a pointer to a contiguous block of atomic.Pointer[spanSetBlock]. 315 type spanSetSpinePointer struct { 316 p unsafe.Pointer 317 } 318 319 // lookup returns &s[idx]. 320 func (s spanSetSpinePointer) lookup(idx uintptr) *atomic.Pointer[spanSetBlock] { 321 return (*atomic.Pointer[spanSetBlock])(add(s.p, goarch.PtrSize*idx)) 322 } 323 324 // spanSetBlockPool is a global pool of spanSetBlocks. 325 var spanSetBlockPool spanSetBlockAlloc 326 327 // spanSetBlockAlloc represents a concurrent pool of spanSetBlocks. 328 type spanSetBlockAlloc struct { 329 stack lfstack 330 } 331 332 // alloc tries to grab a spanSetBlock out of the pool, and if it fails 333 // persistentallocs a new one and returns it. 334 func (p *spanSetBlockAlloc) alloc() *spanSetBlock { 335 if s := (*spanSetBlock)(p.stack.pop()); s != nil { 336 return s 337 } 338 return (*spanSetBlock)(persistentalloc(unsafe.Sizeof(spanSetBlock{}), max(cpu.CacheLineSize, tagAlign), &memstats.gcMiscSys)) 339 } 340 341 // free returns a spanSetBlock back to the pool. 342 func (p *spanSetBlockAlloc) free(block *spanSetBlock) { 343 block.popped.Store(0) 344 p.stack.push(&block.lfnode) 345 } 346 347 // headTailIndex represents a combined 32-bit head and 32-bit tail 348 // of a queue into a single 64-bit value. 349 type headTailIndex uint64 350 351 // makeHeadTailIndex creates a headTailIndex value from a separate 352 // head and tail. 353 func makeHeadTailIndex(head, tail uint32) headTailIndex { 354 return headTailIndex(uint64(head)<<32 | uint64(tail)) 355 } 356 357 // head returns the head of a headTailIndex value. 358 func (h headTailIndex) head() uint32 { 359 return uint32(h >> 32) 360 } 361 362 // tail returns the tail of a headTailIndex value. 363 func (h headTailIndex) tail() uint32 { 364 return uint32(h) 365 } 366 367 // split splits the headTailIndex value into its parts. 368 func (h headTailIndex) split() (head uint32, tail uint32) { 369 return h.head(), h.tail() 370 } 371 372 // atomicHeadTailIndex is an atomically-accessed headTailIndex. 373 type atomicHeadTailIndex struct { 374 u atomic.Uint64 375 } 376 377 // load atomically reads a headTailIndex value. 378 func (h *atomicHeadTailIndex) load() headTailIndex { 379 return headTailIndex(h.u.Load()) 380 } 381 382 // cas atomically compares-and-swaps a headTailIndex value. 383 func (h *atomicHeadTailIndex) cas(old, new headTailIndex) bool { 384 return h.u.CompareAndSwap(uint64(old), uint64(new)) 385 } 386 387 // incHead atomically increments the head of a headTailIndex. 
func (h *atomicHeadTailIndex) incHead() headTailIndex {
	return headTailIndex(h.u.Add(1 << 32))
}

// decHead atomically decrements the head of a headTailIndex.
func (h *atomicHeadTailIndex) decHead() headTailIndex {
	return headTailIndex(h.u.Add(-(1 << 32)))
}

// incTail atomically increments the tail of a headTailIndex.
func (h *atomicHeadTailIndex) incTail() headTailIndex {
	ht := headTailIndex(h.u.Add(1))
	// Check for overflow.
	if ht.tail() == 0 {
		print("runtime: head = ", ht.head(), ", tail = ", ht.tail(), "\n")
		throw("headTailIndex overflow")
	}
	return ht
}

// reset clears the headTailIndex to (0, 0).
func (h *atomicHeadTailIndex) reset() {
	h.u.Store(0)
}

// atomicMSpanPointer is an atomic.Pointer[mspan]. Can't use generics because it's NotInHeap.
type atomicMSpanPointer struct {
	p atomic.UnsafePointer
}

// Load returns the *mspan.
func (p *atomicMSpanPointer) Load() *mspan {
	return (*mspan)(p.p.Load())
}

// StoreNoWB stores an *mspan.
func (p *atomicMSpanPointer) StoreNoWB(s *mspan) {
	p.p.StoreNoWB(unsafe.Pointer(s))
}
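
// The sketch below is purely illustrative and is not part of the runtime. It
// assumes only the declarations above and shows how a single 64-bit
// headTailIndex encodes the queue bounds, and how push and pop map a logical
// index onto a (block, slot) pair. The concrete numbers are hypothetical.
//
//	ht := makeHeadTailIndex(3, 7) // head=3, tail=7: four spans are in the set.
//	_, _ = ht.head(), ht.tail()   // 3 and 7, recovered from the high and low 32 bits.
//
//	// push claims a slot by bumping the tail; the claimed cursor is tail-1.
//	cursor := uintptr(ht.tail() - 1)
//	top, bottom := cursor/spanSetBlockEntries, cursor%spanSetBlockEntries
//	// With spanSetBlockEntries = 512, cursor 6 lands in block 0, slot 6.
//	_, _ = top, bottom
//
//	// pop claims the head slot with a CAS that advances only the head:
//	//	b.index.cas(ht, makeHeadTailIndex(ht.head()+1, ht.tail()))
//	// A concurrent push (which only bumps the tail) can make this CAS fail
//	// transiently even though the head has not moved, which is why pop
//	// retries with backoff while the head stays the same.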