Source file src/runtime/lock_spinbit.go

// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build (aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || plan9 || solaris || windows) && goexperiment.spinbitmutex

package runtime

import (
	"internal/goarch"
	"internal/runtime/atomic"
	"unsafe"
)

// This implementation depends on OS-specific implementations of
//
//	func semacreate(mp *m)
//		Create a semaphore for mp, if it does not already have one.
//
//	func semasleep(ns int64) int32
//		If ns < 0, acquire m's semaphore and return 0.
//		If ns >= 0, try to acquire m's semaphore for at most ns nanoseconds.
//		Return 0 if the semaphore was acquired, -1 if interrupted or timed out.
//
//	func semawakeup(mp *m)
//		Wake up mp, which is or will soon be sleeping on its semaphore.

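// As an illustrative sketch only, the slow paths below combine those
// primitives roughly as follows: a waiting M in lock2 runs
//
//	semacreate(gp.m)
//	// ... push gp.m onto l's list of waiting Ms ...
//	semasleep(-1)
//
// and an unlocking M in unlock2Wake pops a waiter mp off of that list and runs
//
//	semawakeup(mp)
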
// The mutex state consists of four flags and a pointer. The flag at bit 0,
// mutexLocked, represents the lock itself. Bit 1, mutexSleeping, is a hint that
// the pointer is non-nil. The fast paths for locking and unlocking the mutex
// are based on atomic 8-bit swap operations on the low byte; bits 2 through 7
// are unused.
//
// Bit 8, mutexSpinning, is a try-lock that grants a waiting M permission to
// spin on the state word. Most other Ms must attempt to spend their time
// sleeping to reduce traffic on the cache line. This is the "spin bit" for
// which the implementation is named. (The anti-starvation mechanism also grants
// temporary permission for an M to spin.)
//
// Bit 9, mutexStackLocked, is a try-lock that grants an unlocking M permission
// to inspect the list of waiting Ms and to pop an M off of that stack.
//
// The upper bits hold a (partial) pointer to the M that most recently went to
// sleep. The sleeping Ms form a stack linked by their mWaitList.next fields.
// Because the fast paths use an 8-bit swap on the low byte of the state word,
// we'll need to reconstruct the full M pointer from the bits we have. Most Ms
// are allocated on the heap, and have a known alignment and base offset. (The
// offset is due to mallocgc's allocation headers.) The main program thread uses
// a static M value, m0. We check for m0 specifically and add a known offset
// otherwise.

const (
	active_spin     = 4  // referenced in proc.go for sync.Mutex implementation
	active_spin_cnt = 30 // referenced in proc.go for sync.Mutex implementation
)

const (
	mutexLocked      = 0x001
	mutexSleeping    = 0x002
	mutexSpinning    = 0x100
	mutexStackLocked = 0x200
	mutexMMask       = 0x3FF
	mutexMOffset     = mallocHeaderSize // alignment of heap-allocated Ms (those other than m0)

	mutexActiveSpinCount  = 4
	mutexActiveSpinSize   = 30
	mutexPassiveSpinCount = 1

	mutexTailWakePeriod = 16
)

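// To make the bit layout concrete, consider a hypothetical 64-bit state word in
// which a heap-allocated M at address 0x0000_00c0_0004_0808 (a 2048-byte size
// class slot plus an 8-byte allocation header) is the most recent sleeper, the
// lock is held, and some waiter holds spin permission:
//
//	  0x0000_00c0_0004_0800 // the M's address with its low 10 bits cleared
//	| mutexSpinning         // bit 8: a waiter is spinning on the state word
//	| mutexSleeping         // bit 1: the pointer bits above are non-nil
//	| mutexLocked           // bit 0: the lock itself
//	= 0x0000_00c0_0004_0903
//
// The Xchg8-based fast paths read and write only the low byte (0x03 here).
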
// key8 returns a pointer to the least-significant byte of *p, the byte that
// the Xchg8-based fast paths swap.
//
//go:nosplit
func key8(p *uintptr) *uint8 {
	if goarch.BigEndian {
		return &(*[8]uint8)(unsafe.Pointer(p))[goarch.PtrSize/1-1]
	}
	return &(*[8]uint8)(unsafe.Pointer(p))[0]
}

// mWaitList is part of the M struct, and holds the list of Ms that are waiting
// for a particular runtime.mutex.
//
// When an M is unable to immediately obtain a lock, it adds itself to the list
// of Ms waiting for the lock. It does that via this struct's next field,
// forming a singly-linked list with the mutex's key field pointing to the head
// of the list.
type mWaitList struct {
	next muintptr // next m waiting for lock
}

// lockVerifyMSize confirms that we can recreate the low bits of the M pointer.
func lockVerifyMSize() {
	size := roundupsize(unsafe.Sizeof(m{}), false) + mallocHeaderSize
	if size&mutexMMask != 0 {
		print("M structure uses sizeclass ", size, "/", hex(size), " bytes; ",
			"incompatible with mutex flag mask ", hex(mutexMMask), "\n")
		throw("runtime.m memory alignment too small for spinbit mutex")
	}
}

// mutexWaitListHead recovers a full muintptr that was missing its low bits.
// With the exception of the static m0 value, it requires allocating runtime.m
// values in a size class with a particular minimum alignment. The 2048-byte
// size class allows recovering the full muintptr value even after overwriting
// the low 11 bits with flags. We can use those 11 bits as 3 flags and an
// atomically-swapped byte.
//
//go:nosplit
func mutexWaitListHead(v uintptr) muintptr {
	if highBits := v &^ mutexMMask; highBits == 0 {
		return 0
	} else if m0bits := muintptr(unsafe.Pointer(&m0)); highBits == uintptr(m0bits)&^mutexMMask {
		return m0bits
	} else {
		return muintptr(highBits + mutexMOffset)
	}
}
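
// As a concrete (hypothetical) example of the round trip: a heap-allocated M
// at 0x0000_00c0_0004_0808 sits at a size-class-aligned base plus
// mallocHeaderSize, a layout that lockVerifyMSize confirms is compatible with
// mutexMMask. Packing in lock2 clears the low bits, and mutexWaitListHead
// restores them:
//
//	pack:    0x0000_00c0_0004_0808 &^ mutexMMask  == 0x0000_00c0_0004_0800
//	recover: 0x0000_00c0_0004_0800 + mutexMOffset == 0x0000_00c0_0004_0808
//
// m0 is the exception: it is recognized by its high bits and returned directly.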

// mutexPreferLowLatency reports whether this mutex prefers low latency at the
// risk of performance collapse. If so, we can allow all waiting threads to spin
// on the state word rather than go to sleep.
//
// TODO: We could have the waiting Ms each spin on their own private cache line,
// especially if we can put a bound on the on-CPU time that would consume.
//
// TODO: If there's a small set of mutex values with special requirements, they
// could make use of a more specialized lock2/unlock2 implementation. Otherwise,
// we're constrained to what we can fit within a single uintptr with no
// additional storage on the M for each lock held.
//
//go:nosplit
func mutexPreferLowLatency(l *mutex) bool {
	switch l {
	default:
		return false
	case &sched.lock:
		// We often expect sched.lock to pass quickly between Ms in a way that
		// each M has unique work to do: for instance when we stop-the-world
		// (bringing each P to idle) or add new netpoller-triggered work to the
		// global run queue.
		return true
	}
}

// mutexContended reports whether l appears to have waiting Ms (any state bits
// set beyond mutexLocked itself).
func mutexContended(l *mutex) bool {
	return atomic.Loaduintptr(&l.key) > mutexLocked
}

func lock(l *mutex) {
	lockWithRank(l, getLockRank(l))
}

func lock2(l *mutex) {
	gp := getg()
	if gp.m.locks < 0 {
		throw("runtime·lock: lock count")
	}
	gp.m.locks++

	k8 := key8(&l.key)

	// Speculative grab for lock.
	v8 := atomic.Xchg8(k8, mutexLocked)
	if v8&mutexLocked == 0 {
		if v8&mutexSleeping != 0 {
			atomic.Or8(k8, mutexSleeping)
		}
		return
	}
	semacreate(gp.m)

	timer := &lockTimer{lock: l}
	timer.begin()
	// On uniprocessors, no point spinning.
	// On multiprocessors, spin for mutexActiveSpinCount attempts.
	spin := 0
	if ncpu > 1 {
		spin = mutexActiveSpinCount
	}

	var weSpin, atTail bool
	v := atomic.Loaduintptr(&l.key)
tryAcquire:
	for i := 0; ; i++ {
		if v&mutexLocked == 0 {
			if weSpin {
				next := (v &^ mutexSpinning) | mutexSleeping | mutexLocked
				if next&^mutexMMask == 0 {
					// The fast-path Xchg8 may have cleared mutexSleeping. Fix
					// the hint so unlock2 knows when to use its slow path.
					next = next &^ mutexSleeping
				}
				if atomic.Casuintptr(&l.key, v, next) {
					timer.end()
					return
				}
			} else {
				prev8 := atomic.Xchg8(k8, mutexLocked|mutexSleeping)
				if prev8&mutexLocked == 0 {
					timer.end()
					return
				}
			}
			v = atomic.Loaduintptr(&l.key)
			continue tryAcquire
		}

		// Claim the mutexSpinning try-lock if it's available; its holder is
		// the one waiter permitted to spin on the state word, which limits
		// traffic on the cache line.
		if !weSpin && v&mutexSpinning == 0 && atomic.Casuintptr(&l.key, v, v|mutexSpinning) {
			v |= mutexSpinning
			weSpin = true
		}

		if weSpin || atTail || mutexPreferLowLatency(l) {
			if i < spin {
				procyield(mutexActiveSpinSize)
				v = atomic.Loaduintptr(&l.key)
				continue tryAcquire
			} else if i < spin+mutexPassiveSpinCount {
				osyield() // TODO: Consider removing this step. See https://go.dev/issue/69268.
				v = atomic.Loaduintptr(&l.key)
				continue tryAcquire
			}
		}

		// Go to sleep.
		if v&mutexLocked == 0 {
			throw("runtime·lock: sleeping while lock is available")
		}

		// Store the current head of the list of sleeping Ms in our gp.m.mWaitList.next field.
		gp.m.mWaitList.next = mutexWaitListHead(v)

		// Pack a (partial) pointer to this M with the current lock state bits.
		next := (uintptr(unsafe.Pointer(gp.m)) &^ mutexMMask) | v&mutexMMask | mutexSleeping
		if weSpin { // If we were spinning, prepare to retire
			next = next &^ mutexSpinning
		}

		if atomic.Casuintptr(&l.key, v, next) {
			weSpin = false
			// We've pushed ourselves onto the stack of waiters. Wait.
			semasleep(-1)
			atTail = gp.m.mWaitList.next == 0 // we were at risk of starving
			i = 0
		}

		gp.m.mWaitList.next = 0
		v = atomic.Loaduintptr(&l.key)
	}
}

func unlock(l *mutex) {
	unlockWithRank(l)
}
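
// As an illustrative sketch of use, runtime code brackets access to shared
// state with matching calls, for example on sched.lock:
//
//	lock(&sched.lock)
//	// ... read or modify state guarded by sched.lock ...
//	unlock(&sched.lock)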

// We might not be holding a p in this code.
//
//go:nowritebarrier
func unlock2(l *mutex) {
	gp := getg()

	// Drop the lock and the mutexSleeping hint in a single 8-bit swap. The
	// spin bit, the stack-lock bit, and the partial pointer to any sleeping
	// Ms live above the low byte and are left in place.
	prev8 := atomic.Xchg8(key8(&l.key), 0)
	if prev8&mutexLocked == 0 {
		throw("unlock of unlocked lock")
	}

	if prev8&mutexSleeping != 0 {
		unlock2Wake(l)
	}

	gp.m.mLockProfile.recordUnlock(l)
	gp.m.locks--
	if gp.m.locks < 0 {
		throw("runtime·unlock: lock count")
	}
	if gp.m.locks == 0 && gp.preempt { // restore the preemption request in case we've cleared it in newstack
		gp.stackguard0 = stackPreempt
	}
}

// unlock2Wake updates the list of Ms waiting on l, waking an M if necessary.
//
//go:nowritebarrier
func unlock2Wake(l *mutex) {
	v := atomic.Loaduintptr(&l.key)

	// On occasion, seek out and wake the M at the bottom of the stack so it
	// doesn't starve.
	antiStarve := cheaprandn(mutexTailWakePeriod) == 0
	if !(antiStarve || // avoiding starvation may require a wake
		v&mutexSpinning == 0 || // no spinners means we must wake
		mutexPreferLowLatency(l)) { // prefer waiters be awake as much as possible
		return
	}

	for {
		if v&^mutexMMask == 0 || v&mutexStackLocked != 0 {
			// No waiting Ms means nothing to do.
			//
			// If the stack lock is unavailable, its owner would make the same
			// wake decisions that we would, so there's nothing for us to do.
			//
			// Although: This thread may have a different call stack, which
			// would result in a different entry in the mutex contention profile
			// (upon completion of go.dev/issue/66999). That could lead to weird
			// results if a slow critical section ends but another thread
			// quickly takes the lock, finishes its own critical section,
			// releases the lock, and then grabs the stack lock. That quick
			// thread would then take credit (blame) for the delay that this
			// slow thread caused. The alternative is to have more expensive
			// atomic operations (a CAS) on the critical path of unlock2.
			return
		}
		// Other Ms are waiting for the lock.
		// Obtain the stack lock, and pop off an M.
		next := v | mutexStackLocked
		if atomic.Casuintptr(&l.key, v, next) {
			break
		}
		v = atomic.Loaduintptr(&l.key)
	}

	// We own the mutexStackLocked flag. New Ms may push themselves onto the
	// stack concurrently, but we're now the only thread that can remove or
	// modify the Ms that are sleeping in the list.

	var committed *m // If we choose an M within the stack, we've made a promise to wake it
	for {
		headM := v &^ mutexMMask
		flags := v & (mutexMMask &^ mutexStackLocked) // preserve low bits, but release stack lock

		mp := mutexWaitListHead(v).ptr()
		wakem := committed
		if committed == nil {
			if v&mutexSpinning == 0 || mutexPreferLowLatency(l) {
				wakem = mp
			}
			if antiStarve {
				// Wake the M at the bottom of the stack of waiters. (This is
				// O(N) with the number of waiters.)
				wakem = mp
				prev := mp
				for {
					next := wakem.mWaitList.next.ptr()
					if next == nil {
						break
					}
					prev, wakem = wakem, next
				}
				if wakem != mp {
					prev.mWaitList.next = wakem.mWaitList.next
					committed = wakem
				}
			}
		}

		if wakem == mp {
			// We're popping the head of the stack; its successor (if any)
			// becomes the new head stored in the upper bits.
			headM = uintptr(mp.mWaitList.next) &^ mutexMMask
		}

		next := headM | flags
		if atomic.Casuintptr(&l.key, v, next) {
			if wakem != nil {
				// Claimed an M. Wake it.
				semawakeup(wakem)
			}
			break
		}

		v = atomic.Loaduintptr(&l.key)
	}
}
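
// For illustration, one possible sequence when a single M sleeps on l and no M
// holds the spin bit (the interleaving shown is hypothetical):
//
//	before:      l.key == <partial M pointer> | mutexSleeping | mutexLocked
//	unlock2:     swaps the low byte to 0, sees mutexSleeping, calls unlock2Wake
//	unlock2Wake: takes mutexStackLocked, pops the lone sleeper (clearing the
//	             pointer bits), and calls semawakeup on it
//	after:       l.key == 0, and the woken M retries the acquisition in lock2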