Source file src/runtime/preempt.go
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Goroutine preemption
//
// A goroutine can be preempted at any safe-point. Currently, there
// are a few categories of safe-points:
//
//  1. A blocked safe-point occurs for the duration that a goroutine is
//     descheduled, blocked on synchronization, or in a system call.
//
//  2. Synchronous safe-points occur when a running goroutine checks
//     for a preemption request.
//
//  3. Asynchronous safe-points occur at any instruction in user code
//     where the goroutine can be safely paused and a conservative
//     stack and register scan can find stack roots. The runtime can
//     stop a goroutine at an async safe-point using a signal.
//
// At both blocked and synchronous safe-points, a goroutine's CPU
// state is minimal and the garbage collector has complete information
// about its entire stack. This makes it possible to deschedule a
// goroutine with minimal space, and to precisely scan a goroutine's
// stack.
//
// Synchronous safe-points are implemented by overloading the stack
// bound check in function prologues. To preempt a goroutine at the
// next synchronous safe-point, the runtime poisons the goroutine's
// stack bound to a value that will cause the next stack bound check
// to fail and enter the stack growth implementation, which will
// detect that it was actually a preemption and redirect to preemption
// handling.
//
// Preemption at asynchronous safe-points is implemented by suspending
// the thread using an OS mechanism (e.g., signals) and inspecting its
// state to determine if the goroutine was at an asynchronous
// safe-point. Since the thread suspension itself is generally
// asynchronous, it also checks if the running goroutine wants to be
// preempted, since this could have changed. If all conditions are
// satisfied, it adjusts the signal context to make it look like the
// signaled thread just called asyncPreempt and resumes the thread.
// asyncPreempt spills all registers and enters the scheduler.
//
// (An alternative would be to preempt in the signal handler itself.
// This would let the OS save and restore the register state and the
// runtime would only need to know how to extract potentially
// pointer-containing registers from the signal context. However, this
// would consume an M for every preempted G, and the scheduler itself
// is not designed to run from a signal handler, as it tends to
// allocate memory and start threads in the preemption path.)

package runtime

import (
	"internal/abi"
	"internal/goarch"
	"internal/goexperiment"
	"internal/stringslite"
)

type suspendGState struct {
	g *g

	// dead indicates the goroutine was not suspended because it
	// is dead. This goroutine could be reused after the dead
	// state was observed, so the caller must not assume that it
	// remains dead.
	dead bool

	// stopped indicates that this suspendG transitioned the G to
	// _Gwaiting via g.preemptStop and thus is responsible for
	// readying it when done.
	stopped bool
}

// suspendG suspends goroutine gp at a safe-point and returns the
// state of the suspended goroutine. The caller gets read access to
// the goroutine until it calls resumeG.
//
// It is safe for multiple callers to attempt to suspend the same
// goroutine at the same time. The goroutine may execute between
// subsequent successful suspend operations. The current
// implementation grants exclusive access to the goroutine, and hence
// multiple callers will serialize. However, the intent is to grant
// shared read access, so please don't depend on exclusive access.
//
// This must be called from the system stack and the user goroutine on
// the current M (if any) must be in a preemptible state. This
// prevents deadlocks where two goroutines attempt to suspend each
// other and both are in non-preemptible states. There are other ways
// to resolve this deadlock, but this seems simplest.
//
// TODO(austin): What if we instead required this to be called from a
// user goroutine? Then we could deschedule the goroutine while
// waiting instead of blocking the thread. If two goroutines tried to
// suspend each other, one of them would win and the other wouldn't
// complete the suspend until it was resumed. We would have to be
// careful that they couldn't actually queue up suspend for each other
// and then both be suspended. This would also avoid the need for a
// kernel context switch in the synchronous case because we could just
// directly schedule the waiter. The context switch is unavoidable in
// the signal case.
//
//go:systemstack
func suspendG(gp *g) suspendGState {
	if mp := getg().m; mp.curg != nil && readgstatus(mp.curg) == _Grunning {
		// Since we're on the system stack of this M, the user
		// G is stuck at an unsafe point. If another goroutine
		// were to try to preempt m.curg, it could deadlock.
		throw("suspendG from non-preemptible goroutine")
	}

	// See https://golang.org/cl/21503 for justification of the yield delay.
	const yieldDelay = 10 * 1000
	var nextYield int64

	// Drive the goroutine to a preemption point.
	stopped := false
	var asyncM *m
	var asyncGen uint32
	var nextPreemptM int64
	for i := 0; ; i++ {
		switch s := readgstatus(gp); s {
		default:
			if s&_Gscan != 0 {
				// Someone else is suspending it. Wait
				// for them to finish.
				//
				// TODO: It would be nicer if we could
				// coalesce suspends.
				break
			}

			dumpgstatus(gp)
			throw("invalid g status")

		case _Gdead, _Gdeadextra:
			// Nothing to suspend.
			//
			// preemptStop may need to be cleared, but
			// doing that here could race with goroutine
			// reuse. Instead, goexit0 clears it.
			return suspendGState{dead: true}

		case _Gcopystack:
			// The stack is being copied. We need to wait
			// until this is done.

		case _Gpreempted:
			// We (or someone else) suspended the G. Claim
			// ownership of it by transitioning it to
			// _Gwaiting.
			if !casGFromPreempted(gp, _Gpreempted, _Gwaiting) {
				break
			}

			// We stopped the G, so we have to ready it later.
			stopped = true

			s = _Gwaiting
			fallthrough

		case _Grunnable, _Gsyscall, _Gwaiting, _Gleaked:
			// Claim goroutine by setting scan bit.
			// This may race with execution or readying of gp.
			// The scan bit keeps it from transitioning state.
			if !castogscanstatus(gp, s, s|_Gscan) {
				break
			}

			// Clear the preemption request. It's safe to
			// reset the stack guard because we hold the
			// _Gscan bit and thus own the stack.
			gp.preemptStop = false
			gp.preempt = false
			gp.stackguard0 = gp.stack.lo + stackGuard

			// The goroutine was already at a safe-point
			// and we've now locked that in.
			//
			// TODO: It would be much better if we didn't
			// leave it in _Gscan, but instead gently
			// prevented its scheduling until resumption.
			// Maybe we only use this to bump a suspended
			// count and the scheduler skips suspended
			// goroutines? That wouldn't be enough for
			// {_Gsyscall,_Gwaiting} -> _Grunning. Maybe
			// for all those transitions we need to check
			// suspended and deschedule?
			return suspendGState{g: gp, stopped: stopped}

		case _Grunning:
			// Optimization: if there is already a pending preemption request
			// (from the previous loop iteration), don't bother with the atomics.
			if gp.preemptStop && gp.preempt && gp.stackguard0 == stackPreempt && asyncM == gp.m && asyncM.preemptGen.Load() == asyncGen {
				break
			}

			// Temporarily block state transitions.
			if !castogscanstatus(gp, _Grunning, _Gscanrunning) {
				break
			}

			// Request synchronous preemption.
			gp.preemptStop = true
			gp.preempt = true
			gp.stackguard0 = stackPreempt

			// Prepare for asynchronous preemption.
			asyncM2 := gp.m
			asyncGen2 := asyncM2.preemptGen.Load()
			needAsync := asyncM != asyncM2 || asyncGen != asyncGen2
			asyncM = asyncM2
			asyncGen = asyncGen2

			casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)

			// Send asynchronous preemption. We do this
			// after CASing the G back to _Grunning
			// because preemptM may be synchronous and we
			// don't want to catch the G just spinning on
			// its status.
			if preemptMSupported && debug.asyncpreemptoff == 0 && needAsync {
				// Rate limit preemptM calls. This is
				// particularly important on Windows
				// where preemptM is actually
				// synchronous and the spin loop here
				// can lead to live-lock.
				now := nanotime()
				if now >= nextPreemptM {
					nextPreemptM = now + yieldDelay/2
					preemptM(asyncM)
				}
			}
		}

		// TODO: Don't busy wait. This loop should really only
		// be a simple read/decide/CAS loop that only fails if
		// there's an active race. Once the CAS succeeds, we
		// should queue up the preemption (which will require
		// it to be reliable in the _Grunning case, not
		// best-effort) and then sleep until we're notified
		// that the goroutine is suspended.
		if i == 0 {
			nextYield = nanotime() + yieldDelay
		}
		if nanotime() < nextYield {
			procyield(10)
		} else {
			osyield()
			nextYield = nanotime() + yieldDelay/2
		}
	}
}

// resumeG undoes the effects of suspendG, allowing the suspended
// goroutine to continue from its current safe-point.
func resumeG(state suspendGState) {
	if state.dead {
		// We didn't actually stop anything.
		return
	}

	gp := state.g
	switch s := readgstatus(gp); s {
	default:
		dumpgstatus(gp)
		throw("unexpected g status")

	case _Grunnable | _Gscan,
		_Gwaiting | _Gscan,
		_Gleaked | _Gscan,
		_Gsyscall | _Gscan:
		casfrom_Gscanstatus(gp, s, s&^_Gscan)
	}

	if state.stopped {
		// We stopped it, so we need to re-schedule it.
		ready(gp, 0, true)
	}
}

// canPreemptM reports whether mp is in a state that is safe to preempt.
//
// It is nosplit because it has nosplit callers.
//
//go:nosplit
func canPreemptM(mp *m) bool {
	return mp.locks == 0 && mp.mallocing == 0 && mp.preemptoff == "" && mp.p.ptr().status == _Prunning && mp.curg != nil && readgstatus(mp.curg)&^_Gscan != _Gsyscall
}

//go:generate go run mkpreempt.go

// asyncPreempt saves all user registers and calls asyncPreempt2.
//
// It saves GP registers (anything that might contain a pointer) to the G stack.
// Hence, when stack scanning encounters an asyncPreempt frame, it scans that
// frame and its parent frame conservatively.
//
// On some platforms, it saves large additional scalar-only register state such
// as vector registers to an "extended register state" on the P.
//
// asyncPreempt is implemented in assembly.
func asyncPreempt()

// asyncPreempt2 is the Go continuation of asyncPreempt.
//
// It must be deeply nosplit because there's untyped data on the stack from
// asyncPreempt.
//
// It must not have any write barriers because we need to limit the amount of
// stack it uses.
//
//go:nosplit
//go:nowritebarrierrec
func asyncPreempt2() {
	// We can't grow the stack with untyped data from asyncPreempt, so switch to
	// the system stack right away.
	mcall(func(gp *g) {
		gp.asyncSafePoint = true

		// Move the extended register state from the P to the G. We do this now that
		// we're on the system stack to avoid stack splits.
		xRegSave(gp)

		if gp.preemptStop {
			preemptPark(gp)
		} else {
			gopreempt_m(gp)
		}
		// The above functions never return.
	})

	// Do not grow the stack below here!

	gp := getg()

	// Put the extended register state back on the M so resumption can find it.
	// We can't do this in asyncPreemptM because the park calls never return.
	xRegRestore(gp)

	gp.asyncSafePoint = false
}

// asyncPreemptStack is the bytes of stack space required to inject an
// asyncPreempt call.
var asyncPreemptStack = ^uintptr(0)

func init() {
	f := findfunc(abi.FuncPCABI0(asyncPreempt))
	total := funcMaxSPDelta(f)
	f = findfunc(abi.FuncPCABIInternal(asyncPreempt2))
	total += funcMaxSPDelta(f)
	f = findfunc(abi.FuncPCABIInternal(xRegRestore))
	total += funcMaxSPDelta(f)
	// Add some overhead for return PCs, etc.
	asyncPreemptStack = uintptr(total) + 8*goarch.PtrSize
	if asyncPreemptStack > stackNosplit {
		// We need more than the nosplit limit. This isn't unsafe, but it may
		// limit asynchronous preemption. Consider moving state into xRegState.
		print("runtime: asyncPreemptStack=", asyncPreemptStack, "\n")
		throw("async stack too large")
	}
}

// wantAsyncPreempt returns whether an asynchronous preemption is
// queued for gp.
func wantAsyncPreempt(gp *g) bool {
	// Check both the G and the P.
	return (gp.preempt || gp.m.p != 0 && gp.m.p.ptr().preempt) && readgstatus(gp)&^_Gscan == _Grunning
}

// isAsyncSafePoint reports whether gp at instruction PC is an
// asynchronous safe point. This indicates that:
//
//  1. It's safe to suspend gp and conservatively scan its stack and
//     registers. There are no potentially hidden pointer values and it's
//     not in the middle of an atomic sequence like a write barrier.
//
//  2. gp has enough stack space to inject the asyncPreempt call.
//
//  3. It's generally safe to interact with the runtime, even if we're
//     in a signal handler stopped here. For example, there are no runtime
//     locks held, so acquiring a runtime lock won't self-deadlock.
//
// In some cases the PC is safe for asynchronous preemption but it
// also needs to adjust the resumption PC. The new PC is returned in
// the second result.
func isAsyncSafePoint(gp *g, pc, sp, lr uintptr) (bool, uintptr) {
	mp := gp.m

	// Only user Gs can have safe-points. We check this first
	// because it's extremely common that we'll catch mp in the
	// scheduler processing this G preemption.
	if mp.curg != gp {
		return false, 0
	}

	// Check M state.
	if mp.p == 0 || !canPreemptM(mp) {
		return false, 0
	}

	// Check stack space.
	if sp < gp.stack.lo || sp-gp.stack.lo < asyncPreemptStack {
		return false, 0
	}

	// If we're in the middle of a secret computation, we can't
	// allow any conservative scanning of stacks, as that may lead
	// to secrets leaking out from the stack into work buffers.
	// Additionally, the preemption code will store the
	// machine state (including registers which may contain confidential
	// information) into the preemption buffers.
	//
	// TODO(dmo): there's technically nothing stopping us from doing the
	// preemption, granted that we don't conservatively scan and we clean
	// up after ourselves. This is made slightly harder by the xRegs cached
	// allocations that can move between Gs and Ps. In any case, the
	// intended users (cryptography code) are unlikely to get stuck in
	// non-terminating loops.
	if goexperiment.RuntimeSecret && gp.secret > 0 {
		return false, 0
	}

	// Check if PC is an unsafe-point.
	f := findfunc(pc)
	if !f.valid() {
		// Not Go code.
		return false, 0
	}
	if (GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "mips64" || GOARCH == "mips64le") && lr == pc+8 && funcspdelta(f, pc) == 0 {
		// We probably stopped at a half-executed CALL instruction,
		// where the LR is updated but the PC has not. If we preempt
		// here we'll see a seemingly self-recursive call, which is in
		// fact not.
		// This is normally ok, as we use the return address saved on
		// stack for unwinding, not the LR value. But if this is a
		// call to morestack, we haven't created the frame, and we'll
		// use the LR for unwinding, which will be bad.
		return false, 0
	}
	up, startpc := pcdatavalue2(f, abi.PCDATA_UnsafePoint, pc)
	if up == abi.UnsafePointUnsafe {
		// Unsafe-point marked by compiler. This includes
		// atomic sequences (e.g., write barrier) and nosplit
		// functions (except at calls).
		return false, 0
	}
	if fd := funcdata(f, abi.FUNCDATA_LocalsPointerMaps); fd == nil || f.flag&abi.FuncFlagAsm != 0 {
		// This is assembly code. Don't assume it's well-formed.
		// TODO: Empirically we still need the fd == nil check. Why?
		//
		// TODO: Are there cases that are safe but don't have a
		// locals pointer map, like empty frame functions?
		// It might be possible to preempt any assembly functions
		// except the ones that have funcFlag_SPWRITE set in f.flag.
		return false, 0
	}
	// Check the inner-most name.
	u, uf := newInlineUnwinder(f, pc)
	name := u.srcFunc(uf).name()
	if stringslite.HasPrefix(name, "runtime.") ||
		stringslite.HasPrefix(name, "internal/runtime/") ||
		stringslite.HasPrefix(name, "reflect.") {
		// For now we never async preempt the runtime or
		// anything closely tied to the runtime. Known issues
		// include: various points in the scheduler ("don't
		// preempt between here and here"), much of the defer
		// implementation (untyped info on stack), bulk write
		// barriers (write barrier check), atomic functions in
		// internal/runtime/atomic, reflect.{makeFuncStub,methodValueCall}.
		//
		// Note that this is a subset of the runtimePkgs in pkgspecial.go
		// and these checks are theoretically redundant because the compiler
		// marks "all points" in runtime functions as unsafe for async preemption.
		// But for some reason, we can't eliminate these checks until https://go.dev/issue/72031
		// is resolved.
		//
		// TODO(austin): We should improve this, or opt things
		// in incrementally.
		return false, 0
	}
	switch up {
	case abi.UnsafePointRestart1, abi.UnsafePointRestart2:
		// Restartable instruction sequence. Back off PC to
		// the start PC.
		if startpc == 0 || startpc > pc || pc-startpc > 20 {
			throw("bad restart PC")
		}
		return true, startpc
	case abi.UnsafePointRestartAtEntry:
		// Restart from the function entry at resumption.
		return true, f.entry()
	}
	return true, pc
}
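
Illustrative sketch (not part of preempt.go): a minimal standalone program showing the user-visible effect of the asynchronous preemption implemented above. The spin loop below makes no calls, so it never executes a function prologue, never observes the poisoned stack bound described in the header comment, and can only be stopped at an asynchronous safe-point via the signal path. The program structure, GOMAXPROCS value, and timings are arbitrary demonstration choices.

// demo: a call-free loop can only be stopped via asynchronous preemption.
package main

import (
	"fmt"
	"runtime"
	"time"
)

var sink int

// spin runs a loop with no function calls, so it contains no synchronous
// safe-points (no prologue stack-bound checks).
func spin() {
	for i := 0; ; i++ {
		sink = i
	}
}

func main() {
	runtime.GOMAXPROCS(2) // one P for spin, one for main
	go spin()
	time.Sleep(10 * time.Millisecond) // let spin start running

	// runtime.GC must stop the world, which requires suspending spin at a
	// safe-point. With asynchronous preemption (the default since Go 1.14),
	// a preemption signal interrupts the loop at an async safe-point and GC
	// completes promptly; with GODEBUG=asyncpreemptoff=1, the stop-the-world
	// may wait on the call-free loop indefinitely.
	start := time.Now()
	runtime.GC()
	fmt.Println("GC completed after", time.Since(start))
}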