Source file src/runtime/preempt.go
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Goroutine preemption
//
// A goroutine can be preempted at any safe-point. Currently, there
// are a few categories of safe-points:
//
//  1. A blocked safe-point occurs for the duration that a goroutine is
//     descheduled, blocked on synchronization, or in a system call.
//
//  2. Synchronous safe-points occur when a running goroutine checks
//     for a preemption request.
//
//  3. Asynchronous safe-points occur at any instruction in user code
//     where the goroutine can be safely paused and a conservative
//     stack and register scan can find stack roots. The runtime can
//     stop a goroutine at an async safe-point using a signal.
//
// At both blocked and synchronous safe-points, a goroutine's CPU
// state is minimal and the garbage collector has complete information
// about its entire stack. This makes it possible to deschedule a
// goroutine with minimal space, and to precisely scan a goroutine's
// stack.
//
// Synchronous safe-points are implemented by overloading the stack
// bound check in function prologues. To preempt a goroutine at the
// next synchronous safe-point, the runtime poisons the goroutine's
// stack bound to a value that will cause the next stack bound check
// to fail and enter the stack growth implementation, which will
// detect that it was actually a preemption and redirect to preemption
// handling.
//
// Preemption at asynchronous safe-points is implemented by suspending
// the thread using an OS mechanism (e.g., signals) and inspecting its
// state to determine if the goroutine was at an asynchronous
// safe-point. Since the thread suspension itself is generally
// asynchronous, it also checks if the running goroutine wants to be
// preempted, since this could have changed. If all conditions are
// satisfied, it adjusts the signal context to make it look like the
// signaled thread just called asyncPreempt and resumes the thread.
// asyncPreempt spills all registers and enters the scheduler.
//
// (An alternative would be to preempt in the signal handler itself.
// This would let the OS save and restore the register state and the
// runtime would only need to know how to extract potentially
// pointer-containing registers from the signal context. However, this
// would consume an M for every preempted G, and the scheduler itself
// is not designed to run from a signal handler, as it tends to
// allocate memory and start threads in the preemption path.)

package runtime

import (
	"internal/abi"
	"internal/goarch"
	"internal/goexperiment"
	"internal/stringslite"
)

type suspendGState struct {
	g *g

	// dead indicates the goroutine was not suspended because it
	// is dead. This goroutine could be reused after the dead
	// state was observed, so the caller must not assume that it
	// remains dead.
	dead bool

	// stopped indicates that this suspendG transitioned the G to
	// _Gwaiting via g.preemptStop and thus is responsible for
	// readying it when done.
	stopped bool
}

// suspendG suspends goroutine gp at a safe-point and returns the
// state of the suspended goroutine. The caller gets read access to
// the goroutine until it calls resumeG.
//
// It is safe for multiple callers to attempt to suspend the same
// goroutine at the same time. The goroutine may execute between
// subsequent successful suspend operations. The current
// implementation grants exclusive access to the goroutine, and hence
// multiple callers will serialize. However, the intent is to grant
// shared read access, so please don't depend on exclusive access.
//
// This must be called from the system stack and the user goroutine on
// the current M (if any) must be in a preemptible state. This
// prevents deadlocks where two goroutines attempt to suspend each
// other and both are in non-preemptible states. There are other ways
// to resolve this deadlock, but this seems simplest.
//
// TODO(austin): What if we instead required this to be called from a
// user goroutine? Then we could deschedule the goroutine while
// waiting instead of blocking the thread. If two goroutines tried to
// suspend each other, one of them would win and the other wouldn't
// complete the suspend until it was resumed. We would have to be
// careful that they couldn't actually queue up suspend for each other
// and then both be suspended. This would also avoid the need for a
// kernel context switch in the synchronous case because we could just
// directly schedule the waiter. The context switch is unavoidable in
// the signal case.
//
//go:systemstack
func suspendG(gp *g) suspendGState {
	if mp := getg().m; mp.curg != nil && readgstatus(mp.curg) == _Grunning {
		// Since we're on the system stack of this M, the user
		// G is stuck at an unsafe point. If another goroutine
		// were to try to preempt m.curg, it could deadlock.
		throw("suspendG from non-preemptible goroutine")
	}

	// See https://golang.org/cl/21503 for justification of the yield delay.
	const yieldDelay = 10 * 1000
	var nextYield int64

	// Drive the goroutine to a preemption point.
	stopped := false
	var asyncM *m
	var asyncGen uint32
	var nextPreemptM int64
	for i := 0; ; i++ {
		switch s := readgstatus(gp); s {
		default:
			if s&_Gscan != 0 {
				// Someone else is suspending it. Wait
				// for them to finish.
				//
				// TODO: It would be nicer if we could
				// coalesce suspends.
				break
			}

			dumpgstatus(gp)
			throw("invalid g status")

		case _Gdead, _Gdeadextra:
			// Nothing to suspend.
			//
			// preemptStop may need to be cleared, but
			// doing that here could race with goroutine
			// reuse. Instead, goexit0 clears it.
			return suspendGState{dead: true}

		case _Gcopystack:
			// The stack is being copied. We need to wait
			// until this is done.

		case _Gpreempted:
			// We (or someone else) suspended the G. Claim
			// ownership of it by transitioning it to
			// _Gwaiting.
			if !casGFromPreempted(gp, _Gpreempted, _Gwaiting) {
				break
			}

			// We stopped the G, so we have to ready it later.
			stopped = true

			s = _Gwaiting
			fallthrough

		case _Grunnable, _Gsyscall, _Gwaiting, _Gleaked:
			// Claim goroutine by setting scan bit.
			// This may race with execution or readying of gp.
			// The scan bit keeps it from transitioning state.
			if !castogscanstatus(gp, s, s|_Gscan) {
				break
			}

			// Clear the preemption request. It's safe to
			// reset the stack guard because we hold the
			// _Gscan bit and thus own the stack.
			gp.preemptStop = false
			gp.preempt = false
			gp.stackguard0 = gp.stack.lo + stackGuard

			// The goroutine was already at a safe-point
			// and we've now locked that in.
			//
			// TODO: It would be much better if we didn't
			// leave it in _Gscan, but instead gently
			// prevented its scheduling until resumption.
			// Maybe we only use this to bump a suspended
			// count and the scheduler skips suspended
			// goroutines? That wouldn't be enough for
			// {_Gsyscall,_Gwaiting} -> _Grunning. Maybe
			// for all those transitions we need to check
			// suspended and deschedule?
			return suspendGState{g: gp, stopped: stopped}

		case _Grunning:
			// Optimization: if there is already a pending preemption request
			// (from the previous loop iteration), don't bother with the atomics.
			if gp.preemptStop && gp.preempt && gp.stackguard0 == stackPreempt && asyncM == gp.m && asyncM.preemptGen.Load() == asyncGen {
				break
			}

			// Temporarily block state transitions.
			if !castogscanstatus(gp, _Grunning, _Gscanrunning) {
				break
			}

			// Request synchronous preemption.
			gp.preemptStop = true
			gp.preempt = true
			gp.stackguard0 = stackPreempt

			// Prepare for asynchronous preemption.
			asyncM2 := gp.m
			asyncGen2 := asyncM2.preemptGen.Load()
			needAsync := asyncM != asyncM2 || asyncGen != asyncGen2
			asyncM = asyncM2
			asyncGen = asyncGen2

			casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)

			// Send asynchronous preemption. We do this
			// after CASing the G back to _Grunning
			// because preemptM may be synchronous and we
			// don't want to catch the G just spinning on
			// its status.
			if preemptMSupported && debug.asyncpreemptoff == 0 && needAsync {
				// Rate limit preemptM calls. This is
				// particularly important on Windows
				// where preemptM is actually
				// synchronous and the spin loop here
				// can lead to live-lock.
				now := nanotime()
				if now >= nextPreemptM {
					nextPreemptM = now + yieldDelay/2
					preemptM(asyncM)
				}
			}
		}

		// TODO: Don't busy wait. This loop should really only
		// be a simple read/decide/CAS loop that only fails if
		// there's an active race. Once the CAS succeeds, we
		// should queue up the preemption (which will require
		// it to be reliable in the _Grunning case, not
		// best-effort) and then sleep until we're notified
		// that the goroutine is suspended.
		if i == 0 {
			nextYield = nanotime() + yieldDelay
		}
		if nanotime() < nextYield {
			procyield(10)
		} else {
			osyield()
			nextYield = nanotime() + yieldDelay/2
		}
	}
}

// resumeG undoes the effects of suspendG, allowing the suspended
// goroutine to continue from its current safe-point.
func resumeG(state suspendGState) {
	if state.dead {
		// We didn't actually stop anything.
		return
	}

	gp := state.g
	switch s := readgstatus(gp); s {
	default:
		dumpgstatus(gp)
		throw("unexpected g status")

	case _Grunnable | _Gscan,
		_Gwaiting | _Gscan,
		_Gleaked | _Gscan,
		_Gsyscall | _Gscan:
		casfrom_Gscanstatus(gp, s, s&^_Gscan)
	}

	if state.stopped {
		// We stopped it, so we need to re-schedule it.
		ready(gp, 0, true)
	}
}

// canPreemptM reports whether mp is in a state that is safe to preempt.
//
// It is nosplit because it has nosplit callers.
//
//go:nosplit
func canPreemptM(mp *m) bool {
	return mp.locks == 0 && mp.mallocing == 0 && mp.preemptoff == "" && mp.p.ptr().status == _Prunning && mp.curg != nil && readgstatus(mp.curg)&^_Gscan != _Gsyscall
}

//go:generate go run mkpreempt.go

// asyncPreempt saves all user registers and calls asyncPreempt2.
//
// It saves GP registers (anything that might contain a pointer) to the G stack.
// Hence, when stack scanning encounters an asyncPreempt frame, it scans that
// frame and its parent frame conservatively.
//
// On some platforms, it saves large additional scalar-only register state such
// as vector registers to an "extended register state" on the P.
//
// asyncPreempt is implemented in assembly.
func asyncPreempt()

// asyncPreempt2 is the Go continuation of asyncPreempt.
//
// It must be deeply nosplit because there's untyped data on the stack from
// asyncPreempt.
//
// It must not have any write barriers because we need to limit the amount of
// stack it uses.
//
//go:nosplit
//go:nowritebarrierrec
func asyncPreempt2() {
	// We can't grow the stack with untyped data from asyncPreempt, so switch to
	// the system stack right away.
	mcall(func(gp *g) {
		gp.asyncSafePoint = true

		// Move the extended register state from the P to the G. We do this now that
		// we're on the system stack to avoid stack splits.
		xRegSave(gp)

		if gp.preemptStop {
			preemptPark(gp)
		} else {
			gopreempt_m(gp)
		}
		// The above functions never return.
	})

	// Do not grow the stack below here!

	gp := getg()

	// Put the extended register state back on the M so resumption can find it.
	// We can't do this in asyncPreemptM because the park calls never return.
	xRegRestore(gp)

	gp.asyncSafePoint = false
}

// asyncPreemptStack is the bytes of stack space required to inject an
// asyncPreempt call.
var asyncPreemptStack = ^uintptr(0)

func init() {
	f := findfunc(abi.FuncPCABI0(asyncPreempt))
	total := funcMaxSPDelta(f)
	f = findfunc(abi.FuncPCABIInternal(asyncPreempt2))
	total += funcMaxSPDelta(f)
	f = findfunc(abi.FuncPCABIInternal(xRegRestore))
	total += funcMaxSPDelta(f)
	// Add some overhead for return PCs, etc.
	asyncPreemptStack = uintptr(total) + 8*goarch.PtrSize
	if asyncPreemptStack > stackNosplit {
		// We need more than the nosplit limit. This isn't unsafe, but it may
		// limit asynchronous preemption. Consider moving state into xRegState.
		print("runtime: asyncPreemptStack=", asyncPreemptStack, "\n")
		throw("async stack too large")
	}
}

// wantAsyncPreempt returns whether an asynchronous preemption is
// queued for gp.
func wantAsyncPreempt(gp *g) bool {
	// Check both the G and the P.
	return (gp.preempt || gp.m.p != 0 && gp.m.p.ptr().preempt) && readgstatus(gp)&^_Gscan == _Grunning
}

// isAsyncSafePoint reports whether gp at instruction PC is an
// asynchronous safe point. This indicates that:
//
//  1. It's safe to suspend gp and conservatively scan its stack and
//     registers. There are no potentially hidden pointer values and it's
//     not in the middle of an atomic sequence like a write barrier.
//
//  2. gp has enough stack space to inject the asyncPreempt call.
//
//  3. It's generally safe to interact with the runtime, even if we're
//     in a signal handler stopped here. For example, there are no runtime
//     locks held, so acquiring a runtime lock won't self-deadlock.
//
// In some cases the PC is safe for asynchronous preemption but it
// also needs to adjust the resumption PC. The new PC is returned in
// the second result.
func isAsyncSafePoint(gp *g, pc, sp, lr uintptr) (bool, uintptr) {
	mp := gp.m

	// Only user Gs can have safe-points. We check this first
	// because it's extremely common that we'll catch mp in the
	// scheduler processing this G preemption.
	if mp.curg != gp {
		return false, 0
	}

	// Check M state.
	if mp.p == 0 || !canPreemptM(mp) {
		return false, 0
	}

	// Check stack space.
	if sp < gp.stack.lo || sp-gp.stack.lo < asyncPreemptStack {
		return false, 0
	}

	// If we're in the middle of a secret computation, we can't
	// allow any conservative scanning of stacks, as that may lead
	// to secrets leaking out from the stack into work buffers.
	// Additionally, the preemption code will store the
	// machine state (including registers which may contain confidential
	// information) into the preemption buffers.
	//
	// TODO(dmo): there's technically nothing stopping us from doing the
	// preemption, granted that we don't conservatively scan and we clean
	// up after ourselves. This is made slightly harder by the xRegs cached
	// allocations that can move between Gs and Ps. In any case, the
	// intended users (cryptography code) are unlikely to get stuck in
	// non-terminating loops.
	if goexperiment.RuntimeSecret && gp.secret > 0 {
		return false, 0
	}

	// Check if PC is an unsafe-point.
	f := findfunc(pc)
	if !f.valid() {
		// Not Go code.
		return false, 0
	}
	if (GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "mips64" || GOARCH == "mips64le") && lr == pc+8 && funcspdelta(f, pc) == 0 {
		// We probably stopped at a half-executed CALL instruction,
		// where the LR is updated but the PC has not. If we preempt
		// here we'll see a seemingly self-recursive call, which is in
		// fact not.
		// This is normally ok, as we use the return address saved on
		// stack for unwinding, not the LR value. But if this is a
		// call to morestack, we haven't created the frame, and we'll
		// use the LR for unwinding, which will be bad.
		return false, 0
	}
	up, startpc := pcdatavalue2(f, abi.PCDATA_UnsafePoint, pc)
	if up == abi.UnsafePointUnsafe {
		// Unsafe-point marked by compiler. This includes
		// atomic sequences (e.g., write barrier) and nosplit
		// functions (except at calls).
		return false, 0
	}
	if fd := funcdata(f, abi.FUNCDATA_LocalsPointerMaps); fd == nil || f.flag&abi.FuncFlagAsm != 0 {
		// This is assembly code. Don't assume it's well-formed.
		// TODO: Empirically we still need the fd == nil check. Why?
		//
		// TODO: Are there cases that are safe but don't have a
		// locals pointer map, like empty frame functions?
		// It might be possible to preempt any assembly functions
		// except the ones that have funcFlag_SPWRITE set in f.flag.
		return false, 0
	}
	// Check the inner-most name.
	u, uf := newInlineUnwinder(f, pc)
	name := u.srcFunc(uf).name()
	if stringslite.HasPrefix(name, "runtime.") ||
		stringslite.HasPrefix(name, "internal/runtime/") ||
		stringslite.HasPrefix(name, "reflect.") {
		// For now we never async preempt the runtime or
		// anything closely tied to the runtime. Known issues
		// include: various points in the scheduler ("don't
		// preempt between here and here"), much of the defer
		// implementation (untyped info on stack), bulk write
		// barriers (write barrier check), atomic functions in
		// internal/runtime/atomic, reflect.{makeFuncStub,methodValueCall}.
		//
		// Note that this is a subset of the runtimePkgs in pkgspecial.go
		// and these checks are theoretically redundant because the compiler
		// marks "all points" in runtime functions as unsafe for async preemption.
		// But for some reason, we can't eliminate these checks until https://go.dev/issue/72031
		// is resolved.
		//
		// TODO(austin): We should improve this, or opt things
		// in incrementally.
		return false, 0
	}
	switch up {
	case abi.UnsafePointRestart1, abi.UnsafePointRestart2:
		// Restartable instruction sequence. Back off PC to
		// the start PC.
		if startpc == 0 || startpc > pc || pc-startpc > 20 {
			throw("bad restart PC")
		}
		return true, startpc
	case abi.UnsafePointRestartAtEntry:
		// Restart from the function entry at resumption.
		return true, f.entry()
	}
	return true, pc
}
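
Illustrative sketch (not part of preempt.go): a minimal standalone program showing the user-visible effect of the asynchronous preemption implemented above. The spin loop below makes no calls, so it never executes a function prologue, never observes the poisoned stack bound described in the header comment, and can only be stopped at an asynchronous safe-point via the signal path. The program structure, GOMAXPROCS value, and timings are arbitrary demonstration choices.

// demo: a call-free loop can only be stopped via asynchronous preemption.
package main

import (
	"fmt"
	"runtime"
	"time"
)

var sink int

// spin runs a loop with no function calls, so it contains no synchronous
// safe-points (no prologue stack-bound checks).
func spin() {
	for i := 0; ; i++ {
		sink = i
	}
}

func main() {
	runtime.GOMAXPROCS(2) // one P for spin, one for main
	go spin()
	time.Sleep(10 * time.Millisecond) // let spin start running

	// runtime.GC must stop the world, which requires suspending spin at a
	// safe-point. With asynchronous preemption (the default since Go 1.14),
	// a preemption signal interrupts the loop at an async safe-point and GC
	// completes promptly; with GODEBUG=asyncpreemptoff=1, the stop-the-world
	// may wait on the call-free loop indefinitely.
	start := time.Now()
	runtime.GC()
	fmt.Println("GC completed after", time.Since(start))
}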