Source file src/runtime/proc.go
1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package runtime 6 7 import ( 8 "internal/abi" 9 "internal/cpu" 10 "internal/goarch" 11 "internal/goexperiment" 12 "internal/goos" 13 "internal/runtime/atomic" 14 "internal/runtime/exithook" 15 "internal/runtime/maps" 16 "internal/runtime/sys" 17 "internal/strconv" 18 "internal/stringslite" 19 "unsafe" 20 ) 21 22 // set using cmd/go/internal/modload.ModInfoProg 23 var modinfo string 24 25 // Goroutine scheduler 26 // The scheduler's job is to distribute ready-to-run goroutines over worker threads. 27 // 28 // The main concepts are: 29 // G - goroutine. 30 // M - worker thread, or machine. 31 // P - processor, a resource that is required to execute Go code. 32 // M must have an associated P to execute Go code, however it can be 33 // blocked or in a syscall w/o an associated P. 34 // 35 // Design doc at https://golang.org/s/go11sched. 36 37 // Worker thread parking/unparking. 38 // We need to balance between keeping enough running worker threads to utilize 39 // available hardware parallelism and parking excessive running worker threads 40 // to conserve CPU resources and power. This is not simple for two reasons: 41 // (1) scheduler state is intentionally distributed (in particular, per-P work 42 // queues), so it is not possible to compute global predicates on fast paths; 43 // (2) for optimal thread management we would need to know the future (don't park 44 // a worker thread when a new goroutine will be readied in near future). 45 // 46 // Three rejected approaches that would work badly: 47 // 1. Centralize all scheduler state (would inhibit scalability). 48 // 2. Direct goroutine handoff. That is, when we ready a new goroutine and there 49 // is a spare P, unpark a thread and handoff it the thread and the goroutine. 
50 // This would lead to thread state thrashing, as the thread that readied the 51 // goroutine can be out of work the very next moment, we will need to park it. 52 // Also, it would destroy locality of computation as we want to preserve 53 // dependent goroutines on the same thread; and introduce additional latency. 54 // 3. Unpark an additional thread whenever we ready a goroutine and there is an 55 // idle P, but don't do handoff. This would lead to excessive thread parking/ 56 // unparking as the additional threads will instantly park without discovering 57 // any work to do. 58 // 59 // The current approach: 60 // 61 // This approach applies to three primary sources of potential work: readying a 62 // goroutine, new/modified-earlier timers, and idle-priority GC. See below for 63 // additional details. 64 // 65 // We unpark an additional thread when we submit work if (this is wakep()): 66 // 1. There is an idle P, and 67 // 2. There are no "spinning" worker threads. 68 // 69 // A worker thread is considered spinning if it is out of local work and did 70 // not find work in the global run queue or netpoller; the spinning state is 71 // denoted in m.spinning and in sched.nmspinning. Threads unparked this way are 72 // also considered spinning; we don't do goroutine handoff so such threads are 73 // out of work initially. Spinning threads spin on looking for work in per-P 74 // run queues and timer heaps or from the GC before parking. If a spinning 75 // thread finds work it takes itself out of the spinning state and proceeds to 76 // execution. If it does not find work it takes itself out of the spinning 77 // state and then parks. 78 // 79 // If there is at least one spinning thread (sched.nmspinning>0), we don't 80 // unpark new threads when submitting work. To compensate for that, if the last 81 // spinning thread finds work and stops spinning, it must unpark a new spinning 82 // thread. 
This approach smooths out unjustified spikes of thread unparking, 83 // but at the same time guarantees eventual maximal CPU parallelism 84 // utilization. 85 // 86 // The main implementation complication is that we need to be very careful 87 // during spinning->non-spinning thread transition. This transition can race 88 // with submission of new work, and either one part or another needs to unpark 89 // another worker thread. If they both fail to do that, we can end up with 90 // semi-persistent CPU underutilization. 91 // 92 // The general pattern for submission is: 93 // 1. Submit work to the local or global run queue, timer heap, or GC state. 94 // 2. #StoreLoad-style memory barrier. 95 // 3. Check sched.nmspinning. 96 // 97 // The general pattern for spinning->non-spinning transition is: 98 // 1. Decrement nmspinning. 99 // 2. #StoreLoad-style memory barrier. 100 // 3. Check all per-P work queues and GC for new work. 101 // 102 // Note that all this complexity does not apply to global run queue as we are 103 // not sloppy about thread unparking when submitting to global queue. Also see 104 // comments for nmspinning manipulation. 105 // 106 // How these different sources of work behave varies, though it doesn't affect 107 // the synchronization approach: 108 // * Ready goroutine: this is an obvious source of work; the goroutine is 109 // immediately ready and must run on some thread eventually. 110 // * New/modified-earlier timer: The current timer implementation (see time.go) 111 // uses netpoll in a thread with no work available to wait for the soonest 112 // timer. If there is no thread waiting, we want a new spinning thread to go 113 // wait. 114 // * Idle-priority GC: The GC wakes a stopped idle thread to contribute to 115 // background GC work (note: currently disabled per golang.org/issue/19112). 116 // Also see golang.org/issue/44313, as this should be extended to all GC 117 // workers. 
// Bootstrap state. main (below) throws unless it is running on m0, and
// schedinit stores the second result of raceinit in raceprocctx0.
var (
	m0           m
	g0           g
	mcache0      *mcache
	raceprocctx0 uintptr
	raceFiniLock mutex
)

// This slice records the initializing tasks that need to be
// done to start up the runtime. It is built by the linker.
var runtime_inittasks []*initTask

// mainInitDone is a signal used by cgocallbackg that initialization
// has been completed. If this is false, wait on mainInitDoneChan.
var mainInitDone atomic.Bool

// mainInitDoneChan is closed after initialization has been completed.
// It is made before _cgo_notify_runtime_init_done, so all cgo
// calls can rely on it existing.
var mainInitDoneChan chan bool

// main_main is the user program's main.main, bound here via linkname.
//
//go:linkname main_main main.main
func main_main()

// mainStarted indicates that the main M has started.
var mainStarted bool

// runtimeInitTime is the nanotime() at which the runtime started.
var runtimeInitTime int64

// Value to use for signal mask for newly created M's.
var initSigmask sigset

// The main goroutine.
//
// main finishes runtime start-up (stack limits, sysmon, runtime init
// tasks, GC enablement, cgo hand-shake), runs the init tasks of every
// module, calls main.main, and finally runs the exit hooks and exits
// with status 0. The order of the steps below is significant; see the
// individual comments.
func main() {
	mp := getg().m

	// Racectx of m0->g0 is used only as the parent of the main goroutine.
	// It must not be used for anything else.
	mp.g0.racectx = 0

	// Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
	// Using decimal instead of binary GB and MB because
	// they look nicer in the stack overflow failure message.
	if goarch.PtrSize == 8 {
		maxstacksize = 1000000000
	} else {
		maxstacksize = 250000000
	}

	// An upper limit for max stack size. Used to avoid random crashes
	// after calling SetMaxStack and trying to allocate a stack that is too big,
	// since stackalloc works with 32-bit sizes.
	maxstackceiling = 2 * maxstacksize

	// Allow newproc to start new Ms.
	mainStarted = true

	// Start sysmon on its own M (created from the system stack), on
	// configurations that have one.
	if haveSysmon {
		systemstack(func() {
			newm(sysmon, nil, -1)
		})
	}

	// Lock the main goroutine onto this, the main OS thread,
	// during initialization. Most programs won't care, but a few
	// do require certain calls to be made by the main thread.
	// Those can arrange for main.main to run in the main thread
	// by calling runtime.LockOSThread during initialization
	// to preserve the lock.
	lockOSThread()

	if mp != &m0 {
		throw("runtime.main not on m0")
	}

	// Record when the world started.
	// Must be before doInit for tracing init.
	runtimeInitTime = nanotime()
	if runtimeInitTime == 0 {
		throw("nanotime returning zero")
	}

	if debug.inittrace != 0 {
		inittrace.id = getg().goid
		inittrace.active = true
	}

	doInit(runtime_inittasks) // Must be before defer.

	// Defer unlock so that runtime.Goexit during init does the unlock too.
	// needUnlock is cleared further down, once main has unlocked
	// explicitly, so the deferred function does not unlock a second time.
	needUnlock := true
	defer func() {
		if needUnlock {
			unlockOSThread()
		}
	}()

	gcenable()
	defaultGOMAXPROCSUpdateEnable() // don't STW before runtime initialized.

	mainInitDoneChan = make(chan bool)
	if iscgo {
		// Verify that every cgo hook the runtime relies on was linked in
		// before any of them is used.
		if _cgo_pthread_key_created == nil {
			throw("_cgo_pthread_key_created missing")
		}

		if GOOS != "windows" {
			if _cgo_thread_start == nil {
				throw("_cgo_thread_start missing")
			}
			if _cgo_setenv == nil {
				throw("_cgo_setenv missing")
			}
			if _cgo_unsetenv == nil {
				throw("_cgo_unsetenv missing")
			}
		}
		if _cgo_notify_runtime_init_done == nil {
			throw("_cgo_notify_runtime_init_done missing")
		}

		// Set the x_crosscall2_ptr C function pointer variable to point to crosscall2.
		if set_crosscall2 == nil {
			throw("set_crosscall2 missing")
		}
		set_crosscall2()

		// Start the template thread in case we enter Go from
		// a C-created thread and need to create a new thread.
		startTemplateThread()
		cgocall(_cgo_notify_runtime_init_done, nil)
	}

	// Run the initializing tasks. Depending on build mode this
	// list can arrive a few different ways, but it will always
	// contain the init tasks computed by the linker for all the
	// packages in the program (excluding those added at runtime
	// by package plugin). Run through the modules in dependency
	// order (the order they are initialized by the dynamic
	// loader, i.e. they are added to the moduledata linked list).
	last := lastmoduledatap // grab before loop starts. Any added modules after this point will do their own doInit calls.
	for m := &firstmoduledata; true; m = m.next {
		doInit(m.inittasks)
		if m == last {
			break
		}
	}

	// Disable init tracing after main init done to avoid overhead
	// of collecting statistics in malloc and newproc
	inittrace.active = false

	// Publish completion: set the flag first, then close the channel
	// that waiters block on.
	mainInitDone.Store(true)
	close(mainInitDoneChan)

	// Initialization is over: disarm the deferred unlock and release the
	// OS thread lock taken above.
	needUnlock = false
	unlockOSThread()

	if isarchive || islibrary {
		// A program compiled with -buildmode=c-archive or c-shared
		// has a main, but it is not executed.
		if GOARCH == "wasm" {
			// On Wasm, pause makes it return to the host.
			// Unlike cgo callbacks where Ms are created on demand,
			// on Wasm we have only one M. So we keep this M (and this
			// G) for callbacks.
			// Using the caller's SP unwinds this frame and backs to
			// goexit. The -16 is: 8 for goexit's (fake) return PC,
			// and pause's epilogue pops 8.
			pause(sys.GetCallerSP() - 16) // should not return
			panic("unreachable")
		}
		return
	}
	fn := main_main // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime
	fn()

	// Check for C memory leaks if using ASAN and we've made cgo calls,
	// or if we are running as a library in a C program.
	// We always make one cgo call, above, to notify_runtime_init_done,
	// so we ignore that one.
	// No point in leak checking if no cgo calls, since leak checking
	// just looks for objects allocated using malloc and friends.
	// Just checking iscgo doesn't help because asan implies iscgo.
	exitHooksRun := false
	if asanenabled && (isarchive || islibrary || NumCgoCall() > 1) {
		runExitHooks(0) // lsandoleakcheck may not return
		exitHooksRun = true
		lsandoleakcheck()
	}

	// Make racy client program work: if panicking on
	// another goroutine at the same time as main returns,
	// let the other goroutine finish printing the panic trace.
	// Once it does, it will exit. See issues 3934 and 20018.
	if runningPanicDefers.Load() != 0 {
		// Running deferred functions should not take long.
		// Yield at most 1000 times while waiting for them.
		for c := 0; c < 1000; c++ {
			if runningPanicDefers.Load() == 0 {
				break
			}
			Gosched()
		}
	}
	if panicking.Load() != 0 {
		// A panic is in progress elsewhere: park this goroutine (no
		// unlock function, trace reason traceBlockForever) instead of
		// exiting underneath it.
		gopark(nil, nil, waitReasonPanicWait, traceBlockForever, 1)
	}
	// Exit hooks run exactly once: either in the ASAN branch above or here.
	if !exitHooksRun {
		runExitHooks(0)
	}
	if raceenabled {
		racefini() // does not return
	}

	exit(0)
	// Control is not expected to get past exit. If it does, fault on a
	// nil pointer store rather than fall off the end of main.
	for {
		var x *int32
		*x = 0
	}
}

// os_beforeExit is called from os.Exit(0).
//
// It runs the exit hooks for exitCode and, only when exitCode is 0,
// finalizes the race detector (if enabled) and performs the ASAN leak
// check under the same condition main uses.
//
//go:linkname os_beforeExit os.runtime_beforeExit
func os_beforeExit(exitCode int) {
	runExitHooks(exitCode)
	if exitCode == 0 && raceenabled {
		racefini()
	}

	// See comment in main, above.
	if exitCode == 0 && asanenabled && (isarchive || islibrary || NumCgoCall() > 1) {
		lsandoleakcheck()
	}
}

// init hands the exithook package the runtime primitives it is
// configured with: Gosched, a goroutine-ID accessor, and throw.
func init() {
	exithook.Gosched = Gosched
	exithook.Goid = func() uint64 { return getg().goid }
	exithook.Throw = throw
}

// runExitHooks delegates to exithook.Run with the given exit code.
func runExitHooks(code int) {
	exithook.Run(code)
}

// start forcegc helper goroutine
func init() {
	go forcegchelper()
}

// forcegchelper is the body of the goroutine started by the init above.
// It loops forever: it marks forcegc as idle, parks while releasing
// forcegc.lock, and each time it is resumed it starts a time-triggered
// GC cycle.
func forcegchelper() {
	forcegc.g = getg()
	lockInit(&forcegc.lock, lockRankForcegc)
	for {
		lock(&forcegc.lock)
		// idle is set just before parking, so it must be clear on every
		// pass through the top of the loop.
		if forcegc.idle.Load() {
			throw("forcegc: phase error")
		}
		forcegc.idle.Store(true)
		goparkunlock(&forcegc.lock, waitReasonForceGCIdle, traceBlockSystemGoroutine, 1)
		// this goroutine is explicitly resumed by sysmon
		if debug.gctrace > 0 {
			println("GC forced")
		}
		// Time-triggered, fully concurrent.
		gcStart(gcTrigger{kind: gcTriggerTime, now: nanotime()})
	}
}

// Gosched yields the processor, allowing other goroutines to run. It does not
// suspend the current goroutine, so execution resumes automatically.
//
//go:nosplit
func Gosched() {
	checkTimeouts()
	mcall(gosched_m)
}

// goschedguarded yields the processor like gosched, but also checks
// for forbidden states and opts out of the yield in those cases.
//
//go:nosplit
func goschedguarded() {
	mcall(goschedguarded_m)
}

// goschedIfBusy yields the processor like gosched, but only does so if
// there are no idle Ps or if we're on the only P and there's nothing in
// the run queue. In both cases, there is freely available idle time.
//
// Concretely, it returns without yielding when no preemption has been
// requested for the current goroutine and at least one P is idle.
//
//go:nosplit
func goschedIfBusy() {
	gp := getg()
	// Call gosched if gp.preempt is set; we may be in a tight loop that
	// doesn't otherwise yield.
	if !gp.preempt && sched.npidle.Load() > 0 {
		return
	}
	mcall(gosched_m)
}

// Puts the current goroutine into a waiting state and calls unlockf on the
// system stack.
//
// If unlockf returns false, the goroutine is resumed.
//
// unlockf must not access this G's stack, as it may be moved between
// the call to gopark and the call to unlockf.
//
// Note that because unlockf is called after putting the G into a waiting
// state, the G may have already been readied by the time unlockf is called
// unless there is external synchronization preventing the G from being
// readied. If unlockf returns false, it must guarantee that the G cannot be
// externally readied.
//
// Reason explains why the goroutine has been parked. It is displayed in stack
// traces and heap dumps. Reasons should be unique and descriptive. Do not
// re-use reasons, add new ones.
//
// gopark should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//   - github.com/sagernet/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname gopark
func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason waitReason, traceReason traceBlockReason, traceskip int) {
	if reason != waitReasonSleep {
		checkTimeouts() // timeouts may expire while two goroutines keep the scheduler busy
	}
	mp := acquirem()
	gp := mp.curg
	status := readgstatus(gp)
	if status != _Grunning && status != _Gscanrunning {
		throw("gopark: bad g status")
	}
	// Stash the park parameters on the M and G before switching stacks
	// via mcall(park_m) below.
	mp.waitlock = lock
	mp.waitunlockf = unlockf
	gp.waitreason = reason
	mp.waitTraceBlockReason = traceReason
	mp.waitTraceSkip = traceskip
	releasem(mp)
	// can't do anything that might move the G between Ms here.
	mcall(park_m)
}

// Puts the current goroutine into a waiting state and unlocks the lock.
// The goroutine can be made runnable again by calling goready(gp).
func goparkunlock(lock *mutex, reason waitReason, traceReason traceBlockReason, traceskip int) {
	gopark(parkunlock_c, unsafe.Pointer(lock), reason, traceReason, traceskip)
}

// goready calls ready(gp, traceskip, true) on the system stack.
//
// goready should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//   - github.com/sagernet/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname goready
func goready(gp *g, traceskip int) {
	systemstack(func() {
		ready(gp, traceskip, true)
	})
}

// acquireSudog returns a sudog taken from the current P's cache,
// refilling that cache from the central list (or allocating a fresh
// sudog) when it is empty.
//
//go:nosplit
func acquireSudog() *sudog {
	// Delicate dance: the semaphore implementation calls
	// acquireSudog, acquireSudog calls new(sudog),
	// new calls malloc, malloc can call the garbage collector,
	// and the garbage collector calls the semaphore implementation
	// in stopTheWorld.
	// Break the cycle by doing acquirem/releasem around new(sudog).
	// The acquirem/releasem increments m.locks during new(sudog),
	// which keeps the garbage collector from being invoked.
	mp := acquirem()
	pp := mp.p.ptr()
	if len(pp.sudogcache) == 0 {
		lock(&sched.sudoglock)
		// First, try to grab a batch from central cache.
		// The refill stops once the local cache is half full.
		for len(pp.sudogcache) < cap(pp.sudogcache)/2 && sched.sudogcache != nil {
			s := sched.sudogcache
			sched.sudogcache = s.next
			s.next = nil
			pp.sudogcache = append(pp.sudogcache, s)
		}
		unlock(&sched.sudoglock)
		// If the central cache is empty, allocate a new one.
		if len(pp.sudogcache) == 0 {
			pp.sudogcache = append(pp.sudogcache, new(sudog))
		}
	}
	// Pop the last entry, clearing the vacated slot.
	n := len(pp.sudogcache)
	s := pp.sudogcache[n-1]
	pp.sudogcache[n-1] = nil
	pp.sudogcache = pp.sudogcache[:n-1]
	if s.elem.get() != nil {
		throw("acquireSudog: found s.elem != nil in cache")
	}
	releasem(mp)
	return s
}

// releaseSudog puts s back into the current P's cache. It throws if s
// still carries any state (elem, isSelect, next, prev, waitlink, c) or
// if the current goroutine's param is non-nil. When the local cache is
// full, half of it is first moved to the central list.
//
//go:nosplit
func releaseSudog(s *sudog) {
	if s.elem.get() != nil {
		throw("runtime: sudog with non-nil elem")
	}
	if s.isSelect {
		throw("runtime: sudog with non-false isSelect")
	}
	if s.next != nil {
		throw("runtime: sudog with non-nil next")
	}
	if s.prev != nil {
		throw("runtime: sudog with non-nil prev")
	}
	if s.waitlink != nil {
		throw("runtime: sudog with non-nil waitlink")
	}
	if s.c.get() != nil {
		throw("runtime: sudog with non-nil c")
	}
	gp := getg()
	if gp.param != nil {
		throw("runtime: releaseSudog with non-nil gp.param")
	}
	mp := acquirem() // avoid rescheduling to another P
	pp := mp.p.ptr()
	if len(pp.sudogcache) == cap(pp.sudogcache) {
		// Transfer half of local cache to the central cache.
		// The popped entries are chained through next into a list
		// first..last, built before sched.sudoglock is taken.
		var first, last *sudog
		for len(pp.sudogcache) > cap(pp.sudogcache)/2 {
			n := len(pp.sudogcache)
			p := pp.sudogcache[n-1]
			pp.sudogcache[n-1] = nil
			pp.sudogcache = pp.sudogcache[:n-1]
			if first == nil {
				first = p
			} else {
				last.next = p
			}
			last = p
		}
		// Splice the chain onto the front of the central list.
		lock(&sched.sudoglock)
		last.next = sched.sudogcache
		sched.sudogcache = first
		unlock(&sched.sudoglock)
	}
	pp.sudogcache = append(pp.sudogcache, s)
	releasem(mp)
}

// called from assembly.
// badmcall throws to report that mcall was invoked on the m->g0 stack.
// The fn argument is not used.
func badmcall(fn func(*g)) {
	throw("runtime: mcall called on m->g0 stack")
}

// badmcall2 throws to report that the function passed to mcall
// returned. The fn argument is not used.
func badmcall2(fn func(*g)) {
	throw("runtime: mcall function returned")
}

// badreflectcall panics with a plainError saying the argument size
// passed to reflect.call exceeds 1GB.
func badreflectcall() {
	panic(plainError("arg size to reflect.call more than 1GB"))
}

// badmorestackg0 reports a morestack on g0. Where crash stacks are
// implemented it switches to the crash stack, prints g0's stack bounds
// and a traceback from the saved scheduling state, and throws;
// otherwise it only writes a short fatal message and returns.
//
//go:nosplit
//go:nowritebarrierrec
func badmorestackg0() {
	if !crashStackImplemented {
		writeErrStr("fatal: morestack on g0\n")
		return
	}

	g := getg()
	switchToCrashStack(func() {
		print("runtime: morestack on g0, stack [", hex(g.stack.lo), " ", hex(g.stack.hi), "], sp=", hex(g.sched.sp), ", called from\n")
		g.m.traceback = 2 // include pc and sp in stack trace
		traceback1(g.sched.pc, g.sched.sp, g.sched.lr, g, 0)
		print("\n")

		throw("morestack on g0")
	})
}

// badmorestackgsignal writes a fatal message about morestack on
// gsignal and returns.
//
//go:nosplit
//go:nowritebarrierrec
func badmorestackgsignal() {
	writeErrStr("fatal: morestack on gsignal\n")
}

// badctxt throws with the message "ctxt != 0".
//
//go:nosplit
func badctxt() {
	throw("ctxt != 0")
}

// gcrash is a fake g that can be used when crashing due to bad
// stack conditions.
var gcrash g

// crashingG records the g that claimed the crash stack in
// switchToCrashStack; it is nil until the first claim.
var crashingG atomic.Pointer[g]

// Switch to crashstack and call fn, with special handling of
// concurrent and recursive cases.
//
// Nosplit as it is called in a bad stack condition (we know
// morestack would fail).
//
//go:nosplit
//go:nowritebarrierrec
func switchToCrashStack(fn func()) {
	me := getg()
	// The first g to install itself in crashingG gets the crash stack.
	if crashingG.CompareAndSwapNoWB(nil, me) {
		switchToCrashStack0(fn) // should never return
		abort()
	}
	if crashingG.Load() == me {
		// recursive crashing. too bad.
		writeErrStr("fatal: recursive switchToCrashStack\n")
		abort()
	}
	// Another g is crashing. Give it some time, hopefully it will finish traceback.
	usleep_no_g(100)
	writeErrStr("fatal: concurrent switchToCrashStack\n")
	abort()
}

// Disable crash stack on Windows for now. Apparently, throwing an exception
// on a non-system-allocated crash stack causes EXCEPTION_STACK_OVERFLOW and
// hangs the process (see issue 63938).
const crashStackImplemented = GOOS != "windows"

//go:noescape
func switchToCrashStack0(fn func()) // in assembly

// lockedOSThread reports whether both the current g's lockedm and its
// m's lockedg are set.
func lockedOSThread() bool {
	gp := getg()
	return gp.lockedm != 0 && gp.m.lockedg != 0
}

var (
	// allgs contains all Gs ever created (including dead Gs), and thus
	// never shrinks.
	//
	// Access via the slice is protected by allglock or stop-the-world.
	// Readers that cannot take the lock may (carefully!) use the atomic
	// variables below.
	allglock mutex
	allgs    []*g

	// allglen and allgptr are atomic variables that contain len(allgs) and
	// &allgs[0] respectively. Proper ordering depends on totally-ordered
	// loads and stores. Writes are protected by allglock.
	//
	// allgptr is updated before allglen. Readers should read allglen
	// before allgptr to ensure that allglen is always <= len(allgptr). New
	// Gs appended during the race can be missed. For a consistent view of
	// all Gs, allglock must be held.
	//
	// allgptr copies should always be stored as a concrete type or
	// unsafe.Pointer, not uintptr, to ensure that GC can still reach it
	// even if it points to a stale array.
	allglen uintptr
	allgptr **g
)

// allgadd appends gp to allgs under allglock and republishes the atomic
// view (allgptr, then allglen). It throws if gp is still in _Gidle.
func allgadd(gp *g) {
	if readgstatus(gp) == _Gidle {
		throw("allgadd: bad status Gidle")
	}

	lock(&allglock)
	allgs = append(allgs, gp)
	// Republish the base pointer only when it differs from the published
	// one (e.g. append moved the backing array).
	if &allgs[0] != allgptr {
		atomicstorep(unsafe.Pointer(&allgptr), unsafe.Pointer(&allgs[0]))
	}
	// The length is stored after the pointer, matching the ordering
	// documented on allglen/allgptr.
	atomic.Storeuintptr(&allglen, uintptr(len(allgs)))
	unlock(&allglock)
}

// allGsSnapshot returns a snapshot of the slice of all Gs.
//
// The world must be stopped or allglock must be held.
710 func allGsSnapshot() []*g { 711 assertWorldStoppedOrLockHeld(&allglock) 712 713 // Because the world is stopped or allglock is held, allgadd 714 // cannot happen concurrently with this. allgs grows 715 // monotonically and existing entries never change, so we can 716 // simply return a copy of the slice header. For added safety, 717 // we trim everything past len because that can still change. 718 return allgs[:len(allgs):len(allgs)] 719 } 720 721 // atomicAllG returns &allgs[0] and len(allgs) for use with atomicAllGIndex. 722 func atomicAllG() (**g, uintptr) { 723 length := atomic.Loaduintptr(&allglen) 724 ptr := (**g)(atomic.Loadp(unsafe.Pointer(&allgptr))) 725 return ptr, length 726 } 727 728 // atomicAllGIndex returns ptr[i] with the allgptr returned from atomicAllG. 729 func atomicAllGIndex(ptr **g, i uintptr) *g { 730 return *(**g)(add(unsafe.Pointer(ptr), i*goarch.PtrSize)) 731 } 732 733 // forEachG calls fn on every G from allgs. 734 // 735 // forEachG takes a lock to exclude concurrent addition of new Gs. 736 func forEachG(fn func(gp *g)) { 737 lock(&allglock) 738 for _, gp := range allgs { 739 fn(gp) 740 } 741 unlock(&allglock) 742 } 743 744 // forEachGRace calls fn on every G from allgs. 745 // 746 // forEachGRace avoids locking, but does not exclude addition of new Gs during 747 // execution, which may be missed. 748 func forEachGRace(fn func(gp *g)) { 749 ptr, length := atomicAllG() 750 for i := uintptr(0); i < length; i++ { 751 gp := atomicAllGIndex(ptr, i) 752 fn(gp) 753 } 754 return 755 } 756 757 const ( 758 // Number of goroutine ids to grab from sched.goidgen to local per-P cache at once. 759 // 16 seems to provide enough amortization, but other than that it's mostly arbitrary number. 760 _GoidCacheBatch = 16 761 ) 762 763 // cpuinit sets up CPU feature flags and calls internal/cpu.Initialize. env should be the complete 764 // value of the GODEBUG environment variable. 
765 func cpuinit(env string) { 766 cpu.Initialize(env) 767 768 // Support cpu feature variables are used in code generated by the compiler 769 // to guard execution of instructions that can not be assumed to be always supported. 770 switch GOARCH { 771 case "386", "amd64": 772 x86HasAVX = cpu.X86.HasAVX 773 x86HasFMA = cpu.X86.HasFMA 774 x86HasPOPCNT = cpu.X86.HasPOPCNT 775 x86HasSSE41 = cpu.X86.HasSSE41 776 777 case "arm": 778 armHasVFPv4 = cpu.ARM.HasVFPv4 779 780 case "arm64": 781 arm64HasATOMICS = cpu.ARM64.HasATOMICS 782 783 case "loong64": 784 loong64HasLAMCAS = cpu.Loong64.HasLAMCAS 785 loong64HasLAM_BH = cpu.Loong64.HasLAM_BH 786 loong64HasDBAR_HINTS = cpu.Loong64.HasDBAR_HINTS 787 loong64HasLSX = cpu.Loong64.HasLSX 788 789 case "riscv64": 790 riscv64HasZbb = cpu.RISCV64.HasZbb 791 } 792 } 793 794 // getGodebugEarly extracts the environment variable GODEBUG from the environment on 795 // Unix-like operating systems and returns it. This function exists to extract GODEBUG 796 // early before much of the runtime is initialized. 797 // 798 // Returns nil, false if OS doesn't provide env vars early in the init sequence. 799 func getGodebugEarly() (string, bool) { 800 const prefix = "GODEBUG=" 801 var env string 802 switch GOOS { 803 case "aix", "darwin", "ios", "dragonfly", "freebsd", "netbsd", "openbsd", "illumos", "solaris", "linux": 804 // Similar to goenv_unix but extracts the environment value for 805 // GODEBUG directly. 
806 // TODO(moehrmann): remove when general goenvs() can be called before cpuinit() 807 n := int32(0) 808 for argv_index(argv, argc+1+n) != nil { 809 n++ 810 } 811 812 for i := int32(0); i < n; i++ { 813 p := argv_index(argv, argc+1+i) 814 s := unsafe.String(p, findnull(p)) 815 816 if stringslite.HasPrefix(s, prefix) { 817 env = gostringnocopy(p)[len(prefix):] 818 break 819 } 820 } 821 break 822 823 default: 824 return "", false 825 } 826 return env, true 827 } 828 829 // The bootstrap sequence is: 830 // 831 // call osinit 832 // call schedinit 833 // make & queue new G 834 // call runtime·mstart 835 // 836 // The new G calls runtime·main. 837 func schedinit() { 838 lockInit(&sched.lock, lockRankSched) 839 lockInit(&sched.sysmonlock, lockRankSysmon) 840 lockInit(&sched.deferlock, lockRankDefer) 841 lockInit(&sched.sudoglock, lockRankSudog) 842 lockInit(&deadlock, lockRankDeadlock) 843 lockInit(&paniclk, lockRankPanic) 844 lockInit(&allglock, lockRankAllg) 845 lockInit(&allpLock, lockRankAllp) 846 lockInit(&reflectOffs.lock, lockRankReflectOffs) 847 lockInit(&finlock, lockRankFin) 848 lockInit(&cpuprof.lock, lockRankCpuprof) 849 lockInit(&computeMaxProcsLock, lockRankComputeMaxProcs) 850 allocmLock.init(lockRankAllocmR, lockRankAllocmRInternal, lockRankAllocmW) 851 execLock.init(lockRankExecR, lockRankExecRInternal, lockRankExecW) 852 traceLockInit() 853 // Enforce that this lock is always a leaf lock. 854 // All of this lock's critical sections should be 855 // extremely short. 856 lockInit(&memstats.heapStats.noPLock, lockRankLeafRank) 857 858 lockVerifyMSize() 859 860 sched.midle.init(unsafe.Offsetof(m{}.idleNode)) 861 862 // raceinit must be the first call to race detector. 863 // In particular, it must be done before mallocinit below calls racemapshadow. 864 gp := getg() 865 if raceenabled { 866 gp.racectx, raceprocctx0 = raceinit() 867 } 868 869 sched.maxmcount = 10000 870 crashFD.Store(^uintptr(0)) 871 872 // The world starts stopped. 
873 worldStopped() 874 875 godebug, parsedGodebug := getGodebugEarly() 876 if parsedGodebug { 877 parseRuntimeDebugVars(godebug) 878 } 879 ticks.init() // run as early as possible 880 moduledataverify() 881 stackinit() 882 randinit() // must run before mallocinit, AlgInit, mcommoninit 883 mallocinit() 884 cpuinit(godebug) // must run before AlgInit 885 maps.AlgInit() // maps, hash, rand must not be used before this call 886 mcommoninit(gp.m, -1) 887 modulesinit() // provides activeModules 888 typelinksinit() // uses maps, activeModules 889 itabsinit() // uses activeModules 890 stkobjinit() // must run before GC starts 891 892 sigsave(&gp.m.sigmask) 893 initSigmask = gp.m.sigmask 894 895 goargs() 896 goenvs() 897 secure() 898 checkfds() 899 if !parsedGodebug { 900 // Some platforms, e.g., Windows, didn't make env vars available "early", 901 // so try again now. 902 parseRuntimeDebugVars(gogetenv("GODEBUG")) 903 } 904 finishDebugVarsSetup() 905 gcinit() 906 907 // Allocate stack space that can be used when crashing due to bad stack 908 // conditions, e.g. morestack on g0. 909 gcrash.stack = stackalloc(16384) 910 gcrash.stackguard0 = gcrash.stack.lo + 1000 911 gcrash.stackguard1 = gcrash.stack.lo + 1000 912 913 // if disableMemoryProfiling is set, update MemProfileRate to 0 to turn off memprofile. 914 // Note: parsedebugvars may update MemProfileRate, but when disableMemoryProfiling is 915 // set to true by the linker, it means that nothing is consuming the profile, it is 916 // safe to set MemProfileRate to 0. 917 if disableMemoryProfiling { 918 MemProfileRate = 0 919 } 920 921 // mcommoninit runs before parsedebugvars, so init profstacks again. 
mProfStackInit(gp.m)
	defaultGOMAXPROCSInit()

	lock(&sched.lock)
	sched.lastpoll.Store(nanotime())
	var procs int32
	if n, err := strconv.ParseInt(gogetenv("GOMAXPROCS"), 10, 32); err == nil && n > 0 {
		procs = int32(n)
		sched.customGOMAXPROCS = true
	} else {
		// Use numCPUStartup for initial GOMAXPROCS for two reasons:
		//
		// 1. We just computed it in osinit, recomputing is (minorly) wasteful.
		//
		// 2. More importantly, if debug.containermaxprocs == 0 &&
		//    debug.updatemaxprocs == 0, we want to guarantee that
		//    runtime.GOMAXPROCS(0) always equals runtime.NumCPU (which is
		//    just numCPUStartup).
		procs = defaultGOMAXPROCS(numCPUStartup)
	}
	if procresize(procs) != nil {
		throw("unknown runnable goroutine during bootstrap")
	}
	unlock(&sched.lock)

	// World is effectively started now, as P's can run.
	worldStarted()

	if buildVersion == "" {
		// Condition should never trigger. This code just serves
		// to ensure runtime·buildVersion is kept in the resulting binary.
		buildVersion = "unknown"
	}
	if len(modinfo) == 1 {
		// Condition should never trigger. This code just serves
		// to ensure runtime·modinfo is kept in the resulting binary.
		modinfo = ""
	}
}

// dumpgstatus prints the pointer, goid and atomic status of gp, followed by
// the same three values for the goroutine returned by getg.
func dumpgstatus(gp *g) {
	thisg := getg()
	print("runtime: gp: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
	print("runtime: getg: g=", thisg, ", goid=", thisg.goid, ", g->atomicstatus=", readgstatus(thisg), "\n")
}

// checkmcount throws if the number of Ms, not counting extra Ms, exceeds
// sched.maxmcount.
//
// sched.lock must be held.
func checkmcount() {
	assertLockHeld(&sched.lock)

	// Exclude extra M's, which are used for cgocallback from threads
	// created in C.
	//
	// The purpose of the SetMaxThreads limit is to avoid accidental fork
	// bomb from something like millions of goroutines blocking on system
	// calls, causing the runtime to create millions of threads. By
	// definition, this isn't a problem for threads created in C, so we
	// exclude them from the limit. See https://go.dev/issue/60004.
	count := mcount() - int32(extraMInUse.Load()) - int32(extraMLength.Load())
	if count > sched.maxmcount {
		print("runtime: program exceeds ", sched.maxmcount, "-thread limit\n")
		throw("thread exhaustion")
	}
}

// mReserveID returns the next ID to use for a new m. This new m is immediately
// considered 'running' by checkdead.
//
// It throws if incrementing sched.mnext would overflow, and calls checkmcount
// after reserving the ID.
//
// sched.lock must be held.
func mReserveID() int64 {
	assertLockHeld(&sched.lock)

	if sched.mnext+1 < sched.mnext {
		throw("runtime: thread ID overflow")
	}
	id := sched.mnext
	sched.mnext++
	checkmcount()
	return id
}

// mcommoninit assigns mp its ID, sets up its per-M state (self weak pointer,
// random state, mpreinit, profiling stack buffers) and publishes it on allm.
//
// Pre-allocated ID may be passed as 'id', or omitted by passing -1.
func mcommoninit(mp *m, id int64) {
	gp := getg()

	// g0 stack won't make sense for user (and is not necessarily unwindable).
	if gp != gp.m.g0 {
		callers(1, mp.createstack[:])
	}

	lock(&sched.lock)

	if id >= 0 {
		mp.id = id
	} else {
		mp.id = mReserveID()
	}

	mp.self = newMWeakPointer(mp)

	mrandinit(mp)

	mpreinit(mp)
	if mp.gsignal != nil {
		mp.gsignal.stackguard1 = mp.gsignal.stack.lo + stackGuard
	}

	// Add to allm so garbage collector doesn't free g->m
	// when it is just in a register or thread-local storage.
	mp.alllink = allm

	// NumCgoCall and others iterate over allm w/o schedlock,
	// so we need to publish it safely.
	atomicstorep(unsafe.Pointer(&allm), unsafe.Pointer(mp))
	unlock(&sched.lock)

	// Allocate memory to hold a cgo traceback if the cgo call crashes.
	if iscgo || GOOS == "solaris" || GOOS == "illumos" || GOOS == "windows" {
		mp.cgoCallers = new(cgoCallers)
	}
	mProfStackInit(mp)
}

// mProfStackInit is used to eagerly initialize stack trace buffers for
// profiling. Lazy allocation would have to deal with reentrancy issues in
// malloc and runtime locks for mLockProfile.
// TODO(mknyszek): Implement lazy allocation if this becomes a problem.
func mProfStackInit(mp *m) {
	if debug.profstackdepth == 0 {
		// debug.profstackdepth is set to 0 by the user, or we're being called from
		// schedinit before parsedebugvars.
		return
	}
	mp.profStack = makeProfStackFP()
	mp.mLockProfile.stack = makeProfStackFP()
}

// makeProfStackFP creates a buffer large enough to hold a maximum-sized stack
// trace as well as any additional frames needed for frame pointer unwinding
// with delayed inline expansion.
func makeProfStackFP() []uintptr {
	// The "1" term is to account for the first stack entry being
	// taken up by a "skip" sentinel value for profilers which
	// defer inline frame expansion until the profile is reported.
	// The "maxSkip" term is for frame pointer unwinding, where we
	// want to end up with debug.profstackdepth frames but will discard
	// some "physical" frames to account for skipping.
	return make([]uintptr, 1+maxSkip+debug.profstackdepth)
}

// makeProfStack returns a buffer large enough to hold a maximum-sized stack
// trace.
func makeProfStack() []uintptr { return make([]uintptr, debug.profstackdepth) }

// pprof_makeProfStack is a linknamed wrapper around makeProfStack.
//
//go:linkname pprof_makeProfStack
func pprof_makeProfStack() []uintptr { return makeProfStack() }

// becomeSpinning marks mp as spinning, increments sched.nmspinning and
// resets sched.needspinning to 0.
func (mp *m) becomeSpinning() {
	mp.spinning = true
	sched.nmspinning.Add(1)
	sched.needspinning.Store(0)
}

// Take a snapshot of allp, for use after dropping the P.
//
// Must be called with a P, but the returned slice may be used after dropping
// the P. The M holds a reference on the snapshot to keep the backing array
// alive.
//
//go:yeswritebarrierrec
func (mp *m) snapshotAllp() []*p {
	mp.allpSnapshot = allp
	return mp.allpSnapshot
}

// Clear the saved allp snapshot. Should be called as soon as the snapshot is
// no longer required.
//
// Must be called after reacquiring a P, as it requires a write barrier.
//
//go:yeswritebarrierrec
func (mp *m) clearAllpSnapshot() {
	mp.allpSnapshot = nil
}

// hasCgoOnStack reports whether mp.ncgo is positive or mp is an extra M.
func (mp *m) hasCgoOnStack() bool {
	return mp.ncgo > 0 || mp.isextra
}

const (
	// osHasLowResTimer indicates that the platform's internal timer system has a low resolution,
	// typically on the order of 1 ms or more.
	osHasLowResTimer = GOOS == "windows" || GOOS == "openbsd" || GOOS == "netbsd"

	// osHasLowResClockInt is osHasLowResClock but in integer form, so it can be used to create
	// constants conditionally.
	osHasLowResClockInt = goos.IsWindows

	// osHasLowResClock indicates that timestamps produced by nanotime on the platform have a
	// low resolution, typically on the order of 1 ms or more.
	osHasLowResClock = osHasLowResClockInt > 0
)

// Mark gp ready to run.
//
// gp must be in _Gwaiting (the _Gscan bit may also be set); otherwise ready
// throws. gp is moved to _Grunnable and put on the current P's run queue,
// with next passed through to runqput, and then wakep is called. traceskip
// is passed through to the tracer's GoUnpark event.
func ready(gp *g, traceskip int, next bool) {
	status := readgstatus(gp)

	// Mark runnable.
	mp := acquirem() // disable preemption because it can be holding p in a local var
	if status&^_Gscan != _Gwaiting {
		dumpgstatus(gp)
		throw("bad g->status in ready")
	}

	// status is Gwaiting or Gscanwaiting, make Grunnable and put on runq
	trace := traceAcquire()
	casgstatus(gp, _Gwaiting, _Grunnable)
	if trace.ok() {
		trace.GoUnpark(gp, traceskip)
		traceRelease(trace)
	}
	runqput(mp.p.ptr(), gp, next)
	wakep()
	releasem(mp)
}

// freezeStopWait is a large value that freezetheworld sets
// sched.stopwait to in order to request that all Gs permanently stop.
const freezeStopWait = 0x7fffffff

// freezing is set to true if the runtime is trying to freeze the
// world.
var freezing atomic.Bool

// Similar to stopTheWorld but best-effort and can be called several times.
// There is no reverse operation, used during crashing.
// This function must not lock any mutexes.
func freezetheworld() {
	freezing.Store(true)
	if debug.dontfreezetheworld > 0 {
		// Don't preempt Ps to stop goroutines. That will perturb
		// scheduler state, making debugging more difficult. Instead,
		// allow goroutines to continue execution.
		//
		// fatalpanic will tracebackothers to trace all goroutines. It
		// is unsafe to trace a running goroutine, so tracebackothers
		// will skip running goroutines. That is OK and expected, we
		// expect users of dontfreezetheworld to use core files anyway.
		//
		// However, allowing the scheduler to continue running free
		// introduces a race: a goroutine may be stopped when
		// tracebackothers checks its status, and then start running
		// later when we are in the middle of traceback, potentially
		// causing a crash.
		//
		// To mitigate this, when an M naturally enters the scheduler,
		// schedule checks if freezing is set and if so stops
		// execution. This guarantees that while Gs can transition from
		// running to stopped, they can never transition from stopped
		// to running.
		//
		// The sleep here allows racing Ms that missed freezing and are
		// about to run a G to complete the transition to running
		// before we start traceback.
		usleep(1000)
		return
	}

	// stopwait and preemption requests can be lost
	// due to races with concurrently executing threads,
	// so try several times
	for i := 0; i < 5; i++ {
		// this should tell the scheduler to not start any new goroutines
		sched.stopwait = freezeStopWait
		sched.gcwaiting.Store(true)
		// this should stop running goroutines
		if !preemptall() {
			break // no running goroutines
		}
		usleep(1000)
	}
	// to be sure
	usleep(1000)
	preemptall()
	usleep(1000)
}

// All reads and writes of g's status go through readgstatus, casgstatus,
// castogscanstatus, casfrom_Gscanstatus.
//
//go:nosplit
func readgstatus(gp *g) uint32 {
	return gp.atomicstatus.Load()
}

// The Gscanstatuses are acting like locks and this releases them.
// If it proves to be a performance hit we should be able to make these
// simple atomic stores but for now we are going to throw if
// we see an inconsistent state.
//
// oldval must be one of the _Gscan* statuses listed below and newval must be
// oldval with the _Gscan bit cleared; any other combination, or a failed
// compare-and-swap, throws.
func casfrom_Gscanstatus(gp *g, oldval, newval uint32) {
	success := false

	// Check that transition is valid.
	switch oldval {
	default:
		print("runtime: casfrom_Gscanstatus bad oldval gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
		dumpgstatus(gp)
		throw("casfrom_Gscanstatus:top gp->status is not in scan state")
	case _Gscanrunnable,
		_Gscanwaiting,
		_Gscanrunning,
		_Gscansyscall,
		_Gscanleaked,
		_Gscanpreempted,
		_Gscandeadextra:
		if newval == oldval&^_Gscan {
			success = gp.atomicstatus.CompareAndSwap(oldval, newval)
		}
	}
	if !success {
		print("runtime: casfrom_Gscanstatus failed gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
		dumpgstatus(gp)
		throw("casfrom_Gscanstatus: gp->status is not in scan state")
	}
	releaseLockRankAndM(lockRankGscan)
}

// This will return false if the gp is not in the expected status and the cas fails.
// This acts like a lock acquire while the casfrom_Gscanstatus acts like a lock release.
//
// oldval must be one of the non-scan statuses listed below and newval must be
// oldval|_Gscan; any other combination throws.
func castogscanstatus(gp *g, oldval, newval uint32) bool {
	switch oldval {
	case _Grunnable,
		_Grunning,
		_Gwaiting,
		_Gleaked,
		_Gsyscall,
		_Gdeadextra:
		if newval == oldval|_Gscan {
			r := gp.atomicstatus.CompareAndSwap(oldval, newval)
			if r {
				acquireLockRankAndM(lockRankGscan)
			}
			return r

		}
	}
	print("runtime: castogscanstatus oldval=", hex(oldval), " newval=", hex(newval), "\n")
	throw("bad oldval passed to castogscanstatus")
	return false
}

// casgstatusAlwaysTrack is a debug flag that causes casgstatus to always track
// various latencies on every transition instead of sampling them.
var casgstatusAlwaysTrack = false

// If asked to move to or from a Gscanstatus this will throw. Use the castogscanstatus
// and casfrom_Gscanstatus instead.
// casgstatus will loop if the g->atomicstatus is in a Gscan status until the routine that
// put it in the Gscan state is finished.
//
//go:nosplit
func casgstatus(gp *g, oldval, newval uint32) {
	// Reject scan statuses on either side and no-op transitions.
	if (oldval&_Gscan != 0) || (newval&_Gscan != 0) || oldval == newval {
		systemstack(func() {
			// Call on the systemstack to prevent print and throw from counting
			// against the nosplit stack reservation.
			print("runtime: casgstatus: oldval=", hex(oldval), " newval=", hex(newval), "\n")
			throw("casgstatus: bad incoming values")
		})
	}

	lockWithRankMayAcquire(nil, lockRankGscan)

	// See https://golang.org/cl/21503 for justification of the yield delay.
	const yieldDelay = 5 * 1000
	var nextYield int64

	// loop if gp->atomicstatus is in a scan state giving
	// GC time to finish and change the state to oldval.
	for i := 0; !gp.atomicstatus.CompareAndSwap(oldval, newval); i++ {
		if oldval == _Gwaiting && gp.atomicstatus.Load() == _Grunnable {
			systemstack(func() {
				// Call on the systemstack to prevent throw from counting
				// against the nosplit stack reservation.
				throw("casgstatus: waiting for Gwaiting but is Grunnable")
			})
		}
		if i == 0 {
			nextYield = nanotime() + yieldDelay
		}
		// Until nextYield, spin with procyield for up to 10 rounds while
		// the status is not oldval. After that, yield the OS thread and
		// push nextYield out by half of yieldDelay.
		if nanotime() < nextYield {
			for x := 0; x < 10 && gp.atomicstatus.Load() != oldval; x++ {
				procyield(1)
			}
		} else {
			osyield()
			nextYield = nanotime() + yieldDelay/2
		}
	}

	// Report the completed transition to gp's bubble, if it has one.
	if gp.bubble != nil {
		systemstack(func() {
			gp.bubble.changegstatus(gp, oldval, newval)
		})
	}

	if (oldval == _Grunning || oldval == _Gsyscall) && (newval != _Grunning && newval != _Gsyscall) {
		// Track every gTrackingPeriod time a goroutine transitions out of _Grunning or _Gsyscall.
		// Do not track _Grunning <-> _Gsyscall transitions, since they're two very similar states.
		if casgstatusAlwaysTrack || gp.trackingSeq%gTrackingPeriod == 0 {
			gp.tracking = true
		}
		gp.trackingSeq++
	}
	// Everything below is bookkeeping for tracked goroutines only.
	if !gp.tracking {
		return
	}

	// Handle various kinds of tracking.
	//
	// Currently:
	// - Time spent in runnable.
	// - Time spent blocked on a sync.Mutex or sync.RWMutex.
	switch oldval {
	case _Grunnable:
		// We transitioned out of runnable, so measure how much
		// time we spent in this state and add it to
		// runnableTime.
		now := nanotime()
		gp.runnableTime += now - gp.trackingStamp
		gp.trackingStamp = 0
	case _Gwaiting:
		if !gp.waitreason.isMutexWait() {
			// Not blocking on a lock.
			break
		}
		// Blocking on a lock, measure it. Note that because we're
		// sampling, we have to multiply by our sampling period to get
		// a more representative estimate of the absolute value.
		// gTrackingPeriod also represents an accurate sampling period
		// because we can only enter this state from _Grunning.
		now := nanotime()
		sched.totalMutexWaitTime.Add((now - gp.trackingStamp) * gTrackingPeriod)
		gp.trackingStamp = 0
	}
	switch newval {
	case _Gwaiting:
		if !gp.waitreason.isMutexWait() {
			// Not blocking on a lock.
			break
		}
		// Blocking on a lock. Write down the timestamp.
		now := nanotime()
		gp.trackingStamp = now
	case _Grunnable:
		// We just transitioned into runnable, so record what
		// time that happened.
		now := nanotime()
		gp.trackingStamp = now
	case _Grunning:
		// We're transitioning into running, so turn off
		// tracking and record how much time we spent in
		// runnable.
		gp.tracking = false
		sched.timeToRun.record(gp.runnableTime)
		gp.runnableTime = 0
	}
}

// casGToWaiting transitions gp from old to _Gwaiting, and sets the wait reason.
1392 // 1393 // Use this over casgstatus when possible to ensure that a waitreason is set. 1394 func casGToWaiting(gp *g, old uint32, reason waitReason) { 1395 // Set the wait reason before calling casgstatus, because casgstatus will use it. 1396 gp.waitreason = reason 1397 casgstatus(gp, old, _Gwaiting) 1398 } 1399 1400 // casGToWaitingForSuspendG transitions gp from old to _Gwaiting, and sets the wait reason. 1401 // The wait reason must be a valid isWaitingForSuspendG wait reason. 1402 // 1403 // While a goroutine is in this state, it's stack is effectively pinned. 1404 // The garbage collector must not shrink or otherwise mutate the goroutine's stack. 1405 // 1406 // Use this over casgstatus when possible to ensure that a waitreason is set. 1407 func casGToWaitingForSuspendG(gp *g, old uint32, reason waitReason) { 1408 if !reason.isWaitingForSuspendG() { 1409 throw("casGToWaitingForSuspendG with non-isWaitingForSuspendG wait reason") 1410 } 1411 casGToWaiting(gp, old, reason) 1412 } 1413 1414 // casGToPreemptScan transitions gp from _Grunning to _Gscan|_Gpreempted. 1415 // 1416 // TODO(austin): This is the only status operation that both changes 1417 // the status and locks the _Gscan bit. Rethink this. 1418 func casGToPreemptScan(gp *g, old, new uint32) { 1419 if old != _Grunning || new != _Gscan|_Gpreempted { 1420 throw("bad g transition") 1421 } 1422 acquireLockRankAndM(lockRankGscan) 1423 for !gp.atomicstatus.CompareAndSwap(_Grunning, _Gscan|_Gpreempted) { 1424 } 1425 // We never notify gp.bubble that the goroutine state has moved 1426 // from _Grunning to _Gpreempted. We call bubble.changegstatus 1427 // after status changes happen, but doing so here would violate the 1428 // ordering between the gscan and synctest locks. The bubble doesn't 1429 // distinguish between _Grunning and _Gpreempted anyway, so not 1430 // notifying it is fine. 1431 } 1432 1433 // casGFromPreempted attempts to transition gp from _Gpreempted to 1434 // _Gwaiting. 
If successful, the caller is responsible for 1435 // re-scheduling gp. 1436 func casGFromPreempted(gp *g, old, new uint32) bool { 1437 if old != _Gpreempted || new != _Gwaiting { 1438 throw("bad g transition") 1439 } 1440 gp.waitreason = waitReasonPreempted 1441 if !gp.atomicstatus.CompareAndSwap(_Gpreempted, _Gwaiting) { 1442 return false 1443 } 1444 if bubble := gp.bubble; bubble != nil { 1445 bubble.changegstatus(gp, _Gpreempted, _Gwaiting) 1446 } 1447 return true 1448 } 1449 1450 // stwReason is an enumeration of reasons the world is stopping. 1451 type stwReason uint8 1452 1453 // Reasons to stop-the-world. 1454 // 1455 // Avoid reusing reasons and add new ones instead. 1456 const ( 1457 stwUnknown stwReason = iota // "unknown" 1458 stwGCMarkTerm // "GC mark termination" 1459 stwGCSweepTerm // "GC sweep termination" 1460 stwWriteHeapDump // "write heap dump" 1461 stwGoroutineProfile // "goroutine profile" 1462 stwGoroutineProfileCleanup // "goroutine profile cleanup" 1463 stwAllGoroutinesStack // "all goroutines stack trace" 1464 stwReadMemStats // "read mem stats" 1465 stwAllThreadsSyscall // "AllThreadsSyscall" 1466 stwGOMAXPROCS // "GOMAXPROCS" 1467 stwStartTrace // "start trace" 1468 stwStopTrace // "stop trace" 1469 stwForTestCountPagesInUse // "CountPagesInUse (test)" 1470 stwForTestReadMetricsSlow // "ReadMetricsSlow (test)" 1471 stwForTestReadMemStatsSlow // "ReadMemStatsSlow (test)" 1472 stwForTestPageCachePagesLeaked // "PageCachePagesLeaked (test)" 1473 stwForTestResetDebugLog // "ResetDebugLog (test)" 1474 ) 1475 1476 func (r stwReason) String() string { 1477 return stwReasonStrings[r] 1478 } 1479 1480 func (r stwReason) isGC() bool { 1481 return r == stwGCMarkTerm || r == stwGCSweepTerm 1482 } 1483 1484 // If you add to this list, also add it to src/internal/trace/parser.go. 1485 // If you change the values of any of the stw* constants, bump the trace 1486 // version number and make a copy of this. 
var stwReasonStrings = [...]string{
	stwUnknown:                     "unknown",
	stwGCMarkTerm:                  "GC mark termination",
	stwGCSweepTerm:                 "GC sweep termination",
	stwWriteHeapDump:               "write heap dump",
	stwGoroutineProfile:            "goroutine profile",
	stwGoroutineProfileCleanup:     "goroutine profile cleanup",
	stwAllGoroutinesStack:          "all goroutines stack trace",
	stwReadMemStats:                "read mem stats",
	stwAllThreadsSyscall:           "AllThreadsSyscall",
	stwGOMAXPROCS:                  "GOMAXPROCS",
	stwStartTrace:                  "start trace",
	stwStopTrace:                   "stop trace",
	stwForTestCountPagesInUse:      "CountPagesInUse (test)",
	stwForTestReadMetricsSlow:      "ReadMetricsSlow (test)",
	stwForTestReadMemStatsSlow:     "ReadMemStatsSlow (test)",
	stwForTestPageCachePagesLeaked: "PageCachePagesLeaked (test)",
	stwForTestResetDebugLog:        "ResetDebugLog (test)",
}

// worldStop provides context from the stop-the-world required by the
// start-the-world.
type worldStop struct {
	// reason is the reason that was passed to stopTheWorldWithSema.
	reason stwReason
	// startedStopping is the nanotime taken by stopTheWorldWithSema right
	// after it acquired sched.lock.
	startedStopping int64
	// finishedStopping is the nanotime taken by stopTheWorldWithSema once
	// it finished waiting for Ps to stop.
	finishedStopping int64
	// stoppingCPUTime is the sum, over all Ps, of finishedStopping minus
	// the P's gcStopTime.
	stoppingCPUTime int64
}

// Temporary variable for stopTheWorld, when it can't write to the stack.
//
// Protected by worldsema.
var stopTheWorldContext worldStop

// stopTheWorld stops all P's from executing goroutines, interrupting
// all goroutines at GC safe points and records reason as the reason
// for the stop. On return, only the current goroutine's P is running.
// stopTheWorld must not be called from a system stack and the caller
// must not hold worldsema. The caller must call startTheWorld when
// other P's should resume execution.
//
// stopTheWorld is safe for multiple goroutines to call at the
// same time. Each will execute its own stop, and the stops will
// be serialized.
//
// This is also used by routines that do stack dumps.
// If the system is
// in panic or being exited, this may not reliably stop all
// goroutines.
//
// Returns the STW context. When starting the world, this context must be
// passed to startTheWorld.
func stopTheWorld(reason stwReason) worldStop {
	semacquire(&worldsema)
	gp := getg()
	// Record the reason string in preemptoff; startTheWorld clears it again.
	gp.m.preemptoff = reason.String()
	systemstack(func() {
		stopTheWorldContext = stopTheWorldWithSema(reason) // avoid write to stack
	})
	return stopTheWorldContext
}

// startTheWorld undoes the effects of stopTheWorld.
//
// w must be the worldStop returned by stopTheWorld.
func startTheWorld(w worldStop) {
	systemstack(func() { startTheWorldWithSema(0, w) })

	// worldsema must be held over startTheWorldWithSema to ensure
	// gomaxprocs cannot change while worldsema is held.
	//
	// Release worldsema with direct handoff to the next waiter, but
	// acquirem so that semrelease1 doesn't try to yield our time.
	//
	// Otherwise if e.g. ReadMemStats is being called in a loop,
	// it might stomp on other attempts to stop the world, such as
	// for starting or ending GC. The operation this blocks is
	// so heavy-weight that we should just try to be as fair as
	// possible here.
	//
	// We don't want to just allow us to get preempted between now
	// and releasing the semaphore because then we keep everyone
	// (including, for example, GCs) waiting longer.
	mp := acquirem()
	mp.preemptoff = ""
	semrelease1(&worldsema, true, 0)
	releasem(mp)
}

// stopTheWorldGC has the same effect as stopTheWorld, but blocks
// until the GC is not running. It also blocks a GC from starting
// until startTheWorldGC is called.
func stopTheWorldGC(reason stwReason) worldStop {
	// Take gcsema before stopping the world; startTheWorldGC releases it
	// after the world has been started again.
	semacquire(&gcsema)
	return stopTheWorld(reason)
}

// startTheWorldGC undoes the effects of stopTheWorldGC.
//
// w must be the worldStop returned by stopTheWorldGC.
func startTheWorldGC(w worldStop) {
	startTheWorld(w)
	semrelease(&gcsema)
}

// Holding worldsema grants an M the right to try to stop the world.
var worldsema uint32 = 1

// Holding gcsema grants the M the right to block a GC, and blocks
// until the current GC is done. In particular, it prevents gomaxprocs
// from changing concurrently.
//
// TODO(mknyszek): Once gomaxprocs and the execution tracer can handle
// being changed/enabled during a GC, remove this.
var gcsema uint32 = 1

// stopTheWorldWithSema is the core implementation of stopTheWorld.
// The caller is responsible for acquiring worldsema and disabling
// preemption first and then should call stopTheWorldWithSema on the
// system stack:
//
//	semacquire(&worldsema)
//	m.preemptoff = "reason"
//	var stw worldStop
//	systemstack(func() {
//		stw = stopTheWorldWithSema(reason)
//	})
//
// When finished, the caller must either call startTheWorld or undo
// these three operations separately:
//
//	m.preemptoff = ""
//	systemstack(func() {
//		now = startTheWorldWithSema(0, stw)
//	})
//	semrelease(&worldsema)
//
// It is allowed to acquire worldsema once and then execute multiple
// startTheWorldWithSema/stopTheWorldWithSema pairs.
// Other P's are able to execute between successive calls to
// startTheWorldWithSema and stopTheWorldWithSema.
// Holding worldsema causes any other goroutines invoking
// stopTheWorld to block.
//
// Returns the STW context. When starting the world, this context must be
// passed to startTheWorldWithSema.
//
//go:systemstack
func stopTheWorldWithSema(reason stwReason) worldStop {
	// Mark the goroutine which called stopTheWorld preemptible so its
	// stack may be scanned by the GC or observed by the execution tracer.
	//
	// This lets a mark worker scan us or the execution tracer take our
	// stack while we try to stop the world since otherwise we could get
	// in a mutual preemption deadlock.
	//
	// casGToWaitingForSuspendG marks the goroutine as ineligible for a
	// stack shrink, effectively pinning the stack in memory for the duration.
	//
	// N.B. The execution tracer is not aware of this status transition and
	// handles it specially based on the wait reason.
	casGToWaitingForSuspendG(getg().m.curg, _Grunning, waitReasonStoppingTheWorld)

	trace := traceAcquire()
	if trace.ok() {
		trace.STWStart(reason)
		traceRelease(trace)
	}
	gp := getg()

	// If we hold a lock, then we won't be able to stop another M
	// that is blocked trying to acquire the lock.
	if gp.m.locks > 0 {
		throw("stopTheWorld: holding locks")
	}

	lock(&sched.lock)
	start := nanotime() // exclude time waiting for sched.lock from start and total time metrics.
	sched.stopwait = gomaxprocs
	sched.gcwaiting.Store(true)
	preemptall()

	// Stop current P.
	gp.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic.
	gp.m.p.ptr().gcStopTime = start
	sched.stopwait--

	// Try to retake all P's in syscalls.
	for _, pp := range allp {
		if thread, ok := setBlockOnExitSyscall(pp); ok {
			thread.gcstopP()
			thread.resume()
		}
	}

	// Stop idle Ps.
	now := nanotime()
	for {
		pp, _ := pidleget(now)
		if pp == nil {
			break
		}
		pp.status = _Pgcstop
		pp.gcStopTime = nanotime()
		sched.stopwait--
	}
	wait := sched.stopwait > 0
	unlock(&sched.lock)

	// Wait for remaining Ps to stop voluntarily.
	if wait {
		for {
			// wait for 100us, then try to re-preempt in case of any races
			if notetsleep(&sched.stopnote, 100*1000) {
				noteclear(&sched.stopnote)
				break
			}
			preemptall()
		}
	}

	// startTime is how long the stop took, measured from just after
	// sched.lock was acquired.
	finish := nanotime()
	startTime := finish - start
	if reason.isGC() {
		sched.stwStoppingTimeGC.record(startTime)
	} else {
		sched.stwStoppingTimeOther.record(startTime)
	}

	// Double-check we actually stopped everything, and all the invariants hold.
	// Also accumulate all the time spent by each P in _Pgcstop up to the point
	// where everything was stopped. This will be accumulated into the total pause
	// CPU time by the caller.
	stoppingCPUTime := int64(0)
	bad := ""
	if sched.stopwait != 0 {
		bad = "stopTheWorld: not stopped (stopwait != 0)"
	} else {
		for _, pp := range allp {
			if pp.status != _Pgcstop {
				bad = "stopTheWorld: not stopped (status != _Pgcstop)"
			}
			if pp.gcStopTime == 0 && bad == "" {
				bad = "stopTheWorld: broken CPU time accounting"
			}
			stoppingCPUTime += finish - pp.gcStopTime
			pp.gcStopTime = 0
		}
	}
	if freezing.Load() {
		// Some other thread is panicking. This can cause the
		// sanity checks above to fail if the panic happens in
		// the signal handler on a stopped thread. Either way,
		// we should halt this thread.
		//
		// The same lock is taken twice on purpose: the second
		// acquisition is what halts this thread.
		lock(&deadlock)
		lock(&deadlock)
	}
	if bad != "" {
		throw(bad)
	}

	worldStopped()

	// Switch back to _Grunning, now that the world is stopped.
	casgstatus(getg().m.curg, _Gwaiting, _Grunning)

	return worldStop{
		reason:           reason,
		startedStopping:  start,
		finishedStopping: finish,
		stoppingCPUTime:  stoppingCPUTime,
	}
}

// startTheWorldWithSema restarts the world. w is the worldStop describing
// the stop being undone; its reason and startedStopping fields are used to
// record the total stop-the-world time.
//
// now is the current time; prefer to pass 0 to capture a fresh timestamp.
//
// startTheWorldWithSema returns now.
func startTheWorldWithSema(now int64, w worldStop) int64 {
	assertWorldStopped()

	mp := acquirem() // disable preemption because it can be holding p in a local var
	if netpollinited() {
		list, delta := netpoll(0) // non-blocking
		injectglist(&list)
		netpollAdjustWaiters(delta)
	}
	lock(&sched.lock)

	// Apply a pending newprocs value, if any; otherwise keep gomaxprocs.
	procs := gomaxprocs
	if newprocs != 0 {
		procs = newprocs
		newprocs = 0
	}
	p1 := procresize(procs)
	sched.gcwaiting.Store(false)
	if sched.sysmonwait.Load() {
		sched.sysmonwait.Store(false)
		notewakeup(&sched.sysmonnote)
	}
	unlock(&sched.lock)

	worldStarted()

	// Walk the list of Ps returned by procresize. A P that already has an M
	// is handed to that M, which is then woken; otherwise a new M is started.
	for p1 != nil {
		p := p1
		p1 = p1.link.ptr()
		if p.m != 0 {
			mp := p.m.ptr()
			p.m = 0
			if mp.nextp != 0 {
				throw("startTheWorld: inconsistent mp->nextp")
			}
			mp.nextp.set(p)
			notewakeup(&mp.park)
		} else {
			// Start M to run P. Do not start another M below.
			newm(nil, p, -1)
		}
	}

	// Capture start-the-world time before doing clean-up tasks.
	if now == 0 {
		now = nanotime()
	}
	totalTime := now - w.startedStopping
	if w.reason.isGC() {
		sched.stwTotalTimeGC.record(totalTime)
	} else {
		sched.stwTotalTimeOther.record(totalTime)
	}
	trace := traceAcquire()
	if trace.ok() {
		trace.STWDone()
		traceRelease(trace)
	}

	// Wakeup an additional proc in case we have excessive runnable goroutines
	// in local queues or in the global queue. If we don't, the proc will park itself.
	// If we have lots of excessive work, resetspinning will unpark additional procs as necessary.
	wakep()

	releasem(mp)

	return now
}

// usesLibcall indicates whether this runtime performs system calls
// via libcall.
func usesLibcall() bool {
	switch GOOS {
	case "aix", "darwin", "illumos", "ios", "openbsd", "solaris", "windows":
		return true
	}
	return false
}

// mStackIsSystemAllocated indicates whether this runtime starts on a
// system-allocated stack.
func mStackIsSystemAllocated() bool {
	switch GOOS {
	case "aix", "darwin", "plan9", "illumos", "ios", "openbsd", "solaris", "windows":
		return true
	}
	return false
}

// mstart is the entry-point for new Ms.
// It is written in assembly, uses ABI0, is marked TOPFRAME, and calls mstart0.
func mstart()

// mstart0 is the Go entry-point for new Ms.
// This must not split the stack because we may not even have stack
// bounds set up yet.
//
// May run during STW (because it doesn't have a P yet), so write
// barriers are not allowed.
//
//go:nosplit
//go:nowritebarrierrec
func mstart0() {
	gp := getg()

	osStack := gp.stack.lo == 0
	if osStack {
		// Initialize stack bounds from system stack.
		// Cgo may have left stack size in stack.hi.
		// minit may update the stack bounds.
		//
		// Note: these bounds may not be very accurate.
		// We set hi to &size, but there are things above
		// it. The 1024 is supposed to compensate this,
		// but is somewhat arbitrary.
		size := gp.stack.hi
		if size == 0 {
			size = 16384 * sys.StackGuardMultiplier
		}
		gp.stack.hi = uintptr(noescape(unsafe.Pointer(&size)))
		gp.stack.lo = gp.stack.hi - size + 1024
	}
	// Initialize stack guard so that we can start calling regular
	// Go code.
	gp.stackguard0 = gp.stack.lo + stackGuard
	// This is the g0, so we can also call go:systemstack
	// functions, which check stackguard1.
	gp.stackguard1 = gp.stackguard0
	mstart1()

	// Exit this thread.
	if mStackIsSystemAllocated() {
		// Windows, Solaris, illumos, Darwin, AIX and Plan 9 always system-allocate
		// the stack, but put it in gp.stack before mstart,
		// so the logic above hasn't set osStack yet.
		osStack = true
	}
	mexit(osStack)
}

// mstart1 must run on the M's g0 and throws otherwise. It records the return
// point into mstart0 in g0.sched, runs per-thread initialization, and enters
// the scheduler.
//
// The go:noinline is to guarantee the sys.GetCallerPC/sys.GetCallerSP below are safe,
// so that we can set up g0.sched to return to the call of mstart1 above.
//
//go:noinline
func mstart1() {
	gp := getg()

	if gp != gp.m.g0 {
		throw("bad runtime·mstart")
	}

	// Set up m.g0.sched as a label returning to just
	// after the mstart1 call in mstart0 above, for use by goexit0 and mcall.
	// We're never coming back to mstart1 after we call schedule,
	// so other calls can reuse the current frame.
	// And goexit0 does a gogo that needs to return from mstart1
	// and let mstart0 exit the thread.
	gp.sched.g = guintptr(unsafe.Pointer(gp))
	gp.sched.pc = sys.GetCallerPC()
	gp.sched.sp = sys.GetCallerSP()

	asminit()
	minit()

	// Install signal handlers; after minit so that minit can
	// prepare the thread to be able to handle the signals.
	if gp.m == &m0 {
		mstartm0()
	}

	if debug.dataindependenttiming == 1 {
		sys.EnableDIT()
	}

	if fn := gp.m.mstartfn; fn != nil {
		fn()
	}

	// Every M except m0 takes the P left for it in nextp.
	if gp.m != &m0 {
		acquirep(gp.m.nextp.ptr())
		gp.m.nextp = 0
	}
	schedule()
}

// mstartm0 implements part of mstart1 that only runs on the m0.
//
// Write barriers are allowed here because we know the GC can't be
// running yet, so they'll be no-ops.
//
//go:yeswritebarrierrec
func mstartm0() {
	// Create an extra M for callbacks on threads not created by Go.
	// An extra M is also needed on Windows for callbacks created by
	// syscall.NewCallback. See issue #6751 for details.
	if (iscgo || GOOS == "windows") && !cgoHasExtraM {
		cgoHasExtraM = true
		newextram()
	}
	initsig(false)
}

// mPark causes a thread to park itself, returning once woken.
//
//go:nosplit
func mPark() {
	gp := getg()
	// This M might stay parked through an entire GC cycle.
	// Erase any leftovers on the signal stack.
	if goexperiment.RuntimeSecret {
		eraseSecretsSignalStk()
	}
	notesleep(&gp.m.park)
	noteclear(&gp.m.park)
}

// mexit tears down and exits the current thread.
//
// Don't call this directly to exit the thread, since it must run at
// the top of the thread stack. Instead, use gogo(&gp.m.g0.sched) to
// unwind the stack to the point that exits the thread.
//
// It is entered with m.p != nil, so write barriers are allowed. It
// will release the P before exiting.
//
//go:yeswritebarrierrec
func mexit(osStack bool) {
	mp := getg().m

	if mp == &m0 {
		// This is the main thread. Just wedge it.
		//
		// On Linux, exiting the main thread puts the process
		// into a non-waitable zombie state.
On Plan 9, 2000 // exiting the main thread unblocks wait even though 2001 // other threads are still running. On Solaris we can 2002 // neither exitThread nor return from mstart. Other 2003 // bad things probably happen on other platforms. 2004 // 2005 // We could try to clean up this M more before wedging 2006 // it, but that complicates signal handling. 2007 handoffp(releasep()) 2008 lock(&sched.lock) 2009 sched.nmfreed++ 2010 checkdead() 2011 unlock(&sched.lock) 2012 mPark() 2013 throw("locked m0 woke up") 2014 } 2015 2016 sigblock(true) 2017 unminit() 2018 2019 // Free the gsignal stack. 2020 if mp.gsignal != nil { 2021 stackfree(mp.gsignal.stack) 2022 if valgrindenabled { 2023 valgrindDeregisterStack(mp.gsignal.valgrindStackID) 2024 mp.gsignal.valgrindStackID = 0 2025 } 2026 // On some platforms, when calling into VDSO (e.g. nanotime) 2027 // we store our g on the gsignal stack, if there is one. 2028 // Now the stack is freed, unlink it from the m, so we 2029 // won't write to it when calling VDSO code. 2030 mp.gsignal = nil 2031 } 2032 2033 // Free vgetrandom state. 2034 vgetrandomDestroy(mp) 2035 2036 // Clear the self pointer so Ps don't access this M after it is freed, 2037 // or keep it alive. 2038 mp.self.clear() 2039 2040 // Remove m from allm. 2041 lock(&sched.lock) 2042 for pprev := &allm; *pprev != nil; pprev = &(*pprev).alllink { 2043 if *pprev == mp { 2044 *pprev = mp.alllink 2045 goto found 2046 } 2047 } 2048 throw("m not found in allm") 2049 found: 2050 // Events must not be traced after this point. 2051 2052 // Delay reaping m until it's done with the stack. 2053 // 2054 // Put mp on the free list, though it will not be reaped while freeWait 2055 // is freeMWait. mp is no longer reachable via allm, so even if it is 2056 // on an OS stack, we must keep a reference to mp alive so that the GC 2057 // doesn't free mp while we are still using it. 
2058 // 2059 // Note that the free list must not be linked through alllink because 2060 // some functions walk allm without locking, so may be using alllink. 2061 // 2062 // N.B. It's important that the M appears on the free list simultaneously 2063 // with it being removed so that the tracer can find it. 2064 mp.freeWait.Store(freeMWait) 2065 mp.freelink = sched.freem 2066 sched.freem = mp 2067 unlock(&sched.lock) 2068 2069 atomic.Xadd64(&ncgocall, int64(mp.ncgocall)) 2070 sched.totalRuntimeLockWaitTime.Add(mp.mLockProfile.waitTime.Load()) 2071 2072 // Release the P. 2073 handoffp(releasep()) 2074 // After this point we must not have write barriers. 2075 2076 // Invoke the deadlock detector. This must happen after 2077 // handoffp because it may have started a new M to take our 2078 // P's work. 2079 lock(&sched.lock) 2080 sched.nmfreed++ 2081 checkdead() 2082 unlock(&sched.lock) 2083 2084 if GOOS == "darwin" || GOOS == "ios" { 2085 // Make sure pendingPreemptSignals is correct when an M exits. 2086 // For #41702. 2087 if mp.signalPending.Load() != 0 { 2088 pendingPreemptSignals.Add(-1) 2089 } 2090 } 2091 2092 // Destroy all allocated resources. After this is called, we may no 2093 // longer take any locks. 2094 mdestroy(mp) 2095 2096 if osStack { 2097 // No more uses of mp, so it is safe to drop the reference. 2098 mp.freeWait.Store(freeMRef) 2099 2100 // Return from mstart and let the system thread 2101 // library free the g0 stack and terminate the thread. 2102 return 2103 } 2104 2105 // mstart is the thread's entry point, so there's nothing to 2106 // return to. Exit the thread directly. exitThread will clear 2107 // m.freeWait when it's done with the stack and the m can be 2108 // reaped. 2109 exitThread(&mp.freeWait) 2110 } 2111 2112 // forEachP calls fn(p) for every P p when p reaches a GC safe point. 2113 // If a P is currently executing code, this will bring the P to a GC 2114 // safe point and execute fn on that P. 
// forEachP calls fn(p) for every P p when p reaches a GC safe point.
// If a P is currently executing code, this will bring the P to a GC
// safe point and execute fn on that P. If the P is not executing code
// (it is idle or in a syscall), this will call fn(p) directly while
// preventing the P from exiting its state. This does not ensure that
// fn will run on every CPU executing Go code, but it acts as a global
// memory barrier. GC uses this as a "ragged barrier."
//
// reason is the wait reason given to the calling goroutine while it
// is held in _Gwaiting for the duration of the call.
//
// The caller must hold worldsema. fn must not refer to any
// part of the current goroutine's stack, since the GC may move it.
func forEachP(reason waitReason, fn func(*p)) {
	systemstack(func() {
		// We are on the system stack here, so the user goroutine
		// is the M's curg rather than getg().
		gp := getg().m.curg
		// Mark the user stack as preemptible so that it may be scanned
		// by the GC or observed by the execution tracer. Otherwise, our
		// attempt to force all P's to a safepoint could result in a
		// deadlock as we attempt to preempt a goroutine that's trying
		// to preempt us (e.g. for a stack scan).
		//
		// casGToWaitingForSuspendG marks the goroutine as ineligible for a
		// stack shrink, effectively pinning the stack in memory for the duration.
		//
		// N.B. The execution tracer is not aware of this status transition and
		// handles it specially based on the wait reason.
		casGToWaitingForSuspendG(gp, _Grunning, reason)
		forEachPInternal(fn)
		// Undo the transition above now that every P has run fn.
		casgstatus(gp, _Gwaiting, _Grunning)
	})
}
// forEachPInternal calls fn(p) for every P p when p reaches a GC safe point.
// It is the internal implementation of forEachP.
//
// The caller must hold worldsema and either must ensure that a GC is not
// running (otherwise this may deadlock with the GC trying to preempt this P)
// or it must leave its goroutine in a preemptible state before it switches
// to the systemstack. Due to these restrictions, prefer forEachP when possible.
//
//go:systemstack
func forEachPInternal(fn func(*p)) {
	mp := acquirem()
	pp := getg().m.p.ptr()

	lock(&sched.lock)
	if sched.safePointWait != 0 {
		throw("forEachP: sched.safePointWait != 0")
	}
	// Every P except the current one (pp) has to report in; pp runs
	// fn directly below.
	sched.safePointWait = gomaxprocs - 1
	sched.safePointFn = fn

	// Ask all Ps to run the safe point function.
	for _, p2 := range allp {
		if p2 != pp {
			atomic.Store(&p2.runSafePointFn, 1)
		}
	}
	preemptall()

	// Any P entering _Pidle or a system call from now on will observe
	// p.runSafePointFn == 1 and will call runSafePointFn when
	// changing its status to _Pidle.

	// Run safe point function for all idle Ps. sched.pidle will
	// not change because we hold sched.lock.
	for p := sched.pidle.ptr(); p != nil; p = p.link.ptr() {
		// The CAS decides whether we or the P itself (via
		// runSafePointFn) runs fn; exactly one of us wins.
		if atomic.Cas(&p.runSafePointFn, 1, 0) {
			fn(p)
			sched.safePointWait--
		}
	}

	// Decide whether to wait while still holding sched.lock.
	wait := sched.safePointWait > 0
	unlock(&sched.lock)

	// Run fn for the current P.
	fn(pp)

	// Force Ps currently in a system call into _Pidle and hand them
	// off to induce safe point function execution.
	for _, p2 := range allp {
		if atomic.Load(&p2.runSafePointFn) != 1 {
			// Already ran it.
			continue
		}
		if thread, ok := setBlockOnExitSyscall(p2); ok {
			thread.takeP()
			thread.resume()
			handoffp(p2)
		}
	}

	// Wait for remaining Ps to run fn.
	if wait {
		for {
			// Wait for 100us, then try to re-preempt in
			// case of any races.
			//
			// Requires system stack.
			if notetsleep(&sched.safePointNote, 100*1000) {
				noteclear(&sched.safePointNote)
				break
			}
			preemptall()
		}
	}
	// Sanity checks: nobody is outstanding and every flag was consumed.
	if sched.safePointWait != 0 {
		throw("forEachP: not done")
	}
	for _, p2 := range allp {
		if p2.runSafePointFn != 0 {
			throw("forEachP: P did not run fn")
		}
	}

	lock(&sched.lock)
	sched.safePointFn = nil
	unlock(&sched.lock)
	releasem(mp)
}

// runSafePointFn runs the safe point function, if any, for this P.
// This should be called like
//
//	if getg().m.p.runSafePointFn != 0 {
//		runSafePointFn()
//	}
//
// runSafePointFn must be checked on any transition in to _Pidle or
// when entering a system call to avoid a race where forEachP sees
// that the P is running just before the P goes into _Pidle/system call
// and neither forEachP nor the P run the safe-point function.
func runSafePointFn() {
	p := getg().m.p.ptr()
	// Resolve the race between forEachP running the safe-point
	// function on this P's behalf and this P running the
	// safe-point function directly.
	if !atomic.Cas(&p.runSafePointFn, 1, 0) {
		return
	}
	sched.safePointFn(p)
	lock(&sched.lock)
	sched.safePointWait--
	// The last P to report in wakes the waiter in forEachPInternal.
	if sched.safePointWait == 0 {
		notewakeup(&sched.safePointNote)
	}
	unlock(&sched.lock)
}
// allocm allocates a new m unassociated with any thread and returns it.
// It can use pp for allocation context if needed.
// fn is recorded as the new m's m.mstartfn.
// id is optional pre-allocated m ID. Omit by passing -1.
//
// As a side effect, allocm also sweeps sched.freem, dropping exited Ms
// whose stacks are no longer in use.
//
// This function is allowed to have write barriers even if the caller
// isn't because it borrows pp.
//
//go:yeswritebarrierrec
func allocm(pp *p, fn func(), id int64) *m {
	allocmLock.rlock()

	// The caller owns pp, but we may borrow (i.e., acquirep) it. We must
	// disable preemption to ensure it is not stolen, which would make the
	// caller lose ownership.
	acquirem()

	gp := getg()
	if gp.m.p == 0 {
		acquirep(pp) // temporarily borrow p for mallocs in this function
	}

	// Release the free M list. We need to do this somewhere and
	// this may free up a stack we can use.
	if sched.freem != nil {
		lock(&sched.lock)
		// newList collects the Ms that cannot be released yet; it
		// replaces sched.freem once the sweep is done.
		var newList *m
		for freem := sched.freem; freem != nil; {
			// Wait for freeWait to indicate that freem's stack is unused.
			wait := freem.freeWait.Load()
			if wait == freeMWait {
				// Still in use: keep it by pushing it onto newList.
				next := freem.freelink
				freem.freelink = newList
				newList = freem
				freem = next
				continue
			}
			// Drop any remaining trace resources.
			// Ms can continue to emit events all the way until wait != freeMWait,
			// so it's only safe to call traceThreadDestroy at this point.
			if traceEnabled() || traceShuttingDown() {
				traceThreadDestroy(freem)
			}
			// Free the stack if needed. For freeMRef, there is
			// nothing to do except drop freem from the sched.freem
			// list.
			if wait == freeMStack {
				// stackfree must be on the system stack, but allocm is
				// reachable off the system stack transitively from
				// startm.
				systemstack(func() {
					stackfree(freem.g0.stack)
					if valgrindenabled {
						valgrindDeregisterStack(freem.g0.valgrindStackID)
						freem.g0.valgrindStackID = 0
					}
				})
			}
			freem = freem.freelink
		}
		sched.freem = newList
		unlock(&sched.lock)
	}

	mp := &new(mPadded).m
	mp.mstartfn = fn
	mcommoninit(mp, id)

	// In case of cgo or Solaris or illumos or Darwin, pthread_create will make us a stack.
	// Windows and Plan 9 will layout sched stack on OS stack.
	if iscgo || mStackIsSystemAllocated() {
		mp.g0 = malg(-1)
	} else {
		mp.g0 = malg(16384 * sys.StackGuardMultiplier)
	}
	mp.g0.m = mp

	// Give pp back if this M is currently holding it.
	if pp == gp.m.p.ptr() {
		releasep()
	}

	releasem(gp.m)
	allocmLock.runlock()
	return mp
}
// needm is called when a cgo callback happens on a
// thread without an m (a thread not created by Go).
// In this case, needm is expected to find an m to use
// and return with m, g initialized correctly.
// Since m and g are not set now (likely nil, but see below)
// needm is limited in what routines it can call. In particular
// it can only call nosplit functions (textflag 7) and cannot
// do any scheduling that requires an m.
//
// In order to avoid needing heavy lifting here, we adopt
// the following strategy: there is a stack of available m's
// that can be stolen. Using compare-and-swap
// to pop from the stack has ABA races, so we simulate
// a lock instead: lockextra takes the stack head and leaves
// a "locked" sentinel value (1) in its place.
// This serves as a simple spin lock that we can use even
// without an m. The thread that locks the stack in this way
// unlocks the stack by storing a valid stack head pointer.
//
// In order to make sure that there is always an m structure
// available to be stolen, we maintain the invariant that there
// is always one more than needed. At the beginning of the
// program (if cgo is in use) the list is seeded with a single m.
// If needm finds that it has taken the last m off the list, its job
// is - once it has installed its own m so that it can do things like
// allocate memory - to create a spare m and put it on the list.
//
// Each of these extra m's also has a g0 and a curg that are
// pressed into service as the scheduling stack and current
// goroutine for the duration of the cgo callback.
//
// dropm is called to put the m back on the list:
//  1. when the callback is done with the m, on non-pthread platforms;
//  2. when the C thread exits, on pthread platforms.
//
// The signal argument indicates whether we're called from a signal
// handler.
//
//go:nosplit
func needm(signal bool) {
	if (iscgo || GOOS == "windows") && !cgoHasExtraM {
		// Can happen if C/C++ code calls Go from a global ctor.
		// Can also happen on Windows if a global ctor uses a
		// callback created by syscall.NewCallback. See issue #6751
		// for details.
		//
		// Can not throw, because scheduler is not initialized yet.
		writeErrStr("fatal error: cgo callback before cgo call\n")
		exit(1)
	}

	// Save and block signals before getting an M.
	// The signal handler may call needm itself,
	// and we must avoid a deadlock. Also, once g is installed,
	// any incoming signals will try to execute,
	// but we won't have the sigaltstack settings and other data
	// set up appropriately until the end of minit, which will
	// unblock the signals. This is the same dance as when
	// starting a new m to run Go code via newosproc.
	var sigmask sigset
	sigsave(&sigmask)
	sigblock(false)

	// getExtraM is safe here because of the invariant above,
	// that the extra list always contains or will soon contain
	// at least one m.
	mp, last := getExtraM()

	// Set needextram when we've just emptied the list,
	// so that the eventual call into cgocallbackg will
	// allocate a new m for the extra list. We delay the
	// allocation until then so that it can be done
	// after exitsyscall makes sure it is okay to be
	// running at all (that is, there's no garbage collection
	// running right now).
	mp.needextram = last

	// Store the original signal mask for use by minit.
	mp.sigmask = sigmask

	// Install TLS on some platforms (previously setg
	// would do this if necessary).
	osSetupTLS(mp)

	// Install g (= m->g0) and set the stack bounds
	// to match the current stack.
	setg(mp.g0)
	sp := sys.GetCallerSP()
	callbackUpdateSystemStack(mp, sp, signal)

	// We must mark that we are already in Go now.
	// Otherwise, we may call needm again when we get a signal, before cgocallbackg1,
	// which means the extram list may be empty, that will cause a deadlock.
	mp.isExtraInC = false

	// Initialize this thread to use the m.
	asminit()
	minit()

	// Emit a trace event for this dead -> syscall transition,
	// but only if we're not in a signal handler.
	//
	// N.B. the tracer can run on a bare M just fine, we just have
	// to make sure to do this before setg(nil) and unminit.
	var trace traceLocker
	if !signal {
		trace = traceAcquire()
	}

	// mp.curg is now a real goroutine.
	// It stops being counted in ngsys (see oneNewExtraM, which adds it).
	casgstatus(mp.curg, _Gdeadextra, _Gsyscall)
	sched.ngsys.Add(-1)

	// This is technically inaccurate, but we set isExtraInC to false above,
	// and so we need to call addGSyscallNoP to keep the two pieces of state
	// consistent (it's only updated when isExtraInC is false). More specifically,
	// when we get to cgocallbackg and exitsyscall, we'll be looking for a P, and
	// since isExtraInC is false, we will decrement this metric.
	//
	// The inaccuracy is thankfully transient: only until this thread can get a P.
	// We're going into Go anyway, so it's okay to pretend we're a real goroutine now.
	addGSyscallNoP(mp)

	if !signal {
		if trace.ok() {
			trace.GoCreateSyscall(mp.curg)
			traceRelease(trace)
		}
	}
	// Remember how we were entered; dropm consults this to decide
	// whether to emit the matching trace event.
	mp.isExtraInSig = signal
}

// needAndBindM acquires an extra m and binds it to the C thread when a
// pthread key has been created.
//
//go:nosplit
func needAndBindM() {
	needm(false)

	if _cgo_pthread_key_created != nil && *(*uintptr)(_cgo_pthread_key_created) != 0 {
		cgoBindM()
	}
}

// newextram allocates m's and puts them on the extra list.
// It is called with a working local m, so that it can do things
// like call schedlock and allocate.
//
// It creates one m per thread recorded in extraMWaiters (resetting
// that count to zero), or a single m if there are no waiters and the
// list is empty.
func newextram() {
	c := extraMWaiters.Swap(0)
	if c > 0 {
		for i := uint32(0); i < c; i++ {
			oneNewExtraM()
		}
	} else if extraMLength.Load() == 0 {
		// Make sure there is at least one extra M.
		oneNewExtraM()
	}
}
2522 mp := allocm(nil, nil, -1) 2523 gp := malg(4096) 2524 gp.sched.pc = abi.FuncPCABI0(goexit) + sys.PCQuantum 2525 gp.sched.sp = gp.stack.hi 2526 gp.sched.sp -= 4 * goarch.PtrSize // extra space in case of reads slightly beyond frame 2527 gp.sched.lr = 0 2528 gp.sched.g = guintptr(unsafe.Pointer(gp)) 2529 gp.syscallpc = gp.sched.pc 2530 gp.syscallsp = gp.sched.sp 2531 gp.stktopsp = gp.sched.sp 2532 // malg returns status as _Gidle. Change to _Gdeadextra before 2533 // adding to allg where GC can see it. _Gdeadextra hides this 2534 // from traceback and stack scans. 2535 casgstatus(gp, _Gidle, _Gdeadextra) 2536 gp.m = mp 2537 mp.curg = gp 2538 mp.isextra = true 2539 // mark we are in C by default. 2540 mp.isExtraInC = true 2541 mp.lockedInt++ 2542 mp.lockedg.set(gp) 2543 gp.lockedm.set(mp) 2544 gp.goid = sched.goidgen.Add(1) 2545 if raceenabled { 2546 gp.racectx = racegostart(abi.FuncPCABIInternal(newextram) + sys.PCQuantum) 2547 } 2548 // put on allg for garbage collector 2549 allgadd(gp) 2550 2551 // gp is now on the allg list, but we don't want it to be 2552 // counted by gcount. It would be more "proper" to increment 2553 // sched.ngfree, but that requires locking. Incrementing ngsys 2554 // has the same effect. 2555 sched.ngsys.Add(1) 2556 2557 // Add m to the extra list. 2558 addExtraM(mp) 2559 } 2560 2561 // dropm puts the current m back onto the extra list. 2562 // 2563 // 1. On systems without pthreads, like Windows 2564 // dropm is called when a cgo callback has called needm but is now 2565 // done with the callback and returning back into the non-Go thread. 2566 // 2567 // The main expense here is the call to signalstack to release the 2568 // m's signal stack, and then the call to needm on the next callback 2569 // from this thread. It is tempting to try to save the m for next time, 2570 // which would eliminate both these costs, but there might not be 2571 // a next time: the current thread (which Go does not control) might exit. 
// dropm puts the current m back onto the extra list.
//
// 1. On systems without pthreads, like Windows
// dropm is called when a cgo callback has called needm but is now
// done with the callback and returning back into the non-Go thread.
//
// The main expense here is the call to signalstack to release the
// m's signal stack, and then the call to needm on the next callback
// from this thread. It is tempting to try to save the m for next time,
// which would eliminate both these costs, but there might not be
// a next time: the current thread (which Go does not control) might exit.
// If we saved the m for that thread, there would be an m leak each time
// such a thread exited. Instead, we acquire and release an m on each
// call. These should typically not be scheduling operations, just a few
// atomics, so the cost should be small.
//
// 2. On systems with pthreads
// dropm is called while a non-Go thread is exiting.
// We allocate a pthread per-thread variable using pthread_key_create,
// to register a thread-exit-time destructor.
// The g is stored into the thread-specific value associated with the
// pthread key on the first return back to C, so that the destructor
// invokes dropm while the non-Go thread is exiting.
// This is much faster since it avoids expensive signal-related syscalls.
//
// This may run without a P, so //go:nowritebarrierrec is required.
//
// This may run with a different stack than was recorded in g0 (there is no
// call to callbackUpdateSystemStack prior to dropm), so this must be
// //go:nosplit to avoid the stack bounds check.
//
//go:nowritebarrierrec
//go:nosplit
func dropm() {
	// Clear m and g, and return m to the extra list.
	// After the call to setg we can only call nosplit functions
	// with no pointer manipulation.
	mp := getg().m

	// Emit a trace event for this syscall -> dead transition.
	//
	// N.B. the tracer can run on a bare M just fine, we just have
	// to make sure to do this before setg(nil) and unminit.
	var trace traceLocker
	if !mp.isExtraInSig {
		trace = traceAcquire()
	}

	// Return mp.curg to _Gdeadextra state.
	// This reverses the status change and counter updates made by needm.
	casgstatus(mp.curg, _Gsyscall, _Gdeadextra)
	mp.curg.preemptStop = false
	sched.ngsys.Add(1)
	decGSyscallNoP(mp)

	if !mp.isExtraInSig {
		if trace.ok() {
			trace.GoDestroySyscall()
			traceRelease(trace)
		}
	}

	// Trash syscalltick so that it doesn't line up with mp.old.syscalltick anymore.
	//
	// In the new tracer, we model needm and dropm as a goroutine being created and
	// destroyed respectively. The m then might get reused with a different procid but
	// still with a reference to oldp, and still with the same syscalltick. The next
	// time a G is "created" in needm, it'll return and quietly reacquire its P from a
	// different m with a different procid, which will confuse the trace parser. By
	// trashing syscalltick, we ensure that it'll appear as if we lost the P to the
	// tracer parser and that we just reacquired it.
	//
	// Trash the value by decrementing because that gets us as far away from the value
	// the syscall exit code expects as possible. Setting to zero is risky because
	// syscalltick could already be zero (and in fact, is initialized to zero).
	mp.syscalltick--

	// Reset trace state unconditionally. This goroutine is being 'destroyed'
	// from the perspective of the tracer.
	mp.curg.trace.reset()

	// Flush all the M's buffers. This is necessary because the M might
	// be used on a different thread with a different procid, so we have
	// to make sure we don't write into the same buffer.
	if traceEnabled() || traceShuttingDown() {
		// Acquire sched.lock across thread destruction. One of the invariants of the tracer
		// is that a thread cannot disappear from the tracer's view (allm or freem) without
		// it noticing, so it requires that sched.lock be held over traceThreadDestroy.
		//
		// This isn't strictly necessary in this case, because this thread never leaves allm,
		// but the critical section is short and dropm is rare on pthread platforms, so just
		// take the lock and play it safe. traceThreadDestroy also asserts that the lock is held.
		lock(&sched.lock)
		traceThreadDestroy(mp)
		unlock(&sched.lock)
	}
	mp.isExtraInSig = false

	// Block signals before unminit.
	// Unminit unregisters the signal handling stack (but needs g on some systems).
	// Setg(nil) clears g, which is the signal handler's cue not to run Go handlers.
	// It's important not to try to handle a signal between those two steps.
	//
	// The saved mask is copied out of mp first because it is restored
	// below, after mp has been handed back to the extra list.
	sigmask := mp.sigmask
	sigblock(false)
	unminit()

	setg(nil)

	// Clear g0 stack bounds to ensure that needm always refreshes the
	// bounds when reusing this M.
	g0 := mp.g0
	g0.stack.hi = 0
	g0.stack.lo = 0
	g0.stackguard0 = 0
	g0.stackguard1 = 0
	mp.g0StackAccurate = false

	putExtraM(mp)

	msigrestore(sigmask)
}
2697 // 2698 // NOTE: this always runs without a P, so, nowritebarrierrec required. 2699 // 2700 //go:nosplit 2701 //go:nowritebarrierrec 2702 func cgoBindM() { 2703 if GOOS == "windows" || GOOS == "plan9" { 2704 fatal("bindm in unexpected GOOS") 2705 } 2706 g := getg() 2707 if g.m.g0 != g { 2708 fatal("the current g is not g0") 2709 } 2710 if _cgo_bindm != nil { 2711 asmcgocall(_cgo_bindm, unsafe.Pointer(g)) 2712 } 2713 } 2714 2715 // A helper function for EnsureDropM. 2716 // 2717 // getm should be an internal detail, 2718 // but widely used packages access it using linkname. 2719 // Notable members of the hall of shame include: 2720 // - fortio.org/log 2721 // 2722 // Do not remove or change the type signature. 2723 // See go.dev/issue/67401. 2724 // 2725 //go:linkname getm 2726 func getm() uintptr { 2727 return uintptr(unsafe.Pointer(getg().m)) 2728 } 2729 2730 var ( 2731 // Locking linked list of extra M's, via mp.schedlink. Must be accessed 2732 // only via lockextra/unlockextra. 2733 // 2734 // Can't be atomic.Pointer[m] because we use an invalid pointer as a 2735 // "locked" sentinel value. M's on this list remain visible to the GC 2736 // because their mp.curg is on allgs. 2737 extraM atomic.Uintptr 2738 // Number of M's in the extraM list. 2739 extraMLength atomic.Uint32 2740 // Number of waiters in lockextra. 2741 extraMWaiters atomic.Uint32 2742 2743 // Number of extra M's in use by threads. 2744 extraMInUse atomic.Uint32 2745 ) 2746 2747 // lockextra locks the extra list and returns the list head. 2748 // The caller must unlock the list by storing a new list head 2749 // to extram. If nilokay is true, then lockextra will 2750 // return a nil list head if that's what it finds. If nilokay is false, 2751 // lockextra will keep waiting until the list head is no longer nil. 
// lockextra locks the extra list and returns the list head.
// The caller must unlock the list by storing a new list head
// to extraM, which is what unlockextra does. If nilokay is true, then
// lockextra will return a nil list head if that's what it finds. If
// nilokay is false, lockextra will keep waiting until the list head is
// no longer nil.
//
// The list is locked by swapping the head for the sentinel value 1;
// other callers spin while they observe that value.
//
//go:nosplit
func lockextra(nilokay bool) *m {
	const locked = 1

	// incr records whether this call has already counted itself in
	// extraMWaiters, so that it does so at most once.
	incr := false
	for {
		old := extraM.Load()
		if old == locked {
			// Someone else holds the list; yield and retry.
			osyield_no_g()
			continue
		}
		if old == 0 && !nilokay {
			if !incr {
				// Add 1 to the number of threads
				// waiting for an M.
				// This is cleared by newextram.
				extraMWaiters.Add(1)
				incr = true
			}
			usleep_no_g(1)
			continue
		}
		if extraM.CompareAndSwap(old, locked) {
			return (*m)(unsafe.Pointer(old))
		}
		// Lost the race for the head; yield and retry.
		osyield_no_g()
		continue
	}
}

// unlockextra unlocks the extra list by storing mp as the new list
// head. Before doing so it adjusts extraMLength by delta, the change
// in list length made while the list was locked.
//
//go:nosplit
func unlockextra(mp *m, delta int32) {
	extraMLength.Add(delta)
	extraM.Store(uintptr(unsafe.Pointer(mp)))
}

// Return an M from the extra M list. Returns last == true if the list becomes
// empty because of this call.
//
// Spins waiting for an extra M, so caller must ensure that the list always
// contains or will soon contain at least one M.
//
//go:nosplit
func getExtraM() (mp *m, last bool) {
	mp = lockextra(false)
	extraMInUse.Add(1)
	// Pop mp: its successor becomes the new head.
	unlockextra(mp.schedlink.ptr(), -1)
	return mp, mp.schedlink.ptr() == nil
}

// Returns an extra M back to the list. mp must be from getExtraM. Newly
// allocated M's should use addExtraM.
//
//go:nosplit
func putExtraM(mp *m) {
	extraMInUse.Add(-1)
	addExtraM(mp)
}

// Adds a newly allocated M to the extra M list.
// mp is pushed at the head, linked to the previous head through
// mp.schedlink.
//
//go:nosplit
func addExtraM(mp *m) {
	mnext := lockextra(true)
	mp.schedlink.set(mnext)
	unlockextra(mp, 1)
}
// newm creates a new m. It will start off with a call to fn, or else the scheduler.
// fn needs to be static and not a heap allocated closure.
// May run with m.p==nil, so write barriers are not allowed.
//
// pp is passed to allocm and recorded as the new m's nextp.
//
// id is optional pre-allocated m ID. Omit by passing -1.
//
// The OS thread is started either directly via newm1 or, when the
// calling thread is locked or in a cgo call (except on Plan 9), by
// queueing the m for the template thread.
//
//go:nowritebarrierrec
func newm(fn func(), pp *p, id int64) {
	// allocm adds a new M to allm, but they do not start until created by
	// the OS in newm1 or the template thread.
	//
	// doAllThreadsSyscall requires that every M in allm will eventually
	// start and be signal-able, even with a STW.
	//
	// Disable preemption here until we start the thread to ensure that
	// newm is not preempted between allocm and starting the new thread,
	// ensuring that anything added to allm is guaranteed to eventually
	// start.
	acquirem()

	mp := allocm(pp, fn, id)
	mp.nextp.set(pp)
	mp.sigmask = initSigmask
	if gp := getg(); gp != nil && gp.m != nil && (gp.m.lockedExt != 0 || gp.m.incgo) && GOOS != "plan9" {
		// We're on a locked M or a thread that may have been
		// started by C. The kernel state of this thread may
		// be strange (the user may have locked it for that
		// purpose). We don't want to clone that into another
		// thread. Instead, ask a known-good thread to create
		// the thread for us.
		//
		// This is disabled on Plan 9. See golang.org/issue/22227.
		//
		// TODO: This may be unnecessary on Windows, which
		// doesn't model thread creation off fork.
		lock(&newmHandoff.lock)
		if newmHandoff.haveTemplateThread == 0 {
			throw("on a locked thread with no template thread")
		}
		// Push mp onto the handoff list, linked through schedlink.
		mp.schedlink = newmHandoff.newm
		newmHandoff.newm.set(mp)
		// Wake the template thread if it is sleeping on the note.
		if newmHandoff.waiting {
			newmHandoff.waiting = false
			notewakeup(&newmHandoff.wake)
		}
		unlock(&newmHandoff.lock)
		// The M has not started yet, but the template thread does not
		// participate in STW, so it will always process queued Ms and
		// it is safe to releasem.
		releasem(getg().m)
		return
	}
	newm1(mp)
	releasem(getg().m)
}
2930 asmcgocall(_cgo_thread_start, unsafe.Pointer(&ts)) 2931 execLock.runlock() 2932 return 2933 } 2934 execLock.rlock() // Prevent process clone. 2935 newosproc(mp) 2936 execLock.runlock() 2937 } 2938 2939 // startTemplateThread starts the template thread if it is not already 2940 // running. 2941 // 2942 // The calling thread must itself be in a known-good state. 2943 func startTemplateThread() { 2944 if GOARCH == "wasm" { // no threads on wasm yet 2945 return 2946 } 2947 2948 // Disable preemption to guarantee that the template thread will be 2949 // created before a park once haveTemplateThread is set. 2950 mp := acquirem() 2951 if !atomic.Cas(&newmHandoff.haveTemplateThread, 0, 1) { 2952 releasem(mp) 2953 return 2954 } 2955 newm(templateThread, nil, -1) 2956 releasem(mp) 2957 } 2958 2959 // templateThread is a thread in a known-good state that exists solely 2960 // to start new threads in known-good states when the calling thread 2961 // may not be in a good state. 2962 // 2963 // Many programs never need this, so templateThread is started lazily 2964 // when we first enter a state that might lead to running on a thread 2965 // in an unknown state. 2966 // 2967 // templateThread runs on an M without a P, so it must not have write 2968 // barriers. 2969 // 2970 //go:nowritebarrierrec 2971 func templateThread() { 2972 lock(&sched.lock) 2973 sched.nmsys++ 2974 checkdead() 2975 unlock(&sched.lock) 2976 2977 for { 2978 lock(&newmHandoff.lock) 2979 for newmHandoff.newm != 0 { 2980 newm := newmHandoff.newm.ptr() 2981 newmHandoff.newm = 0 2982 unlock(&newmHandoff.lock) 2983 for newm != nil { 2984 next := newm.schedlink.ptr() 2985 newm.schedlink = 0 2986 newm1(newm) 2987 newm = next 2988 } 2989 lock(&newmHandoff.lock) 2990 } 2991 newmHandoff.waiting = true 2992 noteclear(&newmHandoff.wake) 2993 unlock(&newmHandoff.lock) 2994 notesleep(&newmHandoff.wake) 2995 } 2996 } 2997 2998 // Stops execution of the current m until new work is available. 
// Returns with acquired P.
func stopm() {
	gp := getg()

	if gp.m.locks != 0 {
		throw("stopm holding locks")
	}
	if gp.m.p != 0 {
		throw("stopm holding p")
	}
	if gp.m.spinning {
		throw("stopm spinning")
	}

	lock(&sched.lock)
	mput(gp.m)
	unlock(&sched.lock)
	mPark()
	// startm and startlockedm store the P to run in nextp before calling
	// notewakeup on this M's park note.
	acquirep(gp.m.nextp.ptr())
	gp.m.nextp = 0
}

// mspinning marks the current M as spinning. startm passes it to newm as
// the new M's start function when it is called with spinning set.
func mspinning() {
	// startm's caller incremented nmspinning. Set the new M's spinning.
	getg().m.spinning = true
}

// Schedules some M to run the p (creates an M if necessary).
// If pp==nil, tries to get an idle P, if no idle P's does nothing.
// May run with m.p==nil, so write barriers are not allowed.
// If spinning is set, the caller has incremented nmspinning and must provide a
// P. startm will set m.spinning in the newly started M.
//
// Callers passing a non-nil P must call from a non-preemptible context. See
// comment on acquirem below.
//
// Argument lockheld indicates whether the caller already acquired the
// scheduler lock. Callers holding the lock when making the call must pass
// true. The lock might be temporarily dropped, but will be reacquired before
// returning.
//
// Must not have write barriers because this may be called without a P.
//
//go:nowritebarrierrec
func startm(pp *p, spinning, lockheld bool) {
	// Disable preemption.
	//
	// Every owned P must have an owner that will eventually stop it in the
	// event of a GC stop request. startm takes transient ownership of a P
	// (either from argument or pidleget below) and transfers ownership to
	// a started M, which will be responsible for performing the stop.
	//
	// Preemption must be disabled during this transient ownership,
	// otherwise the P this is running on may enter GC stop while still
	// holding the transient P, leaving that P in limbo and deadlocking the
	// STW.
	//
	// Callers passing a non-nil P must already be in non-preemptible
	// context, otherwise such preemption could occur on function entry to
	// startm. Callers passing a nil P may be preemptible, so we must
	// disable preemption before acquiring a P from pidleget below.
	mp := acquirem()
	if !lockheld {
		lock(&sched.lock)
	}
	if pp == nil {
		if spinning {
			// TODO(prattmic): All remaining calls to this function
			// with pp == nil could be cleaned up to find a P
			// before calling startm.
			throw("startm: P required for spinning=true")
		}
		pp, _ = pidleget(0)
		if pp == nil {
			if !lockheld {
				unlock(&sched.lock)
			}
			releasem(mp)
			return
		}
	}
	nmp := mget()
	if nmp == nil {
		// No M is available, we must drop sched.lock and call newm.
		// However, we already own a P to assign to the M.
		//
		// Once sched.lock is released, another G (e.g., in a syscall),
		// could find no idle P while checkdead finds a runnable G but
		// no running M's because this new M hasn't started yet, thus
		// throwing in an apparent deadlock.
		// This apparent deadlock is possible when startm is called
		// from sysmon, which doesn't count as a running M.
		//
		// Avoid this situation by pre-allocating the ID for the new M,
		// thus marking it as 'running' before we drop sched.lock. This
		// new M will eventually run the scheduler to execute any
		// queued G's.
		id := mReserveID()
		unlock(&sched.lock)

		var fn func()
		if spinning {
			// The caller incremented nmspinning, so set m.spinning in the new M.
			fn = mspinning
		}
		newm(fn, pp, id)

		// sched.lock was dropped unconditionally above; restore it
		// only if the caller expects to still hold it.
		if lockheld {
			lock(&sched.lock)
		}
		// Ownership transfer of pp committed by start in newm.
		// Preemption is now safe.
		releasem(mp)
		return
	}
	if !lockheld {
		unlock(&sched.lock)
	}
	if nmp.spinning {
		throw("startm: m is spinning")
	}
	if nmp.nextp != 0 {
		throw("startm: m has p")
	}
	if spinning && !runqempty(pp) {
		throw("startm: p has runnable gs")
	}
	// The caller incremented nmspinning, so set m.spinning in the new M.
	nmp.spinning = spinning
	nmp.nextp.set(pp)
	notewakeup(&nmp.park)
	// Ownership transfer of pp committed by wakeup. Preemption is now
	// safe.
	releasem(mp)
}

// Hands off P from syscall or locked M.
// Always runs without a P, so write barriers are not allowed.
//
//go:nowritebarrierrec
func handoffp(pp *p) {
	// handoffp must start an M in any situation where
	// findRunnable would return a G to run on pp.

	// if it has local work, start it straight away
	if !runqempty(pp) || !sched.runq.empty() {
		startm(pp, false, false)
		return
	}
	// if there's trace work to do, start it straight away
	if (traceEnabled() || traceShuttingDown()) && traceReaderAvailable() != nil {
		startm(pp, false, false)
		return
	}
	// if it has GC work, start it straight away
	if gcBlackenEnabled != 0 && gcShouldScheduleWorker(pp) {
		startm(pp, false, false)
		return
	}
	// no local work, check that there are no spinning/idle M's,
	// otherwise our help is not required
	if sched.nmspinning.Load()+sched.npidle.Load() == 0 && sched.nmspinning.CompareAndSwap(0, 1) { // TODO: fast atomic
		sched.needspinning.Store(0)
		startm(pp, true, false)
		return
	}
	lock(&sched.lock)
	if sched.gcwaiting.Load() {
		// A stop is pending: park pp in _Pgcstop here and wake the
		// stopper if this was the last P it was waiting for.
		pp.status = _Pgcstop
		pp.gcStopTime = nanotime()
		sched.stopwait--
		if sched.stopwait == 0 {
			notewakeup(&sched.stopnote)
		}
		unlock(&sched.lock)
		return
	}
	if pp.runSafePointFn != 0 && atomic.Cas(&pp.runSafePointFn, 1, 0) {
		sched.safePointFn(pp)
		sched.safePointWait--
		if sched.safePointWait == 0 {
			notewakeup(&sched.safePointNote)
		}
	}
	// Re-check the global run queue now that sched.lock is held.
	if !sched.runq.empty() {
		unlock(&sched.lock)
		startm(pp, false, false)
		return
	}
	// If this is the last running P and nobody is polling network,
	// need to wakeup another M to poll network.
	if sched.npidle.Load() == gomaxprocs-1 && sched.lastpoll.Load() != 0 {
		unlock(&sched.lock)
		startm(pp, false, false)
		return
	}

	// The scheduler lock cannot be held when calling wakeNetPoller below
	// because wakeNetPoller may call wakep which may call startm.
	when := pp.timers.wakeTime()
	pidleput(pp, 0)
	unlock(&sched.lock)

	if when != 0 {
		wakeNetPoller(when)
	}
}

// Tries to add one more P to execute G's.
// Called when a G is made runnable (newproc, ready).
// Must be called with a P.
//
// wakep should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname wakep
func wakep() {
	// Be conservative about spinning threads, only start one if none exist
	// already.
	if sched.nmspinning.Load() != 0 || !sched.nmspinning.CompareAndSwap(0, 1) {
		return
	}

	// Disable preemption until ownership of pp transfers to the next M in
	// startm. Otherwise preemption here would leave pp stuck waiting to
	// enter _Pgcstop.
	//
	// See preemption comment on acquirem in startm for more details.
	mp := acquirem()

	var pp *p
	lock(&sched.lock)
	pp, _ = pidlegetSpinning(0)
	if pp == nil {
		// No idle P: undo the nmspinning increment made by the
		// CompareAndSwap above.
		if sched.nmspinning.Add(-1) < 0 {
			throw("wakep: negative nmspinning")
		}
		unlock(&sched.lock)
		releasem(mp)
		return
	}
	// Since we always have a P, the race in the "No M is available"
	// comment in startm doesn't apply during the small window between the
	// unlock here and lock in startm. A checkdead in between will always
	// see at least one running M (ours).
	unlock(&sched.lock)

	startm(pp, true, false)

	releasem(mp)
}

// Stops execution of the current m that is locked to a g until the g is runnable again.
// Returns with acquired P.
func stoplockedm() {
	gp := getg()

	if gp.m.lockedg == 0 || gp.m.lockedg.ptr().lockedm.ptr() != gp.m {
		throw("stoplockedm: inconsistent locking")
	}
	if gp.m.p != 0 {
		// Schedule another M to run this p.
		pp := releasep()
		handoffp(pp)
	}
	incidlelocked(1)
	// Wait until another thread schedules lockedg again.
	mPark()
	status := readgstatus(gp.m.lockedg.ptr())
	// Mask off the _Gscan bit so that both _Grunnable and its scan
	// variant are accepted.
	if status&^_Gscan != _Grunnable {
		print("runtime:stoplockedm: lockedg (atomicstatus=", status, ") is not Grunnable or Gscanrunnable\n")
		dumpgstatus(gp.m.lockedg.ptr())
		throw("stoplockedm: not runnable")
	}
	// startlockedm stored the P to run in nextp before waking this M.
	acquirep(gp.m.nextp.ptr())
	gp.m.nextp = 0
}

// Schedules the locked m to run the locked gp.
// May run during STW, so write barriers are not allowed.
//
//go:nowritebarrierrec
func startlockedm(gp *g) {
	mp := gp.lockedm.ptr()
	if mp == getg().m {
		throw("startlockedm: locked to me")
	}
	if mp.nextp != 0 {
		throw("startlockedm: m has p")
	}
	// directly handoff current P to the locked m
	incidlelocked(-1)
	pp := releasep()
	mp.nextp.set(pp)
	notewakeup(&mp.park)
	// This M gave its P away, so stop it until it is handed another one.
	stopm()
}

// Stops the current m for stopTheWorld.
// Returns when the world is restarted.
func gcstopm() {
	gp := getg()

	if !sched.gcwaiting.Load() {
		throw("gcstopm: not waiting for gc")
	}
	if gp.m.spinning {
		gp.m.spinning = false
		// OK to just drop nmspinning here,
		// startTheWorld will unpark threads as necessary.
		if sched.nmspinning.Add(-1) < 0 {
			throw("gcstopm: negative nmspinning")
		}
	}
	pp := releasep()
	lock(&sched.lock)
	pp.status = _Pgcstop
	pp.gcStopTime = nanotime()
	sched.stopwait--
	// Wake the stopper once the last P it is waiting for has stopped.
	if sched.stopwait == 0 {
		notewakeup(&sched.stopnote)
	}
	unlock(&sched.lock)
	stopm()
}

// Schedules gp to run on the current M.
// If inheritTime is true, gp inherits the remaining time in the
// current time slice. Otherwise, it starts a new time slice.
// Never returns.
//
// Write barriers are allowed because this is called immediately after
// acquiring a P in several places.
//
//go:yeswritebarrierrec
func execute(gp *g, inheritTime bool) {
	mp := getg().m

	if goroutineProfile.active {
		// Make sure that gp has had its stack written out to the goroutine
		// profile, exactly as it was when the goroutine profiler first stopped
		// the world.
		tryRecordGoroutineProfile(gp, nil, osyield)
	}

	// Assign gp.m before entering _Grunning so running Gs have an M.
	mp.curg = gp
	gp.m = mp
	gp.syncSafePoint = false // Clear the flag, which may have been set by morestack.
	casgstatus(gp, _Grunnable, _Grunning)
	gp.waitsince = 0
	gp.preempt = false
	gp.stackguard0 = gp.stack.lo + stackGuard
	if !inheritTime {
		// A new time slice is recorded by bumping the P's schedtick.
		mp.p.ptr().schedtick++
	}

	if sys.DITSupported && debug.dataindependenttiming != 1 {
		if gp.ditWanted && !mp.ditEnabled {
			// The current M doesn't have DIT enabled, but the goroutine we're
			// executing does need it, so turn it on.
			sys.EnableDIT()
			mp.ditEnabled = true
		} else if !gp.ditWanted && mp.ditEnabled {
			// The current M has DIT enabled, but the goroutine we're executing does
			// not need it, so turn it off.
			// NOTE: turning off DIT here means that the scheduler will have DIT enabled
			// when it runs after this goroutine yields or is preempted. This may have
			// a minor performance impact on the scheduler.
			sys.DisableDIT()
			mp.ditEnabled = false
		}
	}

	// Check whether the profiler needs to be turned on or off.
	hz := sched.profilehz
	if mp.profilehz != hz {
		setThreadCPUProfiler(hz)
	}

	trace := traceAcquire()
	if trace.ok() {
		trace.GoStart()
		traceRelease(trace)
	}

	gogo(&gp.sched)
}

// Finds a runnable goroutine to execute.
// Tries to steal from other P's, get g from local or global queue, poll network.
// tryWakeP indicates that the returned goroutine is not normal (GC worker, trace
// reader) so the caller should try to wake a P.
// inheritTime is forwarded from runqget or stealWork when the goroutine came
// from one of those; it is false on every other return path.
func findRunnable() (gp *g, inheritTime, tryWakeP bool) {
	mp := getg().m

	// The conditions here and in handoffp must agree: if
	// findRunnable would return a G to run, handoffp must start
	// an M.

top:
	// We may have collected an allp snapshot below. The snapshot is only
	// required in each loop iteration. Clear it to allow GC to collect the
	// slice.
	mp.clearAllpSnapshot()

	pp := mp.p.ptr()
	if sched.gcwaiting.Load() {
		gcstopm()
		goto top
	}
	if pp.runSafePointFn != 0 {
		runSafePointFn()
	}

	// now and pollUntil are saved for work stealing later,
	// which may steal timers. It's important that between now
	// and then, nothing blocks, so these numbers remain mostly
	// relevant.
	now, pollUntil, _ := pp.timers.check(0, nil)

	// Try to schedule the trace reader.
	if traceEnabled() || traceShuttingDown() {
		gp := traceReader()
		if gp != nil {
			trace := traceAcquire()
			casgstatus(gp, _Gwaiting, _Grunnable)
			if trace.ok() {
				trace.GoUnpark(gp, 0)
				traceRelease(trace)
			}
			return gp, false, true
		}
	}

	// Try to schedule a GC worker.
	if gcBlackenEnabled != 0 {
		gp, tnow := gcController.findRunnableGCWorker(pp, now)
		if gp != nil {
			return gp, false, true
		}
		now = tnow
	}

	// Check the global runnable queue once in a while to ensure fairness.
	// Otherwise two goroutines can completely occupy the local runqueue
	// by constantly respawning each other.
	if pp.schedtick%61 == 0 && !sched.runq.empty() {
		lock(&sched.lock)
		gp := globrunqget()
		unlock(&sched.lock)
		if gp != nil {
			return gp, false, false
		}
	}

	// Wake up the finalizer G.
	if fingStatus.Load()&(fingWait|fingWake) == fingWait|fingWake {
		if gp := wakefing(); gp != nil {
			ready(gp, 0, true)
		}
	}

	// Wake up one or more cleanup Gs.
	if gcCleanups.needsWake() {
		gcCleanups.wake()
	}

	if *cgo_yield != nil {
		asmcgocall(*cgo_yield, nil)
	}

	// local runq
	if gp, inheritTime := runqget(pp); gp != nil {
		return gp, inheritTime, false
	}

	// global runq
	if !sched.runq.empty() {
		lock(&sched.lock)
		gp, q := globrunqgetbatch(int32(len(pp.runq)) / 2)
		unlock(&sched.lock)
		if gp != nil {
			if runqputbatch(pp, &q); !q.empty() {
				throw("Couldn't put Gs into empty local runq")
			}
			return gp, false, false
		}
	}

	// Poll network.
	// This netpoll is only an optimization before we resort to stealing.
	// We can safely skip it if there are no waiters or a thread is blocked
	// in netpoll already. If there is any kind of logical race with that
	// blocked thread (e.g. it has already returned from netpoll, but does
	// not set lastpoll yet), this thread will do blocking netpoll below
	// anyway.
	// We only poll from one thread at a time to avoid kernel contention
	// on machines with many cores.
	if netpollinited() && netpollAnyWaiters() && sched.lastpoll.Load() != 0 && sched.pollingNet.Swap(1) == 0 {
		list, delta := netpoll(0)
		sched.pollingNet.Store(0)
		if !list.empty() { // non-blocking
			// Run the first ready goroutine ourselves and inject the rest.
			gp := list.pop()
			injectglist(&list)
			netpollAdjustWaiters(delta)
			trace := traceAcquire()
			casgstatus(gp, _Gwaiting, _Grunnable)
			if trace.ok() {
				trace.GoUnpark(gp, 0)
				traceRelease(trace)
			}
			return gp, false, false
		}
	}

	// Spinning Ms: steal work from other Ps.
	//
	// Limit the number of spinning Ms to half the number of busy Ps.
	// This is necessary to prevent excessive CPU consumption when
	// GOMAXPROCS>>1 but the program parallelism is low.
	if mp.spinning || 2*sched.nmspinning.Load() < gomaxprocs-sched.npidle.Load() {
		if !mp.spinning {
			mp.becomeSpinning()
		}

		gp, inheritTime, tnow, w, newWork := stealWork(now)
		if gp != nil {
			// Successfully stole.
			return gp, inheritTime, false
		}
		if newWork {
			// There may be new timer or GC work; restart to
			// discover.
			goto top
		}

		now = tnow
		if w != 0 && (pollUntil == 0 || w < pollUntil) {
			// Earlier timer to wait for.
			pollUntil = w
		}
	}

	// We have nothing to do.
	//
	// If we're in the GC mark phase, can safely scan and blacken objects,
	// and have work to do, run idle-time marking rather than give up the P.
	if gcBlackenEnabled != 0 && gcShouldScheduleWorker(pp) && gcController.addIdleMarkWorker() {
		node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop())
		if node != nil {
			pp.gcMarkWorkerMode = gcMarkWorkerIdleMode
			gp := node.gp.ptr()

			trace := traceAcquire()
			casgstatus(gp, _Gwaiting, _Grunnable)
			if trace.ok() {
				trace.GoUnpark(gp, 0)
				traceRelease(trace)
			}
			return gp, false, false
		}
		// No worker G was available; undo addIdleMarkWorker.
		gcController.removeIdleMarkWorker()
	}

	// wasm only:
	// If a callback returned and no other goroutine is awake,
	// then wake event handler goroutine which pauses execution
	// until a callback was triggered.
	gp, otherReady := beforeIdle(now, pollUntil)
	if gp != nil {
		trace := traceAcquire()
		casgstatus(gp, _Gwaiting, _Grunnable)
		if trace.ok() {
			trace.GoUnpark(gp, 0)
			traceRelease(trace)
		}
		return gp, false, false
	}
	if otherReady {
		goto top
	}

	// Before we drop our P, make a snapshot of the allp slice,
	// which can change underfoot once we no longer block
	// safe-points. We don't need to snapshot the contents because
	// everything up to cap(allp) is immutable.
	//
	// We clear the snapshot from the M after return via
	// mp.clearAllpSnapshot (in schedule) and on each iteration of the top
	// loop.
	allpSnapshot := mp.snapshotAllp()
	// Also snapshot masks. Value changes are OK, but we can't allow
	// len to change out from under us.
	idlepMaskSnapshot := idlepMask
	timerpMaskSnapshot := timerpMask

	// return P and block
	lock(&sched.lock)
	if sched.gcwaiting.Load() || pp.runSafePointFn != 0 {
		unlock(&sched.lock)
		goto top
	}
	if !sched.runq.empty() {
		gp, q := globrunqgetbatch(int32(len(pp.runq)) / 2)
		unlock(&sched.lock)
		if gp == nil {
			throw("global runq empty with non-zero runqsize")
		}
		if runqputbatch(pp, &q); !q.empty() {
			throw("Couldn't put Gs into empty local runq")
		}
		return gp, false, false
	}
	if !mp.spinning && sched.needspinning.Load() == 1 {
		// See "Delicate dance" comment below.
		mp.becomeSpinning()
		unlock(&sched.lock)
		goto top
	}
	if releasep() != pp {
		throw("findRunnable: wrong p")
	}
	now = pidleput(pp, now)
	unlock(&sched.lock)

	// Delicate dance: thread transitions from spinning to non-spinning
	// state, potentially concurrently with submission of new work. We must
	// drop nmspinning first and then check all sources again (with
	// #StoreLoad memory barrier in between). If we do it the other way
	// around, another thread can submit work after we've checked all
	// sources but before we drop nmspinning; as a result nobody will
	// unpark a thread to run the work.
	//
	// This applies to the following sources of work:
	//
	// * Goroutines added to the global or a per-P run queue.
	// * New/modified-earlier timers on a per-P timer heap.
	// * Idle-priority GC work (barring golang.org/issue/19112).
	//
	// If we discover new work below, we need to restore m.spinning as a
	// signal for resetspinning to unpark a new worker thread (because
	// there can be more than one starving goroutine).
	//
	// However, if after discovering new work we also observe no idle Ps
	// (either here or in resetspinning), we have a problem. We may be
	// racing with a non-spinning M in the block above, having found no
	// work and preparing to release its P and park. Allowing that P to go
	// idle will result in loss of work conservation (idle P while there is
	// runnable work). This could result in complete deadlock in the
	// unlikely event that we discover new work (from netpoll) right as we
	// are racing with _all_ other Ps going idle.
	//
	// We use sched.needspinning to synchronize with non-spinning Ms going
	// idle. If needspinning is set when they are about to drop their P,
	// they abort the drop and instead become a new spinning M on our
	// behalf. If we are not racing and the system is truly fully loaded
	// then no spinning threads are required, and the next thread to
	// naturally become spinning will clear the flag.
	//
	// Also see "Worker thread parking/unparking" comment at the top of the
	// file.
	wasSpinning := mp.spinning
	if mp.spinning {
		mp.spinning = false
		if sched.nmspinning.Add(-1) < 0 {
			throw("findRunnable: negative nmspinning")
		}

		// Note that for correctness, only the last M transitioning from
		// spinning to non-spinning must perform these rechecks to
		// ensure no missed work. However, the runtime has some cases
		// of transient increments of nmspinning that are decremented
		// without going through this path, so we must be conservative
		// and perform the check on all spinning Ms.
		//
		// See https://go.dev/issue/43997.

		// Check global and P runqueues again.

		lock(&sched.lock)
		if !sched.runq.empty() {
			pp, _ := pidlegetSpinning(0)
			if pp != nil {
				gp, q := globrunqgetbatch(int32(len(pp.runq)) / 2)
				unlock(&sched.lock)
				if gp == nil {
					throw("global runq empty with non-zero runqsize")
				}
				if runqputbatch(pp, &q); !q.empty() {
					throw("Couldn't put Gs into empty local runq")
				}
				acquirep(pp)
				mp.becomeSpinning()
				return gp, false, false
			}
		}
		unlock(&sched.lock)

		pp := checkRunqsNoP(allpSnapshot, idlepMaskSnapshot)
		if pp != nil {
			acquirep(pp)
			mp.becomeSpinning()
			goto top
		}

		// Check for idle-priority GC work again.
		pp, gp := checkIdleGCNoP()
		if pp != nil {
			acquirep(pp)
			mp.becomeSpinning()

			// Run the idle worker.
			pp.gcMarkWorkerMode = gcMarkWorkerIdleMode
			trace := traceAcquire()
			casgstatus(gp, _Gwaiting, _Grunnable)
			if trace.ok() {
				trace.GoUnpark(gp, 0)
				traceRelease(trace)
			}
			return gp, false, false
		}

		// Finally, check for timer creation or expiry concurrently with
		// transitioning from spinning to non-spinning.
		//
		// Note that we cannot use checkTimers here because it calls
		// adjusttimers which may need to allocate memory, and that isn't
		// allowed when we don't have an active P.
		pollUntil = checkTimersNoP(allpSnapshot, timerpMaskSnapshot, pollUntil)
	}

	// We don't need allp anymore at this point, but can't clear the
	// snapshot without a P for the write barrier.

	// Poll network until next timer.
	if netpollinited() && (netpollAnyWaiters() || pollUntil != 0) && sched.lastpoll.Swap(0) != 0 {
		sched.pollUntil.Store(pollUntil)
		if mp.p != 0 {
			throw("findRunnable: netpoll with p")
		}
		if mp.spinning {
			throw("findRunnable: netpoll with spinning")
		}
		// delay stays -1 when there is no timer to wait for; otherwise
		// it is the time until pollUntil, clamped at zero.
		delay := int64(-1)
		if pollUntil != 0 {
			if now == 0 {
				now = nanotime()
			}
			delay = pollUntil - now
			if delay < 0 {
				delay = 0
			}
		}
		if faketime != 0 {
			// When using fake time, just poll.
			delay = 0
		}
		list, delta := netpoll(delay) // block until new work is available
		// Refresh now again, after potentially blocking.
		now = nanotime()
		sched.pollUntil.Store(0)
		sched.lastpoll.Store(now)
		if faketime != 0 && list.empty() {
			// Using fake time and nothing is ready; stop M.
			// When all M's stop, checkdead will call timejump.
			stopm()
			goto top
		}
		lock(&sched.lock)
		pp, _ := pidleget(now)
		unlock(&sched.lock)
		if pp == nil {
			// No idle P to run on: hand the whole list to injectglist.
			injectglist(&list)
			netpollAdjustWaiters(delta)
		} else {
			acquirep(pp)
			if !list.empty() {
				gp := list.pop()
				injectglist(&list)
				netpollAdjustWaiters(delta)
				trace := traceAcquire()
				casgstatus(gp, _Gwaiting, _Grunnable)
				if trace.ok() {
					trace.GoUnpark(gp, 0)
					traceRelease(trace)
				}
				return gp, false, false
			}
			if wasSpinning {
				mp.becomeSpinning()
			}
			goto top
		}
	} else if pollUntil != 0 && netpollinited() {
		// Another thread owns the blocking poll. Interrupt it if it is
		// not set to wake by our pollUntil.
		pollerPollUntil := sched.pollUntil.Load()
		if pollerPollUntil == 0 || pollerPollUntil > pollUntil {
			netpollBreak()
		}
	}
	stopm()
	goto top
}

// pollWork reports whether there is non-background work this P could
// be doing. This is a fairly lightweight check to be used for
// background work loops, like idle GC. It checks a subset of the
// conditions checked by the actual scheduler.
func pollWork() bool {
	// Global run queue.
	if !sched.runq.empty() {
		return true
	}
	// This P's local run queue.
	p := getg().m.p.ptr()
	if !runqempty(p) {
		return true
	}
	// Non-blocking network poll; anything found is injected before
	// reporting work.
	if netpollinited() && netpollAnyWaiters() && sched.lastpoll.Load() != 0 {
		if list, delta := netpoll(0); !list.empty() {
			injectglist(&list)
			netpollAdjustWaiters(delta)
			return true
		}
	}
	return false
}

// stealWork attempts to steal a runnable goroutine or timer from any P.
//
// If newWork is true, new work may have been readied.
//
// If now is not 0 it is the current time. stealWork returns the passed time or
// the current time if now was passed as 0.
//
// The returned pollUntil is the earliest non-zero wake time reported by the
// timer checks made here, or 0 if none reported one.
func stealWork(now int64) (gp *g, inheritTime bool, rnow, pollUntil int64, newWork bool) {
	pp := getg().m.p.ptr()

	ranTimer := false

	const stealTries = 4
	for i := 0; i < stealTries; i++ {
		// Timers and other Ps' runnext are only considered on the
		// final pass.
		stealTimersOrRunNextG := i == stealTries-1

		for enum := stealOrder.start(cheaprand()); !enum.done(); enum.next() {
			if sched.gcwaiting.Load() {
				// GC work may be available.
				return nil, false, now, pollUntil, true
			}
			p2 := allp[enum.position()]
			if pp == p2 {
				continue
			}

			// Steal timers from p2. This call to timers.check is the only place
			// where we might hold a lock on a different P's timers. We do this
			// once on the last pass before checking runnext because stealing
			// from the other P's runnext should be the last resort, so if there
			// are timers to steal do that first.
			//
			// We only check timers on one of the stealing iterations because
			// the time stored in now doesn't change in this loop and checking
			// the timers for each P more than once with the same value of now
			// is probably a waste of time.
			//
			// timerpMask tells us whether the P may have timers at all. If it
			// can't, no need to check at all.
			if stealTimersOrRunNextG && timerpMask.read(enum.position()) {
				tnow, w, ran := p2.timers.check(now, nil)
				now = tnow
				if w != 0 && (pollUntil == 0 || w < pollUntil) {
					pollUntil = w
				}
				if ran {
					// Running the timers may have
					// made an arbitrary number of G's
					// ready and added them to this P's
					// local run queue. That invalidates
					// the assumption of runqsteal
					// that it always has room to add
					// stolen G's. So check now if there
					// is a local G to run.
					if gp, inheritTime := runqget(pp); gp != nil {
						return gp, inheritTime, now, pollUntil, ranTimer
					}
					ranTimer = true
				}
			}

			// Don't bother to attempt to steal if p2 is idle.
			if !idlepMask.read(enum.position()) {
				if gp := runqsteal(pp, p2, stealTimersOrRunNextG); gp != nil {
					return gp, false, now, pollUntil, ranTimer
				}
			}
		}
	}

	// No goroutines found to steal. Regardless, running a timer may have
	// made some goroutine ready that we missed. Indicate the next timer to
	// wait for.
	return nil, false, now, pollUntil, ranTimer
}

// Check all Ps for a runnable G to steal.
//
// On entry we have no P. If a G is available to steal and a P is available,
// the P is returned which the caller should acquire and attempt to steal the
// work to.
func checkRunqsNoP(allpSnapshot []*p, idlepMaskSnapshot pMask) *p {
	for id, p2 := range allpSnapshot {
		// Only non-idle Ps with a non-empty run queue are of interest.
		if !idlepMaskSnapshot.read(uint32(id)) && !runqempty(p2) {
			lock(&sched.lock)
			pp, _ := pidlegetSpinning(0)
			if pp == nil {
				// Can't get a P, don't bother checking remaining Ps.
				unlock(&sched.lock)
				return nil
			}
			unlock(&sched.lock)
			return pp
		}
	}

	// No work available.
	return nil
}

// Check all Ps for a timer expiring sooner than pollUntil.
//
// Returns updated pollUntil value.
3932 func checkTimersNoP(allpSnapshot []*p, timerpMaskSnapshot pMask, pollUntil int64) int64 { 3933 for id, p2 := range allpSnapshot { 3934 if timerpMaskSnapshot.read(uint32(id)) { 3935 w := p2.timers.wakeTime() 3936 if w != 0 && (pollUntil == 0 || w < pollUntil) { 3937 pollUntil = w 3938 } 3939 } 3940 } 3941 3942 return pollUntil 3943 } 3944 3945 // Check for idle-priority GC, without a P on entry. 3946 // 3947 // If some GC work, a P, and a worker G are all available, the P and G will be 3948 // returned. The returned P has not been wired yet. 3949 func checkIdleGCNoP() (*p, *g) { 3950 // N.B. Since we have no P, gcBlackenEnabled may change at any time; we 3951 // must check again after acquiring a P. As an optimization, we also check 3952 // if an idle mark worker is needed at all. This is OK here, because if we 3953 // observe that one isn't needed, at least one is currently running. Even if 3954 // it stops running, its own journey into the scheduler should schedule it 3955 // again, if need be (at which point, this check will pass, if relevant). 3956 if atomic.Load(&gcBlackenEnabled) == 0 || !gcController.needIdleMarkWorker() { 3957 return nil, nil 3958 } 3959 if !gcShouldScheduleWorker(nil) { 3960 return nil, nil 3961 } 3962 3963 // Work is available; we can start an idle GC worker only if there is 3964 // an available P and available worker G. 3965 // 3966 // We can attempt to acquire these in either order, though both have 3967 // synchronization concerns (see below). Workers are almost always 3968 // available (see comment in findRunnableGCWorker for the one case 3969 // there may be none). Since we're slightly less likely to find a P, 3970 // check for that first. 3971 // 3972 // Synchronization: note that we must hold sched.lock until we are 3973 // committed to keeping it. Otherwise we cannot put the unnecessary P 3974 // back in sched.pidle without performing the full set of idle 3975 // transition checks. 
3976 // 3977 // If we were to check gcBgMarkWorkerPool first, we must somehow handle 3978 // the assumption in gcControllerState.findRunnableGCWorker that an 3979 // empty gcBgMarkWorkerPool is only possible if gcMarkDone is running. 3980 lock(&sched.lock) 3981 pp, now := pidlegetSpinning(0) 3982 if pp == nil { 3983 unlock(&sched.lock) 3984 return nil, nil 3985 } 3986 3987 // Now that we own a P, gcBlackenEnabled can't change (as it requires STW). 3988 if gcBlackenEnabled == 0 || !gcController.addIdleMarkWorker() { 3989 pidleput(pp, now) 3990 unlock(&sched.lock) 3991 return nil, nil 3992 } 3993 3994 node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop()) 3995 if node == nil { 3996 pidleput(pp, now) 3997 unlock(&sched.lock) 3998 gcController.removeIdleMarkWorker() 3999 return nil, nil 4000 } 4001 4002 unlock(&sched.lock) 4003 4004 return pp, node.gp.ptr() 4005 } 4006 4007 // wakeNetPoller wakes up the thread sleeping in the network poller if it isn't 4008 // going to wake up before the when argument; or it wakes an idle P to service 4009 // timers and the network poller if there isn't one already. 4010 func wakeNetPoller(when int64) { 4011 if sched.lastpoll.Load() == 0 { 4012 // In findRunnable we ensure that when polling the pollUntil 4013 // field is either zero or the time to which the current 4014 // poll is expected to run. This can have a spurious wakeup 4015 // but should never miss a wakeup. 4016 pollerPollUntil := sched.pollUntil.Load() 4017 if pollerPollUntil == 0 || pollerPollUntil > when { 4018 netpollBreak() 4019 } 4020 } else { 4021 // There are no threads in the network poller, try to get 4022 // one there so it can handle new timers. 4023 if GOOS != "plan9" { // Temporary workaround - see issue #42303. 
4024 wakep() 4025 } 4026 } 4027 } 4028 4029 func resetspinning() { 4030 gp := getg() 4031 if !gp.m.spinning { 4032 throw("resetspinning: not a spinning m") 4033 } 4034 gp.m.spinning = false 4035 nmspinning := sched.nmspinning.Add(-1) 4036 if nmspinning < 0 { 4037 throw("findRunnable: negative nmspinning") 4038 } 4039 // M wakeup policy is deliberately somewhat conservative, so check if we 4040 // need to wakeup another P here. See "Worker thread parking/unparking" 4041 // comment at the top of the file for details. 4042 wakep() 4043 } 4044 4045 // injectglist adds each runnable G on the list to some run queue, 4046 // and clears glist. If there is no current P, they are added to the 4047 // global queue, and up to npidle M's are started to run them. 4048 // Otherwise, for each idle P, this adds a G to the global queue 4049 // and starts an M. Any remaining G's are added to the current P's 4050 // local run queue. 4051 // This may temporarily acquire sched.lock. 4052 // Can run concurrently with GC. 4053 func injectglist(glist *gList) { 4054 if glist.empty() { 4055 return 4056 } 4057 4058 // Mark all the goroutines as runnable before we put them 4059 // on the run queues. 4060 var tail *g 4061 trace := traceAcquire() 4062 for gp := glist.head.ptr(); gp != nil; gp = gp.schedlink.ptr() { 4063 tail = gp 4064 casgstatus(gp, _Gwaiting, _Grunnable) 4065 if trace.ok() { 4066 trace.GoUnpark(gp, 0) 4067 } 4068 } 4069 if trace.ok() { 4070 traceRelease(trace) 4071 } 4072 4073 // Turn the gList into a gQueue. 4074 q := gQueue{glist.head, tail.guintptr(), glist.size} 4075 *glist = gList{} 4076 4077 startIdle := func(n int32) { 4078 for ; n > 0; n-- { 4079 mp := acquirem() // See comment in startm. 
4080 lock(&sched.lock) 4081 4082 pp, _ := pidlegetSpinning(0) 4083 if pp == nil { 4084 unlock(&sched.lock) 4085 releasem(mp) 4086 break 4087 } 4088 4089 startm(pp, false, true) 4090 unlock(&sched.lock) 4091 releasem(mp) 4092 } 4093 } 4094 4095 pp := getg().m.p.ptr() 4096 if pp == nil { 4097 n := q.size 4098 lock(&sched.lock) 4099 globrunqputbatch(&q) 4100 unlock(&sched.lock) 4101 startIdle(n) 4102 return 4103 } 4104 4105 var globq gQueue 4106 npidle := sched.npidle.Load() 4107 for ; npidle > 0 && !q.empty(); npidle-- { 4108 g := q.pop() 4109 globq.pushBack(g) 4110 } 4111 if !globq.empty() { 4112 n := globq.size 4113 lock(&sched.lock) 4114 globrunqputbatch(&globq) 4115 unlock(&sched.lock) 4116 startIdle(n) 4117 } 4118 4119 if runqputbatch(pp, &q); !q.empty() { 4120 lock(&sched.lock) 4121 globrunqputbatch(&q) 4122 unlock(&sched.lock) 4123 } 4124 4125 // Some P's might have become idle after we loaded `sched.npidle` 4126 // but before any goroutines were added to the queue, which could 4127 // lead to idle P's when there is work available in the global queue. 4128 // That could potentially last until other goroutines become ready 4129 // to run. That said, we need to find a way to hedge 4130 // 4131 // Calling wakep() here is the best bet, it will do nothing in the 4132 // common case (no racing on `sched.npidle`), while it could wake one 4133 // more P to execute G's, which might end up with >1 P's: the first one 4134 // wakes another P and so forth until there is no more work, but this 4135 // ought to be an extremely rare case. 4136 // 4137 // Also see "Worker thread parking/unparking" comment at the top of the file for details. 4138 wakep() 4139 } 4140 4141 // One round of scheduler: find a runnable goroutine and execute it. 4142 // Never returns. 
func schedule() {
	mp := getg().m

	if mp.locks != 0 {
		throw("schedule: holding locks")
	}

	// An M with a locked G only ever runs that G.
	if mp.lockedg != 0 {
		stoplockedm()
		execute(mp.lockedg.ptr(), false) // Never returns.
	}

	// We should not schedule away from a g that is executing a cgo call,
	// since the cgo call is using the m's g0 stack.
	if mp.incgo {
		throw("schedule: in cgo")
	}

top:
	pp := mp.p.ptr()
	pp.preempt = false

	// Safety check: if we are spinning, the run queue should be empty.
	// Check this before calling checkTimers, as that might call
	// goready to put a ready goroutine on the local run queue.
	if mp.spinning && (pp.runnext != 0 || pp.runqhead != pp.runqtail) {
		throw("schedule: spinning with local work")
	}

	gp, inheritTime, tryWakeP := findRunnable() // blocks until work is available

	// May be on a new P.
	pp = mp.p.ptr()

	// findRunnable may have collected an allp snapshot. The snapshot is
	// only required within findRunnable. Clear it to allow the GC to
	// collect the slice.
	mp.clearAllpSnapshot()

	// If the P was assigned a next GC mark worker but findRunnable
	// selected anything else, release the worker so another P may run it.
	//
	// N.B. If this occurs because a higher-priority goroutine was selected
	// (trace reader), then tryWakeP is set, which will wake another P to
	// run the worker. If this occurs because the GC is no longer active,
	// there is no need to wakep.
	gcController.releaseNextGCMarkWorker(pp)

	if debug.dontfreezetheworld > 0 && freezing.Load() {
		// See comment in freezetheworld. We don't want to perturb
		// scheduler state, so we didn't gcstopm in findRunnable, but
		// also don't want to allow new goroutines to run.
		//
		// Deadlock here rather than in the findRunnable loop so if
		// findRunnable is stuck in a loop we don't perturb that
		// either.
		//
		// The same mutex is locked twice on purpose: that is the deadlock.
		lock(&deadlock)
		lock(&deadlock)
	}

	// This thread is going to run a goroutine and is not spinning anymore,
	// so if it was marked as spinning we need to reset it now and potentially
	// start a new spinning M.
	if mp.spinning {
		resetspinning()
	}

	if sched.disable.user && !schedEnabled(gp) {
		// Scheduling of this goroutine is disabled. Put it on
		// the list of pending runnable goroutines for when we
		// re-enable user scheduling and look again.
		lock(&sched.lock)
		if schedEnabled(gp) {
			// Something re-enabled scheduling while we
			// were acquiring the lock.
			unlock(&sched.lock)
		} else {
			sched.disable.runnable.pushBack(gp)
			unlock(&sched.lock)
			goto top
		}
	}

	// If about to schedule a not-normal goroutine (a GCworker or tracereader),
	// wake a P if there is one.
	if tryWakeP {
		wakep()
	}
	if gp.lockedm != 0 {
		// Hands off own p to the locked m,
		// then blocks waiting for a new p.
		startlockedm(gp)
		goto top
	}

	execute(gp, inheritTime)
}

// dropg removes the association between m and the current goroutine m->curg (gp for short).
// Typically a caller sets gp's status away from Grunning and then
// immediately calls dropg to finish the job. The caller is also responsible
// for arranging that gp will be restarted using ready at an
// appropriate time. After calling dropg and arranging for gp to be
// readied later, the caller can do other work but eventually should
// call schedule to restart the scheduling of goroutines on this m.
func dropg() {
	gp := getg()

	// Clear both directions of the link (curg.m first, then m.curg),
	// using the setters whose names indicate no write barrier.
	setMNoWB(&gp.m.curg.m, nil)
	setGNoWB(&gp.m.curg, nil)
}

// parkunlock_c unlocks the mutex that lock points to and reports true.
// Its signature matches the fn(gp, mp.waitlock) call made in park_m,
// where a true result lets the park proceed; presumably it is installed
// as m.waitunlockf by a caller elsewhere (not visible here).
func parkunlock_c(gp *g, lock unsafe.Pointer) bool {
	unlock((*mutex)(lock))
	return true
}

// park continuation on g0.
func park_m(gp *g) {
	mp := getg().m

	trace := traceAcquire()

	// If g is in a synctest group, we don't want to let the group
	// become idle until after the waitunlockf (if any) has confirmed
	// that the park is happening.
	// We need to record gp.bubble here, since waitunlockf can change it.
	bubble := gp.bubble
	if bubble != nil {
		bubble.incActive()
	}

	if trace.ok() {
		// Trace the event before the transition. It may take a
		// stack trace, but we won't own the stack after the
		// transition anymore.
		trace.GoPark(mp.waitTraceBlockReason, mp.waitTraceSkip)
	}
	// N.B. Not using casGToWaiting here because the waitreason is
	// set by park_m's caller.
	casgstatus(gp, _Grunning, _Gwaiting)
	if trace.ok() {
		traceRelease(trace)
	}

	dropg()

	if fn := mp.waitunlockf; fn != nil {
		ok := fn(gp, mp.waitlock)
		mp.waitunlockf = nil
		mp.waitlock = nil
		if !ok {
			// The unlock function vetoed the park: make gp runnable
			// again and resume it on this M right away.
			trace := traceAcquire()
			casgstatus(gp, _Gwaiting, _Grunnable)
			if bubble != nil {
				bubble.decActive()
			}
			if trace.ok() {
				trace.GoUnpark(gp, 2)
				traceRelease(trace)
			}
			execute(gp, true) // Schedule it back, never returns.
		}
	}

	if bubble != nil {
		bubble.decActive()
	}

	schedule()
}

// goschedImpl moves gp from _Grunning to _Grunnable, queues it, and calls
// schedule. If preempted is true and sched.gcwaiting is set, gp is put in
// the current P's runnext; otherwise it goes on the global run queue.
// preempted also selects the trace event (GoPreempt vs. GoSched).
//
// It throws if gp's status, ignoring the _Gscan bit, is not _Grunning.
func goschedImpl(gp *g, preempted bool) {
	pp := gp.m.p.ptr()
	trace := traceAcquire()
	status := readgstatus(gp)
	if status&^_Gscan != _Grunning {
		dumpgstatus(gp)
		throw("bad g status")
	}
	if trace.ok() {
		// Trace the event before the transition. It may take a
		// stack trace, but we won't own the stack after the
		// transition anymore.
		if preempted {
			trace.GoPreempt()
		} else {
			trace.GoSched()
		}
	}
	casgstatus(gp, _Grunning, _Grunnable)
	if trace.ok() {
		traceRelease(trace)
	}

	dropg()
	if preempted && sched.gcwaiting.Load() {
		// If preempted for STW, keep the G on the local P in runnext
		// so it can keep running immediately after the STW.
		runqput(pp, gp, true)
	} else {
		lock(&sched.lock)
		globrunqput(gp)
		unlock(&sched.lock)
	}

	if mainStarted {
		wakep()
	}

	schedule()
}

// Gosched continuation on g0.
func gosched_m(gp *g) {
	goschedImpl(gp, false)
}

// goschedguarded is a forbidden-states-avoided version of gosched_m.
func goschedguarded_m(gp *g) {
	if !canPreemptM(gp.m) {
		gogo(&gp.sched) // never return
	}
	goschedImpl(gp, false)
}

// gopreempt_m is goschedImpl with preempted set to true.
func gopreempt_m(gp *g) {
	goschedImpl(gp, true)
}

// preemptPark parks gp and puts it in _Gpreempted.
//
//go:systemstack
func preemptPark(gp *g) {
	status := readgstatus(gp)
	if status&^_Gscan != _Grunning {
		dumpgstatus(gp)
		throw("bad g status")
	}

	if gp.asyncSafePoint {
		// Double-check that async preemption does not
		// happen in SPWRITE assembly functions.
		// isAsyncSafePoint must exclude this case.
		f := findfunc(gp.sched.pc)
		if !f.valid() {
			throw("preempt at unknown pc")
		}
		if f.flag&abi.FuncFlagSPWrite != 0 {
			println("runtime: unexpected SPWRITE function", funcname(f), "in async preempt")
			throw("preempt SPWRITE")
		}
	}

	// Transition from _Grunning to _Gscan|_Gpreempted. We can't
	// be in _Grunning when we dropg because then we'd be running
	// without an M, but the moment we're in _Gpreempted,
	// something could claim this G before we've fully cleaned it
	// up. Hence, we set the scan bit to lock down further
	// transitions until we can dropg.
	casGToPreemptScan(gp, _Grunning, _Gscan|_Gpreempted)

	// Be careful about ownership as we trace this next event.
	//
	// According to the tracer invariants (trace.go) it's unsafe
	// for us to emit an event for a goroutine we do not own.
	// The moment we CAS into _Gpreempted, suspendG could CAS the
	// goroutine to _Gwaiting, effectively taking ownership. All of
	// this could happen before we even get the chance to emit
	// an event. The end result is that the events could appear
	// out of order, and the tracer generally assumes the scheduler
	// takes care of the ordering between GoPark and GoUnpark.
	//
	// The answer here is simple: emit the event while we still hold
	// the _Gscan bit on the goroutine, since the _Gscan bit means
	// ownership over transitions.
	//
	// We still need to traceAcquire and traceRelease across the CAS
	// because the tracer could be what's calling suspendG in the first
	// place. This also upholds the tracer invariant that we must hold
	// traceAcquire/traceRelease across the transition. However, we
	// specifically *only* emit the event while we still have ownership.
	trace := traceAcquire()
	if trace.ok() {
		trace.GoPark(traceBlockPreempted, 0)
	}

	// Drop the goroutine from the M. Only do this after the tracer has
	// emitted an event, because it needs the association for GoPark to
	// work correctly.
	dropg()

	// Drop the scan bit and release the trace locker if necessary.
	casfrom_Gscanstatus(gp, _Gscan|_Gpreempted, _Gpreempted)
	if trace.ok() {
		traceRelease(trace)
	}

	// All done.
	schedule()
}

// goyield is like Gosched, but it:
//   - emits a GoPreempt trace event instead of a GoSched trace event
//   - puts the current G on the runq of the current P instead of the globrunq
//
// goyield should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//   - github.com/sagernet/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname goyield
func goyield() {
	checkTimeouts()
	mcall(goyield_m)
}

// goyield_m is the continuation of goyield, entered via mcall. It moves gp
// from _Grunning to _Grunnable, emitting a GoPreempt trace event, puts gp
// on the current P's run queue (runqput with next=false), and calls
// schedule.
func goyield_m(gp *g) {
	trace := traceAcquire()
	pp := gp.m.p.ptr()
	if trace.ok() {
		// Trace the event before the transition. It may take a
		// stack trace, but we won't own the stack after the
		// transition anymore.
		trace.GoPreempt()
	}
	casgstatus(gp, _Grunning, _Grunnable)
	if trace.ok() {
		traceRelease(trace)
	}
	dropg()
	runqput(pp, gp, false)
	schedule()
}

// Finishes execution of the current goroutine.
func goexit1() {
	if raceenabled {
		if gp := getg(); gp.bubble != nil {
			racereleasemergeg(gp, gp.bubble.raceaddr())
		}
		racegoend()
	}
	trace := traceAcquire()
	if trace.ok() {
		trace.GoEnd()
		traceRelease(trace)
	}
	mcall(goexit0)
}

// goexit continuation on g0.
func goexit0(gp *g) {
	if goexperiment.RuntimeSecret && gp.secret > 0 {
		// Erase the whole stack. This path only occurs when
		// runtime.Goexit is called from within a runtime/secret.Do call.
		memclrNoHeapPointers(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
		// Since this is running on g0, our registers are already zeroed from going through
		// mcall in secret mode.
	}
	gdestroy(gp)
	schedule()
}

// gdestroy tears down the exiting goroutine gp: it moves gp from _Grunning
// to _Gdead, resets its per-goroutine fields, flushes any GC assist credit
// to the global pool, drops it from the M, and hands it to gfput.
//
// If gp was locked to the thread, the M does not go back to normal
// scheduling: except on plan9, gdestroy jumps to mp.g0.sched and does not
// return. It throws if gp was locked while mp.lockedInt is non-zero.
func gdestroy(gp *g) {
	mp := getg().m
	pp := mp.p.ptr()

	casgstatus(gp, _Grunning, _Gdead)
	gcController.addScannableStack(pp, -int64(gp.stack.hi-gp.stack.lo))
	if isSystemGoroutine(gp, false) {
		sched.ngsys.Add(-1)
	}
	gp.m = nil
	// Remember whether gp was locked to this thread before clearing the link.
	locked := gp.lockedm != 0
	gp.lockedm = 0
	mp.lockedg = 0
	gp.preemptStop = false
	gp.paniconfault = false
	gp._defer = nil // should be true already but just in case.
	gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data.
	gp.writebuf = nil
	gp.waitreason = waitReasonZero
	gp.param = nil
	gp.labels = nil
	gp.timer = nil
	gp.bubble = nil
	gp.fipsOnlyBypass = false
	gp.secret = 0

	if gcBlackenEnabled != 0 && gp.gcAssistBytes > 0 {
		// Flush assist credit to the global pool. This gives
		// better information to pacing if the application is
		// rapidly creating and exiting goroutines.
		assistWorkPerByte := gcController.assistWorkPerByte.Load()
		scanCredit := int64(assistWorkPerByte * float64(gp.gcAssistBytes))
		gcController.bgScanCredit.Add(scanCredit)
		gp.gcAssistBytes = 0
	}

	dropg()

	if GOARCH == "wasm" { // no threads yet on wasm
		gfput(pp, gp)
		return
	}

	if locked && mp.lockedInt != 0 {
		print("runtime: mp.lockedInt = ", mp.lockedInt, "\n")
		if mp.isextra {
			throw("runtime.Goexit called in a thread that was not created by the Go runtime")
		}
		throw("exited a goroutine internally locked to the OS thread")
	}
	gfput(pp, gp)
	if locked {
		// The goroutine may have locked this thread because
		// it put it in an unusual kernel state. Kill it
		// rather than returning it to the thread pool.

		// Return to mstart, which will release the P and exit
		// the thread.
		if GOOS != "plan9" { // See golang.org/issue/22227.
			gogo(&mp.g0.sched)
		} else {
			// Clear lockedExt on plan9 since we may end up re-using
			// this thread.
			mp.lockedExt = 0
		}
	}
}

// save updates getg().sched to refer to pc and sp so that a following
// gogo will restore pc and sp.
//
// save must not have write barriers because invoking a write barrier
// can clobber getg().sched.
//
//go:nosplit
//go:nowritebarrierrec
func save(pc, sp, bp uintptr) {
	gp := getg()

	if gp == gp.m.g0 || gp == gp.m.gsignal {
		// m.g0.sched is special and must describe the context
		// for exiting the thread. mstart1 writes to it directly.
		// m.gsignal.sched should not be used at all.
		// This check makes sure save calls do not accidentally
		// run in contexts where they'd write to system g's.
		throw("save on system g not allowed")
	}

	gp.sched.pc = pc
	gp.sched.sp = sp
	gp.sched.lr = 0
	gp.sched.bp = bp
	// We need to ensure ctxt is zero, but can't have a write
	// barrier here. However, it should always already be zero.
	// Assert that.
	if gp.sched.ctxt != nil {
		badctxt()
	}
}

// The goroutine g is about to enter a system call.
// Record that it's not using the cpu anymore.
// This is called only from the go syscall library and cgocall,
// not from the low-level system calls used by the runtime.
//
// Entersyscall cannot split the stack: the save must
// make g->sched refer to the caller's stack segment, because
// entersyscall is going to return immediately after.
//
// Nothing entersyscall calls can split the stack either.
// We cannot safely move the stack during an active call to syscall,
// because we do not know which of the uintptr arguments are
// really pointers (back into the stack).
// In practice, this means that we make the fast path run through
// entersyscall doing no-split things, and the slow path has to use systemstack
// to run bigger things on the system stack.
//
// reentersyscall is the entry point used by cgo callbacks, where explicitly
// saved SP and PC are restored. This is needed when exitsyscall will be called
// from a function further up in the call stack than the parent, as g->syscallsp
// must always point to a valid stack frame. entersyscall below is the normal
// entry point for syscalls, which obtains the SP and PC from the caller.
//
//go:nosplit
func reentersyscall(pc, sp, bp uintptr) {
	gp := getg()

	// Disable preemption because during this function g is in Gsyscall status,
	// but can have inconsistent g->sched, do not let GC observe it.
	gp.m.locks++

	// This M may have a signal stack that is dirtied with secret information
	// (see package "runtime/secret"). Since it's about to go into a syscall for
	// an arbitrary amount of time and the G that put the secret info there
	// might have returned from secret.Do, we have to zero it out now, lest we
	// break the guarantee that secrets are purged by the next GC after a return
	// to secret.Do.
	//
	// It might be tempting to think that we only need to zero out this if we're
	// not running in secret mode anymore, but that leaves an ABA problem. The G
	// that put the secrets onto our signal stack may not be the one that is
	// currently executing.
	//
	// Logically, we should erase this when we lose our P, not when we enter the
	// syscall. This would avoid a zeroing in the case where the call returns
	// almost immediately. Since we use this path for cgo calls as well, these
	// fast "syscalls" are quite common. However, since we only erase the signal
	// stack if we were delivered a signal in secret mode and considering the
	// cross-thread synchronization cost for the P, it hardly seems worth it.
	//
	// TODO(dmo): can we encode the goid into mp.signalSecret and avoid the ABA problem?
	if goexperiment.RuntimeSecret {
		eraseSecretsSignalStk()
	}

	// Entersyscall must not call any function that might split/grow the stack.
	// (See details in comment above.)
	// Catch calls that might, by replacing the stack guard with something that
	// will trip any stack check and leaving a flag to tell newstack to die.
	gp.stackguard0 = stackPreempt
	gp.throwsplit = true

	// Copy the syscalltick over so we can identify if the P got stolen later.
	gp.m.syscalltick = gp.m.p.ptr().syscalltick

	pp := gp.m.p.ptr()
	if pp.runSafePointFn != 0 {
		// runSafePointFn may stack split if run on this stack
		systemstack(runSafePointFn)
	}
	// Remember the P we are entering the syscall with.
	gp.m.oldp.set(pp)

	// Leave SP around for GC and traceback.
	save(pc, sp, bp)
	gp.syscallsp = sp
	gp.syscallpc = pc
	gp.syscallbp = bp

	// Double-check sp and bp.
	if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp {
		systemstack(func() {
			print("entersyscall inconsistent sp ", hex(gp.syscallsp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n")
			throw("entersyscall")
		})
	}
	// N.B. && binds tighter than ||, so this reads
	// (bp != 0 && bp < lo) || hi < bp. A zero bp still passes, because
	// stack.hi < 0 cannot hold for an unsigned value.
	if gp.syscallbp != 0 && gp.syscallbp < gp.stack.lo || gp.stack.hi < gp.syscallbp {
		systemstack(func() {
			print("entersyscall inconsistent bp ", hex(gp.syscallbp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n")
			throw("entersyscall")
		})
	}
	trace := traceAcquire()
	if trace.ok() {
		// Emit a trace event. Notably, actually emitting the event must happen before
		// the casgstatus because it mutates the P, but the traceLocker must be held
		// across the casgstatus since we're transitioning out of _Grunning
		// (see trace.go invariants).
		systemstack(func() {
			trace.GoSysCall()
		})
		// systemstack clobbered gp.sched, so restore it.
		save(pc, sp, bp)
	}
	if sched.gcwaiting.Load() {
		// Optimization: If there's a pending STW, do the equivalent of
		// entersyscallblock here at the last minute and immediately give
		// away our P.
		systemstack(func() {
			entersyscallHandleGCWait(trace)
		})
		// systemstack clobbered gp.sched, so restore it.
		save(pc, sp, bp)
	}
	// As soon as we switch to _Gsyscall, we are in danger of losing our P.
	// We must not touch it after this point.
	//
	// Try to do a quick CAS to avoid calling into casgstatus in the common case.
	// If we have a bubble, we need to fall into casgstatus.
	if gp.bubble != nil || !gp.atomicstatus.CompareAndSwap(_Grunning, _Gsyscall) {
		casgstatus(gp, _Grunning, _Gsyscall)
	}
	if staticLockRanking {
		// casgstatus clobbers gp.sched via systemstack under staticLockRanking. Restore it.
		save(pc, sp, bp)
	}
	if trace.ok() {
		// N.B. We don't need to go on the systemstack because traceRelease is very
		// carefully recursively nosplit. This also means we don't need to worry
		// about clobbering gp.sched.
		traceRelease(trace)
	}
	if sched.sysmonwait.Load() {
		systemstack(entersyscallWakeSysmon)
		// systemstack clobbered gp.sched, so restore it.
		save(pc, sp, bp)
	}
	// Balances the locks++ at the top of the function.
	gp.m.locks--
}

// debugExtendGrunningNoP is a debug mode that extends the windows in which
// we're _Grunning without a P in order to try to shake out bugs with code
// assuming this state is impossible.
const debugExtendGrunningNoP = false

// Standard syscall entry used by the go syscall library and normal cgo calls.
//
// This is exported via linkname to assembly in the syscall package and x/sys.
//
// Other packages should not be accessing entersyscall directly,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:nosplit
//go:linkname entersyscall
func entersyscall() {
	// N.B. getcallerfp cannot be written directly as argument in the call
	// to reentersyscall because it forces spilling the other arguments to
	// the stack. This results in exceeding the nosplit stack requirements
	// on some platforms.
	fp := getcallerfp()
	reentersyscall(sys.GetCallerPC(), sys.GetCallerSP(), fp)
}

// entersyscallWakeSysmon wakes sysmon if it is waiting. It is run on the
// system stack by reentersyscall when sched.sysmonwait is observed set.
// The flag is rechecked under sched.lock before it is cleared and
// sched.sysmonnote is woken.
func entersyscallWakeSysmon() {
	lock(&sched.lock)
	if sched.sysmonwait.Load() {
		sched.sysmonwait.Store(false)
		notewakeup(&sched.sysmonnote)
	}
	unlock(&sched.lock)
}

// entersyscallHandleGCWait is run on the system stack by reentersyscall
// when sched.gcwaiting is set. If a stop-the-world is still waiting for Ps
// (sched.stopwait > 0), it detaches the current P from the M, puts the P
// in _Pgcstop, and wakes sched.stopnote when stopwait reaches zero.
// Otherwise it does nothing.
//
// trace is the caller's traceLocker; when it is ok, a ProcStop event is
// emitted for the P.
func entersyscallHandleGCWait(trace traceLocker) {
	gp := getg()

	lock(&sched.lock)
	if sched.stopwait > 0 {
		// Set our P to _Pgcstop so the STW can take it.
		pp := gp.m.p.ptr()
		pp.m = 0
		gp.m.p = 0
		atomic.Store(&pp.status, _Pgcstop)

		if trace.ok() {
			trace.ProcStop(pp)
		}
		addGSyscallNoP(gp.m) // We gave up our P voluntarily.
		pp.gcStopTime = nanotime()
		pp.syscalltick++
		if sched.stopwait--; sched.stopwait == 0 {
			notewakeup(&sched.stopnote)
		}
	}
	unlock(&sched.lock)
}

// The same as entersyscall(), but with a hint that the syscall is blocking.
//
// entersyscallblock should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname entersyscallblock
//go:nosplit
func entersyscallblock() {
	gp := getg()

	gp.m.locks++ // see comment in entersyscall
	gp.throwsplit = true
	gp.stackguard0 = stackPreempt // see comment in entersyscall
	gp.m.syscalltick = gp.m.p.ptr().syscalltick
	gp.m.p.ptr().syscalltick++

	addGSyscallNoP(gp.m) // We're going to give up our P.

	// Leave SP around for GC and traceback.
	pc := sys.GetCallerPC()
	sp := sys.GetCallerSP()
	bp := getcallerfp()
	save(pc, sp, bp)
	gp.syscallsp = gp.sched.sp
	gp.syscallpc = gp.sched.pc
	gp.syscallbp = gp.sched.bp
	if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp {
		// Copy the three values into locals before switching stacks so the
		// report prints what was observed at the time of the check.
		sp1 := sp
		sp2 := gp.sched.sp
		sp3 := gp.syscallsp
		systemstack(func() {
			print("entersyscallblock inconsistent sp ", hex(sp1), " ", hex(sp2), " ", hex(sp3), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n")
			throw("entersyscallblock")
		})
	}

	// Once we switch to _Gsyscall, we can't safely touch
	// our P anymore, so we need to hand it off beforehand.
	// The tracer also needs to see the syscall before the P
	// handoff, so the order here must be (1) trace,
	// (2) handoff, (3) _Gsyscall switch.
	trace := traceAcquire()
	systemstack(func() {
		if trace.ok() {
			trace.GoSysCall()
		}
		handoffp(releasep())
	})
	// <--
	// Caution: we're in a small window where we are in _Grunning without a P.
	// -->
	if debugExtendGrunningNoP {
		usleep(10)
	}
	casgstatus(gp, _Grunning, _Gsyscall)
	// Re-validate the saved stack pointer now that the status has changed.
	if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp {
		systemstack(func() {
			print("entersyscallblock inconsistent sp ", hex(sp), " ", hex(gp.sched.sp), " ", hex(gp.syscallsp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n")
			throw("entersyscallblock")
		})
	}
	// N.B. && binds tighter than ||, so this reads as
	// (bp != 0 && bp < lo) || hi < bp. A zero syscallbp therefore skips
	// only the lower-bound comparison.
	if gp.syscallbp != 0 && gp.syscallbp < gp.stack.lo || gp.stack.hi < gp.syscallbp {
		systemstack(func() {
			print("entersyscallblock inconsistent bp ", hex(bp), " ", hex(gp.sched.bp), " ", hex(gp.syscallbp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n")
			throw("entersyscallblock")
		})
	}
	if trace.ok() {
		systemstack(func() {
			traceRelease(trace)
		})
	}

	// Resave for traceback during blocked call.
	save(sys.GetCallerPC(), sys.GetCallerSP(), getcallerfp())

	gp.m.locks--
}

// The goroutine g exited its system call.
// Arrange for it to run on a cpu again.
// This is called only from the go syscall library, not
// from the low-level system calls used by the runtime.
//
// Write barriers are not allowed because our P may have been stolen.
//
// This is exported via linkname to assembly in the syscall package.
//
// exitsyscall should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:nosplit
//go:nowritebarrierrec
//go:linkname exitsyscall
func exitsyscall() {
	gp := getg()

	gp.m.locks++ // see comment in entersyscall
	if sys.GetCallerSP() > gp.syscallsp {
		throw("exitsyscall: syscall frame is no longer valid")
	}
	gp.waitsince = 0

	if sched.stopwait == freezeStopWait {
		// Wedge ourselves if there's an outstanding freezetheworld.
		// If we transition to running, we might end up with our traceback
		// being taken twice.
		systemstack(func() {
			// Acquiring the same lock twice never returns; that is the
			// intended way to park this thread forever.
			lock(&deadlock)
			lock(&deadlock)
		})
	}

	// Optimistically assume we're going to keep running, and switch to running.
	// Before this point, our P wiring is not ours. Once we get past this point,
	// we can access our P if we have it, otherwise we lost it.
	//
	// N.B. Because we're transitioning to _Grunning here, traceAcquire doesn't
	// need to be held ahead of time. We're effectively atomic with respect to
	// the tracer because we're non-preemptible and in the runtime. It can't stop
	// us to read a bad status.
	//
	// Try to do a quick CAS to avoid calling into casgstatus in the common case.
	// If we have a bubble, we need to fall into casgstatus.
	if gp.bubble != nil || !gp.atomicstatus.CompareAndSwap(_Gsyscall, _Grunning) {
		casgstatus(gp, _Gsyscall, _Grunning)
	}

	// Caution: we're in a window where we may be in _Grunning without a P.
	// Either we will grab a P or call exitsyscall0, where we'll switch to
	// _Grunnable.
	if debugExtendGrunningNoP {
		usleep(10)
	}

	// Grab and clear our old P.
	oldp := gp.m.oldp.ptr()
	gp.m.oldp.set(nil)

	// Check if we still have a P, and if not, try to acquire an idle P.
	pp := gp.m.p.ptr()
	if pp != nil {
		// Fast path: we still have our P. Just emit a syscall exit event.
		if trace := traceAcquire(); trace.ok() {
			systemstack(func() {
				// The truth is we truly never lost the P, but syscalltick
				// is used to indicate whether the P should be treated as
				// lost anyway. For example, when syscalltick is trashed by
				// dropm.
				//
				// TODO(mknyszek): Consider a more explicit mechanism for this.
				// Then syscalltick doesn't need to be trashed, and can be used
				// exclusively by sysmon for deciding when it's time to retake.
				if pp.syscalltick == gp.m.syscalltick {
					trace.GoSysExit(false)
				} else {
					// Since we need to pretend we lost the P, but nobody ever
					// took it, we need a ProcSteal event to model the loss.
					// Then, continue with everything else we'd do if we lost
					// the P.
					trace.ProcSteal(pp)
					trace.ProcStart()
					trace.GoSysExit(true)
					trace.GoStart()
				}
				traceRelease(trace)
			})
		}
	} else {
		// Slow path: we lost our P. Try to get another one.
		systemstack(func() {
			// Try to get some other P.
			if pp := exitsyscallTryGetP(oldp); pp != nil {
				// Install the P.
				acquirepNoTrace(pp)

				// We're going to start running again, so emit all the relevant events.
				if trace := traceAcquire(); trace.ok() {
					trace.ProcStart()
					trace.GoSysExit(true)
					trace.GoStart()
					traceRelease(trace)
				}
			}
		})
		// The closure's pp shadows ours, so re-read the M's P to learn
		// whether one was installed.
		pp = gp.m.p.ptr()
	}

	// If we have a P, clean up and exit.
	if pp != nil {
		if goroutineProfile.active {
			// Make sure that gp has had its stack written out to the goroutine
			// profile, exactly as it was when the goroutine profiler first
			// stopped the world.
			systemstack(func() {
				tryRecordGoroutineProfileWB(gp)
			})
		}

		// Increment the syscalltick for P, since we're exiting a syscall.
		pp.syscalltick++

		// Garbage collector isn't running (since we are),
		// so okay to clear syscallsp.
		gp.syscallsp = 0
		gp.m.locks--
		if gp.preempt {
			// Restore the preemption request in case we cleared it in newstack.
			gp.stackguard0 = stackPreempt
		} else {
			// Otherwise restore the real stackGuard, we clobbered it in entersyscall/entersyscallblock.
			gp.stackguard0 = gp.stack.lo + stackGuard
		}
		gp.throwsplit = false

		if sched.disable.user && !schedEnabled(gp) {
			// Scheduling of this goroutine is disabled.
			Gosched()
		}
		return
	}
	// Slowest path: We couldn't get a P, so call into the scheduler.
	gp.m.locks--

	// Call the scheduler.
	mcall(exitsyscallNoP)

	// Scheduler returned, so we're allowed to run now.
	// Delete the syscallsp information that we left for
	// the garbage collector during the system call.
	// Must wait until now because until gosched returns
	// we don't know for sure that the garbage collector
	// is not running.
	gp.syscallsp = 0
	gp.m.p.ptr().syscalltick++
	gp.throwsplit = false
}

// exitsyscallTryGetP is exitsyscall's attempt to get any P, if it's missing one.
// It first tries to take back oldp (when non-nil), then falls back to the
// idle P list. It returns the P on success and nil on failure; installing
// the P on the M is left to the caller.
//
// Must execute on the systemstack because exitsyscall is nosplit.
//
//go:systemstack
func exitsyscallTryGetP(oldp *p) *p {
	// Try to steal our old P back.
	if oldp != nil {
		if thread, ok := setBlockOnExitSyscall(oldp); ok {
			thread.takeP()
			decGSyscallNoP(getg().m) // We got a P for ourselves.
			thread.resume()
			return oldp
		}
	}

	// Try to get an idle P.
	if sched.pidle != 0 {
		lock(&sched.lock)
		pp, _ := pidleget(0)
		// Having taken an idle P, also wake sysmon if it is waiting.
		if pp != nil && sched.sysmonwait.Load() {
			sched.sysmonwait.Store(false)
			notewakeup(&sched.sysmonnote)
		}
		unlock(&sched.lock)
		if pp != nil {
			decGSyscallNoP(getg().m) // We got a P for ourselves.
			return pp
		}
	}
	return nil
}

// exitsyscall slow path on g0.
// Failed to acquire P, enqueue gp as runnable.
//
// Called via mcall, so gp is the calling g from this M.
//
//go:nowritebarrierrec
func exitsyscallNoP(gp *g) {
	traceExitingSyscall()
	trace := traceAcquire()
	casgstatus(gp, _Grunning, _Grunnable)
	traceExitedSyscall()
	if trace.ok() {
		// Write out syscall exit eagerly.
		//
		// It's important that we write this *after* we know whether we
		// lost our P or not (determined by exitsyscallfast).
		//
		// NOTE(review): no exitsyscallfast is visible in this part of the
		// file; the reference above may be stale (exitsyscall itself now
		// makes that determination before calling here). Confirm.
		trace.GoSysExit(true)
		traceRelease(trace)
	}
	decGSyscallNoP(getg().m)
	dropg()
	lock(&sched.lock)
	var pp *p
	if schedEnabled(gp) {
		pp, _ = pidleget(0)
	}
	var locked bool
	if pp == nil {
		globrunqput(gp)

		// Below, we stoplockedm if gp is locked. globrunqput releases
		// ownership of gp, so we must check if gp is locked prior to
		// committing the release by unlocking sched.lock, otherwise we
		// could race with another M transitioning gp from unlocked to
		// locked.
		locked = gp.lockedm != 0
	} else if sched.sysmonwait.Load() {
		sched.sysmonwait.Store(false)
		notewakeup(&sched.sysmonnote)
	}
	unlock(&sched.lock)
	if pp != nil {
		acquirep(pp)
		execute(gp, false) // Never returns.
	}
	if locked {
		// Wait until another thread schedules gp and so m again.
		//
		// N.B. lockedm must be this M, as this g was running on this M
		// before entersyscall.
		stoplockedm()
		execute(gp, false) // Never returns.
	}
	stopm()
	schedule() // Never returns.
}

// addGSyscallNoP must be called when a goroutine in a syscall loses its P.
// This function updates all relevant accounting.
//
// nosplit because it's called on the syscall paths.
//
//go:nosplit
func addGSyscallNoP(mp *m) {
	// It's safe to read isExtraInC here because it's only mutated
	// outside of _Gsyscall, and we know this thread is attached
	// to a goroutine in _Gsyscall and blocked from exiting.
	if !mp.isExtraInC {
		// Increment nGsyscallNoP since we're taking away a P
		// from a _Gsyscall goroutine, but only if isExtraInC
		// is not set on the M. If it is, then this thread is
		// back to being a full C thread, and will just inflate
		// the count of not-in-go goroutines. See go.dev/issue/76435.
		sched.nGsyscallNoP.Add(1)
	}
}

// decGSyscallNoP must be called whenever a goroutine in a syscall without
// a P exits the system call. This function updates all relevant accounting.
//
// nosplit because it's called from dropm.
//
//go:nosplit
func decGSyscallNoP(mp *m) {
	// Update nGsyscallNoP, but only if this is not a thread coming
	// out of C. See the comment in addGSyscallNoP. This logic must match,
	// to avoid unmatched increments and decrements.
	if !mp.isExtraInC {
		sched.nGsyscallNoP.Add(-1)
	}
}

// Called from syscall package before fork.
//
// syscall_runtime_BeforeFork is for package syscall,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname syscall_runtime_BeforeFork syscall.runtime_BeforeFork
//go:nosplit
func syscall_runtime_BeforeFork() {
	gp := getg().m.curg

	// Block signals during a fork, so that the child does not run
	// a signal handler before exec if a signal is sent to the process
	// group. See issue #18600.
	gp.m.locks++
	sigsave(&gp.m.sigmask)
	sigblock(false)

	// This function is called before fork in syscall package.
	// Code between fork and exec must not allocate memory nor even try to grow stack.
	// Here we spoil g.stackguard0 to reliably detect any attempts to grow stack.
	// runtime_AfterFork will undo this in parent process, but not in child.
	gp.stackguard0 = stackFork
}

// Called from syscall package after fork in parent.
// It undoes syscall_runtime_BeforeFork: the stack guard is restored, the
// saved signal mask is reinstated, and m.locks is decremented.
//
// syscall_runtime_AfterFork is for package syscall,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname syscall_runtime_AfterFork syscall.runtime_AfterFork
//go:nosplit
func syscall_runtime_AfterFork() {
	gp := getg().m.curg

	// See the comments in syscall_runtime_BeforeFork.
	gp.stackguard0 = gp.stack.lo + stackGuard

	msigrestore(gp.m.sigmask)

	gp.m.locks--
}

// inForkedChild is true while manipulating signals in the child process.
// This is used to avoid calling libc functions in case we are using vfork.
var inForkedChild bool

// Called from syscall package after fork in child.
// It resets non-sigignored signals to the default handler, and
// restores the signal mask in preparation for the exec.
//
// Because this might be called during a vfork, and therefore may be
// temporarily sharing address space with the parent process, this must
// not change any global variables or calling into C code that may do so.
//
// syscall_runtime_AfterForkInChild is for package syscall,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname syscall_runtime_AfterForkInChild syscall.runtime_AfterForkInChild
//go:nosplit
//go:nowritebarrierrec
func syscall_runtime_AfterForkInChild() {
	// It's OK to change the global variable inForkedChild here
	// because we are going to change it back. There is no race here,
	// because if we are sharing address space with the parent process,
	// then the parent process can not be running concurrently.
	inForkedChild = true

	clearSignalHandlers()

	// When we are the child we are the only thread running,
	// so we know that nothing else has changed gp.m.sigmask.
	msigrestore(getg().m.sigmask)

	inForkedChild = false
}

// pendingPreemptSignals is the number of preemption signals
// that have been sent but not received. This is only used on Darwin.
// For #41702.
var pendingPreemptSignals atomic.Int32

// Called from syscall package before Exec.
// It takes execLock, which syscall_runtime_AfterExec releases.
//
//go:linkname syscall_runtime_BeforeExec syscall.runtime_BeforeExec
func syscall_runtime_BeforeExec() {
	// Prevent thread creation during exec.
	execLock.lock()

	// On Darwin, wait for all pending preemption signals to
	// be received. See issue #41702.
	if GOOS == "darwin" || GOOS == "ios" {
		for pendingPreemptSignals.Load() > 0 {
			osyield()
		}
	}
}

// Called from syscall package after Exec.
// It releases the execLock taken by syscall_runtime_BeforeExec.
//
//go:linkname syscall_runtime_AfterExec syscall.runtime_AfterExec
func syscall_runtime_AfterExec() {
	execLock.unlock()
}

// Allocate a new g, with a stack big enough for stacksize bytes.
// If stacksize is negative, no stack is allocated and the g is
// returned with a zero stack.
func malg(stacksize int32) *g {
	newg := new(g)
	if stacksize >= 0 {
		// Add the system-reserved portion and round the total with round2
		// before allocating.
		stacksize = round2(stackSystem + stacksize)
		systemstack(func() {
			newg.stack = stackalloc(uint32(stacksize))
			if valgrindenabled {
				newg.valgrindStackID = valgrindRegisterStack(unsafe.Pointer(newg.stack.lo), unsafe.Pointer(newg.stack.hi))
			}
		})
		newg.stackguard0 = newg.stack.lo + stackGuard
		newg.stackguard1 = ^uintptr(0)
		// Clear the bottom word of the stack. We record g
		// there on gsignal stack during VDSO on ARM and ARM64.
		*(*uintptr)(unsafe.Pointer(newg.stack.lo)) = 0
	}
	return newg
}

// Create a new g running fn.
// Put it on the queue of g's waiting to run.
// The compiler turns a go statement into a call to this.
func newproc(fn *funcval) {
	gp := getg()
	pc := sys.GetCallerPC()
	systemstack(func() {
		newg := newproc1(fn, gp, pc, false, waitReasonZero)

		// Queue the new g on the current P's local run queue.
		pp := getg().m.p.ptr()
		runqput(pp, newg, true)

		if mainStarted {
			wakep()
		}
	})
}

// Create a new g in state _Grunnable (or _Gwaiting if parked is true), starting at fn.
// callerpc is the address of the go statement that created this. The caller is responsible
// for adding the new g to the scheduler. If parked is true, waitreason must be non-zero.
//
// callergp is the goroutine executing the go statement; the new g records
// it as its parent and copies several per-goroutine settings from it.
func newproc1(fn *funcval, callergp *g, callerpc uintptr, parked bool, waitreason waitReason) *g {
	if fn == nil {
		fatal("go of nil func value")
	}

	mp := acquirem() // disable preemption because we hold M and P in local vars.
	pp := mp.p.ptr()
	// Prefer recycling a dead g from the free lists over allocating one.
	newg := gfget(pp)
	if newg == nil {
		newg = malg(stackMin)
		casgstatus(newg, _Gidle, _Gdead)
		allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
	}
	if newg.stack.hi == 0 {
		throw("newproc1: newg missing stack")
	}

	if readgstatus(newg) != _Gdead {
		throw("newproc1: new g is not Gdead")
	}

	totalSize := uintptr(4*goarch.PtrSize + sys.MinFrameSize) // extra space in case of reads slightly beyond frame
	totalSize = alignUp(totalSize, sys.StackAlign)
	sp := newg.stack.hi - totalSize
	if usesLR {
		// caller's LR
		*(*uintptr)(unsafe.Pointer(sp)) = 0
		prepGoExitFrame(sp)
	}
	if GOARCH == "arm64" {
		// caller's FP
		*(*uintptr)(unsafe.Pointer(sp - goarch.PtrSize)) = 0
	}

	// Start from a zeroed sched, then point it at goexit so that fn
	// appears to have been called from there.
	memclrNoHeapPointers(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched))
	newg.sched.sp = sp
	newg.stktopsp = sp
	newg.sched.pc = abi.FuncPCABI0(goexit) + sys.PCQuantum // +PCQuantum so that previous instruction is in same function
	newg.sched.g = guintptr(unsafe.Pointer(newg))
	gostartcallfn(&newg.sched, fn)
	newg.parentGoid = callergp.goid
	newg.gopc = callerpc
	newg.ancestors = saveAncestors(callergp)
	newg.startpc = fn.fn
	newg.runningCleanups.Store(false)
	if isSystemGoroutine(newg, false) {
		sched.ngsys.Add(1)
	} else {
		// Only user goroutines inherit synctest groups and pprof labels.
		newg.bubble = callergp.bubble
		if mp.curg != nil {
			newg.labels = mp.curg.labels
		}
		if goroutineProfile.active {
			// A concurrent goroutine profile is running. It should include
			// exactly the set of goroutines that were alive when the goroutine
			// profiler first stopped the world. That does not include newg, so
			// mark it as not needing a profile before transitioning it from
			// _Gdead.
			newg.goroutineProfiled.Store(goroutineProfileSatisfied)
		}
	}
	// Track initial transition?
	newg.trackingSeq = uint8(cheaprand())
	if newg.trackingSeq%gTrackingPeriod == 0 {
		newg.tracking = true
	}
	gcController.addScannableStack(pp, int64(newg.stack.hi-newg.stack.lo))

	// Get a goid and switch to runnable. This needs to happen under traceAcquire
	// since it's a goroutine transition. See tracer invariants in trace.go.
	trace := traceAcquire()
	var status uint32 = _Grunnable
	if parked {
		status = _Gwaiting
		newg.waitreason = waitreason
	}
	if pp.goidcache == pp.goidcacheend {
		// Sched.goidgen is the last allocated id,
		// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
		// At startup sched.goidgen=0, so main goroutine receives goid=1.
		pp.goidcache = sched.goidgen.Add(_GoidCacheBatch)
		pp.goidcache -= _GoidCacheBatch - 1
		pp.goidcacheend = pp.goidcache + _GoidCacheBatch
	}
	newg.goid = pp.goidcache
	casgstatus(newg, _Gdead, status)
	pp.goidcache++
	newg.trace.reset()
	if trace.ok() {
		trace.GoCreate(newg, newg.startpc, parked)
		traceRelease(trace)
	}

	// fips140 bubble
	newg.fipsOnlyBypass = callergp.fipsOnlyBypass

	// dit bubble
	newg.ditWanted = callergp.ditWanted

	if goexperiment.RuntimeSecret && callergp.secret > 0 {
		// while it might seem weird to have a non-zero gp.secret value
		// with no calls to secret.Do on the stack, this case is handled
		// just fine by the cleanup logic in goexit0
		// TODO: secret mode is invisible to the user if they don't ask about it via secret.Enabled
		// and can have severe performance penalties (at time of writing, wrapping the entire
		// tls handshake resulted in a 30% slowdown of the benchmarks).
		// Whether a goroutine is running in secret mode should be more visible,
		// maybe with a stack frame or some sort of bubble inspecting mechanism
		newg.secret = 1
	}

	// Set up race context.
	if raceenabled {
		newg.racectx = racegostart(callerpc)
		newg.raceignore = 0
		if newg.labels != nil {
			// See note in proflabel.go on labelSync's role in synchronizing
			// with the reads in the signal handler.
			racereleasemergeg(newg, unsafe.Pointer(&labelSync))
		}
	}
	pp.goroutinesCreated++
	releasem(mp)

	return newg
}

// saveAncestors copies previous ancestors of the given caller g and
// includes info for the current caller into a new set of tracebacks for
// a g being created.
//
// It returns nil when debug.tracebackancestors <= 0 or when the caller is
// the root goroutine (goid 0). Otherwise the result holds at most
// debug.tracebackancestors entries, with the caller's own entry first.
func saveAncestors(callergp *g) *[]ancestorInfo {
	// Copy all prior info, except for the root goroutine (goid 0).
	if debug.tracebackancestors <= 0 || callergp.goid == 0 {
		return nil
	}
	var callerAncestors []ancestorInfo
	if callergp.ancestors != nil {
		callerAncestors = *callergp.ancestors
	}
	n := int32(len(callerAncestors)) + 1
	if n > debug.tracebackancestors {
		n = debug.tracebackancestors
	}
	ancestors := make([]ancestorInfo, n)
	// Slot 0 is reserved for the caller; copy truncates the older
	// ancestors to whatever room is left.
	copy(ancestors[1:], callerAncestors)

	var pcs [tracebackInnerFrames]uintptr
	npcs := gcallers(callergp, 0, pcs[:])
	ipcs := make([]uintptr, npcs)
	copy(ipcs, pcs[:])
	ancestors[0] = ancestorInfo{
		pcs:  ipcs,
		goid: callergp.goid,
		gopc: callergp.gopc,
	}

	ancestorsp := new([]ancestorInfo)
	*ancestorsp = ancestors
	return ancestorsp
}

// Put on gfree list.
// If local list is too long, transfer a batch to the global list.
//
// gp must be in _Gdead. A stack whose size differs from startingStackSize
// is freed before gp is cached.
func gfput(pp *p, gp *g) {
	if readgstatus(gp) != _Gdead {
		throw("gfput: bad status (not Gdead)")
	}

	stksize := gp.stack.hi - gp.stack.lo

	if stksize != uintptr(startingStackSize) {
		// non-standard stack size - free it.
		stackfree(gp.stack)
		gp.stack.lo = 0
		gp.stack.hi = 0
		gp.stackguard0 = 0
		if valgrindenabled {
			valgrindDeregisterStack(gp.valgrindStackID)
			gp.valgrindStackID = 0
		}
	}

	pp.gFree.push(gp)
	if pp.gFree.size >= 64 {
		// Drain the local list down to below 32 entries, sorting the Gs by
		// whether they still own a stack, then publish both batches under
		// a single acquisition of sched.gFree.lock.
		var (
			stackQ   gQueue
			noStackQ gQueue
		)
		for pp.gFree.size >= 32 {
			gp := pp.gFree.pop()
			if gp.stack.lo == 0 {
				noStackQ.push(gp)
			} else {
				stackQ.push(gp)
			}
		}
		lock(&sched.gFree.lock)
		sched.gFree.noStack.pushAll(noStackQ)
		sched.gFree.stack.pushAll(stackQ)
		unlock(&sched.gFree.lock)
	}
}

// Get from gfree list.
// If local list is empty, grab a batch from global list.
//
// It returns nil if no free g is available. A returned g always has a
// stack: one is allocated here if the g had none or had one of the wrong size.
func gfget(pp *p) *g {
retry:
	if pp.gFree.empty() && (!sched.gFree.stack.empty() || !sched.gFree.noStack.empty()) {
		lock(&sched.gFree.lock)
		// Move a batch of free Gs to the P.
		for pp.gFree.size < 32 {
			// Prefer Gs with stacks.
			gp := sched.gFree.stack.pop()
			if gp == nil {
				gp = sched.gFree.noStack.pop()
				if gp == nil {
					break
				}
			}
			pp.gFree.push(gp)
		}
		unlock(&sched.gFree.lock)
		// The emptiness check above ran without the lock, so the refill may
		// have found nothing; re-evaluate from the top.
		goto retry
	}
	gp := pp.gFree.pop()
	if gp == nil {
		return nil
	}
	if gp.stack.lo != 0 && gp.stack.hi-gp.stack.lo != uintptr(startingStackSize) {
		// Deallocate old stack. We kept it in gfput because it was the
		// right size when the goroutine was put on the free list, but
		// the right size has changed since then.
		systemstack(func() {
			stackfree(gp.stack)
			gp.stack.lo = 0
			gp.stack.hi = 0
			gp.stackguard0 = 0
			if valgrindenabled {
				valgrindDeregisterStack(gp.valgrindStackID)
				gp.valgrindStackID = 0
			}
		})
	}
	if gp.stack.lo == 0 {
		// Stack was deallocated in gfput or just above. Allocate a new one.
		systemstack(func() {
			gp.stack = stackalloc(startingStackSize)
			if valgrindenabled {
				gp.valgrindStackID = valgrindRegisterStack(unsafe.Pointer(gp.stack.lo), unsafe.Pointer(gp.stack.hi))
			}
		})
		gp.stackguard0 = gp.stack.lo + stackGuard
	} else {
		// Reusing an existing stack: tell whichever sanitizer is enabled
		// that the range is in use again.
		if raceenabled {
			racemalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
		}
		if msanenabled {
			msanmalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
		}
		if asanenabled {
			asanunpoison(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
		}
	}
	return gp
}

// Purge all cached G's from gfree list to the global list.
func gfpurge(pp *p) {
	var (
		stackQ   gQueue
		noStackQ gQueue
	)
	// Sort the local Gs by whether they own a stack so that each batch
	// lands on the matching global list.
	for !pp.gFree.empty() {
		gp := pp.gFree.pop()
		if gp.stack.lo == 0 {
			noStackQ.push(gp)
		} else {
			stackQ.push(gp)
		}
	}
	lock(&sched.gFree.lock)
	sched.gFree.noStack.pushAll(noStackQ)
	sched.gFree.stack.pushAll(stackQ)
	unlock(&sched.gFree.lock)
}

// Breakpoint executes a breakpoint trap.
func Breakpoint() {
	breakpoint()
}

// dolockOSThread is called by LockOSThread and lockOSThread below
// after they modify m.locked. Do not allow preemption during this call,
// or else the m might be different in this function than in the caller.
//
//go:nosplit
func dolockOSThread() {
	if GOARCH == "wasm" {
		return // no threads on wasm yet
	}
	// Record the wiring in both directions: m -> g and g -> m.
	gp := getg()
	gp.m.lockedg.set(gp)
	gp.lockedm.set(gp.m)
}

// LockOSThread wires the calling goroutine to its current operating system thread.
// The calling goroutine will always execute in that thread,
// and no other goroutine will execute in it,
// until the calling goroutine has made as many calls to
// [UnlockOSThread] as to LockOSThread.
// If the calling goroutine exits without unlocking the thread,
// the thread will be terminated.
//
// All init functions are run on the startup thread. Calling LockOSThread
// from an init function will cause the main function to be invoked on
// that thread.
//
// A goroutine should call LockOSThread before calling OS services or
// non-Go library functions that depend on per-thread state.
//
//go:nosplit
func LockOSThread() {
	if atomic.Load(&newmHandoff.haveTemplateThread) == 0 && GOOS != "plan9" {
		// If we need to start a new thread from the locked
		// thread, we need the template thread. Start it now
		// while we're in a known-good state.
		startTemplateThread()
	}
	gp := getg()
	gp.m.lockedExt++
	if gp.m.lockedExt == 0 {
		// The counter wrapped around to zero; undo the increment and
		// report the overflow.
		gp.m.lockedExt--
		panic("LockOSThread nesting overflow")
	}
	dolockOSThread()
}

// lockOSThread is the runtime-internal variant of LockOSThread. It counts
// in m.lockedInt rather than m.lockedExt and does not start the template
// thread.
//
//go:nosplit
func lockOSThread() {
	getg().m.lockedInt++
	dolockOSThread()
}

// dounlockOSThread is called by UnlockOSThread and unlockOSThread below
// after they update m->locked. Do not allow preemption during this call,
// or else the m might be different in this function than in the caller.
//
// The goroutine is unwired only once both m.lockedInt and m.lockedExt
// have dropped to zero.
//
//go:nosplit
func dounlockOSThread() {
	if GOARCH == "wasm" {
		return // no threads on wasm yet
	}
	gp := getg()
	if gp.m.lockedInt != 0 || gp.m.lockedExt != 0 {
		return
	}
	gp.m.lockedg = 0
	gp.lockedm = 0
}

// UnlockOSThread undoes an earlier call to LockOSThread.
// If this drops the number of active LockOSThread calls on the
// calling goroutine to zero, it unwires the calling goroutine from
// its fixed operating system thread.
// If there are no active LockOSThread calls, this is a no-op.
//
// Before calling UnlockOSThread, the caller must ensure that the OS
// thread is suitable for running other goroutines. If the caller made
// any permanent changes to the state of the thread that would affect
// other goroutines, it should not call this function and thus leave
// the goroutine locked to the OS thread until the goroutine (and
// hence the thread) exits.
//
//go:nosplit
func UnlockOSThread() {
	gp := getg()
	if gp.m.lockedExt == 0 {
		return
	}
	gp.m.lockedExt--
	dounlockOSThread()
}

// unlockOSThread is the runtime-internal variant of UnlockOSThread. It
// counts in m.lockedInt; unlike UnlockOSThread, an unmatched call is
// reported through badunlockosthread rather than ignored.
//
//go:nosplit
func unlockOSThread() {
	gp := getg()
	if gp.m.lockedInt == 0 {
		systemstack(badunlockosthread)
	}
	gp.m.lockedInt--
	dounlockOSThread()
}

// badunlockosthread throws to report an unlockOSThread call that has no
// matching lockOSThread.
func badunlockosthread() {
	throw("runtime: internal error: misuse of lockOSThread/unlockOSThread")
}

// gcount returns an estimate of the number of goroutines: allglen minus
// the Gs sitting on the global and per-P free lists. When includeSys is
// false, sched.ngsys is subtracted as well. The result is never less than 1.
func gcount(includeSys bool) int32 {
	n := int32(atomic.Loaduintptr(&allglen)) - sched.gFree.stack.size - sched.gFree.noStack.size
	if !includeSys {
		n -= sched.ngsys.Load()
	}
	for _, pp := range allp {
		n -= pp.gFree.size
	}

	// All these variables can be changed concurrently, so the result can be inconsistent.
	// But at least the current goroutine is running.
	if n < 1 {
		n = 1
	}
	return n
}

// goroutineleakcount returns the number of leaked goroutines last reported by
// the runtime.
//
//go:linkname goroutineleakcount runtime/pprof.runtime_goroutineleakcount
func goroutineleakcount() int {
	return work.goroutineLeak.count
}

// mcount returns sched.mnext - sched.nmfreed as an int32.
// NOTE(review): presumably the number of Ms that currently exist (created
// minus freed) — confirm against the field documentation on sched.
func mcount() int32 {
	return int32(sched.mnext - sched.nmfreed)
}

// prof holds the global CPU profiling rate and the lock guarding changes to it.
var prof struct {
	signalLock atomic.Uint32

	// Must hold signalLock to write. Reads may be lock-free, but
	// signalLock should be taken to synchronize with changes.
	hz atomic.Int32
}

// The functions below exist only for their PCs. sigprof records the PCs of
// _VDSO, _ExternalCode, _GC and _System as stand-in frames when it cannot
// produce a real traceback. Each body merely calls itself.
func _System()                    { _System() }
func _ExternalCode()              { _ExternalCode() }
func _LostExternalCode()          { _LostExternalCode() }
func _GC()                        { _GC() }
func _LostSIGPROFDuringAtomic64() { _LostSIGPROFDuringAtomic64() }
func _LostContendedRuntimeLock()  { _LostContendedRuntimeLock() }
func _VDSO()                      { _VDSO() }

// Called if we receive a SIGPROF signal.
// Called by the signal handler, may run during STW.
//
// pc, sp and lr describe the interrupted context; gp and mp are the
// goroutine and M being sampled. The collected stack is handed to
// cpuprof.add and traceCPUSample.
//
//go:nowritebarrierrec
func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
	if prof.hz.Load() == 0 {
		return
	}

	// If mp.profilehz is 0, then profiling is not enabled for this thread.
	// We must check this to avoid a deadlock between setcpuprofilerate
	// and the call to cpuprof.add, below.
	if mp != nil && mp.profilehz == 0 {
		return
	}

	// On mips{,le}/arm, 64bit atomics are emulated with spinlocks, in
	// internal/runtime/atomic. If SIGPROF arrives while the program is inside
	// the critical section, it creates a deadlock (when writing the sample).
	// As a workaround, create a counter of SIGPROFs while in critical section
	// to store the count, and pass it to sigprof.add() later when SIGPROF is
	// received from somewhere else (with _LostSIGPROFDuringAtomic64 as pc).
	if GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "arm" {
		if f := findfunc(pc); f.valid() {
			if stringslite.HasPrefix(funcname(f), "internal/runtime/atomic") {
				cpuprof.lostAtomic++
				return
			}
		}
		if GOARCH == "arm" && goarm < 7 && GOOS == "linux" && pc&0xffff0000 == 0xffff0000 {
			// internal/runtime/atomic functions call into kernel
			// helpers on arm < 7. See
			// internal/runtime/atomic/sys_linux_arm.s.
			cpuprof.lostAtomic++
			return
		}
	}

	// Profiling runs concurrently with GC, so it must not allocate.
	// Set a trap in case the code does allocate.
	// Note that on windows, one thread takes profiles of all the
	// other threads, so mp is usually not getg().m.
	// In fact mp may not even be stopped.
	// See golang.org/issue/17165.
	getg().m.mallocing++

	var u unwinder
	var stk [maxCPUProfStack]uintptr
	n := 0
	// NOTE(review): mp is dereferenced here (and at mp.preemptoff below)
	// without the nil guard used in the profilehz and vdsoSP checks —
	// confirm that callers never pass a nil mp.
	if mp.ncgo > 0 && mp.curg != nil && mp.curg.syscallpc != 0 && mp.curg.syscallsp != 0 {
		cgoOff := 0
		// Check cgoCallersUse to make sure that we are not
		// interrupting other code that is fiddling with
		// cgoCallers. We are running in a signal handler
		// with all signals blocked, so we don't have to worry
		// about any other code interrupting us.
		if mp.cgoCallersUse.Load() == 0 && mp.cgoCallers != nil && mp.cgoCallers[0] != 0 {
			// Count the entries up to the first zero, copy them to the
			// front of stk, then mark cgoCallers as consumed.
			for cgoOff < len(mp.cgoCallers) && mp.cgoCallers[cgoOff] != 0 {
				cgoOff++
			}
			n += copy(stk[:], mp.cgoCallers[:cgoOff])
			mp.cgoCallers[0] = 0
		}

		// Collect Go stack that leads to the cgo call.
		u.initAt(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, unwindSilentErrors)
	} else if usesLibcall() && mp.libcallg != 0 && mp.libcallpc != 0 && mp.libcallsp != 0 {
		// Libcall, i.e. runtime syscall on windows.
		// Collect Go stack that leads to the call.
		u.initAt(mp.libcallpc, mp.libcallsp, 0, mp.libcallg.ptr(), unwindSilentErrors)
	} else if mp != nil && mp.vdsoSP != 0 {
		// VDSO call, e.g. nanotime1 on Linux.
		// Collect Go stack that leads to the call.
		u.initAt(mp.vdsoPC, mp.vdsoSP, 0, gp, unwindSilentErrors|unwindJumpStack)
	} else {
		u.initAt(pc, sp, lr, gp, unwindSilentErrors|unwindTrap|unwindJumpStack)
	}
	// Append the Go frames after any cgo frames already placed in stk.
	n += tracebackPCs(&u, 0, stk[n:])

	if n <= 0 {
		// Normal traceback is impossible or has failed.
		// Account it against abstract "System" or "GC".
		n = 2
		if inVDSOPage(pc) {
			pc = abi.FuncPCABIInternal(_VDSO) + sys.PCQuantum
		} else if pc > firstmoduledata.etext {
			// "ExternalCode" is better than "etext".
			pc = abi.FuncPCABIInternal(_ExternalCode) + sys.PCQuantum
		}
		stk[0] = pc
		if mp.preemptoff != "" {
			stk[1] = abi.FuncPCABIInternal(_GC) + sys.PCQuantum
		} else {
			stk[1] = abi.FuncPCABIInternal(_System) + sys.PCQuantum
		}
	}

	if prof.hz.Load() != 0 {
		// Note: it can happen on Windows that we interrupted a system thread
		// with no g, so gp could be nil. The other nil checks are done out of
		// caution, but not expected to be nil in practice.
		var tagPtr *unsafe.Pointer
		if gp != nil && gp.m != nil && gp.m.curg != nil {
			tagPtr = &gp.m.curg.labels
		}
		cpuprof.add(tagPtr, stk[:n])

		// For the trace sample, attribute to the user goroutine (curg)
		// when there is one, otherwise to gp itself. Note that mp below
		// deliberately shadows the parameter and is derived from gp.
		gprof := gp
		var mp *m
		var pp *p
		if gp != nil && gp.m != nil {
			if gp.m.curg != nil {
				gprof = gp.m.curg
			}
			mp = gp.m
			pp = gp.m.p.ptr()
		}
		traceCPUSample(gprof, mp, pp, stk[:n])
	}
	getg().m.mallocing--
}

// setcpuprofilerate sets the CPU profiling rate to hz times per second.
// If hz <= 0, setcpuprofilerate turns off CPU profiling.
func setcpuprofilerate(hz int32) {
	// Force sane arguments.
	if hz < 0 {
		hz = 0
	}

	// Disable preemption, otherwise we can be rescheduled to another thread
	// that has profiling enabled.
	gp := getg()
	gp.m.locks++

	// Stop profiler on this thread so that it is safe to lock prof.
	// if a profiling signal came in while we had prof locked,
	// it would deadlock.
5926 setThreadCPUProfiler(0) 5927 5928 for !prof.signalLock.CompareAndSwap(0, 1) { 5929 osyield() 5930 } 5931 if prof.hz.Load() != hz { 5932 setProcessCPUProfiler(hz) 5933 prof.hz.Store(hz) 5934 } 5935 prof.signalLock.Store(0) 5936 5937 lock(&sched.lock) 5938 sched.profilehz = hz 5939 unlock(&sched.lock) 5940 5941 if hz != 0 { 5942 setThreadCPUProfiler(hz) 5943 } 5944 5945 gp.m.locks-- 5946 } 5947 5948 // init initializes pp, which may be a freshly allocated p or a 5949 // previously destroyed p, and transitions it to status _Pgcstop. 5950 func (pp *p) init(id int32) { 5951 pp.id = id 5952 pp.gcw.id = id 5953 pp.status = _Pgcstop 5954 pp.sudogcache = pp.sudogbuf[:0] 5955 pp.deferpool = pp.deferpoolbuf[:0] 5956 pp.wbBuf.reset() 5957 if pp.mcache == nil { 5958 if id == 0 { 5959 if mcache0 == nil { 5960 throw("missing mcache?") 5961 } 5962 // Use the bootstrap mcache0. Only one P will get 5963 // mcache0: the one with ID 0. 5964 pp.mcache = mcache0 5965 } else { 5966 pp.mcache = allocmcache() 5967 } 5968 } 5969 if raceenabled && pp.raceprocctx == 0 { 5970 if id == 0 { 5971 pp.raceprocctx = raceprocctx0 5972 raceprocctx0 = 0 // bootstrap 5973 } else { 5974 pp.raceprocctx = raceproccreate() 5975 } 5976 } 5977 lockInit(&pp.timers.mu, lockRankTimers) 5978 5979 // This P may get timers when it starts running. Set the mask here 5980 // since the P may not go through pidleget (notably P 0 on startup). 5981 timerpMask.set(id) 5982 // Similarly, we may not go through pidleget before this P starts 5983 // running if it is P 0 on startup. 5984 idlepMask.clear(id) 5985 } 5986 5987 // destroy releases all of the resources associated with pp and 5988 // transitions it to status _Pdead. 5989 // 5990 // sched.lock must be held and the world must be stopped. 
func (pp *p) destroy() {
	assertLockHeld(&sched.lock)
	assertWorldStopped()

	// Move all runnable goroutines to the global queue
	for pp.runqhead != pp.runqtail {
		// Pop from tail of local queue
		pp.runqtail--
		gp := pp.runq[pp.runqtail%uint32(len(pp.runq))].ptr()
		// Push onto head of global queue
		globrunqputhead(gp)
	}
	if pp.runnext != 0 {
		globrunqputhead(pp.runnext.ptr())
		pp.runnext = 0
	}

	// Move all timers to the local P.
	getg().m.p.ptr().timers.take(&pp.timers)

	// No need to flush p's write barrier buffer or span queue, as Ps
	// cannot be destroyed during the mark phase.
	if phase := gcphase; phase != _GCoff {
		println("runtime: p id", pp.id, "destroyed during GC phase", phase)
		throw("P destroyed while GC is running")
	}
	// We should free the queues though.
	pp.gcw.spanq.destroy()

	// Reset the per-P caches to empty, zeroing their backing buffers.
	clear(pp.sudogbuf[:])
	pp.sudogcache = pp.sudogbuf[:0]
	pp.pinnerCache = nil
	clear(pp.deferpoolbuf[:])
	pp.deferpool = pp.deferpoolbuf[:0]
	systemstack(func() {
		for i := 0; i < pp.mspancache.len; i++ {
			// Safe to call since the world is stopped.
			mheap_.spanalloc.free(unsafe.Pointer(pp.mspancache.buf[i]))
		}
		pp.mspancache.len = 0
		lock(&mheap_.lock)
		pp.pcache.flush(&mheap_.pages)
		unlock(&mheap_.lock)
	})
	freemcache(pp.mcache)
	pp.mcache = nil
	gfpurge(pp)
	if raceenabled {
		if pp.timers.raceCtx != 0 {
			// The race detector code uses a callback to fetch
			// the proc context, so arrange for that callback
			// to see the right thing.
			// This hack only works because we are the only
			// thread running.
			mp := getg().m
			phold := mp.p.ptr()
			mp.p.set(pp)

			racectxend(pp.timers.raceCtx)
			pp.timers.raceCtx = 0

			mp.p.set(phold)
		}
		raceprocdestroy(pp.raceprocctx)
		pp.raceprocctx = 0
	}
	pp.gcAssistTime = 0
	// Fold pp's local counters into the global totals before it dies.
	gcCleanups.queued += pp.cleanupsQueued
	pp.cleanupsQueued = 0
	sched.goroutinesCreated.Add(int64(pp.goroutinesCreated))
	pp.goroutinesCreated = 0
	pp.xRegs.free()
	pp.status = _Pdead
}

// Change number of processors.
//
// sched.lock must be held, and the world must be stopped.
//
// gcworkbufs must not be being modified by either the GC or the write barrier
// code, so the GC must not be running if the number of Ps actually changes.
//
// Returns list of Ps with local work, they need to be scheduled by the caller.
// The list is linked through p.link; Ps on it may or may not already have an
// M assigned in p.m.
func procresize(nprocs int32) *p {
	assertLockHeld(&sched.lock)
	assertWorldStopped()

	old := gomaxprocs
	if old < 0 || nprocs <= 0 {
		throw("procresize: invalid arg")
	}
	trace := traceAcquire()
	if trace.ok() {
		trace.Gomaxprocs(nprocs)
		traceRelease(trace)
	}

	// update statistics: charge the time since the previous resize
	// to the old number of Ps.
	now := nanotime()
	if sched.procresizetime != 0 {
		sched.totaltime += int64(old) * (now - sched.procresizetime)
	}
	sched.procresizetime = now

	// Grow allp if necessary.
	if nprocs > int32(len(allp)) {
		// Synchronize with retake, which could be running
		// concurrently since it doesn't run on a P.
		lock(&allpLock)
		if nprocs <= int32(cap(allp)) {
			allp = allp[:nprocs]
		} else {
			nallp := make([]*p, nprocs)
			// Copy everything up to allp's cap so we
			// never lose old allocated Ps.
			copy(nallp, allp[:cap(allp)])
			allp = nallp
		}

		idlepMask = idlepMask.resize(nprocs)
		timerpMask = timerpMask.resize(nprocs)
		work.spanqMask = work.spanqMask.resize(nprocs)
		unlock(&allpLock)
	}

	// initialize new P's
	for i := old; i < nprocs; i++ {
		pp := allp[i]
		if pp == nil {
			pp = new(p)
		}
		pp.init(i)
		atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp))
	}

	gp := getg()
	if gp.m.p != 0 && gp.m.p.ptr().id < nprocs {
		// continue to use the current P
		gp.m.p.ptr().status = _Prunning
		gp.m.p.ptr().mcache.prepareForSweep()
	} else {
		// release the current P and acquire allp[0].
		//
		// We must do this before destroying our current P
		// because p.destroy itself has write barriers, so we
		// need to do that from a valid P.
		if gp.m.p != 0 {
			trace := traceAcquire()
			if trace.ok() {
				// Pretend that we were descheduled
				// and then scheduled again to keep
				// the trace consistent.
				trace.GoSched()
				trace.ProcStop(gp.m.p.ptr())
				traceRelease(trace)
			}
			gp.m.p.ptr().m = 0
		}
		gp.m.p = 0
		pp := allp[0]
		pp.m = 0
		pp.status = _Pidle
		acquirep(pp)
		trace := traceAcquire()
		if trace.ok() {
			trace.GoStart()
			traceRelease(trace)
		}
	}

	// g.m.p is now set, so we no longer need mcache0 for bootstrapping.
	mcache0 = nil

	// release resources from unused P's
	for i := nprocs; i < old; i++ {
		pp := allp[i]
		pp.destroy()
		// can't free P itself because it can be referenced by an M in syscall
	}

	// Trim allp.
	if int32(len(allp)) != nprocs {
		lock(&allpLock)
		allp = allp[:nprocs]
		idlepMask = idlepMask.resize(nprocs)
		timerpMask = timerpMask.resize(nprocs)
		work.spanqMask = work.spanqMask.resize(nprocs)
		unlock(&allpLock)
	}

	// Assign Ms to Ps with runnable goroutines.
	//
	// Every P other than the current one is sorted onto one of three
	// lists linked through p.link: idlePs (empty run queue), runnablePs
	// (has work and got its old M back), and runnablePsNeedM (has work
	// but still needs an M).
	var runnablePs *p
	var runnablePsNeedM *p
	var idlePs *p
	for i := nprocs - 1; i >= 0; i-- {
		pp := allp[i]
		if gp.m.p.ptr() == pp {
			continue
		}
		pp.status = _Pidle
		if runqempty(pp) {
			pp.link.set(idlePs)
			idlePs = pp
			continue
		}

		// Prefer to run on the most recent M if it is
		// available.
		//
		// Ps with no oldm (or for which oldm is already taken
		// by an earlier P), we delay until all oldm Ps are
		// handled. Otherwise, mget may return an M that a
		// later P has in oldm.
		var mp *m
		if oldm := pp.oldm.get(); oldm != nil {
			// Returns nil if oldm is not idle.
			mp = mgetSpecific(oldm)
		}
		if mp == nil {
			// Call mget later.
			pp.link.set(runnablePsNeedM)
			runnablePsNeedM = pp
			continue
		}
		pp.m.set(mp)
		pp.link.set(runnablePs)
		runnablePs = pp
	}
	// Assign Ms to remaining runnable Ps without usable oldm. See comment
	// above.
	for runnablePsNeedM != nil {
		pp := runnablePsNeedM
		runnablePsNeedM = pp.link.ptr()

		mp := mget()
		pp.m.set(mp)
		pp.link.set(runnablePs)
		runnablePs = pp
	}

	// Now that we've assigned Ms to Ps with runnable goroutines, assign GC
	// mark workers to remaining idle Ps, if needed.
	//
	// By assigning GC workers to Ps here, we slightly speed up starting
	// the world, as we will start enough Ps to run all of the user
	// goroutines and GC mark workers all at once, rather than using a
	// sequence of wakep calls as each P's findRunnable realizes it needs
	// to run a mark worker instead of a user goroutine.
	//
	// By assigning GC workers to Ps only _after_ previously-running Ps are
	// assigned Ms, we ensure that goroutines previously running on a P
	// continue to run on the same P, with GC mark workers preferring
	// previously-idle Ps. This helps prevent goroutines from shuffling
	// around too much across STW.
	//
	// N.B., if there aren't enough Ps left in idlePs for all of the GC
	// mark workers, then findRunnable will still choose to run mark
	// workers on Ps assigned above.
	//
	// N.B., we do this during any STW in the mark phase, not just the
	// sweep termination STW that starts the mark phase. gcBgMarkWorker
	// always preempts by removing itself from the P, so even unrelated
	// STWs during the mark require that Ps reselect mark workers upon
	// restart.
	if gcBlackenEnabled != 0 {
		for idlePs != nil {
			pp := idlePs

			ok, _ := gcController.assignWaitingGCWorker(pp, now)
			if !ok {
				// No more mark workers needed.
				break
			}

			// Got a worker, P is now runnable.
			//
			// mget may return nil if there aren't enough Ms, in
			// which case startTheWorldWithSema will start one.
			//
			// N.B. findRunnableGCWorker will make the worker G
			// itself runnable.
			idlePs = pp.link.ptr()
			mp := mget()
			pp.m.set(mp)
			pp.link.set(runnablePs)
			runnablePs = pp
		}
	}

	// Finally, any remaining Ps are truly idle.
	for idlePs != nil {
		pp := idlePs
		idlePs = pp.link.ptr()
		pidleput(pp, now)
	}

	stealOrder.reset(uint32(nprocs))
	var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32
	atomic.Store((*uint32)(unsafe.Pointer(int32p)), uint32(nprocs))
	if old != nprocs {
		// Notify the limiter that the amount of procs has changed.
		gcCPULimiter.resetCapacity(now, nprocs)
	}
	return runnablePs
}

// Associate p and the current m.
//
// This function is allowed to have write barriers even if the caller
// isn't because it immediately acquires pp.
//
//go:yeswritebarrierrec
func acquirep(pp *p) {
	// Do the work.
	acquirepNoTrace(pp)

	// Emit the event.
	trace := traceAcquire()
	if trace.ok() {
		trace.ProcStart()
		traceRelease(trace)
	}
}

// Internals of acquirep, just skipping the trace events.
//
//go:yeswritebarrierrec
func acquirepNoTrace(pp *p) {
	// Do the part that isn't allowed to have write barriers.
	wirep(pp)

	// Have p; write barriers now allowed.

	// The M we're associating with will be the old M after the next
	// releasep. We must set this here because write barriers are not
	// allowed in releasep.
	pp.oldm = pp.m.ptr().self

	// Perform deferred mcache flush before this P can allocate
	// from a potentially stale mcache.
	pp.mcache.prepareForSweep()
}

// wirep is the first step of acquirep, which actually associates the
// current M to pp. This is broken out so we can disallow write
// barriers for this part, since we don't yet have a P.
//
// It throws if the current M already has a P, or if pp already has an M
// or is not in status _Pidle. On return pp is in status _Prunning.
//
//go:nowritebarrierrec
//go:nosplit
func wirep(pp *p) {
	gp := getg()

	if gp.m.p != 0 {
		// Call on the systemstack to avoid a nosplit overflow build failure
		// on some platforms when built with -N -l. See #64113.
		systemstack(func() {
			throw("wirep: already in go")
		})
	}
	if pp.m != 0 || pp.status != _Pidle {
		// Call on the systemstack to avoid a nosplit overflow build failure
		// on some platforms when built with -N -l. See #64113.
		systemstack(func() {
			id := int64(0)
			if pp.m != 0 {
				id = pp.m.ptr().id
			}
			print("wirep: p->m=", pp.m, "(", id, ") p->status=", pp.status, "\n")
			throw("wirep: invalid p state")
		})
	}
	gp.m.p.set(pp)
	pp.m.set(gp.m)
	pp.status = _Prunning
}

// Disassociate p and the current m.
//
// A ProcStop trace event is emitted for the P (when tracing is enabled)
// before the P is released. Returns the released P.
func releasep() *p {
	trace := traceAcquire()
	if trace.ok() {
		trace.ProcStop(getg().m.p.ptr())
		traceRelease(trace)
	}
	return releasepNoTrace()
}

// Disassociate p and the current m without tracing an event.
//
// It throws if the current M has no P, or if the P does not point back
// at this M or is not in status _Prunning. The released P is left in
// status _Pidle and returned.
func releasepNoTrace() *p {
	gp := getg()

	if gp.m.p == 0 {
		throw("releasep: invalid arg")
	}
	pp := gp.m.p.ptr()
	if pp.m.ptr() != gp.m || pp.status != _Prunning {
		print("releasep: m=", gp.m, " m->p=", gp.m.p.ptr(), " p->m=", hex(pp.m), " p->status=", pp.status, "\n")
		throw("releasep: invalid p state")
	}

	// P must clear its nextGCMarkWorker if it stops.
	gcController.releaseNextGCMarkWorker(pp)

	gp.m.p = 0
	pp.m = 0
	pp.status = _Pidle
	return pp
}

// incidlelocked adds v to sched.nmidlelocked while holding sched.lock.
// When the count is being increased (v > 0) it also runs checkdead,
// since one more idle locked M may mean that nothing is left running.
func incidlelocked(v int32) {
	lock(&sched.lock)
	sched.nmidlelocked += v
	if v > 0 {
		checkdead()
	}
	unlock(&sched.lock)
}

// Check for deadlock situation.
// The check is based on number of running M's, if 0 -> deadlock.
// sched.lock must be held.
func checkdead() {
	assertLockHeld(&sched.lock)

	// For -buildmode=c-shared or -buildmode=c-archive it's OK if
	// there are no running goroutines. The calling program is
	// assumed to be running.
	// One exception is Wasm, which is single-threaded. If we are
	// in Go and all goroutines are blocked, it deadlocks.
	if (islibrary || isarchive) && GOARCH != "wasm" {
		return
	}

	// If we are dying because of a signal caught on an already idle thread,
	// freezetheworld will cause all running threads to block.
	// And runtime will essentially enter into deadlock state,
	// except that there is a thread that will call exit soon.
	if panicking.Load() > 0 {
		return
	}

	// If we are not running under cgo, but we have an extra M then account
	// for it. (It is possible to have an extra M on Windows without cgo to
	// accommodate callbacks created by syscall.NewCallback. See issue #6751
	// for details.)
	var run0 int32
	if !iscgo && cgoHasExtraM && extraMLength.Load() > 0 {
		run0 = 1
	}

	// run is the number of Ms that are neither idle, idle-locked, nor
	// system Ms.
	run := mcount() - sched.nmidle - sched.nmidlelocked - sched.nmsys
	if run > run0 {
		return
	}
	if run < 0 {
		print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", mcount(), " nmsys=", sched.nmsys, "\n")
		unlock(&sched.lock)
		throw("checkdead: inconsistent counts")
	}

	// Despite its name, grunning counts the non-system goroutines that
	// are in _Gwaiting or _Gpreempted. Finding one that is runnable,
	// running or in a syscall at this point is an inconsistency.
	grunning := 0
	forEachG(func(gp *g) {
		if isSystemGoroutine(gp, false) {
			return
		}
		s := readgstatus(gp)
		switch s &^ _Gscan {
		case _Gwaiting,
			_Gpreempted:
			grunning++
		case _Grunnable,
			_Grunning,
			_Gsyscall:
			print("runtime: checkdead: find g ", gp.goid, " in status ", s, "\n")
			unlock(&sched.lock)
			throw("checkdead: runnable g")
		}
	})
	if grunning == 0 { // possible if main goroutine calls runtime·Goexit()
		unlock(&sched.lock) // unlock so that GODEBUG=scheddetail=1 doesn't hang
		fatal("no goroutines (main called runtime.Goexit) - deadlock!")
	}

	// Maybe jump time forward for playground.
	if faketime != 0 {
		if when := timeSleepUntil(); when < maxWhen {
			faketime = when

			// Start an M to steal the timer.
			pp, _ := pidleget(faketime)
			if pp == nil {
				// There should always be a free P since
				// nothing is running.
				unlock(&sched.lock)
				throw("checkdead: no p for timer")
			}
			mp := mget()
			if mp == nil {
				// There should always be a free M since
				// nothing is running.
				unlock(&sched.lock)
				throw("checkdead: no m for timer")
			}
			// M must be spinning to steal. We set this to be
			// explicit, but since this is the only M it would
			// become spinning on its own anyways.
			sched.nmspinning.Add(1)
			mp.spinning = true
			mp.nextp.set(pp)
			notewakeup(&mp.park)
			return
		}
	}

	// There are no goroutines running, so we can look at the P's.
	// A P that still has timers pending means we are not deadlocked.
	for _, pp := range allp {
		if len(pp.timers.heap) > 0 {
			return
		}
	}

	unlock(&sched.lock) // unlock so that GODEBUG=scheddetail=1 doesn't hang
	fatal("all goroutines are asleep - deadlock!")
}

// forcegcperiod is the maximum time in nanoseconds between garbage
// collections. If we go this long without a garbage collection, one
// is forced to run.
//
// This is a variable for testing purposes. It normally doesn't change.
var forcegcperiod int64 = 2 * 60 * 1e9

// haveSysmon indicates whether there is sysmon thread support.
//
// No threads on wasm yet, so no sysmon.
const haveSysmon = GOARCH != "wasm"

// sysmon is the system monitor loop. It never returns: each iteration
// sleeps (20us at first, backing off to at most 10ms while nothing is
// retaken), then polls the network if it has not been polled recently,
// optionally updates GOMAXPROCS, wakes the scavenger on request, retakes
// Ps and preempts long-running Gs via retake, forces a GC when the
// time-based trigger fires, and prints scheduler traces when enabled.
//
// Always runs without a P, so write barriers are not allowed.
//
//go:nowritebarrierrec
func sysmon() {
	lock(&sched.lock)
	sched.nmsys++
	checkdead()
	unlock(&sched.lock)

	lastgomaxprocs := int64(0)
	lasttrace := int64(0)
	idle := 0 // how many cycles in succession we have not woken anybody up
	delay := uint32(0)

	for {
		if idle == 0 { // start with 20us sleep...
			delay = 20
		} else if idle > 50 { // start doubling the sleep after 1ms...
			delay *= 2
		}
		if delay > 10*1000 { // up to 10ms
			delay = 10 * 1000
		}
		usleep(delay)

		// sysmon should not enter deep sleep if schedtrace is enabled so that
		// it can print that information at the right time.
		//
		// It should also not enter deep sleep if there are any active P's so
		// that it can retake P's from syscalls, preempt long running G's, and
		// poll the network if all P's are busy for long stretches.
		//
		// It should wakeup from deep sleep if any P's become active either due
		// to exiting a syscall or waking up due to a timer expiring so that it
		// can resume performing those duties. If it wakes from a syscall it
		// resets idle and delay as a bet that since it had retaken a P from a
		// syscall before, it may need to do it again shortly after the
		// application starts work again. It does not reset idle when waking
		// from a timer to avoid adding system load to applications that spend
		// most of their time sleeping.
		now := nanotime()
		if debug.schedtrace <= 0 && (sched.gcwaiting.Load() || sched.npidle.Load() == gomaxprocs) {
			lock(&sched.lock)
			// Re-check the condition now that sched.lock is held.
			if sched.gcwaiting.Load() || sched.npidle.Load() == gomaxprocs {
				syscallWake := false
				next := timeSleepUntil()
				if next > now {
					sched.sysmonwait.Store(true)
					unlock(&sched.lock)
					// Make wake-up period small enough
					// for the sampling to be correct.
					sleep := forcegcperiod / 2
					if next-now < sleep {
						sleep = next - now
					}
					shouldRelax := sleep >= osRelaxMinNS
					if shouldRelax {
						osRelax(true)
					}
					syscallWake = notetsleep(&sched.sysmonnote, sleep)
					if shouldRelax {
						osRelax(false)
					}
					lock(&sched.lock)
					sched.sysmonwait.Store(false)
					noteclear(&sched.sysmonnote)
				}
				if syscallWake {
					idle = 0
					delay = 20
				}
			}
			unlock(&sched.lock)
		}

		lock(&sched.sysmonlock)
		// Update now in case we blocked on sysmonnote or spent a long time
		// blocked on schedlock or sysmonlock above.
		now = nanotime()

		// trigger libc interceptors if needed
		if *cgo_yield != nil {
			asmcgocall(*cgo_yield, nil)
		}
		// poll network if not polled for more than 10ms
		lastpoll := sched.lastpoll.Load()
		if netpollinited() && lastpoll != 0 && lastpoll+10*1000*1000 < now {
			sched.lastpoll.CompareAndSwap(lastpoll, now)
			list, delta := netpoll(0) // non-blocking - returns list of goroutines
			if !list.empty() {
				// Need to decrement number of idle locked M's
				// (pretending that one more is running) before injectglist.
				// Otherwise it can lead to the following situation:
				// injectglist grabs all P's but before it starts M's to run the P's,
				// another M returns from syscall, finishes running its G,
				// observes that there is no work to do and no other running M's
				// and reports deadlock.
				incidlelocked(-1)
				injectglist(&list)
				incidlelocked(1)
				netpollAdjustWaiters(delta)
			}
		}
		// Check if we need to update GOMAXPROCS at most once per second.
		if debug.updatemaxprocs != 0 && lastgomaxprocs+1e9 <= now {
			sysmonUpdateGOMAXPROCS()
			lastgomaxprocs = now
		}
		if scavenger.sysmonWake.Load() != 0 {
			// Kick the scavenger awake if someone requested it.
			scavenger.wake()
		}
		// retake P's blocked in syscalls
		// and preempt long running G's
		if retake(now) != 0 {
			idle = 0
		} else {
			idle++
		}
		// check if we need to force a GC
		if t := (gcTrigger{kind: gcTriggerTime, now: now}); t.test() && forcegc.idle.Load() {
			lock(&forcegc.lock)
			forcegc.idle.Store(false)
			var list gList
			list.push(forcegc.g)
			injectglist(&list)
			unlock(&forcegc.lock)
		}
		if debug.schedtrace > 0 && lasttrace+int64(debug.schedtrace)*1000000 <= now {
			lasttrace = now
			schedtrace(debug.scheddetail > 0)
		}
		unlock(&sched.sysmonlock)
	}
}

// sysmontick is the per-P bookkeeping used by retake: the schedtick and
// syscalltick values last observed for the P, and the times (as passed
// to retake) at which those observations were recorded.
type sysmontick struct {
	schedtick   uint32
	syscalltick uint32
	schedwhen   int64
	syscallwhen int64
}

// forcePreemptNS is the time slice given to a G before it is
// preempted.
const forcePreemptNS = 10 * 1000 * 1000 // 10ms

// retake walks all Ps in status _Prunning. It requests preemption of a P
// whose schedtick has not advanced for forcePreemptNS, and takes away Ps
// that have been sitting in a system call, handing them off with
// handoffp. now is the current time in nanoseconds. It returns the
// number of Ps taken from system calls.
func retake(now int64) uint32 {
	n := 0
	// Prevent allp slice changes. This lock will be completely
	// uncontended unless we're already stopping the world.
	lock(&allpLock)
	// We can't use a range loop over allp because we may
	// temporarily drop the allpLock. Hence, we need to re-fetch
	// allp each time around the loop.
	for i := 0; i < len(allp); i++ {
		// Quickly filter out non-running Ps. Running Ps are either
		// in a syscall or are actually executing. Idle Ps don't
		// need to be retaken.
		//
		// This is best-effort, so it's OK that it's racy. Our target
		// is to retake Ps that have been running or in a syscall for
		// a long time (milliseconds), so the state has plenty of time
		// to stabilize.
		pp := allp[i]
		if pp == nil || atomic.Load(&pp.status) != _Prunning {
			// pp can be nil if procresize has grown
			// allp but not yet created new Ps.
			continue
		}
		pd := &pp.sysmontick
		sysretake := false

		// Preempt G if it's running on the same schedtick for
		// too long. This could be from a single long-running
		// goroutine or a sequence of goroutines run via
		// runnext, which share a single schedtick time slice.
		schedt := int64(pp.schedtick)
		if int64(pd.schedtick) != schedt {
			pd.schedtick = uint32(schedt)
			pd.schedwhen = now
		} else if pd.schedwhen+forcePreemptNS <= now {
			preemptone(pp)
			// If pp is in a syscall, preemptone doesn't work.
			// Neither the goroutine nor the thread can respond to a
			// preemption request because they're not in Go code,
			// so we need to take the P ourselves.
			sysretake = true
		}

		// Drop allpLock so we can take sched.lock.
		unlock(&allpLock)

		// Need to decrement number of idle locked M's (pretending that
		// one more is running) before we take the P and resume.
		// Otherwise the M from which we retake can exit the syscall,
		// increment nmidle and report deadlock.
		//
		// Can't call incidlelocked once we setBlockOnExitSyscall, due
		// to a lock ordering violation between sched.lock and _Gscan.
		incidlelocked(-1)

		// Try to prevent the P from continuing in the syscall, if it's in one at all.
		thread, ok := setBlockOnExitSyscall(pp)
		if !ok {
			// Not in a syscall, or something changed out from under us.
			goto done
		}

		// Retake the P if it's there for more than 1 sysmon tick (at least 20us).
		// A changed syscalltick means this is the first time we see this
		// syscall: just record it, unless the time slice already expired.
		if syst := int64(pp.syscalltick); !sysretake && int64(pd.syscalltick) != syst {
			pd.syscalltick = uint32(syst)
			pd.syscallwhen = now
			thread.resume()
			goto done
		}

		// On the one hand we don't want to retake Ps if there is no other work to do,
		// but on the other hand we want to retake them eventually
		// because they can prevent the sysmon thread from deep sleep.
		if runqempty(pp) && sched.nmspinning.Load()+sched.npidle.Load() > 0 && pd.syscallwhen+10*1000*1000 > now {
			thread.resume()
			goto done
		}

		// Take the P. Note: because we have the scan bit, the goroutine
		// is at worst stuck spinning in exitsyscall.
		thread.takeP()
		thread.resume()
		n++

		// Handoff the P for some other thread to run it.
		handoffp(pp)

		// The P has been handed off to another thread, so risk of a false
		// deadlock report while we hold onto it is gone.
	done:
		incidlelocked(1)
		lock(&allpLock)
	}
	unlock(&allpLock)
	return uint32(n)
}

// syscallingThread represents a thread in a system call that temporarily
// cannot advance out of the system call.
//
// status is the goroutine status (without the _Gscan bit) observed when
// the thread was blocked; resume restores it.
type syscallingThread struct {
	gp     *g
	mp     *m
	pp     *p
	status uint32
}

// setBlockOnExitSyscall prevents pp's thread from advancing out of
// exitsyscall. On success, returns the g/m/p state of the thread
// and true. At that point, the caller owns the g/m/p links referenced,
// the goroutine is in _Gsyscall, and prevented from transitioning out
// of it. On failure, it returns false, and none of these guarantees are
// made.
//
// Callers must call resume on the resulting thread state once
// they're done with thread, otherwise it will remain blocked forever.
//
// This function races with state changes on pp, and thus may fail
// if pp is not in a system call, or exits a system call concurrently
// with this function. However, this function is safe to call without
// any additional synchronization.
func setBlockOnExitSyscall(pp *p) (syscallingThread, bool) {
	if pp.status != _Prunning {
		return syscallingThread{}, false
	}
	// Be very careful here, these reads are intentionally racy.
	// Once we notice the G is in _Gsyscall, acquire its scan bit,
	// and validate that it's still connected to the *same* M and P,
	// we can actually get to work. Holding the scan bit will prevent
	// the G from exiting the syscall.
	//
	// Our goal here is to interrupt long syscalls. If it turns out
	// that we're wrong and the G switched to another syscall while
	// we were trying to do this, that's completely fine. It's
	// probably making more frequent syscalls and the typical
	// preemption paths should be effective.
	mp := pp.m.ptr()
	if mp == nil {
		// Nothing to do.
		return syscallingThread{}, false
	}
	gp := mp.curg
	if gp == nil {
		// Nothing to do.
		return syscallingThread{}, false
	}
	status := readgstatus(gp) &^ _Gscan

	// A goroutine is considered in a syscall, and may have a corresponding
	// P, if it's in _Gsyscall *or* _Gdeadextra. In the latter case, it's an
	// extra M goroutine.
	if status != _Gsyscall && status != _Gdeadextra {
		// Not in a syscall, nothing to do.
		return syscallingThread{}, false
	}
	if !castogscanstatus(gp, status, status|_Gscan) {
		// Not in _Gsyscall or _Gdeadextra anymore. Nothing to do.
		return syscallingThread{}, false
	}
	if gp.m != mp || gp.m.p.ptr() != pp {
		// This is not what we originally observed. Nothing to do.
		// Drop the scan bit we just acquired before bailing out.
		casfrom_Gscanstatus(gp, status|_Gscan, status)
		return syscallingThread{}, false
	}
	return syscallingThread{gp, mp, pp, status}, true
}

// gcstopP unwires the P attached to the syscalling thread
// and moves it into the _Pgcstop state.
//
// The caller must be stopping the world.
func (s syscallingThread) gcstopP() {
	assertLockHeld(&sched.lock)

	s.releaseP(_Pgcstop)
	// Record when this P entered _Pgcstop.
	s.pp.gcStopTime = nanotime()
	// NOTE(review): presumably stopwait counts the Ps the stopper is
	// still waiting on; confirm against the stop-the-world code.
	sched.stopwait--
}

// takeP unwires the P attached to the syscalling thread
// and moves it into the _Pidle state.
func (s syscallingThread) takeP() {
	s.releaseP(_Pidle)
}

// releaseP unwires the P from the syscalling thread, moving
// it to the provided state. Callers should prefer to use
// takeP and gcstopP.
//
// state must be _Pidle or _Pgcstop; any other value throws.
func (s syscallingThread) releaseP(state uint32) {
	if state != _Pidle && state != _Pgcstop {
		throw("attempted to release P into a bad state")
	}
	// The tracer is acquired before the P is unwired; the ProcSteal
	// event is emitted only after the new status has been stored.
	trace := traceAcquire()
	// Sever both directions of the M<->P link.
	s.pp.m = 0
	s.mp.p = 0
	atomic.Store(&s.pp.status, state)
	if trace.ok() {
		trace.ProcSteal(s.pp)
		traceRelease(trace)
	}
	addGSyscallNoP(s.mp)
	s.pp.syscalltick++
}

// resume allows a syscalling thread to advance beyond exitsyscall.
//
// It does so by clearing the scan bit on s.gp, restoring the status
// recorded in s.status.
func (s syscallingThread) resume() {
	casfrom_Gscanstatus(s.gp, s.status|_Gscan, s.status)
}

// Tell all goroutines that they have been preempted and they should stop.
// This function is purely best-effort. It can fail to inform a goroutine if a
// processor just started running it.
// No locks need to be held.
// Returns true if preemption request was issued to at least one goroutine.
func preemptall() bool {
	res := false
	for _, pp := range allp {
		// Only Ps in _Prunning are asked to preempt.
		if pp.status != _Prunning {
			continue
		}
		if preemptone(pp) {
			res = true
		}
	}
	return res
}

// Tell the goroutine running on processor P to stop.
// This function is purely best-effort. It can incorrectly fail to inform the
// goroutine. It can inform the wrong goroutine. Even if it informs the
// correct goroutine, that goroutine might ignore the request if it is
// simultaneously executing newstack.
// No lock needs to be held.
// Returns true if preemption request was issued.
// The actual preemption will happen at some point in the future
// and will be indicated by the gp->status no longer being
// Grunning
func preemptone(pp *p) bool {
	mp := pp.m.ptr()
	// Never preempt the calling M itself.
	if mp == nil || mp == getg().m {
		return false
	}
	gp := mp.curg
	if gp == nil || gp == mp.g0 {
		return false
	}
	if readgstatus(gp)&^_Gscan == _Gsyscall {
		// Don't bother trying to preempt a goroutine in a syscall.
		return false
	}

	gp.preempt = true

	// Every call in a goroutine checks for stack overflow by
	// comparing the current stack pointer to gp->stackguard0.
	// Setting gp->stackguard0 to StackPreempt folds
	// preemption into the normal stack overflow check.
	gp.stackguard0 = stackPreempt

	// Request an async preemption of this P.
	if preemptMSupported && debug.asyncpreemptoff == 0 {
		pp.preempt = true
		preemptM(mp)
	}

	return true
}

// starttime is the nanotime of the first schedtrace call. schedtrace
// reports timestamps in milliseconds relative to it.
var starttime int64

// schedtrace prints a one-line "SCHED" summary of scheduler state.
// If detailed is true it additionally prints one line per P, per M and
// per G; otherwise it prints the per-P run queue lengths and schedticks
// on the summary line.
//
// sched.lock is held for the whole dump.
func schedtrace(detailed bool) {
	now := nanotime()
	if starttime == 0 {
		starttime = now
	}

	lock(&sched.lock)
	print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle.Load(), " threads=", mcount(), " spinningthreads=", sched.nmspinning.Load(), " needspinning=", sched.needspinning.Load(), " idlethreads=", sched.nmidle, " runqueue=", sched.runq.size)
	if detailed {
		print(" gcwaiting=", sched.gcwaiting.Load(), " nmidlelocked=", sched.nmidlelocked, " stopwait=", sched.stopwait, " sysmonwait=", sched.sysmonwait.Load(), "\n")
	}
	// We must be careful while reading data from P's, M's and G's.
	// Even if we hold schedlock, most data can be changed concurrently.
	// E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
	for i, pp := range allp {
		h := atomic.Load(&pp.runqhead)
		t := atomic.Load(&pp.runqtail)
		if detailed {
			print(" P", i, ": status=", pp.status, " schedtick=", pp.schedtick, " syscalltick=", pp.syscalltick, " m=")
			// Read pp.m once into a local so the nil check and the
			// dereference see the same value.
			mp := pp.m.ptr()
			if mp != nil {
				print(mp.id)
			} else {
				print("nil")
			}
			print(" runqsize=", t-h, " gfreecnt=", pp.gFree.size, " timerslen=", len(pp.timers.heap), "\n")
		} else {
			// In non-detailed mode format lengths of per-P run queues as:
			// [ len1 len2 len3 len4 ]
			print(" ")
			if i == 0 {
				print("[ ")
			}
			print(t - h)
			if i == len(allp)-1 {
				print(" ]")
			}
		}
	}

	if !detailed {
		// Format per-P schedticks as: schedticks=[ tick1 tick2 tick3 tick4 ].
		print(" schedticks=[ ")
		for _, pp := range allp {
			print(pp.schedtick)
			print(" ")
		}
		print("]\n")
	}

	if !detailed {
		unlock(&sched.lock)
		return
	}

	// Detailed mode only: one line per M.
	for mp := allm; mp != nil; mp = mp.alllink {
		pp := mp.p.ptr()
		print(" M", mp.id, ": p=")
		if pp != nil {
			print(pp.id)
		} else {
			print("nil")
		}
		print(" curg=")
		if mp.curg != nil {
			print(mp.curg.goid)
		} else {
			print("nil")
		}
		print(" mallocing=", mp.mallocing, " throwing=", mp.throwing, " preemptoff=", mp.preemptoff, " locks=", mp.locks, " dying=", mp.dying, " spinning=", mp.spinning, " blocked=", mp.blocked, " lockedg=")
		if lockedg := mp.lockedg.ptr(); lockedg != nil {
			print(lockedg.goid)
		} else {
			print("nil")
		}
		print("\n")
	}

	// Detailed mode only: one line per G.
	forEachG(func(gp *g) {
		print(" G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason.String(), ") m=")
		if gp.m != nil {
			print(gp.m.id)
		} else {
			print("nil")
		}
		print(" lockedm=")
		if lockedm := gp.lockedm.ptr(); lockedm != nil {
			print(lockedm.id)
		} else {
			print("nil")
		}
		print("\n")
	})
	unlock(&sched.lock)
}

// updateMaxProcsGState is the state shared between
// updateMaxProcsGoroutine and sysmonUpdateGOMAXPROCS.
type updateMaxProcsGState struct {
	lock mutex
	g    *g          // the updateMaxProcsGoroutine goroutine
	idle atomic.Bool // set by the goroutine before it parks, cleared by sysmon when it wakes it

	// Readable when idle == false, writable when idle == true.
	procs int32 // new GOMAXPROCS value
}

var (
	// GOMAXPROCS update godebug metric. Incremented if automatic
	// GOMAXPROCS updates actually change the value of GOMAXPROCS.
	updatemaxprocs = &godebugInc{name: "updatemaxprocs"}

	// Synchronization and state between updateMaxProcsGoroutine and
	// sysmon.
	updateMaxProcsG updateMaxProcsGState

	// Synchronization between GOMAXPROCS and sysmon.
	//
	// Setting GOMAXPROCS via a call to GOMAXPROCS disables automatic
	// GOMAXPROCS updates.
	//
	// We want to make two guarantees to callers of GOMAXPROCS. After
	// GOMAXPROCS returns:
	//
	// 1. The runtime will not make any automatic changes to GOMAXPROCS.
	//
	// 2. The runtime will not perform any of the system calls used to
	//    determine the appropriate value of GOMAXPROCS (i.e., it won't
	//    call defaultGOMAXPROCS).
	//
	// (1) is the baseline guarantee that everyone needs. The GOMAXPROCS
	// API isn't useful to anyone if automatic updates may occur after it
	// returns. This is easily achieved by double-checking the state under
	// STW before committing an automatic GOMAXPROCS update.
	//
	// (2) doesn't matter to most users, as it isn't observable as long
	// as (1) holds. However, it can be important to users sandboxing Go.
	// They want to disable these system calls and need some way to know
	// when they are guaranteed the calls will stop.
	//
	// This would be simple to achieve if we simply called
	// defaultGOMAXPROCS under STW in updateMaxProcsGoroutine below.
	// However, we would like to avoid scheduling this goroutine every
	// second when it will almost never do anything. Instead, sysmon calls
	// defaultGOMAXPROCS to decide whether to schedule
	// updateMaxProcsGoroutine. Thus we need to synchronize between sysmon
	// and GOMAXPROCS calls.
	//
	// GOMAXPROCS can't hold a runtime mutex across STW. It could hold a
	// semaphore, but sysmon cannot take semaphores. Instead, we have a
	// more complex scheme:
	//
	// * sysmon holds computeMaxProcsLock while calling defaultGOMAXPROCS.
	// * sysmon skips the current update if sched.customGOMAXPROCS is
	//   set.
	// * GOMAXPROCS sets sched.customGOMAXPROCS once it is committed to
	//   changing GOMAXPROCS.
	// * GOMAXPROCS takes computeMaxProcsLock to wait for outstanding
	//   defaultGOMAXPROCS calls to complete.
	//
	// N.B. computeMaxProcsLock could simply be sched.lock, but we want to
	// avoid holding that lock during the potentially slow
	// defaultGOMAXPROCS.
	computeMaxProcsLock mutex
)

// Start GOMAXPROCS update helper goroutine.
//
// This is based on forcegchelper.
func defaultGOMAXPROCSUpdateEnable() {
	if debug.updatemaxprocs == 0 {
		// Unconditionally increment the metric when updates are disabled.
		//
		// It would be more descriptive if we did a dry run of the
		// complete update, determining the appropriate value of
		// GOMAXPROCS and then bailing out and just incrementing the
		// metric if a change would occur.
		//
		// Not only is that a lot of ongoing work for a disabled
		// feature, but some users need to be able to completely
		// disable the update system calls (such as sandboxes).
		// Currently, updatemaxprocs=0 serves that purpose.
		updatemaxprocs.IncNonDefault()
		return
	}

	go updateMaxProcsGoroutine()
}

// updateMaxProcsGoroutine is the body of the GOMAXPROCS update helper.
// Each iteration marks itself idle and parks; once woken it stops the
// world, re-checks sched.customGOMAXPROCS, and if no custom value was
// set hands updateMaxProcsG.procs to startTheWorldGC via newprocs.
//
// The goroutine exits for good once it observes sched.customGOMAXPROCS.
func updateMaxProcsGoroutine() {
	updateMaxProcsG.g = getg()
	lockInit(&updateMaxProcsG.lock, lockRankUpdateMaxProcsG)
	for {
		lock(&updateMaxProcsG.lock)
		if updateMaxProcsG.idle.Load() {
			throw("updateMaxProcsGoroutine: phase error")
		}
		updateMaxProcsG.idle.Store(true)
		goparkunlock(&updateMaxProcsG.lock, waitReasonUpdateGOMAXPROCSIdle, traceBlockSystemGoroutine, 1)
		// This goroutine is explicitly resumed by sysmon.

		stw := stopTheWorldGC(stwGOMAXPROCS)

		// Still OK to update?
		lock(&sched.lock)
		custom := sched.customGOMAXPROCS
		unlock(&sched.lock)
		if custom {
			startTheWorldGC(stw)
			return
		}

		// newprocs will be processed by startTheWorld
		//
		// TODO(prattmic): this could use a nicer API. Perhaps add it to the
		// stw parameter?
		newprocs = updateMaxProcsG.procs
		lock(&sched.lock)
		sched.customGOMAXPROCS = false
		unlock(&sched.lock)

		startTheWorldGC(stw)
	}
}

// sysmonUpdateGOMAXPROCS computes the default GOMAXPROCS value and, if
// it differs from the current value and no custom value has been set,
// wakes updateMaxProcsGoroutine to apply it.
func sysmonUpdateGOMAXPROCS() {
	// Synchronize with GOMAXPROCS. See comment on computeMaxProcsLock.
	lock(&computeMaxProcsLock)

	// No update if GOMAXPROCS was set manually.
	lock(&sched.lock)
	custom := sched.customGOMAXPROCS
	curr := gomaxprocs
	unlock(&sched.lock)
	if custom {
		unlock(&computeMaxProcsLock)
		return
	}

	// Don't hold sched.lock while we read the filesystem.
	procs := defaultGOMAXPROCS(0)
	unlock(&computeMaxProcsLock)
	if procs == curr {
		// Nothing to do.
		return
	}

	// Sysmon can't directly stop the world. Run the helper to do so on our
	// behalf. If updateMaxProcsG.idle is false, then a previous update is
	// still pending.
	if updateMaxProcsG.idle.Load() {
		lock(&updateMaxProcsG.lock)
		updateMaxProcsG.procs = procs
		updateMaxProcsG.idle.Store(false)
		var list gList
		list.push(updateMaxProcsG.g)
		injectglist(&list)
		unlock(&updateMaxProcsG.lock)
	}
}

// schedEnableUser enables or disables the scheduling of user
// goroutines.
//
// This does not stop already running user goroutines, so the caller
// should first stop the world when disabling user goroutines.
func schedEnableUser(enable bool) {
	lock(&sched.lock)
	if sched.disable.user == !enable {
		// Already in the requested state.
		unlock(&sched.lock)
		return
	}
	sched.disable.user = !enable
	if enable {
		// Move the goroutines held back while disabled onto the global
		// run queue, then start up to one M per moved goroutine while
		// idle Ps remain.
		n := sched.disable.runnable.size
		globrunqputbatch(&sched.disable.runnable)
		unlock(&sched.lock)
		for ; n != 0 && sched.npidle.Load() != 0; n-- {
			startm(nil, false, false)
		}
	} else {
		unlock(&sched.lock)
	}
}

// schedEnabled reports whether gp should be scheduled. It returns
// false if scheduling of gp is disabled.
//
// sched.lock must be held.
func schedEnabled(gp *g) bool {
	assertLockHeld(&sched.lock)

	if sched.disable.user {
		// With user scheduling disabled, only goroutines that
		// isSystemGoroutine reports as system goroutines may run.
		return isSystemGoroutine(gp, true)
	}
	return true
}

// Put mp on midle list.
// sched.lock must be held.
// May run during STW, so write barriers are not allowed.
//
//go:nowritebarrierrec
func mput(mp *m) {
	assertLockHeld(&sched.lock)

	sched.midle.push(unsafe.Pointer(mp))
	sched.nmidle++
	checkdead()
}

// Try to get an m from midle list.
// sched.lock must be held.
// May run during STW, so write barriers are not allowed.
//
//go:nowritebarrierrec
func mget() *m {
	assertLockHeld(&sched.lock)

	mp := (*m)(sched.midle.pop())
	if mp != nil {
		// Only adjust the idle count when an M was actually removed.
		sched.nmidle--
	}
	return mp
}

// Try to get a specific m from midle list. Returns nil if it isn't on the
// midle list.
//
// sched.lock must be held.
// May run during STW, so write barriers are not allowed.
//
//go:nowritebarrierrec
func mgetSpecific(mp *m) *m {
	assertLockHeld(&sched.lock)

	if mp.idleNode.prev == 0 && mp.idleNode.next == 0 {
		// Not on the list.
		return nil
	}

	sched.midle.remove(unsafe.Pointer(mp))
	sched.nmidle--

	return mp
}

// Put gp on the global runnable queue.
// sched.lock must be held.
// May run during STW, so write barriers are not allowed.
//
//go:nowritebarrierrec
func globrunqput(gp *g) {
	assertLockHeld(&sched.lock)

	sched.runq.pushBack(gp)
}

// Put gp at the head of the global runnable queue.
// sched.lock must be held.
// May run during STW, so write barriers are not allowed.
//
//go:nowritebarrierrec
func globrunqputhead(gp *g) {
	assertLockHeld(&sched.lock)

	sched.runq.push(gp)
}

// Put a batch of runnable goroutines on the global runnable queue.
// This clears *batch.
// sched.lock must be held.
// May run during STW, so write barriers are not allowed.
//
//go:nowritebarrierrec
func globrunqputbatch(batch *gQueue) {
	assertLockHeld(&sched.lock)

	sched.runq.pushBackAll(*batch)
	// The Gs now belong to sched.runq; reset the caller's queue.
	*batch = gQueue{}
}

// Try to get a single G from the global runnable queue.
// sched.lock must be held.
7325 func globrunqget() *g { 7326 assertLockHeld(&sched.lock) 7327 7328 if sched.runq.size == 0 { 7329 return nil 7330 } 7331 7332 return sched.runq.pop() 7333 } 7334 7335 // Try get a batch of G's from the global runnable queue. 7336 // sched.lock must be held. 7337 func globrunqgetbatch(n int32) (gp *g, q gQueue) { 7338 assertLockHeld(&sched.lock) 7339 7340 if sched.runq.size == 0 { 7341 return 7342 } 7343 7344 n = min(n, sched.runq.size, sched.runq.size/gomaxprocs+1) 7345 7346 gp = sched.runq.pop() 7347 n-- 7348 7349 for ; n > 0; n-- { 7350 gp1 := sched.runq.pop() 7351 q.pushBack(gp1) 7352 } 7353 return 7354 } 7355 7356 // pMask is an atomic bitstring with one bit per P. 7357 type pMask []uint32 7358 7359 // read returns true if P id's bit is set. 7360 func (p pMask) read(id uint32) bool { 7361 word := id / 32 7362 mask := uint32(1) << (id % 32) 7363 return (atomic.Load(&p[word]) & mask) != 0 7364 } 7365 7366 // set sets P id's bit. 7367 func (p pMask) set(id int32) { 7368 word := id / 32 7369 mask := uint32(1) << (id % 32) 7370 atomic.Or(&p[word], mask) 7371 } 7372 7373 // clear clears P id's bit. 7374 func (p pMask) clear(id int32) { 7375 word := id / 32 7376 mask := uint32(1) << (id % 32) 7377 atomic.And(&p[word], ^mask) 7378 } 7379 7380 // any returns true if any bit in p is set. 7381 func (p pMask) any() bool { 7382 for i := range p { 7383 if atomic.Load(&p[i]) != 0 { 7384 return true 7385 } 7386 } 7387 return false 7388 } 7389 7390 // resize resizes the pMask and returns a new one. 7391 // 7392 // The result may alias p, so callers are encouraged to 7393 // discard p. Not safe for concurrent use. 7394 func (p pMask) resize(nprocs int32) pMask { 7395 maskWords := (nprocs + 31) / 32 7396 7397 if maskWords <= int32(cap(p)) { 7398 return p[:maskWords] 7399 } 7400 newMask := make([]uint32, maskWords) 7401 // No need to copy beyond len, old Ps are irrelevant. 7402 copy(newMask, p) 7403 return newMask 7404 } 7405 7406 // pidleput puts p on the _Pidle list. 
// now must be a relatively recent call
// to nanotime or zero. Returns now or the current time if now was zero.
//
// This releases ownership of p. Once sched.lock is released it is no longer
// safe to use p.
//
// sched.lock must be held.
//
// May run during STW, so write barriers are not allowed.
//
//go:nowritebarrierrec
func pidleput(pp *p, now int64) int64 {
	assertLockHeld(&sched.lock)

	if !runqempty(pp) {
		throw("pidleput: P has non-empty run queue")
	}
	if now == 0 {
		now = nanotime()
	}
	// Drop pp from the timer mask only if it has no timers left.
	if pp.timers.len.Load() == 0 {
		timerpMask.clear(pp.id)
	}
	idlepMask.set(pp.id)
	// Push pp onto the front of the idle list.
	pp.link = sched.pidle
	sched.pidle.set(pp)
	sched.npidle.Add(1)
	if !pp.limiterEvent.start(limiterEventIdle, now) {
		throw("must be able to track idle limiter event")
	}
	return now
}

// pidleget tries to get a p from the _Pidle list, acquiring ownership.
// It returns nil if the list is empty. The returned time is now, or the
// current time if now was zero and a P was found.
//
// sched.lock must be held.
//
// May run during STW, so write barriers are not allowed.
//
//go:nowritebarrierrec
func pidleget(now int64) (*p, int64) {
	assertLockHeld(&sched.lock)

	pp := sched.pidle.ptr()
	if pp != nil {
		// Timer may get added at any time now.
		if now == 0 {
			now = nanotime()
		}
		timerpMask.set(pp.id)
		idlepMask.clear(pp.id)
		// Pop pp off the front of the idle list.
		sched.pidle = pp.link
		sched.npidle.Add(-1)
		pp.limiterEvent.stop(limiterEventIdle, now)
	}
	return pp, now
}

// pidlegetSpinning tries to get a p from the _Pidle list, acquiring ownership.
// This is called by spinning Ms (or callers that need a spinning M) that have
// found work. If no P is available, this must be synchronized with non-spinning
// Ms that may be preparing to drop their P without discovering this work.
//
// sched.lock must be held.
//
// May run during STW, so write barriers are not allowed.
//
//go:nowritebarrierrec
func pidlegetSpinning(now int64) (*p, int64) {
	assertLockHeld(&sched.lock)

	pp, now := pidleget(now)
	if pp == nil {
		// See "Delicate dance" comment in findRunnable. We found work
		// that we cannot take, we must synchronize with non-spinning
		// Ms that may be preparing to drop their P.
		sched.needspinning.Store(1)
		return nil, now
	}

	return pp, now
}

// runqempty reports whether pp has no Gs on its local run queue.
// It never returns true spuriously.
func runqempty(pp *p) bool {
	// Defend against a race where 1) pp has G1 in runqnext but runqhead == runqtail,
	// 2) runqput on pp kicks G1 to the runq, 3) runqget on pp empties runqnext.
	// Simply observing that runqhead == runqtail and then observing that runqnext == nil
	// does not mean the queue is empty.
	for {
		head := atomic.Load(&pp.runqhead)
		tail := atomic.Load(&pp.runqtail)
		runnext := atomic.Loaduintptr((*uintptr)(unsafe.Pointer(&pp.runnext)))
		// Only trust the snapshot if the tail did not move while
		// runnext was being read; otherwise take a fresh snapshot.
		if tail == atomic.Load(&pp.runqtail) {
			return head == tail && runnext == 0
		}
	}
}

// To shake out latent assumptions about scheduling order,
// we introduce some randomness into scheduling decisions
// when running with the race detector.
// The need for this was made obvious by changing the
// (deterministic) scheduling order in Go 1.5 and breaking
// many poorly-written tests.
// With the randomness here, as long as the tests pass
// consistently with -race, they shouldn't have latent scheduling
// assumptions.
const randomizeScheduler = raceenabled

// runqput tries to put g on the local runnable queue.
// If next is false, runqput adds g to the tail of the runnable queue.
// If next is true, runqput puts g in the pp.runnext slot.
// If the run queue is full, runqput puts g on the global queue.
// Executed only by the owner P.
func runqput(pp *p, gp *g, next bool) {
	if !haveSysmon && next {
		// A runnext goroutine shares the same time slice as the
		// current goroutine (inheritTime from runqget). To prevent a
		// ping-pong pair of goroutines from starving all others, we
		// depend on sysmon to preempt "long-running goroutines". That
		// is, any set of goroutines sharing the same time slice.
		//
		// If there is no sysmon, we must avoid runnext entirely or
		// risk starvation.
		next = false
	}
	// When randomizing, ignore the runnext request half of the time.
	if randomizeScheduler && next && randn(2) == 0 {
		next = false
	}

	if next {
	retryNext:
		oldnext := pp.runnext
		if !pp.runnext.cas(oldnext, guintptr(unsafe.Pointer(gp))) {
			goto retryNext
		}
		if oldnext == 0 {
			return
		}
		// Kick the old runnext out to the regular run queue.
		gp = oldnext.ptr()
	}

retry:
	h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with consumers
	t := pp.runqtail
	if t-h < uint32(len(pp.runq)) {
		pp.runq[t%uint32(len(pp.runq))].set(gp)
		atomic.StoreRel(&pp.runqtail, t+1) // store-release, makes the item available for consumption
		return
	}
	if runqputslow(pp, gp, h, t) {
		return
	}
	// the queue is not full, now the put above must succeed
	goto retry
}

// Put g and a batch of work from local runnable queue on global queue.
// Executed only by the owner P.
//
// h and t are the head and tail the caller observed. Returns false,
// without moving anything, if the head changed since h was read.
func runqputslow(pp *p, gp *g, h, t uint32) bool {
	var batch [len(pp.runq)/2 + 1]*g

	// First, grab a batch from local queue.
	n := t - h
	n = n / 2
	if n != uint32(len(pp.runq)/2) {
		throw("runqputslow: queue is not full")
	}
	for i := uint32(0); i < n; i++ {
		batch[i] = pp.runq[(h+i)%uint32(len(pp.runq))].ptr()
	}
	if !atomic.CasRel(&pp.runqhead, h, h+n) { // cas-release, commits consume
		return false
	}
	// gp takes the last slot, after the n Gs taken from the local queue.
	batch[n] = gp

	if randomizeScheduler {
		// Shuffle the batch in place.
		for i := uint32(1); i <= n; i++ {
			j := cheaprandn(i + 1)
			batch[i], batch[j] = batch[j], batch[i]
		}
	}

	// Link the goroutines.
	for i := uint32(0); i < n; i++ {
		batch[i].schedlink.set(batch[i+1])
	}

	q := gQueue{batch[0].guintptr(), batch[n].guintptr(), int32(n + 1)}

	// Now put the batch on global queue.
	lock(&sched.lock)
	globrunqputbatch(&q)
	unlock(&sched.lock)
	return true
}

// runqputbatch tries to put all the G's on q on the local runnable queue.
// If the local runq is full the input queue still contains unqueued Gs.
// Executed only by the owner P.
func runqputbatch(pp *p, q *gQueue) {
	if q.empty() {
		return
	}
	h := atomic.LoadAcq(&pp.runqhead)
	t := pp.runqtail
	n := uint32(0)
	// Fill slots past the current tail. Nothing is visible to consumers
	// until the store-release of runqtail below.
	for !q.empty() && t-h < uint32(len(pp.runq)) {
		gp := q.pop()
		pp.runq[t%uint32(len(pp.runq))].set(gp)
		t++
		n++
	}

	if randomizeScheduler {
		// Shuffle the n newly written slots; pp.runqtail still holds
		// the old tail, so off(0) is the first new slot.
		off := func(o uint32) uint32 {
			return (pp.runqtail + o) % uint32(len(pp.runq))
		}
		for i := uint32(1); i < n; i++ {
			j := cheaprandn(i + 1)
			pp.runq[off(i)], pp.runq[off(j)] = pp.runq[off(j)], pp.runq[off(i)]
		}
	}

	atomic.StoreRel(&pp.runqtail, t)

	return
}

// Get g from local runnable queue.
// If inheritTime is true, gp should inherit the remaining time in the
// current time slice. Otherwise, it should start a new time slice.
// Executed only by the owner P.
func runqget(pp *p) (gp *g, inheritTime bool) {
	// If there's a runnext, it's the next G to run.
	next := pp.runnext
	// If the runnext is non-0 and the CAS fails, it could only have been stolen by another P,
	// because other Ps can race to set runnext to 0, but only the current P can set it to non-0.
	// Hence, there's no need to retry this CAS if it fails.
	if next != 0 && pp.runnext.cas(next, 0) {
		return next.ptr(), true
	}

	for {
		h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with other consumers
		t := pp.runqtail
		if t == h {
			return nil, false
		}
		gp := pp.runq[h%uint32(len(pp.runq))].ptr()
		if atomic.CasRel(&pp.runqhead, h, h+1) { // cas-release, commits consume
			return gp, false
		}
	}
}

// runqdrain drains the local runnable queue of pp and returns all goroutines in it.
// Executed only by the owner P.
func runqdrain(pp *p) (drainQ gQueue) {
	oldNext := pp.runnext
	if oldNext != 0 && pp.runnext.cas(oldNext, 0) {
		drainQ.pushBack(oldNext.ptr())
	}

retry:
	h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with other consumers
	t := pp.runqtail
	qn := t - h
	if qn == 0 {
		return
	}
	if qn > uint32(len(pp.runq)) { // read inconsistent h and t
		goto retry
	}

	if !atomic.CasRel(&pp.runqhead, h, h+qn) { // cas-release, commits consume
		goto retry
	}

	// We've inverted the order in which it gets G's from the local P's runnable queue
	// and then advances the head pointer because we don't want to mess up the statuses of G's
	// while runqdrain() and runqsteal() are running in parallel.
	// Thus we should advance the head pointer before draining the local P into a gQueue,
	// so that we can update any gp.schedlink only after we take the full ownership of G,
	// meanwhile, other P's can't access to all G's in local P's runnable queue and steal them.
	// See https://groups.google.com/g/golang-dev/c/0pTKxEKhHSc/m/6Q85QjdVBQAJ for more details.
	for i := uint32(0); i < qn; i++ {
		gp := pp.runq[(h+i)%uint32(len(pp.runq))].ptr()
		drainQ.pushBack(gp)
	}
	return
}

// Grabs a batch of goroutines from pp's runnable queue into batch.
// Batch is a ring buffer starting at batchHead.
// Returns number of grabbed goroutines.
// Can be executed by any P.
//
// If the queue is empty and stealRunNextG is true, it tries to take
// pp.runnext instead.
func runqgrab(pp *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool) uint32 {
	for {
		h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with other consumers
		t := atomic.LoadAcq(&pp.runqtail) // load-acquire, synchronize with the producer
		n := t - h
		// Take half of the queue, rounding up.
		n = n - n/2
		if n == 0 {
			if stealRunNextG {
				// Try to steal from pp.runnext.
				if next := pp.runnext; next != 0 {
					if pp.status == _Prunning {
						if mp := pp.m.ptr(); mp != nil {
							if gp := mp.curg; gp == nil || readgstatus(gp)&^_Gscan != _Gsyscall {
								// Sleep to ensure that pp isn't about to run the g
								// we are about to steal.
								// The important use case here is when the g running
								// on pp ready()s another g and then almost
								// immediately blocks. Instead of stealing runnext
								// in this window, back off to give pp a chance to
								// schedule runnext. This will avoid thrashing gs
								// between different Ps.
								// A sync chan send/recv takes ~50ns as of time of
								// writing, so 3us gives ~50x overshoot.
								// If curg is nil, we assume that the P is likely
								// to be in the scheduler. If curg isn't nil and isn't
								// in a syscall, then it's either running, waiting, or
								// runnable. In this case we want to sleep because the
								// P might either call into the scheduler soon (running),
								// or already is (since we found a waiting or runnable
								// goroutine hanging off of a running P, suggesting it
								// either recently transitioned out of running, or will
								// transition to running shortly).
								if !osHasLowResTimer {
									usleep(3)
								} else {
									// On some platforms system timer granularity is
									// 1-15ms, which is way too much for this
									// optimization. So just yield.
									osyield()
								}
							}
						}
					}
					if !pp.runnext.cas(next, 0) {
						continue
					}
					batch[batchHead%uint32(len(batch))] = next
					return 1
				}
			}
			return 0
		}
		if n > uint32(len(pp.runq)/2) { // read inconsistent h and t
			continue
		}
		// Copy first, then commit with the CAS below; if the CAS fails
		// the copied entries are discarded and the loop retries.
		for i := uint32(0); i < n; i++ {
			g := pp.runq[(h+i)%uint32(len(pp.runq))]
			batch[(batchHead+i)%uint32(len(batch))] = g
		}
		if atomic.CasRel(&pp.runqhead, h, h+n) { // cas-release, commits consume
			return n
		}
	}
}

// Steal half of elements from local runnable queue of p2
// and put onto local runnable queue of p.
// Returns one of the stolen elements (or nil if failed).
func runqsteal(pp, p2 *p, stealRunNextG bool) *g {
	t := pp.runqtail
	// Grab directly into pp's own ring buffer, starting at its tail.
	n := runqgrab(p2, &pp.runq, t, stealRunNextG)
	if n == 0 {
		return nil
	}
	// The last grabbed G is returned to the caller instead of queued.
	n--
	gp := pp.runq[(t+n)%uint32(len(pp.runq))].ptr()
	if n == 0 {
		return gp
	}
	h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with consumers
	if t-h+n >= uint32(len(pp.runq)) {
		throw("runqsteal: runq overflow")
	}
	atomic.StoreRel(&pp.runqtail, t+n) // store-release, makes the item available for consumption
	return gp
}

// A gQueue is a deque of Gs linked through g.schedlink. A G can only
// be on one gQueue or gList at a time.
7795 type gQueue struct { 7796 head guintptr 7797 tail guintptr 7798 size int32 7799 } 7800 7801 // empty reports whether q is empty. 7802 func (q *gQueue) empty() bool { 7803 return q.head == 0 7804 } 7805 7806 // push adds gp to the head of q. 7807 func (q *gQueue) push(gp *g) { 7808 gp.schedlink = q.head 7809 q.head.set(gp) 7810 if q.tail == 0 { 7811 q.tail.set(gp) 7812 } 7813 q.size++ 7814 } 7815 7816 // pushBack adds gp to the tail of q. 7817 func (q *gQueue) pushBack(gp *g) { 7818 gp.schedlink = 0 7819 if q.tail != 0 { 7820 q.tail.ptr().schedlink.set(gp) 7821 } else { 7822 q.head.set(gp) 7823 } 7824 q.tail.set(gp) 7825 q.size++ 7826 } 7827 7828 // pushBackAll adds all Gs in q2 to the tail of q. After this q2 must 7829 // not be used. 7830 func (q *gQueue) pushBackAll(q2 gQueue) { 7831 if q2.tail == 0 { 7832 return 7833 } 7834 q2.tail.ptr().schedlink = 0 7835 if q.tail != 0 { 7836 q.tail.ptr().schedlink = q2.head 7837 } else { 7838 q.head = q2.head 7839 } 7840 q.tail = q2.tail 7841 q.size += q2.size 7842 } 7843 7844 // pop removes and returns the head of queue q. It returns nil if 7845 // q is empty. 7846 func (q *gQueue) pop() *g { 7847 gp := q.head.ptr() 7848 if gp != nil { 7849 q.head = gp.schedlink 7850 if q.head == 0 { 7851 q.tail = 0 7852 } 7853 q.size-- 7854 } 7855 return gp 7856 } 7857 7858 // popList takes all Gs in q and returns them as a gList. 7859 func (q *gQueue) popList() gList { 7860 stack := gList{q.head, q.size} 7861 *q = gQueue{} 7862 return stack 7863 } 7864 7865 // A gList is a list of Gs linked through g.schedlink. A G can only be 7866 // on one gQueue or gList at a time. 7867 type gList struct { 7868 head guintptr 7869 size int32 7870 } 7871 7872 // empty reports whether l is empty. 7873 func (l *gList) empty() bool { 7874 return l.head == 0 7875 } 7876 7877 // push adds gp to the head of l. 
7878 func (l *gList) push(gp *g) { 7879 gp.schedlink = l.head 7880 l.head.set(gp) 7881 l.size++ 7882 } 7883 7884 // pushAll prepends all Gs in q to l. After this q must not be used. 7885 func (l *gList) pushAll(q gQueue) { 7886 if !q.empty() { 7887 q.tail.ptr().schedlink = l.head 7888 l.head = q.head 7889 l.size += q.size 7890 } 7891 } 7892 7893 // pop removes and returns the head of l. If l is empty, it returns nil. 7894 func (l *gList) pop() *g { 7895 gp := l.head.ptr() 7896 if gp != nil { 7897 l.head = gp.schedlink 7898 l.size-- 7899 } 7900 return gp 7901 } 7902 7903 //go:linkname setMaxThreads runtime/debug.setMaxThreads 7904 func setMaxThreads(in int) (out int) { 7905 lock(&sched.lock) 7906 out = int(sched.maxmcount) 7907 if in > 0x7fffffff { // MaxInt32 7908 sched.maxmcount = 0x7fffffff 7909 } else { 7910 sched.maxmcount = int32(in) 7911 } 7912 checkmcount() 7913 unlock(&sched.lock) 7914 return 7915 } 7916 7917 // procPin should be an internal detail, 7918 // but widely used packages access it using linkname. 7919 // Notable members of the hall of shame include: 7920 // - github.com/bytedance/gopkg 7921 // - github.com/choleraehyq/pid 7922 // - github.com/songzhibin97/gkit 7923 // 7924 // Do not remove or change the type signature. 7925 // See go.dev/issue/67401. 7926 // 7927 //go:linkname procPin 7928 //go:nosplit 7929 func procPin() int { 7930 gp := getg() 7931 mp := gp.m 7932 7933 mp.locks++ 7934 return int(mp.p.ptr().id) 7935 } 7936 7937 // procUnpin should be an internal detail, 7938 // but widely used packages access it using linkname. 7939 // Notable members of the hall of shame include: 7940 // - github.com/bytedance/gopkg 7941 // - github.com/choleraehyq/pid 7942 // - github.com/songzhibin97/gkit 7943 // 7944 // Do not remove or change the type signature. 7945 // See go.dev/issue/67401. 
//
//go:linkname procUnpin
//go:nosplit
func procUnpin() {
	gp := getg()
	// Undo the mp.locks++ performed by procPin.
	gp.m.locks--
}

// sync_runtime_procPin is linknamed into package sync as
// runtime_procPin and forwards to procPin.
//
//go:linkname sync_runtime_procPin sync.runtime_procPin
//go:nosplit
func sync_runtime_procPin() int {
	return procPin()
}

// sync_runtime_procUnpin is linknamed into package sync as
// runtime_procUnpin and forwards to procUnpin.
//
//go:linkname sync_runtime_procUnpin sync.runtime_procUnpin
//go:nosplit
func sync_runtime_procUnpin() {
	procUnpin()
}

// sync_atomic_runtime_procPin is linknamed into package sync/atomic as
// runtime_procPin and forwards to procPin.
//
//go:linkname sync_atomic_runtime_procPin sync/atomic.runtime_procPin
//go:nosplit
func sync_atomic_runtime_procPin() int {
	return procPin()
}

// sync_atomic_runtime_procUnpin is linknamed into package sync/atomic
// as runtime_procUnpin and forwards to procUnpin.
//
//go:linkname sync_atomic_runtime_procUnpin sync/atomic.runtime_procUnpin
//go:nosplit
func sync_atomic_runtime_procUnpin() {
	procUnpin()
}

// Active spinning for sync.Mutex.
//
// internal_sync_runtime_canSpin reports whether the caller may spin
// once more. It returns false as soon as i reaches active_spin
// (NOTE(review): i is presumably the caller's spin iteration count;
// confirm against internal/sync).
//
//go:linkname internal_sync_runtime_canSpin internal/sync.runtime_canSpin
//go:nosplit
func internal_sync_runtime_canSpin(i int) bool {
	// sync.Mutex is cooperative, so we are conservative with spinning.
	// Spin only few times and only if running on a multicore machine and
	// GOMAXPROCS>1 and there is at least one other running P and local runq is empty.
	// As opposed to runtime mutex we don't do passive spinning here,
	// because there can be work on global runq or on other Ps.
	if i >= active_spin || numCPUStartup <= 1 || gomaxprocs <= sched.npidle.Load()+sched.nmspinning.Load()+1 {
		return false
	}
	// Do not spin while the current P still has runnable goroutines queued.
	if p := getg().m.p.ptr(); !runqempty(p) {
		return false
	}
	return true
}

// internal_sync_runtime_doSpin is linknamed into internal/sync as
// runtime_doSpin; it performs one spin step by calling
// procyield(active_spin_cnt).
//
//go:linkname internal_sync_runtime_doSpin internal/sync.runtime_doSpin
//go:nosplit
func internal_sync_runtime_doSpin() {
	procyield(active_spin_cnt)
}

// Active spinning for sync.Mutex.
//
// sync_runtime_canSpin should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - github.com/livekit/protocol
//   - github.com/sagernet/gvisor
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname sync_runtime_canSpin sync.runtime_canSpin
//go:nosplit
func sync_runtime_canSpin(i int) bool {
	// Thin compatibility shim: the logic lives in internal_sync_runtime_canSpin.
	return internal_sync_runtime_canSpin(i)
}

// sync_runtime_doSpin should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - github.com/livekit/protocol
//   - github.com/sagernet/gvisor
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname sync_runtime_doSpin sync.runtime_doSpin
//go:nosplit
func sync_runtime_doSpin() {
	// Thin compatibility shim: the logic lives in internal_sync_runtime_doSpin.
	internal_sync_runtime_doSpin()
}

// stealOrder is the package-level randomOrder instance.
// NOTE(review): presumably reset whenever the number of Ps changes;
// confirm at the call sites outside this chunk.
var stealOrder randomOrder

// randomOrder/randomEnum are helper types for randomized work stealing.
// They allow enumerating all Ps in different pseudo-random orders without repetitions.
// The algorithm is based on the fact that if we have X such that X and GOMAXPROCS
// are coprime, then the sequence (i + X) % GOMAXPROCS gives the required enumeration.
type randomOrder struct {
	count    uint32   // number of positions to enumerate
	coprimes []uint32 // every value in [1, count] that is coprime with count
}

// randomEnum is the iteration state of one enumeration started by
// randomOrder.start.
type randomEnum struct {
	i     uint32 // positions visited so far
	count uint32 // total number of positions
	pos   uint32 // current position
	inc   uint32 // step added to pos on each call to next
}

// reset prepares o for enumerations over count positions by
// recomputing the list of usable steps. The backing array of
// o.coprimes is reused when it is large enough.
func (o *randomOrder) reset(count uint32) {
	steps := o.coprimes[:0]
	for c := uint32(1); c <= count; c++ {
		if gcd(c, count) != 1 {
			continue
		}
		steps = append(steps, c)
	}
	o.count = count
	o.coprimes = steps
}

// start returns an enumeration seeded by i: the low part of i picks
// the starting position and the remainder picks the step.
func (o *randomOrder) start(i uint32) randomEnum {
	n := o.count
	stepIdx := (i / n) % uint32(len(o.coprimes))

	var e randomEnum
	e.count = n
	e.pos = i % n
	e.inc = o.coprimes[stepIdx]
	return e
}

// done reports whether every position has been visited.
func (e *randomEnum) done() bool {
	return e.i == e.count
}

// next advances the enumeration to the following position.
func (e *randomEnum) next() {
	e.i++
	stepped := e.pos + e.inc
	e.pos = stepped % e.count
}

// position returns the current position of the enumeration.
func (e *randomEnum) position() uint32 {
	return e.pos
}

// gcd returns the greatest common divisor of a and b, computed with
// the Euclidean algorithm.
func gcd(a, b uint32) uint32 {
	for b != 0 {
		rem := a % b
		a = b
		b = rem
	}
	return a
}

// An initTask represents the set of initializations that need to be done for a package.
// Keep in sync with ../../test/noinit.go:initTask
type initTask struct {
	state uint32 // 0 = uninitialized, 1 = in progress, 2 = done
	nfns  uint32
	// followed by nfns pcs, uintptr sized, one per init function to run
}

// inittrace stores statistics for init functions which are
// updated by malloc and newproc when active is true.
var inittrace tracestat

// tracestat holds the counters sampled by doInit1 before and after a
// package's init functions run.
type tracestat struct {
	active bool   // init tracing activation status
	id     uint64 // init goroutine id
	allocs uint64 // heap allocations
	bytes  uint64 // heap allocated bytes
}

// doInit runs doInit1 on every task in ts, in slice order.
func doInit(ts []*initTask) {
	for _, t := range ts {
		doInit1(t)
	}
}

// doInit1 runs the init functions recorded in t, unless t is already
// done. It moves t.state from 0 (uninitialized) through 1 (in
// progress) to 2 (done), and throws if it finds the task already in
// progress or with no functions. When inittrace.active is set, it also
// prints one line with the package path, the start time relative to
// runtimeInitTime, the elapsed time, and the bytes/allocs deltas
// taken from inittrace.
func doInit1(t *initTask) {
	switch t.state {
	case 2: // fully initialized
		return
	case 1: // initialization in progress
		throw("recursive call during initialization - linker skew")
	default: // not initialized yet
		t.state = 1 // initialization in progress

		var (
			start  int64
			before tracestat
		)

		if inittrace.active {
			start = nanotime()
			// Load stats non-atomically since inittrace is updated only by this init goroutine.
			before = inittrace
		}

		if t.nfns == 0 {
			// We should have pruned all of these in the linker.
			throw("inittask with no functions")
		}

		// The PCs follow the 8-byte header made of the two uint32
		// fields state and nfns.
		firstFunc := add(unsafe.Pointer(t), 8)
		for i := uint32(0); i < t.nfns; i++ {
			// p addresses the i-th pointer-sized PC slot; it is
			// reinterpreted as a func() value and called.
			p := add(firstFunc, uintptr(i)*goarch.PtrSize)
			f := *(*func())(unsafe.Pointer(&p))
			f()
		}

		if inittrace.active {
			end := nanotime()
			// Load stats non-atomically since inittrace is updated only by this init goroutine.
			after := inittrace

			// Use the first init function to look up the package path.
			f := *(*func())(unsafe.Pointer(&firstFunc))
			pkg := funcpkgpath(findfunc(abi.FuncPCABIInternal(f)))

			// sbuf is scratch space reused for each formatted number.
			var sbuf [24]byte
			print("init ", pkg, " @")
			print(string(fmtNSAsMS(sbuf[:], uint64(start-runtimeInitTime))), " ms, ")
			print(string(fmtNSAsMS(sbuf[:], uint64(end-start))), " ms clock, ")
			print(string(itoa(sbuf[:], after.bytes-before.bytes)), " bytes, ")
			print(string(itoa(sbuf[:], after.allocs-before.allocs)), " allocs")
			print("\n")
		}

		t.state = 2 // initialization done
	}
}