Source file
src/runtime/os_linux.go
1
2
3
4
5 package runtime
6
7 import (
8 "internal/abi"
9 "internal/goarch"
10 "internal/runtime/atomic"
11 "internal/runtime/strconv"
12 "internal/runtime/syscall"
13 "unsafe"
14 )
15
16
17
18
// sigPerThreadSyscall is the signal that syscall_runtime_doAllThreadsSyscall
// delivers (through signalM) to every other thread to ask it to execute the
// pending per-thread system call. It is the real-time signal _SIGRTMIN + 1.
const sigPerThreadSyscall = _SIGRTMIN + 1
20
// mOS holds the Linux-specific per-thread state. In this file its fields are
// reached directly through *m values (for example mp.profileTimer).
type mOS struct {
	// profileTimer is the timer ID obtained from timer_create in
	// setThreadCPUProfiler. It is only meaningful while profileTimerValid
	// is true; setThreadCPUProfiler resets it to 0 when the timer is deleted.
	profileTimer      int32
	profileTimerValid atomic.Bool

	// needPerThreadSyscall is set to 1 by syscall_runtime_doAllThreadsSyscall
	// to request that this thread run the pending per-thread system call, and
	// is reset to 0 by runPerThreadSyscall once the call has been made.
	needPerThreadSyscall atomic.Uint8

	// vgetrandomState is not used in this file.
	// NOTE(review): presumably owned by the vgetrandom code — confirm.
	vgetrandomState uintptr

	// waitsema is not used in this file.
	// NOTE(review): presumably a semaphore word for another lock
	// implementation — confirm against its users.
	waitsema uint32
}
42
43
// futex is declared without a body; its implementation is supplied outside
// this file. In this file it is called with op set to _FUTEX_WAIT_PRIVATE
// (ts is nil or points at a timespec) or _FUTEX_WAKE_PRIVATE (val is the wake
// count). futexwakeup treats a negative result as failure.
func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32
45
46
47
48
49
50
51
52
53
54
// Operation codes passed as the op argument of futex. The wait and wake
// operations are always combined with the private flag in this file.
const (
	_FUTEX_PRIVATE_FLAG = 1 << 7
	_FUTEX_WAIT_PRIVATE = _FUTEX_PRIVATE_FLAG | 0
	_FUTEX_WAKE_PRIVATE = _FUTEX_PRIVATE_FLAG | 1
)
60
61
62
63
64
65
66
67
68
69 func futexsleep(addr *uint32, val uint32, ns int64) {
70
71
72
73
74
75 if ns < 0 {
76 futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, nil, nil, 0)
77 return
78 }
79
80 var ts timespec
81 ts.setNsec(ns)
82 futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, unsafe.Pointer(&ts), nil, 0)
83 }
84
85
86
87
88 func futexwakeup(addr *uint32, cnt uint32) {
89 ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE_PRIVATE, cnt, nil, nil, 0)
90 if ret >= 0 {
91 return
92 }
93
94
95
96
97 systemstack(func() {
98 print("futexwakeup addr=", addr, " returned ", ret, "\n")
99 })
100
101 *(*int32)(unsafe.Pointer(uintptr(0x1006))) = 0x1006
102 }
103
104 func getCPUCount() int32 {
105
106
107
108
109
110
111
112 const maxCPUs = 64 * 1024
113 var buf [maxCPUs / 8]byte
114 r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0])
115 if r < 0 {
116 return 1
117 }
118 n := int32(0)
119 for _, v := range buf[:r] {
120 for v != 0 {
121 n += int32(v & 1)
122 v >>= 1
123 }
124 }
125 if n == 0 {
126 n = 1
127 }
128 return n
129 }
130
131
// Flag bits for the flags argument of clone.
// NOTE(review): values are expected to match the Linux clone(2) flag
// definitions — confirm against the kernel headers when editing.
const (
	_CLONE_VM             = 0x100
	_CLONE_FS             = 0x200
	_CLONE_FILES          = 0x400
	_CLONE_SIGHAND        = 0x800
	_CLONE_PTRACE         = 0x2000
	_CLONE_VFORK          = 0x4000
	_CLONE_PARENT         = 0x8000
	_CLONE_THREAD         = 0x10000
	_CLONE_NEWNS          = 0x20000
	_CLONE_SYSVSEM        = 0x40000
	_CLONE_SETTLS         = 0x80000
	_CLONE_PARENT_SETTID  = 0x100000
	_CLONE_CHILD_CLEARTID = 0x200000
	_CLONE_UNTRACED       = 0x800000
	_CLONE_CHILD_SETTID   = 0x1000000
	_CLONE_STOPPED        = 0x2000000
	_CLONE_NEWUTS         = 0x4000000
	_CLONE_NEWIPC         = 0x8000000
)

// cloneFlags is the flag set that newosproc and newosproc0 pass to clone when
// creating a thread: the VM, FS, FILES, SIGHAND, SYSVSEM and THREAD bits.
const cloneFlags = _CLONE_VM | _CLONE_FS | _CLONE_FILES |
	_CLONE_SIGHAND | _CLONE_SYSVSEM | _CLONE_THREAD
166
167
// clone is declared without a body; its implementation is supplied outside
// this file. Callers in this file pass cloneFlags, the top-of-stack address
// for the new thread, the m and g to associate with it (both nil in
// newosproc0), and the entry point fn. A negative result is treated as
// failure; newosproc negates it and reports that value as an errno.
func clone(flags int32, stk, mp, gp, fn unsafe.Pointer) int32
169
170
171
172
173 func newosproc(mp *m) {
174 stk := unsafe.Pointer(mp.g0.stack.hi)
175
178 if false {
179 print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " clone=", abi.FuncPCABI0(clone), " id=", mp.id, " ostk=", &mp, "\n")
180 }
181
182
183
184 var oset sigset
185 sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
186 ret := retryOnEAGAIN(func() int32 {
187 r := clone(cloneFlags, stk, unsafe.Pointer(mp), unsafe.Pointer(mp.g0), unsafe.Pointer(abi.FuncPCABI0(mstart)))
188
189
190 if r >= 0 {
191 return 0
192 }
193 return -r
194 })
195 sigprocmask(_SIG_SETMASK, &oset, nil)
196
197 if ret != 0 {
198 print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", ret, ")\n")
199 if ret == _EAGAIN {
200 println("runtime: may need to increase max user processes (ulimit -u)")
201 }
202 throw("newosproc")
203 }
204 }
205
206
207
208
209 func newosproc0(stacksize uintptr, fn unsafe.Pointer) {
210 stack := sysAlloc(stacksize, &memstats.stacks_sys, "OS thread stack")
211 if stack == nil {
212 writeErrStr(failallocatestack)
213 exit(1)
214 }
215 ret := clone(cloneFlags, unsafe.Pointer(uintptr(stack)+stacksize), nil, nil, fn)
216 if ret < 0 {
217 writeErrStr(failthreadcreate)
218 exit(1)
219 }
220 }
221
// Auxiliary vector tags. Only the tags that sysauxv handles directly are
// annotated; every tag is also forwarded to archauxv and vdsoauxv.
const (
	_AT_NULL     = 0 // terminates the vector; sysauxv stops scanning here
	_AT_PAGESZ   = 6 // value is stored in physPageSize
	_AT_PLATFORM = 15
	_AT_HWCAP    = 16
	_AT_SECURE   = 23 // secureMode is set when the value is 1
	_AT_RANDOM   = 25 // value is used as the address of 16 bytes for startupRand
	_AT_HWCAP2   = 26
)
231
// procAuxv is the NUL-terminated path that sysargs opens when the in-memory
// auxiliary vector yields no entries.
var procAuxv = []byte("/proc/self/auxv\x00")

// addrspace_vec is the one-byte result buffer handed to mincore by the
// page-size probe in sysargs; its contents are never read in this file.
var addrspace_vec [1]byte

// mincore is declared without a body; its implementation is supplied outside
// this file. sysargs treats a zero result as success.
func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32

// auxvreadbuf receives the contents of /proc/self/auxv in sysargs, and backs
// the auxv slice when that path is taken.
var auxvreadbuf [128]uintptr
239
240 func sysargs(argc int32, argv **byte) {
241 n := argc + 1
242
243
244 for argv_index(argv, n) != nil {
245 n++
246 }
247
248
249 n++
250
251
252 auxvp := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*goarch.PtrSize))
253
254 if pairs := sysauxv(auxvp[:]); pairs != 0 {
255 auxv = auxvp[: pairs*2 : pairs*2]
256 return
257 }
258
259
260
261 fd := open(&procAuxv[0], 0 , 0)
262 if fd < 0 {
263
264
265
266 const size = 256 << 10
267 p, err := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
268 if err != 0 {
269 return
270 }
271 var n uintptr
272 for n = 4 << 10; n < size; n <<= 1 {
273 err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0])
274 if err == 0 {
275 physPageSize = n
276 break
277 }
278 }
279 if physPageSize == 0 {
280 physPageSize = size
281 }
282 munmap(p, size)
283 return
284 }
285
286 n = read(fd, noescape(unsafe.Pointer(&auxvreadbuf[0])), int32(unsafe.Sizeof(auxvreadbuf)))
287 closefd(fd)
288 if n < 0 {
289 return
290 }
291
292
293 auxvreadbuf[len(auxvreadbuf)-2] = _AT_NULL
294 pairs := sysauxv(auxvreadbuf[:])
295 auxv = auxvreadbuf[: pairs*2 : pairs*2]
296 }
297
298
// secureMode records whether the _AT_SECURE entry of the auxiliary vector had
// the value 1. It is assigned by sysauxv each time that tag is seen.
var secureMode bool
300
301 func sysauxv(auxv []uintptr) (pairs int) {
302
303
304 var i int
305 for ; auxv[i] != _AT_NULL; i += 2 {
306 tag, val := auxv[i], auxv[i+1]
307 switch tag {
308 case _AT_RANDOM:
309
310
311
312
313
314
315 startupRand = (*[16]byte)(unsafe.Pointer(val))[:]
316
317 case _AT_PAGESZ:
318 physPageSize = val
319
320 case _AT_SECURE:
321 secureMode = val == 1
322 }
323
324 archauxv(tag, val)
325 vdsoauxv(tag, val)
326 }
327 return i / 2
328 }
329
330 var sysTHPSizePath = []byte("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size\x00")
331
332 func getHugePageSize() uintptr {
333 var numbuf [20]byte
334 fd := open(&sysTHPSizePath[0], 0 , 0)
335 if fd < 0 {
336 return 0
337 }
338 ptr := noescape(unsafe.Pointer(&numbuf[0]))
339 n := read(fd, ptr, int32(len(numbuf)))
340 closefd(fd)
341 if n <= 0 {
342 return 0
343 }
344 n--
345 v, ok := strconv.Atoi(slicebytetostringtmp((*byte)(ptr), int(n)))
346 if !ok || v < 0 {
347 v = 0
348 }
349 if v&(v-1) != 0 {
350
351 return 0
352 }
353 return uintptr(v)
354 }
355
// osinit performs the Linux-specific start-up steps visible here: it records
// the CPU count in numCPUStartup and the huge page size in physHugePageSize,
// then runs osArchInit and vgetrandomInit.
func osinit() {
	numCPUStartup = getCPUCount()
	physHugePageSize = getHugePageSize()
	osArchInit()
	vgetrandomInit()
}
362
363 var urandom_dev = []byte("/dev/urandom\x00")
364
365 func readRandom(r []byte) int {
366
367
368 fd := open(&urandom_dev[0], 0 , 0)
369 n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
370 closefd(fd)
371 return int(n)
372 }
373
// goenvs delegates to goenvs_unix, the environment set-up shared by Unix
// ports; Linux adds nothing of its own here.
func goenvs() {
	goenvs_unix()
}
377
378
379
380
381
382
383
// libpreinit calls initsig with its argument set to true.
// NOTE(review): the name suggests this runs early when the runtime is built
// as a library, and that the flag selects a pre-initialisation mode —
// confirm against initsig.
func libpreinit() {
	initsig(true)
}
387
388
389
390 func mpreinit(mp *m) {
391 mp.gsignal = malg(32 * 1024)
392 mp.gsignal.m = mp
393 }
394
395 func gettid() uint32
396
397
398
399 func minit() {
400 minitSignals()
401
402
403
404
405 getg().m.procid = uint64(gettid())
406 }
407
408
409
410
411 func unminit() {
412 unminitSignals()
413 getg().m.procid = 0
414 }
415
416
417
418
419
420
// mdestroy is a no-op on Linux: this file holds no per-m resources that need
// releasing at this point.
func mdestroy(mp *m) {
}
423
424
425
426
427
// The functions below are declared without bodies; their implementations are
// supplied outside this file.

// sigreturn__sigaction, sigtramp and cgoSigtramp are only used in this file
// by address: setsig installs sigreturn__sigaction as sa_restorer on 386 and
// amd64, and substitutes sigtramp (or cgoSigtramp when iscgo is set) for
// sighandler as the handler entry point.
func sigreturn__sigaction()
func sigtramp()
func cgoSigtramp()

// sigaltstack is not called in this file.
func sigaltstack(new, old *stackt)

// setitimer is not called in this file.
func setitimer(mode int32, new, old *itimerval)

// timer_create stores the new timer's ID in *timerid. setThreadCPUProfiler
// treats a non-zero result as failure.
func timer_create(clockid int32, sevp *sigevent, timerid *int32) int32

// timer_settime returns non-zero on failure; setThreadCPUProfiler prints the
// negated result as the errno.
func timer_settime(timerid int32, flags int32, new, old *itimerspec) int32

// timer_delete returns non-zero on failure; setThreadCPUProfiler prints the
// negated result as the errno.
func timer_delete(timerid int32) int32

// rtsigprocmask is the call behind sigprocmask; size is the size in bytes of
// the signal set.
func rtsigprocmask(how int32, new, old *sigset, size int32)
449
450
451
452 func sigprocmask(how int32, new, old *sigset) {
453 rtsigprocmask(how, new, old, int32(unsafe.Sizeof(*new)))
454 }
455
// The functions below are declared without bodies; their implementations are
// supplied outside this file.

// raise and raiseproc are not called in this file.
func raise(sig uint32)
func raiseproc(sig uint32)

// sched_getaffinity fills buf (len bytes long) with the affinity mask of pid.
// getCPUCount treats a negative result as failure and otherwise uses the
// result as the number of bytes written to buf.
func sched_getaffinity(pid, len uintptr, buf *byte) int32

// osyield is used in this file to yield inside the spin loops of
// syscall_runtime_doAllThreadsSyscall.
func osyield()
462
463
// osyield_no_g simply calls osyield.
// NOTE(review): the name suggests it exists for callers running without a g,
// which osyield evidently tolerates on Linux — confirm.
func osyield_no_g() {
	osyield()
}
467
// pipe2 is declared without a body; its implementation is supplied outside
// this file. It is not called in this file.
// NOTE(review): presumably returns the read and write descriptors of a new
// pipe plus an errno — confirm against the implementation.
func pipe2(flags int32) (r, w int32, errno int32)
469
470
// fcntl invokes the SYS_FCNTL system call with fd, cmd and arg, and returns
// the first result register and the error value, each truncated to int32.
func fcntl(fd, cmd, arg int32) (ret int32, errno int32) {
	r1, _, e := syscall.Syscall6(syscall.SYS_FCNTL, uintptr(fd), uintptr(cmd), uintptr(arg), 0, 0, 0)
	ret = int32(r1)
	errno = int32(e)
	return ret, errno
}
475
// Size limits that are not referenced in this file.
// NOTE(review): presumably the byte sizes of the kernel's siginfo and
// sigevent structures, used when laying out the Go mirrors — confirm.
const (
	_si_max_size    = 128
	_sigev_max_size = 64
)
480
481
482
483 func setsig(i uint32, fn uintptr) {
484 var sa sigactiont
485 sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK | _SA_RESTORER | _SA_RESTART
486 sigfillset(&sa.sa_mask)
487
488
489
490 if GOARCH == "386" || GOARCH == "amd64" {
491 sa.sa_restorer = abi.FuncPCABI0(sigreturn__sigaction)
492 }
493 if fn == abi.FuncPCABIInternal(sighandler) {
494 if iscgo {
495 fn = abi.FuncPCABI0(cgoSigtramp)
496 } else {
497 fn = abi.FuncPCABI0(sigtramp)
498 }
499 }
500 sa.sa_handler = fn
501 sigaction(i, &sa, nil)
502 }
503
504
505
506 func setsigstack(i uint32) {
507 var sa sigactiont
508 sigaction(i, nil, &sa)
509 if sa.sa_flags&_SA_ONSTACK != 0 {
510 return
511 }
512 sa.sa_flags |= _SA_ONSTACK
513 sigaction(i, &sa, nil)
514 }
515
516
517
518 func getsig(i uint32) uintptr {
519 var sa sigactiont
520 sigaction(i, nil, &sa)
521 return sa.sa_handler
522 }
523
524
525
526
527 func setSignalstackSP(s *stackt, sp uintptr) {
528 *(*uintptr)(unsafe.Pointer(&s.ss_sp)) = sp
529 }
530
531
// fixsigcode is a no-op on Linux: the signal code in c is left as delivered.
func (c *sigctxt) fixsigcode(sig uint32) {
}
534
535
536
537
538 func sysSigaction(sig uint32, new, old *sigactiont) {
539 if rt_sigaction(uintptr(sig), new, old, unsafe.Sizeof(sigactiont{}.sa_mask)) != 0 {
540
541
542
543
544
545
546
547
548
549
550
551 if sig != 32 && sig != 33 && sig != 64 {
552
553 systemstack(func() {
554 throw("sigaction failed")
555 })
556 }
557 }
558 }
559
560
561
562
// The functions below are declared without bodies; their implementations are
// supplied outside this file.

// rt_sigaction returns non-zero on failure (see sysSigaction); size is the
// size in bytes of the signal mask.
func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32

// getpid and tgkill are used together by signalM to deliver a signal to one
// specific thread of this process.
func getpid() int
func tgkill(tgid, tid, sig int)
567
568
569 func signalM(mp *m, sig int) {
570 tgkill(getpid(), int(mp.procid), sig)
571 }
572
573
574
575
576
577
578
579
580 func validSIGPROF(mp *m, c *sigctxt) bool {
581 code := int32(c.sigcode())
582 setitimer := code == _SI_KERNEL
583 timer_create := code == _SI_TIMER
584
585 if !(setitimer || timer_create) {
586
587
588
589 return true
590 }
591
592 if mp == nil {
593
594
595
596
597
598
599
600
601
602
603
604
605 return setitimer
606 }
607
608
609
610 if mp.profileTimerValid.Load() {
611
612
613
614
615
616 return timer_create
617 }
618
619
620 return setitimer
621 }
622
// setProcessCPUProfiler forwards hz unchanged to setProcessCPUProfilerTimer,
// which configures the process-wide profiling timer.
func setProcessCPUProfiler(hz int32) {
	setProcessCPUProfilerTimer(hz)
}
626
627 func setThreadCPUProfiler(hz int32) {
628 mp := getg().m
629 mp.profilehz = hz
630
631
632 if mp.profileTimerValid.Load() {
633 timerid := mp.profileTimer
634 mp.profileTimerValid.Store(false)
635 mp.profileTimer = 0
636
637 ret := timer_delete(timerid)
638 if ret != 0 {
639 print("runtime: failed to disable profiling timer; timer_delete(", timerid, ") errno=", -ret, "\n")
640 throw("timer_delete")
641 }
642 }
643
644 if hz == 0 {
645
646 return
647 }
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668 spec := new(itimerspec)
669 spec.it_value.setNsec(1 + int64(cheaprandn(uint32(1e9/hz))))
670 spec.it_interval.setNsec(1e9 / int64(hz))
671
672 var timerid int32
673 var sevp sigevent
674 sevp.notify = _SIGEV_THREAD_ID
675 sevp.signo = _SIGPROF
676 sevp.sigev_notify_thread_id = int32(mp.procid)
677 ret := timer_create(_CLOCK_THREAD_CPUTIME_ID, &sevp, &timerid)
678 if ret != 0 {
679
680
681 return
682 }
683
684 ret = timer_settime(timerid, 0, spec, nil)
685 if ret != 0 {
686 print("runtime: failed to configure profiling timer; timer_settime(", timerid,
687 ", 0, {interval: {",
688 spec.it_interval.tv_sec, "s + ", spec.it_interval.tv_nsec, "ns} value: {",
689 spec.it_value.tv_sec, "s + ", spec.it_value.tv_nsec, "ns}}, nil) errno=", -ret, "\n")
690 throw("timer_settime")
691 }
692
693 mp.profileTimer = timerid
694 mp.profileTimerValid.Store(true)
695 }
696
697
698
// perThreadSyscallArgs describes one system call to be replayed on every
// thread: the trap number and six arguments, plus the two result values the
// initiating thread obtained, which every other thread is expected to
// reproduce.
type perThreadSyscallArgs struct {
	// System call number and arguments.
	trap, a1, a2, a3, a4, a5, a6 uintptr

	// Expected results.
	r1, r2 uintptr
}
710
711
712
713
714
715
// perThreadSyscall holds the system call currently being replayed across all
// threads. syscall_runtime_doAllThreadsSyscall fills it in before signalling
// the other threads and resets it to the zero value once they have all
// finished; runPerThreadSyscall copies it before making the call.
var perThreadSyscall perThreadSyscallArgs
717
718
719
720
721
722
723
724
725
// syscall_runtime_doAllThreadsSyscall executes the system call trap(a1..a6)
// on the calling thread and, if that succeeds, has every other thread in allm
// execute the same call.
//
// It returns the calling thread's own results: r1, r2 and the errno. When the
// initial call fails, no other thread is asked to run it. The other threads
// run the call in runPerThreadSyscall, which aborts the program if a thread's
// results differ from the ones recorded here.
//
// The function panics when cgo is in use.
func syscall_runtime_doAllThreadsSyscall(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) {
	if iscgo {
		// Not supported when cgo is enabled.
		panic("doAllThreadsSyscall not supported with cgo enabled")
	}

	// Stop the world for the whole operation. Every exit path below restarts
	// it with startTheWorld(stw).
	stw := stopTheWorld(stwAllThreadsSyscall)

	// Hold allocmLock for the duration.
	// NOTE(review): presumably this keeps the set of threads in allm stable
	// while it is walked below — confirm.
	allocmLock.lock()

	// Paired with releasem(getg().m) on both exit paths.
	// NOTE(review): presumably this keeps the goroutine on its current m so
	// that gp.m.procid below identifies the thread that made the initial
	// call — confirm.
	acquirem()

	// Make the call on this thread first; its results become the reference
	// that every other thread must match.
	r1, r2, errno := syscall.Syscall6(trap, a1, a2, a3, a4, a5, a6)
	if GOARCH == "ppc64" || GOARCH == "ppc64le" {
		// The second result is discarded on ppc64 (reason not visible here);
		// runPerThreadSyscall applies the same normalisation.
		r2 = 0
	}
	if errno != 0 {
		// The call failed here, so do not replay it elsewhere. Undo the
		// set-up in reverse order and report the failure.
		releasem(getg().m)
		allocmLock.unlock()
		startTheWorld(stw)
		return r1, r2, errno
	}

	// Publish the call and its expected results for the other threads.
	perThreadSyscall = perThreadSyscallArgs{
		trap: trap,
		a1:   a1,
		a2:   a2,
		a3:   a3,
		a4:   a4,
		a5:   a5,
		a6:   a6,
		r1:   r1,
		r2:   r2,
	}

	// Wait until every m has a non-zero procid. minit stores the thread ID
	// there, and signalM needs it to address the thread.
	for mp := allm; mp != nil; mp = mp.alllink {
		for atomic.Load64(&mp.procid) == 0 {
			// Thread has not recorded its ID yet; yield and retry.
			osyield()
		}
	}

	// Flag every thread other than this one and signal it to run the call.
	gp := getg()
	tid := gp.m.procid
	for mp := allm; mp != nil; mp = mp.alllink {
		if atomic.Load64(&mp.procid) == tid {
			// This thread has already made the call above.
			continue
		}
		mp.needPerThreadSyscall.Store(1)
		signalM(mp, sigPerThreadSyscall)
	}

	// Wait for every other thread to clear its flag, which runPerThreadSyscall
	// does after completing the call.
	for mp := allm; mp != nil; mp = mp.alllink {
		if mp.procid == tid {
			continue
		}
		for mp.needPerThreadSyscall.Load() != 0 {
			osyield()
		}
	}

	// All threads are done; clear the published call.
	perThreadSyscall = perThreadSyscallArgs{}

	releasem(getg().m)
	allocmLock.unlock()
	startTheWorld(stw)

	return r1, r2, errno
}
872
873
874
875
876
877
878
879 func runPerThreadSyscall() {
880 gp := getg()
881 if gp.m.needPerThreadSyscall.Load() == 0 {
882 return
883 }
884
885 args := perThreadSyscall
886 r1, r2, errno := syscall.Syscall6(args.trap, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6)
887 if GOARCH == "ppc64" || GOARCH == "ppc64le" {
888
889 r2 = 0
890 }
891 if errno != 0 || r1 != args.r1 || r2 != args.r2 {
892 print("trap:", args.trap, ", a123456=[", args.a1, ",", args.a2, ",", args.a3, ",", args.a4, ",", args.a5, ",", args.a6, "]\n")
893 print("results: got {r1=", r1, ",r2=", r2, ",errno=", errno, "}, want {r1=", args.r1, ",r2=", args.r2, ",errno=0}\n")
894 fatal("AllThreadsSyscall6 results differ between threads; runtime corrupted")
895 }
896
897 gp.m.needPerThreadSyscall.Store(0)
898 }
899
// Signal code values compared against sigctxt.sigcode in this file.
const (
	_SI_USER     = 0  // sigFromUser reports true for this code
	_SI_TKILL    = -6 // sigFromUser reports true for this code
	_SYS_SECCOMP = 1  // sigFromSeccomp reports true for this code
)
905
906
907
908
909
910 func (c *sigctxt) sigFromUser() bool {
911 code := int32(c.sigcode())
912 return code == _SI_USER || code == _SI_TKILL
913 }
914
915
916
917
918 func (c *sigctxt) sigFromSeccomp() bool {
919 code := int32(c.sigcode())
920 return code == _SYS_SECCOMP
921 }
922
923
// mprotect invokes the SYS_MPROTECT system call on the n bytes starting at
// addr with protection prot, and returns the first result register and the
// error value, each truncated to int32.
func mprotect(addr unsafe.Pointer, n uintptr, prot int32) (ret int32, errno int32) {
	r1, _, e := syscall.Syscall6(syscall.SYS_MPROTECT, uintptr(addr), n, uintptr(prot), 0, 0, 0)
	ret = int32(r1)
	errno = int32(e)
	return ret, errno
}
928
View as plain text