// Go runtime OS support for Linux (src/runtime/os_linux.go).
5 package runtime
6
7 import (
8 "internal/abi"
9 "internal/goarch"
10 "internal/runtime/atomic"
11 "internal/runtime/strconv"
12 "internal/runtime/syscall/linux"
13 "unsafe"
14 )
15
16
17
18
19 const sigPerThreadSyscall = _SIGRTMIN + 1
20
// mOS holds the Linux-specific per-M (OS thread) state.
type mOS struct {
	// profileTimer is the ID of the POSIX interval timer used to deliver
	// SIGPROF for CPU profiling of this thread (see setThreadCPUProfiler).
	//
	// It is only meaningful while profileTimerValid is true. Each thread
	// creates and manages its own timer; profileTimerValid is atomic
	// because validSIGPROF reads it from signal-handling code.
	profileTimer int32
	profileTimerValid atomic.Bool

	// needPerThreadSyscall is set to 1 when this thread must run the
	// pending perThreadSyscall (see syscall_runtime_doAllThreadsSyscall);
	// the thread clears it in runPerThreadSyscall when done.
	needPerThreadSyscall atomic.Uint8

	// vgetrandomState is opaque per-thread state for the vDSO getrandom
	// path; set up via vgetrandomInit/osinit machinery.
	vgetrandomState uintptr

	// waitsema is the futex word this thread sleeps on (futexsleep/futexwakeup).
	waitsema uint32
}
42
43
44
45
46
47
48
49
50
51
// Futex operation codes. The _PRIVATE variants tell the kernel the futex
// word is process-local, which skips cross-process bookkeeping.
const (
	_FUTEX_PRIVATE_FLAG = 128
	_FUTEX_WAIT_PRIVATE = 0 | _FUTEX_PRIVATE_FLAG
	_FUTEX_WAKE_PRIVATE = 1 | _FUTEX_PRIVATE_FLAG
)
57
58
59
60
61
62
63
64
65
// futexsleep atomically checks that *addr == val and, if so, sleeps on
// addr. It may wake up spuriously; callers must tolerate that.
// ns < 0 means sleep with no timeout; otherwise sleep at most ns nanoseconds.
func futexsleep(addr *uint32, val uint32, ns int64) {
	// The return value of futex is deliberately ignored: spurious
	// wakeups are allowed, so any error simply looks like one.
	if ns < 0 {
		futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, nil, nil, 0)
		return
	}

	var ts timespec
	ts.setNsec(ns)
	// NOTE(review): upstream passes unsafe.Pointer(&ts) here; `&ts` only
	// compiles if futex's ts parameter is declared *timespec — confirm
	// against the futex declaration (not visible in this chunk).
	futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, &ts, nil, 0)
}
81
82
83
84
// futexwakeup wakes up to cnt threads sleeping on addr via futexsleep.
// A failed wakeup is a fatal runtime error.
func futexwakeup(addr *uint32, cnt uint32) {
	ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE_PRIVATE, cnt, nil, nil, 0)
	if ret >= 0 {
		return
	}

	// FUTEX_WAKE should not fail; if it does, report the error and crash.
	// print is done on the system stack since this can run in contexts
	// where the current stack must not grow.
	systemstack(func() {
		print("futexwakeup addr=", addr, " returned ", ret, "\n")
	})

	// Deliberately fault at a recognizable address so the failure is
	// identifiable in the resulting crash.
	*(*int32)(unsafe.Pointer(uintptr(0x1006))) = 0x1006
}
100
101 func getCPUCount() int32 {
102
103
104
105
106
107
108
109 const maxCPUs = 64 * 1024
110 var buf [maxCPUs / 8]byte
111 r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0])
112 if r < 0 {
113 return 1
114 }
115 n := int32(0)
116 for _, v := range buf[:r] {
117 for v != 0 {
118 n += int32(v & 1)
119 v >>= 1
120 }
121 }
122 if n == 0 {
123 n = 1
124 }
125 return n
126 }
127
128
// Flags for the clone system call.
const (
	_CLONE_VM             = 0x100
	_CLONE_FS             = 0x200
	_CLONE_FILES          = 0x400
	_CLONE_SIGHAND        = 0x800
	_CLONE_PTRACE         = 0x2000
	_CLONE_VFORK          = 0x4000
	_CLONE_PARENT         = 0x8000
	_CLONE_THREAD         = 0x10000
	_CLONE_NEWNS          = 0x20000
	_CLONE_SYSVSEM        = 0x40000
	_CLONE_SETTLS         = 0x80000
	_CLONE_PARENT_SETTID  = 0x100000
	_CLONE_CHILD_CLEARTID = 0x200000
	_CLONE_UNTRACED       = 0x800000
	_CLONE_CHILD_SETTID   = 0x1000000
	_CLONE_STOPPED        = 0x2000000
	_CLONE_NEWUTS         = 0x4000000
	_CLONE_NEWIPC         = 0x8000000

	// cloneFlags is the flag set used by newosproc/newosproc0: the new
	// thread shares memory, filesystem state, file descriptors, signal
	// handlers, and SysV semaphore undo lists with the process, and is
	// placed in the same thread group.
	cloneFlags = _CLONE_VM | /* share memory */
		_CLONE_FS | /* share cwd, etc. */
		_CLONE_FILES | /* share fd table */
		_CLONE_SIGHAND | /* share signal handler table */
		_CLONE_SYSVSEM | /* share SysV semaphore undo lists */
		_CLONE_THREAD /* same thread group */
)
163
164
165 func clone(flags int32, stk, mp, gp, fn unsafe.Pointer) int32
166
167
168
169
// newosproc creates a new OS thread that starts running mstart on mp.
// A failure to create the thread is fatal.
func newosproc(mp *m) {
	stk := unsafe.Pointer(mp.g0.stack.hi)
	// Debug tracing, normally compiled out.
	if false {
		print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " clone=", abi.FuncPCABI0(clone), " id=", mp.id, " ostk=", &mp, "\n")
	}

	// Disable signals during clone so the new thread starts with signals
	// disabled; it enables them again in minit.
	var oset sigset
	sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
	ret := retryOnEAGAIN(func() int32 {
		r := clone(cloneFlags, stk, unsafe.Pointer(mp), unsafe.Pointer(mp.g0), unsafe.Pointer(abi.FuncPCABI0(mstart)))
		// clone returns a positive TID on success and a negated errno on
		// failure; we only care about success vs the errno.
		if r >= 0 {
			return 0
		}
		return -r
	})
	sigprocmask(_SIG_SETMASK, &oset, nil)

	if ret != 0 {
		print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", ret, ")\n")
		if ret == _EAGAIN {
			println("runtime: may need to increase max user processes (ulimit -u)")
		}
		throw("newosproc")
	}
}
202
203
204
205
// newosproc0 is a version of newosproc that does not need a valid G: it
// allocates its own stack and starts fn on a new thread. Any failure is
// fatal. Used during early/externally-driven initialization.
func newosproc0(stacksize uintptr, fn unsafe.Pointer) {
	stack := sysAlloc(stacksize, &memstats.stacks_sys, "OS thread stack")
	if stack == nil {
		writeErrStr(failallocatestack)
		exit(1)
	}
	// Pass the top of the new stack; stacks grow down.
	ret := clone(cloneFlags, unsafe.Pointer(uintptr(stack)+stacksize), nil, nil, fn)
	if ret < 0 {
		writeErrStr(failthreadcreate)
		exit(1)
	}
}
218
// ELF auxiliary vector tags consumed by sysauxv (and arch/vdso hooks).
const (
	_AT_NULL     = 0  // terminates the auxv list
	_AT_PAGESZ   = 6  // system physical page size
	_AT_PLATFORM = 15 // string identifying platform
	_AT_HWCAP    = 16 // hardware capability bit flags
	_AT_SECURE   = 23 // secure-execution mode flag
	_AT_RANDOM   = 25 // pointer to 16 random bytes from the kernel
	_AT_HWCAP2   = 26 // extension of AT_HWCAP
)
228
// procAuxv is the NUL-terminated path of the kernel's auxv file, used by
// sysargs as a fallback when no loader-provided auxv is found.
var procAuxv = []byte("/proc/self/auxv\x00")

// addrspace_vec receives mincore's one-byte residency result during
// sysargs's page-size probe.
var addrspace_vec [1]byte

// mincore wraps the mincore system call; body provided outside Go.
// sysargs treats a zero return as success.
func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32

// auxvreadbuf is the static buffer into which /proc/self/auxv is read.
var auxvreadbuf [128]uintptr
236
// sysargs locates the ELF auxiliary vector (which the loader places just
// past envp on the stack) and processes it with sysauxv. If no
// loader-provided auxv exists (e.g. loaded as a library), it falls back
// to reading /proc/self/auxv, and failing that, probes the physical page
// size with mincore.
func sysargs(argc int32, argv **byte) {
	n := argc + 1

	// Skip over argv and envp to reach auxv.
	for argv_index(argv, n) != nil {
		n++
	}

	// Skip the NULL separator after envp.
	n++

	// argv+n now points at auxv.
	auxvp := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*goarch.PtrSize))

	if pairs := sysauxv(auxvp[:]); pairs != 0 {
		auxv = auxvp[: pairs*2 : pairs*2]
		return
	}

	// No loader-provided auxv; fall back to the kernel's copy.
	fd := open(&procAuxv[0], 0 /* O_RDONLY */, 0)
	if fd < 0 {
		// /proc/self/auxv may be unreadable (e.g. on Android); fall back
		// to detecting the physical page size with mincore: it reports an
		// error for addresses that are not a multiple of the page size,
		// so probe power-of-two offsets within a mapped region until one
		// succeeds.
		const size = 256 << 10 // size of region to probe
		p, err := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
		if err != 0 {
			return
		}
		var n uintptr
		for n = 4 << 10; n < size; n <<= 1 {
			err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0])
			if err == 0 {
				physPageSize = n
				break
			}
		}
		if physPageSize == 0 {
			physPageSize = size
		}
		munmap(p, size)
		return
	}

	n = read(fd, noescape(unsafe.Pointer(&auxvreadbuf[0])), int32(unsafe.Sizeof(auxvreadbuf)))
	closefd(fd)
	if n < 0 {
		return
	}
	// Make sure the buffer is AT_NULL-terminated even if the read filled
	// it without reaching the end of the file.
	auxvreadbuf[len(auxvreadbuf)-2] = _AT_NULL
	pairs := sysauxv(auxvreadbuf[:])
	auxv = auxvreadbuf[: pairs*2 : pairs*2]
}
294
295
296 var secureMode bool
297
298 func sysauxv(auxv []uintptr) (pairs int) {
299
300
301 var i int
302 for ; auxv[i] != _AT_NULL; i += 2 {
303 tag, val := auxv[i], auxv[i+1]
304 switch tag {
305 case _AT_RANDOM:
306
307
308
309
310
311
312 startupRand = (*[16]byte)(unsafe.Pointer(val))[:]
313
314 case _AT_PAGESZ:
315 physPageSize = val
316
317 case _AT_SECURE:
318 secureMode = val == 1
319 }
320
321 archauxv(tag, val)
322 vdsoauxv(tag, val)
323 }
324 return i / 2
325 }
326
327 var sysTHPSizePath = []byte("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size\x00")
328
329 func getHugePageSize() uintptr {
330 var numbuf [20]byte
331 fd := open(&sysTHPSizePath[0], 0 , 0)
332 if fd < 0 {
333 return 0
334 }
335 ptr := noescape(unsafe.Pointer(&numbuf[0]))
336 n := read(fd, ptr, int32(len(numbuf)))
337 closefd(fd)
338 if n <= 0 {
339 return 0
340 }
341 n--
342 v, ok := strconv.Atoi(slicebytetostringtmp((*byte)(ptr), int(n)))
343 if !ok || v < 0 {
344 v = 0
345 }
346 if v&(v-1) != 0 {
347
348 return 0
349 }
350 return uintptr(v)
351 }
352
// osinit performs OS-specific early initialization: records the CPU
// count and huge page size, and initializes the vDSO getrandom support.
func osinit() {
	numCPUStartup = getCPUCount()
	physHugePageSize = getHugePageSize()
	vgetrandomInit()
}
358
359 var urandom_dev = []byte("/dev/urandom\x00")
360
361 func readRandom(r []byte) int {
362
363
364 fd := open(&urandom_dev[0], 0 , 0)
365 n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
366 closefd(fd)
367 return int(n)
368 }
369
// goenvs populates the environment using the generic Unix implementation.
func goenvs() {
	goenvs_unix()
}
373
374
375
376
377
378
379
// libpreinit performs synchronous initialization when Go is built as a
// library (c-archive/c-shared), before the runtime proper is initialized:
// it only installs signal handlers.
func libpreinit() {
	initsig(true)
}
383
384
385
// mpreinit initializes a new m before it is used (called on the parent
// thread, so it may allocate): it sets up the 32 KB signal-handling
// goroutine stack for mp.
func mpreinit(mp *m) {
	mp.gsignal = malg(32 * 1024)
	mp.gsignal.m = mp
}
390
391 func gettid() uint32
392
393
394
// minit initializes the current m on its own new OS thread
// (so it must not allocate).
func minit() {
	minitSignals()

	// Record the OS thread id; it is used by signalM to target this
	// thread and by setThreadCPUProfiler for SIGEV_THREAD_ID timers.
	getg().m.procid = uint64(gettid())
}
403
404
405
406
// unminit undoes the effect of minit when the thread is released:
// it tears down the signal setup and clears the recorded thread id.
func unminit() {
	unminitSignals()
	getg().m.procid = 0
}
411
412
413
414
415
416
417
// mdestroy cleans up thread-owned resources when an m exits.
// Nothing to do on Linux.
func mdestroy(mp *m) {
}
420
421
422
423
424
// Signal trampolines; bodies provided outside Go (assembly).
// sigreturn__sigaction is used as sa_restorer on x86 (see setsig);
// sigtramp/cgoSigtramp are the entry points installed as sa_handler.
func sigreturn__sigaction()
func sigtramp()
func cgoSigtramp()
428
429
// sigaltstack installs or queries the alternate signal stack;
// body provided outside Go.
func sigaltstack(new, old *stackt)

// setitimer wraps the setitimer system call; body provided outside Go.
func setitimer(mode int32, new, old *itimerval)

// timer_create wraps the timer_create system call; body provided outside
// Go. Judging by its callers, it returns 0 on success and a negated
// errno on failure.
func timer_create(clockid int32, sevp *sigevent, timerid *int32) int32

// timer_delete wraps the timer_delete system call; body provided outside
// Go. Returns 0 on success, negated errno on failure.
// NOTE(review): timer_settime is called in setThreadCPUProfiler but is
// not declared in this file as shown — presumably declared elsewhere;
// confirm.
func timer_delete(timerid int32) int32

// rtsigprocmask wraps rt_sigprocmask; size is sizeof(sigset) in bytes.
// Body provided outside Go.
func rtsigprocmask(how int32, new, old *sigset, size int32)
444
445
// sigprocmask changes (or queries, via old) the signal mask of the
// current thread, supplying the sigset size rt_sigprocmask requires.
func sigprocmask(how int32, new, old *sigset) {
	rtsigprocmask(how, new, old, int32(unsafe.Sizeof(*new)))
}
449
// raise and raiseproc deliver a signal; bodies provided outside Go.
// By convention, raise targets the current thread and raiseproc the
// whole process — confirm against the assembly.
func raise(sig uint32)
func raiseproc(sig uint32)
452
453
// sched_getaffinity wraps the system call of the same name, writing the
// affinity mask into buf; osyield yields the CPU (sched_yield).
// Bodies provided outside Go.
func sched_getaffinity(pid, len uintptr, buf *byte) int32
func osyield()
456
457
// osyield_no_g yields the CPU for callers that may not have a valid G;
// on Linux the plain osyield is already safe to use.
func osyield_no_g() {
	osyield()
}
461
462 func pipe2(flags int32) (r, w int32, errno int32)
463
464
465 func fcntl(fd, cmd, arg int32) (ret int32, errno int32) {
466 r, _, err := linux.Syscall6(linux.SYS_FCNTL, uintptr(fd), uintptr(cmd), uintptr(arg), 0, 0, 0)
467 return int32(r), int32(err)
468 }
469
const (
	// Sizes, in bytes, of the kernel's siginfo and sigevent structures
	// (matching Linux's SI_MAX_SIZE / SIGEV_MAX_SIZE — confirm against
	// the generated defs for each arch).
	_si_max_size    = 128
	_sigev_max_size = 64
)
474
475
476
// setsig installs fn as the handler for signal i, with the full runtime
// flag set (SA_SIGINFO, alternate stack, restorer, restart) and all
// signals blocked during handling.
func setsig(i uint32, fn uintptr) {
	var sa sigactiont
	sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK | _SA_RESTORER | _SA_RESTART
	sigfillset(&sa.sa_mask)
	// Although the Linux manpage says sa_restorer is obsolete and should
	// not be used, the x86-64 kernel requires it. Only set it on x86.
	if GOARCH == "386" || GOARCH == "amd64" {
		sa.sa_restorer = abi.FuncPCABI0(sigreturn__sigaction)
	}
	// The generic sighandler must be entered through the assembly
	// trampoline (cgo-aware when cgo is in use).
	if fn == abi.FuncPCABIInternal(sighandler) {
		if iscgo {
			fn = abi.FuncPCABI0(cgoSigtramp)
		} else {
			fn = abi.FuncPCABI0(sigtramp)
		}
	}
	sa.sa_handler = fn
	sigaction(i, &sa, nil)
}
498
499
500
501 func setsigstack(i uint32) {
502 var sa sigactiont
503 sigaction(i, nil, &sa)
504 if sa.sa_flags&_SA_ONSTACK != 0 {
505 return
506 }
507 sa.sa_flags |= _SA_ONSTACK
508 sigaction(i, &sa, nil)
509 }
510
511
512
// getsig returns the current handler address for signal i.
func getsig(i uint32) uintptr {
	var sa sigactiont
	sigaction(i, nil, &sa)
	return sa.sa_handler
}
518
519
520
521
// setSignalstackSP sets the ss_sp field of a stackt. The store goes
// through an unsafe cast — presumably because ss_sp's declared type
// differs across Linux architectures; confirm against the generated defs.
func setSignalstackSP(s *stackt, sp uintptr) {
	*(*uintptr)(unsafe.Pointer(&s.ss_sp)) = sp
}
525
526
// fixsigcode adjusts the signal code for quirky signals.
// Nothing needs fixing on Linux.
func (c *sigctxt) fixsigcode(sig uint32) {
}
529
530
531
532
// sysSigaction installs (or queries) the kernel handler for sig via
// rt_sigaction, crashing on unexpected failure.
func sysSigaction(sig uint32, new, old *sigactiont) {
	if rt_sigaction(uintptr(sig), new, old, unsafe.Sizeof(sigactiont{}.sa_mask)) != 0 {
		// rt_sigaction is expected to fail for some signals:
		// 32 and 33 are reserved by glibc/NPTL for internal use
		// (thread cancellation and setxid), and 64 may be rejected —
		// tolerate those and crash on anything else.
		if sig != 32 && sig != 33 && sig != 64 {
			// Use systemstack so the throw is safe from contexts where
			// the current stack must not grow.
			systemstack(func() {
				throw("sigaction failed")
			})
		}
	}
}
554
555
556
557
558 func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32
559
560
561
562
563
564
565
566
567
568 func fixSigactionForCgo(new *sigactiont) {
569 if GOARCH == "386" && new != nil {
570 new.sa_flags &^= _SA_RESTORER
571 new.sa_restorer = 0
572 }
573 }
574
// getpid and tgkill wrap the corresponding system calls;
// bodies provided outside Go.
func getpid() int
func tgkill(tgid, tid, sig int)
577
578
// signalM sends sig directly to mp's OS thread, using the thread id
// recorded in mp.procid by minit.
func signalM(mp *m, sig int) {
	tgkill(getpid(), int(mp.procid), sig)
}
582
583
584
585
586
587
588
589
// validSIGPROF reports whether this SIGPROF delivery should be processed
// for profiling, based on its si_code: deliveries from the process-wide
// setitimer (SI_KERNEL) are accepted only when the M has no per-thread
// timer, and deliveries from a POSIX timer (SI_TIMER) only when it does.
// This keeps one M from being sampled twice per tick when both mechanisms
// are active.
func validSIGPROF(mp *m, c *sigctxt) bool {
	code := int32(c.sigcode())
	setitimer := code == _SI_KERNEL
	timer_create := code == _SI_TIMER

	if !(setitimer || timer_create) {
		// Neither profiling mechanism we use; the signal came from
		// somewhere else (e.g. sent explicitly) — let it through.
		return true
	}

	if mp == nil {
		// No M to consult (e.g. a signal on a non-Go thread). We cannot
		// tell whether a per-thread timer exists, so accept only the
		// process-wide setitimer source.
		return setitimer
	}

	// This M has its own timer_create timer; only accept samples from it.
	if mp.profileTimerValid.Load() {
		return timer_create
	}

	// No per-thread timer; rely on the process-wide setitimer.
	return setitimer
}
632
// setProcessCPUProfiler starts or stops the process-wide CPU profiling
// timer at hz samples per second (hz == 0 stops it).
func setProcessCPUProfiler(hz int32) {
	setProcessCPUProfilerTimer(hz)
}
636
// setThreadCPUProfiler configures a per-thread CPU-time profiling timer
// for the current M at hz samples per second. Any existing timer is
// deleted first; hz == 0 just disables per-thread profiling. On success
// the timer delivers SIGPROF to exactly this thread (SIGEV_THREAD_ID);
// on timer_create failure the M silently falls back to the process-wide
// setitimer source (see validSIGPROF).
func setThreadCPUProfiler(hz int32) {
	mp := getg().m
	mp.profilehz = hz

	// Destroy any active per-thread timer, marking it invalid before the
	// delete so a concurrent signal does not see a stale-but-valid ID.
	if mp.profileTimerValid.Load() {
		timerid := mp.profileTimer
		mp.profileTimerValid.Store(false)
		mp.profileTimer = 0

		ret := timer_delete(timerid)
		if ret != 0 {
			print("runtime: failed to disable profiling timer; timer_delete(", timerid, ") errno=", -ret, "\n")
			throw("timer_delete")
		}
	}

	if hz == 0 {
		// Disabling was all that was requested.
		return
	}

	// First trigger is randomized within one period (never zero, which
	// would disarm the timer); after that it fires every 1e9/hz ns of
	// this thread's CPU time.
	spec := new(itimerspec)
	spec.it_value.setNsec(1 + int64(cheaprandn(uint32(1e9/hz))))
	spec.it_interval.setNsec(1e9 / int64(hz))

	var timerid int32
	var sevp sigevent
	sevp.notify = _SIGEV_THREAD_ID
	sevp.signo = _SIGPROF
	sevp.sigev_notify_thread_id = int32(mp.procid)
	ret := timer_create(_CLOCK_THREAD_CPUTIME_ID, &sevp, &timerid)
	if ret != 0 {
		// Timer unavailable for this M; profileTimerValid stays false,
		// so validSIGPROF will accept setitimer-based samples instead.
		return
	}

	ret = timer_settime(timerid, 0, spec, nil)
	if ret != 0 {
		print("runtime: failed to configure profiling timer; timer_settime(", timerid,
			", 0, {interval: {",
			spec.it_interval.tv_sec, "s + ", spec.it_interval.tv_nsec, "ns} value: {",
			spec.it_value.tv_sec, "s + ", spec.it_value.tv_nsec, "ns}}, nil) errno=", -ret, "\n")
		throw("timer_settime")
	}

	mp.profileTimer = timerid
	mp.profileTimerValid.Store(true)
}
706
707
708
// perThreadSyscallArgs holds the syscall to broadcast to every thread,
// plus the results (r1, r2) the initiating thread observed, which every
// other thread must match (see runPerThreadSyscall).
type perThreadSyscallArgs struct {
	trap uintptr
	a1   uintptr
	a2   uintptr
	a3   uintptr
	a4   uintptr
	a5   uintptr
	a6   uintptr
	r1   uintptr
	r2   uintptr
}
720
721
722
723
724
725
726 var perThreadSyscall perThreadSyscallArgs
727
728
729
730
731
732
733
734
735
// syscall_runtime_doAllThreadsSyscall executes the given syscall on
// every OS thread in the process (needed because Linux credentials and
// similar state are per-thread). It runs the syscall on the current
// thread first; if that succeeds, it signals every other M with
// sigPerThreadSyscall and waits for each to replay the syscall in
// runPerThreadSyscall, requiring identical results everywhere.
// Called from the syscall package (linkname'd); not supported with cgo,
// since C code may own threads the runtime cannot see.
func syscall_runtime_doAllThreadsSyscall(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) {
	if iscgo {
		// C threads are invisible to the runtime; a broadcast would
		// silently miss them.
		panic("doAllThreadsSyscall not supported with cgo enabled")
	}

	// Stop the world so no thread is running Go code (or creating
	// threads) while the broadcast is set up.
	stw := stopTheWorld(stwAllThreadsSyscall)

	// Hold allocmLock so no new M can be created until every existing M
	// has performed the syscall.
	allocmLock.lock()

	// Pin this M so it cannot change while we compare thread ids below.
	acquirem()

	// Do the syscall on this thread first; if it fails there is no point
	// in disturbing the other threads.
	r1, r2, errno := linux.Syscall6(trap, a1, a2, a3, a4, a5, a6)
	if GOARCH == "ppc64" || GOARCH == "ppc64le" {
		// r2 is not meaningful on ppc64; zero it so the result
		// comparison in runPerThreadSyscall stays consistent.
		r2 = 0
	}
	if errno != 0 {
		releasem(getg().m)
		allocmLock.unlock()
		startTheWorld(stw)
		return r1, r2, errno
	}

	// Publish the syscall (and our results) for the signal handlers.
	perThreadSyscall = perThreadSyscallArgs{
		trap: trap,
		a1:   a1,
		a2:   a2,
		a3:   a3,
		a4:   a4,
		a5:   a5,
		a6:   a6,
		r1:   r1,
		r2:   r2,
	}

	// Wait for every M to have a thread id: a freshly cloned thread may
	// not have stored procid yet, and we cannot signal it until it has.
	for mp := allm; mp != nil; mp = mp.alllink {
		for atomic.Load64(&mp.procid) == 0 {
			// No thread yet / minit not finished; spin until it appears.
			osyield()
		}
	}

	// Signal every other thread; each will run perThreadSyscall from its
	// sigPerThreadSyscall handler (runPerThreadSyscall).
	gp := getg()
	tid := gp.m.procid
	for mp := allm; mp != nil; mp = mp.alllink {
		if atomic.Load64(&mp.procid) == tid {
			// This thread already performed the syscall above.
			continue
		}
		mp.needPerThreadSyscall.Store(1)
		signalM(mp, sigPerThreadSyscall)
	}

	// Wait for every thread to acknowledge completion.
	for mp := allm; mp != nil; mp = mp.alllink {
		if mp.procid == tid {
			continue
		}
		for mp.needPerThreadSyscall.Load() != 0 {
			osyield()
		}
	}

	// Clear the published args now that everyone is done.
	perThreadSyscall = perThreadSyscallArgs{}

	releasem(getg().m)
	allocmLock.unlock()
	startTheWorld(stw)

	return r1, r2, errno
}
882
883
884
885
886
887
888
// runPerThreadSyscall runs (from the sigPerThreadSyscall signal handler)
// the syscall published in perThreadSyscall, if this thread has been
// asked to. The results must match what the initiating thread observed;
// any divergence means per-thread state is now inconsistent, which is
// unrecoverable.
func runPerThreadSyscall() {
	gp := getg()
	if gp.m.needPerThreadSyscall.Load() == 0 {
		// Spurious delivery or already handled; nothing to do.
		return
	}

	args := perThreadSyscall
	r1, r2, errno := linux.Syscall6(args.trap, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6)
	if GOARCH == "ppc64" || GOARCH == "ppc64le" {
		// r2 is not meaningful on ppc64; zero it to match the initiator.
		r2 = 0
	}
	if errno != 0 || r1 != args.r1 || r2 != args.r2 {
		print("trap:", args.trap, ", a123456=[", args.a1, ",", args.a2, ",", args.a3, ",", args.a4, ",", args.a5, ",", args.a6, "]\n")
		print("results: got {r1=", r1, ",r2=", r2, ",errno=", errno, "}, want {r1=", args.r1, ",r2=", args.r2, ",errno=0}\n")
		fatal("AllThreadsSyscall6 results differ between threads; runtime corrupted")
	}

	// Acknowledge completion to the initiating thread.
	gp.m.needPerThreadSyscall.Store(0)
}
909
// siginfo si_code values used to classify signal origins.
const (
	_SI_USER     = 0  // sent by kill
	_SI_TKILL    = -6 // sent by tkill/tgkill
	_SYS_SECCOMP = 1  // SIGSYS delivered by a seccomp filter
)
915
916
917
918
919
920 func (c *sigctxt) sigFromUser() bool {
921 code := int32(c.sigcode())
922 return code == _SI_USER || code == _SI_TKILL
923 }
924
925
926
927
928 func (c *sigctxt) sigFromSeccomp() bool {
929 code := int32(c.sigcode())
930 return code == _SYS_SECCOMP
931 }
932
933
934 func mprotect(addr unsafe.Pointer, n uintptr, prot int32) (ret int32, errno int32) {
935 r, _, err := linux.Syscall6(linux.SYS_MPROTECT, uintptr(addr), n, uintptr(prot), 0, 0, 0)
936 return int32(r), int32(err)
937 }
938
// End of src/runtime/os_linux.go.