Text file
src/runtime/asm_amd64.s
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "go_tls.h"
7 #include "funcdata.h"
8 #include "textflag.h"
9 #include "cgo/abi_amd64.h"
10
11 // _rt0_amd64 is common startup code for most amd64 systems when using
12 // internal linking. This is the entry point for the program from the
13 // kernel for an ordinary -buildmode=exe program. The stack holds the
14 // number of arguments and the C-style argv.
15 TEXT _rt0_amd64(SB),NOSPLIT,$-8
16 MOVQ 0(SP), DI // argc (kernel pushes argc at the top of the stack)
17 LEAQ 8(SP), SI // argv (C-style argv array starts right above argc)
18 JMP runtime·rt0_go(SB) // tail call into the common runtime entry point
19
20 // main is common startup code for most amd64 systems when using
21 // external linking. The C startup code will call the symbol "main"
22 // passing argc and argv in the usual C ABI registers DI and SI.
23 TEXT main(SB),NOSPLIT,$-8
24 JMP runtime·rt0_go(SB) // argc/argv already in DI/SI per the C ABI; tail call shared entry
25
26 // _rt0_amd64_lib is common startup code for most amd64 systems when
27 // using -buildmode=c-archive or -buildmode=c-shared. The linker will
28 // arrange to invoke this function as a global constructor (for
29 // c-archive) or when the shared library is loaded (for c-shared).
30 // We expect argc and argv to be passed in the usual C ABI registers
31 // DI and SI.
32 TEXT _rt0_amd64_lib(SB),NOSPLIT|NOFRAME,$0
33 // Transition from C ABI to Go ABI.
34 PUSH_REGS_HOST_TO_ABI0()
35
36 // Stash argc/argv so the runtime-init thread can pick them up later.
37 MOVQ DI, _rt0_amd64_lib_argc<>(SB)
38 MOVQ SI, _rt0_amd64_lib_argv<>(SB)
39
40 // Synchronous initialization.
41 #ifndef GOOS_windows
42 // Avoid calling it on Windows because it is not used
43 // and it would crash the application due to the autogenerated
44 // ABI wrapper trying to access a non-existent TLS slot.
45 CALL runtime·libpreinit(SB)
46 #endif
47
48 // Create a new thread to finish Go runtime initialization.
49 MOVQ _cgo_sys_thread_create(SB), AX
50 TESTQ AX, AX
51 JZ nocgo // no cgo thread-create hook: fall back to newosproc0
52
53 // We're calling back to C.
54 // Align stack per C ABI requirements.
55 MOVQ SP, BX // Callee-save in C ABI
56 ANDQ $~15, SP
57 MOVQ $_rt0_amd64_lib_go(SB), DI // arg 1: thread entry point
58 MOVQ $0, SI // arg 2: no argument
59 #ifdef GOOS_windows
60 // For Windows ABI
61 MOVQ DI, CX
62 MOVQ SI, DX
63 // Leave space for four words on the stack as required
64 // by the Windows amd64 calling convention.
65 ADJSP $32
66 #endif
67 CALL AX
68 #ifdef GOOS_windows
69 ADJSP $-32 // just to make the assembler not complain about unbalanced stack
70 #endif
71 MOVQ BX, SP // restore the pre-alignment stack pointer
72 JMP restore
73
74 nocgo:
75 ADJSP $16
76 MOVQ $0x800000, 0(SP) // stacksize (8 MB) for the new runtime-init thread
77 MOVQ $_rt0_amd64_lib_go(SB), AX
78 MOVQ AX, 8(SP) // fn
79 CALL runtime·newosproc0(SB)
80 ADJSP $-16
81
82 restore:
83 POP_REGS_HOST_TO_ABI0()
84 RET
84
85 // _rt0_amd64_lib_go initializes the Go runtime.
86 // This is started in a separate thread by _rt0_amd64_lib.
87 TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
88 MOVQ _rt0_amd64_lib_argc<>(SB), DI // reload argc saved by _rt0_amd64_lib
89 MOVQ _rt0_amd64_lib_argv<>(SB), SI // reload argv saved by _rt0_amd64_lib
90 JMP runtime·rt0_go(SB)
91
92 // 8-byte slots holding the argc/argv handed to the library constructor.
93 DATA _rt0_amd64_lib_argc<>(SB)/8, $0
94 GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
95 DATA _rt0_amd64_lib_argv<>(SB)/8, $0
96 GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
96
97 // Error message printed by rt0_go's bad_cpu path when the CPU lacks the
98 // microarchitecture level the binary was compiled for (GOAMD64=v2/v3/v4).
99 // Exactly one of these DATA directives is selected at assembly time.
100 #ifdef GOAMD64_v2
101 DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v2 microarchitecture support.\n"
102 #endif
103
104 #ifdef GOAMD64_v3
105 DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v3 microarchitecture support.\n"
106 #endif
107
108 #ifdef GOAMD64_v4
109 DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v4 microarchitecture support.\n"
110 #endif
111
112 GLOBL bad_cpu_msg<>(SB), RODATA, $84 // 84 = message length, matches write() count in bad_cpu
110
111 // Define a list of AMD64 microarchitecture level features
112 // https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
113 //
114 // The *_FEATURES_* masks are checked against CPUID output and the
115 // *_OS_SUPPORT_AX masks against XGETBV (XCR0) in rt0_go below.
116
117 // SSE3 SSSE3 CMPXCHNG16 SSE4.1 SSE4.2 POPCNT
118 #define V2_FEATURES_CX (1 << 0 | 1 << 9 | 1 << 13 | 1 << 19 | 1 << 20 | 1 << 23)
119 // LAHF/SAHF
120 #define V2_EXT_FEATURES_CX (1 << 0)
121 // FMA MOVBE OSXSAVE AVX F16C
122 #define V3_FEATURES_CX (V2_FEATURES_CX | 1 << 12 | 1 << 22 | 1 << 27 | 1 << 28 | 1 << 29)
123 // ABM (FOR LZNCT)
124 #define V3_EXT_FEATURES_CX (V2_EXT_FEATURES_CX | 1 << 5)
125 // BMI1 AVX2 BMI2
126 #define V3_EXT_FEATURES_BX (1 << 3 | 1 << 5 | 1 << 8)
127 // XMM YMM
128 #define V3_OS_SUPPORT_AX (1 << 1 | 1 << 2)
129
130 #define V4_FEATURES_CX V3_FEATURES_CX
131
132 #define V4_EXT_FEATURES_CX V3_EXT_FEATURES_CX
133 // AVX512F AVX512DQ AVX512CD AVX512BW AVX512VL
134 #define V4_EXT_FEATURES_BX (V3_EXT_FEATURES_BX | 1 << 16 | 1 << 17 | 1 << 28 | 1 << 30 | 1 << 31)
135 // OPMASK ZMM
136 #define V4_OS_SUPPORT_AX (V3_OS_SUPPORT_AX | 1 << 5 | (1 << 6 | 1 << 7))
137
138 // Select the NEED_* masks for the GOAMD64 level this binary targets.
139 #ifdef GOAMD64_v2
140 #define NEED_MAX_CPUID 0x80000001
141 #define NEED_FEATURES_CX V2_FEATURES_CX
142 #define NEED_EXT_FEATURES_CX V2_EXT_FEATURES_CX
143 #endif
144
145 #ifdef GOAMD64_v3
146 #define NEED_MAX_CPUID 0x80000001
147 #define NEED_FEATURES_CX V3_FEATURES_CX
148 #define NEED_EXT_FEATURES_CX V3_EXT_FEATURES_CX
149 #define NEED_EXT_FEATURES_BX V3_EXT_FEATURES_BX
150 #define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
151 #endif
152
153 #ifdef GOAMD64_v4
154 #define NEED_MAX_CPUID 0x80000001
155 #define NEED_FEATURES_CX V4_FEATURES_CX
156 #define NEED_EXT_FEATURES_CX V4_EXT_FEATURES_CX
157 #define NEED_EXT_FEATURES_BX V4_EXT_FEATURES_BX
158
159 // Darwin requires a different approach to check AVX512 support, see CL 285572.
160 #ifdef GOOS_darwin
161 #define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
162 // These values are from:
163 // https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
164 #define commpage64_base_address 0x00007fffffe00000
165 #define commpage64_cpu_capabilities64 (commpage64_base_address+0x010)
166 #define commpage64_version (commpage64_base_address+0x01E)
167 #define AVX512F 0x0000004000000000
168 #define AVX512CD 0x0000008000000000
169 #define AVX512DQ 0x0000010000000000
170 #define AVX512BW 0x0000020000000000
171 #define AVX512VL 0x0000100000000000
172 #define NEED_DARWIN_SUPPORT (AVX512F | AVX512DQ | AVX512CD | AVX512BW | AVX512VL)
173 #else
174 #define NEED_OS_SUPPORT_AX V4_OS_SUPPORT_AX
175 #endif
176
177 #endif
174
175 TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
176 // copy arguments forward on an even stack
177 MOVQ DI, AX // argc
178 MOVQ SI, BX // argv
179 SUBQ $(5*8), SP // 3args 2auto
180 ANDQ $~15, SP
181 MOVQ AX, 24(SP)
182 MOVQ BX, 32(SP)
183
184 // This is typically the entry point for Go programs.
185 // Call stack unwinding must not proceed past this frame.
186 // Set the frame pointer register to 0 so that frame pointer-based unwinders
187 // (which don't use debug info for performance reasons)
188 // won't attempt to unwind past this function.
189 // See go.dev/issue/63630
190 MOVQ $0, BP
191
192 // create istack out of the given (operating system) stack.
193 // _cgo_init may update stackguard.
194 MOVQ $runtime·g0(SB), DI
195 LEAQ (-64*1024)(SP), BX // assume 64 KB of OS stack is available
196 MOVQ BX, g_stackguard0(DI)
197 MOVQ BX, g_stackguard1(DI)
198 MOVQ BX, (g_stack+stack_lo)(DI)
199 MOVQ SP, (g_stack+stack_hi)(DI)
200
201 // find out information about the processor we're on
202 MOVL $0, AX
203 CPUID
204 CMPL AX, $0
205 JE nocpuinfo // max CPUID leaf is 0: nothing more to query
206
207 CMPL BX, $0x756E6547 // "Genu"
208 JNE notintel
209 CMPL DX, $0x49656E69 // "ineI"
210 JNE notintel
211 CMPL CX, $0x6C65746E // "ntel"
212 JNE notintel
213 MOVB $1, runtime·isIntel(SB)
214
215 notintel:
216 // Load EAX=1 cpuid flags
217 MOVL $1, AX
218 CPUID
219 MOVL AX, runtime·processorVersionInfo(SB)
220
221 nocpuinfo:
222 // if there is an _cgo_init, call it.
223 MOVQ _cgo_init(SB), AX
224 TESTQ AX, AX
225 JZ needtls
226 // arg 1: g0, already in DI
227 MOVQ $setg_gcc<>(SB), SI // arg 2: setg_gcc
228 MOVQ $0, DX // arg 3, 4: not used when using platform's TLS
229 MOVQ $0, CX
230 #ifdef GOOS_android
231 MOVQ $runtime·tls_g(SB), DX // arg 3: &tls_g
232 // arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
233 // Compensate for tls_g (+16).
234 MOVQ -16(TLS), CX
235 #endif
236 #ifdef GOOS_windows
237 MOVQ $runtime·tls_g(SB), DX // arg 3: &tls_g
238 // Adjust for the Win64 calling convention.
239 MOVQ CX, R9 // arg 4
240 MOVQ DX, R8 // arg 3
241 MOVQ SI, DX // arg 2
242 MOVQ DI, CX // arg 1
243 #endif
244 CALL AX
245
246 // update stackguard after _cgo_init
247 MOVQ $runtime·g0(SB), CX
248 MOVQ (g_stack+stack_lo)(CX), AX
249 ADDQ $const_stackGuard, AX
250 MOVQ AX, g_stackguard0(CX)
251 MOVQ AX, g_stackguard1(CX)
252
253 #ifndef GOOS_windows
254 JMP ok // cgo path set up TLS for us; Windows still needs its own TLS setup
255 #endif
256 needtls:
257 #ifdef GOOS_plan9
258 // skip TLS setup on Plan 9
259 JMP ok
260 #endif
261 #ifdef GOOS_solaris
262 // skip TLS setup on Solaris
263 JMP ok
264 #endif
265 #ifdef GOOS_illumos
266 // skip TLS setup on illumos
267 JMP ok
268 #endif
269 #ifdef GOOS_darwin
270 // skip TLS setup on Darwin
271 JMP ok
272 #endif
273 #ifdef GOOS_openbsd
274 // skip TLS setup on OpenBSD
275 JMP ok
276 #endif
277
278 #ifdef GOOS_windows
279 CALL runtime·wintls(SB)
280 #endif
281
282 LEAQ runtime·m0+m_tls(SB), DI
283 CALL runtime·settls(SB)
284
285 // store through it, to make sure it works
286 get_tls(BX)
287 MOVQ $0x123, g(BX)
288 MOVQ runtime·m0+m_tls(SB), AX
289 CMPQ AX, $0x123
290 JEQ 2(PC)
291 CALL runtime·abort(SB) // TLS round-trip failed
292 ok:
293 // set the per-goroutine and per-mach "registers"
294 get_tls(BX)
295 LEAQ runtime·g0(SB), CX
296 MOVQ CX, g(BX)
297 LEAQ runtime·m0(SB), AX
298
299 // save m->g0 = g0
300 MOVQ CX, m_g0(AX)
301 // save m0 to g0->m
302 MOVQ AX, g_m(CX)
303
304 CLD // convention is D is always left cleared
305
306 // Check GOAMD64 requirements
307 // We need to do this after setting up TLS, so that
308 // we can report an error if there is a failure. See issue 49586.
309 #ifdef NEED_FEATURES_CX
310 MOVL $0, AX
311 CPUID
312 CMPL AX, $0
313 JE bad_cpu
314 MOVL $1, AX
315 CPUID
316 ANDL $NEED_FEATURES_CX, CX
317 CMPL CX, $NEED_FEATURES_CX
318 JNE bad_cpu
319 #endif
320
321 #ifdef NEED_MAX_CPUID
322 MOVL $0x80000000, AX
323 CPUID
324 CMPL AX, $NEED_MAX_CPUID
325 JL bad_cpu
326 #endif
327
328 #ifdef NEED_EXT_FEATURES_BX
329 MOVL $7, AX
330 MOVL $0, CX
331 CPUID
332 ANDL $NEED_EXT_FEATURES_BX, BX
333 CMPL BX, $NEED_EXT_FEATURES_BX
334 JNE bad_cpu
335 #endif
336
337 #ifdef NEED_EXT_FEATURES_CX
338 MOVL $0x80000001, AX
339 CPUID
340 ANDL $NEED_EXT_FEATURES_CX, CX
341 CMPL CX, $NEED_EXT_FEATURES_CX
342 JNE bad_cpu
343 #endif
344
345 #ifdef NEED_OS_SUPPORT_AX
346 XORL CX, CX
347 XGETBV // read XCR0 into DX:AX to check OS-enabled register state
348 ANDL $NEED_OS_SUPPORT_AX, AX
349 CMPL AX, $NEED_OS_SUPPORT_AX
350 JNE bad_cpu
351 #endif
352
353 #ifdef NEED_DARWIN_SUPPORT
354 MOVQ $commpage64_version, BX
355 CMPW (BX), $13 // cpu_capabilities64 undefined in versions < 13
356 JL bad_cpu
357 MOVQ $commpage64_cpu_capabilities64, BX
358 MOVQ (BX), BX
359 MOVQ $NEED_DARWIN_SUPPORT, CX
360 ANDQ CX, BX
361 CMPQ BX, CX
362 JNE bad_cpu
363 #endif
364
365 CALL runtime·check(SB)
366
367 MOVL 24(SP), AX // copy argc
368 MOVL AX, 0(SP)
369 MOVQ 32(SP), AX // copy argv
370 MOVQ AX, 8(SP)
371 CALL runtime·args(SB)
372 CALL runtime·osinit(SB)
373 CALL runtime·schedinit(SB)
374
375 // create a new goroutine to start program
376 MOVQ $runtime·mainPC(SB), AX // entry
377 PUSHQ AX
378 CALL runtime·newproc(SB)
379 POPQ AX
380
381 // start this M
382 CALL runtime·mstart(SB)
383
384 CALL runtime·abort(SB) // mstart should never return
385 RET
386
387 bad_cpu: // show that the program requires a certain microarchitecture level.
388 MOVQ $2, 0(SP) // fd 2 (stderr)
389 MOVQ $bad_cpu_msg<>(SB), AX
390 MOVQ AX, 8(SP)
391 MOVQ $84, 16(SP) // message length, matches GLOBL size of bad_cpu_msg
392 CALL runtime·write(SB)
393 MOVQ $1, 0(SP) // exit status 1
394 CALL runtime·exit(SB)
395 CALL runtime·abort(SB)
396 RET
397
398 // Prevent dead-code elimination of debugCallV2 and debugPinnerV1, which are
399 // intended to be called by debuggers.
400 MOVQ $runtime·debugPinnerV1<ABIInternal>(SB), AX
401 MOVQ $runtime·debugCallV2<ABIInternal>(SB), AX
402 RET
403
404 // mainPC is a function value for runtime.main, to be passed to newproc.
405 // The reference to runtime.main is made via ABIInternal, since the
406 // actual function (not the ABI0 wrapper) is needed by newproc.
407 DATA runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
408 GLOBL runtime·mainPC(SB),RODATA,$8
409
410 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
411 BYTE $0xcc // INT3: trap into an attached debugger
412 RET
413
414 TEXT runtime·asminit(SB),NOSPLIT,$0-0
415 // No per-thread init. (Other architectures use this hook; amd64 needs nothing.)
416 RET
417
418 TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
419 // This is the root frame of new Go-created OS threads.
420 // Call stack unwinding must not proceed past this frame.
421 // Set the frame pointer register to 0 so that frame pointer-based unwinders
422 // (which don't use debug info for performance reasons)
423 // won't attempt to unwind past this function.
424 // See go.dev/issue/63630
425 // Fix: use MOVQ (the 64-bit GP move used for the identical sequence in
426 // rt0_go above); MOVD is not the amd64 GP-register clear and would not
427 // zero BP as intended here.
428 MOVQ $0, BP
429 CALL runtime·mstart0(SB)
430 RET // not reached
428
429 /*
430 * go-routine
431 */
432
433 // func gogo(buf *gobuf)
434 // restore state from Gobuf; longjmp
435 TEXT runtime·gogo(SB), NOSPLIT, $0-8
436 MOVQ buf+0(FP), BX // gobuf
437 MOVQ gobuf_g(BX), DX
438 MOVQ 0(DX), CX // make sure g != nil (faults here if nil)
439 JMP gogo<>(SB) // BX = gobuf, DX = target g for the tail
440
441 // gogo<> expects BX = *gobuf and DX = target g (set by runtime·gogo).
442 TEXT gogo<>(SB), NOSPLIT, $0
443 get_tls(CX)
444 MOVQ DX, g(CX)
445 MOVQ DX, R14 // set the g register
446 MOVQ gobuf_sp(BX), SP // restore SP
447 MOVQ gobuf_ctxt(BX), DX
448 MOVQ gobuf_bp(BX), BP
449 MOVQ $0, gobuf_sp(BX) // clear to help garbage collector
450 MOVQ $0, gobuf_ctxt(BX)
451 MOVQ $0, gobuf_bp(BX)
452 MOVQ gobuf_pc(BX), BX
453 JMP BX // resume at the saved PC
453
454 // func mcall(fn func(*g))
455 // Switch to m->g0's stack, call fn(g).
456 // Fn must never return. It should gogo(&g->sched)
457 // to keep running g.
458 TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT, $0-8
459 MOVQ AX, DX // DX = fn
460
461 // Save state in g->sched. The caller's SP and PC are restored by gogo to
462 // resume execution in the caller's frame (implicit return). The caller's BP
463 // is also restored to support frame pointer unwinding.
464 MOVQ SP, BX // hide (SP) reads from vet
465 MOVQ 8(BX), BX // caller's PC
466 MOVQ BX, (g_sched+gobuf_pc)(R14)
467 LEAQ fn+0(FP), BX // caller's SP
468 MOVQ BX, (g_sched+gobuf_sp)(R14)
469 // Get the caller's frame pointer by dereferencing BP. Storing BP as it is
470 // can cause a frame pointer cycle, see CL 476235.
471 MOVQ (BP), BX // caller's BP
472 MOVQ BX, (g_sched+gobuf_bp)(R14)
473
474 // switch to m->g0 & its stack, call fn
475 MOVQ g_m(R14), BX
476 MOVQ m_g0(BX), SI // SI = g.m.g0
477 CMPQ SI, R14 // if g == m->g0 call badmcall
478 JNE goodm
479 JMP runtime·badmcall(SB)
480 goodm:
481 MOVQ R14, AX // AX (and arg 0) = g
482 MOVQ SI, R14 // g = g.m.g0
483 get_tls(CX) // Set G in TLS
484 MOVQ R14, g(CX)
485 MOVQ (g_sched+gobuf_sp)(R14), SP // sp = g0.sched.sp
486 MOVQ $0, BP // clear frame pointer, as caller may execute on another M
487 PUSHQ AX // open up space for fn's arg spill slot
488 MOVQ 0(DX), R12
489 CALL R12 // fn(g); must not return
490 // The Windows native stack unwinder incorrectly classifies the next instruction
491 // as part of the function epilogue, producing a wrong call stack.
492 // Add a NOP to work around this issue. See go.dev/issue/67007.
493 BYTE $0x90
494 POPQ AX
495 JMP runtime·badmcall2(SB) // fn returned: fatal
496 RET
497
498 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
499 // of the G stack. We need to distinguish the routine that
500 // lives at the bottom of the G stack from the one that lives
501 // at the top of the system stack because the one at the top of
502 // the system stack terminates the stack walk (see topofstack()).
503 // The frame layout needs to match systemstack
504 // so that it can pretend to be systemstack_switch.
505 TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
506 UNDEF // never actually executed; its PC is only used as a marker
507 // Make sure this function is not leaf,
508 // so the frame is saved.
509 CALL runtime·abort(SB)
510 RET
511
512 // func systemstack(fn func())
513 TEXT runtime·systemstack(SB), NOSPLIT, $0-8
514 MOVQ fn+0(FP), DI // DI = fn
515 get_tls(CX)
516 MOVQ g(CX), AX // AX = g
517 MOVQ g_m(AX), BX // BX = m
518
519 CMPQ AX, m_gsignal(BX)
520 JEQ noswitch // already on the signal stack: no switch needed
521
522 MOVQ m_g0(BX), DX // DX = g0
523 CMPQ AX, DX
524 JEQ noswitch // already on g0: no switch needed
525
526 CMPQ AX, m_curg(BX)
527 JNE bad // g is not gsignal, g0, or curg: corrupt state
528
529 // Switch stacks.
530 // The original frame pointer is stored in BP,
531 // which is useful for stack unwinding.
532 // Save our state in g->sched. Pretend to
533 // be systemstack_switch if the G stack is scanned.
534 CALL gosave_systemstack_switch<>(SB)
535
536 // switch to g0
537 MOVQ DX, g(CX)
538 MOVQ DX, R14 // set the g register
539 MOVQ (g_sched+gobuf_sp)(DX), SP
540
541 // call target function
542 MOVQ DI, DX
543 MOVQ 0(DI), DI // load the closure's code pointer
544 CALL DI
545
546 // switch back to g
547 get_tls(CX)
548 MOVQ g(CX), AX
549 MOVQ g_m(AX), BX
550 MOVQ m_curg(BX), AX
551 MOVQ AX, g(CX)
552 MOVQ (g_sched+gobuf_sp)(AX), SP
553 MOVQ (g_sched+gobuf_bp)(AX), BP
554 MOVQ $0, (g_sched+gobuf_sp)(AX) // clear saved state to help the GC
555 MOVQ $0, (g_sched+gobuf_bp)(AX)
556 RET
557
558 noswitch:
559 // already on m stack; tail call the function
560 // Using a tail call here cleans up tracebacks since we won't stop
561 // at an intermediate systemstack.
562 MOVQ DI, DX
563 MOVQ 0(DI), DI
564 // The function epilogue is not called on a tail call.
565 // Pop BP from the stack to simulate it.
566 POPQ BP
567 JMP DI
568
569 bad:
570 // Bad: g is not gsignal, not g0, not curg. What is it?
571 MOVQ $runtime·badsystemstack(SB), AX
572 CALL AX
573 INT $3
574
575 // func switchToCrashStack0(fn func())
576 // Switches to the statically-allocated crash stack and calls fn there.
577 // Used when the normal stacks may be unusable during a fatal crash.
578 TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
579 MOVQ g_m(R14), BX // curm
580
581 // set g to gcrash
582 LEAQ runtime·gcrash(SB), R14 // g = &gcrash
583 MOVQ BX, g_m(R14) // g.m = curm
584 MOVQ R14, m_g0(BX) // curm.g0 = g
585 get_tls(CX)
586 MOVQ R14, g(CX)
587
588 // switch to crashstack
589 MOVQ (g_stack+stack_hi)(R14), BX
590 SUBQ $(4*8), BX // leave a little headroom at the top
591 MOVQ BX, SP
592
593 // call target function
594 MOVQ AX, DX
595 MOVQ 0(AX), AX // load the closure's code pointer
596 CALL AX
597
598 // should never return
599 CALL runtime·abort(SB)
600 UNDEF
599
600 /*
601 * support for morestack
602 */
603
604 // Called during function prolog when more stack is needed.
605 //
606 // The traceback routines see morestack on a g0 as being
607 // the top of a stack (for example, morestack calling newstack
608 // calling the scheduler calling newm calling gc), so we must
609 // record an argument size. For that purpose, it has no arguments.
610 // On entry: DX holds the caller's closure context (ctxt).
611 TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
612 // Cannot grow scheduler stack (m->g0).
613 get_tls(CX)
614 MOVQ g(CX), DI // DI = g
615 MOVQ g_m(DI), BX // BX = m
616
617 // Set g->sched to context in f.
618 MOVQ 0(SP), AX // f's PC
619 MOVQ AX, (g_sched+gobuf_pc)(DI)
620 LEAQ 8(SP), AX // f's SP
621 MOVQ AX, (g_sched+gobuf_sp)(DI)
622 MOVQ BP, (g_sched+gobuf_bp)(DI)
623 MOVQ DX, (g_sched+gobuf_ctxt)(DI)
624
625 MOVQ m_g0(BX), SI // SI = m.g0
626 CMPQ DI, SI
627 JNE 3(PC) // growing g0's stack is a fatal error
628 CALL runtime·badmorestackg0(SB)
629 CALL runtime·abort(SB)
630
631 // Cannot grow signal stack (m->gsignal).
632 MOVQ m_gsignal(BX), SI
633 CMPQ DI, SI
634 JNE 3(PC) // growing the signal stack is a fatal error
635 CALL runtime·badmorestackgsignal(SB)
636 CALL runtime·abort(SB)
637
638 // Called from f.
639 // Set m->morebuf to f's caller.
640 NOP SP // tell vet SP changed - stop checking offsets
641 MOVQ 8(SP), AX // f's caller's PC
642 MOVQ AX, (m_morebuf+gobuf_pc)(BX)
643 LEAQ 16(SP), AX // f's caller's SP
644 MOVQ AX, (m_morebuf+gobuf_sp)(BX)
645 MOVQ DI, (m_morebuf+gobuf_g)(BX)
646
647 // Call newstack on m->g0's stack.
648 MOVQ m_g0(BX), BX
649 MOVQ BX, g(CX)
650 MOVQ (g_sched+gobuf_sp)(BX), SP
651 MOVQ $0, BP // clear frame pointer, as caller may execute on another M
652 CALL runtime·newstack(SB)
653 CALL runtime·abort(SB) // crash if newstack returns
654 RET
654
655 // morestack but not preserving ctxt.
656 TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
657 MOVL $0, DX // zero ctxt before the shared path stores it in g->sched
658 JMP runtime·morestack(SB)
659
660 // spillArgs stores return values from registers to a *internal/abi.RegArgs in R12.
661 // R12 itself is the destination pointer and is therefore not spilled.
662 TEXT ·spillArgs(SB),NOSPLIT,$0-0
663 MOVQ AX, 0(R12)
664 MOVQ BX, 8(R12)
665 MOVQ CX, 16(R12)
666 MOVQ DI, 24(R12)
667 MOVQ SI, 32(R12)
668 MOVQ R8, 40(R12)
669 MOVQ R9, 48(R12)
670 MOVQ R10, 56(R12)
671 MOVQ R11, 64(R12)
672 MOVQ X0, 72(R12)
673 MOVQ X1, 80(R12)
674 MOVQ X2, 88(R12)
675 MOVQ X3, 96(R12)
676 MOVQ X4, 104(R12)
677 MOVQ X5, 112(R12)
678 MOVQ X6, 120(R12)
679 MOVQ X7, 128(R12)
680 MOVQ X8, 136(R12)
681 MOVQ X9, 144(R12)
682 MOVQ X10, 152(R12)
683 MOVQ X11, 160(R12)
684 MOVQ X12, 168(R12)
685 MOVQ X13, 176(R12)
686 MOVQ X14, 184(R12)
687 RET
687
688 // unspillArgs loads args into registers from a *internal/abi.RegArgs in R12.
689 // Exact mirror of spillArgs above; keep the two lists in sync.
690 TEXT ·unspillArgs(SB),NOSPLIT,$0-0
691 MOVQ 0(R12), AX
692 MOVQ 8(R12), BX
693 MOVQ 16(R12), CX
694 MOVQ 24(R12), DI
695 MOVQ 32(R12), SI
696 MOVQ 40(R12), R8
697 MOVQ 48(R12), R9
698 MOVQ 56(R12), R10
699 MOVQ 64(R12), R11
700 MOVQ 72(R12), X0
701 MOVQ 80(R12), X1
702 MOVQ 88(R12), X2
703 MOVQ 96(R12), X3
704 MOVQ 104(R12), X4
705 MOVQ 112(R12), X5
706 MOVQ 120(R12), X6
707 MOVQ 128(R12), X7
708 MOVQ 136(R12), X8
709 MOVQ 144(R12), X9
710 MOVQ 152(R12), X10
711 MOVQ 160(R12), X11
712 MOVQ 168(R12), X12
713 MOVQ 176(R12), X13
714 MOVQ 184(R12), X14
715 RET
715
716 // reflectcall: call a function with the given argument list
717 // func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
718 // we don't have variable-sized frames, so we use a small number
719 // of constant-sized-frame functions to encode a few bits of size in the pc.
720 // Caution: ugly multiline assembly macros in your future!
721
722 #define DISPATCH(NAME,MAXSIZE) \
723 CMPQ CX, $MAXSIZE; \
724 JA 3(PC); \
725 MOVQ $NAME(SB), AX; \
726 JMP AX
727 // Note: can't just "JMP NAME(SB)" - bad inlining results.
728
729 // reflectcall dispatches to the call* variant with the smallest frame
730 // size that fits frameSize (see comment above DISPATCH).
731 TEXT ·reflectcall(SB), NOSPLIT, $0-48
732 MOVLQZX frameSize+32(FP), CX
733 DISPATCH(runtime·call16, 16)
734 DISPATCH(runtime·call32, 32)
735 DISPATCH(runtime·call64, 64)
736 DISPATCH(runtime·call128, 128)
737 DISPATCH(runtime·call256, 256)
738 DISPATCH(runtime·call512, 512)
739 DISPATCH(runtime·call1024, 1024)
740 DISPATCH(runtime·call2048, 2048)
741 DISPATCH(runtime·call4096, 4096)
742 DISPATCH(runtime·call8192, 8192)
743 DISPATCH(runtime·call16384, 16384)
744 DISPATCH(runtime·call32768, 32768)
745 DISPATCH(runtime·call65536, 65536)
746 DISPATCH(runtime·call131072, 131072)
747 DISPATCH(runtime·call262144, 262144)
748 DISPATCH(runtime·call524288, 524288)
749 DISPATCH(runtime·call1048576, 1048576)
750 DISPATCH(runtime·call2097152, 2097152)
751 DISPATCH(runtime·call4194304, 4194304)
752 DISPATCH(runtime·call8388608, 8388608)
753 DISPATCH(runtime·call16777216, 16777216)
754 DISPATCH(runtime·call33554432, 33554432)
755 DISPATCH(runtime·call67108864, 67108864)
756 DISPATCH(runtime·call134217728, 134217728)
757 DISPATCH(runtime·call268435456, 268435456)
758 DISPATCH(runtime·call536870912, 536870912)
759 DISPATCH(runtime·call1073741824, 1073741824)
760 MOVQ $runtime·badreflectcall(SB), AX // frame larger than 1 GB: fatal
761 JMP AX
760
761 // CALLFN defines one fixed-frame-size reflectcall trampoline: it copies
762 // the stack arguments into its own frame, loads register args, calls f,
763 // then spills results and copies stack results back via callRet<>.
764 #define CALLFN(NAME,MAXSIZE) \
765 TEXT NAME(SB), WRAPPER, $MAXSIZE-48; \
766 NO_LOCAL_POINTERS; \
767 /* copy arguments to stack */ \
768 MOVQ stackArgs+16(FP), SI; \
769 MOVLQZX stackArgsSize+24(FP), CX; \
770 MOVQ SP, DI; \
771 REP;MOVSB; \
772 /* set up argument registers */ \
773 MOVQ regArgs+40(FP), R12; \
774 CALL ·unspillArgs(SB); \
775 /* call function */ \
776 MOVQ f+8(FP), DX; \
777 PCDATA $PCDATA_StackMapIndex, $0; \
778 MOVQ (DX), R12; \
779 CALL R12; \
780 /* copy register return values back */ \
781 MOVQ regArgs+40(FP), R12; \
782 CALL ·spillArgs(SB); \
783 MOVLQZX stackArgsSize+24(FP), CX; \
784 MOVLQZX stackRetOffset+28(FP), BX; \
785 MOVQ stackArgs+16(FP), DI; \
786 MOVQ stackArgsType+0(FP), DX; \
787 MOVQ SP, SI; \
788 ADDQ BX, DI; \
789 ADDQ BX, SI; \
790 SUBQ BX, CX; \
791 CALL callRet<>(SB); \
792 RET
790
791 // callRet copies return values back at the end of call*. This is a
792 // separate function so it can allocate stack space for the arguments
793 // to reflectcallmove. It does not follow the Go ABI; it expects its
794 // arguments in registers (DX, DI, SI, CX, R12 — see CALLFN above).
795 TEXT callRet<>(SB), NOSPLIT, $40-0
796 NO_LOCAL_POINTERS
797 MOVQ DX, 0(SP)
798 MOVQ DI, 8(SP)
799 MOVQ SI, 16(SP)
800 MOVQ CX, 24(SP)
801 MOVQ R12, 32(SP)
802 CALL runtime·reflectcallmove(SB)
803 RET
804
805 // Instantiate one trampoline per power-of-two frame size, matching the
806 // DISPATCH table in reflectcall above.
807 CALLFN(·call16, 16)
808 CALLFN(·call32, 32)
809 CALLFN(·call64, 64)
810 CALLFN(·call128, 128)
811 CALLFN(·call256, 256)
812 CALLFN(·call512, 512)
813 CALLFN(·call1024, 1024)
814 CALLFN(·call2048, 2048)
815 CALLFN(·call4096, 4096)
816 CALLFN(·call8192, 8192)
817 CALLFN(·call16384, 16384)
818 CALLFN(·call32768, 32768)
819 CALLFN(·call65536, 65536)
820 CALLFN(·call131072, 131072)
821 CALLFN(·call262144, 262144)
822 CALLFN(·call524288, 524288)
823 CALLFN(·call1048576, 1048576)
824 CALLFN(·call2097152, 2097152)
825 CALLFN(·call4194304, 4194304)
826 CALLFN(·call8388608, 8388608)
827 CALLFN(·call16777216, 16777216)
828 CALLFN(·call33554432, 33554432)
829 CALLFN(·call67108864, 67108864)
830 CALLFN(·call134217728, 134217728)
831 CALLFN(·call268435456, 268435456)
832 CALLFN(·call536870912, 536870912)
833 CALLFN(·call1073741824, 1073741824)
832
833 // procyieldAsm(cycles uint32): spin for the given count, issuing PAUSE
834 // each iteration to be polite to the sibling hyperthread.
835 TEXT runtime·procyieldAsm(SB),NOSPLIT,$0-0
836 MOVL cycles+0(FP), AX
837 TESTL AX, AX
838 JZ done // cycles == 0: nothing to do
839 again:
840 PAUSE
841 SUBL $1, AX
842 JNZ again
843 done:
844 RET


845 TEXT ·publicationBarrier<ABIInternal>(SB),NOSPLIT,$0-0
846 // Stores are already ordered on x86, so this is just a
847 // compile barrier.
848 RET
849
850 // Save state of caller into g->sched,
851 // but using fake PC from systemstack_switch.
852 // Must only be called from functions with frame pointer
853 // and without locals ($0) or else unwinding from
854 // systemstack_switch is incorrect.
855 // Smashes R9.
856 TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
857 // Take systemstack_switch PC and add 8 bytes to skip
858 // the prologue. The final location does not matter
859 // as long as we are between the prologue and the epilogue.
860 MOVQ $runtime·systemstack_switch+8(SB), R9
861 MOVQ R9, (g_sched+gobuf_pc)(R14)
862 LEAQ 8(SP), R9 // caller's SP (skip our return address)
863 MOVQ R9, (g_sched+gobuf_sp)(R14)
864 MOVQ BP, (g_sched+gobuf_bp)(R14)
865 // Assert ctxt is zero. See func save.
866 MOVQ (g_sched+gobuf_ctxt)(R14), R9
867 TESTQ R9, R9
868 JZ 2(PC)
869 CALL runtime·abort(SB)
870 RET
871
872 // func asmcgocall_no_g(fn, arg unsafe.Pointer)
873 // Call fn(arg) aligned appropriately for the gcc ABI.
874 // Called on a system stack, and there may be no g yet (during needm).
875 TEXT ·asmcgocall_no_g(SB),NOSPLIT,$32-16
876 MOVQ fn+0(FP), AX
877 MOVQ arg+8(FP), BX
878 MOVQ SP, DX // remember original SP so we can restore after realignment
879 ANDQ $~15, SP // alignment
880 MOVQ DX, 8(SP)
881 MOVQ BX, DI // DI = first argument in AMD64 ABI
882 MOVQ BX, CX // CX = first argument in Win64
883 CALL AX
884 MOVQ 8(SP), DX
885 MOVQ DX, SP
886 RET
887
888 // asmcgocall_landingpad calls AX with BX as argument.
889 // Must be called on the system stack.
890 TEXT ·asmcgocall_landingpad(SB),NOSPLIT,$0-0
891 #ifdef GOOS_windows
892 // Make sure we have enough room for 4 stack-backed fast-call
893 // registers as per Windows amd64 calling convention.
894 ADJSP $32
895 // On Windows, asmcgocall_landingpad acts as landing pad for exceptions
896 // thrown in the cgo call. Exceptions that reach this function will be
897 // handled by runtime.sehtramp thanks to the SEH metadata added
898 // by the compiler.
899 // Note that runtime.sehtramp can't be attached directly to asmcgocall
900 // because its initial stack pointer can be outside the system stack bounds,
901 // and Windows stops the stack unwinding without calling the exception handler
902 // when it reaches that point.
903 MOVQ BX, CX // CX = first argument in Win64
904 CALL AX
905 // The exception handler is not called if the next instruction is part of
906 // the epilogue, which includes the RET instruction, so we need to add a NOP here.
907 BYTE $0x90
908 ADJSP $-32
909 RET
910 #endif
911 // Tail call AX on non-Windows, as the extra stack frame is not needed.
912 MOVQ BX, DI // DI = first argument in AMD64 ABI
913 JMP AX
914
915 // func asmcgocall(fn, arg unsafe.Pointer) int32
916 // Call fn(arg) on the scheduler stack,
917 // aligned appropriately for the gcc ABI.
918 // See cgocall.go for more details.
919 TEXT ·asmcgocall(SB),NOSPLIT,$0-20
920 MOVQ fn+0(FP), AX
921 MOVQ arg+8(FP), BX
922
923 MOVQ SP, DX // remember current depth for restoring after the call
924
925 // Figure out if we need to switch to m->g0 stack.
926 // We get called to create new OS threads too, and those
927 // come in on the m->g0 stack already. Or we might already
928 // be on the m->gsignal stack.
929 get_tls(CX)
930 MOVQ g(CX), DI
931 CMPQ DI, $0
932 JEQ nosave // no g yet (thread creation/teardown)
933 MOVQ g_m(DI), R8
934 MOVQ m_gsignal(R8), SI
935 CMPQ DI, SI
936 JEQ nosave // already on the signal stack
937 MOVQ m_g0(R8), SI
938 CMPQ DI, SI
939 JEQ nosave // already on g0
940
941 // Switch to system stack.
942 // The original frame pointer is stored in BP,
943 // which is useful for stack unwinding.
944 CALL gosave_systemstack_switch<>(SB)
945 MOVQ SI, g(CX)
946 MOVQ (g_sched+gobuf_sp)(SI), SP
947
948 // Now on a scheduling stack (a pthread-created stack).
949 SUBQ $16, SP
950 ANDQ $~15, SP // alignment for gcc ABI
951 MOVQ DI, 8(SP) // save g
952 MOVQ (g_stack+stack_hi)(DI), DI
953 SUBQ DX, DI
954 MOVQ DI, 0(SP) // save depth in stack (can't just save SP, as stack might be copied during a callback)
955 CALL runtime·asmcgocall_landingpad(SB)
956
957 // Restore registers, g, stack pointer.
958 get_tls(CX)
959 MOVQ 8(SP), DI
960 MOVQ (g_stack+stack_hi)(DI), SI
961 SUBQ 0(SP), SI // recompute SP from saved depth (stack may have moved)
962 MOVQ DI, g(CX)
963 MOVQ SI, SP
964
965 MOVL AX, ret+16(FP)
966 RET
967
968 nosave:
969 // Running on a system stack, perhaps even without a g.
970 // Having no g can happen during thread creation or thread teardown
971 // (see needm/dropm on Solaris, for example).
972 // This code is like the above sequence but without saving/restoring g
973 // and without worrying about the stack moving out from under us
974 // (because we're on a system stack, not a goroutine stack).
975 // The above code could be used directly if already on a system stack,
976 // but then the only path through this code would be a rare case on Solaris.
977 // Using this code for all "already on system stack" calls exercises it more,
978 // which should help keep it correct.
979 SUBQ $16, SP
980 ANDQ $~15, SP
981 MOVQ $0, 8(SP) // where above code stores g, in case someone looks during debugging
982 MOVQ DX, 0(SP) // save original stack pointer
983 CALL runtime·asmcgocall_landingpad(SB)
984 MOVQ 0(SP), SI // restore original stack pointer
985 MOVQ SI, SP
986 MOVL AX, ret+16(FP)
987 RET
988
989 #ifdef GOOS_windows
990 // Dummy TLS that's used on Windows so that we don't crash trying
991 // to restore the G register in needm. needm and its callees are
992 // very careful never to actually use the G, the TLS just can't be
993 // unset since we're in Go code.
994 GLOBL zeroTLS<>(SB),RODATA,$const_tlsSize
995 #endif
996
// func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
// See cgocall.go for more details.
//
// Entered from C (via crosscall2) on the C/g0 stack. Switches to
// m->curg, runs runtime.cgocallbackg(fn, frame, ctxt), then switches
// back and possibly drops the borrowed m.
TEXT ·cgocallback(SB),NOSPLIT,$24-24
	NO_LOCAL_POINTERS

	// Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g.
	// It is used to dropm while thread is exiting.
	MOVQ	fn+0(FP), AX
	CMPQ	AX, $0
	JNE	loadg
	// Restore the g from frame.
	get_tls(CX)
	MOVQ	frame+8(FP), BX
	MOVQ	BX, g(CX)
	JMP	dropm

loadg:
	// If g is nil, Go did not create the current thread,
	// or if this thread never called into Go on pthread platforms.
	// Call needm to obtain one m for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	// On Windows the TLS pointer itself may still be nil; treat that
	// the same as a nil g (BX = 0) and fall through to needm.
	MOVL	$0, BX
	CMPQ	CX, $0
	JEQ	2(PC)
#endif
	MOVQ	g(CX), BX
	CMPQ	BX, $0
	JEQ	needm
	MOVQ	g_m(BX), BX
	MOVQ	BX, savedm-8(SP)	// saved copy of oldm
	JMP	havem
needm:
#ifdef GOOS_windows
	// Set up a dummy TLS value. needm is careful not to use it,
	// but it needs to be there to prevent autogenerated code from
	// crashing when it loads from it.
	// We don't need to clear it or anything later because needm
	// will set up TLS properly.
	MOVQ	$zeroTLS<>(SB), DI
	CALL	runtime·settls(SB)
#endif
	// On some platforms (Windows) we cannot call needm through
	// an ABI wrapper because there's no TLS set up, and the ABI
	// wrapper will try to restore the G register (R14) from TLS.
	// Clear X15 because Go expects it and we're not calling
	// through a wrapper, but otherwise avoid setting the G
	// register in the wrapper and call needm directly. It
	// takes no arguments and doesn't return any values so
	// there's no need to handle that. Clear R14 so that there's
	// a bad value in there, in case needm tries to use it.
	XORPS	X15, X15
	CMPB	internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
	JNE	2(PC)
	VXORPS	X15, X15, X15
	XORQ	R14, R14
	MOVQ	$runtime·needAndBindM<ABIInternal>(SB), AX
	CALL	AX
	MOVQ	$0, savedm-8(SP)	// dropm decision is made below via _cgo_pthread_key_created
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVQ	m_g0(BX), SI
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVQ	m_g0(BX), SI
	MOVQ	(g_sched+gobuf_sp)(SI), AX
	MOVQ	AX, 0(SP)
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the curg stack and
	// open a frame the same size as cgocallback's g0 frame.
	// Once we switch to the curg stack, the pushed PC will appear
	// to be the return PC of cgocallback, so that the traceback
	// will seamlessly trace back into the earlier calls.
	MOVQ	m_curg(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
	MOVQ	(g_sched+gobuf_pc)(SI), BX
	MOVQ	BX, -8(DI)	// "push" return PC on the g stack
	// Gather our arguments into registers.
	MOVQ	fn+0(FP), BX
	MOVQ	frame+8(FP), CX
	MOVQ	ctxt+16(FP), DX
	// Compute the size of the frame, including return PC and, if
	// GOEXPERIMENT=framepointer, the saved base pointer
	LEAQ	fn+0(FP), AX
	SUBQ	SP, AX	// AX is our actual frame size
	SUBQ	AX, DI	// Allocate the same frame size on the g stack
	MOVQ	DI, SP

	MOVQ	BX, 0(SP)
	MOVQ	CX, 8(SP)
	MOVQ	DX, 16(SP)
	MOVQ	$runtime·cgocallbackg(SB), AX
	CALL	AX	// indirect call to bypass nosplit check. We're on a different stack now.

	// Compute the size of the frame again. FP and SP have
	// completely different values here than they did above,
	// but only their difference matters.
	LEAQ	fn+0(FP), AX
	SUBQ	SP, AX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	SP, DI
	ADDQ	AX, DI
	MOVQ	-8(DI), BX
	MOVQ	BX, (g_sched+gobuf_pc)(SI)
	MOVQ	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP
	MOVQ	0(SP), AX
	MOVQ	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m,
	// 1. for the duration of the call on non-pthread platforms,
	// 2. or the duration of the C thread alive on pthread platforms.
	// If the m on entry wasn't nil,
	// 1. the thread might be a Go thread,
	// 2. or it wasn't the first call from a C thread on pthread platforms,
	// since then we skip dropm to reuse the m in the first call.
	MOVQ	savedm-8(SP), BX
	CMPQ	BX, $0
	JNE	done

	// Skip dropm to reuse it in the next call, when a pthread key has been created.
	MOVQ	_cgo_pthread_key_created(SB), AX
	// It means cgo is disabled when _cgo_pthread_key_created is a nil pointer, need dropm.
	CMPQ	AX, $0
	JEQ	dropm
	CMPQ	(AX), $0
	JNE	done

dropm:
	MOVQ	$runtime·dropm(SB), AX
	CALL	AX
#ifdef GOOS_windows
	// We need to clear the TLS pointer in case the next
	// thread that comes into Go tries to reuse that space
	// but uses the same M.
	XORQ	DI, DI
	CALL	runtime·settls(SB)
#endif
done:

	// Done!
	RET
1180
// func setg(gg *g)
// Store gg into the thread-local g slot. For use by needm, which
// runs without a valid g; note this deliberately does not touch R14.
TEXT runtime·setg(SB), NOSPLIT, $0-8
	get_tls(BX)
	MOVQ	gg+0(FP), CX
	MOVQ	CX, g(BX)
	RET
1188
// void setg_gcc(G*); set g called from gcc.
// DI carries the g per the C ABI; DX is a caller-saved scratch there.
TEXT setg_gcc<>(SB),NOSPLIT,$0
	get_tls(DX)
	MOVQ	DI, g(DX)	// store into the TLS g slot
	MOVQ	DI, R14		// set the g register
	RET
1195
// abort crashes the process with a breakpoint trap; the jump-to-self
// afterwards guarantees it never returns even if the trap is handled.
TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$3
loop:
	JMP	loop
1200
// check that SP is in range [g->stack.lo, g->stack.hi)
TEXT runtime·stackcheck(SB), NOSPLIT|NOFRAME, $0-0
	get_tls(CX)
	MOVQ	g(CX), AX
	CMPQ	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)			// ok: hi > SP (unsigned); skip the abort
	CALL	runtime·abort(SB)
	CMPQ	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)			// ok: SP > lo (unsigned); skip the abort
	CALL	runtime·abort(SB)
	RET
1212
// func cputicks() int64
// Returns the CPU timestamp counter, serialized against earlier
// instructions either by RDTSCP or by an MFENCE;LFENCE pair.
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	CMPB	internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
	JNE	fences
	// Instruction stream serializing RDTSCP is supported.
	// RDTSCP is supported by Intel Nehalem (2008) and
	// AMD K8 Rev. F (2006) and newer.
	RDTSCP
done:
	// Combine the EDX:EAX halves into one 64-bit value.
	SHLQ	$32, DX
	ADDQ	DX, AX
	MOVQ	AX, ret+0(FP)
	RET
fences:
	// MFENCE is instruction stream serializing and flushes the
	// store buffers on AMD. The serialization semantics of LFENCE on AMD
	// are dependent on MSR C001_1029 and CPU generation.
	// LFENCE on Intel does wait for all previous instructions to have executed.
	// Intel recommends MFENCE;LFENCE in its manuals before RDTSC to have all
	// previous instructions executed and all previous loads and stores to globally visible.
	// Using MFENCE;LFENCE here aligns the serializing properties without
	// runtime detection of CPU manufacturer.
	MFENCE
	LFENCE
	RDTSC
	JMP done
1239
// func memhash(p unsafe.Pointer, h, s uintptr) uintptr
// hash function using AES hardware instructions
TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT,$0-32
	// AX = ptr to data
	// BX = seed
	// CX = size
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	// Tail call: aeshashbody takes its inputs in AX/BX/CX as laid out above.
	JMP	aeshashbody<>(SB)
noaes:
	JMP	runtime·memhashFallback<ABIInternal>(SB)
1251
// func strhash(p unsafe.Pointer, h uintptr) uintptr
TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT,$0-24
	// AX = ptr to string struct
	// BX = seed
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	// Unpack the string header into aeshashbody's (data, seed, len) registers.
	MOVQ	8(AX), CX	// length of string
	MOVQ	(AX), AX	// string data
	JMP	aeshashbody<>(SB)
noaes:
	JMP	runtime·strhashFallback<ABIInternal>(SB)
1263
// AX: data
// BX: hash seed
// CX: length
// At return: AX = return value
//
// Common body for memhash/strhash when AES-NI is available. Dispatches
// on length to size-specialized paths; each path mixes the data with
// per-process key material (runtime·aeskeysched) via AESENC rounds.
TEXT aeshashbody<>(SB),NOSPLIT,$0-0
	// Fill an SSE register with our seeds.
	MOVQ	BX, X0				// 64 bits of per-table hash seed
	PINSRW	$4, CX, X0			// 16 bits of length
	PSHUFHW	$0, X0, X0			// repeat length 4 times total
	MOVO	X0, X1				// save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
	AESENC	X0, X0				// scramble seed

	CMPQ	CX, $16
	JB	aes0to15
	JE	aes16
	CMPQ	CX, $32
	JBE	aes17to32
	CMPQ	CX, $64
	JBE	aes33to64
	CMPQ	CX, $128
	JBE	aes65to128
	JMP	aes129plus

aes0to15:
	TESTQ	CX, CX
	JE	aes0

	// If bits 4..11 of p+16 are all zero, a 16-byte load at p could
	// run into the next 4K page; take the page-safe path instead.
	ADDQ	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	ADDQ	CX, CX
	MOVQ	$masks<>(SB), AX
	PAND	(AX)(CX*8), X1		// (2*CX)*8 = CX*16: the CX-byte keep-mask
final1:
	PXOR	X0, X1	// xor data with seed
	AESENC	X1, X1	// scramble combo 3 times
	AESENC	X1, X1
	AESENC	X1, X1
	MOVQ	X1, AX	// return X1
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(CX*1), X1
	ADDQ	CX, CX
	MOVQ	$shifts<>(SB), AX
	PSHUFB	(AX)(CX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVQ	X0, AX	// return X0
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed
	MOVOU	(AX), X2
	MOVOU	-16(AX)(CX*1), X3	// overlapping tail load covers 17..32 bytes

	// xor with seed
	PXOR	X0, X2
	PXOR	X1, X3

	// scramble 3 times
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVQ	X2, AX	// return X2
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(CX*1), X6
	MOVOU	-16(AX)(CX*1), X7

	PXOR	X0, X4
	PXOR	X1, X5
	PXOR	X2, X6
	PXOR	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVQ	X4, AX	// return X4
	RET

aes65to128:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// load data
	MOVOU	(AX), X8
	MOVOU	16(AX), X9
	MOVOU	32(AX), X10
	MOVOU	48(AX), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor with seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// scramble 3 times
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// combine results
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	// X15 must be zero on return
	PXOR	X15, X15
	MOVQ	X8, AX	// return X8
	RET

aes129plus:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// start with last (possibly overlapping) block
	MOVOU	-128(AX)(CX*1), X8
	MOVOU	-112(AX)(CX*1), X9
	MOVOU	-96(AX)(CX*1), X10
	MOVOU	-80(AX)(CX*1), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor in seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// compute number of remaining 128-byte blocks
	DECQ	CX
	SHRQ	$7, CX

	PCALIGN $16
aesloop:
	// scramble state
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X8
	AESENC	X1, X9
	AESENC	X2, X10
	AESENC	X3, X11
	MOVOU	64(AX), X4
	MOVOU	80(AX), X5
	MOVOU	96(AX), X6
	MOVOU	112(AX), X7
	AESENC	X4, X12
	AESENC	X5, X13
	AESENC	X6, X14
	AESENC	X7, X15

	ADDQ	$128, AX
	DECQ	CX
	JNE	aesloop

	// 3 more scrambles to finish
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	// X15 must be zero on return
	PXOR	X15, X15
	MOVQ	X8, AX	// return X8
	RET
1600
// func memhash32(p unsafe.Pointer, h uintptr) uintptr
// ABIInternal for performance.
TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT,$0-24
	// AX = ptr to data
	// BX = seed
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	// Pack seed (low 64 bits) and the 4 data bytes (lane 2) into X0,
	// then run three AES rounds keyed by the per-process schedule.
	MOVQ	BX, X0	// X0 = seed
	PINSRD	$2, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, AX	// return X0
	RET
noaes:
	JMP	runtime·memhash32Fallback<ABIInternal>(SB)
1617
// func memhash64(p unsafe.Pointer, h uintptr) uintptr
// ABIInternal for performance.
TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT,$0-24
	// AX = ptr to data
	// BX = seed
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	// Pack seed (low qword) and the 8 data bytes (high qword) into X0,
	// then run three AES rounds keyed by the per-process schedule.
	MOVQ	BX, X0	// X0 = seed
	PINSRQ	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, AX	// return X0
	RET
noaes:
	JMP	runtime·memhash64Fallback<ABIInternal>(SB)
1634
// simple mask to get rid of data in the high part of the register.
// Entry n is the 16 bytes at offset n*16: the low n bytes are 0xff and
// the rest zero. aeshashbody selects it with PAND (AX)(CX*8) after
// doubling CX, i.e. an n-byte keep-mask for the short-input path.
DATA masks<>+0x00(SB)/8, $0x0000000000000000
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL masks<>(SB),RODATA,$256
1669
// func checkASM() bool
// Reports whether this file's data tables are usable: masks<> and
// shifts<> must be 16-byte aligned because aeshashbody uses them as
// memory operands of PAND/PSHUFB.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	MOVQ	$shifts<>(SB), CX
	MOVQ	$masks<>(SB), DX
	ORQ	DX, CX			// fold both addresses together
	ANDQ	$15, CX			// any low bit set => misaligned
	SETEQ	ret+0(FP)		// true iff both tables are aligned
	RET
1679
// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// (PSHUFB zeroes any destination byte whose control byte has the high
// bit set, so the 0xff entries clear the bytes past the data.)
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256
1716
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVQ	g(CX), AX	// AX = g
	MOVQ	g_m(AX), AX	// AX = g.m
	MOVQ	m_curg(AX), AX	// AX = g.m.curg
	MOVQ	(g_stack+stack_hi)(AX), AX	// return value in AX per C ABI
	RET
1726
// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME|NOFRAME,$0-0
	BYTE	$0x90	// NOP — return address goexit+1 lands here, inside goexit
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP
1734
// This is called from .init_array and follows the platform, not Go, ABI.
// DI holds the new moduledata pointer (first C argument); it is appended
// to the runtime's linked list of modules.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
	MOVQ	runtime·lastmoduledatap(SB), AX
	MOVQ	DI, moduledata_next(AX)
	MOVQ	DI, runtime·lastmoduledatap(SB)
	POPQ	R15
	RET
1743
// Initialize special registers then jump to sigpanic.
// This function is injected from the signal handler for panicking
// signals. It is quite painful to set X15 in the signal context,
// so we do it here.
TEXT ·sigpanic0(SB),NOSPLIT,$0-0
	get_tls(R14)
	MOVQ	g(R14), R14	// R14 = g, as ABIInternal sigpanic expects
	XORPS	X15, X15	// X15 = zero, the ABIInternal fixed zero register
	CMPB	internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
	JNE	2(PC)
	VXORPS	X15, X15, X15	// AVX form also clears the upper YMM bits
	JMP	·sigpanic<ABIInternal>(SB)
1756
// gcWriteBarrier informs the GC about heap pointer writes.
//
// gcWriteBarrier returns space in a write barrier buffer which
// should be filled in by the caller.
// gcWriteBarrier does NOT follow the Go ABI. It accepts the
// number of bytes of buffer needed in R11, and returns a pointer
// to the buffer space in R11.
// It clobbers FLAGS. It does not clobber any general-purpose registers,
// but may clobber others (e.g., SSE registers).
// Typical use would be, when doing *(CX+88) = AX
//     CMPL    $0, runtime.writeBarrier(SB)
//     JEQ     dowrite
//     CALL    runtime.gcBatchBarrier2(SB)
//     MOVQ    AX, (R11)
//     MOVQ    88(CX), DX
//     MOVQ    DX, 8(R11)
// dowrite:
//     MOVQ    AX, 88(CX)
TEXT gcWriteBarrier<>(SB),NOSPLIT,$112
	// Save the registers clobbered by the fast path. This is slightly
	// faster than having the caller spill these.
	MOVQ	R12, 96(SP)
	MOVQ	R13, 104(SP)
retry:
	// TODO: Consider passing g.m.p in as an argument so they can be shared
	// across a sequence of write barriers.
	MOVQ	g_m(R14), R13	// R14 is g (ABIInternal)
	MOVQ	m_p(R13), R13
	// Get current buffer write position.
	MOVQ	(p_wbBuf+wbBuf_next)(R13), R12	// original next position
	ADDQ	R11, R12		// new next position
	// Is the buffer full?
	CMPQ	R12, (p_wbBuf+wbBuf_end)(R13)
	JA	flush
	// Commit to the larger buffer.
	MOVQ	R12, (p_wbBuf+wbBuf_next)(R13)
	// Make return value (the original next position)
	SUBQ	R11, R12
	MOVQ	R12, R11
	// Restore registers.
	MOVQ	96(SP), R12
	MOVQ	104(SP), R13
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	// It is possible for wbBufFlush to clobber other registers
	// (e.g., SSE registers), but the compiler takes care of saving
	// those in the caller if necessary. This strikes a balance
	// with registers that are likely to be used.
	//
	// We don't have type information for these, but all code under
	// here is NOSPLIT, so nothing will observe these.
	//
	// TODO: We could strike a different balance; e.g., saving X0
	// and not saving GP registers that are less likely to be used.
	MOVQ	DI, 0(SP)
	MOVQ	AX, 8(SP)
	MOVQ	BX, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	DX, 32(SP)
	// DI already saved
	MOVQ	SI, 40(SP)
	MOVQ	BP, 48(SP)
	MOVQ	R8, 56(SP)
	MOVQ	R9, 64(SP)
	MOVQ	R10, 72(SP)
	MOVQ	R11, 80(SP)
	// R12 already saved
	// R13 already saved
	// R14 is g
	MOVQ	R15, 88(SP)

	CALL	runtime·wbBufFlush(SB)

	MOVQ	0(SP), DI
	MOVQ	8(SP), AX
	MOVQ	16(SP), BX
	MOVQ	24(SP), CX
	MOVQ	32(SP), DX
	MOVQ	40(SP), SI
	MOVQ	48(SP), BP
	MOVQ	56(SP), R8
	MOVQ	64(SP), R9
	MOVQ	72(SP), R10
	MOVQ	80(SP), R11
	MOVQ	88(SP), R15
	JMP	retry	// buffer was drained; the reservation now fits
1846
// gcWriteBarrierN entry points: load the byte count (N pointers * 8)
// into R11 and tail-call the common body above.
TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$8, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$16, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$24, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$32, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$40, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$48, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$56, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$64, R11
	JMP	gcWriteBarrier<>(SB)
1871
// Error string reported to the debugger when the requested argument
// frame exceeds the largest DEBUG_CALL_FN size (65536).
DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below
1874
// debugCallV2 is the entry point for debugger-injected function
// calls on running goroutines. It informs the runtime that a
// debug call has been injected and creates a call frame for the
// debugger to fill in.
//
// To inject a function call, a debugger should:
// 1. Check that the goroutine is in state _Grunning and that
//    there are at least 256 bytes free on the stack.
// 2. Push the current PC on the stack (updating SP).
// 3. Write the desired argument frame size at SP-16 (using the SP
//    after step 2).
// 4. Save all machine registers (including flags and XMM registers)
//    so they can be restored later by the debugger.
// 5. Set the PC to debugCallV2 and resume execution.
//
// If the goroutine is in state _Grunnable, then it's not generally
// safe to inject a call because it may return out via other runtime
// operations. Instead, the debugger should unwind the stack to find
// the return to non-runtime code, add a temporary breakpoint there,
// and inject the call once that breakpoint is hit.
//
// If the goroutine is in any other state, it's not safe to inject a call.
//
// This function communicates back to the debugger by setting R12 and
// invoking INT3 to raise a breakpoint signal. See the comments in the
// implementation for the protocol the debugger is expected to
// follow. InjectDebugCall in the runtime tests demonstrates this protocol.
//
// The debugger must ensure that any pointers passed to the function
// obey escape analysis requirements. Specifically, it must not pass
// a stack pointer to an escaping argument. debugCallV2 cannot check
// this invariant.
//
// This is ABIInternal because Go code injects its PC directly into new
// goroutine stacks.
TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT,$152-0
	// Save all registers that may contain pointers so they can be
	// conservatively scanned.
	//
	// We can't do anything that might clobber any of these
	// registers before this.
	MOVQ	R15, r15-(14*8+8)(SP)
	MOVQ	R14, r14-(13*8+8)(SP)
	MOVQ	R13, r13-(12*8+8)(SP)
	MOVQ	R12, r12-(11*8+8)(SP)
	MOVQ	R11, r11-(10*8+8)(SP)
	MOVQ	R10, r10-(9*8+8)(SP)
	MOVQ	R9, r9-(8*8+8)(SP)
	MOVQ	R8, r8-(7*8+8)(SP)
	MOVQ	DI, di-(6*8+8)(SP)
	MOVQ	SI, si-(5*8+8)(SP)
	MOVQ	BP, bp-(4*8+8)(SP)
	MOVQ	BX, bx-(3*8+8)(SP)
	MOVQ	DX, dx-(2*8+8)(SP)
	// Save the frame size before we clobber it. Either of the last
	// saves could clobber this depending on whether there's a saved BP.
	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
	MOVQ	CX, cx-(1*8+8)(SP)
	MOVQ	AX, ax-(0*8+8)(SP)

	// Save the argument frame size.
	MOVQ	DX, frameSize-128(SP)

	// Perform a safe-point check.
	MOVQ	retpc-8(FP), AX	// Caller's PC
	MOVQ	AX, 0(SP)
	CALL	runtime·debugCallCheck(SB)
	MOVQ	8(SP), AX
	TESTQ	AX, AX
	JZ	good
	// The safety check failed. Put the reason string at the top
	// of the stack.
	MOVQ	AX, 0(SP)
	MOVQ	16(SP), AX
	MOVQ	AX, 8(SP)
	// Set R12 to 8 and invoke INT3. The debugger should get the
	// reason a call can't be injected from the top of the stack
	// and resume execution.
	MOVQ	$8, R12
	BYTE	$0xcc	// INT3
	JMP	restore

good:
	// Registers are saved and it's safe to make a call.
	// Open up a call frame, moving the stack if necessary.
	//
	// Once the frame is allocated, this will set R12 to 0 and
	// invoke INT3. The debugger should write the argument
	// frame for the call at SP, set up argument registers, push
	// the trapping PC on the stack, set the PC to the function to
	// call, set RDX to point to the closure (if a closure call),
	// and resume execution.
	//
	// If the function returns, this will set R12 to 1 and invoke
	// INT3. The debugger can then inspect any return value saved
	// on the stack at SP and in registers and resume execution again.
	//
	// If the function panics, this will set R12 to 2 and invoke INT3.
	// The interface{} value of the panic will be at SP. The debugger
	// can inspect the panic value and resume execution again.
#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
	CMPQ	AX, $MAXSIZE;			\
	JA	5(PC);				\
	MOVQ	$NAME(SB), AX;			\
	MOVQ	AX, 0(SP);			\
	CALL	runtime·debugCallWrap(SB);	\
	JMP	restore

	MOVQ	frameSize-128(SP), AX
	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
	// The frame size is too large. Report the error.
	MOVQ	$debugCallFrameTooLarge<>(SB), AX
	MOVQ	AX, 0(SP)
	MOVQ	$20, 8(SP) // length of debugCallFrameTooLarge string
	MOVQ	$8, R12
	BYTE	$0xcc	// INT3
	JMP	restore

restore:
	// Calls and failures resume here.
	//
	// Set R12 to 16 and invoke INT3. The debugger should restore
	// all registers except RIP and RSP and resume execution.
	MOVQ	$16, R12
	BYTE	$0xcc	// INT3
	// We must not modify flags after this point.

	// Restore pointer-containing registers, which may have been
	// modified from the debugger's copy by stack copying.
	MOVQ	ax-(0*8+8)(SP), AX
	MOVQ	cx-(1*8+8)(SP), CX
	MOVQ	dx-(2*8+8)(SP), DX
	MOVQ	bx-(3*8+8)(SP), BX
	MOVQ	bp-(4*8+8)(SP), BP
	MOVQ	si-(5*8+8)(SP), SI
	MOVQ	di-(6*8+8)(SP), DI
	MOVQ	r8-(7*8+8)(SP), R8
	MOVQ	r9-(8*8+8)(SP), R9
	MOVQ	r10-(9*8+8)(SP), R10
	MOVQ	r11-(10*8+8)(SP), R11
	MOVQ	r12-(11*8+8)(SP), R12
	MOVQ	r13-(12*8+8)(SP), R13
	MOVQ	r14-(13*8+8)(SP), R14
	MOVQ	r15-(14*8+8)(SP), R15

	RET
2032
// runtime.debugCallCheck assumes that functions defined with the
// DEBUG_CALL_FN macro are safe points to inject calls.
//
// Each stub reserves a MAXSIZE-byte frame for the injected call but
// contains no real code of its own: it talks to the attached debugger
// through INT3 breakpoints (BYTE $0xcc), with R12 holding the protocol
// step — 0 at the first breakpoint, 1 at the second.
// NOTE(review): the precise meaning of steps 0/1 is documented with
// the debug call protocol on runtime·debugCallV2, outside this view —
// confirm there before relying on it.
#define DEBUG_CALL_FN(NAME,MAXSIZE)	\
TEXT NAME(SB),WRAPPER,$MAXSIZE-0;	\
	NO_LOCAL_POINTERS;		\
	MOVQ	$0, R12;		\
	BYTE	$0xcc;			\
	MOVQ	$1, R12;		\
	BYTE	$0xcc;			\
	RET

// One stub per supported frame size, matching the
// DEBUG_CALL_DISPATCH table above.
DEBUG_CALL_FN(debugCall32<>, 32)
DEBUG_CALL_FN(debugCall64<>, 64)
DEBUG_CALL_FN(debugCall128<>, 128)
DEBUG_CALL_FN(debugCall256<>, 256)
DEBUG_CALL_FN(debugCall512<>, 512)
DEBUG_CALL_FN(debugCall1024<>, 1024)
DEBUG_CALL_FN(debugCall2048<>, 2048)
DEBUG_CALL_FN(debugCall4096<>, 4096)
DEBUG_CALL_FN(debugCall8192<>, 8192)
DEBUG_CALL_FN(debugCall16384<>, 16384)
DEBUG_CALL_FN(debugCall32768<>, 32768)
DEBUG_CALL_FN(debugCall65536<>, 65536)
2055
// func debugCallPanicked(val interface{})
//
// debugCallPanicked reports a panic from a debugger-injected call back
// to the attached debugger: it copies the panic's interface value
// (type word and data word) to the top of the stack, sets R12 to 2,
// and executes INT3 so the debugger can inspect the value at SP and
// resume execution (per the protocol comment above: R12 == 2 means
// "the call panicked").
TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
	// Copy the panic value to the top of stack.
	MOVQ	val_type+0(FP), AX
	MOVQ	AX, 0(SP)
	MOVQ	val_data+8(FP), AX
	MOVQ	AX, 8(SP)
	MOVQ	$2, R12			// protocol: 2 = panicked, interface value at SP
	BYTE	$0xcc			// INT3: hand control to the debugger
	RET
2066
// panicBounds is the assembly entry point for failed index/slice
// bounds checks (per its name — reporting is delegated to the Go-side
// runtime·panicBounds64). It spills every integer register that could
// hold the offending index into its 144-byte frame, then passes the
// faulting PC (AX) and a pointer to the saved-register array (BX).
TEXT runtime·panicBounds<ABIInternal>(SB),NOSPLIT,$144-0
	NO_LOCAL_POINTERS
	// Save all 14 int registers that could have an index in them.
	// They may be pointers, but if they are they are dead.
	// The slot for register number i is at 16+8*i(SP); the SP (4)
	// and R14/g (14) slots are intentionally left unused.
	MOVQ	AX, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	DX, 32(SP)
	MOVQ	BX, 40(SP)
	// skip SP @ 48(SP)
	MOVQ	BP, 56(SP)
	MOVQ	SI, 64(SP)
	MOVQ	DI, 72(SP)
	MOVQ	R8, 80(SP)
	MOVQ	R9, 88(SP)
	MOVQ	R10, 96(SP)
	MOVQ	R11, 104(SP)
	MOVQ	R12, 112(SP)
	MOVQ	R13, 120(SP)
	// skip R14 @ 128(SP) (aka G)
	MOVQ	R15, 136(SP)

	MOVQ	SP, AX		// hide SP read from vet
	MOVQ	152(AX), AX	// PC immediately after call to panicBounds (144-byte frame + 8-byte return address)
	LEAQ	16(SP), BX	// BX = &saved register array
	CALL	runtime·panicBounds64<ABIInternal>(SB)
	RET
2093
#ifdef GOOS_android
// Use the free TLS_SLOT_APP slot #2 on Android Q.
// Earlier androids are set up in gcc_android.c.
// tls_g is statically initialized to 16, the byte offset of slot #2
// (2 slots * 8 bytes each).
DATA runtime·tls_g+0(SB)/8, $16
GLOBL runtime·tls_g+0(SB), NOPTR, $8
#endif
#ifdef GOOS_windows
// On Windows, tls_g is only reserved here (zero-initialized).
// NOTE(review): the actual TLS offset appears to be stored into it
// during OS-specific startup, outside this view — confirm there.
GLOBL runtime·tls_g+0(SB), NOPTR, $8
#endif
2103
// The compiler and assembler's -spectre=ret mode rewrites
// all indirect CALL AX / JMP AX instructions to be
// CALL retpolineAX / JMP retpolineAX.
// See https://support.google.com/faqs/answer/7625886.
//
// The hand-encoded bytes below form a classic retpoline: the CALL
// pushes the address of nospec and jumps forward to setup (rel32 =
// 2+2, the combined size of the PAUSE and JMP it skips). setup then
// overwrites the pushed return address with the branch-target register
// and RETs through it, so the indirect branch resolves via the return
// stack; mispredicted speculation is trapped in the PAUSE/JMP spin
// loop at nospec. reg is the x86 register number (0-15): its high bit
// becomes REX.R and its low three bits the ModRM reg field of the
// MOVQ store (SIB byte 0x24 = [rsp]).
#define RETPOLINE(reg) \
/* CALL setup */ BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0; \
/* nospec: */ \
/* PAUSE */ BYTE $0xF3; BYTE $0x90; \
/* JMP nospec */ BYTE $0xEB; BYTE $-(2+2); \
/* setup: */ \
/* MOVQ AX, 0(SP) */ BYTE $0x48|((reg&8)>>1); BYTE $0x89; \
	BYTE $0x04|((reg&7)<<3); BYTE $0x24; \
/* RET */ BYTE $0xC3
2117
// One retpoline thunk per general-purpose register; the macro argument
// is the register's x86 encoding (0-15). Encoding 4 is SP, which is
// never the target of a rewritten indirect branch, so there is no
// retpolineSP.
TEXT runtime·retpolineAX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(0)
TEXT runtime·retpolineCX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(1)
TEXT runtime·retpolineDX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(2)
TEXT runtime·retpolineBX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(3)
/* SP is 4, can't happen / magic encodings */
TEXT runtime·retpolineBP(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(5)
TEXT runtime·retpolineSI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(6)
TEXT runtime·retpolineDI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(7)
TEXT runtime·retpolineR8(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(8)
TEXT runtime·retpolineR9(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(9)
TEXT runtime·retpolineR10(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(10)
TEXT runtime·retpolineR11(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(11)
TEXT runtime·retpolineR12(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(12)
TEXT runtime·retpolineR13(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(13)
TEXT runtime·retpolineR14(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(14)
TEXT runtime·retpolineR15(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(15)
2134
// func getfp() uintptr
//
// getfp returns the frame pointer register as seen on entry. Because
// the function is NOFRAME and never modifies BP, this is the caller's
// frame pointer, returned in AX (the ABIInternal integer result
// register).
TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVQ	BP, AX
	RET
2138