Text file
src/runtime/asm_amd64.s
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "go_tls.h"
7 #include "funcdata.h"
8 #include "textflag.h"
9 #include "cgo/abi_amd64.h"
10
11 // _rt0_amd64 is common startup code for most amd64 systems when using
12 // internal linking. This is the entry point for the program from the
13 // kernel for an ordinary -buildmode=exe program. The stack holds the
14 // number of arguments and the C-style argv.
15 TEXT _rt0_amd64(SB),NOSPLIT,$-8
16 MOVQ 0(SP), DI // argc
17 LEAQ 8(SP), SI // argv
18 JMP runtime·rt0_go(SB) // rt0_go expects argc in DI, argv in SI
19
20 // main is common startup code for most amd64 systems when using
21 // external linking. The C startup code will call the symbol "main"
22 // passing argc and argv in the usual C ABI registers DI and SI.
23 TEXT main(SB),NOSPLIT,$-8
24 // DI/SI already hold argc/argv per the C ABI, which is exactly
25 // what rt0_go expects, so simply tail-jump there.
24 JMP runtime·rt0_go(SB)
25
26 // _rt0_amd64_lib is common startup code for most amd64 systems when
27 // using -buildmode=c-archive or -buildmode=c-shared. The linker will
28 // arrange to invoke this function as a global constructor (for
29 // c-archive) or when the shared library is loaded (for c-shared).
30 // We expect argc and argv to be passed in the usual C ABI registers
31 // DI and SI.
32 TEXT _rt0_amd64_lib(SB),NOSPLIT|NOFRAME,$0
33 // Transition from C ABI to Go ABI.
34 PUSH_REGS_HOST_TO_ABI0()
35
36 MOVQ DI, _rt0_amd64_lib_argc<>(SB) // stash argc/argv for the deferred init thread
37 MOVQ SI, _rt0_amd64_lib_argv<>(SB)
38
39 // Synchronous initialization.
40 #ifndef GOOS_windows
41 // Avoid calling it on Windows because it is not used
42 // and it would crash the application due to the autogenerated
43 // ABI wrapper trying to access a non-existent TLS slot.
44 CALL runtime·libpreinit(SB)
45 #endif
46
47 // Create a new thread to finish Go runtime initialization.
48 MOVQ _cgo_sys_thread_create(SB), AX
49 TESTQ AX, AX
50 JZ nocgo // no cgo thread-create hook: fall back to newosproc0 below
51
52 // We're calling back to C.
53 // Align stack per C ABI requirements.
54 MOVQ SP, BX // Callee-save in C ABI
55 ANDQ $~15, SP
56 MOVQ $_rt0_amd64_lib_go(SB), DI
57 MOVQ $0, SI
58 #ifdef GOOS_windows
59 // For Windows ABI
60 MOVQ DI, CX
61 MOVQ SI, DX
62 // Leave space for four words on the stack as required
63 // by the Windows amd64 calling convention.
64 ADJSP $32
65 #endif
66 CALL AX
67 #ifdef GOOS_windows
68 ADJSP $-32 // just to make the assembler not complain about unbalanced stack
69 #endif
70 MOVQ BX, SP
71 JMP restore
72
73 nocgo:
74 ADJSP $16
75 MOVQ $0x800000, 0(SP) // stacksize
76 MOVQ $_rt0_amd64_lib_go(SB), AX
77 MOVQ AX, 8(SP) // fn
78 CALL runtime·newosproc0(SB)
79 ADJSP $-16
80
81 restore:
82 POP_REGS_HOST_TO_ABI0()
83 RET
84
85 // _rt0_amd64_lib_go initializes the Go runtime.
86 // This is started in a separate thread by _rt0_amd64_lib.
87 TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
88 MOVQ _rt0_amd64_lib_argc<>(SB), DI // reload argc saved by _rt0_amd64_lib
89 MOVQ _rt0_amd64_lib_argv<>(SB), SI // reload argv saved by _rt0_amd64_lib
90 JMP runtime·rt0_go(SB) // rt0_go expects argc in DI, argv in SI
91
// Zero-initialized 8-byte slots holding argc/argv, written by
// _rt0_amd64_lib and read later by _rt0_amd64_lib_go.
92 DATA _rt0_amd64_lib_argc<>(SB)/8, $0
93 GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
94 DATA _rt0_amd64_lib_argv<>(SB)/8, $0
95 GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
96
// bad_cpu_msg is the 84-byte error text selected at build time by the
// GOAMD64 microarchitecture level; it is written to fd 2 by the bad_cpu
// path in rt0_go before exiting.
97 #ifdef GOAMD64_v2
98 DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v2 microarchitecture support.\n"
99 #endif
100
101 #ifdef GOAMD64_v3
102 DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v3 microarchitecture support.\n"
103 #endif
104
105 #ifdef GOAMD64_v4
106 DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v4 microarchitecture support.\n"
107 #endif
108
109 GLOBL bad_cpu_msg<>(SB), RODATA, $84
110
111 // Define a list of AMD64 microarchitecture level features
112 // https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
// As used by rt0_go: *_FEATURES_CX masks CPUID leaf 1 ECX;
// *_EXT_FEATURES_CX masks leaf 0x80000001 ECX; *_EXT_FEATURES_BX masks
// leaf 7 EBX; *_OS_SUPPORT_AX masks XCR0 (read via XGETBV with ECX=0).
113
114 // SSE3 SSSE3 CMPXCHG16B SSE4.1 SSE4.2 POPCNT
115 #define V2_FEATURES_CX (1 << 0 | 1 << 9 | 1 << 13 | 1 << 19 | 1 << 20 | 1 << 23)
116 // LAHF/SAHF
117 #define V2_EXT_FEATURES_CX (1 << 0)
118 // FMA MOVBE OSXSAVE AVX F16C
119 #define V3_FEATURES_CX (V2_FEATURES_CX | 1 << 12 | 1 << 22 | 1 << 27 | 1 << 28 | 1 << 29)
120 // ABM (for LZCNT)
121 #define V3_EXT_FEATURES_CX (V2_EXT_FEATURES_CX | 1 << 5)
122 // BMI1 AVX2 BMI2
123 #define V3_EXT_FEATURES_BX (1 << 3 | 1 << 5 | 1 << 8)
124 // XMM YMM
125 #define V3_OS_SUPPORT_AX (1 << 1 | 1 << 2)
126
127 #define V4_FEATURES_CX V3_FEATURES_CX
128
129 #define V4_EXT_FEATURES_CX V3_EXT_FEATURES_CX
130 // AVX512F AVX512DQ AVX512CD AVX512BW AVX512VL
131 #define V4_EXT_FEATURES_BX (V3_EXT_FEATURES_BX | 1 << 16 | 1 << 17 | 1 << 28 | 1 << 30 | 1 << 31)
132 // OPMASK ZMM
133 #define V4_OS_SUPPORT_AX (V3_OS_SUPPORT_AX | 1 << 5 | (1 << 6 | 1 << 7))
134
135 #ifdef GOAMD64_v2
136 #define NEED_MAX_CPUID 0x80000001
137 #define NEED_FEATURES_CX V2_FEATURES_CX
138 #define NEED_EXT_FEATURES_CX V2_EXT_FEATURES_CX
139 #endif
140
141 #ifdef GOAMD64_v3
142 #define NEED_MAX_CPUID 0x80000001
143 #define NEED_FEATURES_CX V3_FEATURES_CX
144 #define NEED_EXT_FEATURES_CX V3_EXT_FEATURES_CX
145 #define NEED_EXT_FEATURES_BX V3_EXT_FEATURES_BX
146 #define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
147 #endif
148
149 #ifdef GOAMD64_v4
150 #define NEED_MAX_CPUID 0x80000001
151 #define NEED_FEATURES_CX V4_FEATURES_CX
152 #define NEED_EXT_FEATURES_CX V4_EXT_FEATURES_CX
153 #define NEED_EXT_FEATURES_BX V4_EXT_FEATURES_BX
154
155 // Darwin requires a different approach to check AVX512 support, see CL 285572.
156 #ifdef GOOS_darwin
157 #define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
158 // These values are from:
159 // https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
160 #define commpage64_base_address 0x00007fffffe00000
161 #define commpage64_cpu_capabilities64 (commpage64_base_address+0x010)
162 #define commpage64_version (commpage64_base_address+0x01E)
163 #define AVX512F 0x0000004000000000
164 #define AVX512CD 0x0000008000000000
165 #define AVX512DQ 0x0000010000000000
166 #define AVX512BW 0x0000020000000000
167 #define AVX512VL 0x0000100000000000
168 #define NEED_DARWIN_SUPPORT (AVX512F | AVX512DQ | AVX512CD | AVX512BW | AVX512VL)
169 #else
170 #define NEED_OS_SUPPORT_AX V4_OS_SUPPORT_AX
171 #endif
172
173 #endif
174
175 TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
176 // copy arguments forward on an even stack
177 MOVQ DI, AX // argc
178 MOVQ SI, BX // argv
179 SUBQ $(5*8), SP // 3args 2auto
180 ANDQ $~15, SP
181 MOVQ AX, 24(SP)
182 MOVQ BX, 32(SP)
183
184 // This is typically the entry point for Go programs.
185 // Call stack unwinding must not proceed past this frame.
186 // Set the frame pointer register to 0 so that frame pointer-based unwinders
187 // (which don't use debug info for performance reasons)
188 // won't attempt to unwind past this function.
189 // See go.dev/issue/63630
190 MOVQ $0, BP
191
192 // create istack out of the given (operating system) stack.
193 // _cgo_init may update stackguard.
194 MOVQ $runtime·g0(SB), DI
195 LEAQ (-64*1024)(SP), BX
196 MOVQ BX, g_stackguard0(DI)
197 MOVQ BX, g_stackguard1(DI)
198 MOVQ BX, (g_stack+stack_lo)(DI)
199 MOVQ SP, (g_stack+stack_hi)(DI)
200
201 // find out information about the processor we're on
202 MOVL $0, AX
203 CPUID
204 CMPL AX, $0 // AX = max basic CPUID leaf
205 JE nocpuinfo // leaf 1 (features) unavailable
206
207 CMPL BX, $0x756E6547 // "Genu"
208 JNE notintel
209 CMPL DX, $0x49656E69 // "ineI"
210 JNE notintel
211 CMPL CX, $0x6C65746E // "ntel"
212 JNE notintel
213 MOVB $1, runtime·isIntel(SB)
214
215 notintel:
216 // Load EAX=1 cpuid flags
217 MOVL $1, AX
218 CPUID
219 MOVL AX, runtime·processorVersionInfo(SB)
220
221 nocpuinfo:
222 // if there is an _cgo_init, call it.
223 MOVQ _cgo_init(SB), AX
224 TESTQ AX, AX
225 JZ needtls
226 // arg 1: g0, already in DI
227 MOVQ $setg_gcc<>(SB), SI // arg 2: setg_gcc
228 MOVQ $0, DX // arg 3, 4: not used when using platform's TLS
229 MOVQ $0, CX
230 #ifdef GOOS_android
231 MOVQ $runtime·tls_g(SB), DX // arg 3: &tls_g
232 // arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
233 // Compensate for tls_g (+16).
234 MOVQ -16(TLS), CX
235 #endif
236 #ifdef GOOS_windows
237 MOVQ $runtime·tls_g(SB), DX // arg 3: &tls_g
238 // Adjust for the Win64 calling convention.
239 MOVQ CX, R9 // arg 4
240 MOVQ DX, R8 // arg 3
241 MOVQ SI, DX // arg 2
242 MOVQ DI, CX // arg 1
243 #endif
244 CALL AX
245
246 // update stackguard after _cgo_init
247 MOVQ $runtime·g0(SB), CX
248 MOVQ (g_stack+stack_lo)(CX), AX
249 ADDQ $const_stackGuard, AX
250 MOVQ AX, g_stackguard0(CX)
251 MOVQ AX, g_stackguard1(CX)
252
253 #ifndef GOOS_windows
254 JMP ok // Windows falls through: TLS is set up below even when cgo is used
255 #endif
256 needtls:
257 #ifdef GOOS_plan9
258 // skip TLS setup on Plan 9
259 JMP ok
260 #endif
261 #ifdef GOOS_solaris
262 // skip TLS setup on Solaris
263 JMP ok
264 #endif
265 #ifdef GOOS_illumos
266 // skip TLS setup on illumos
267 JMP ok
268 #endif
269 #ifdef GOOS_darwin
270 // skip TLS setup on Darwin
271 JMP ok
272 #endif
273 #ifdef GOOS_openbsd
274 // skip TLS setup on OpenBSD
275 JMP ok
276 #endif
277
278 #ifdef GOOS_windows
279 CALL runtime·wintls(SB)
280 #endif
281
282 LEAQ runtime·m0+m_tls(SB), DI
283 CALL runtime·settls(SB)
284
285 // store through it, to make sure it works
286 get_tls(BX)
287 MOVQ $0x123, g(BX)
288 MOVQ runtime·m0+m_tls(SB), AX
289 CMPQ AX, $0x123
290 JEQ 2(PC)
291 CALL runtime·abort(SB)
292 ok:
293 // set the per-goroutine and per-mach "registers"
294 get_tls(BX)
295 LEAQ runtime·g0(SB), CX
296 MOVQ CX, g(BX)
297 LEAQ runtime·m0(SB), AX
298
299 // save m->g0 = g0
300 MOVQ CX, m_g0(AX)
301 // save m0 to g0->m
302 MOVQ AX, g_m(CX)
303
304 CLD // convention is D is always left cleared
305
306 // Check GOAMD64 requirements
307 // We need to do this after setting up TLS, so that
308 // we can report an error if there is a failure. See issue 49586.
309 #ifdef NEED_FEATURES_CX
310 MOVL $0, AX
311 CPUID
312 CMPL AX, $0
313 JE bad_cpu
314 MOVL $1, AX
315 CPUID
316 ANDL $NEED_FEATURES_CX, CX
317 CMPL CX, $NEED_FEATURES_CX
318 JNE bad_cpu
319 #endif
320
321 #ifdef NEED_MAX_CPUID
322 MOVL $0x80000000, AX
323 CPUID
324 CMPL AX, $NEED_MAX_CPUID
325 JL bad_cpu
326 #endif
327
328 #ifdef NEED_EXT_FEATURES_BX
329 MOVL $7, AX
330 MOVL $0, CX
331 CPUID
332 ANDL $NEED_EXT_FEATURES_BX, BX
333 CMPL BX, $NEED_EXT_FEATURES_BX
334 JNE bad_cpu
335 #endif
336
337 #ifdef NEED_EXT_FEATURES_CX
338 MOVL $0x80000001, AX
339 CPUID
340 ANDL $NEED_EXT_FEATURES_CX, CX
341 CMPL CX, $NEED_EXT_FEATURES_CX
342 JNE bad_cpu
343 #endif
344
345 #ifdef NEED_OS_SUPPORT_AX
346 XORL CX, CX
347 XGETBV // read XCR0 (OS-enabled extended state) with CX=0
348 ANDL $NEED_OS_SUPPORT_AX, AX
349 CMPL AX, $NEED_OS_SUPPORT_AX
350 JNE bad_cpu
351 #endif
352
353 #ifdef NEED_DARWIN_SUPPORT
354 MOVQ $commpage64_version, BX
355 CMPW (BX), $13 // cpu_capabilities64 undefined in versions < 13
356 JL bad_cpu
357 MOVQ $commpage64_cpu_capabilities64, BX
358 MOVQ (BX), BX
359 MOVQ $NEED_DARWIN_SUPPORT, CX
360 ANDQ CX, BX
361 CMPQ BX, CX
362 JNE bad_cpu
363 #endif
364
365 CALL runtime·check(SB)
366
367 MOVL 24(SP), AX // copy argc
368 MOVL AX, 0(SP)
369 MOVQ 32(SP), AX // copy argv
370 MOVQ AX, 8(SP)
371 CALL runtime·args(SB)
372 CALL runtime·osinit(SB)
373 CALL runtime·schedinit(SB)
374
375 // create a new goroutine to start program
376 MOVQ $runtime·mainPC(SB), AX // entry
377 PUSHQ AX
378 CALL runtime·newproc(SB)
379 POPQ AX
380
381 // start this M
382 CALL runtime·mstart(SB)
383
384 CALL runtime·abort(SB) // mstart should never return
385 RET
386
387 bad_cpu: // show that the program requires a certain microarchitecture level.
388 MOVQ $2, 0(SP) // fd 2 (stderr)
389 MOVQ $bad_cpu_msg<>(SB), AX
390 MOVQ AX, 8(SP)
391 MOVQ $84, 16(SP) // message length
392 CALL runtime·write(SB)
393 MOVQ $1, 0(SP) // exit status 1
394 CALL runtime·exit(SB)
395 CALL runtime·abort(SB)
396 RET
397
398 // Prevent dead-code elimination of debugCallV2 and debugPinnerV1, which are
399 // intended to be called by debuggers.
400 MOVQ $runtime·debugPinnerV1<ABIInternal>(SB), AX
401 MOVQ $runtime·debugCallV2<ABIInternal>(SB), AX
402 RET
403
404 // mainPC is a function value for runtime.main, to be passed to newproc.
405 // The reference to runtime.main is made via ABIInternal, since the
406 // actual function (not the ABI0 wrapper) is needed by newproc.
407 DATA runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
408 GLOBL runtime·mainPC(SB),RODATA,$8 // read-only 8-byte funcval slot
409
// breakpoint raises a debugger trap at the point of the call.
410 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
411 BYTE $0xcc // INT3
412 RET
413
414 TEXT runtime·asminit(SB),NOSPLIT,$0-0
415 // No per-thread init. Nothing to do on amd64.
416 RET
417
418 TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
419 // This is the root frame of new Go-created OS threads.
420 // Call stack unwinding must not proceed past this frame.
421 // Set the frame pointer register to 0 so that frame pointer-based unwinders
422 // (which don't use debug info for performance reasons)
423 // won't attempt to unwind past this function.
424 // See go.dev/issue/63630
425 MOVQ $0, BP // MOVQ, not MOVD: MOVD is the arm64 mnemonic and does not assemble to a GP zeroing move on amd64
426 CALL runtime·mstart0(SB)
427 RET // not reached
428
429 /*
430 * go-routine
431 */
432
433 // func gogo(buf *gobuf)
434 // restore state from Gobuf; longjmp
435 TEXT runtime·gogo(SB), NOSPLIT, $0-8
436 MOVQ buf+0(FP), BX // gobuf
437 MOVQ gobuf_g(BX), DX
438 MOVQ 0(DX), CX // make sure g != nil (faults here if it is)
439 JMP gogo<>(SB) // continue with BX = gobuf, DX = g
440
441 TEXT gogo<>(SB), NOSPLIT, $0
// Expects DX = target g and BX = gobuf, as set up by runtime·gogo above.
442 get_tls(CX)
443 MOVQ DX, g(CX)
444 MOVQ DX, R14 // set the g register
445 MOVQ gobuf_sp(BX), SP // restore SP
446 MOVQ gobuf_ctxt(BX), DX
447 MOVQ gobuf_bp(BX), BP
448 MOVQ $0, gobuf_sp(BX) // clear to help garbage collector
449 MOVQ $0, gobuf_ctxt(BX)
450 MOVQ $0, gobuf_bp(BX)
451 MOVQ gobuf_pc(BX), BX
452 JMP BX // resume at the saved PC
453
454 // func mcall(fn func(*g))
455 // Switch to m->g0's stack, call fn(g).
456 // Fn must never return. It should gogo(&g->sched)
457 // to keep running g.
458 TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT, $0-8
459 #ifdef GOEXPERIMENT_runtimesecret
460 CMPL g_secret(R14), $0 // R14 = current g in the Go internal ABI
461 JEQ nosecret
462 CALL ·secretEraseRegistersMcall(SB)
463 nosecret:
464 #endif
465
466 MOVQ AX, DX // DX = fn
467
468 // Save state in g->sched. The caller's SP and PC are restored by gogo to
469 // resume execution in the caller's frame (implicit return). The caller's BP
470 // is also restored to support frame pointer unwinding.
471 MOVQ SP, BX // hide (SP) reads from vet
472 MOVQ 8(BX), BX // caller's PC
473 MOVQ BX, (g_sched+gobuf_pc)(R14)
474 LEAQ fn+0(FP), BX // caller's SP
475 MOVQ BX, (g_sched+gobuf_sp)(R14)
476 // Get the caller's frame pointer by dereferencing BP. Storing BP as it is
477 // can cause a frame pointer cycle, see CL 476235.
478 MOVQ (BP), BX // caller's BP
479 MOVQ BX, (g_sched+gobuf_bp)(R14)
480
481 // switch to m->g0 & its stack, call fn
482 MOVQ g_m(R14), BX
483 MOVQ m_g0(BX), SI // SI = g.m.g0
484 CMPQ SI, R14 // if g == m->g0 call badmcall
485 JNE goodm
486 JMP runtime·badmcall(SB)
487 goodm:
488 MOVQ R14, AX // AX (and arg 0) = g
489 MOVQ SI, R14 // g = g.m.g0
490 get_tls(CX) // Set G in TLS
491 MOVQ R14, g(CX)
492 MOVQ (g_sched+gobuf_sp)(R14), SP // sp = g0.sched.sp
493 MOVQ $0, BP // clear frame pointer, as caller may execute on another M
494 PUSHQ AX // open up space for fn's arg spill slot
495 MOVQ 0(DX), R12
496 CALL R12 // fn(g)
497 // The Windows native stack unwinder incorrectly classifies the next instruction
498 // as part of the function epilogue, producing a wrong call stack.
499 // Add a NOP to work around this issue. See go.dev/issue/67007.
500 BYTE $0x90
501 POPQ AX
502 JMP runtime·badmcall2(SB) // fn returned: that is an error
503 RET
504
505 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
506 // of the G stack. We need to distinguish the routine that
507 // lives at the bottom of the G stack from the one that lives
508 // at the top of the system stack because the one at the top of
509 // the system stack terminates the stack walk (see topofstack()).
510 // The frame layout needs to match systemstack
511 // so that it can pretend to be systemstack_switch.
512 TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
513 // Align for consistency with offset used in gosave_systemstack_switch
514 PCALIGN $8
515 UNDEF // not meant to execute; only its PC is used (see gosave_systemstack_switch)
516 // Make sure this function is not leaf,
517 // so the frame is saved.
518 CALL runtime·abort(SB)
519 RET
520
521 // func systemstack(fn func())
522 TEXT runtime·systemstack(SB), NOSPLIT, $0-8
523 #ifdef GOEXPERIMENT_runtimesecret
524 // If in secret mode, erase registers on transition
525 // from G stack to M stack,
526 get_tls(CX)
527 MOVQ g(CX), AX
528 CMPL g_secret(AX), $0
529 JEQ nosecret
530 CALL ·secretEraseRegisters(SB)
531 nosecret:
532 #endif
533
534 MOVQ fn+0(FP), DI // DI = fn
535 get_tls(CX)
536 MOVQ g(CX), AX // AX = g
537 MOVQ g_m(AX), BX // BX = m
538
539 CMPQ AX, m_gsignal(BX)
540 JEQ noswitch
541
542 MOVQ m_g0(BX), DX // DX = g0
543 CMPQ AX, DX
544 JEQ noswitch
545
546 CMPQ AX, m_curg(BX)
547 JNE bad
548
549 // Switch stacks.
550 // The original frame pointer is stored in BP,
551 // which is useful for stack unwinding.
552 // Save our state in g->sched. Pretend to
553 // be systemstack_switch if the G stack is scanned.
554 CALL gosave_systemstack_switch<>(SB)
555
556 // switch to g0
557 MOVQ DX, g(CX)
558 MOVQ DX, R14 // set the g register
559 MOVQ (g_sched+gobuf_sp)(DX), SP
560
561 // call target function
562 MOVQ DI, DX // DX = funcval (context), code pointer loaded next
563 MOVQ 0(DI), DI
564 CALL DI
565
566 // switch back to g
567 get_tls(CX)
568 MOVQ g(CX), AX
569 MOVQ g_m(AX), BX
570 MOVQ m_curg(BX), AX
571 MOVQ AX, g(CX)
572 MOVQ (g_sched+gobuf_sp)(AX), SP
573 MOVQ (g_sched+gobuf_bp)(AX), BP
574 MOVQ $0, (g_sched+gobuf_sp)(AX)
575 MOVQ $0, (g_sched+gobuf_bp)(AX)
576 RET
577
578 noswitch:
579 // already on m stack; tail call the function
580 // Using a tail call here cleans up tracebacks since we won't stop
581 // at an intermediate systemstack.
582 MOVQ DI, DX
583 MOVQ 0(DI), DI
584 // The function epilogue is not called on a tail call.
585 // Pop BP from the stack to simulate it.
586 POPQ BP
587 JMP DI
588
589 bad:
590 // Bad: g is not gsignal, not g0, not curg. What is it?
591 MOVQ $runtime·badsystemstack(SB), AX
592 CALL AX
593 INT $3
594
595 // func switchToCrashStack0(fn func())
596 TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
597 MOVQ g_m(R14), BX // curm
598
599 // set g to gcrash
600 LEAQ runtime·gcrash(SB), R14 // g = &gcrash
601 MOVQ BX, g_m(R14) // g.m = curm
602 MOVQ R14, m_g0(BX) // curm.g0 = g
603 get_tls(CX)
604 MOVQ R14, g(CX)
605
606 // switch to crashstack
607 MOVQ (g_stack+stack_hi)(R14), BX
608 SUBQ $(4*8), BX // leave a small margin below stack hi
609 MOVQ BX, SP
610
611 // call target function
612 MOVQ AX, DX // DX = funcval (context)
613 MOVQ 0(AX), AX
614 CALL AX
615
616 // should never return
617 CALL runtime·abort(SB)
618 UNDEF
619
620 /*
621 * support for morestack
622 */
623
624 // Called during function prolog when more stack is needed.
625 //
626 // The traceback routines see morestack on a g0 as being
627 // the top of a stack (for example, morestack calling newstack
628 // calling the scheduler calling newm calling gc), so we must
629 // record an argument size. For that purpose, it has no arguments.
630 TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
631 // Cannot grow scheduler stack (m->g0).
632 get_tls(CX)
633 MOVQ g(CX), DI // DI = g
634 MOVQ g_m(DI), BX // BX = m
635
636 // Set g->sched to context in f.
637 MOVQ 0(SP), AX // f's PC
638 MOVQ AX, (g_sched+gobuf_pc)(DI)
639 LEAQ 8(SP), AX // f's SP
640 MOVQ AX, (g_sched+gobuf_sp)(DI)
641 MOVQ BP, (g_sched+gobuf_bp)(DI)
642 MOVQ DX, (g_sched+gobuf_ctxt)(DI) // DX = closure context register (zeroed by morestack_noctxt)
643
644 MOVQ m_g0(BX), SI // SI = m.g0
645 CMPQ DI, SI
646 JNE 3(PC)
647 CALL runtime·badmorestackg0(SB)
648 CALL runtime·abort(SB)
649
650 // Cannot grow signal stack (m->gsignal).
651 MOVQ m_gsignal(BX), SI
652 CMPQ DI, SI
653 JNE 3(PC)
654 CALL runtime·badmorestackgsignal(SB)
655 CALL runtime·abort(SB)
656
657 // Called from f.
658 // Set m->morebuf to f's caller.
659 NOP SP // tell vet SP changed - stop checking offsets
660 MOVQ 8(SP), AX // f's caller's PC
661 MOVQ AX, (m_morebuf+gobuf_pc)(BX)
662 LEAQ 16(SP), AX // f's caller's SP
663 MOVQ AX, (m_morebuf+gobuf_sp)(BX)
664 MOVQ DI, (m_morebuf+gobuf_g)(BX)
665
666 // If in secret mode, erase registers on transition
667 // from G stack to M stack,
668 #ifdef GOEXPERIMENT_runtimesecret
669 CMPL g_secret(DI), $0
670 JEQ nosecret
671 CALL ·secretEraseRegisters(SB)
672 get_tls(CX)
673 MOVQ g(CX), DI // DI = g
674 MOVQ g_m(DI), BX // BX = m
675 nosecret:
676 #endif
677
678 // Call newstack on m->g0's stack.
679 MOVQ m_g0(BX), BX
680 MOVQ BX, g(CX)
681 MOVQ (g_sched+gobuf_sp)(BX), SP
682 MOVQ $0, BP // clear frame pointer, as caller may execute on another M
683 CALL runtime·newstack(SB)
684 CALL runtime·abort(SB) // crash if newstack returns
685 RET
686
687 // morestack but not preserving ctxt.
688 TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
689 MOVL $0, DX // zero the closure context register before tail-calling morestack
690 JMP runtime·morestack(SB)
691
692 // spillArgs stores return values from registers to a *internal/abi.RegArgs in R12.
// Stores the nine integer registers (AX..R11) followed by the fifteen
// vector registers (X0..X14), 8 bytes each, in sequence.
693 TEXT ·spillArgs(SB),NOSPLIT,$0-0
694 MOVQ AX, 0(R12)
695 MOVQ BX, 8(R12)
696 MOVQ CX, 16(R12)
697 MOVQ DI, 24(R12)
698 MOVQ SI, 32(R12)
699 MOVQ R8, 40(R12)
700 MOVQ R9, 48(R12)
701 MOVQ R10, 56(R12)
702 MOVQ R11, 64(R12)
703 MOVQ X0, 72(R12)
704 MOVQ X1, 80(R12)
705 MOVQ X2, 88(R12)
706 MOVQ X3, 96(R12)
707 MOVQ X4, 104(R12)
708 MOVQ X5, 112(R12)
709 MOVQ X6, 120(R12)
710 MOVQ X7, 128(R12)
711 MOVQ X8, 136(R12)
712 MOVQ X9, 144(R12)
713 MOVQ X10, 152(R12)
714 MOVQ X11, 160(R12)
715 MOVQ X12, 168(R12)
716 MOVQ X13, 176(R12)
717 MOVQ X14, 184(R12)
718 RET
719
720 // unspillArgs loads args into registers from a *internal/abi.RegArgs in R12.
// Exact mirror of spillArgs: nine integer registers then fifteen vector
// registers, 8 bytes each, at the same offsets.
721 TEXT ·unspillArgs(SB),NOSPLIT,$0-0
722 MOVQ 0(R12), AX
723 MOVQ 8(R12), BX
724 MOVQ 16(R12), CX
725 MOVQ 24(R12), DI
726 MOVQ 32(R12), SI
727 MOVQ 40(R12), R8
728 MOVQ 48(R12), R9
729 MOVQ 56(R12), R10
730 MOVQ 64(R12), R11
731 MOVQ 72(R12), X0
732 MOVQ 80(R12), X1
733 MOVQ 88(R12), X2
734 MOVQ 96(R12), X3
735 MOVQ 104(R12), X4
736 MOVQ 112(R12), X5
737 MOVQ 120(R12), X6
738 MOVQ 128(R12), X7
739 MOVQ 136(R12), X8
740 MOVQ 144(R12), X9
741 MOVQ 152(R12), X10
742 MOVQ 160(R12), X11
743 MOVQ 168(R12), X12
744 MOVQ 176(R12), X13
745 MOVQ 184(R12), X14
746 RET
747
748 // reflectcall: call a function with the given argument list
749 // func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
750 // we don't have variable-sized frames, so we use a small number
751 // of constant-sized-frame functions to encode a few bits of size in the pc.
752 // Caution: ugly multiline assembly macros in your future!
753
754 #define DISPATCH(NAME,MAXSIZE) \
755 CMPQ CX, $MAXSIZE; \
756 JA 3(PC); \
757 MOVQ $NAME(SB), AX; \
758 JMP AX
759 // Note: can't just "JMP NAME(SB)" - bad inlining results.
760
761 TEXT ·reflectcall(SB), NOSPLIT, $0-48
762 MOVLQZX frameSize+32(FP), CX // CX = frameSize selects the smallest sufficient call* variant
763 DISPATCH(runtime·call16, 16)
764 DISPATCH(runtime·call32, 32)
765 DISPATCH(runtime·call64, 64)
766 DISPATCH(runtime·call128, 128)
767 DISPATCH(runtime·call256, 256)
768 DISPATCH(runtime·call512, 512)
769 DISPATCH(runtime·call1024, 1024)
770 DISPATCH(runtime·call2048, 2048)
771 DISPATCH(runtime·call4096, 4096)
772 DISPATCH(runtime·call8192, 8192)
773 DISPATCH(runtime·call16384, 16384)
774 DISPATCH(runtime·call32768, 32768)
775 DISPATCH(runtime·call65536, 65536)
776 DISPATCH(runtime·call131072, 131072)
777 DISPATCH(runtime·call262144, 262144)
778 DISPATCH(runtime·call524288, 524288)
779 DISPATCH(runtime·call1048576, 1048576)
780 DISPATCH(runtime·call2097152, 2097152)
781 DISPATCH(runtime·call4194304, 4194304)
782 DISPATCH(runtime·call8388608, 8388608)
783 DISPATCH(runtime·call16777216, 16777216)
784 DISPATCH(runtime·call33554432, 33554432)
785 DISPATCH(runtime·call67108864, 67108864)
786 DISPATCH(runtime·call134217728, 134217728)
787 DISPATCH(runtime·call268435456, 268435456)
788 DISPATCH(runtime·call536870912, 536870912)
789 DISPATCH(runtime·call1073741824, 1073741824)
790 MOVQ $runtime·badreflectcall(SB), AX
791 JMP AX
792
// CALLFN defines one fixed-frame-size call* helper for reflectcall:
// it copies the stack arguments into its own frame, loads the register
// arguments, calls the target, then spills results and copies the
// stack results back via callRet.
793 #define CALLFN(NAME,MAXSIZE) \
794 TEXT NAME(SB), WRAPPER, $MAXSIZE-48; \
795 NO_LOCAL_POINTERS; \
796 /* copy arguments to stack */ \
797 MOVQ stackArgs+16(FP), SI; \
798 MOVLQZX stackArgsSize+24(FP), CX; \
799 MOVQ SP, DI; \
800 REP;MOVSB; \
801 /* set up argument registers */ \
802 MOVQ regArgs+40(FP), R12; \
803 CALL ·unspillArgs(SB); \
804 /* call function */ \
805 MOVQ f+8(FP), DX; \
806 PCDATA $PCDATA_StackMapIndex, $0; \
807 MOVQ (DX), R12; \
808 CALL R12; \
809 /* copy register return values back */ \
810 MOVQ regArgs+40(FP), R12; \
811 CALL ·spillArgs(SB); \
812 MOVLQZX stackArgsSize+24(FP), CX; \
813 MOVLQZX stackRetOffset+28(FP), BX; \
814 MOVQ stackArgs+16(FP), DI; \
815 MOVQ stackArgsType+0(FP), DX; \
816 MOVQ SP, SI; \
817 ADDQ BX, DI; \
818 ADDQ BX, SI; \
819 SUBQ BX, CX; \
820 CALL callRet<>(SB); \
821 RET
822
823 // callRet copies return values back at the end of call*. This is a
824 // separate function so it can allocate stack space for the arguments
825 // to reflectcallmove. It does not follow the Go ABI; it expects its
826 // arguments in registers.
827 TEXT callRet<>(SB), NOSPLIT, $40-0
828 NO_LOCAL_POINTERS
829 MOVQ DX, 0(SP) // stackArgsType
830 MOVQ DI, 8(SP) // destination (caller's results)
831 MOVQ SI, 16(SP) // source (call* frame results)
832 MOVQ CX, 24(SP) // byte count
833 MOVQ R12, 32(SP) // regArgs
834 CALL runtime·reflectcallmove(SB)
835 RET
836
// Instantiate the fixed-size call* helpers for every power-of-two frame
// size dispatched by reflectcall above.
837 CALLFN(·call16, 16)
838 CALLFN(·call32, 32)
839 CALLFN(·call64, 64)
840 CALLFN(·call128, 128)
841 CALLFN(·call256, 256)
842 CALLFN(·call512, 512)
843 CALLFN(·call1024, 1024)
844 CALLFN(·call2048, 2048)
845 CALLFN(·call4096, 4096)
846 CALLFN(·call8192, 8192)
847 CALLFN(·call16384, 16384)
848 CALLFN(·call32768, 32768)
849 CALLFN(·call65536, 65536)
850 CALLFN(·call131072, 131072)
851 CALLFN(·call262144, 262144)
852 CALLFN(·call524288, 524288)
853 CALLFN(·call1048576, 1048576)
854 CALLFN(·call2097152, 2097152)
855 CALLFN(·call4194304, 4194304)
856 CALLFN(·call8388608, 8388608)
857 CALLFN(·call16777216, 16777216)
858 CALLFN(·call33554432, 33554432)
859 CALLFN(·call67108864, 67108864)
860 CALLFN(·call134217728, 134217728)
861 CALLFN(·call268435456, 268435456)
862 CALLFN(·call536870912, 536870912)
863 CALLFN(·call1073741824, 1073741824)
864
// procyieldAsm spins for the requested number of cycles, issuing PAUSE
// each iteration. A cycle count of 0 returns immediately.
865 TEXT runtime·procyieldAsm(SB),NOSPLIT,$0-0
866 MOVL cycles+0(FP), AX
867 TESTL AX, AX
868 JZ done
869 again:
870 PAUSE // spin-wait hint to the CPU
871 SUBL $1, AX
872 JNZ again
873 done:
874 RET
875
876
877 TEXT ·publicationBarrier<ABIInternal>(SB),NOSPLIT,$0-0
878 // Stores are already ordered on x86, so this is just a
879 // compile barrier. No fence instruction is needed.
880 RET
881
882 // Save state of caller into g->sched,
883 // but using fake PC from systemstack_switch.
884 // Must only be called from functions with frame pointer
885 // and without locals ($0) or else unwinding from
886 // systemstack_switch is incorrect.
887 // Smashes R9.
888 TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
889 // Take systemstack_switch PC and add 8 bytes to skip
890 // the prologue. Keep 8 bytes offset consistent with
891 // PCALIGN $8 in systemstack_switch, pointing start of
892 // UNDEF instruction beyond prologue.
893 MOVQ $runtime·systemstack_switch+8(SB), R9
894 MOVQ R9, (g_sched+gobuf_pc)(R14)
895 LEAQ 8(SP), R9 // caller's SP (skip our return address)
896 MOVQ R9, (g_sched+gobuf_sp)(R14)
897 MOVQ BP, (g_sched+gobuf_bp)(R14)
898 // Assert ctxt is zero. See func save.
899 MOVQ (g_sched+gobuf_ctxt)(R14), R9
900 TESTQ R9, R9
901 JZ 2(PC)
902 CALL runtime·abort(SB)
903 RET
904
905 // func asmcgocall_no_g(fn, arg unsafe.Pointer)
906 // Call fn(arg) aligned appropriately for the gcc ABI.
907 // Called on a system stack, and there may be no g yet (during needm).
908 TEXT ·asmcgocall_no_g(SB),NOSPLIT,$32-16
909 MOVQ fn+0(FP), AX
910 MOVQ arg+8(FP), BX
911 MOVQ SP, DX
912 ANDQ $~15, SP // alignment
913 MOVQ DX, 8(SP) // save original SP so it can be restored after the call
914 MOVQ BX, DI // DI = first argument in AMD64 ABI
915 MOVQ BX, CX // CX = first argument in Win64
916 CALL AX
917 MOVQ 8(SP), DX
918 MOVQ DX, SP
919 RET
920
921 // asmcgocall_landingpad calls AX with BX as argument.
922 // Must be called on the system stack.
923 TEXT ·asmcgocall_landingpad(SB),NOSPLIT,$0-0
// On entry: AX = C function pointer, BX = its single argument.
924 #ifdef GOOS_windows
925 // Make sure we have enough room for 4 stack-backed fast-call
926 // registers as per Windows amd64 calling convention.
927 ADJSP $32
928 // On Windows, asmcgocall_landingpad acts as landing pad for exceptions
929 // thrown in the cgo call. Exceptions that reach this function will be
930 // handled by runtime.sehtramp thanks to the SEH metadata added
931 // by the compiler.
932 // Note that runtime.sehtramp can't be attached directly to asmcgocall
933 // because its initial stack pointer can be outside the system stack bounds,
934 // and Windows stops the stack unwinding without calling the exception handler
935 // when it reaches that point.
936 MOVQ BX, CX // CX = first argument in Win64
937 CALL AX
938 // The exception handler is not called if the next instruction is part of
939 // the epilogue, which includes the RET instruction, so we need to add a NOP here.
940 BYTE $0x90
941 ADJSP $-32
942 RET
943 #endif
944 // Tail call AX on non-Windows, as the extra stack frame is not needed.
945 MOVQ BX, DI // DI = first argument in AMD64 ABI
946 JMP AX
947
948 // func asmcgocall(fn, arg unsafe.Pointer) int32
949 // Call fn(arg) on the scheduler stack,
950 // aligned appropriately for the gcc ABI.
951 // See cgocall.go for more details.
952 TEXT ·asmcgocall(SB),NOSPLIT,$0-20
953 // Figure out if we need to switch to m->g0 stack.
954 // We get called to create new OS threads too, and those
955 // come in on the m->g0 stack already. Or we might already
956 // be on the m->gsignal stack.
957 get_tls(CX)
958 MOVQ g(CX), DI
959 CMPQ DI, $0
960 JEQ nosave
961 MOVQ g_m(DI), R8
962 MOVQ m_gsignal(R8), SI
963 CMPQ DI, SI
964 JEQ nosave
965 MOVQ m_g0(R8), SI
966 CMPQ DI, SI
967 JEQ nosave
968
969 // Running on a user G
970 // Figure out if we're running secret code and clear the registers
971 // so that the C code we're about to call doesn't spill confidential
972 // information into memory
973 #ifdef GOEXPERIMENT_runtimesecret
974 CMPL g_secret(DI), $0
975 JEQ nosecret
976 CALL ·secretEraseRegisters(SB)
977
978 nosecret:
979 #endif
980 MOVQ fn+0(FP), AX
981 MOVQ arg+8(FP), BX
982 MOVQ SP, DX
983
984 // Switch to system stack.
985 // The original frame pointer is stored in BP,
986 // which is useful for stack unwinding.
987 CALL gosave_systemstack_switch<>(SB)
988 MOVQ SI, g(CX)
989 MOVQ (g_sched+gobuf_sp)(SI), SP
990
991 // Now on a scheduling stack (a pthread-created stack).
992 SUBQ $16, SP // room for the two saved words below
993 ANDQ $~15, SP // alignment for gcc ABI
994 MOVQ DI, 8(SP) // save g
995 MOVQ (g_stack+stack_hi)(DI), DI
996 SUBQ DX, DI
997 MOVQ DI, 0(SP) // save depth in stack (can't just save SP, as stack might be copied during a callback)
998 CALL runtime·asmcgocall_landingpad(SB)
999
1000 // Restore registers, g, stack pointer.
1001 get_tls(CX)
1002 MOVQ 8(SP), DI
1003 MOVQ (g_stack+stack_hi)(DI), SI
1004 SUBQ 0(SP), SI // recompute SP from the saved depth
1005 MOVQ DI, g(CX)
1006 MOVQ SI, SP
1007
1008 MOVL AX, ret+16(FP)
1009 RET
1010
1011 nosave:
1012 // Running on a system stack, perhaps even without a g.
1013 // Having no g can happen during thread creation or thread teardown
1014 // (see needm/dropm on Solaris, for example).
1015 // This code is like the above sequence but without saving/restoring g
1016 // and without worrying about the stack moving out from under us
1017 // (because we're on a system stack, not a goroutine stack).
1018 // The above code could be used directly if already on a system stack,
1019 // but then the only path through this code would be a rare case on Solaris.
1020 // Using this code for all "already on system stack" calls exercises it more,
1021 // which should help keep it correct.
1022 MOVQ fn+0(FP), AX
1023 MOVQ arg+8(FP), BX
1024 MOVQ SP, DX
1025
1026 SUBQ $16, SP
1027 ANDQ $~15, SP
1028 MOVQ $0, 8(SP) // where above code stores g, in case someone looks during debugging
1029 MOVQ DX, 0(SP) // save original stack pointer
1030 CALL runtime·asmcgocall_landingpad(SB)
1031 MOVQ 0(SP), SI // restore original stack pointer
1032 MOVQ SI, SP
1033 MOVL AX, ret+16(FP)
1034 RET
1035
1036 #ifdef GOOS_windows
1037 // Dummy TLS that's used on Windows so that we don't crash trying
1038 // to restore the G register in needm. needm and its callees are
1039 // very careful never to actually use the G, the TLS just can't be
1040 // unset since we're in Go code.
1041 GLOBL zeroTLS<>(SB),RODATA,$const_tlsSize // zero-filled read-only block of const_tlsSize bytes
1042 #endif
1043
// func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
// See cgocall.go for more details.
//
// Entry point for calls from C back into Go. Ensures there is a
// valid m/g (borrowing one via needm if the thread was created by C),
// switches from the g0/system stack to m->curg, and invokes
// runtime.cgocallbackg(fn, frame, ctxt) there, then switches back.
TEXT ·cgocallback(SB),NOSPLIT,$24-24
	NO_LOCAL_POINTERS

	// Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g.
	// It is used to dropm while thread is exiting.
	MOVQ	fn+0(FP), AX
	CMPQ	AX, $0
	JNE	loadg
	// Restore the g from frame.
	get_tls(CX)
	MOVQ	frame+8(FP), BX
	MOVQ	BX, g(CX)
	JMP	dropm

loadg:
	// If g is nil, Go did not create the current thread,
	// or if this thread never called into Go on pthread platforms.
	// Call needm to obtain one m for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	// On Windows TLS itself may not be set up yet; treat a nil TLS
	// pointer the same as a nil g (skip the g load below).
	MOVL	$0, BX
	CMPQ	CX, $0
	JEQ	2(PC)
#endif
	MOVQ	g(CX), BX
	CMPQ	BX, $0
	JEQ	needm
	MOVQ	g_m(BX), BX
	MOVQ	BX, savedm-8(SP)	// saved copy of oldm
	JMP	havem
needm:
#ifdef GOOS_windows
	// Set up a dummy TLS value. needm is careful not to use it,
	// but it needs to be there to prevent autogenerated code from
	// crashing when it loads from it.
	// We don't need to clear it or anything later because needm
	// will set up TLS properly.
	MOVQ	$zeroTLS<>(SB), DI
	CALL	runtime·settls(SB)
#endif
	// On some platforms (Windows) we cannot call needm through
	// an ABI wrapper because there's no TLS set up, and the ABI
	// wrapper will try to restore the G register (R14) from TLS.
	// Clear X15 because Go expects it and we're not calling
	// through a wrapper, but otherwise avoid setting the G
	// register in the wrapper and call needm directly. It
	// takes no arguments and doesn't return any values so
	// there's no need to handle that. Clear R14 so that there's
	// a bad value in there, in case needm tries to use it.
	XORPS	X15, X15
	XORQ	R14, R14
	MOVQ	$runtime·needAndBindM<ABIInternal>(SB), AX
	CALL	AX
	MOVQ	$0, savedm-8(SP)	// oldm was nil: mark so we may dropm below
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVQ	m_g0(BX), SI
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVQ	m_g0(BX), SI
	MOVQ	(g_sched+gobuf_sp)(SI), AX
	MOVQ	AX, 0(SP)
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the curg stack and
	// open a frame the same size as cgocallback's g0 frame.
	// Once we switch to the curg stack, the pushed PC will appear
	// to be the return PC of cgocallback, so that the traceback
	// will seamlessly trace back into the earlier calls.
	MOVQ	m_curg(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
	MOVQ	(g_sched+gobuf_pc)(SI), BX
	MOVQ	BX, -8(DI)	// "push" return PC on the g stack
	// Gather our arguments into registers.
	MOVQ	fn+0(FP), BX
	MOVQ	frame+8(FP), CX
	MOVQ	ctxt+16(FP), DX
	// Compute the size of the frame, including return PC and, if
	// GOEXPERIMENT=framepointer, the saved base pointer
	LEAQ	fn+0(FP), AX
	SUBQ	SP, AX	// AX is our actual frame size
	SUBQ	AX, DI	// Allocate the same frame size on the g stack
	MOVQ	DI, SP

	MOVQ	BX, 0(SP)
	MOVQ	CX, 8(SP)
	MOVQ	DX, 16(SP)
	MOVQ	$runtime·cgocallbackg(SB), AX
	CALL	AX	// indirect call to bypass nosplit check. We're on a different stack now.

	// Compute the size of the frame again. FP and SP have
	// completely different values here than they did above,
	// but only their difference matters.
	LEAQ	fn+0(FP), AX
	SUBQ	SP, AX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	SP, DI
	ADDQ	AX, DI
	MOVQ	-8(DI), BX
	MOVQ	BX, (g_sched+gobuf_pc)(SI)
	MOVQ	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP
	MOVQ	0(SP), AX
	MOVQ	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m,
	// 1. for the duration of the call on non-pthread platforms,
	// 2. or the duration of the C thread alive on pthread platforms.
	// If the m on entry wasn't nil,
	// 1. the thread might be a Go thread,
	// 2. or it wasn't the first call from a C thread on pthread platforms,
	// since then we skip dropm to reuse the m in the first call.
	MOVQ	savedm-8(SP), BX
	CMPQ	BX, $0
	JNE	done

	// Skip dropm to reuse it in the next call, when a pthread key has been created.
	MOVQ	_cgo_pthread_key_created(SB), AX
	// It means cgo is disabled when _cgo_pthread_key_created is a nil pointer, need dropm.
	CMPQ	AX, $0
	JEQ	dropm
	CMPQ	(AX), $0
	JNE	done

dropm:
	MOVQ	$runtime·dropm(SB), AX
	CALL	AX	// indirect call, same nosplit-check reasoning as above
#ifdef GOOS_windows
	// We need to clear the TLS pointer in case the next
	// thread that comes into Go tries to reuse that space
	// but uses the same M.
	XORQ	DI, DI
	CALL	runtime·settls(SB)
#endif
done:

	// Done!
	RET
1224
// func setg(gg *g)
// set g. for use by needm.
// Stores gg into the thread-local g slot; does not touch the
// R14 g register (callers that need that use setg_gcc or the
// ABI wrappers).
TEXT runtime·setg(SB), NOSPLIT, $0-8
	MOVQ	gg+0(FP), BX
	get_tls(CX)
	MOVQ	BX, g(CX)	// TLS g = gg
	RET
1232
// void setg_gcc(G*); set g called from gcc.
// The new g arrives in DI per the C calling convention; it is
// written both to TLS and to the ABIInternal g register (R14).
TEXT setg_gcc<>(SB),NOSPLIT,$0
	get_tls(AX)
	MOVQ	DI, g(AX)	// TLS g = DI
	MOVQ	DI, R14		// set the g register
	RET
1239
// abort crashes the process with a breakpoint trap (INT3, signal
// SIGTRAP). The loop after it keeps execution pinned here if the
// trap somehow returns.
TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$3
loop:
	JMP	loop
1244
// check that SP is in range [g->stack.lo, g->stack.hi)
// Aborts unless stack.lo < SP < stack.hi (both comparisons are
// unsigned, as required for addresses).
TEXT runtime·stackcheck(SB), NOSPLIT|NOFRAME, $0-0
	get_tls(CX)
	MOVQ	g(CX), AX
	CMPQ	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)			// ok if stack.hi > SP; otherwise abort
	CALL	runtime·abort(SB)
	CMPQ	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)			// ok if SP > stack.lo; otherwise abort
	CALL	runtime·abort(SB)
	RET
1256
// func cputicks() int64
// Reads the CPU timestamp counter. Uses RDTSCP when available
// (self-serializing); otherwise serializes explicitly with
// MFENCE;LFENCE before a plain RDTSC. Either way the 64-bit
// result is assembled from EDX:EAX.
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	CMPB	internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
	JNE	fences
	// Instruction stream serializing RDTSCP is supported.
	// RDTSCP is supported by Intel Nehalem (2008) and
	// AMD K8 Rev. F (2006) and newer.
	RDTSCP
done:
	SHLQ	$32, DX		// DX = high 32 bits << 32
	ADDQ	DX, AX		// AX = full 64-bit tick count
	MOVQ	AX, ret+0(FP)
	RET
fences:
	// MFENCE is instruction stream serializing and flushes the
	// store buffers on AMD. The serialization semantics of LFENCE on AMD
	// are dependent on MSR C001_1029 and CPU generation.
	// LFENCE on Intel does wait for all previous instructions to have executed.
	// Intel recommends MFENCE;LFENCE in its manuals before RDTSC to have all
	// previous instructions executed and all previous loads and stores to globally visible.
	// Using MFENCE;LFENCE here aligns the serializing properties without
	// runtime detection of CPU manufacturer.
	MFENCE
	LFENCE
	RDTSC
	JMP done
1283
// func memhash(p unsafe.Pointer, h, s uintptr) uintptr
// hash function using AES hardware instructions
// Tail-jumps to the shared AES kernel when AES-NI hashing is
// enabled, otherwise to the portable software fallback.
TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT,$0-32
	// AX = ptr to data
	// BX = seed
	// CX = size
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	JMP	runtime·aeshashbody<>(SB)
noaes:
	JMP	runtime·memhashFallback<ABIInternal>(SB)
1295
// func strhash(p unsafe.Pointer, h uintptr) uintptr
// AX points at a string header; unpack its data pointer and length
// into the AX/CX registers expected by aeshashbody, or fall back to
// the software hash when AES-NI hashing is disabled.
TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT,$0-24
	// AX = ptr to string struct
	// BX = seed
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVQ	8(AX), CX	// length of string
	MOVQ	(AX), AX	// string data
	JMP	runtime·aeshashbody<>(SB)
noaes:
	JMP	runtime·strhashFallback<ABIInternal>(SB)
1307
// AX: data
// BX: hash seed
// CX: length
// At return: AX = return value
//
// Shared AES-NI hash kernel for memhash/strhash. The input length
// selects one of several strategies: tiny inputs (<16 bytes) are
// loaded with page-boundary care and masked/shifted; mid sizes use
// 2/4/8 parallel AES lanes over (possibly overlapping) blocks; large
// inputs run a 128-bytes-per-iteration loop over 8 lanes.
TEXT runtime·aeshashbody<>(SB),NOSPLIT,$0-0
	// Fill an SSE register with our seeds.
	MOVQ	BX, X0				// 64 bits of per-table hash seed
	PINSRW	$4, CX, X0			// 16 bits of length
	PSHUFHW $0, X0, X0			// repeat length 4 times total
	MOVO	X0, X1				// save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
	AESENC	X0, X0				// scramble seed

	// Dispatch on length.
	CMPQ	CX, $16
	JB	aes0to15
	JE	aes16
	CMPQ	CX, $32
	JBE	aes17to32
	CMPQ	CX, $64
	JBE	aes33to64
	CMPQ	CX, $128
	JBE	aes65to128
	JMP	aes129plus

aes0to15:
	TESTQ	CX, CX
	JE	aes0

	ADDQ	$16, AX
	TESTW	$0xff0, AX		// AX now ends the 16-byte window; low bits all set => window may straddle a page
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	ADDQ	CX, CX			// CX*2 so that (CX*8) below indexes 16-byte table entries
	MOVQ	$masks<>(SB), AX
	PAND	(AX)(CX*8), X1		// keep only the low CX bytes
final1:
	PXOR	X0, X1	// xor data with seed
	AESENC	X1, X1	// scramble combo 3 times
	AESENC	X1, X1
	AESENC	X1, X1
	MOVQ	X1, AX	// return X1
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(CX*1), X1
	ADDQ	CX, CX			// index 16-byte shifts<> entries, as above
	MOVQ	$shifts<>(SB), AX
	PSHUFB	(AX)(CX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVQ	X0, AX	// return X0
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed
	MOVOU	(AX), X2
	MOVOU	-16(AX)(CX*1), X3	// last 16 bytes; overlaps X2 when CX < 32

	// xor with seed
	PXOR	X0, X2
	PXOR	X1, X3

	// scramble 3 times
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVQ	X2, AX	// return X2
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// first 32 and last 32 bytes (may overlap)
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(CX*1), X6
	MOVOU	-16(AX)(CX*1), X7

	PXOR	X0, X4
	PXOR	X1, X5
	PXOR	X2, X6
	PXOR	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// fold 4 lanes into 1
	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVQ	X4, AX	// return X4
	RET

aes65to128:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// load data: first 64 and last 64 bytes (may overlap)
	MOVOU	(AX), X8
	MOVOU	16(AX), X9
	MOVOU	32(AX), X10
	MOVOU	48(AX), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor with seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// scramble 3 times
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// combine results (fold 8 lanes into 1)
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	// X15 must be zero on return
	PXOR	X15, X15
	MOVQ	X8, AX	// return X8
	RET

aes129plus:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// start with last (possibly overlapping) block
	MOVOU	-128(AX)(CX*1), X8
	MOVOU	-112(AX)(CX*1), X9
	MOVOU	-96(AX)(CX*1), X10
	MOVOU	-80(AX)(CX*1), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor in seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// compute number of remaining 128-byte blocks
	DECQ	CX
	SHRQ	$7, CX

	PCALIGN $16
aesloop:
	// scramble state
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X8
	AESENC	X1, X9
	AESENC	X2, X10
	AESENC	X3, X11
	MOVOU	64(AX), X4
	MOVOU	80(AX), X5
	MOVOU	96(AX), X6
	MOVOU	112(AX), X7
	AESENC	X4, X12
	AESENC	X5, X13
	AESENC	X6, X14
	AESENC	X7, X15

	ADDQ	$128, AX
	DECQ	CX
	JNE	aesloop

	// 3 more scrambles to finish
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// fold 8 lanes into 1
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	// X15 must be zero on return
	PXOR	X15, X15
	MOVQ	X8, AX	// return X8
	RET
1644
// func memhash32(p unsafe.Pointer, h uintptr) uintptr
// ABIInternal for performance.
// Fixed-size 4-byte hash: pack seed + data into X0 and run three
// AES rounds keyed by aeskeysched; falls back to the software hash
// when AES-NI hashing is disabled.
TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT,$0-24
	// AX = ptr to data
	// BX = seed
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVQ	BX, X0	// X0 = seed
	PINSRD	$2, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, AX	// return X0
	RET
noaes:
	JMP	runtime·memhash32Fallback<ABIInternal>(SB)
1661
// func memhash64(p unsafe.Pointer, h uintptr) uintptr
// ABIInternal for performance.
// Fixed-size 8-byte hash, same structure as memhash32 but inserting
// a full quadword of data.
TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT,$0-24
	// AX = ptr to data
	// BX = seed
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVQ	BX, X0	// X0 = seed
	PINSRQ	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, AX	// return X0
	RET
noaes:
	JMP	runtime·memhash64Fallback<ABIInternal>(SB)
1678
// simple mask to get rid of data in the high part of the register.
// 16 entries of 16 bytes each (two DATA quadwords per entry);
// entry i keeps the low i bytes and zeroes the rest. Indexed by
// aeshashbody with CX doubled so (CX*8) steps 16 bytes per length.
DATA masks<>+0x00(SB)/8, $0x0000000000000000
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL masks<>(SB),RODATA,$256
1713
// func checkASM() bool
// Returns true iff the assembly-side invariants hold; currently
// just that the masks/shifts tables are 16-byte aligned (required
// for their use as PAND/PSHUFB memory operands in aeshashbody).
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
	MOVQ	$masks<>(SB), AX
	MOVQ	$shifts<>(SB), BX
	ORQ	BX, AX			// low 4 bits of either address spoil the test
	TESTQ	$15, AX
	SETEQ	ret+0(FP)
	RET
1723
// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// 16 entries of 16 bytes (two DATA quadwords each); byte value 0xff
// in a PSHUFB control zeroes the corresponding destination byte.
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256
1760
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
// Result is left in AX (the C return register); only caller-saved
// registers are touched.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVQ	g(CX), AX	// AX = g
	MOVQ	g_m(AX), AX	// AX = g.m
	MOVQ	m_curg(AX), AX	// AX = m.curg
	MOVQ	(g_stack+stack_hi)(AX), AX	// AX = curg.stack.hi
	RET
1770
// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
// The leading NOP keeps goexit+PCQuantum inside this function's
// code range so tracebacks attribute the return PC here.
TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME|NOFRAME,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP
1778
// This is called from .init_array and follows the platform, not Go, ABI.
// Appends the moduledata passed in DI to the runtime's linked list
// (lastmoduledatap). R15 is preserved because the C ABI treats it
// as callee-save while the global accesses below may implicitly use it.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
	MOVQ	runtime·lastmoduledatap(SB), AX
	MOVQ	DI, moduledata_next(AX)		// tail.next = new module
	MOVQ	DI, runtime·lastmoduledatap(SB)	// tail = new module
	POPQ	R15
	RET
1787
// Initialize special registers then jump to sigpanic.
// This function is injected from the signal handler for panicking
// signals. It is quite painful to set X15 in the signal context,
// so we do it here.
// Re-establishes the ABIInternal fixed registers (g in R14,
// zero in X15) before tail-jumping to sigpanic.
TEXT ·sigpanic0(SB),NOSPLIT,$0-0
	get_tls(R14)
	MOVQ	g(R14), R14	// R14 = g (ABIInternal g register)
	XORPS	X15, X15	// X15 = 0 (ABIInternal fixed zero register)
	JMP	·sigpanic<ABIInternal>(SB)
1797
// gcWriteBarrier informs the GC about heap pointer writes.
//
// gcWriteBarrier returns space in a write barrier buffer which
// should be filled in by the caller.
// gcWriteBarrier does NOT follow the Go ABI. It accepts the
// number of bytes of buffer needed in R11, and returns a pointer
// to the buffer space in R11.
// It clobbers FLAGS. It does not clobber any general-purpose registers,
// but may clobber others (e.g., SSE registers).
// Typical use would be, when doing *(CX+88) = AX
//     CMPL $0, runtime.writeBarrier(SB)
//     JEQ dowrite
//     CALL runtime.gcBatchBarrier2(SB)
//     (i.e. the two-slot/16-byte variant; see runtime·gcWriteBarrier2 below)
//     MOVQ AX, (R11)
//     MOVQ 88(CX), DX
//     MOVQ DX, 8(R11)
// dowrite:
//     MOVQ AX, 88(CX)
TEXT gcWriteBarrier<>(SB),NOSPLIT,$112
	// Save the registers clobbered by the fast path. This is slightly
	// faster than having the caller spill these.
	MOVQ	R12, 96(SP)
	MOVQ	R13, 104(SP)
retry:
	// TODO: Consider passing g.m.p in as an argument so they can be shared
	// across a sequence of write barriers.
	MOVQ	g_m(R14), R13		// R14 holds g (ABIInternal fixed register)
	MOVQ	m_p(R13), R13
	// Get current buffer write position.
	MOVQ	(p_wbBuf+wbBuf_next)(R13), R12	// original next position
	ADDQ	R11, R12		// new next position
	// Is the buffer full?
	CMPQ	R12, (p_wbBuf+wbBuf_end)(R13)
	JA	flush
	// Commit to the larger buffer.
	MOVQ	R12, (p_wbBuf+wbBuf_next)(R13)
	// Make return value (the original next position)
	SUBQ	R11, R12
	MOVQ	R12, R11
	// Restore registers.
	MOVQ	96(SP), R12
	MOVQ	104(SP), R13
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	// It is possible for wbBufFlush to clobber other registers
	// (e.g., SSE registers), but the compiler takes care of saving
	// those in the caller if necessary. This strikes a balance
	// with registers that are likely to be used.
	//
	// We don't have type information for these, but all code under
	// here is NOSPLIT, so nothing will observe these.
	//
	// TODO: We could strike a different balance; e.g., saving X0
	// and not saving GP registers that are less likely to be used.
	MOVQ	DI, 0(SP)
	MOVQ	AX, 8(SP)
	MOVQ	BX, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	DX, 32(SP)
	// DI already saved
	MOVQ	SI, 40(SP)
	MOVQ	BP, 48(SP)
	MOVQ	R8, 56(SP)
	MOVQ	R9, 64(SP)
	MOVQ	R10, 72(SP)
	MOVQ	R11, 80(SP)	// preserve the requested size across the flush
	// R12 already saved
	// R13 already saved
	// R14 is g
	MOVQ	R15, 88(SP)

	CALL	runtime·wbBufFlush(SB)

	MOVQ	0(SP), DI
	MOVQ	8(SP), AX
	MOVQ	16(SP), BX
	MOVQ	24(SP), CX
	MOVQ	32(SP), DX
	MOVQ	40(SP), SI
	MOVQ	48(SP), BP
	MOVQ	56(SP), R8
	MOVQ	64(SP), R9
	MOVQ	72(SP), R10
	MOVQ	80(SP), R11
	MOVQ	88(SP), R15
	JMP	retry	// buffer is empty now; reservation will succeed
1887
// gcWriteBarrier1..8: compiler-called entry points that reserve
// N*8 bytes (N pointer slots) in the write barrier buffer. Each
// loads the byte count into R11 and tail-jumps to the common
// gcWriteBarrier<> implementation above.
TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$8, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$16, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$24, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$32, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$40, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$48, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$56, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$64, R11
	JMP	gcWriteBarrier<>(SB)
1912
// Error string reported to the debugger by debugCallV2 when the
// requested argument frame exceeds the largest DEBUG_CALL_FN size.
DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below
1915
// debugCallV2 is the entry point for debugger-injected function
// calls on running goroutines. It informs the runtime that a
// debug call has been injected and creates a call frame for the
// debugger to fill in.
//
// To inject a function call, a debugger should:
// 1. Check that the goroutine is in state _Grunning and that
//    there are at least 256 bytes free on the stack.
// 2. Push the current PC on the stack (updating SP).
// 3. Write the desired argument frame size at SP-16 (using the SP
//    after step 2).
// 4. Save all machine registers (including flags and XMM registers)
//    so they can be restored later by the debugger.
// 5. Set the PC to debugCallV2 and resume execution.
//
// If the goroutine is in state _Grunnable, then it's not generally
// safe to inject a call because it may return out via other runtime
// operations. Instead, the debugger should unwind the stack to find
// the return to non-runtime code, add a temporary breakpoint there,
// and inject the call once that breakpoint is hit.
//
// If the goroutine is in any other state, it's not safe to inject a call.
//
// This function communicates back to the debugger by setting R12 and
// invoking INT3 to raise a breakpoint signal. See the comments in the
// implementation for the protocol the debugger is expected to
// follow. InjectDebugCall in the runtime tests demonstrates this protocol.
//
// The debugger must ensure that any pointers passed to the function
// obey escape analysis requirements. Specifically, it must not pass
// a stack pointer to an escaping argument. debugCallV2 cannot check
// this invariant.
//
// This is ABIInternal because Go code injects its PC directly into new
// goroutine stacks.
TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT,$152-0
	// Save all registers that may contain pointers so they can be
	// conservatively scanned.
	//
	// We can't do anything that might clobber any of these
	// registers before this.
	MOVQ	R15, r15-(14*8+8)(SP)
	MOVQ	R14, r14-(13*8+8)(SP)
	MOVQ	R13, r13-(12*8+8)(SP)
	MOVQ	R12, r12-(11*8+8)(SP)
	MOVQ	R11, r11-(10*8+8)(SP)
	MOVQ	R10, r10-(9*8+8)(SP)
	MOVQ	R9, r9-(8*8+8)(SP)
	MOVQ	R8, r8-(7*8+8)(SP)
	MOVQ	DI, di-(6*8+8)(SP)
	MOVQ	SI, si-(5*8+8)(SP)
	MOVQ	BP, bp-(4*8+8)(SP)
	MOVQ	BX, bx-(3*8+8)(SP)
	MOVQ	DX, dx-(2*8+8)(SP)
	// Save the frame size before we clobber it. Either of the last
	// saves could clobber this depending on whether there's a saved BP.
	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
	MOVQ	CX, cx-(1*8+8)(SP)
	MOVQ	AX, ax-(0*8+8)(SP)

	// Save the argument frame size.
	MOVQ	DX, frameSize-128(SP)

	// Perform a safe-point check.
	MOVQ	retpc-8(FP), AX	// Caller's PC
	MOVQ	AX, 0(SP)
	CALL	runtime·debugCallCheck(SB)
	MOVQ	8(SP), AX
	TESTQ	AX, AX
	JZ	good
	// The safety check failed. Put the reason string at the top
	// of the stack.
	MOVQ	AX, 0(SP)
	MOVQ	16(SP), AX
	MOVQ	AX, 8(SP)
	// Set R12 to 8 and invoke INT3. The debugger should get the
	// reason a call can't be injected from the top of the stack
	// and resume execution.
	MOVQ	$8, R12
	BYTE	$0xcc	// INT3
	JMP	restore

good:
	// Registers are saved and it's safe to make a call.
	// Open up a call frame, moving the stack if necessary.
	//
	// Once the frame is allocated, this will set R12 to 0 and
	// invoke INT3. The debugger should write the argument
	// frame for the call at SP, set up argument registers, push
	// the trapping PC on the stack, set the PC to the function to
	// call, set RDX to point to the closure (if a closure call),
	// and resume execution.
	//
	// If the function returns, this will set R12 to 1 and invoke
	// INT3. The debugger can then inspect any return value saved
	// on the stack at SP and in registers and resume execution again.
	//
	// If the function panics, this will set R12 to 2 and invoke INT3.
	// The interface{} value of the panic will be at SP. The debugger
	// can inspect the panic value and resume execution again.
	//
	// DEBUG_CALL_DISPATCH: if the requested frame size AX fits in
	// MAXSIZE, call the matching fixed-size debugCall stub via
	// debugCallWrap and jump to restore; otherwise fall through to
	// the next larger size. (No comments inside the macro: a //
	// comment on a backslash-continued line would swallow the
	// continuation.)
#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE) \
	CMPQ	AX, $MAXSIZE; \
	JA	5(PC); \
	MOVQ	$NAME(SB), AX; \
	MOVQ	AX, 0(SP); \
	CALL	runtime·debugCallWrap(SB); \
	JMP	restore

	MOVQ	frameSize-128(SP), AX
	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
	// The frame size is too large. Report the error.
	MOVQ	$debugCallFrameTooLarge<>(SB), AX
	MOVQ	AX, 0(SP)
	MOVQ	$20, 8(SP) // length of debugCallFrameTooLarge string
	MOVQ	$8, R12
	BYTE	$0xcc	// INT3
	JMP	restore

restore:
	// Calls and failures resume here.
	//
	// Set R12 to 16 and invoke INT3. The debugger should restore
	// all registers except RIP and RSP and resume execution.
	MOVQ	$16, R12
	BYTE	$0xcc	// INT3
	// We must not modify flags after this point.

	// Restore pointer-containing registers, which may have been
	// modified from the debugger's copy by stack copying.
	MOVQ	ax-(0*8+8)(SP), AX
	MOVQ	cx-(1*8+8)(SP), CX
	MOVQ	dx-(2*8+8)(SP), DX
	MOVQ	bx-(3*8+8)(SP), BX
	MOVQ	bp-(4*8+8)(SP), BP
	MOVQ	si-(5*8+8)(SP), SI
	MOVQ	di-(6*8+8)(SP), DI
	MOVQ	r8-(7*8+8)(SP), R8
	MOVQ	r9-(8*8+8)(SP), R9
	MOVQ	r10-(9*8+8)(SP), R10
	MOVQ	r11-(10*8+8)(SP), R11
	MOVQ	r12-(11*8+8)(SP), R12
	MOVQ	r13-(12*8+8)(SP), R13
	MOVQ	r14-(13*8+8)(SP), R14
	MOVQ	r15-(14*8+8)(SP), R15

	RET
2073
2074 // runtime.debugCallCheck assumes that functions defined with the
2075 // DEBUG_CALL_FN macro are safe points to inject calls.
//
// Each instantiation is a call-injection trampoline with a MAXSIZE-byte
// frame. It speaks the R12/INT3 protocol described earlier in this
// file: R12 = 0 at the first INT3 (0xcc) means "frame is allocated;
// debugger should write the argument frame at SP, set up registers,
// and redirect the PC to the target function"; R12 = 1 at the second
// INT3 means "the injected call returned; results can be inspected at
// SP and in registers". WRAPPER marks the frame so it is attributed to
// its caller; NO_LOCAL_POINTERS tells the GC the frame holds no live
// pointers of its own.
2076 #define DEBUG_CALL_FN(NAME,MAXSIZE) \
2077 TEXT NAME(SB),WRAPPER,$MAXSIZE-0; \
2078 NO_LOCAL_POINTERS; \
2079 MOVQ $0, R12; \
2080 BYTE $0xcc; \
2081 MOVQ $1, R12; \
2082 BYTE $0xcc; \
2083 RET
// Instantiate one trampoline per power-of-two frame size from 32 to
// 65536 bytes. The dispatch sequence earlier in this file compares the
// requested frame size against each MAXSIZE in ascending order and
// calls the first trampoline large enough to hold it.
2084 DEBUG_CALL_FN(debugCall32<>, 32)
2085 DEBUG_CALL_FN(debugCall64<>, 64)
2086 DEBUG_CALL_FN(debugCall128<>, 128)
2087 DEBUG_CALL_FN(debugCall256<>, 256)
2088 DEBUG_CALL_FN(debugCall512<>, 512)
2089 DEBUG_CALL_FN(debugCall1024<>, 1024)
2090 DEBUG_CALL_FN(debugCall2048<>, 2048)
2091 DEBUG_CALL_FN(debugCall4096<>, 4096)
2092 DEBUG_CALL_FN(debugCall8192<>, 8192)
2093 DEBUG_CALL_FN(debugCall16384<>, 16384)
2094 DEBUG_CALL_FN(debugCall32768<>, 32768)
2095 DEBUG_CALL_FN(debugCall65536<>, 65536)
2096
2097 // func debugCallPanicked(val interface{})
//
// debugCallPanicked reports a panic from an injected call back to the
// debugger. It copies the panic value's two interface words (type word
// to 0(SP), data word to 8(SP)), then sets R12 = 2 and executes INT3
// (0xcc): per the R12 protocol comment earlier in this file, the
// debugger can then inspect the interface{} value at SP and resume
// execution.
2098 TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
2099 // Copy the panic value to the top of stack.
2100 MOVQ val_type+0(FP), AX
2101 MOVQ AX, 0(SP)
2102 MOVQ val_data+8(FP), AX
2103 MOVQ AX, 8(SP)
2104 MOVQ $2, R12
2105 BYTE $0xcc
2106 RET
2107
// panicBounds is the out-of-line entry point for bounds-check
// failures. It spills the 14 integer registers that could hold an
// index value into its 144-byte frame at 16(SP)+8*regnum — SP (slot
// 48) and R14/g (slot 128) are skipped but their slots are left in
// place so the save area stays indexed by hardware register number —
// then calls runtime·panicBounds64 with the caller's return PC in AX
// and a pointer to the save area in BX.
2108 TEXT runtime·panicBounds<ABIInternal>(SB),NOSPLIT,$144-0
2109 NO_LOCAL_POINTERS
2110 // Save all 14 int registers that could have an index in them.
2111 // They may be pointers, but if they are they are dead.
2112 MOVQ AX, 16(SP)
2113 MOVQ CX, 24(SP)
2114 MOVQ DX, 32(SP)
2115 MOVQ BX, 40(SP)
2116 // skip SP @ 48(SP)
2117 MOVQ BP, 56(SP)
2118 MOVQ SI, 64(SP)
2119 MOVQ DI, 72(SP)
2120 MOVQ R8, 80(SP)
2121 MOVQ R9, 88(SP)
2122 MOVQ R10, 96(SP)
2123 MOVQ R11, 104(SP)
2124 MOVQ R12, 112(SP)
2125 MOVQ R13, 120(SP)
2126 // skip R14 @ 128(SP) (aka G)
2127 MOVQ R15, 136(SP)
2128
2129 MOVQ SP, AX // hide SP read from vet
2130 MOVQ 152(AX), AX // PC immediately after call to panicBounds
2131 LEAQ 16(SP), BX
2132 CALL runtime·panicBounds64<ABIInternal>(SB)
2133 RET
2134
// runtime·tls_g: an 8-byte NOPTR global, defined only on Android and
// Windows here.
2135 #ifdef GOOS_android
2136 // Use the free TLS_SLOT_APP slot #2 on Android Q.
2137 // Earlier androids are set up in gcc_android.c.
// Statically initialized to 16 (= slot #2 * 8 bytes).
2138 DATA runtime·tls_g+0(SB)/8, $16
2139 GLOBL runtime·tls_g+0(SB), NOPTR, $8
2140 #endif
2141 #ifdef GOOS_windows
// No DATA directive, so this is zero-initialized; presumably the value
// is filled in at startup elsewhere — confirm against the Windows
// runtime setup code.
2142 GLOBL runtime·tls_g+0(SB), NOPTR, $8
2143 #endif
2144
2145 // The compiler and assembler's -spectre=ret mode rewrites
2146 // all indirect CALL AX / JMP AX instructions to be
2147 // CALL retpolineAX / JMP retpolineAX.
2148 // See https://support.google.com/faqs/answer/7625886.
//
// The hand-encoded bytes form the standard retpoline gadget:
//
//	CALL setup          // pushes the address of nospec as the return PC
// nospec:
//	PAUSE; JMP nospec   // speculative execution is trapped in this loop
// setup:
//	MOVQ reg, 0(SP)     // overwrite the pushed return PC with the real target
//	RET                 // transfers to the target without an indirect branch
//
// reg is the target's hardware register number (0=AX ... 15=R15); the
// REX.B-style high bit and the ModRM reg field of the MOV encoding are
// derived from it below.
2149 #define RETPOLINE(reg) \
2150 /* CALL setup */ BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0; \
2151 /* nospec: */ \
2152 /* PAUSE */ BYTE $0xF3; BYTE $0x90; \
2153 /* JMP nospec */ BYTE $0xEB; BYTE $-(2+2); \
2154 /* setup: */ \
2155 /* MOVQ AX, 0(SP) */ BYTE $0x48|((reg&8)>>1); BYTE $0x89; \
2156 BYTE $0x04|((reg&7)<<3); BYTE $0x24; \
2157 /* RET */ BYTE $0xC3
2158
// One retpoline thunk per general-purpose register; the macro argument
// is that register's hardware encoding number.
2159 TEXT runtime·retpolineAX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(0)
2160 TEXT runtime·retpolineCX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(1)
2161 TEXT runtime·retpolineDX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(2)
2162 TEXT runtime·retpolineBX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(3)
2163 /* SP is 4, can't happen / magic encodings */
2164 TEXT runtime·retpolineBP(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(5)
2165 TEXT runtime·retpolineSI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(6)
2166 TEXT runtime·retpolineDI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(7)
2167 TEXT runtime·retpolineR8(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(8)
2168 TEXT runtime·retpolineR9(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(9)
2169 TEXT runtime·retpolineR10(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(10)
2170 TEXT runtime·retpolineR11(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(11)
2171 TEXT runtime·retpolineR12(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(12)
2172 TEXT runtime·retpolineR13(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(13)
2173 TEXT runtime·retpolineR14(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(14)
2174 TEXT runtime·retpolineR15(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(15)
2175
// getfp returns the current value of the hardware frame pointer
// register (BP) in the ABIInternal first result register, AX. Because
// the function is NOFRAME, BP on entry is still the caller's frame
// pointer.
2176 TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
2177 MOVQ BP, AX
2178 RET
2179
View as plain text