Text file
src/runtime/asm_amd64.s
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "go_tls.h"
7 #include "funcdata.h"
8 #include "textflag.h"
9 #include "cgo/abi_amd64.h"
10
11 // _rt0_amd64 is common startup code for most amd64 systems when using
12 // internal linking. This is the entry point for the program from the
13 // kernel for an ordinary -buildmode=exe program. On entry the stack holds
14 // the argument count at 0(SP) and the C-style argv starting at 8(SP).
15 TEXT _rt0_amd64(SB),NOSPLIT,$-8
16 MOVQ 0(SP), DI // DI = argc, read from the top of the stack
17 LEAQ 8(SP), SI // SI = argv, the address of the word after argc
18 JMP runtime·rt0_go(SB) // tail-jump: rt0_go takes argc in DI and argv in SI
19
20 // main is common startup code for most amd64 systems when using
21 // external linking. The C startup code will call the symbol "main"
22 // passing argc and argv in the usual C ABI registers DI and SI.
23 TEXT main(SB),NOSPLIT,$-8
24 JMP runtime·rt0_go(SB) // DI and SI are left untouched for rt0_go
25
26 // _rt0_amd64_lib is common startup code for most amd64 systems when
27 // using -buildmode=c-archive or -buildmode=c-shared. The linker will
28 // arrange to invoke this function as a global constructor (for
29 // c-archive) or when the shared library is loaded (for c-shared).
30 // We expect argc and argv to be passed in the usual C ABI registers
31 // DI and SI. Both are saved in file-local globals for _rt0_amd64_lib_go.
32 TEXT _rt0_amd64_lib(SB),NOSPLIT|NOFRAME,$0
33 // Transition from C ABI to Go ABI.
34 PUSH_REGS_HOST_TO_ABI0()
35
36 MOVQ DI, _rt0_amd64_lib_argc<>(SB) // stash argc for _rt0_amd64_lib_go
37 MOVQ SI, _rt0_amd64_lib_argv<>(SB) // stash argv for _rt0_amd64_lib_go
38
39 // Synchronous initialization.
40 #ifndef GOOS_windows
41 // Avoid calling it on Windows because it is not used
42 // and it would crash the application due to the autogenerated
43 // ABI wrapper trying to access a non-existent TLS slot.
44 CALL runtime·libpreinit(SB)
45 #endif
46
47 // Create a new thread to finish Go runtime initialization.
48 MOVQ _cgo_sys_thread_create(SB), AX // AX = thread-creation hook, zero if unset
49 TESTQ AX, AX
50 JZ nocgo // no hook: create the thread with runtime·newosproc0 instead
51
52 // We're calling back to C.
53 // Align stack per C ABI requirements.
54 MOVQ SP, BX // Callee-save in C ABI, so it survives the call and can restore SP
55 ANDQ $~15, SP
56 MOVQ $_rt0_amd64_lib_go(SB), DI // first argument: the function the new thread runs
57 MOVQ $0, SI // second argument: zero
58 #ifdef GOOS_windows
59 // For Windows ABI
60 MOVQ DI, CX
61 MOVQ SI, DX
62 // Leave space for four words on the stack as required
63 // by the Windows amd64 calling convention.
64 ADJSP $32
65 #endif
66 CALL AX
67 #ifdef GOOS_windows
68 ADJSP $-32 // just to make the assembler not complain about unbalanced stack
69 #endif
70 MOVQ BX, SP // undo the alignment by restoring the saved stack pointer
71 JMP restore
72
73 nocgo:
74 ADJSP $16 // reserve two words for newosproc0's stack arguments
75 MOVQ $0x800000, 0(SP) // stacksize
76 MOVQ $_rt0_amd64_lib_go(SB), AX
77 MOVQ AX, 8(SP) // fn
78 CALL runtime·newosproc0(SB)
79 ADJSP $-16 // release the two argument words
80
81 restore:
82 POP_REGS_HOST_TO_ABI0()
83 RET
84
85 // _rt0_amd64_lib_go initializes the Go runtime.
86 // This is started in a separate thread by _rt0_amd64_lib.
87 TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
88 MOVQ _rt0_amd64_lib_argc<>(SB), DI // DI = argc saved by _rt0_amd64_lib
89 MOVQ _rt0_amd64_lib_argv<>(SB), SI // SI = argv saved by _rt0_amd64_lib
90 JMP runtime·rt0_go(SB) // continue with the common bootstrap
91
92 DATA _rt0_amd64_lib_argc<>(SB)/8, $0 // argc handed from _rt0_amd64_lib to _rt0_amd64_lib_go
93 GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
94 DATA _rt0_amd64_lib_argv<>(SB)/8, $0 // argv handed from _rt0_amd64_lib to _rt0_amd64_lib_go
95 GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
96
97 #ifdef GOAMD64_v2
98 DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v2 microarchitecture support.\n"
99 #endif
100
101 #ifdef GOAMD64_v3
102 DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v3 microarchitecture support.\n"
103 #endif
104
105 #ifdef GOAMD64_v4
106 DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v4 microarchitecture support.\n"
107 #endif
108
109 GLOBL bad_cpu_msg<>(SB), RODATA, $84 // 84 bytes, the length rt0_go's bad_cpu path passes to runtime·write
110
111 // Define a list of AMD64 microarchitecture level features
112 // https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
113
114 // SSE3 SSSE3 CMPXCHG16B SSE4.1 SSE4.2 POPCNT (tested against CX after CPUID with AX=1)
115 #define V2_FEATURES_CX (1 << 0 | 1 << 9 | 1 << 13 | 1 << 19 | 1 << 20 | 1 << 23)
116 // LAHF/SAHF (tested against CX after CPUID with AX=0x80000001)
117 #define V2_EXT_FEATURES_CX (1 << 0)
118 // FMA MOVBE OSXSAVE AVX F16C
119 #define V3_FEATURES_CX (V2_FEATURES_CX | 1 << 12 | 1 << 22 | 1 << 27 | 1 << 28 | 1 << 29)
120 // ABM (for LZCNT)
121 #define V3_EXT_FEATURES_CX (V2_EXT_FEATURES_CX | 1 << 5)
122 // BMI1 AVX2 BMI2 (tested against BX after CPUID with AX=7, CX=0)
123 #define V3_EXT_FEATURES_BX (1 << 3 | 1 << 5 | 1 << 8)
124 // XMM YMM (tested against AX after XGETBV with CX=0)
125 #define V3_OS_SUPPORT_AX (1 << 1 | 1 << 2)
126
127 #define V4_FEATURES_CX V3_FEATURES_CX
128
129 #define V4_EXT_FEATURES_CX V3_EXT_FEATURES_CX
130 // AVX512F AVX512DQ AVX512CD AVX512BW AVX512VL
131 #define V4_EXT_FEATURES_BX (V3_EXT_FEATURES_BX | 1 << 16 | 1 << 17 | 1 << 28 | 1 << 30 | 1 << 31)
132 // OPMASK ZMM
133 #define V4_OS_SUPPORT_AX (V3_OS_SUPPORT_AX | 1 << 5 | (1 << 6 | 1 << 7))
134
135 #ifdef GOAMD64_v2
136 #define NEED_MAX_CPUID 0x80000001
137 #define NEED_FEATURES_CX V2_FEATURES_CX
138 #define NEED_EXT_FEATURES_CX V2_EXT_FEATURES_CX
139 #endif
140
141 #ifdef GOAMD64_v3
142 #define NEED_MAX_CPUID 0x80000001
143 #define NEED_FEATURES_CX V3_FEATURES_CX
144 #define NEED_EXT_FEATURES_CX V3_EXT_FEATURES_CX
145 #define NEED_EXT_FEATURES_BX V3_EXT_FEATURES_BX
146 #define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
147 #endif
148
149 #ifdef GOAMD64_v4
150 #define NEED_MAX_CPUID 0x80000001
151 #define NEED_FEATURES_CX V4_FEATURES_CX
152 #define NEED_EXT_FEATURES_CX V4_EXT_FEATURES_CX
153 #define NEED_EXT_FEATURES_BX V4_EXT_FEATURES_BX
154
155 // Darwin requires a different approach to check AVX512 support, see CL 285572.
156 #ifdef GOOS_darwin
157 #define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
158 // These values are from:
159 // https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
160 #define commpage64_base_address 0x00007fffffe00000
161 #define commpage64_cpu_capabilities64 (commpage64_base_address+0x010)
162 #define commpage64_version (commpage64_base_address+0x01E)
163 #define AVX512F 0x0000004000000000
164 #define AVX512CD 0x0000008000000000
165 #define AVX512DQ 0x0000010000000000
166 #define AVX512BW 0x0000020000000000
167 #define AVX512VL 0x0000100000000000
168 #define NEED_DARWIN_SUPPORT (AVX512F | AVX512DQ | AVX512CD | AVX512BW | AVX512VL)
169 #else
170 #define NEED_OS_SUPPORT_AX V4_OS_SUPPORT_AX
171 #endif
172
173 #endif
174
175 TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0 // common runtime bootstrap; expects argc in DI and argv in SI
176 // copy arguments forward on an even stack
177 MOVQ DI, AX // argc
178 MOVQ SI, BX // argv
179 SUBQ $(5*8), SP // 3args 2auto
180 ANDQ $~15, SP
181 MOVQ AX, 24(SP) // first auto slot keeps argc
182 MOVQ BX, 32(SP) // second auto slot keeps argv
183
184 // This is typically the entry point for Go programs.
185 // Call stack unwinding must not proceed past this frame.
186 // Set the frame pointer register to 0 so that frame pointer-based unwinders
187 // (which don't use debug info for performance reasons)
188 // won't attempt to unwind past this function.
189 // See go.dev/issue/63630
190 MOVQ $0, BP
191
192 // create istack out of the given (operating system) stack.
193 // _cgo_init may update stackguard.
194 MOVQ $runtime·g0(SB), DI
195 LEAQ (-64*1024)(SP), BX // provisional stack bottom: 64 KiB below the current SP
196 MOVQ BX, g_stackguard0(DI)
197 MOVQ BX, g_stackguard1(DI)
198 MOVQ BX, (g_stack+stack_lo)(DI)
199 MOVQ SP, (g_stack+stack_hi)(DI)
200
201 // find out information about the processor we're on
202 MOVL $0, AX
203 CPUID
204 CMPL AX, $0
205 JE nocpuinfo
206
207 CMPL BX, $0x756E6547 // "Genu"
208 JNE notintel
209 CMPL DX, $0x49656E69 // "ineI"
210 JNE notintel
211 CMPL CX, $0x6C65746E // "ntel"
212 JNE notintel
213 MOVB $1, runtime·isIntel(SB)
214
215 notintel:
216 // Load EAX=1 cpuid flags
217 MOVL $1, AX
218 CPUID
219 MOVL AX, runtime·processorVersionInfo(SB)
220
221 nocpuinfo:
222 // if there is an _cgo_init, call it.
223 MOVQ _cgo_init(SB), AX
224 TESTQ AX, AX
225 JZ needtls
226 // arg 1: g0, already in DI
227 MOVQ $setg_gcc<>(SB), SI // arg 2: setg_gcc
228 MOVQ $0, DX // arg 3, 4: not used when using platform's TLS
229 MOVQ $0, CX
230 #ifdef GOOS_android
231 MOVQ $runtime·tls_g(SB), DX // arg 3: &tls_g
232 // arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
233 // Compensate for tls_g (+16).
234 MOVQ -16(TLS), CX
235 #endif
236 #ifdef GOOS_windows
237 MOVQ $runtime·tls_g(SB), DX // arg 3: &tls_g
238 // Adjust for the Win64 calling convention.
239 MOVQ CX, R9 // arg 4
240 MOVQ DX, R8 // arg 3
241 MOVQ SI, DX // arg 2
242 MOVQ DI, CX // arg 1
243 #endif
244 CALL AX
245
246 // update stackguard after _cgo_init
247 MOVQ $runtime·g0(SB), CX
248 MOVQ (g_stack+stack_lo)(CX), AX
249 ADDQ $const_stackGuard, AX
250 MOVQ AX, g_stackguard0(CX)
251 MOVQ AX, g_stackguard1(CX)
252
253 #ifndef GOOS_windows
254 JMP ok
255 #endif
256 needtls:
257 #ifdef GOOS_plan9
258 // skip TLS setup on Plan 9
259 JMP ok
260 #endif
261 #ifdef GOOS_solaris
262 // skip TLS setup on Solaris
263 JMP ok
264 #endif
265 #ifdef GOOS_illumos
266 // skip TLS setup on illumos
267 JMP ok
268 #endif
269 #ifdef GOOS_darwin
270 // skip TLS setup on Darwin
271 JMP ok
272 #endif
273 #ifdef GOOS_openbsd
274 // skip TLS setup on OpenBSD
275 JMP ok
276 #endif
277
278 #ifdef GOOS_windows
279 CALL runtime·wintls(SB)
280 #endif
281
282 LEAQ runtime·m0+m_tls(SB), DI
283 CALL runtime·settls(SB)
284
285 // store through it, to make sure it works
286 get_tls(BX)
287 MOVQ $0x123, g(BX)
288 MOVQ runtime·m0+m_tls(SB), AX
289 CMPQ AX, $0x123
290 JEQ 2(PC)
291 CALL runtime·abort(SB)
292 ok:
293 // set the per-goroutine and per-mach "registers"
294 get_tls(BX)
295 LEAQ runtime·g0(SB), CX
296 MOVQ CX, g(BX)
297 LEAQ runtime·m0(SB), AX
298
299 // save m->g0 = g0
300 MOVQ CX, m_g0(AX)
301 // save m0 to g0->m
302 MOVQ AX, g_m(CX)
303
304 CLD // convention is D is always left cleared
305
306 // Check GOAMD64 requirements
307 // We need to do this after setting up TLS, so that
308 // we can report an error if there is a failure. See issue 49586.
309 #ifdef NEED_FEATURES_CX
310 MOVL $0, AX
311 CPUID
312 CMPL AX, $0
313 JE bad_cpu
314 MOVL $1, AX
315 CPUID
316 ANDL $NEED_FEATURES_CX, CX
317 CMPL CX, $NEED_FEATURES_CX
318 JNE bad_cpu
319 #endif
320
321 #ifdef NEED_MAX_CPUID
322 MOVL $0x80000000, AX
323 CPUID
324 CMPL AX, $NEED_MAX_CPUID // AX = highest extended CPUID leaf the processor reports
325 JB bad_cpu // unsigned compare: leaf numbers have the top bit set, so a signed JL would accept any small positive AX
326 #endif
327
328 #ifdef NEED_EXT_FEATURES_BX
329 MOVL $7, AX
330 MOVL $0, CX
331 CPUID
332 ANDL $NEED_EXT_FEATURES_BX, BX
333 CMPL BX, $NEED_EXT_FEATURES_BX
334 JNE bad_cpu
335 #endif
336
337 #ifdef NEED_EXT_FEATURES_CX
338 MOVL $0x80000001, AX
339 CPUID
340 ANDL $NEED_EXT_FEATURES_CX, CX
341 CMPL CX, $NEED_EXT_FEATURES_CX
342 JNE bad_cpu
343 #endif
344
345 #ifdef NEED_OS_SUPPORT_AX
346 XORL CX, CX
347 XGETBV
348 ANDL $NEED_OS_SUPPORT_AX, AX
349 CMPL AX, $NEED_OS_SUPPORT_AX
350 JNE bad_cpu
351 #endif
352
353 #ifdef NEED_DARWIN_SUPPORT
354 MOVQ $commpage64_version, BX
355 CMPW (BX), $13 // cpu_capabilities64 undefined in versions < 13
356 JL bad_cpu
357 MOVQ $commpage64_cpu_capabilities64, BX
358 MOVQ (BX), BX
359 MOVQ $NEED_DARWIN_SUPPORT, CX
360 ANDQ CX, BX
361 CMPQ BX, CX
362 JNE bad_cpu
363 #endif
364
365 CALL runtime·check(SB)
366
367 MOVL 24(SP), AX // copy argc
368 MOVL AX, 0(SP)
369 MOVQ 32(SP), AX // copy argv
370 MOVQ AX, 8(SP)
371 CALL runtime·args(SB)
372 CALL runtime·osinit(SB)
373 CALL runtime·schedinit(SB)
374
375 // create a new goroutine to start program
376 MOVQ $runtime·mainPC(SB), AX // entry
377 PUSHQ AX
378 CALL runtime·newproc(SB)
379 POPQ AX
380
381 // start this M
382 CALL runtime·mstart(SB)
383
384 CALL runtime·abort(SB) // mstart should never return
385 RET
386
387 bad_cpu: // show that the program requires a certain microarchitecture level.
388 MOVQ $2, 0(SP) // first argument to runtime·write: 2
389 MOVQ $bad_cpu_msg<>(SB), AX
390 MOVQ AX, 8(SP) // second argument: address of the message
391 MOVQ $84, 16(SP) // third argument: message length in bytes
392 CALL runtime·write(SB)
393 MOVQ $1, 0(SP) // argument to runtime·exit: 1
394 CALL runtime·exit(SB)
395 CALL runtime·abort(SB)
396 RET
397
398 // Prevent dead-code elimination of debugCallV2 and debugPinnerV1, which are
399 // intended to be called by debuggers.
400 MOVQ $runtime·debugPinnerV1<ABIInternal>(SB), AX
401 MOVQ $runtime·debugCallV2<ABIInternal>(SB), AX
402 RET
403
404 // mainPC is a function value for runtime.main, to be passed to newproc.
405 // The reference to runtime.main is made via ABIInternal, since the
406 // actual function (not the ABI0 wrapper) is needed by newproc.
407 DATA runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
408 GLOBL runtime·mainPC(SB),RODATA,$8 // one read-only 8-byte word; rt0_go pushes its address before calling newproc
409
410 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0 // emits a single breakpoint instruction, then returns
411 BYTE $0xcc // 0xCC is the one-byte INT3 opcode
412 RET
413
414 TEXT runtime·asminit(SB),NOSPLIT,$0-0 // takes no arguments and returns immediately
415 // No per-thread init.
416 RET
417
418 TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
419 // This is the root frame of new Go-created OS threads.
420 // Call stack unwinding must not proceed past this frame.
421 // Set the frame pointer register to 0 so that frame pointer-based unwinders
422 // (which don't use debug info for performance reasons)
423 // won't attempt to unwind past this function.
424 // See go.dev/issue/63630
425 MOVQ $0, BP // same spelling as the other frame-pointer clears in this file
426 CALL runtime·mstart0(SB)
427 RET // not reached
428
429 /*
430 * go-routine
431 */
432
433 // func gogo(buf *gobuf)
434 // restore state from Gobuf; longjmp
435 TEXT runtime·gogo(SB), NOSPLIT, $0-8
436 MOVQ buf+0(FP), BX // gobuf
437 MOVQ gobuf_g(BX), DX // DX = buf.g, the goroutine to resume
438 MOVQ 0(DX), CX // make sure g != nil
439 JMP gogo<>(SB) // continue in gogo<> with BX = gobuf and DX = g
440
441 TEXT gogo<>(SB), NOSPLIT, $0 // entered from runtime·gogo with BX = gobuf and DX = g
442 get_tls(CX)
443 MOVQ DX, g(CX) // publish g in TLS
444 MOVQ DX, R14 // set the g register
445 MOVQ gobuf_sp(BX), SP // restore SP
446 MOVQ gobuf_ctxt(BX), DX // DX = saved ctxt
447 MOVQ gobuf_bp(BX), BP // restore the frame pointer
448 MOVQ $0, gobuf_sp(BX) // clear to help garbage collector
449 MOVQ $0, gobuf_ctxt(BX)
450 MOVQ $0, gobuf_bp(BX)
451 MOVQ gobuf_pc(BX), BX // BX = saved PC, loaded last because it overwrites the gobuf pointer
452 JMP BX // resume execution at the saved PC
453
454 // func mcall(fn func(*g))
455 // Switch to m->g0's stack, call fn(g).
456 // Fn must never return. It should gogo(&g->sched)
457 // to keep running g.
458 TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT, $0-8
459 #ifdef GOEXPERIMENT_runtimesecret
460 CMPL g_secret(R14), $0
461 JEQ nosecret
462 CALL ·secretEraseRegistersMcall(SB)
463 nosecret:
464 #endif
465
466 MOVQ AX, DX // DX = fn
467
468 // Save state in g->sched. The caller's SP and PC are restored by gogo to
469 // resume execution in the caller's frame (implicit return). The caller's BP
470 // is also restored to support frame pointer unwinding.
471 MOVQ SP, BX // hide (SP) reads from vet
472 MOVQ 8(BX), BX // caller's PC
473 MOVQ BX, (g_sched+gobuf_pc)(R14)
474 LEAQ fn+0(FP), BX // caller's SP
475 MOVQ BX, (g_sched+gobuf_sp)(R14)
476 // Get the caller's frame pointer by dereferencing BP. Storing BP as it is
477 // can cause a frame pointer cycle, see CL 476235.
478 MOVQ (BP), BX // caller's BP
479 MOVQ BX, (g_sched+gobuf_bp)(R14)
480
481 // switch to m->g0 & its stack, call fn
482 MOVQ g_m(R14), BX
483 MOVQ m_g0(BX), SI // SI = g.m.g0
484 CMPQ SI, R14 // if g == m->g0 call badmcall
485 JNE goodm
486 JMP runtime·badmcall(SB)
487 goodm:
488 MOVQ R14, AX // AX (and arg 0) = g
489 MOVQ SI, R14 // g = g.m.g0
490 get_tls(CX) // Set G in TLS
491 MOVQ R14, g(CX)
492 MOVQ (g_sched+gobuf_sp)(R14), SP // sp = g0.sched.sp
493 MOVQ $0, BP // clear frame pointer, as caller may execute on another M
494 PUSHQ AX // open up space for fn's arg spill slot
495 MOVQ 0(DX), R12 // R12 = code pointer stored in the first word of fn
496 CALL R12 // fn(g)
497 // The Windows native stack unwinder incorrectly classifies the next instruction
498 // as part of the function epilogue, producing a wrong call stack.
499 // Add a NOP to work around this issue. See go.dev/issue/67007.
500 BYTE $0x90
501 POPQ AX
502 JMP runtime·badmcall2(SB) // only reached if fn returned, which it must never do
503 RET
504
505 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
506 // of the G stack. We need to distinguish the routine that
507 // lives at the bottom of the G stack from the one that lives
508 // at the top of the system stack because the one at the top of
509 // the system stack terminates the stack walk (see topofstack()).
510 // The frame layout needs to match systemstack
511 // so that it can pretend to be systemstack_switch.
512 TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
513 UNDEF
514 // Make sure this function is not leaf,
515 // so the frame is saved.
516 CALL runtime·abort(SB)
517 RET
518
519 // func systemstack(fn func())
520 TEXT runtime·systemstack(SB), NOSPLIT, $0-8
521 #ifdef GOEXPERIMENT_runtimesecret
522 // If in secret mode, erase registers on transition
523 // from G stack to M stack,
524 get_tls(CX)
525 MOVQ g(CX), AX
526 CMPL g_secret(AX), $0
527 JEQ nosecret
528 CALL ·secretEraseRegisters(SB)
529 nosecret:
530 #endif
531
532 MOVQ fn+0(FP), DI // DI = fn
533 get_tls(CX)
534 MOVQ g(CX), AX // AX = g
535 MOVQ g_m(AX), BX // BX = m
536
537 CMPQ AX, m_gsignal(BX) // running on gsignal: call fn without switching
538 JEQ noswitch
539
540 MOVQ m_g0(BX), DX // DX = g0
541 CMPQ AX, DX // running on g0: call fn without switching
542 JEQ noswitch
543
544 CMPQ AX, m_curg(BX) // the only other valid g is m.curg
545 JNE bad
546
547 // Switch stacks.
548 // The original frame pointer is stored in BP,
549 // which is useful for stack unwinding.
550 // Save our state in g->sched. Pretend to
551 // be systemstack_switch if the G stack is scanned.
552 CALL gosave_systemstack_switch<>(SB)
553
554 // switch to g0
555 MOVQ DX, g(CX)
556 MOVQ DX, R14 // set the g register
557 MOVQ (g_sched+gobuf_sp)(DX), SP
558
559 // call target function
560 MOVQ DI, DX // DX = fn, the closure value
561 MOVQ 0(DI), DI // DI = code pointer stored in the first word of fn
562 CALL DI
563
564 // switch back to g
565 get_tls(CX)
566 MOVQ g(CX), AX
567 MOVQ g_m(AX), BX
568 MOVQ m_curg(BX), AX // AX = m.curg, the goroutine that made the call
569 MOVQ AX, g(CX)
570 MOVQ (g_sched+gobuf_sp)(AX), SP
571 MOVQ (g_sched+gobuf_bp)(AX), BP
572 MOVQ $0, (g_sched+gobuf_sp)(AX) // clear the saved SP and BP now that they are restored
573 MOVQ $0, (g_sched+gobuf_bp)(AX)
574 RET
575
576 noswitch:
577 // already on m stack; tail call the function
578 // Using a tail call here cleans up tracebacks since we won't stop
579 // at an intermediate systemstack.
580 MOVQ DI, DX
581 MOVQ 0(DI), DI
582 // The function epilogue is not called on a tail call.
583 // Pop BP from the stack to simulate it.
584 POPQ BP
585 JMP DI
586
587 bad:
588 // Bad: g is not gsignal, not g0, not curg. What is it?
589 MOVQ $runtime·badsystemstack(SB), AX
590 CALL AX
591 INT $3
592
593 // func switchToCrashStack0(fn func())
594 TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8 // fn arrives in AX
595 MOVQ g_m(R14), BX // curm
596
597 // set g to gcrash
598 LEAQ runtime·gcrash(SB), R14 // g = &gcrash
599 MOVQ BX, g_m(R14) // g.m = curm
600 MOVQ R14, m_g0(BX) // curm.g0 = g
601 get_tls(CX)
602 MOVQ R14, g(CX)
603
604 // switch to crashstack
605 MOVQ (g_stack+stack_hi)(R14), BX
606 SUBQ $(4*8), BX // start four words below the top of gcrash's stack
607 MOVQ BX, SP
608
609 // call target function
610 MOVQ AX, DX // DX = fn, the closure value
611 MOVQ 0(AX), AX // AX = code pointer stored in the first word of fn
612 CALL AX
613
614 // should never return
615 CALL runtime·abort(SB)
616 UNDEF
617
618 /*
619 * support for morestack
620 */
621
622 // Called during function prolog when more stack is needed.
623 //
624 // The traceback routines see morestack on a g0 as being
625 // the top of a stack (for example, morestack calling newstack
626 // calling the scheduler calling newm calling gc), so we must
627 // record an argument size. For that purpose, it has no arguments.
628 TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
629 // Cannot grow scheduler stack (m->g0).
630 get_tls(CX)
631 MOVQ g(CX), DI // DI = g
632 MOVQ g_m(DI), BX // BX = m
633
634 // Set g->sched to context in f.
635 MOVQ 0(SP), AX // f's PC
636 MOVQ AX, (g_sched+gobuf_pc)(DI)
637 LEAQ 8(SP), AX // f's SP
638 MOVQ AX, (g_sched+gobuf_sp)(DI)
639 MOVQ BP, (g_sched+gobuf_bp)(DI)
640 MOVQ DX, (g_sched+gobuf_ctxt)(DI) // DX is saved as f's ctxt
641
642 MOVQ m_g0(BX), SI // SI = m.g0
643 CMPQ DI, SI
644 JNE 3(PC) // skip the two calls below unless g == m.g0
645 CALL runtime·badmorestackg0(SB)
646 CALL runtime·abort(SB)
647
648 // Cannot grow signal stack (m->gsignal).
649 MOVQ m_gsignal(BX), SI
650 CMPQ DI, SI
651 JNE 3(PC) // skip the two calls below unless g == m.gsignal
652 CALL runtime·badmorestackgsignal(SB)
653 CALL runtime·abort(SB)
654
655 // Called from f.
656 // Set m->morebuf to f's caller.
657 NOP SP // tell vet SP changed - stop checking offsets
658 MOVQ 8(SP), AX // f's caller's PC
659 MOVQ AX, (m_morebuf+gobuf_pc)(BX)
660 LEAQ 16(SP), AX // f's caller's SP
661 MOVQ AX, (m_morebuf+gobuf_sp)(BX)
662 MOVQ DI, (m_morebuf+gobuf_g)(BX)
663
664 // If in secret mode, erase registers on transition
665 // from G stack to M stack,
666 #ifdef GOEXPERIMENT_runtimesecret
667 CMPL g_secret(DI), $0
668 JEQ nosecret
669 CALL ·secretEraseRegisters(SB)
670 get_tls(CX) // reload CX, DI and BX after the erase call
671 MOVQ g(CX), DI // DI = g
672 MOVQ g_m(DI), BX // BX = m
673 nosecret:
674 #endif
675
676 // Call newstack on m->g0's stack.
677 MOVQ m_g0(BX), BX // BX = m.g0
678 MOVQ BX, g(CX) // make g0 the current g in TLS
679 MOVQ (g_sched+gobuf_sp)(BX), SP
680 MOVQ $0, BP // clear frame pointer, as caller may execute on another M
681 CALL runtime·newstack(SB)
682 CALL runtime·abort(SB) // crash if newstack returns
683 RET
684
685 // morestack but not preserving ctxt.
686 TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
687 MOVL $0, DX // zero DX, which morestack stores as the saved ctxt
688 JMP runtime·morestack(SB)
689
690 // spillArgs stores return values from registers to a *internal/abi.RegArgs in R12.
691 TEXT ·spillArgs(SB),NOSPLIT,$0-0
692 MOVQ AX, 0(R12) // nine integer registers go to offsets 0 through 64
693 MOVQ BX, 8(R12)
694 MOVQ CX, 16(R12)
695 MOVQ DI, 24(R12)
696 MOVQ SI, 32(R12)
697 MOVQ R8, 40(R12)
698 MOVQ R9, 48(R12)
699 MOVQ R10, 56(R12)
700 MOVQ R11, 64(R12)
701 MOVQ X0, 72(R12) // X0 through X14 go to offsets 72 through 184, 8 bytes each
702 MOVQ X1, 80(R12)
703 MOVQ X2, 88(R12)
704 MOVQ X3, 96(R12)
705 MOVQ X4, 104(R12)
706 MOVQ X5, 112(R12)
707 MOVQ X6, 120(R12)
708 MOVQ X7, 128(R12)
709 MOVQ X8, 136(R12)
710 MOVQ X9, 144(R12)
711 MOVQ X10, 152(R12)
712 MOVQ X11, 160(R12)
713 MOVQ X12, 168(R12)
714 MOVQ X13, 176(R12)
715 MOVQ X14, 184(R12)
716 RET
717
718 // unspillArgs loads args into registers from a *internal/abi.RegArgs in R12.
719 TEXT ·unspillArgs(SB),NOSPLIT,$0-0
720 MOVQ 0(R12), AX // nine integer registers come from offsets 0 through 64
721 MOVQ 8(R12), BX
722 MOVQ 16(R12), CX
723 MOVQ 24(R12), DI
724 MOVQ 32(R12), SI
725 MOVQ 40(R12), R8
726 MOVQ 48(R12), R9
727 MOVQ 56(R12), R10
728 MOVQ 64(R12), R11
729 MOVQ 72(R12), X0 // X0 through X14 come from offsets 72 through 184, 8 bytes each
730 MOVQ 80(R12), X1
731 MOVQ 88(R12), X2
732 MOVQ 96(R12), X3
733 MOVQ 104(R12), X4
734 MOVQ 112(R12), X5
735 MOVQ 120(R12), X6
736 MOVQ 128(R12), X7
737 MOVQ 136(R12), X8
738 MOVQ 144(R12), X9
739 MOVQ 152(R12), X10
740 MOVQ 160(R12), X11
741 MOVQ 168(R12), X12
742 MOVQ 176(R12), X13
743 MOVQ 184(R12), X14
744 RET
745
746 // reflectcall: call a function with the given argument list
747 // func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
748 // we don't have variable-sized frames, so we use a small number
749 // of constant-sized-frame functions to encode a few bits of size in the pc.
750 // Caution: ugly multiline assembly macros in your future!
751
752 #define DISPATCH(NAME,MAXSIZE) \
753 CMPQ CX, $MAXSIZE; \
754 JA 3(PC); \
755 MOVQ $NAME(SB), AX; \
756 JMP AX
757 // Note: can't just "JMP NAME(SB)" - bad inlining results.
758
759 TEXT ·reflectcall(SB), NOSPLIT, $0-48
760 MOVLQZX frameSize+32(FP), CX // CX = frameSize, zero-extended from 32 bits
761 DISPATCH(runtime·call16, 16) // each DISPATCH jumps to NAME when CX <= MAXSIZE (unsigned), else falls through
762 DISPATCH(runtime·call32, 32)
763 DISPATCH(runtime·call64, 64)
764 DISPATCH(runtime·call128, 128)
765 DISPATCH(runtime·call256, 256)
766 DISPATCH(runtime·call512, 512)
767 DISPATCH(runtime·call1024, 1024)
768 DISPATCH(runtime·call2048, 2048)
769 DISPATCH(runtime·call4096, 4096)
770 DISPATCH(runtime·call8192, 8192)
771 DISPATCH(runtime·call16384, 16384)
772 DISPATCH(runtime·call32768, 32768)
773 DISPATCH(runtime·call65536, 65536)
774 DISPATCH(runtime·call131072, 131072)
775 DISPATCH(runtime·call262144, 262144)
776 DISPATCH(runtime·call524288, 524288)
777 DISPATCH(runtime·call1048576, 1048576)
778 DISPATCH(runtime·call2097152, 2097152)
779 DISPATCH(runtime·call4194304, 4194304)
780 DISPATCH(runtime·call8388608, 8388608)
781 DISPATCH(runtime·call16777216, 16777216)
782 DISPATCH(runtime·call33554432, 33554432)
783 DISPATCH(runtime·call67108864, 67108864)
784 DISPATCH(runtime·call134217728, 134217728)
785 DISPATCH(runtime·call268435456, 268435456)
786 DISPATCH(runtime·call536870912, 536870912)
787 DISPATCH(runtime·call1073741824, 1073741824)
788 MOVQ $runtime·badreflectcall(SB), AX // frameSize exceeds the largest bucket
789 JMP AX
790
791 #define CALLFN(NAME,MAXSIZE) \
792 TEXT NAME(SB), WRAPPER, $MAXSIZE-48; \
793 NO_LOCAL_POINTERS; \
794 /* copy stackArgsSize bytes from stackArgs to the frame at SP */ \
795 MOVQ stackArgs+16(FP), SI; \
796 MOVLQZX stackArgsSize+24(FP), CX; \
797 MOVQ SP, DI; \
798 REP;MOVSB; \
799 /* set up argument registers from regArgs (R12) */ \
800 MOVQ regArgs+40(FP), R12; \
801 CALL ·unspillArgs(SB); \
802 /* call function through the code pointer in the first word of f */ \
803 MOVQ f+8(FP), DX; \
804 PCDATA $PCDATA_StackMapIndex, $0; \
805 MOVQ (DX), R12; \
806 CALL R12; \
807 /* copy register return values back, then hand the stack results to callRet */ \
808 MOVQ regArgs+40(FP), R12; \
809 CALL ·spillArgs(SB); \
810 MOVLQZX stackArgsSize+24(FP), CX; \
811 MOVLQZX stackRetOffset+28(FP), BX; \
812 MOVQ stackArgs+16(FP), DI; \
813 MOVQ stackArgsType+0(FP), DX; \
814 MOVQ SP, SI; \
815 ADDQ BX, DI; \
816 ADDQ BX, SI; \
817 SUBQ BX, CX; \
818 CALL callRet<>(SB); \
819 RET
820
821 // callRet copies return values back at the end of call*. This is a
822 // separate function so it can allocate stack space for the arguments
823 // to reflectcallmove. It does not follow the Go ABI; it expects its
824 // arguments in registers.
825 TEXT callRet<>(SB), NOSPLIT, $40-0
826 NO_LOCAL_POINTERS
827 MOVQ DX, 0(SP) // stackArgsType, as loaded by CALLFN
828 MOVQ DI, 8(SP) // stackArgs + stackRetOffset
829 MOVQ SI, 16(SP) // CALLFN's SP + stackRetOffset
830 MOVQ CX, 24(SP) // stackArgsSize - stackRetOffset
831 MOVQ R12, 32(SP) // regArgs
832 CALL runtime·reflectcallmove(SB)
833 RET
834
835 CALLFN(·call16, 16) // one fixed-frame call* function per size bucket tested by reflectcall
836 CALLFN(·call32, 32)
837 CALLFN(·call64, 64)
838 CALLFN(·call128, 128)
839 CALLFN(·call256, 256)
840 CALLFN(·call512, 512)
841 CALLFN(·call1024, 1024)
842 CALLFN(·call2048, 2048)
843 CALLFN(·call4096, 4096)
844 CALLFN(·call8192, 8192)
845 CALLFN(·call16384, 16384)
846 CALLFN(·call32768, 32768)
847 CALLFN(·call65536, 65536)
848 CALLFN(·call131072, 131072)
849 CALLFN(·call262144, 262144)
850 CALLFN(·call524288, 524288)
851 CALLFN(·call1048576, 1048576)
852 CALLFN(·call2097152, 2097152)
853 CALLFN(·call4194304, 4194304)
854 CALLFN(·call8388608, 8388608)
855 CALLFN(·call16777216, 16777216)
856 CALLFN(·call33554432, 33554432)
857 CALLFN(·call67108864, 67108864)
858 CALLFN(·call134217728, 134217728)
859 CALLFN(·call268435456, 268435456)
860 CALLFN(·call536870912, 536870912)
861 CALLFN(·call1073741824, 1073741824)
862
863 TEXT runtime·procyieldAsm(SB),NOSPLIT,$0-0 // executes PAUSE `cycles` times
864 MOVL cycles+0(FP), AX // AX = remaining iterations
865 TESTL AX, AX
866 JZ done // cycles == 0: return without pausing
867 again:
868 PAUSE
869 SUBL $1, AX
870 JNZ again // loop until the counter reaches zero
871 done:
872 RET
873
874
875 TEXT ·publicationBarrier<ABIInternal>(SB),NOSPLIT,$0-0 // no instructions besides RET
876 // Stores are already ordered on x86, so this is just a
877 // compile barrier.
878 RET
879
880 // Save state of caller into g->sched,
881 // but using fake PC from systemstack_switch.
882 // Must only be called from functions with frame pointer
883 // and without locals ($0) or else unwinding from
884 // systemstack_switch is incorrect.
885 // Smashes R9. The current g is taken from R14.
886 TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
887 // Take systemstack_switch PC and add 8 bytes to skip
888 // the prologue. The final location does not matter
889 // as long as we are between the prologue and the epilogue.
890 MOVQ $runtime·systemstack_switch+8(SB), R9
891 MOVQ R9, (g_sched+gobuf_pc)(R14)
892 LEAQ 8(SP), R9 // caller's SP: skip the return address pushed by the CALL
893 MOVQ R9, (g_sched+gobuf_sp)(R14)
894 MOVQ BP, (g_sched+gobuf_bp)(R14)
895 // Assert ctxt is zero. See func save.
896 MOVQ (g_sched+gobuf_ctxt)(R14), R9
897 TESTQ R9, R9
898 JZ 2(PC) // ctxt is zero: skip the abort
899 CALL runtime·abort(SB)
900 RET
901
902 // func asmcgocall_no_g(fn, arg unsafe.Pointer)
903 // Call fn(arg) aligned appropriately for the gcc ABI.
904 // Called on a system stack, and there may be no g yet (during needm).
905 TEXT ·asmcgocall_no_g(SB),NOSPLIT,$32-16
906 MOVQ fn+0(FP), AX // AX = fn
907 MOVQ arg+8(FP), BX // BX = arg
908 MOVQ SP, DX // DX = SP before alignment
909 ANDQ $~15, SP // alignment
910 MOVQ DX, 8(SP) // keep the unaligned SP in the frame across the call
911 MOVQ BX, DI // DI = first argument in AMD64 ABI
912 MOVQ BX, CX // CX = first argument in Win64
913 CALL AX
914 MOVQ 8(SP), DX // reload the saved SP
915 MOVQ DX, SP
916 RET
917
918 // asmcgocall_landingpad calls AX with BX as argument.
919 // Must be called on the system stack.
920 TEXT ·asmcgocall_landingpad(SB),NOSPLIT,$0-0
921 #ifdef GOOS_windows
922 // Make sure we have enough room for 4 stack-backed fast-call
923 // registers as per Windows amd64 calling convention.
924 ADJSP $32
925 // On Windows, asmcgocall_landingpad acts as landing pad for exceptions
926 // thrown in the cgo call. Exceptions that reach this function will be
927 // handled by runtime.sehtramp thanks to the SEH metadata added
928 // by the compiler.
929 // Note that runtime.sehtramp can't be attached directly to asmcgocall
930 // because its initial stack pointer can be outside the system stack bounds,
931 // and Windows stops the stack unwinding without calling the exception handler
932 // when it reaches that point.
933 MOVQ BX, CX // CX = first argument in Win64
934 CALL AX
935 // The exception handler is not called if the next instruction is part of
936 // the epilogue, which includes the RET instruction, so we need to add a NOP here.
937 BYTE $0x90
938 ADJSP $-32
939 RET
940 #endif
941 // Tail call AX on non-Windows, as the extra stack frame is not needed.
942 MOVQ BX, DI // DI = first argument in AMD64 ABI
943 JMP AX
944
945 // func asmcgocall(fn, arg unsafe.Pointer) int32
946 // Call fn(arg) on the scheduler stack,
947 // aligned appropriately for the gcc ABI.
948 // See cgocall.go for more details.
949 TEXT ·asmcgocall(SB),NOSPLIT,$0-20
950 // Figure out if we need to switch to m->g0 stack.
951 // We get called to create new OS threads too, and those
952 // come in on the m->g0 stack already. Or we might already
953 // be on the m->gsignal stack.
954 get_tls(CX)
955 MOVQ g(CX), DI // DI = g, possibly nil
956 CMPQ DI, $0
957 JEQ nosave
958 MOVQ g_m(DI), R8 // R8 = m
959 MOVQ m_gsignal(R8), SI
960 CMPQ DI, SI
961 JEQ nosave
962 MOVQ m_g0(R8), SI // SI = g0
963 CMPQ DI, SI
964 JEQ nosave
965
966 // Running on a user G
967 // Figure out if we're running secret code and clear the registers
968 // so that the C code we're about to call doesn't spill confidential
969 // information into memory
970 #ifdef GOEXPERIMENT_runtimesecret
971 CMPL g_secret(DI), $0
972 JEQ nosecret
973 CALL ·secretEraseRegisters(SB)
974
975 nosecret:
976 #endif
977 MOVQ fn+0(FP), AX // AX = fn, consumed by asmcgocall_landingpad
978 MOVQ arg+8(FP), BX // BX = arg, consumed by asmcgocall_landingpad
979 MOVQ SP, DX // DX = SP on the goroutine stack
980
981 // Switch to system stack.
982 // The original frame pointer is stored in BP,
983 // which is useful for stack unwinding.
984 CALL gosave_systemstack_switch<>(SB)
985 MOVQ SI, g(CX) // g0 becomes the current g in TLS
986 MOVQ (g_sched+gobuf_sp)(SI), SP
987
988 // Now on a scheduling stack (a pthread-created stack).
989 SUBQ $16, SP
990 ANDQ $~15, SP // alignment for gcc ABI
991 MOVQ DI, 8(SP) // save g
992 MOVQ (g_stack+stack_hi)(DI), DI
993 SUBQ DX, DI // DI = g.stack.hi - old SP
994 MOVQ DI, 0(SP) // save depth in stack (can't just save SP, as stack might be copied during a callback)
995 CALL runtime·asmcgocall_landingpad(SB)
996
997 // Restore registers, g, stack pointer.
998 get_tls(CX)
999 MOVQ 8(SP), DI // DI = saved g
1000 MOVQ (g_stack+stack_hi)(DI), SI
1001 SUBQ 0(SP), SI // SI = g.stack.hi - saved depth, the goroutine SP
1002 MOVQ DI, g(CX)
1003 MOVQ SI, SP
1004
1005 MOVL AX, ret+16(FP) // low 32 bits of AX become the int32 result
1006 RET
1007
1008 nosave:
1009 // Running on a system stack, perhaps even without a g.
1010 // Having no g can happen during thread creation or thread teardown
1011 // (see needm/dropm on Solaris, for example).
1012 // This code is like the above sequence but without saving/restoring g
1013 // and without worrying about the stack moving out from under us
1014 // (because we're on a system stack, not a goroutine stack).
1015 // The above code could be used directly if already on a system stack,
1016 // but then the only path through this code would be a rare case on Solaris.
1017 // Using this code for all "already on system stack" calls exercises it more,
1018 // which should help keep it correct.
1019 MOVQ fn+0(FP), AX
1020 MOVQ arg+8(FP), BX
1021 MOVQ SP, DX
1022
1023 SUBQ $16, SP
1024 ANDQ $~15, SP
1025 MOVQ $0, 8(SP) // where above code stores g, in case someone looks during debugging
1026 MOVQ DX, 0(SP) // save original stack pointer
1027 CALL runtime·asmcgocall_landingpad(SB)
1028 MOVQ 0(SP), SI // restore original stack pointer
1029 MOVQ SI, SP
1030 MOVL AX, ret+16(FP)
1031 RET
1032
1033 #ifdef GOOS_windows
1034 // Dummy TLS that's used on Windows so that we don't crash trying
1035 // to restore the G register in needm. needm and its callees are
1036 // very careful never to actually use the G, the TLS just can't be
1037 // unset since we're in Go code. cgocallback passes its address to settls.
1038 GLOBL zeroTLS<>(SB),RODATA,$const_tlsSize
1039 #endif
1040
1041 // func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
1042 // See cgocall.go for more details.
//
// Outline of the code below:
//   - fn == nil: store frame into the TLS g slot and jump straight to dropm.
//   - Otherwise use the m of the current g, or obtain one through
//     needAndBindM when the TLS g slot is nil.
//   - Record SP in m->g0->sched.sp, switch to m->curg's stack and call
//     runtime·cgocallbackg with fn, frame, ctxt at 0/8/16(SP).
//   - Restore m->curg->sched.{pc,sp} and m->g0->sched.sp, switch back to
//     g0, and call dropm when the m was obtained here and is not kept.
//
// Locals: 0(SP) holds the previous m->g0->sched.sp; savedm-8(SP) holds the
// m found on entry, or 0 when needAndBindM was called.
1043 TEXT ·cgocallback(SB),NOSPLIT,$24-24
1044 NO_LOCAL_POINTERS
1045
1046 // Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g.
1047 // It is used to dropm while thread is exiting.
1048 MOVQ fn+0(FP), AX
1049 CMPQ AX, $0
1050 JNE loadg
1051 // Restore the g from frame.
1052 get_tls(CX)
1053 MOVQ frame+8(FP), BX
1054 MOVQ BX, g(CX)
1055 JMP dropm
1056
1057 loadg:
1058 // If g is nil, Go did not create the current thread,
1059 // or if this thread never called into Go on pthread platforms.
1060 // Call needm to obtain one m for temporary use.
1061 // In this case, we're running on the thread stack, so there's
1062 // lots of space, but the linker doesn't know. Hide the call from
1063 // the linker analysis by using an indirect call through AX.
1064 get_tls(CX)
1065 #ifdef GOOS_windows
// If the TLS base in CX is zero, skip the load of g below (JEQ 2(PC)
// jumps over exactly one instruction) and leave BX = 0, so the needm
// path is taken.
1066 MOVL $0, BX
1067 CMPQ CX, $0
1068 JEQ 2(PC)
1069 #endif
1070 MOVQ g(CX), BX
1071 CMPQ BX, $0
1072 JEQ needm
1073 MOVQ g_m(BX), BX
1074 MOVQ BX, savedm-8(SP) // saved copy of oldm
1075 JMP havem
1076 needm:
1077 #ifdef GOOS_windows
1078 // Set up a dummy TLS value. needm is careful not to use it,
1079 // but it needs to be there to prevent autogenerated code from
1080 // crashing when it loads from it.
1081 // We don't need to clear it or anything later because needm
1082 // will set up TLS properly.
1083 MOVQ $zeroTLS<>(SB), DI
1084 CALL runtime·settls(SB)
1085 #endif
1086 // On some platforms (Windows) we cannot call needm through
1087 // an ABI wrapper because there's no TLS set up, and the ABI
1088 // wrapper will try to restore the G register (R14) from TLS.
1089 // Clear X15 because Go expects it and we're not calling
1090 // through a wrapper, but otherwise avoid setting the G
1091 // register in the wrapper and call needm directly. It
1092 // takes no arguments and doesn't return any values so
1093 // there's no need to handle that. Clear R14 so that there's
1094 // a bad value in there, in case needm tries to use it.
1095 XORPS X15, X15
1096 XORQ R14, R14
1097 MOVQ $runtime·needAndBindM<ABIInternal>(SB), AX
1098 CALL AX
1099 MOVQ $0, savedm-8(SP) // 0 marks "m was obtained here"; checked before dropm below
1100 get_tls(CX)
1101 MOVQ g(CX), BX
1102 MOVQ g_m(BX), BX
1103
1104 // Set m->g0->sched.sp = SP, so that if a panic happens
1105 // during the function we are about to execute, it will
1106 // have a valid SP to run on the g0 stack.
1107 // The next few lines (after the havem label)
1108 // will save this SP onto the stack and then write
1109 // the same SP back to m->g0->sched.sp. That seems redundant,
1110 // but if an unrecovered panic happens, unwindm will
1111 // restore the g->sched.sp from the stack location
1112 // and then systemstack will try to use it. If we don't set it here,
1113 // that restored SP will be uninitialized (typically 0) and
1114 // will not be usable.
1115 MOVQ m_g0(BX), SI
1116 MOVQ SP, (g_sched+gobuf_sp)(SI)
1117
1118 havem:
1119 // Now there's a valid m, and we're running on its m->g0.
1120 // Save current m->g0->sched.sp on stack and then set it to SP.
1121 // Save current sp in m->g0->sched.sp in preparation for
1122 // switch back to m->curg stack.
1123 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
// On entry to havem, BX = m and CX = TLS base on both incoming paths.
1124 MOVQ m_g0(BX), SI
1125 MOVQ (g_sched+gobuf_sp)(SI), AX
1126 MOVQ AX, 0(SP)
1127 MOVQ SP, (g_sched+gobuf_sp)(SI)
1128
1129 // Switch to m->curg stack and call runtime.cgocallbackg.
1130 // Because we are taking over the execution of m->curg
1131 // but *not* resuming what had been running, we need to
1132 // save that information (m->curg->sched) so we can restore it.
1133 // We can restore m->curg->sched.sp easily, because calling
1134 // runtime.cgocallbackg leaves SP unchanged upon return.
1135 // To save m->curg->sched.pc, we push it onto the curg stack and
1136 // open a frame the same size as cgocallback's g0 frame.
1137 // Once we switch to the curg stack, the pushed PC will appear
1138 // to be the return PC of cgocallback, so that the traceback
1139 // will seamlessly trace back into the earlier calls.
1140 MOVQ m_curg(BX), SI
1141 MOVQ SI, g(CX)
1142 MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
1143 MOVQ (g_sched+gobuf_pc)(SI), BX
1144 MOVQ BX, -8(DI) // "push" return PC on the g stack
1145 // Gather our arguments into registers.
// (They must be read before SP changes, since FP is addressed relative to it.)
1146 MOVQ fn+0(FP), BX
1147 MOVQ frame+8(FP), CX
1148 MOVQ ctxt+16(FP), DX
1149 // Compute the size of the frame, including return PC and, if
1150 // GOEXPERIMENT=framepointer, the saved base pointer
1151 LEAQ fn+0(FP), AX
1152 SUBQ SP, AX // AX is our actual frame size
1153 SUBQ AX, DI // Allocate the same frame size on the g stack
1154 MOVQ DI, SP
1155
// Outgoing arguments for cgocallbackg: fn, frame, ctxt.
1156 MOVQ BX, 0(SP)
1157 MOVQ CX, 8(SP)
1158 MOVQ DX, 16(SP)
1159 MOVQ $runtime·cgocallbackg(SB), AX
1160 CALL AX // indirect call to bypass nosplit check. We're on a different stack now.
1161
1162 // Compute the size of the frame again. FP and SP have
1163 // completely different values here than they did above,
1164 // but only their difference matters.
1165 LEAQ fn+0(FP), AX
1166 SUBQ SP, AX
1167
1168 // Restore g->sched (== m->curg->sched) from saved values.
1169 get_tls(CX)
1170 MOVQ g(CX), SI
1171 MOVQ SP, DI
1172 ADDQ AX, DI // DI = SP + frame size, i.e. curg's sched.sp before the switch
1173 MOVQ -8(DI), BX // the PC "pushed" at -8(DI) before the call
1174 MOVQ BX, (g_sched+gobuf_pc)(SI)
1175 MOVQ DI, (g_sched+gobuf_sp)(SI)
1176
1177 // Switch back to m->g0's stack and restore m->g0->sched.sp.
1178 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
1179 // so we do not have to restore it.)
1180 MOVQ g(CX), BX
1181 MOVQ g_m(BX), BX
1182 MOVQ m_g0(BX), SI
1183 MOVQ SI, g(CX)
1184 MOVQ (g_sched+gobuf_sp)(SI), SP
1185 MOVQ 0(SP), AX // previous m->g0->sched.sp, stored at havem
1186 MOVQ AX, (g_sched+gobuf_sp)(SI)
1187
1188 // If the m on entry was nil, we called needm above to borrow an m,
1189 // 1. for the duration of the call on non-pthread platforms,
1190 // 2. or the duration of the C thread alive on pthread platforms.
1191 // If the m on entry wasn't nil,
1192 // 1. the thread might be a Go thread,
1193 // 2. or it wasn't the first call from a C thread on pthread platforms,
1194 // since then we skip dropm to reuse the m in the first call.
1195 MOVQ savedm-8(SP), BX
1196 CMPQ BX, $0
1197 JNE done
1198
1199 // Skip dropm to reuse it in the next call, when a pthread key has been created.
1200 MOVQ _cgo_pthread_key_created(SB), AX
1201 // It means cgo is disabled when _cgo_pthread_key_created is a nil pointer, need dropm.
1202 CMPQ AX, $0
1203 JEQ dropm
1204 CMPQ (AX), $0
1205 JNE done
1206
1207 dropm:
// Reached by fall-through from above, or directly from the fn == nil path.
1208 MOVQ $runtime·dropm(SB), AX
1209 CALL AX
1210 #ifdef GOOS_windows
1211 // We need to clear the TLS pointer in case the next
1212 // thread that comes into Go tries to reuse that space
1213 // but uses the same M.
1214 XORQ DI, DI
1215 CALL runtime·settls(SB)
1216 #endif
1217 done:
1218
1219 // Done!
1220 RET
1221
1222 // func setg(gg *g)
1223 // set g. for use by needm.
// Stores gg into the g slot of thread-local storage. Only the TLS slot is
// written here; no register other than the BX/CX scratch registers changes.
1224 TEXT runtime·setg(SB), NOSPLIT, $0-8
1225 MOVQ gg+0(FP), BX
1226 get_tls(CX)
1227 MOVQ BX, g(CX)
1228 RET
1229
1230 // void setg_gcc(G*); set g called from gcc.
// The G pointer arrives in DI. It is stored both in the TLS g slot and in
// R14 (the g register), unlike runtime·setg which writes only the TLS slot.
1231 TEXT setg_gcc<>(SB),NOSPLIT,$0
1232 get_tls(AX)
1233 MOVQ DI, g(AX)
1234 MOVQ DI, R14 // set the g register
1235 RET
1236
// abort raises a breakpoint trap (INT $3). If execution continues past
// the trap, it spins forever; this function has no return path.
1237 TEXT runtime·abort(SB),NOSPLIT,$0-0
1238 INT $3
1239 loop:
1240 JMP loop
1241
1242 // check that SP is in range (g->stack.lo, g->stack.hi); call abort otherwise.
// As coded, both bounds are exclusive: each JHI skips the abort call only
// on a strict unsigned "greater than", so SP == stack.lo or SP == stack.hi
// also aborts. g is read from TLS, not from R14.
1243 TEXT runtime·stackcheck(SB), NOSPLIT|NOFRAME, $0-0
1244 get_tls(CX)
1245 MOVQ g(CX), AX
1246 CMPQ (g_stack+stack_hi)(AX), SP
1247 JHI 2(PC) // stack.hi > SP: skip the abort
1248 CALL runtime·abort(SB)
1249 CMPQ SP, (g_stack+stack_lo)(AX)
1250 JHI 2(PC) // SP > stack.lo: skip the abort
1251 CALL runtime·abort(SB)
1252 RET
1253
1254 // func cputicks() int64
// Returns the 64-bit counter read by RDTSCP when the HasRDTSCP feature
// byte is 1, otherwise by RDTSC preceded by MFENCE;LFENCE. Both
// instructions leave the value split across DX (high 32 bits) and AX
// (low 32 bits); the halves are joined at the done label.
1255 TEXT runtime·cputicks(SB),NOSPLIT,$0-0
1256 CMPB internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
1257 JNE fences
1258 // Instruction stream serializing RDTSCP is supported.
1259 // RDTSCP is supported by Intel Nehalem (2008) and
1260 // AMD K8 Rev. F (2006) and newer.
1261 RDTSCP
1262 done:
// ret = DX<<32 + AX
1263 SHLQ $32, DX
1264 ADDQ DX, AX
1265 MOVQ AX, ret+0(FP)
1266 RET
1267 fences:
1268 // MFENCE is instruction stream serializing and flushes the
1269 // store buffers on AMD. The serialization semantics of LFENCE on AMD
1270 // are dependent on MSR C001_1029 and CPU generation.
1271 // LFENCE on Intel does wait for all previous instructions to have executed.
1272 // Intel recommends MFENCE;LFENCE in its manuals before RDTSC to have all
1273 // previous instructions executed and all previous loads and stores to be globally visible.
1274 // Using MFENCE;LFENCE here aligns the serializing properties without
1275 // runtime detection of CPU manufacturer.
1276 MFENCE
1277 LFENCE
1278 RDTSC
1279 JMP done
1280
1281 // func memhash(p unsafe.Pointer, h, s uintptr) uintptr
1282 // hash function using AES hardware instructions
// Dispatcher: tail-jumps to aeshashbody<> when runtime·useAeshash is
// nonzero, otherwise to runtime·memhashFallback. The argument registers
// are passed through untouched.
1283 TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT,$0-32
1284 // AX = ptr to data
1285 // BX = seed
1286 // CX = size
1287 CMPB runtime·useAeshash(SB), $0
1288 JEQ noaes
1289 JMP aeshashbody<>(SB)
1290 noaes:
1291 JMP runtime·memhashFallback<ABIInternal>(SB)
1292
1293 // func strhash(p unsafe.Pointer, h uintptr) uintptr
// Dispatcher for string hashing. On the AES path the string header that
// AX points to is unpacked into the (AX = data, CX = length) form that
// aeshashbody<> expects; the fallback receives the original arguments.
1294 TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT,$0-24
1295 // AX = ptr to string struct
1296 // BX = seed
1297 CMPB runtime·useAeshash(SB), $0
1298 JEQ noaes
// The length must be loaded first: the following load overwrites AX.
1299 MOVQ 8(AX), CX // length of string
1300 MOVQ (AX), AX // string data
1301 JMP aeshashbody<>(SB)
1302 noaes:
1303 JMP runtime·strhashFallback<ABIInternal>(SB)
1304
1305 // AX: data
1306 // BX: hash seed
1307 // CX: length
1308 // At return: AX = return value
//
// Shared body of the AES-based hash, reached by tail-jump from memhash and
// strhash. The length in CX selects one of seven code paths (0, 1-15, 16,
// 17-32, 33-64, 65-128, 129+). Each path xors the input with seed
// material derived from runtime·aeskeysched, applies AESENC rounds, and
// returns the low 64 bits of the combined state. The paths that use X15
// as scratch zero it again before returning.
1309 TEXT aeshashbody<>(SB),NOSPLIT,$0-0
1310 // Fill an SSE register with our seeds.
1311 MOVQ BX, X0 // 64 bits of per-table hash seed
1312 PINSRW $4, CX, X0 // 16 bits of length
1313 PSHUFHW $0, X0, X0 // repeat length 4 times total
1314 MOVO X0, X1 // save unscrambled seed
1315 PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed
1316 AESENC X0, X0 // scramble seed
1317
// Dispatch on length. All comparisons are unsigned.
1318 CMPQ CX, $16
1319 JB aes0to15
1320 JE aes16
1321 CMPQ CX, $32
1322 JBE aes17to32
1323 CMPQ CX, $64
1324 JBE aes33to64
1325 CMPQ CX, $128
1326 JBE aes65to128
1327 JMP aes129plus
1328
1329 aes0to15:
1330 TESTQ CX, CX
1331 JE aes0
1332
// After adding 16, bits 4-11 of AX are all zero exactly when bits 4-11 of
// the original address were all ones (address & 0xff0 == 0xff0). Only in
// that case is a direct 16-byte load avoided.
1333 ADDQ $16, AX
1334 TESTW $0xff0, AX
1335 JE endofpage
1336
1337 // 16 bytes loaded at this address won't cross
1338 // a page boundary, so we can load it directly.
1339 MOVOU -16(AX), X1
// CX*2*8 = length*16: byte offset of the 16-byte masks<> entry that keeps
// the low `length` bytes.
1340 ADDQ CX, CX
1341 MOVQ $masks<>(SB), AX
1342 PAND (AX)(CX*8), X1
1343 final1:
1344 PXOR X0, X1 // xor data with seed
1345 AESENC X1, X1 // scramble combo 3 times
1346 AESENC X1, X1
1347 AESENC X1, X1
1348 MOVQ X1, AX // return X1
1349 RET
1350
1351 endofpage:
1352 // address ends in 1111xxxx. Might be up against
1353 // a page boundary, so load ending at last byte.
1354 // Then shift bytes down using pshufb.
// AX was already advanced by 16 above, so -32(AX)(CX*1) is
// (original address + length - 16): a 16-byte load whose last byte is the
// last data byte. The shifts<> entry at length*16 moves the top `length`
// bytes down to the bottom and zeroes the rest.
1355 MOVOU -32(AX)(CX*1), X1
1356 ADDQ CX, CX
1357 MOVQ $shifts<>(SB), AX
1358 PSHUFB (AX)(CX*8), X1
1359 JMP final1
1360
1361 aes0:
1362 // Return scrambled input seed
1363 AESENC X0, X0
1364 MOVQ X0, AX // return X0
1365 RET
1366
1367 aes16:
// Exactly 16 bytes: load them whole and share the 1-15 byte tail.
1368 MOVOU (AX), X1
1369 JMP final1
1370
1371 aes17to32:
1372 // make second starting seed
1373 PXOR runtime·aeskeysched+16(SB), X1
1374 AESENC X1, X1
1375
1376 // load data to be hashed
// First 16 and last 16 bytes; the two loads overlap when length < 32.
1377 MOVOU (AX), X2
1378 MOVOU -16(AX)(CX*1), X3
1379
1380 // xor with seed
1381 PXOR X0, X2
1382 PXOR X1, X3
1383
1384 // scramble 3 times
1385 AESENC X2, X2
1386 AESENC X3, X3
1387 AESENC X2, X2
1388 AESENC X3, X3
1389 AESENC X2, X2
1390 AESENC X3, X3
1391
1392 // combine results
1393 PXOR X3, X2
1394 MOVQ X2, AX // return X2
1395 RET
1396
1397 aes33to64:
1398 // make 3 more starting seeds
1399 MOVO X1, X2
1400 MOVO X1, X3
1401 PXOR runtime·aeskeysched+16(SB), X1
1402 PXOR runtime·aeskeysched+32(SB), X2
1403 PXOR runtime·aeskeysched+48(SB), X3
1404 AESENC X1, X1
1405 AESENC X2, X2
1406 AESENC X3, X3
1407
// First 32 and last 32 bytes; the loads overlap when length < 64.
1408 MOVOU (AX), X4
1409 MOVOU 16(AX), X5
1410 MOVOU -32(AX)(CX*1), X6
1411 MOVOU -16(AX)(CX*1), X7
1412
1413 PXOR X0, X4
1414 PXOR X1, X5
1415 PXOR X2, X6
1416 PXOR X3, X7
1417
// Three AESENC rounds on each of the four lanes.
1418 AESENC X4, X4
1419 AESENC X5, X5
1420 AESENC X6, X6
1421 AESENC X7, X7
1422
1423 AESENC X4, X4
1424 AESENC X5, X5
1425 AESENC X6, X6
1426 AESENC X7, X7
1427
1428 AESENC X4, X4
1429 AESENC X5, X5
1430 AESENC X6, X6
1431 AESENC X7, X7
1432
// Fold the four lanes into X4.
1433 PXOR X6, X4
1434 PXOR X7, X5
1435 PXOR X5, X4
1436 MOVQ X4, AX // return X4
1437 RET
1438
1439 aes65to128:
1440 // make 7 more starting seeds
1441 MOVO X1, X2
1442 MOVO X1, X3
1443 MOVO X1, X4
1444 MOVO X1, X5
1445 MOVO X1, X6
1446 MOVO X1, X7
1447 PXOR runtime·aeskeysched+16(SB), X1
1448 PXOR runtime·aeskeysched+32(SB), X2
1449 PXOR runtime·aeskeysched+48(SB), X3
1450 PXOR runtime·aeskeysched+64(SB), X4
1451 PXOR runtime·aeskeysched+80(SB), X5
1452 PXOR runtime·aeskeysched+96(SB), X6
1453 PXOR runtime·aeskeysched+112(SB), X7
1454 AESENC X1, X1
1455 AESENC X2, X2
1456 AESENC X3, X3
1457 AESENC X4, X4
1458 AESENC X5, X5
1459 AESENC X6, X6
1460 AESENC X7, X7
1461
1462 // load data
// First 64 and last 64 bytes; the loads overlap when length < 128.
1463 MOVOU (AX), X8
1464 MOVOU 16(AX), X9
1465 MOVOU 32(AX), X10
1466 MOVOU 48(AX), X11
1467 MOVOU -64(AX)(CX*1), X12
1468 MOVOU -48(AX)(CX*1), X13
1469 MOVOU -32(AX)(CX*1), X14
1470 MOVOU -16(AX)(CX*1), X15
1471
1472 // xor with seed
1473 PXOR X0, X8
1474 PXOR X1, X9
1475 PXOR X2, X10
1476 PXOR X3, X11
1477 PXOR X4, X12
1478 PXOR X5, X13
1479 PXOR X6, X14
1480 PXOR X7, X15
1481
1482 // scramble 3 times
1483 AESENC X8, X8
1484 AESENC X9, X9
1485 AESENC X10, X10
1486 AESENC X11, X11
1487 AESENC X12, X12
1488 AESENC X13, X13
1489 AESENC X14, X14
1490 AESENC X15, X15
1491
1492 AESENC X8, X8
1493 AESENC X9, X9
1494 AESENC X10, X10
1495 AESENC X11, X11
1496 AESENC X12, X12
1497 AESENC X13, X13
1498 AESENC X14, X14
1499 AESENC X15, X15
1500
1501 AESENC X8, X8
1502 AESENC X9, X9
1503 AESENC X10, X10
1504 AESENC X11, X11
1505 AESENC X12, X12
1506 AESENC X13, X13
1507 AESENC X14, X14
1508 AESENC X15, X15
1509
1510 // combine results
1511 PXOR X12, X8
1512 PXOR X13, X9
1513 PXOR X14, X10
1514 PXOR X15, X11
1515 PXOR X10, X8
1516 PXOR X11, X9
1517 PXOR X9, X8
1518 // X15 must be zero on return
// (X15 was used as a data lane above, so it is cleared again here.)
1519 PXOR X15, X15
1520 MOVQ X8, AX // return X8
1521 RET
1522
1523 aes129plus:
1524 // make 7 more starting seeds
1525 MOVO X1, X2
1526 MOVO X1, X3
1527 MOVO X1, X4
1528 MOVO X1, X5
1529 MOVO X1, X6
1530 MOVO X1, X7
1531 PXOR runtime·aeskeysched+16(SB), X1
1532 PXOR runtime·aeskeysched+32(SB), X2
1533 PXOR runtime·aeskeysched+48(SB), X3
1534 PXOR runtime·aeskeysched+64(SB), X4
1535 PXOR runtime·aeskeysched+80(SB), X5
1536 PXOR runtime·aeskeysched+96(SB), X6
1537 PXOR runtime·aeskeysched+112(SB), X7
1538 AESENC X1, X1
1539 AESENC X2, X2
1540 AESENC X3, X3
1541 AESENC X4, X4
1542 AESENC X5, X5
1543 AESENC X6, X6
1544 AESENC X7, X7
1545
1546 // start with last (possibly overlapping) block
1547 MOVOU -128(AX)(CX*1), X8
1548 MOVOU -112(AX)(CX*1), X9
1549 MOVOU -96(AX)(CX*1), X10
1550 MOVOU -80(AX)(CX*1), X11
1551 MOVOU -64(AX)(CX*1), X12
1552 MOVOU -48(AX)(CX*1), X13
1553 MOVOU -32(AX)(CX*1), X14
1554 MOVOU -16(AX)(CX*1), X15
1555
1556 // xor in seed
1557 PXOR X0, X8
1558 PXOR X1, X9
1559 PXOR X2, X10
1560 PXOR X3, X11
1561 PXOR X4, X12
1562 PXOR X5, X13
1563 PXOR X6, X14
1564 PXOR X7, X15
1565
1566 // compute number of remaining 128-byte blocks
// CX = (length-1) / 128. This is at least 1 here because length >= 129,
// so the DECQ/JNE loop below always runs at least once.
1567 DECQ CX
1568 SHRQ $7, CX
1569
1570 PCALIGN $16
1571 aesloop:
1572 // scramble state
1573 AESENC X8, X8
1574 AESENC X9, X9
1575 AESENC X10, X10
1576 AESENC X11, X11
1577 AESENC X12, X12
1578 AESENC X13, X13
1579 AESENC X14, X14
1580 AESENC X15, X15
1581
1582 // scramble state, xor in a block
// X0-X7 are no longer needed as seeds at this point and are reused to
// hold the next 128 bytes of input, read from the front of the data.
1583 MOVOU (AX), X0
1584 MOVOU 16(AX), X1
1585 MOVOU 32(AX), X2
1586 MOVOU 48(AX), X3
1587 AESENC X0, X8
1588 AESENC X1, X9
1589 AESENC X2, X10
1590 AESENC X3, X11
1591 MOVOU 64(AX), X4
1592 MOVOU 80(AX), X5
1593 MOVOU 96(AX), X6
1594 MOVOU 112(AX), X7
1595 AESENC X4, X12
1596 AESENC X5, X13
1597 AESENC X6, X14
1598 AESENC X7, X15
1599
1600 ADDQ $128, AX
1601 DECQ CX
1602 JNE aesloop
1603
1604 // 3 more scrambles to finish
1605 AESENC X8, X8
1606 AESENC X9, X9
1607 AESENC X10, X10
1608 AESENC X11, X11
1609 AESENC X12, X12
1610 AESENC X13, X13
1611 AESENC X14, X14
1612 AESENC X15, X15
1613 AESENC X8, X8
1614 AESENC X9, X9
1615 AESENC X10, X10
1616 AESENC X11, X11
1617 AESENC X12, X12
1618 AESENC X13, X13
1619 AESENC X14, X14
1620 AESENC X15, X15
1621 AESENC X8, X8
1622 AESENC X9, X9
1623 AESENC X10, X10
1624 AESENC X11, X11
1625 AESENC X12, X12
1626 AESENC X13, X13
1627 AESENC X14, X14
1628 AESENC X15, X15
1629
// Fold the eight lanes into X8.
1630 PXOR X12, X8
1631 PXOR X13, X9
1632 PXOR X14, X10
1633 PXOR X15, X11
1634 PXOR X10, X8
1635 PXOR X11, X9
1636 PXOR X9, X8
1637 // X15 must be zero on return
1638 PXOR X15, X15
1639 MOVQ X8, AX // return X8
1640 RET
1641
1642 // func memhash32(p unsafe.Pointer, h uintptr) uintptr
1643 // ABIInternal for performance.
// Hashes the 4 bytes at p. On the AES path the seed fills the low 64 bits
// of X0, the data is inserted as dword 2, and three AESENC rounds keyed by
// the first three 16-byte entries of runtime·aeskeysched are applied. The
// low 64 bits of X0 are returned in AX. Otherwise tail-jumps to
// runtime·memhash32Fallback.
1644 TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT,$0-24
1645 // AX = ptr to data
1646 // BX = seed
1647 CMPB runtime·useAeshash(SB), $0
1648 JEQ noaes
1649 MOVQ BX, X0 // X0 = seed
1650 PINSRD $2, (AX), X0 // data
1651 AESENC runtime·aeskeysched+0(SB), X0
1652 AESENC runtime·aeskeysched+16(SB), X0
1653 AESENC runtime·aeskeysched+32(SB), X0
1654 MOVQ X0, AX // return X0
1655 RET
1656 noaes:
1657 JMP runtime·memhash32Fallback<ABIInternal>(SB)
1658
1659 // func memhash64(p unsafe.Pointer, h uintptr) uintptr
1660 // ABIInternal for performance.
// Hashes the 8 bytes at p. Same structure as memhash32, except that the
// data is inserted as qword 1 of X0 (PINSRQ $1). Falls back to
// runtime·memhash64Fallback when runtime·useAeshash is zero.
1661 TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT,$0-24
1662 // AX = ptr to data
1663 // BX = seed
1664 CMPB runtime·useAeshash(SB), $0
1665 JEQ noaes
1666 MOVQ BX, X0 // X0 = seed
1667 PINSRQ $1, (AX), X0 // data
1668 AESENC runtime·aeskeysched+0(SB), X0
1669 AESENC runtime·aeskeysched+16(SB), X0
1670 AESENC runtime·aeskeysched+32(SB), X0
1671 MOVQ X0, AX // return X0
1672 RET
1673 noaes:
1674 JMP runtime·memhash64Fallback<ABIInternal>(SB)
1675
1676 // simple mask to get rid of data in the high part of the register.
// 16 entries of 16 bytes each (256 bytes total). Entry i, at byte offset
// i*16, has its low i bytes set to 0xff and the rest zero. aeshashbody
// indexes it by length*16 and applies it with PAND to keep the low
// `length` bytes of a 16-byte load. PAND reads it as a memory operand,
// which is why checkASM verifies that the table is 16-byte aligned.
1677 DATA masks<>+0x00(SB)/8, $0x0000000000000000
1678 DATA masks<>+0x08(SB)/8, $0x0000000000000000
1679 DATA masks<>+0x10(SB)/8, $0x00000000000000ff
1680 DATA masks<>+0x18(SB)/8, $0x0000000000000000
1681 DATA masks<>+0x20(SB)/8, $0x000000000000ffff
1682 DATA masks<>+0x28(SB)/8, $0x0000000000000000
1683 DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
1684 DATA masks<>+0x38(SB)/8, $0x0000000000000000
1685 DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
1686 DATA masks<>+0x48(SB)/8, $0x0000000000000000
1687 DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
1688 DATA masks<>+0x58(SB)/8, $0x0000000000000000
1689 DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
1690 DATA masks<>+0x68(SB)/8, $0x0000000000000000
1691 DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
1692 DATA masks<>+0x78(SB)/8, $0x0000000000000000
1693 DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
1694 DATA masks<>+0x88(SB)/8, $0x0000000000000000
1695 DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
1696 DATA masks<>+0x98(SB)/8, $0x00000000000000ff
1697 DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
1698 DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
1699 DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
1700 DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
1701 DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
1702 DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
1703 DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
1704 DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
1705 DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
1706 DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
1707 DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
1708 DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
1709 GLOBL masks<>(SB),RODATA,$256
1710
1711 // func checkASM() bool
// Reports whether both lookup tables used by aeshashbody are 16-byte
// aligned. The two addresses are OR-ed together, so the result is true
// only when the low four bits of both are zero.
1712 TEXT ·checkASM(SB),NOSPLIT,$0-1
1713 // check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
1714 MOVQ $masks<>(SB), AX
1715 MOVQ $shifts<>(SB), BX
1716 ORQ BX, AX
1717 TESTQ $15, AX
1718 SETEQ ret+0(FP)
1719 RET
1720
1721 // these are arguments to pshufb. They move data down from
1722 // the high bytes of the register to the low bytes of the register.
1723 // index is how many bytes to move.
// 16 entries of 16 bytes each (256 bytes total); entry i is at byte offset
// i*16. In entry i, the low i control bytes select source bytes 16-i..15,
// and every other control byte is 0xff, which makes PSHUFB write zero to
// that destination byte. aeshashbody's endofpage path indexes this table
// by length*16.
1724 DATA shifts<>+0x00(SB)/8, $0x0000000000000000
1725 DATA shifts<>+0x08(SB)/8, $0x0000000000000000
1726 DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
1727 DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
1728 DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
1729 DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
1730 DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
1731 DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
1732 DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
1733 DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
1734 DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
1735 DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
1736 DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
1737 DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
1738 DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
1739 DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
1740 DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
1741 DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
1742 DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
1743 DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
1744 DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
1745 DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
1746 DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
1747 DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
1748 DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
1749 DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
1750 DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
1751 DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
1752 DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
1753 DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
1754 DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
1755 DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
1756 GLOBL shifts<>(SB),RODATA,$256
1757
1758 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
1759 // Must obey the gcc calling convention.
// g is read from TLS (not from R14) and the result is returned in AX.
// Only AX and CX are written.
1760 TEXT _cgo_topofstack(SB),NOSPLIT,$0
1761 get_tls(CX)
1762 MOVQ g(CX), AX
1763 MOVQ g_m(AX), AX
1764 MOVQ m_curg(AX), AX
1765 MOVQ (g_stack+stack_hi)(AX), AX
1766 RET
1767
1768 // The top-most function running on a goroutine
1769 // returns to goexit+PCQuantum.
// The one-byte NOP before the CALL means the return address described
// above falls inside this function rather than at its entry (presumably
// PCQuantum is 1 on amd64; confirm against the arch constants). The
// trailing NOP keeps the return address of the CALL inside goexit as well.
1770 TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME|NOFRAME,$0-0
1771 BYTE $0x90 // NOP
1772 CALL runtime·goexit1(SB) // does not return
1773 // traceback from goexit1 must hit code range of goexit
1774 BYTE $0x90 // NOP
1775
1776 // This is called from .init_array and follows the platform, not Go, ABI.
// Appends the moduledata whose address is in DI to the list ending at
// runtime·lastmoduledatap: the current tail's next field is set to DI,
// then DI becomes the new tail. R15 is saved and restored around the body.
1777 TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
1778 PUSHQ R15 // The access to global variables below implicitly uses R15, which is callee-save
1779 MOVQ runtime·lastmoduledatap(SB), AX
1780 MOVQ DI, moduledata_next(AX)
1781 MOVQ DI, runtime·lastmoduledatap(SB)
1782 POPQ R15
1783 RET
1784
1785 // Initialize special registers then jump to sigpanic.
1786 // This function is injected from the signal handler for panicking
1787 // signals. It is quite painful to set X15 in the signal context,
1788 // so we do it here.
// Concretely: R14 is loaded with g from TLS (R14 doubles as the scratch
// register for the TLS base), X15 is zeroed, and control tail-jumps to the
// ABIInternal sigpanic.
1789 TEXT ·sigpanic0(SB),NOSPLIT,$0-0
1790 get_tls(R14)
1791 MOVQ g(R14), R14
1792 XORPS X15, X15
1793 JMP ·sigpanic<ABIInternal>(SB)
1794
1795 // gcWriteBarrier informs the GC about heap pointer writes.
1796 //
1797 // gcWriteBarrier returns space in a write barrier buffer which
1798 // should be filled in by the caller.
1799 // gcWriteBarrier does NOT follow the Go ABI. It accepts the
1800 // number of bytes of buffer needed in R11, and returns a pointer
1801 // to the buffer space in R11.
1802 // It clobbers FLAGS. It does not clobber any general-purpose registers,
1803 // but may clobber others (e.g., SSE registers).
1804 // Typical use would be, when doing *(CX+88) = AX
1805 // CMPL $0, runtime.writeBarrier(SB)
1806 // JEQ dowrite
1807 // CALL runtime.gcWriteBarrier2(SB)
1808 // MOVQ AX, (R11)
1809 // MOVQ 88(CX), DX
1810 // MOVQ DX, 8(R11)
1811 // dowrite:
1812 // MOVQ AX, 88(CX)
//
// Frame layout ($112): 0-88(SP) hold the general-purpose registers spilled
// around wbBufFlush on the slow path; 96(SP) and 104(SP) hold R12 and R13,
// which the fast path uses as scratch.
1813 TEXT gcWriteBarrier<>(SB),NOSPLIT,$112
1814 // Save the registers clobbered by the fast path. This is slightly
1815 // faster than having the caller spill these.
1816 MOVQ R12, 96(SP)
1817 MOVQ R13, 104(SP)
1818 retry:
1819 // TODO: Consider passing g.m.p in as an argument so they can be shared
1820 // across a sequence of write barriers.
1821 MOVQ g_m(R14), R13
1822 MOVQ m_p(R13), R13
1823 // Get current buffer write position.
1824 MOVQ (p_wbBuf+wbBuf_next)(R13), R12 // original next position
1825 ADDQ R11, R12 // new next position
1826 // Is the buffer full?
1827 CMPQ R12, (p_wbBuf+wbBuf_end)(R13)
1828 JA flush
1829 // Commit to the larger buffer.
1830 MOVQ R12, (p_wbBuf+wbBuf_next)(R13)
1831 // Make return value (the original next position)
1832 SUBQ R11, R12
1833 MOVQ R12, R11
1834 // Restore registers.
1835 MOVQ 96(SP), R12
1836 MOVQ 104(SP), R13
1837 RET
1838
1839 flush:
1840 // Save all general purpose registers since these could be
1841 // clobbered by wbBufFlush and were not saved by the caller.
1842 // It is possible for wbBufFlush to clobber other registers
1843 // (e.g., SSE registers), but the compiler takes care of saving
1844 // those in the caller if necessary. This strikes a balance
1845 // with registers that are likely to be used.
1846 //
1847 // We don't have type information for these, but all code under
1848 // here is NOSPLIT, so nothing will observe these.
1849 //
1850 // TODO: We could strike a different balance; e.g., saving X0
1851 // and not saving GP registers that are less likely to be used.
1852 MOVQ DI, 0(SP)
1853 MOVQ AX, 8(SP)
1854 MOVQ BX, 16(SP)
1855 MOVQ CX, 24(SP)
1856 MOVQ DX, 32(SP)
1857 // DI already saved
1858 MOVQ SI, 40(SP)
1859 MOVQ BP, 48(SP)
1860 MOVQ R8, 56(SP)
1861 MOVQ R9, 64(SP)
1862 MOVQ R10, 72(SP)
1863 MOVQ R11, 80(SP)
1864 // R12 already saved
1865 // R13 already saved
1866 // R14 is g
1867 MOVQ R15, 88(SP)
1868
1869 CALL runtime·wbBufFlush(SB)
1870
1871 MOVQ 0(SP), DI
1872 MOVQ 8(SP), AX
1873 MOVQ 16(SP), BX
1874 MOVQ 24(SP), CX
1875 MOVQ 32(SP), DX
1876 MOVQ 40(SP), SI
1877 MOVQ 48(SP), BP
1878 MOVQ 56(SP), R8
1879 MOVQ 64(SP), R9
1880 MOVQ 72(SP), R10
1881 MOVQ 80(SP), R11 // the requested byte count, needed again by retry
1882 MOVQ 88(SP), R15
// Start over from retry: m.p and the buffer position are reloaded there.
1883 JMP retry
1884
// gcWriteBarrier1 through gcWriteBarrier8 are the public entry points to
// gcWriteBarrier<>. Entry point N loads 8*N, the number of buffer bytes
// requested, into R11 and tail-jumps to the shared implementation, which
// returns the buffer pointer in R11.
1885 TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1886 MOVL $8, R11
1887 JMP gcWriteBarrier<>(SB)
1888 TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1889 MOVL $16, R11
1890 JMP gcWriteBarrier<>(SB)
1891 TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1892 MOVL $24, R11
1893 JMP gcWriteBarrier<>(SB)
1894 TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1895 MOVL $32, R11
1896 JMP gcWriteBarrier<>(SB)
1897 TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1898 MOVL $40, R11
1899 JMP gcWriteBarrier<>(SB)
1900 TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1901 MOVL $48, R11
1902 JMP gcWriteBarrier<>(SB)
1903 TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1904 MOVL $56, R11
1905 JMP gcWriteBarrier<>(SB)
1906 TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1907 MOVL $64, R11
1908 JMP gcWriteBarrier<>(SB)
1909
// Error text reported by debugCallV2 when the requested argument frame
// exceeds the largest debugCall<N> size. The length 20 is repeated as an
// immediate in debugCallV2, so the two must be kept in sync.
1910 DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
1911 GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below
1912
1913 // debugCallV2 is the entry point for debugger-injected function
1914 // calls on running goroutines. It informs the runtime that a
1915 // debug call has been injected and creates a call frame for the
1916 // debugger to fill in.
1917 //
1918 // To inject a function call, a debugger should:
1919 // 1. Check that the goroutine is in state _Grunning and that
1920 // there are at least 256 bytes free on the stack.
1921 // 2. Push the current PC on the stack (updating SP).
1922 // 3. Write the desired argument frame size at SP-16 (using the SP
1923 // after step 2).
1924 // 4. Save all machine registers (including flags and XMM registers)
1925 // so they can be restored later by the debugger.
1926 // 5. Set the PC to debugCallV2 and resume execution.
1927 //
1928 // If the goroutine is in state _Grunnable, then it's not generally
1929 // safe to inject a call because it may return out via other runtime
1930 // operations. Instead, the debugger should unwind the stack to find
1931 // the return to non-runtime code, add a temporary breakpoint there,
1932 // and inject the call once that breakpoint is hit.
1933 //
1934 // If the goroutine is in any other state, it's not safe to inject a call.
1935 //
1936 // This function communicates back to the debugger by setting R12 and
1937 // invoking INT3 to raise a breakpoint signal. See the comments in the
1938 // implementation for the protocol the debugger is expected to
1939 // follow. InjectDebugCall in the runtime tests demonstrates this protocol.
1940 //
1941 // The debugger must ensure that any pointers passed to the function
1942 // obey escape analysis requirements. Specifically, it must not pass
1943 // a stack pointer to an escaping argument. debugCallV2 cannot check
1944 // this invariant.
1945 //
1946 // This is ABIInternal because Go code injects its PC directly into new
1947 // goroutine stacks.
//
// R12 codes raised by this function and by the debugCall<N> functions:
// 0 = frame ready, 1 = call returned, 2 = call panicked, 8 = call cannot
// be injected (reason string at the top of the stack), 16 = restore
// registers and resume.
//
// Frame layout ($152): the 15 saved general-purpose registers occupy the
// slots from -8(SP) down to -120(SP) relative to the pseudo-SP, the saved
// argument frame size is at frameSize-128(SP), and the lowest slots
// (0/8/16 off the hardware SP) carry call arguments and results.
1948 TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT,$152-0
1949 // Save all registers that may contain pointers so they can be
1950 // conservatively scanned.
1951 //
1952 // We can't do anything that might clobber any of these
1953 // registers before this.
1954 MOVQ R15, r15-(14*8+8)(SP)
1955 MOVQ R14, r14-(13*8+8)(SP)
1956 MOVQ R13, r13-(12*8+8)(SP)
1957 MOVQ R12, r12-(11*8+8)(SP)
1958 MOVQ R11, r11-(10*8+8)(SP)
1959 MOVQ R10, r10-(9*8+8)(SP)
1960 MOVQ R9, r9-(8*8+8)(SP)
1961 MOVQ R8, r8-(7*8+8)(SP)
1962 MOVQ DI, di-(6*8+8)(SP)
1963 MOVQ SI, si-(5*8+8)(SP)
1964 MOVQ BP, bp-(4*8+8)(SP)
1965 MOVQ BX, bx-(3*8+8)(SP)
1966 MOVQ DX, dx-(2*8+8)(SP)
1967 // Save the frame size before we clobber it. Either of the last
1968 // saves could clobber this depending on whether there's a saved BP.
// DX is free to hold it because DX itself was saved just above.
1969 MOVQ frameSize-24(FP), DX // aka -16(RSP) before prologue
1970 MOVQ CX, cx-(1*8+8)(SP)
1971 MOVQ AX, ax-(0*8+8)(SP)
1972
1973 // Save the argument frame size.
1974 MOVQ DX, frameSize-128(SP)
1975
1976 // Perform a safe-point check.
// debugCallCheck takes the caller's PC at 0(SP); its result is read back
// as a pointer at 8(SP) and a length at 16(SP). A zero pointer means the
// check passed.
1977 MOVQ retpc-8(FP), AX // Caller's PC
1978 MOVQ AX, 0(SP)
1979 CALL runtime·debugCallCheck(SB)
1980 MOVQ 8(SP), AX
1981 TESTQ AX, AX
1982 JZ good
1983 // The safety check failed. Put the reason string at the top
1984 // of the stack.
1985 MOVQ AX, 0(SP)
1986 MOVQ 16(SP), AX
1987 MOVQ AX, 8(SP)
1988 // Set R12 to 8 and invoke INT3. The debugger should get the
1989 // reason a call can't be injected from the top of the stack
1990 // and resume execution.
1991 MOVQ $8, R12
1992 BYTE $0xcc
1993 JMP restore
1994
1995 good:
1996 // Registers are saved and it's safe to make a call.
1997 // Open up a call frame, moving the stack if necessary.
1998 //
1999 // Once the frame is allocated, this will set R12 to 0 and
2000 // invoke INT3. The debugger should write the argument
2001 // frame for the call at SP, set up argument registers, push
2002 // the trapping PC on the stack, set the PC to the function to
2003 // call, set RDX to point to the closure (if a closure call),
2004 // and resume execution.
2005 //
2006 // If the function returns, this will set R12 to 1 and invoke
2007 // INT3. The debugger can then inspect any return value saved
2008 // on the stack at SP and in registers and resume execution again.
2009 //
2010 // If the function panics, this will set R12 to 2 and invoke INT3.
2011 // The interface{} value of the panic will be at SP. The debugger
2012 // can inspect the panic value and resume execution again.
//
// DEBUG_CALL_DISPATCH expands to six instructions. When the frame size in
// AX exceeds MAXSIZE, JA 5(PC) skips the remaining four and lands on the
// next expansion. Otherwise the address of NAME is passed to debugCallWrap
// at 0(SP), and control then goes to restore.
2013 #define DEBUG_CALL_DISPATCH(NAME,MAXSIZE) \
2014 CMPQ AX, $MAXSIZE; \
2015 JA 5(PC); \
2016 MOVQ $NAME(SB), AX; \
2017 MOVQ AX, 0(SP); \
2018 CALL runtime·debugCallWrap(SB); \
2019 JMP restore
2020
// Pick the smallest debugCall<N> whose frame is large enough.
2021 MOVQ frameSize-128(SP), AX
2022 DEBUG_CALL_DISPATCH(debugCall32<>, 32)
2023 DEBUG_CALL_DISPATCH(debugCall64<>, 64)
2024 DEBUG_CALL_DISPATCH(debugCall128<>, 128)
2025 DEBUG_CALL_DISPATCH(debugCall256<>, 256)
2026 DEBUG_CALL_DISPATCH(debugCall512<>, 512)
2027 DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
2028 DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
2029 DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
2030 DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
2031 DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
2032 DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
2033 DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
2034 // The frame size is too large. Report the error.
// Same protocol as a failed safe-point check: string pointer at 0(SP),
// length at 8(SP), R12 = 8.
2035 MOVQ $debugCallFrameTooLarge<>(SB), AX
2036 MOVQ AX, 0(SP)
2037 MOVQ $20, 8(SP) // length of debugCallFrameTooLarge string
2038 MOVQ $8, R12
2039 BYTE $0xcc
2040 JMP restore
2041
2042 restore:
2043 // Calls and failures resume here.
2044 //
2045 // Set R12 to 16 and invoke INT3. The debugger should restore
2046 // all registers except RIP and RSP and resume execution.
2047 MOVQ $16, R12
2048 BYTE $0xcc
2049 // We must not modify flags after this point.
2050
2051 // Restore pointer-containing registers, which may have been
2052 // modified from the debugger's copy by stack copying.
// Only MOVQ is used below, which leaves the flags untouched.
2053 MOVQ ax-(0*8+8)(SP), AX
2054 MOVQ cx-(1*8+8)(SP), CX
2055 MOVQ dx-(2*8+8)(SP), DX
2056 MOVQ bx-(3*8+8)(SP), BX
2057 MOVQ bp-(4*8+8)(SP), BP
2058 MOVQ si-(5*8+8)(SP), SI
2059 MOVQ di-(6*8+8)(SP), DI
2060 MOVQ r8-(7*8+8)(SP), R8
2061 MOVQ r9-(8*8+8)(SP), R9
2062 MOVQ r10-(9*8+8)(SP), R10
2063 MOVQ r11-(10*8+8)(SP), R11
2064 MOVQ r12-(11*8+8)(SP), R12
2065 MOVQ r13-(12*8+8)(SP), R13
2066 MOVQ r14-(13*8+8)(SP), R14
2067 MOVQ r15-(14*8+8)(SP), R15
2068
2069 RET
2070
// DEBUG_CALL_FN(NAME, MAXSIZE) defines one call-injection trampoline:
// a WRAPPER function NAME with a MAXSIZE-byte frame and no arguments.
// DEBUG_CALL_DISPATCH compares the requested frame size against each
// MAXSIZE in ascending order and hands the first trampoline that is
// large enough to runtime·debugCallWrap, so the list of sizes below
// has to stay in step with the dispatch list.
//
// Each trampoline performs two steps of the debugger protocol:
//   R12 = 0, INT3: the frame is allocated; the debugger writes the
//                  argument frame at SP and starts the injected call.
//   R12 = 1, INT3: the injected call returned; the debugger can read
//                  any results at SP and in registers.
// BYTE $0xcc is the one-byte INT3 breakpoint instruction.
// NO_LOCAL_POINTERS comes from funcdata.h; presumably it is needed
// because the frame contents are written by the debugger rather than
// by compiled code - TODO confirm.
//
2071 // runtime.debugCallCheck assumes that functions defined with the
2072 // DEBUG_CALL_FN macro are safe points to inject calls.
2073 #define DEBUG_CALL_FN(NAME,MAXSIZE) \
2074 TEXT NAME(SB),WRAPPER,$MAXSIZE-0; \
2075 NO_LOCAL_POINTERS; \
2076 MOVQ $0, R12; \
2077 BYTE $0xcc; \
2078 MOVQ $1, R12; \
2079 BYTE $0xcc; \
2080 RET
// One trampoline per power-of-two frame size from 32 to 65536 bytes.
2081 DEBUG_CALL_FN(debugCall32<>, 32)
2082 DEBUG_CALL_FN(debugCall64<>, 64)
2083 DEBUG_CALL_FN(debugCall128<>, 128)
2084 DEBUG_CALL_FN(debugCall256<>, 256)
2085 DEBUG_CALL_FN(debugCall512<>, 512)
2086 DEBUG_CALL_FN(debugCall1024<>, 1024)
2087 DEBUG_CALL_FN(debugCall2048<>, 2048)
2088 DEBUG_CALL_FN(debugCall4096<>, 4096)
2089 DEBUG_CALL_FN(debugCall8192<>, 8192)
2090 DEBUG_CALL_FN(debugCall16384<>, 16384)
2091 DEBUG_CALL_FN(debugCall32768<>, 32768)
2092 DEBUG_CALL_FN(debugCall65536<>, 65536)
2093
2094 // func debugCallPanicked(val interface{})
//
// debugCallPanicked reports a panic raised by an injected call to the
// debugger. It copies the two words of val (type word, then data word)
// from its arguments into its own frame at 0(SP) and 8(SP), sets R12
// to 2 and executes INT3, so the debugger finds the panic value at SP
// when it stops. It returns once the debugger resumes execution.
//
// Frame $16-16: 16 bytes of locals for the copy, 16 bytes of arguments
// (the interface value itself).
2095 TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
2096 // Copy the panic value to the top of stack.
2097 MOVQ val_type+0(FP), AX // first word of val: the type word
2098 MOVQ AX, 0(SP)
2099 MOVQ val_data+8(FP), AX // second word of val: the data word
2100 MOVQ AX, 8(SP)
2101 MOVQ $2, R12 // status 2 = the injected call panicked
2102 BYTE $0xcc // INT3: stop so the debugger can inspect the value
2103 RET
2104
// panicBounds stores the integer registers into an array in its frame
// and calls runtime·panicBounds64 with the PC following the call to
// panicBounds (in AX) and the address of that array (in BX).
//
// Frame layout ($144-0):
//   0(SP)..15(SP)   not written by this code (presumably reserved for
//                   the callee - TODO confirm)
//   16+8*n(SP)      slot for hardware register number n, where
//                   AX=0 CX=1 DX=2 BX=3 SP=4 BP=5 SI=6 DI=7 and
//                   R8..R15 = 8..15
// The SP (n=4) and R14 (n=14) slots are left unwritten; skipping them
// instead of packing the array keeps every saved register at the index
// equal to its register number.
2105 TEXT runtime·panicBounds<ABIInternal>(SB),NOSPLIT,$144-0
2106 NO_LOCAL_POINTERS
2107 // Save all 14 int registers that could have an index in them.
2108 // They may be pointers, but if they are they are dead.
2109 MOVQ AX, 16(SP)
2110 MOVQ CX, 24(SP)
2111 MOVQ DX, 32(SP)
2112 MOVQ BX, 40(SP)
2113 // skip SP @ 48(SP)
2114 MOVQ BP, 56(SP)
2115 MOVQ SI, 64(SP)
2116 MOVQ DI, 72(SP)
2117 MOVQ R8, 80(SP)
2118 MOVQ R9, 88(SP)
2119 MOVQ R10, 96(SP)
2120 MOVQ R11, 104(SP)
2121 MOVQ R12, 112(SP)
2122 MOVQ R13, 120(SP)
2123 // skip R14 @ 128(SP) (aka G)
2124 MOVQ R15, 136(SP)
2125
// Fetch the return PC by raw offset from the hardware SP:
// 152 = 144 bytes of frame + 8 more bytes below the return address
// (presumably the BP saved by the prologue - TODO confirm).
2126 MOVQ SP, AX // hide SP read from vet
2127 MOVQ 152(AX), AX // PC immediately after call to panicBounds
2128 LEAQ 16(SP), BX // BX = address of the AX slot, i.e. start of the register array
// AX and BX are presumably the first two ABIInternal integer argument
// registers, making this panicBounds64(pc, &regs) - verify against the
// Go declaration of panicBounds64.
2129 CALL runtime·panicBounds64<ABIInternal>(SB)
2130 RET
2131
// Definitions of runtime·tls_g, an 8-byte NOPTR global, for the two
// GOOS values handled here. Presumably it holds the thread-local-storage
// offset used to reach g - TODO confirm against its users.
2132 #ifdef GOOS_android
2133 // Use the free TLS_SLOT_APP slot #2 on Android Q.
2134 // Earlier androids are set up in gcc_android.c.
// Initial value 16 = slot #2 times 8 bytes per slot.
2135 DATA runtime·tls_g+0(SB)/8, $16
2136 GLOBL runtime·tls_g+0(SB), NOPTR, $8
2137 #endif
2138 #ifdef GOOS_windows
// Declared with no DATA initializer, so the symbol starts out as zero;
// presumably it is filled in at run time - TODO confirm.
2139 GLOBL runtime·tls_g+0(SB), NOPTR, $8
2140 #endif
2141
2142 // The compiler and assembler's -spectre=ret mode rewrites
2143 // all indirect CALL AX / JMP AX instructions to be
2144 // CALL retpolineAX / JMP retpolineAX.
2145 // See https://support.google.com/faqs/answer/7625886.
//
// RETPOLINE(reg) emits, as raw bytes, a thunk body that transfers
// control to the address held in hardware register number reg without
// executing an indirect branch instruction:
//
//           CALL setup        E8 04 00 00 00  rel32 = 2+2, skipping PAUSE and JMP
//   nospec: PAUSE             F3 90
//           JMP nospec        EB FC           rel8 = -(2+2), back to PAUSE
//   setup:  MOVQ reg, 0(SP)   REX 89 ModRM SIB
//           RET               C3
//
// CALL pushes the address of nospec as its return address. setup then
// overwrites that stack slot with the contents of reg, so RET jumps to
// the real target. The PAUSE/JMP loop at nospec is where the address
// pushed by CALL points; per the linked article it is intended to
// capture speculative execution of the RET.
//
// Encoding of the store: 0x48 is REX.W; bit 3 of reg, shifted right by
// one, becomes REX.R (0x04). The low three bits of reg fill the ModRM
// reg field; ModRM r/m = 100 together with SIB 0x24 addresses 0(SP).
2146 #define RETPOLINE(reg) \
2147 /* CALL setup */ BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0; \
2148 /* nospec: */ \
2149 /* PAUSE */ BYTE $0xF3; BYTE $0x90; \
2150 /* JMP nospec */ BYTE $0xEB; BYTE $-(2+2); \
2151 /* setup: */ \
2152 /* MOVQ reg, 0(SP) */ BYTE $0x48|((reg&8)>>1); BYTE $0x89; \
2153 BYTE $0x04|((reg&7)<<3); BYTE $0x24; \
2154 /* RET */ BYTE $0xC3
2155
// One thunk per register; the macro argument is the hardware register
// number. Number 4 (SP) has no thunk.
2156 TEXT runtime·retpolineAX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(0)
2157 TEXT runtime·retpolineCX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(1)
2158 TEXT runtime·retpolineDX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(2)
2159 TEXT runtime·retpolineBX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(3)
2160 /* SP is 4, can't happen / magic encodings */
2161 TEXT runtime·retpolineBP(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(5)
2162 TEXT runtime·retpolineSI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(6)
2163 TEXT runtime·retpolineDI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(7)
2164 TEXT runtime·retpolineR8(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(8)
2165 TEXT runtime·retpolineR9(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(9)
2166 TEXT runtime·retpolineR10(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(10)
2167 TEXT runtime·retpolineR11(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(11)
2168 TEXT runtime·retpolineR12(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(12)
2169 TEXT runtime·retpolineR13(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(13)
2170 TEXT runtime·retpolineR14(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(14)
2171 TEXT runtime·retpolineR15(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(15)
2172
// getfp returns the current value of the BP register in AX.
// The function is declared NOFRAME with a zero-size frame and its body
// never writes BP, so the value returned is whatever the caller had in
// BP at the call.
// Presumably declared on the Go side as func getfp() uintptr - TODO confirm.
2173 TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
2174 MOVQ BP, AX // result goes in AX, presumably the first ABIInternal integer result register
2175 RET
2176
View as plain text