Text file
src/runtime/asm_amd64.s
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "go_tls.h"
7 #include "funcdata.h"
8 #include "textflag.h"
9 #include "cgo/abi_amd64.h"
10
11 // _rt0_amd64 is common startup code for most amd64 systems when using
12 // internal linking. This is the entry point for the program from the
13 // kernel for an ordinary -buildmode=exe program. The stack holds the
14 // number of arguments and the C-style argv.
15 TEXT _rt0_amd64(SB),NOSPLIT,$-8
// $-8: raw entry point with no frame; only the kernel-pushed argc/argv are on the stack.
16 MOVQ 0(SP), DI // argc
17 LEAQ 8(SP), SI // argv
18 JMP runtime·rt0_go(SB)
19
20 // main is common startup code for most amd64 systems when using
21 // external linking. The C startup code will call the symbol "main"
22 // passing argc and argv in the usual C ABI registers DI and SI.
23 TEXT main(SB),NOSPLIT,$-8
// argc/argv are already in DI/SI (C ABI argument registers), which is
// exactly where rt0_go expects them, so we can tail-jump directly.
24 JMP runtime·rt0_go(SB)
25
26 // _rt0_amd64_lib is common startup code for most amd64 systems when
27 // using -buildmode=c-archive or -buildmode=c-shared. The linker will
28 // arrange to invoke this function as a global constructor (for
29 // c-archive) or when the shared library is loaded (for c-shared).
30 // We expect argc and argv to be passed in the usual C ABI registers
31 // DI and SI.
32 TEXT _rt0_amd64_lib(SB),NOSPLIT|NOFRAME,$0
33 // Transition from C ABI to Go ABI.
34 PUSH_REGS_HOST_TO_ABI0()
35
// Stash argc/argv for the deferred runtime initialization in _rt0_amd64_lib_go.
36 MOVQ DI, _rt0_amd64_lib_argc<>(SB)
37 MOVQ SI, _rt0_amd64_lib_argv<>(SB)
38
39 // Synchronous initialization.
40 #ifndef GOOS_windows
41 // Avoid calling it on Windows because it is not used
42 // and it would crash the application due to the autogenerated
43 // ABI wrapper trying to access a non-existent TLS slot.
44 CALL runtime·libpreinit(SB)
45 #endif
46
47 // Create a new thread to finish Go runtime initialization.
48 MOVQ _cgo_sys_thread_create(SB), AX
49 TESTQ AX, AX
50 JZ nocgo
51
52 // We're calling back to C.
53 // Align stack per C ABI requirements.
54 MOVQ SP, BX // Callee-save in C ABI
55 ANDQ $~15, SP
56 MOVQ $_rt0_amd64_lib_go(SB), DI
57 MOVQ $0, SI
58 #ifdef GOOS_windows
59 // For Windows ABI
60 MOVQ DI, CX
61 MOVQ SI, DX
62 // Leave space for four words on the stack as required
63 // by the Windows amd64 calling convention.
64 ADJSP $32
65 #endif
66 CALL AX
67 #ifdef GOOS_windows
68 ADJSP $-32 // just to make the assembler not complain about unbalanced stack
69 #endif
70 MOVQ BX, SP
71 JMP restore
72
73 nocgo:
// No _cgo_sys_thread_create hook: start the initialization thread via newosproc0.
74 ADJSP $16
75 MOVQ $0x800000, 0(SP) // stacksize (0x800000 = 8 MiB)
76 MOVQ $_rt0_amd64_lib_go(SB), AX
77 MOVQ AX, 8(SP) // fn
78 CALL runtime·newosproc0(SB)
79 ADJSP $-16
80
81 restore:
82 POP_REGS_HOST_TO_ABI0()
83 RET
84
85 // _rt0_amd64_lib_go initializes the Go runtime.
86 // This is started in a separate thread by _rt0_amd64_lib.
87 TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
// Reload the argc/argv that _rt0_amd64_lib saved, then enter the
// common startup path just like an ordinary executable would.
88 MOVQ _rt0_amd64_lib_argc<>(SB), DI
89 MOVQ _rt0_amd64_lib_argv<>(SB), SI
90 JMP runtime·rt0_go(SB)
91
// Zero-initialized storage for argc/argv, written by _rt0_amd64_lib
// and read by _rt0_amd64_lib_go on a separate thread.
92 DATA _rt0_amd64_lib_argc<>(SB)/8, $0
93 GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
94 DATA _rt0_amd64_lib_argv<>(SB)/8, $0
95 GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
96
// bad_cpu_msg is the 84-byte message written to stderr by rt0_go's
// bad_cpu path. At most one of these DATA directives is active,
// selected by the GOAMD64 build setting.
97 #ifdef GOAMD64_v2
98 DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v2 microarchitecture support.\n"
99 #endif
100
101 #ifdef GOAMD64_v3
102 DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v3 microarchitecture support.\n"
103 #endif
104
105 #ifdef GOAMD64_v4
106 DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v4 microarchitecture support.\n"
107 #endif
108
109 GLOBL bad_cpu_msg<>(SB), RODATA, $84
110
111 // Define a list of AMD64 microarchitecture level features
112 // https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
113
// Bits in CPUID leaf 1 ECX (checked in rt0_go):
114 // SSE3 SSSE3 CMPXCHG16B SSE4.1 SSE4.2 POPCNT
115 #define V2_FEATURES_CX (1 << 0 | 1 << 9 | 1 << 13 | 1 << 19 | 1 << 20 | 1 << 23)
// Bits in CPUID leaf 0x80000001 ECX:
116 // LAHF/SAHF
117 #define V2_EXT_FEATURES_CX (1 << 0)
118 // FMA MOVBE OSXSAVE AVX F16C
119 #define V3_FEATURES_CX (V2_FEATURES_CX | 1 << 12 | 1 << 22 | 1 << 27 | 1 << 28 | 1 << 29)
120 // ABM (for LZCNT)
121 #define V3_EXT_FEATURES_CX (V2_EXT_FEATURES_CX | 1 << 5)
// Bits in CPUID leaf 7 (subleaf 0) EBX:
122 // BMI1 AVX2 BMI2
123 #define V3_EXT_FEATURES_BX (1 << 3 | 1 << 5 | 1 << 8)
// Bits in XCR0, read with XGETBV in rt0_go (OS-enabled register state):
124 // XMM YMM
125 #define V3_OS_SUPPORT_AX (1 << 1 | 1 << 2)
126
127 #define V4_FEATURES_CX V3_FEATURES_CX
128
129 #define V4_EXT_FEATURES_CX V3_EXT_FEATURES_CX
130 // AVX512F AVX512DQ AVX512CD AVX512BW AVX512VL
131 #define V4_EXT_FEATURES_BX (V3_EXT_FEATURES_BX | 1 << 16 | 1 << 17 | 1 << 28 | 1 << 30 | 1 << 31)
132 // OPMASK ZMM
133 #define V4_OS_SUPPORT_AX (V3_OS_SUPPORT_AX | 1 << 5 | (1 << 6 | 1 << 7))
134
// The NEED_* macros below select which checks rt0_go performs for the
// configured GOAMD64 level; at most one of these #ifdef groups is active.
135 #ifdef GOAMD64_v2
136 #define NEED_MAX_CPUID 0x80000001
137 #define NEED_FEATURES_CX V2_FEATURES_CX
138 #define NEED_EXT_FEATURES_CX V2_EXT_FEATURES_CX
139 #endif
140
141 #ifdef GOAMD64_v3
142 #define NEED_MAX_CPUID 0x80000001
143 #define NEED_FEATURES_CX V3_FEATURES_CX
144 #define NEED_EXT_FEATURES_CX V3_EXT_FEATURES_CX
145 #define NEED_EXT_FEATURES_BX V3_EXT_FEATURES_BX
146 #define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
147 #endif
148
149 #ifdef GOAMD64_v4
150 #define NEED_MAX_CPUID 0x80000001
151 #define NEED_FEATURES_CX V4_FEATURES_CX
152 #define NEED_EXT_FEATURES_CX V4_EXT_FEATURES_CX
153 #define NEED_EXT_FEATURES_BX V4_EXT_FEATURES_BX
154
155 // Darwin requires a different approach to check AVX512 support, see CL 285572.
156 #ifdef GOOS_darwin
157 #define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
158 // These values are from:
159 // https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
160 #define commpage64_base_address 0x00007fffffe00000
161 #define commpage64_cpu_capabilities64 (commpage64_base_address+0x010)
162 #define commpage64_version (commpage64_base_address+0x01E)
163 #define AVX512F 0x0000004000000000
164 #define AVX512CD 0x0000008000000000
165 #define AVX512DQ 0x0000010000000000
166 #define AVX512BW 0x0000020000000000
167 #define AVX512VL 0x0000100000000000
168 #define NEED_DARWIN_SUPPORT (AVX512F | AVX512DQ | AVX512CD | AVX512BW | AVX512VL)
169 #else
170 #define NEED_OS_SUPPORT_AX V4_OS_SUPPORT_AX
171 #endif
172
173 #endif
174
// rt0_go is the common Go startup path: it sets up g0's stack bounds,
// probes CPU features, establishes TLS, verifies GOAMD64 requirements,
// and starts the scheduler. DI = argc, SI = argv on entry.
175 TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
176 // copy arguments forward on an even stack
177 MOVQ DI, AX // argc
178 MOVQ SI, BX // argv
179 SUBQ $(5*8), SP // 3args 2auto
180 ANDQ $~15, SP
181 MOVQ AX, 24(SP)
182 MOVQ BX, 32(SP)
183
184 // create istack out of the given (operating system) stack.
185 // _cgo_init may update stackguard.
186 MOVQ $runtime·g0(SB), DI
// Assume 64 KB of usable OS stack below the current SP for g0.
187 LEAQ (-64*1024)(SP), BX
188 MOVQ BX, g_stackguard0(DI)
189 MOVQ BX, g_stackguard1(DI)
190 MOVQ BX, (g_stack+stack_lo)(DI)
191 MOVQ SP, (g_stack+stack_hi)(DI)
192
193 // find out information about the processor we're on
194 MOVL $0, AX
195 CPUID
196 CMPL AX, $0
197 JE nocpuinfo
198
// CPUID leaf 0 returns the vendor string in BX:DX:CX.
199 CMPL BX, $0x756E6547 // "Genu"
200 JNE notintel
201 CMPL DX, $0x49656E69 // "ineI"
202 JNE notintel
203 CMPL CX, $0x6C65746E // "ntel"
204 JNE notintel
205 MOVB $1, runtime·isIntel(SB)
206
207 notintel:
208 // Load EAX=1 cpuid flags
209 MOVL $1, AX
210 CPUID
211 MOVL AX, runtime·processorVersionInfo(SB)
212
213 nocpuinfo:
214 // if there is an _cgo_init, call it.
215 MOVQ _cgo_init(SB), AX
216 TESTQ AX, AX
217 JZ needtls
218 // arg 1: g0, already in DI
219 MOVQ $setg_gcc<>(SB), SI // arg 2: setg_gcc
220 MOVQ $0, DX // arg 3, 4: not used when using platform's TLS
221 MOVQ $0, CX
222 #ifdef GOOS_android
223 MOVQ $runtime·tls_g(SB), DX // arg 3: &tls_g
224 // arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
225 // Compensate for tls_g (+16).
226 MOVQ -16(TLS), CX
227 #endif
228 #ifdef GOOS_windows
229 MOVQ $runtime·tls_g(SB), DX // arg 3: &tls_g
230 // Adjust for the Win64 calling convention.
231 MOVQ CX, R9 // arg 4
232 MOVQ DX, R8 // arg 3
233 MOVQ SI, DX // arg 2
234 MOVQ DI, CX // arg 1
235 #endif
236 CALL AX
237
238 // update stackguard after _cgo_init
239 MOVQ $runtime·g0(SB), CX
240 MOVQ (g_stack+stack_lo)(CX), AX
241 ADDQ $const_stackGuard, AX
242 MOVQ AX, g_stackguard0(CX)
243 MOVQ AX, g_stackguard1(CX)
244
// On Windows we still need TLS setup even after _cgo_init; everywhere
// else the cgo path skips straight to ok.
245 #ifndef GOOS_windows
246 JMP ok
247 #endif
248 needtls:
249 #ifdef GOOS_plan9
250 // skip TLS setup on Plan 9
251 JMP ok
252 #endif
253 #ifdef GOOS_solaris
254 // skip TLS setup on Solaris
255 JMP ok
256 #endif
257 #ifdef GOOS_illumos
258 // skip TLS setup on illumos
259 JMP ok
260 #endif
261 #ifdef GOOS_darwin
262 // skip TLS setup on Darwin
263 JMP ok
264 #endif
265 #ifdef GOOS_openbsd
266 // skip TLS setup on OpenBSD
267 JMP ok
268 #endif
269
270 #ifdef GOOS_windows
271 CALL runtime·wintls(SB)
272 #endif
273
274 LEAQ runtime·m0+m_tls(SB), DI
275 CALL runtime·settls(SB)
276
277 // store through it, to make sure it works
278 get_tls(BX)
279 MOVQ $0x123, g(BX)
280 MOVQ runtime·m0+m_tls(SB), AX
281 CMPQ AX, $0x123
282 JEQ 2(PC)
283 CALL runtime·abort(SB)
284 ok:
285 // set the per-goroutine and per-mach "registers"
286 get_tls(BX)
287 LEAQ runtime·g0(SB), CX
288 MOVQ CX, g(BX)
289 LEAQ runtime·m0(SB), AX
290
291 // save m->g0 = g0
292 MOVQ CX, m_g0(AX)
293 // save m0 to g0->m
294 MOVQ AX, g_m(CX)
295
296 CLD // convention is D is always left cleared
297
298 // Check GOAMD64 requirements
299 // We need to do this after setting up TLS, so that
300 // we can report an error if there is a failure. See issue 49586.
301 #ifdef NEED_FEATURES_CX
302 MOVL $0, AX
303 CPUID
304 CMPL AX, $0
305 JE bad_cpu
306 MOVL $1, AX
307 CPUID
308 ANDL $NEED_FEATURES_CX, CX
309 CMPL CX, $NEED_FEATURES_CX
310 JNE bad_cpu
311 #endif
312
313 #ifdef NEED_MAX_CPUID
314 MOVL $0x80000000, AX
315 CPUID
316 CMPL AX, $NEED_MAX_CPUID
317 JL bad_cpu
318 #endif
319
320 #ifdef NEED_EXT_FEATURES_BX
321 MOVL $7, AX
322 MOVL $0, CX
323 CPUID
324 ANDL $NEED_EXT_FEATURES_BX, BX
325 CMPL BX, $NEED_EXT_FEATURES_BX
326 JNE bad_cpu
327 #endif
328
329 #ifdef NEED_EXT_FEATURES_CX
330 MOVL $0x80000001, AX
331 CPUID
332 ANDL $NEED_EXT_FEATURES_CX, CX
333 CMPL CX, $NEED_EXT_FEATURES_CX
334 JNE bad_cpu
335 #endif
336
337 #ifdef NEED_OS_SUPPORT_AX
// XGETBV with CX=0 reads XCR0: which register states the OS has enabled.
338 XORL CX, CX
339 XGETBV
340 ANDL $NEED_OS_SUPPORT_AX, AX
341 CMPL AX, $NEED_OS_SUPPORT_AX
342 JNE bad_cpu
343 #endif
344
345 #ifdef NEED_DARWIN_SUPPORT
346 MOVQ $commpage64_version, BX
347 CMPW (BX), $13 // cpu_capabilities64 undefined in versions < 13
348 JL bad_cpu
349 MOVQ $commpage64_cpu_capabilities64, BX
350 MOVQ (BX), BX
351 MOVQ $NEED_DARWIN_SUPPORT, CX
352 ANDQ CX, BX
353 CMPQ BX, CX
354 JNE bad_cpu
355 #endif
356
357 CALL runtime·check(SB)
358
359 MOVL 24(SP), AX // copy argc
360 MOVL AX, 0(SP)
361 MOVQ 32(SP), AX // copy argv
362 MOVQ AX, 8(SP)
363 CALL runtime·args(SB)
364 CALL runtime·osinit(SB)
365 CALL runtime·schedinit(SB)
366
367 // create a new goroutine to start program
368 MOVQ $runtime·mainPC(SB), AX // entry
369 PUSHQ AX
370 CALL runtime·newproc(SB)
371 POPQ AX
372
373 // start this M
374 CALL runtime·mstart(SB)
375
376 CALL runtime·abort(SB) // mstart should never return
377 RET
378
379 bad_cpu: // show that the program requires a certain microarchitecture level.
// Equivalent to: write(2, bad_cpu_msg, 84); exit(1).
380 MOVQ $2, 0(SP)
381 MOVQ $bad_cpu_msg<>(SB), AX
382 MOVQ AX, 8(SP)
383 MOVQ $84, 16(SP)
384 CALL runtime·write(SB)
385 MOVQ $1, 0(SP)
386 CALL runtime·exit(SB)
387 CALL runtime·abort(SB)
388 RET
389
390 // Prevent dead-code elimination of debugCallV2 and debugPinnerV1, which are
391 // intended to be called by debuggers.
392 MOVQ $runtime·debugPinnerV1<ABIInternal>(SB), AX
393 MOVQ $runtime·debugCallV2<ABIInternal>(SB), AX
394 RET
395
396 // mainPC is a function value for runtime.main, to be passed to newproc.
397 // The reference to runtime.main is made via ABIInternal, since the
398 // actual function (not the ABI0 wrapper) is needed by newproc.
// rt0_go pushes &mainPC as the fn argument to newproc.
399 DATA runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
400 GLOBL runtime·mainPC(SB),RODATA,$8
401
// func breakpoint()
// 0xcc is INT3, the x86 software breakpoint instruction.
402 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
403 BYTE $0xcc
404 RET
405
// func asminit() — a no-op on amd64.
406 TEXT runtime·asminit(SB),NOSPLIT,$0-0
407 // No per-thread init.
408 RET
409
// func mstart() — thin TOPFRAME wrapper so tracebacks stop here.
410 TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
411 CALL runtime·mstart0(SB)
412 RET // not reached
413
414 /*
415 * go-routine
416 */
417
418 // func gogo(buf *gobuf)
419 // restore state from Gobuf; longjmp
420 TEXT runtime·gogo(SB), NOSPLIT, $0-8
421 MOVQ buf+0(FP), BX // gobuf
422 MOVQ gobuf_g(BX), DX
423 MOVQ 0(DX), CX // make sure g != nil
424 JMP gogo<>(SB)
425
// gogo<> expects DX = target g and BX = *gobuf. It installs g in TLS and
// the g register, restores SP/BP/ctxt from the gobuf, zeroes the gobuf
// slots so the GC doesn't retain them, and jumps to the saved PC.
426 TEXT gogo<>(SB), NOSPLIT, $0
427 get_tls(CX)
428 MOVQ DX, g(CX)
429 MOVQ DX, R14 // set the g register
430 MOVQ gobuf_sp(BX), SP // restore SP
431 MOVQ gobuf_ctxt(BX), DX
432 MOVQ gobuf_bp(BX), BP
433 MOVQ $0, gobuf_sp(BX) // clear to help garbage collector
434 MOVQ $0, gobuf_ctxt(BX)
435 MOVQ $0, gobuf_bp(BX)
436 MOVQ gobuf_pc(BX), BX
437 JMP BX
438
439 // func mcall(fn func(*g))
440 // Switch to m->g0's stack, call fn(g).
441 // Fn must never return. It should gogo(&g->sched)
442 // to keep running g.
443 TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT, $0-8
444 MOVQ AX, DX // DX = fn
445
446 // Save state in g->sched. The caller's SP and PC are restored by gogo to
447 // resume execution in the caller's frame (implicit return). The caller's BP
448 // is also restored to support frame pointer unwinding.
449 MOVQ SP, BX // hide (SP) reads from vet
450 MOVQ 8(BX), BX // caller's PC
451 MOVQ BX, (g_sched+gobuf_pc)(R14)
452 LEAQ fn+0(FP), BX // caller's SP
453 MOVQ BX, (g_sched+gobuf_sp)(R14)
454 // Get the caller's frame pointer by dereferencing BP. Storing BP as it is
455 // can cause a frame pointer cycle, see CL 476235.
456 MOVQ (BP), BX // caller's BP
457 MOVQ BX, (g_sched+gobuf_bp)(R14)
458
459 // switch to m->g0 & its stack, call fn
460 MOVQ g_m(R14), BX
461 MOVQ m_g0(BX), SI // SI = g.m.g0
462 CMPQ SI, R14 // if g == m->g0 call badmcall
463 JNE goodm
464 JMP runtime·badmcall(SB)
465 goodm:
466 MOVQ R14, AX // AX (and arg 0) = g
467 MOVQ SI, R14 // g = g.m.g0
468 get_tls(CX) // Set G in TLS
469 MOVQ R14, g(CX)
470 MOVQ (g_sched+gobuf_sp)(R14), SP // sp = g0.sched.sp
471 MOVQ $0, BP // clear frame pointer, as caller may execute on another M
472 PUSHQ AX // open up space for fn's arg spill slot
473 MOVQ 0(DX), R12
474 CALL R12 // fn(g)
475 // The Windows native stack unwinder incorrectly classifies the next instruction
476 // as part of the function epilogue, producing a wrong call stack.
477 // Add a NOP to work around this issue. See go.dev/issue/67007.
478 BYTE $0x90
479 POPQ AX
480 JMP runtime·badmcall2(SB)
// Unreachable: the JMP above never falls through.
481 RET
482
483 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
484 // of the G stack. We need to distinguish the routine that
485 // lives at the bottom of the G stack from the one that lives
486 // at the top of the system stack because the one at the top of
487 // the system stack terminates the stack walk (see topofstack()).
488 // The frame layout needs to match systemstack
489 // so that it can pretend to be systemstack_switch.
490 TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
// UNDEF traps if this is ever actually executed; it exists only as a PC
// for tracebacks (see gosave_systemstack_switch<>).
491 UNDEF
492 // Make sure this function is not leaf,
493 // so the frame is saved.
494 CALL runtime·abort(SB)
495 RET
496
497 // func systemstack(fn func())
498 TEXT runtime·systemstack(SB), NOSPLIT, $0-8
499 MOVQ fn+0(FP), DI // DI = fn
500 get_tls(CX)
501 MOVQ g(CX), AX // AX = g
502 MOVQ g_m(AX), BX // BX = m
503
// Already on the signal stack or g0? No switch needed.
504 CMPQ AX, m_gsignal(BX)
505 JEQ noswitch
506
507 MOVQ m_g0(BX), DX // DX = g0
508 CMPQ AX, DX
509 JEQ noswitch
510
// Only m.curg may switch; anything else is a fatal inconsistency.
511 CMPQ AX, m_curg(BX)
512 JNE bad
513
514 // Switch stacks.
515 // The original frame pointer is stored in BP,
516 // which is useful for stack unwinding.
517 // Save our state in g->sched. Pretend to
518 // be systemstack_switch if the G stack is scanned.
519 CALL gosave_systemstack_switch<>(SB)
520
521 // switch to g0
522 MOVQ DX, g(CX)
523 MOVQ DX, R14 // set the g register
524 MOVQ (g_sched+gobuf_sp)(DX), SP
525
526 // call target function
527 MOVQ DI, DX
528 MOVQ 0(DI), DI
529 CALL DI
530
531 // switch back to g
532 get_tls(CX)
533 MOVQ g(CX), AX
534 MOVQ g_m(AX), BX
535 MOVQ m_curg(BX), AX
536 MOVQ AX, g(CX)
537 MOVQ (g_sched+gobuf_sp)(AX), SP
538 MOVQ (g_sched+gobuf_bp)(AX), BP
539 MOVQ $0, (g_sched+gobuf_sp)(AX)
540 MOVQ $0, (g_sched+gobuf_bp)(AX)
541 RET
542
543 noswitch:
544 // already on m stack; tail call the function
545 // Using a tail call here cleans up tracebacks since we won't stop
546 // at an intermediate systemstack.
547 MOVQ DI, DX
548 MOVQ 0(DI), DI
549 // The function epilogue is not called on a tail call.
550 // Pop BP from the stack to simulate it.
551 POPQ BP
552 JMP DI
553
554 bad:
555 // Bad: g is not gsignal, not g0, not curg. What is it?
556 MOVQ $runtime·badsystemstack(SB), AX
557 CALL AX
558 INT $3
559
560 // func switchToCrashStack0(fn func())
// On entry (ABIInternal): AX = fn, R14 = current g.
561 TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
562 MOVQ g_m(R14), BX // curm
563
564 // set g to gcrash
565 LEAQ runtime·gcrash(SB), R14 // g = &gcrash
566 MOVQ BX, g_m(R14) // g.m = curm
567 MOVQ R14, m_g0(BX) // curm.g0 = g
568 get_tls(CX)
569 MOVQ R14, g(CX)
570
571 // switch to crashstack
572 MOVQ (g_stack+stack_hi)(R14), BX
// Leave 4 words of headroom below the top of the crash stack.
573 SUBQ $(4*8), BX
574 MOVQ BX, SP
575
576 // call target function
577 MOVQ AX, DX
578 MOVQ 0(AX), AX
579 CALL AX
580
581 // should never return
582 CALL runtime·abort(SB)
583 UNDEF
584
585 /*
586 * support for morestack
587 */
588
589 // Called during function prolog when more stack is needed.
590 //
591 // The traceback routines see morestack on a g0 as being
592 // the top of a stack (for example, morestack calling newstack
593 // calling the scheduler calling newm calling gc), so we must
594 // record an argument size. For that purpose, it has no arguments.
595 TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
596 // Cannot grow scheduler stack (m->g0).
597 get_tls(CX)
598 MOVQ g(CX), DI // DI = g
599 MOVQ g_m(DI), BX // BX = m
600
601 // Set g->sched to context in f.
602 MOVQ 0(SP), AX // f's PC
603 MOVQ AX, (g_sched+gobuf_pc)(DI)
604 LEAQ 8(SP), AX // f's SP
605 MOVQ AX, (g_sched+gobuf_sp)(DI)
606 MOVQ BP, (g_sched+gobuf_bp)(DI)
607 MOVQ DX, (g_sched+gobuf_ctxt)(DI)
608
609 MOVQ m_g0(BX), SI // SI = m.g0
610 CMPQ DI, SI
611 JNE 3(PC)
612 CALL runtime·badmorestackg0(SB)
613 CALL runtime·abort(SB)
614
615 // Cannot grow signal stack (m->gsignal).
616 MOVQ m_gsignal(BX), SI
617 CMPQ DI, SI
618 JNE 3(PC)
619 CALL runtime·badmorestackgsignal(SB)
620 CALL runtime·abort(SB)
621
622 // Called from f.
623 // Set m->morebuf to f's caller.
624 NOP SP // tell vet SP changed - stop checking offsets
625 MOVQ 8(SP), AX // f's caller's PC
626 MOVQ AX, (m_morebuf+gobuf_pc)(BX)
627 LEAQ 16(SP), AX // f's caller's SP
628 MOVQ AX, (m_morebuf+gobuf_sp)(BX)
629 MOVQ DI, (m_morebuf+gobuf_g)(BX)
630
631 // Call newstack on m->g0's stack.
632 MOVQ m_g0(BX), BX
633 MOVQ BX, g(CX)
634 MOVQ (g_sched+gobuf_sp)(BX), SP
635 MOVQ $0, BP // clear frame pointer, as caller may execute on another M
636 CALL runtime·newstack(SB)
637 CALL runtime·abort(SB) // crash if newstack returns
638 RET
639
640 // morestack but not preserving ctxt.
// morestack stores DX into g.sched.ctxt; zeroing it here drops the context.
641 TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
642 MOVL $0, DX
643 JMP runtime·morestack(SB)
644
645 // spillArgs stores return values from registers to a *internal/abi.RegArgs in R12.
// Integer registers AX..R11 occupy the first 9 slots, vector registers
// X0..X14 the rest; the layout mirrors unspillArgs below.
646 TEXT ·spillArgs(SB),NOSPLIT,$0-0
647 MOVQ AX, 0(R12)
648 MOVQ BX, 8(R12)
649 MOVQ CX, 16(R12)
650 MOVQ DI, 24(R12)
651 MOVQ SI, 32(R12)
652 MOVQ R8, 40(R12)
653 MOVQ R9, 48(R12)
654 MOVQ R10, 56(R12)
655 MOVQ R11, 64(R12)
656 MOVQ X0, 72(R12)
657 MOVQ X1, 80(R12)
658 MOVQ X2, 88(R12)
659 MOVQ X3, 96(R12)
660 MOVQ X4, 104(R12)
661 MOVQ X5, 112(R12)
662 MOVQ X6, 120(R12)
663 MOVQ X7, 128(R12)
664 MOVQ X8, 136(R12)
665 MOVQ X9, 144(R12)
666 MOVQ X10, 152(R12)
667 MOVQ X11, 160(R12)
668 MOVQ X12, 168(R12)
669 MOVQ X13, 176(R12)
670 MOVQ X14, 184(R12)
671 RET
672
673 // unspillArgs loads args into registers from a *internal/abi.RegArgs in R12.
// Exact inverse of spillArgs: same slot-to-register layout.
674 TEXT ·unspillArgs(SB),NOSPLIT,$0-0
675 MOVQ 0(R12), AX
676 MOVQ 8(R12), BX
677 MOVQ 16(R12), CX
678 MOVQ 24(R12), DI
679 MOVQ 32(R12), SI
680 MOVQ 40(R12), R8
681 MOVQ 48(R12), R9
682 MOVQ 56(R12), R10
683 MOVQ 64(R12), R11
684 MOVQ 72(R12), X0
685 MOVQ 80(R12), X1
686 MOVQ 88(R12), X2
687 MOVQ 96(R12), X3
688 MOVQ 104(R12), X4
689 MOVQ 112(R12), X5
690 MOVQ 120(R12), X6
691 MOVQ 128(R12), X7
692 MOVQ 136(R12), X8
693 MOVQ 144(R12), X9
694 MOVQ 152(R12), X10
695 MOVQ 160(R12), X11
696 MOVQ 168(R12), X12
697 MOVQ 176(R12), X13
698 MOVQ 184(R12), X14
699 RET
700
701 // reflectcall: call a function with the given argument list
702 // func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
703 // we don't have variable-sized frames, so we use a small number
704 // of constant-sized-frame functions to encode a few bits of size in the pc.
705 // Caution: ugly multiline assembly macros in your future!
706
// DISPATCH jumps to NAME if the frame size in CX is <= MAXSIZE;
// otherwise it falls through to the next (larger) DISPATCH.
707 #define DISPATCH(NAME,MAXSIZE) \
708 CMPQ CX, $MAXSIZE; \
709 JA 3(PC); \
710 MOVQ $NAME(SB), AX; \
711 JMP AX
712 // Note: can't just "JMP NAME(SB)" - bad inlining results.
713
714 TEXT ·reflectcall(SB), NOSPLIT, $0-48
// CX = frameSize selects the smallest call* variant that fits.
715 MOVLQZX frameSize+32(FP), CX
716 DISPATCH(runtime·call16, 16)
717 DISPATCH(runtime·call32, 32)
718 DISPATCH(runtime·call64, 64)
719 DISPATCH(runtime·call128, 128)
720 DISPATCH(runtime·call256, 256)
721 DISPATCH(runtime·call512, 512)
722 DISPATCH(runtime·call1024, 1024)
723 DISPATCH(runtime·call2048, 2048)
724 DISPATCH(runtime·call4096, 4096)
725 DISPATCH(runtime·call8192, 8192)
726 DISPATCH(runtime·call16384, 16384)
727 DISPATCH(runtime·call32768, 32768)
728 DISPATCH(runtime·call65536, 65536)
729 DISPATCH(runtime·call131072, 131072)
730 DISPATCH(runtime·call262144, 262144)
731 DISPATCH(runtime·call524288, 524288)
732 DISPATCH(runtime·call1048576, 1048576)
733 DISPATCH(runtime·call2097152, 2097152)
734 DISPATCH(runtime·call4194304, 4194304)
735 DISPATCH(runtime·call8388608, 8388608)
736 DISPATCH(runtime·call16777216, 16777216)
737 DISPATCH(runtime·call33554432, 33554432)
738 DISPATCH(runtime·call67108864, 67108864)
739 DISPATCH(runtime·call134217728, 134217728)
740 DISPATCH(runtime·call268435456, 268435456)
741 DISPATCH(runtime·call536870912, 536870912)
742 DISPATCH(runtime·call1073741824, 1073741824)
743 MOVQ $runtime·badreflectcall(SB), AX
744 JMP AX
745
// CALLFN defines one call* function with a MAXSIZE-byte frame: it copies
// the stack arguments in, loads register arguments via unspillArgs, calls
// f, spills register results, and copies stack results back via callRet.
746 #define CALLFN(NAME,MAXSIZE) \
747 TEXT NAME(SB), WRAPPER, $MAXSIZE-48; \
748 NO_LOCAL_POINTERS; \
749 /* copy arguments to stack */ \
750 MOVQ stackArgs+16(FP), SI; \
751 MOVLQZX stackArgsSize+24(FP), CX; \
752 MOVQ SP, DI; \
753 REP;MOVSB; \
754 /* set up argument registers */ \
755 MOVQ regArgs+40(FP), R12; \
756 CALL ·unspillArgs(SB); \
757 /* call function */ \
758 MOVQ f+8(FP), DX; \
759 PCDATA $PCDATA_StackMapIndex, $0; \
760 MOVQ (DX), R12; \
761 CALL R12; \
762 /* copy register return values back */ \
763 MOVQ regArgs+40(FP), R12; \
764 CALL ·spillArgs(SB); \
765 MOVLQZX stackArgsSize+24(FP), CX; \
766 MOVLQZX stackRetOffset+28(FP), BX; \
767 MOVQ stackArgs+16(FP), DI; \
768 MOVQ stackArgsType+0(FP), DX; \
769 MOVQ SP, SI; \
770 ADDQ BX, DI; \
771 ADDQ BX, SI; \
772 SUBQ BX, CX; \
773 CALL callRet<>(SB); \
774 RET
775
776 // callRet copies return values back at the end of call*. This is a
777 // separate function so it can allocate stack space for the arguments
778 // to reflectcallmove. It does not follow the Go ABI; it expects its
779 // arguments in registers.
// In: DX = type, DI = dst, SI = src, CX = size, R12 = regArgs.
780 TEXT callRet<>(SB), NOSPLIT, $40-0
781 NO_LOCAL_POINTERS
782 MOVQ DX, 0(SP)
783 MOVQ DI, 8(SP)
784 MOVQ SI, 16(SP)
785 MOVQ CX, 24(SP)
786 MOVQ R12, 32(SP)
787 CALL runtime·reflectcallmove(SB)
788 RET
789
// Instantiate call* for every power-of-two frame size from 16 B to 1 GiB,
// matching the DISPATCH table in reflectcall.
790 CALLFN(·call16, 16)
791 CALLFN(·call32, 32)
792 CALLFN(·call64, 64)
793 CALLFN(·call128, 128)
794 CALLFN(·call256, 256)
795 CALLFN(·call512, 512)
796 CALLFN(·call1024, 1024)
797 CALLFN(·call2048, 2048)
798 CALLFN(·call4096, 4096)
799 CALLFN(·call8192, 8192)
800 CALLFN(·call16384, 16384)
801 CALLFN(·call32768, 32768)
802 CALLFN(·call65536, 65536)
803 CALLFN(·call131072, 131072)
804 CALLFN(·call262144, 262144)
805 CALLFN(·call524288, 524288)
806 CALLFN(·call1048576, 1048576)
807 CALLFN(·call2097152, 2097152)
808 CALLFN(·call4194304, 4194304)
809 CALLFN(·call8388608, 8388608)
810 CALLFN(·call16777216, 16777216)
811 CALLFN(·call33554432, 33554432)
812 CALLFN(·call67108864, 67108864)
813 CALLFN(·call134217728, 134217728)
814 CALLFN(·call268435456, 268435456)
815 CALLFN(·call536870912, 536870912)
816 CALLFN(·call1073741824, 1073741824)
817
// func procyield(cycles uint32)
// Executes PAUSE `cycles` times to back off in spin loops.
818 TEXT runtime·procyield(SB),NOSPLIT,$0-0
819 MOVL cycles+0(FP), AX
820 again:
821 PAUSE
822 SUBL $1, AX
823 JNZ again
824 RET
825
826
// func publicationBarrier()
827 TEXT ·publicationBarrier<ABIInternal>(SB),NOSPLIT,$0-0
828 // Stores are already ordered on x86, so this is just a
829 // compile barrier.
830 RET
831
832 // Save state of caller into g->sched,
833 // but using fake PC from systemstack_switch.
834 // Must only be called from functions with frame pointer
835 // and without locals ($0) or else unwinding from
836 // systemstack_switch is incorrect.
837 // Smashes R9.
838 TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
839 // Take systemstack_switch PC and add 8 bytes to skip
840 // the prologue. The final location does not matter
841 // as long as we are between the prologue and the epilogue.
842 MOVQ $runtime·systemstack_switch+8(SB), R9
843 MOVQ R9, (g_sched+gobuf_pc)(R14)
// Caller's SP: skip our own return address (NOFRAME, so SP points at it).
844 LEAQ 8(SP), R9
845 MOVQ R9, (g_sched+gobuf_sp)(R14)
846 MOVQ BP, (g_sched+gobuf_bp)(R14)
847 // Assert ctxt is zero. See func save.
848 MOVQ (g_sched+gobuf_ctxt)(R14), R9
849 TESTQ R9, R9
850 JZ 2(PC)
851 CALL runtime·abort(SB)
852 RET
853
854 // func asmcgocall_no_g(fn, arg unsafe.Pointer)
855 // Call fn(arg) aligned appropriately for the gcc ABI.
856 // Called on a system stack, and there may be no g yet (during needm).
857 TEXT ·asmcgocall_no_g(SB),NOSPLIT,$32-16
858 MOVQ fn+0(FP), AX
859 MOVQ arg+8(FP), BX
// Save the unaligned SP so it can be restored after the C call.
860 MOVQ SP, DX
861 ANDQ $~15, SP // alignment
862 MOVQ DX, 8(SP)
// Set up arg in both conventions; the unused register is harmless.
863 MOVQ BX, DI // DI = first argument in AMD64 ABI
864 MOVQ BX, CX // CX = first argument in Win64
865 CALL AX
866 MOVQ 8(SP), DX
867 MOVQ DX, SP
868 RET
869
870 // asmcgocall_landingpad calls AX with BX as argument.
871 // Must be called on the system stack.
872 TEXT ·asmcgocall_landingpad(SB),NOSPLIT,$0-0
873 #ifdef GOOS_windows
874 // Make sure we have enough room for 4 stack-backed fast-call
875 // registers as per Windows amd64 calling convention.
876 ADJSP $32
877 // On Windows, asmcgocall_landingpad acts as landing pad for exceptions
878 // thrown in the cgo call. Exceptions that reach this function will be
879 // handled by runtime.sehtramp thanks to the SEH metadata added
880 // by the compiler.
881 // Note that runtime.sehtramp can't be attached directly to asmcgocall
882 // because its initial stack pointer can be outside the system stack bounds,
883 // and Windows stops the stack unwinding without calling the exception handler
884 // when it reaches that point.
885 MOVQ BX, CX // CX = first argument in Win64
886 CALL AX
887 // The exception handler is not called if the next instruction is part of
888 // the epilogue, which includes the RET instruction, so we need to add a NOP here.
889 BYTE $0x90
890 ADJSP $-32
// fn's return value stays in AX for the caller.
891 RET
892 #endif
893 // Tail call AX on non-Windows, as the extra stack frame is not needed.
894 MOVQ BX, DI // DI = first argument in AMD64 ABI
895 JMP AX
896
897 // func asmcgocall(fn, arg unsafe.Pointer) int32
898 // Call fn(arg) on the scheduler stack,
899 // aligned appropriately for the gcc ABI.
900 // See cgocall.go for more details.
901 TEXT ·asmcgocall(SB),NOSPLIT,$0-20
902 MOVQ fn+0(FP), AX
903 MOVQ arg+8(FP), BX
904
// Remember the original SP to compute the stack depth below.
905 MOVQ SP, DX
906
907 // Figure out if we need to switch to m->g0 stack.
908 // We get called to create new OS threads too, and those
909 // come in on the m->g0 stack already. Or we might already
910 // be on the m->gsignal stack.
911 get_tls(CX)
912 MOVQ g(CX), DI
913 CMPQ DI, $0
914 JEQ nosave
915 MOVQ g_m(DI), R8
916 MOVQ m_gsignal(R8), SI
917 CMPQ DI, SI
918 JEQ nosave
919 MOVQ m_g0(R8), SI
920 CMPQ DI, SI
921 JEQ nosave
922
923 // Switch to system stack.
924 // The original frame pointer is stored in BP,
925 // which is useful for stack unwinding.
926 CALL gosave_systemstack_switch<>(SB)
927 MOVQ SI, g(CX)
928 MOVQ (g_sched+gobuf_sp)(SI), SP
929
930 // Now on a scheduling stack (a pthread-created stack).
931 SUBQ $16, SP
932 ANDQ $~15, SP // alignment for gcc ABI
933 MOVQ DI, 8(SP) // save g
934 MOVQ (g_stack+stack_hi)(DI), DI
935 SUBQ DX, DI
936 MOVQ DI, 0(SP) // save depth in stack (can't just save SP, as stack might be copied during a callback)
937 CALL runtime·asmcgocall_landingpad(SB)
938
939 // Restore registers, g, stack pointer.
940 get_tls(CX)
941 MOVQ 8(SP), DI
942 MOVQ (g_stack+stack_hi)(DI), SI
943 SUBQ 0(SP), SI
944 MOVQ DI, g(CX)
945 MOVQ SI, SP
946
947 MOVL AX, ret+16(FP)
948 RET
949
950 nosave:
951 // Running on a system stack, perhaps even without a g.
952 // Having no g can happen during thread creation or thread teardown
953 // (see needm/dropm on Solaris, for example).
954 // This code is like the above sequence but without saving/restoring g
955 // and without worrying about the stack moving out from under us
956 // (because we're on a system stack, not a goroutine stack).
957 // The above code could be used directly if already on a system stack,
958 // but then the only path through this code would be a rare case on Solaris.
959 // Using this code for all "already on system stack" calls exercises it more,
960 // which should help keep it correct.
961 SUBQ $16, SP
962 ANDQ $~15, SP
963 MOVQ $0, 8(SP) // where above code stores g, in case someone looks during debugging
964 MOVQ DX, 0(SP) // save original stack pointer
965 CALL runtime·asmcgocall_landingpad(SB)
966 MOVQ 0(SP), SI // restore original stack pointer
967 MOVQ SI, SP
968 MOVL AX, ret+16(FP)
969 RET
970
971 #ifdef GOOS_windows
972 // Dummy TLS that's used on Windows so that we don't crash trying
973 // to restore the G register in needm. needm and its callees are
974 // very careful never to actually use the G, the TLS just can't be
975 // unset since we're in Go code.
// NOTE(review): const_tlsSize is presumably exported from Go code via
// go_asm.h (included above) — confirm against runtime source.
976 GLOBL zeroTLS<>(SB),RODATA,$const_tlsSize
977 #endif
978
// func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
// See cgocall.go for more details.
//
// Entry point for C code calling back into Go. Runs on the m->g0
// (or C thread) stack on entry; switches to m->curg to run
// runtime.cgocallbackg, then switches back. The $24 local frame
// holds the three argument words passed on to cgocallbackg plus
// the savedm slot.
TEXT ·cgocallback(SB),NOSPLIT,$24-24
	NO_LOCAL_POINTERS

	// Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g.
	// It is used to dropm while thread is exiting.
	MOVQ	fn+0(FP), AX
	CMPQ	AX, $0
	JNE	loadg
	// Restore the g from frame.
	get_tls(CX)
	MOVQ	frame+8(FP), BX
	MOVQ	BX, g(CX)
	JMP	dropm

loadg:
	// If g is nil, Go did not create the current thread,
	// or if this thread never called into Go on pthread platforms.
	// Call needm to obtain one m for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	// On Windows TLS may not be set up yet; guard the g load so a
	// nil TLS base falls through to needm with BX = 0.
	MOVL	$0, BX
	CMPQ	CX, $0
	JEQ	2(PC)
#endif
	MOVQ	g(CX), BX
	CMPQ	BX, $0
	JEQ	needm
	MOVQ	g_m(BX), BX
	MOVQ	BX, savedm-8(SP) // saved copy of oldm
	JMP	havem
needm:
#ifdef GOOS_windows
	// Set up a dummy TLS value. needm is careful not to use it,
	// but it needs to be there to prevent autogenerated code from
	// crashing when it loads from it.
	// We don't need to clear it or anything later because needm
	// will set up TLS properly.
	MOVQ	$zeroTLS<>(SB), DI
	CALL	runtime·settls(SB)
#endif
	// On some platforms (Windows) we cannot call needm through
	// an ABI wrapper because there's no TLS set up, and the ABI
	// wrapper will try to restore the G register (R14) from TLS.
	// Clear X15 because Go expects it and we're not calling
	// through a wrapper, but otherwise avoid setting the G
	// register in the wrapper and call needm directly. It
	// takes no arguments and doesn't return any values so
	// there's no need to handle that. Clear R14 so that there's
	// a bad value in there, in case needm tries to use it.
	XORPS	X15, X15
	XORQ	R14, R14
	MOVQ	$runtime·needAndBindM<ABIInternal>(SB), AX
	CALL	AX
	MOVQ	$0, savedm-8(SP) // dropm decision is made later via _cgo_pthread_key_created
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVQ	m_g0(BX), SI
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVQ	m_g0(BX), SI
	MOVQ	(g_sched+gobuf_sp)(SI), AX
	MOVQ	AX, 0(SP)
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the curg stack and
	// open a frame the same size as cgocallback's g0 frame.
	// Once we switch to the curg stack, the pushed PC will appear
	// to be the return PC of cgocallback, so that the traceback
	// will seamlessly trace back into the earlier calls.
	MOVQ	m_curg(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
	MOVQ	(g_sched+gobuf_pc)(SI), BX
	MOVQ	BX, -8(DI) // "push" return PC on the g stack
	// Gather our arguments into registers.
	MOVQ	fn+0(FP), BX
	MOVQ	frame+8(FP), CX
	MOVQ	ctxt+16(FP), DX
	// Compute the size of the frame, including return PC and, if
	// GOEXPERIMENT=framepointer, the saved base pointer
	LEAQ	fn+0(FP), AX
	SUBQ	SP, AX // AX is our actual frame size
	SUBQ	AX, DI // Allocate the same frame size on the g stack
	MOVQ	DI, SP

	// Stack arguments for cgocallbackg(fn, frame, ctxt), now on curg.
	MOVQ	BX, 0(SP)
	MOVQ	CX, 8(SP)
	MOVQ	DX, 16(SP)
	MOVQ	$runtime·cgocallbackg(SB), AX
	CALL	AX	// indirect call to bypass nosplit check. We're on a different stack now.

	// Compute the size of the frame again. FP and SP have
	// completely different values here than they did above,
	// but only their difference matters.
	LEAQ	fn+0(FP), AX
	SUBQ	SP, AX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	SP, DI
	ADDQ	AX, DI
	MOVQ	-8(DI), BX
	MOVQ	BX, (g_sched+gobuf_pc)(SI)
	MOVQ	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP
	MOVQ	0(SP), AX
	MOVQ	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m,
	// 1. for the duration of the call on non-pthread platforms,
	// 2. or the duration of the C thread alive on pthread platforms.
	// If the m on entry wasn't nil,
	// 1. the thread might be a Go thread,
	// 2. or it wasn't the first call from a C thread on pthread platforms,
	//    since then we skip dropm to reuse the m in the first call.
	MOVQ	savedm-8(SP), BX
	CMPQ	BX, $0
	JNE	done

	// Skip dropm to reuse it in the next call, when a pthread key has been created.
	MOVQ	_cgo_pthread_key_created(SB), AX
	// It means cgo is disabled when _cgo_pthread_key_created is a nil pointer, need dropm.
	CMPQ	AX, $0
	JEQ	dropm
	CMPQ	(AX), $0
	JNE	done

dropm:
	// Indirect call for the same nosplit-analysis reason as above.
	MOVQ	$runtime·dropm(SB), AX
	CALL	AX
#ifdef GOOS_windows
	// We need to clear the TLS pointer in case the next
	// thread that comes into Go tries to reuse that space
	// but uses the same M.
	XORQ	DI, DI
	CALL	runtime·settls(SB)
#endif
done:

	// Done!
	RET
1159
// func setg(gg *g)
// set g. for use by needm.
// Stores gg into the TLS g slot only; it does not touch the
// R14 g register (callers via the ABI machinery handle that).
TEXT runtime·setg(SB), NOSPLIT, $0-8
	MOVQ	gg+0(FP), BX
	get_tls(CX)
	MOVQ	BX, g(CX)
	RET
1167
// void setg_gcc(G*); set g called from gcc.
// The new g arrives in DI (first C ABI argument register).
// Unlike runtime·setg, this also loads the Go ABIInternal
// g register (R14), since we are entering Go-managed code.
TEXT setg_gcc<>(SB),NOSPLIT,$0
	get_tls(AX)
	MOVQ	DI, g(AX)
	MOVQ	DI, R14	// set the g register
	RET
1174
// abort crashes the process with a breakpoint trap (INT3).
// The trailing infinite loop keeps execution pinned here in case
// the trap is somehow resumed instead of terminating the process.
TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$3
loop:
	JMP	loop
1179
// check that SP is in range [g->stack.lo, g->stack.hi)
// Aborts (via runtime·abort) if SP is out of range; each JHI 2(PC)
// skips the following CALL when the unsigned comparison passes.
TEXT runtime·stackcheck(SB), NOSPLIT|NOFRAME, $0-0
	get_tls(CX)
	MOVQ	g(CX), AX
	CMPQ	(g_stack+stack_hi)(AX), SP	// require stack.hi > SP
	JHI	2(PC)
	CALL	runtime·abort(SB)
	CMPQ	SP, (g_stack+stack_lo)(AX)	// require SP > stack.lo
	JHI	2(PC)
	CALL	runtime·abort(SB)
	RET
1191
// func cputicks() int64
// Reads the CPU timestamp counter. RDTSC/RDTSCP return the 64-bit
// counter split across DX (high 32) and AX (low 32); the common
// "done" path combines them and stores the int64 result.
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	CMPB	internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
	JNE	fences
	// Instruction stream serializing RDTSCP is supported.
	// RDTSCP is supported by Intel Nehalem (2008) and
	// AMD K8 Rev. F (2006) and newer.
	RDTSCP
done:
	SHLQ	$32, DX
	ADDQ	DX, AX
	MOVQ	AX, ret+0(FP)
	RET
fences:
	// MFENCE is instruction stream serializing and flushes the
	// store buffers on AMD. The serialization semantics of LFENCE on AMD
	// are dependent on MSR C001_1029 and CPU generation.
	// LFENCE on Intel does wait for all previous instructions to have executed.
	// Intel recommends MFENCE;LFENCE in its manuals before RDTSC to have all
	// previous instructions executed and all previous loads and stores to globally visible.
	// Using MFENCE;LFENCE here aligns the serializing properties without
	// runtime detection of CPU manufacturer.
	MFENCE
	LFENCE
	RDTSC
	JMP done
1218
// func memhash(p unsafe.Pointer, h, s uintptr) uintptr
// hash function using AES hardware instructions
// ABIInternal: arguments arrive in AX/BX/CX, matching what
// aeshashbody expects; tail-jumps so the callee returns directly
// to our caller with the result in AX.
TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT,$0-32
	// AX = ptr to data
	// BX = seed
	// CX = size
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	JMP	aeshashbody<>(SB)
noaes:
	// No AES-NI: fall back to the portable Go implementation.
	JMP	runtime·memhashFallback<ABIInternal>(SB)
1230
// func strhash(p unsafe.Pointer, h uintptr) uintptr
// Unpacks the string header (data pointer, length) into the
// AX/CX registers aeshashbody expects, then tail-jumps to it.
TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT,$0-24
	// AX = ptr to string struct
	// BX = seed
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVQ	8(AX), CX	// length of string
	MOVQ	(AX), AX	// string data
	JMP	aeshashbody<>(SB)
noaes:
	// No AES-NI: fall back to the portable Go implementation.
	JMP	runtime·strhashFallback<ABIInternal>(SB)
1242
// AX: data
// BX: hash seed
// CX: length
// At return: AX = return value
//
// Common body for memhash/strhash when AES-NI is available.
// Dispatches on length to size-specialized paths; every path
// mixes the per-process key material from runtime·aeskeysched
// and scrambles with three AESENC rounds before returning.
// Invariant: paths that touch X15 must zero it before RET,
// because Go's ABIInternal treats X15 as the fixed zero register.
TEXT aeshashbody<>(SB),NOSPLIT,$0-0
	// Fill an SSE register with our seeds.
	MOVQ	BX, X0				// 64 bits of per-table hash seed
	PINSRW	$4, CX, X0			// 16 bits of length
	PSHUFHW $0, X0, X0			// repeat length 4 times total
	MOVO	X0, X1				// save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
	AESENC	X0, X0				// scramble seed

	// Dispatch on length.
	CMPQ	CX, $16
	JB	aes0to15
	JE	aes16
	CMPQ	CX, $32
	JBE	aes17to32
	CMPQ	CX, $64
	JBE	aes33to64
	CMPQ	CX, $128
	JBE	aes65to128
	JMP	aes129plus

aes0to15:
	TESTQ	CX, CX
	JE	aes0

	ADDQ	$16, AX
	TESTW	$0xff0, AX
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	ADDQ	CX, CX
	MOVQ	$masks<>(SB), AX
	PAND	(AX)(CX*8), X1	// keep only the first CX bytes (masks entries are 16 bytes)
final1:
	PXOR	X0, X1	// xor data with seed
	AESENC	X1, X1	// scramble combo 3 times
	AESENC	X1, X1
	AESENC	X1, X1
	MOVQ	X1, AX	// return X1
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(CX*1), X1
	ADDQ	CX, CX
	MOVQ	$shifts<>(SB), AX
	PSHUFB	(AX)(CX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVQ	X0, AX	// return X0
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed
	MOVOU	(AX), X2
	MOVOU	-16(AX)(CX*1), X3	// overlapping tail load

	// xor with seed
	PXOR	X0, X2
	PXOR	X1, X3

	// scramble 3 times
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVQ	X2, AX	// return X2
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// load first 32 and (possibly overlapping) last 32 bytes
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(CX*1), X6
	MOVOU	-16(AX)(CX*1), X7

	PXOR	X0, X4
	PXOR	X1, X5
	PXOR	X2, X6
	PXOR	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVQ	X4, AX	// return X4
	RET

aes65to128:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// load data: first 64 bytes and (possibly overlapping) last 64 bytes
	MOVOU	(AX), X8
	MOVOU	16(AX), X9
	MOVOU	32(AX), X10
	MOVOU	48(AX), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor with seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// scramble 3 times
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// combine results
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	// X15 must be zero on return
	PXOR	X15, X15
	MOVQ	X8, AX	// return X8
	RET

aes129plus:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// start with last (possibly overlapping) block
	MOVOU	-128(AX)(CX*1), X8
	MOVOU	-112(AX)(CX*1), X9
	MOVOU	-96(AX)(CX*1), X10
	MOVOU	-80(AX)(CX*1), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor in seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// compute number of remaining 128-byte blocks
	DECQ	CX
	SHRQ	$7, CX

	// Align the hot loop entry for instruction fetch.
	PCALIGN $16
aesloop:
	// scramble state
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X8
	AESENC	X1, X9
	AESENC	X2, X10
	AESENC	X3, X11
	MOVOU	64(AX), X4
	MOVOU	80(AX), X5
	MOVOU	96(AX), X6
	MOVOU	112(AX), X7
	AESENC	X4, X12
	AESENC	X5, X13
	AESENC	X6, X14
	AESENC	X7, X15

	ADDQ	$128, AX
	DECQ	CX
	JNE	aesloop

	// 3 more scrambles to finish
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	// X15 must be zero on return
	PXOR	X15, X15
	MOVQ	X8, AX	// return X8
	RET
1579
// func memhash32(p unsafe.Pointer, h uintptr) uintptr
// ABIInternal for performance.
// Packs the seed and the 4 data bytes into X0, then runs three
// AES rounds keyed by runtime·aeskeysched.
TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT,$0-24
	// AX = ptr to data
	// BX = seed
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVQ	BX, X0	// X0 = seed
	PINSRD	$2, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, AX	// return X0
	RET
noaes:
	// No AES-NI: fall back to the portable Go implementation.
	JMP	runtime·memhash32Fallback<ABIInternal>(SB)
1596
// func memhash64(p unsafe.Pointer, h uintptr) uintptr
// ABIInternal for performance.
// Packs the seed and the 8 data bytes into X0, then runs three
// AES rounds keyed by runtime·aeskeysched.
TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT,$0-24
	// AX = ptr to data
	// BX = seed
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVQ	BX, X0	// X0 = seed
	PINSRQ	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, AX	// return X0
	RET
noaes:
	// No AES-NI: fall back to the portable Go implementation.
	JMP	runtime·memhash64Fallback<ABIInternal>(SB)
1613
// simple mask to get rid of data in the high part of the register.
// 16 entries of 16 bytes each; entry i keeps the low i bytes and
// zeroes the rest (used via PAND in aeshashbody's aes0to15 path,
// indexed by 2*length since each DATA line covers 8 bytes).
DATA masks<>+0x00(SB)/8, $0x0000000000000000
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL masks<>(SB),RODATA,$256
1648
// func checkASM() bool
// Reports whether assembly-level invariants hold; currently only
// that the masks/shifts tables are 16-byte aligned, as required
// by their use as 16-byte SSE operands in aeshashbody.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
	MOVQ	$masks<>(SB), AX
	MOVQ	$shifts<>(SB), BX
	ORQ	BX, AX
	TESTQ	$15, AX
	SETEQ	ret+0(FP)
	RET
1658
// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// 16 entries of 16 bytes each; bytes with the high bit set (0xff)
// make PSHUFB write zero into that destination byte.
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256
1695
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
// Uses only caller-saved registers (CX, AX) so no save/restore
// is needed; result is returned in AX per the C ABI.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), AX
	MOVQ	m_curg(AX), AX
	MOVQ	(g_stack+stack_hi)(AX), AX
	RET
1705
// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
// The leading NOP exists so that goexit+PCQuantum is still inside
// this function; the trailing NOP keeps the goexit1 call's return
// address within goexit's code range for traceback.
TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME|NOFRAME,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP
1713
// This is called from .init_array and follows the platform, not Go, ABI.
// DI holds the new moduledata pointer (first C ABI argument);
// it is appended to the runtime's lastmoduledatap linked list.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
	MOVQ	runtime·lastmoduledatap(SB), AX
	MOVQ	DI, moduledata_next(AX)
	MOVQ	DI, runtime·lastmoduledatap(SB)
	POPQ	R15
	RET
1722
// Initialize special registers then jump to sigpanic.
// This function is injected from the signal handler for panicking
// signals. It is quite painful to set X15 in the signal context,
// so we do it here.
// Restores the two fixed ABIInternal registers: R14 = g (from TLS)
// and X15 = zero, then tail-calls the real sigpanic.
TEXT ·sigpanic0(SB),NOSPLIT,$0-0
	get_tls(R14)
	MOVQ	g(R14), R14
	XORPS	X15, X15
	JMP	·sigpanic<ABIInternal>(SB)
1732
// gcWriteBarrier informs the GC about heap pointer writes.
//
// gcWriteBarrier returns space in a write barrier buffer which
// should be filled in by the caller.
// gcWriteBarrier does NOT follow the Go ABI. It accepts the
// number of bytes of buffer needed in R11, and returns a pointer
// to the buffer space in R11.
// It clobbers FLAGS. It does not clobber any general-purpose registers,
// but may clobber others (e.g., SSE registers).
// Typical use would be, when doing *(CX+88) = AX
//     CMPL $0, runtime.writeBarrier(SB)
//     JEQ dowrite
//     CALL runtime.gcBatchBarrier2(SB)
//     MOVQ AX, (R11)
//     MOVQ 88(CX), DX
//     MOVQ DX, 8(R11)
// dowrite:
//     MOVQ AX, 88(CX)
//
// Frame layout: 0..95(SP) is scratch for saving GP registers on the
// slow (flush) path; 96/104(SP) hold R12/R13 for the fast path.
TEXT gcWriteBarrier<>(SB),NOSPLIT,$112
	// Save the registers clobbered by the fast path. This is slightly
	// faster than having the caller spill these.
	MOVQ	R12, 96(SP)
	MOVQ	R13, 104(SP)
retry:
	// TODO: Consider passing g.m.p in as an argument so they can be shared
	// across a sequence of write barriers.
	MOVQ	g_m(R14), R13
	MOVQ	m_p(R13), R13
	// Get current buffer write position.
	MOVQ	(p_wbBuf+wbBuf_next)(R13), R12	// original next position
	ADDQ	R11, R12			// new next position
	// Is the buffer full?
	CMPQ	R12, (p_wbBuf+wbBuf_end)(R13)
	JA	flush
	// Commit to the larger buffer.
	MOVQ	R12, (p_wbBuf+wbBuf_next)(R13)
	// Make return value (the original next position)
	SUBQ	R11, R12
	MOVQ	R12, R11
	// Restore registers.
	MOVQ	96(SP), R12
	MOVQ	104(SP), R13
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	// It is possible for wbBufFlush to clobber other registers
	// (e.g., SSE registers), but the compiler takes care of saving
	// those in the caller if necessary. This strikes a balance
	// with registers that are likely to be used.
	//
	// We don't have type information for these, but all code under
	// here is NOSPLIT, so nothing will observe these.
	//
	// TODO: We could strike a different balance; e.g., saving X0
	// and not saving GP registers that are less likely to be used.
	MOVQ	DI, 0(SP)
	MOVQ	AX, 8(SP)
	MOVQ	BX, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	DX, 32(SP)
	// DI already saved
	MOVQ	SI, 40(SP)
	MOVQ	BP, 48(SP)
	MOVQ	R8, 56(SP)
	MOVQ	R9, 64(SP)
	MOVQ	R10, 72(SP)
	MOVQ	R11, 80(SP)
	// R12 already saved
	// R13 already saved
	// R14 is g
	MOVQ	R15, 88(SP)

	CALL	runtime·wbBufFlush(SB)

	// Restore and retry the reservation against the now-empty buffer.
	MOVQ	0(SP), DI
	MOVQ	8(SP), AX
	MOVQ	16(SP), BX
	MOVQ	24(SP), CX
	MOVQ	32(SP), DX
	MOVQ	40(SP), SI
	MOVQ	48(SP), BP
	MOVQ	56(SP), R8
	MOVQ	64(SP), R9
	MOVQ	72(SP), R10
	MOVQ	80(SP), R11
	MOVQ	88(SP), R15
	JMP	retry
1822
// gcWriteBarrierN requests N*8 bytes of write barrier buffer space
// (one 8-byte slot per pointer) and tail-calls the common body,
// which returns the buffer pointer in R11.
TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$8, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$16, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$24, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$32, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$40, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$48, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$56, R11
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVL	$64, R11
	JMP	gcWriteBarrier<>(SB)
1847
// Error string reported to the debugger by debugCallV2 when the
// requested argument frame exceeds the largest dispatch size (65536).
DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below
1850
// debugCallV2 is the entry point for debugger-injected function
// calls on running goroutines. It informs the runtime that a
// debug call has been injected and creates a call frame for the
// debugger to fill in.
//
// To inject a function call, a debugger should:
// 1. Check that the goroutine is in state _Grunning and that
//    there are at least 256 bytes free on the stack.
// 2. Push the current PC on the stack (updating SP).
// 3. Write the desired argument frame size at SP-16 (using the SP
//    after step 2).
// 4. Save all machine registers (including flags and XMM registers)
//    so they can be restored later by the debugger.
// 5. Set the PC to debugCallV2 and resume execution.
//
// If the goroutine is in state _Grunnable, then it's not generally
// safe to inject a call because it may return out via other runtime
// operations. Instead, the debugger should unwind the stack to find
// the return to non-runtime code, add a temporary breakpoint there,
// and inject the call once that breakpoint is hit.
//
// If the goroutine is in any other state, it's not safe to inject a call.
//
// This function communicates back to the debugger by setting R12 and
// invoking INT3 to raise a breakpoint signal. See the comments in the
// implementation for the protocol the debugger is expected to
// follow. InjectDebugCall in the runtime tests demonstrates this protocol.
//
// The debugger must ensure that any pointers passed to the function
// obey escape analysis requirements. Specifically, it must not pass
// a stack pointer to an escaping argument. debugCallV2 cannot check
// this invariant.
//
// This is ABIInternal because Go code injects its PC directly into new
// goroutine stacks.
TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT,$152-0
	// Save all registers that may contain pointers so they can be
	// conservatively scanned.
	//
	// We can't do anything that might clobber any of these
	// registers before this.
	MOVQ	R15, r15-(14*8+8)(SP)
	MOVQ	R14, r14-(13*8+8)(SP)
	MOVQ	R13, r13-(12*8+8)(SP)
	MOVQ	R12, r12-(11*8+8)(SP)
	MOVQ	R11, r11-(10*8+8)(SP)
	MOVQ	R10, r10-(9*8+8)(SP)
	MOVQ	R9, r9-(8*8+8)(SP)
	MOVQ	R8, r8-(7*8+8)(SP)
	MOVQ	DI, di-(6*8+8)(SP)
	MOVQ	SI, si-(5*8+8)(SP)
	MOVQ	BP, bp-(4*8+8)(SP)
	MOVQ	BX, bx-(3*8+8)(SP)
	MOVQ	DX, dx-(2*8+8)(SP)
	// Save the frame size before we clobber it. Either of the last
	// saves could clobber this depending on whether there's a saved BP.
	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
	MOVQ	CX, cx-(1*8+8)(SP)
	MOVQ	AX, ax-(0*8+8)(SP)

	// Save the argument frame size.
	MOVQ	DX, frameSize-128(SP)

	// Perform a safe-point check.
	MOVQ	retpc-8(FP), AX	// Caller's PC
	MOVQ	AX, 0(SP)
	CALL	runtime·debugCallCheck(SB)
	MOVQ	8(SP), AX
	TESTQ	AX, AX
	JZ	good
	// The safety check failed. Put the reason string at the top
	// of the stack.
	MOVQ	AX, 0(SP)
	MOVQ	16(SP), AX
	MOVQ	AX, 8(SP)
	// Set R12 to 8 and invoke INT3. The debugger should get the
	// reason a call can't be injected from the top of the stack
	// and resume execution.
	MOVQ	$8, R12
	BYTE	$0xcc	// INT3
	JMP	restore

good:
	// Registers are saved and it's safe to make a call.
	// Open up a call frame, moving the stack if necessary.
	//
	// Once the frame is allocated, this will set R12 to 0 and
	// invoke INT3. The debugger should write the argument
	// frame for the call at SP, set up argument registers, push
	// the trapping PC on the stack, set the PC to the function to
	// call, set RDX to point to the closure (if a closure call),
	// and resume execution.
	//
	// If the function returns, this will set R12 to 1 and invoke
	// INT3. The debugger can then inspect any return value saved
	// on the stack at SP and in registers and resume execution again.
	//
	// If the function panics, this will set R12 to 2 and invoke INT3.
	// The interface{} value of the panic will be at SP. The debugger
	// can inspect the panic value and resume execution again.
	//
	// Each dispatch case picks the smallest debugCallNNN wrapper
	// whose frame fits the requested size; on a miss it falls
	// through to the next case (JA skips the 4-instruction body).
#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
	CMPQ	AX, $MAXSIZE;			\
	JA	5(PC);				\
	MOVQ	$NAME(SB), AX;			\
	MOVQ	AX, 0(SP);			\
	CALL	runtime·debugCallWrap(SB);	\
	JMP	restore

	MOVQ	frameSize-128(SP), AX
	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
	// The frame size is too large. Report the error.
	MOVQ	$debugCallFrameTooLarge<>(SB), AX
	MOVQ	AX, 0(SP)
	MOVQ	$20, 8(SP)	// length of debugCallFrameTooLarge string
	MOVQ	$8, R12
	BYTE	$0xcc	// INT3
	JMP	restore

restore:
	// Calls and failures resume here.
	//
	// Set R12 to 16 and invoke INT3. The debugger should restore
	// all registers except RIP and RSP and resume execution.
	MOVQ	$16, R12
	BYTE	$0xcc	// INT3
	// We must not modify flags after this point.

	// Restore pointer-containing registers, which may have been
	// modified from the debugger's copy by stack copying.
	MOVQ	ax-(0*8+8)(SP), AX
	MOVQ	cx-(1*8+8)(SP), CX
	MOVQ	dx-(2*8+8)(SP), DX
	MOVQ	bx-(3*8+8)(SP), BX
	MOVQ	bp-(4*8+8)(SP), BP
	MOVQ	si-(5*8+8)(SP), SI
	MOVQ	di-(6*8+8)(SP), DI
	MOVQ	r8-(7*8+8)(SP), R8
	MOVQ	r9-(8*8+8)(SP), R9
	MOVQ	r10-(9*8+8)(SP), R10
	MOVQ	r11-(10*8+8)(SP), R11
	MOVQ	r12-(11*8+8)(SP), R12
	MOVQ	r13-(12*8+8)(SP), R13
	MOVQ	r14-(13*8+8)(SP), R14
	MOVQ	r15-(14*8+8)(SP), R15

	RET
2008
// runtime.debugCallCheck assumes that functions defined with the
// DEBUG_CALL_FN macro are safe points to inject calls.
// Each instance traps with R12=0 (frame ready for the debugger to
// fill in) and again with R12=1 (call returned) — see the protocol
// described at debugCallV2.
#define DEBUG_CALL_FN(NAME,MAXSIZE)		\
TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
	NO_LOCAL_POINTERS;			\
	MOVQ	$0, R12;			\
	BYTE	$0xcc;				\
	MOVQ	$1, R12;			\
	BYTE	$0xcc;				\
	RET
DEBUG_CALL_FN(debugCall32<>, 32)
DEBUG_CALL_FN(debugCall64<>, 64)
DEBUG_CALL_FN(debugCall128<>, 128)
DEBUG_CALL_FN(debugCall256<>, 256)
DEBUG_CALL_FN(debugCall512<>, 512)
DEBUG_CALL_FN(debugCall1024<>, 1024)
DEBUG_CALL_FN(debugCall2048<>, 2048)
DEBUG_CALL_FN(debugCall4096<>, 4096)
DEBUG_CALL_FN(debugCall8192<>, 8192)
DEBUG_CALL_FN(debugCall16384<>, 16384)
DEBUG_CALL_FN(debugCall32768<>, 32768)
DEBUG_CALL_FN(debugCall65536<>, 65536)
2031
2032 // func debugCallPanicked(val interface{})
//
// Reports a panic from a debugger-injected call back to the attached
// debugger. The interface value's two words (type pointer and data
// pointer) are copied from the argument area (FP) into the 16-byte
// local frame at 0(SP)/8(SP), then R12 is set to protocol code 2 and
// INT3 (BYTE $0xcc) traps into the debugger, which reads the panic
// value from the top of the stack.
2033 TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
2034 // Copy the panic value to the top of stack.
2035 MOVQ val_type+0(FP), AX
2036 MOVQ AX, 0(SP)
2037 MOVQ val_data+8(FP), AX
2038 MOVQ AX, 8(SP)
2039 MOVQ $2, R12
2040 BYTE $0xcc
2041 RET
2042
// panicBounds is the compiler-generated landing pad for failed bounds
// checks. It spills the 14 general-purpose registers that could hold
// the faulting index into a fixed array in its 144-byte frame (slots at
// 16(SP)..136(SP), indexed by register number; the SP and R14/g slots
// are intentionally left unwritten), then hands the caller's PC and a
// pointer to that array to panicBounds64, which decodes which index
// failed and raises the runtime panic.
// NOTE(review): under ABIInternal the first two integer arguments
// travel in AX and BX — confirm against the internal ABI spec if the
// register assignment ever changes.
2043 TEXT runtime·panicBounds<ABIInternal>(SB),NOSPLIT,$144-0
2044 NO_LOCAL_POINTERS
2045 // Save all 14 int registers that could have an index in them.
2046 // They may be pointers, but if they are they are dead.
2047 MOVQ AX, 16(SP)
2048 MOVQ CX, 24(SP)
2049 MOVQ DX, 32(SP)
2050 MOVQ BX, 40(SP)
2051 // skip SP @ 48(SP)
2052 MOVQ BP, 56(SP)
2053 MOVQ SI, 64(SP)
2054 MOVQ DI, 72(SP)
2055 MOVQ R8, 80(SP)
2056 MOVQ R9, 88(SP)
2057 MOVQ R10, 96(SP)
2058 MOVQ R11, 104(SP)
2059 MOVQ R12, 112(SP)
2060 MOVQ R13, 120(SP)
2061 // skip R14 @ 128(SP) (aka G)
2062 MOVQ R15, 136(SP)
2063
// First arg (AX): the return address, i.e. the PC just past the CALL
// that the compiler emitted; 152(SP) = 144-byte frame + 8-byte saved PC.
// Second arg (BX): pointer to the saved-register array filled in above.
2064 MOVQ SP, AX // hide SP read from vet
2065 MOVQ 152(AX), AX // PC immediately after call to panicBounds
2066 LEAQ 16(SP), BX
2067 CALL runtime·panicBounds64<ABIInternal>(SB)
2068 RET
2069
// runtime·tls_g holds the offset (or slot) used to locate the g
// register's backing TLS storage on platforms that need it.
2070 #ifdef GOOS_android
2071 // Use the free TLS_SLOT_APP slot #2 on Android Q.
2072 // Earlier androids are set up in gcc_android.c.
// $16 = byte offset of slot #2 (2 slots * 8 bytes each).
2073 DATA runtime·tls_g+0(SB)/8, $16
2074 GLOBL runtime·tls_g+0(SB), NOPTR, $8
2075 #endif
2076 #ifdef GOOS_windows
// Zero-initialized here; presumably filled in during OS-specific
// startup — TODO(review): confirm where Windows assigns the TLS slot.
2077 GLOBL runtime·tls_g+0(SB), NOPTR, $8
2078 #endif
2079
2080 // The compiler and assembler's -spectre=ret mode rewrites
2081 // all indirect CALL AX / JMP AX instructions to be
2082 // CALL retpolineAX / JMP retpolineAX.
2083 // See https://support.google.com/faqs/answer/7625886.
//
// How it works: the CALL pushes a return address pointing at the
// nospec PAUSE/JMP loop and jumps forward (2+2 bytes) to setup. setup
// overwrites that saved return address on the stack with the target
// held in register `reg`, then RETs — so the actual transfer goes to
// the intended target, while the CPU's return-stack predictor
// speculates harmlessly into the PAUSE loop instead of an
// attacker-controlled address.
//
// The MOVQ is hand-encoded because `reg` is a numeric register
// encoding: 0x48|((reg&8)>>1) sets REX.W plus REX.R for R8-R15, and
// 0x04|((reg&7)<<3) places the low 3 register bits in the ModRM reg
// field, with SIB byte 0x24 addressing 0(SP).
// No comments may be inserted between the continued lines below.
2084 #define RETPOLINE(reg) \
2085 /* CALL setup */ BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0; \
2086 /* nospec: */ \
2087 /* PAUSE */ BYTE $0xF3; BYTE $0x90; \
2088 /* JMP nospec */ BYTE $0xEB; BYTE $-(2+2); \
2089 /* setup: */ \
2090 /* MOVQ AX, 0(SP) */ BYTE $0x48|((reg&8)>>1); BYTE $0x89; \
2091 BYTE $0x04|((reg&7)<<3); BYTE $0x24; \
2092 /* RET */ BYTE $0xC3
2093
// One retpoline thunk per general-purpose register. The numeric
// argument is the register's x86-64 machine encoding (AX=0 ... R15=15),
// consumed by the hand-assembled MOVQ inside RETPOLINE. Encoding 4 (SP)
// is skipped: an indirect branch through SP cannot occur, and 4 in the
// ModRM reg field would collide with the SIB escape anyway.
2094 TEXT runtime·retpolineAX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(0)
2095 TEXT runtime·retpolineCX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(1)
2096 TEXT runtime·retpolineDX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(2)
2097 TEXT runtime·retpolineBX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(3)
2098 /* SP is 4, can't happen / magic encodings */
2099 TEXT runtime·retpolineBP(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(5)
2100 TEXT runtime·retpolineSI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(6)
2101 TEXT runtime·retpolineDI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(7)
2102 TEXT runtime·retpolineR8(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(8)
2103 TEXT runtime·retpolineR9(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(9)
2104 TEXT runtime·retpolineR10(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(10)
2105 TEXT runtime·retpolineR11(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(11)
2106 TEXT runtime·retpolineR12(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(12)
2107 TEXT runtime·retpolineR13(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(13)
2108 TEXT runtime·retpolineR14(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(14)
2109 TEXT runtime·retpolineR15(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(15)
2110
// func getfp() uintptr
//
// Returns the hardware frame pointer (BP) in AX, the ABIInternal
// integer result register. NOFRAME/NOSPLIT ensure BP still holds the
// caller's frame pointer when read.
2111 TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
2112 MOVQ BP, AX
2113 RET
2114
View as plain text