Text file
src/runtime/asm_amd64.s
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "go_tls.h"
7 #include "funcdata.h"
8 #include "textflag.h"
9 #include "cgo/abi_amd64.h"
10
11 // _rt0_amd64 is common startup code for most amd64 systems when using
12 // internal linking. This is the entry point for the program from the
13 // kernel for an ordinary -buildmode=exe program. The stack holds the
14 // number of arguments and the C-style argv.
//
// It loads argc from 0(SP) into DI and the address of the argv array
// (8(SP)) into SI, then jumps to runtime·rt0_go, which takes its
// arguments in those two registers.
15 TEXT _rt0_amd64(SB),NOSPLIT,$-8
16 MOVQ 0(SP), DI // argc
17 LEAQ 8(SP), SI // argv
18 JMP runtime·rt0_go(SB) // jump (not call): rt0_go takes over from here
19
20 // main is common startup code for most amd64 systems when using
21 // external linking. The C startup code will call the symbol "main"
22 // passing argc and argv in the usual C ABI registers DI and SI.
//
// DI and SI are left untouched, so runtime·rt0_go receives argc/argv
// exactly as the C startup code passed them.
23 TEXT main(SB),NOSPLIT,$-8
24 JMP runtime·rt0_go(SB)
25
26 // _rt0_amd64_lib is common startup code for most amd64 systems when
27 // using -buildmode=c-archive or -buildmode=c-shared. The linker will
28 // arrange to invoke this function as a global constructor (for
29 // c-archive) or when the shared library is loaded (for c-shared).
30 // We expect argc and argv to be passed in the usual C ABI registers
31 // DI and SI.
//
// It stashes argc/argv in file-local globals, runs runtime·libpreinit
// (except on Windows), and then starts _rt0_amd64_lib_go on a new thread:
// through _cgo_sys_thread_create when that pointer is non-nil, otherwise
// through runtime·newosproc0. Both paths meet at "restore" and return to
// the caller.
32 TEXT _rt0_amd64_lib(SB),NOSPLIT|NOFRAME,$0
33 // Transition from C ABI to Go ABI.
// NOTE(review): the macro comes from cgo/abi_amd64.h; presumably it saves
// the host ABI's callee-saved registers, undone by POP_REGS_HOST_TO_ABI0
// below. Confirm against that header.
34 PUSH_REGS_HOST_TO_ABI0()
35
36 MOVQ DI, _rt0_amd64_lib_argc<>(SB)
37 MOVQ SI, _rt0_amd64_lib_argv<>(SB)
38
39 // Synchronous initialization.
40 #ifndef GOOS_windows
41 // Avoid calling it on Windows because it is not used
42 // and it would crash the application due to the autogenerated
43 // ABI wrapper trying to access a non-existent TLS slot.
44 CALL runtime·libpreinit(SB)
45 #endif
46
47 // Create a new thread to finish Go runtime initialization.
48 MOVQ _cgo_sys_thread_create(SB), AX
49 TESTQ AX, AX
50 JZ nocgo // nil pointer: no cgo thread-creation hook, use newosproc0
51
52 // We're calling back to C.
53 // Align stack per C ABI requirements.
54 MOVQ SP, BX // Callee-save in C ABI
55 ANDQ $~15, SP
56 MOVQ $_rt0_amd64_lib_go(SB), DI // C arg 1: function to run on the new thread
57 MOVQ $0, SI // C arg 2: zero
58 #ifdef GOOS_windows
59 // For Windows ABI
60 MOVQ DI, CX
61 MOVQ SI, DX
62 // Leave space for four words on the stack as required
63 // by the Windows amd64 calling convention.
64 ADJSP $32
65 #endif
66 CALL AX
67 #ifdef GOOS_windows
68 ADJSP $-32 // just to make the assembler not complain about unbalanced stack
69 #endif
70 MOVQ BX, SP // undo the alignment: restore the SP saved in BX
71 JMP restore
72
73 nocgo:
// Two stack words for newosproc0's arguments: stacksize, then fn.
74 ADJSP $16
75 MOVQ $0x800000, 0(SP) // stacksize
76 MOVQ $_rt0_amd64_lib_go(SB), AX
77 MOVQ AX, 8(SP) // fn
78 CALL runtime·newosproc0(SB)
79 ADJSP $-16
80
81 restore:
82 POP_REGS_HOST_TO_ABI0()
83 RET
84
85 // _rt0_amd64_lib_go initializes the Go runtime.
86 // This is started in a separate thread by _rt0_amd64_lib.
// It reloads the argc/argv that _rt0_amd64_lib saved in the file-local
// globals into DI/SI and jumps to runtime·rt0_go.
87 TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
88 MOVQ _rt0_amd64_lib_argc<>(SB), DI
89 MOVQ _rt0_amd64_lib_argv<>(SB), SI
90 JMP runtime·rt0_go(SB)
91
// File-local (<>) 8-byte slots, initialized to zero, holding the argc and
// argv values written by _rt0_amd64_lib and read back by _rt0_amd64_lib_go.
// They are declared NOPTR.
92 DATA _rt0_amd64_lib_argc<>(SB)/8, $0
93 GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
94 DATA _rt0_amd64_lib_argv<>(SB)/8, $0
95 GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
96
// bad_cpu_msg is the error text written by the bad_cpu path of rt0_go when
// the CPU does not meet the GOAMD64 level the binary was built for. Each
// variant is exactly 84 bytes including the trailing newline; the same
// length is used for the GLOBL size below and for the write in bad_cpu, so
// all three must change together. When none of the GOAMD64_v2/v3/v4 macros
// is defined, no DATA directive initializes the symbol.
97 #ifdef GOAMD64_v2
98 DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v2 microarchitecture support.\n"
99 #endif
100
101 #ifdef GOAMD64_v3
102 DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v3 microarchitecture support.\n"
103 #endif
104
105 #ifdef GOAMD64_v4
106 DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v4 microarchitecture support.\n"
107 #endif
108
109 GLOBL bad_cpu_msg<>(SB), RODATA, $84
110
111 // Define a list of AMD64 microarchitecture level features
112 // https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
//
// Naming: *_FEATURES_CX masks are tested against ECX of CPUID leaf 1,
// *_EXT_FEATURES_CX against ECX of leaf 0x80000001, *_EXT_FEATURES_BX against
// EBX of leaf 7 (subleaf 0), and *_OS_SUPPORT_AX against the low word
// returned by XGETBV with ECX=0 (see the checks in rt0_go). Each level's
// mask includes the previous level's bits. In every comment below the
// feature names are listed in the same order as the bits in the macro.
113
114 // SSE3 SSSE3 CMPXCHG16B SSE4.1 SSE4.2 POPCNT
115 #define V2_FEATURES_CX (1 << 0 | 1 << 9 | 1 << 13 | 1 << 19 | 1 << 20 | 1 << 23)
116 // LAHF/SAHF
117 #define V2_EXT_FEATURES_CX (1 << 0)
118 // FMA MOVBE OSXSAVE AVX F16C
119 #define V3_FEATURES_CX (V2_FEATURES_CX | 1 << 12 | 1 << 22 | 1 << 27 | 1 << 28 | 1 << 29)
120 // ABM (for LZCNT)
121 #define V3_EXT_FEATURES_CX (V2_EXT_FEATURES_CX | 1 << 5)
122 // BMI1 AVX2 BMI2
123 #define V3_EXT_FEATURES_BX (1 << 3 | 1 << 5 | 1 << 8)
124 // XMM YMM
125 #define V3_OS_SUPPORT_AX (1 << 1 | 1 << 2)
126
// v4 adds no new bits in leaf 1 ECX or leaf 0x80000001 ECX.
127 #define V4_FEATURES_CX V3_FEATURES_CX
128
129 #define V4_EXT_FEATURES_CX V3_EXT_FEATURES_CX
130 // AVX512F AVX512DQ AVX512CD AVX512BW AVX512VL
131 #define V4_EXT_FEATURES_BX (V3_EXT_FEATURES_BX | 1 << 16 | 1 << 17 | 1 << 28 | 1 << 30 | 1 << 31)
132 // OPMASK ZMM
133 #define V4_OS_SUPPORT_AX (V3_OS_SUPPORT_AX | 1 << 5 | (1 << 6 | 1 << 7))
134
// Select the NEED_* masks for the GOAMD64 level being built. rt0_go guards
// each CPU check with #ifdef on the corresponding NEED_* macro, so a macro
// that is left undefined here disables that check. With none of the
// GOAMD64_v2/v3/v4 macros defined, no NEED_* macro is defined and no check
// is emitted.
135 #ifdef GOAMD64_v2
136 #define NEED_MAX_CPUID 0x80000001
137 #define NEED_FEATURES_CX V2_FEATURES_CX
138 #define NEED_EXT_FEATURES_CX V2_EXT_FEATURES_CX
139 #endif
140
141 #ifdef GOAMD64_v3
142 #define NEED_MAX_CPUID 0x80000001
143 #define NEED_FEATURES_CX V3_FEATURES_CX
144 #define NEED_EXT_FEATURES_CX V3_EXT_FEATURES_CX
145 #define NEED_EXT_FEATURES_BX V3_EXT_FEATURES_BX
146 #define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
147 #endif
148
149 #ifdef GOAMD64_v4
150 #define NEED_MAX_CPUID 0x80000001
151 #define NEED_FEATURES_CX V4_FEATURES_CX
152 #define NEED_EXT_FEATURES_CX V4_EXT_FEATURES_CX
153 #define NEED_EXT_FEATURES_BX V4_EXT_FEATURES_BX
154
155 // Darwin requires a different approach to check AVX512 support, see CL 285572.
// On Darwin only the v3 XGETBV mask is required; AVX-512 availability is
// instead read from the 64-bit capabilities word in the commpage (see the
// NEED_DARWIN_SUPPORT check in rt0_go).
156 #ifdef GOOS_darwin
157 #define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
158 // These values are from:
159 // https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
160 #define commpage64_base_address 0x00007fffffe00000
161 #define commpage64_cpu_capabilities64 (commpage64_base_address+0x010)
162 #define commpage64_version (commpage64_base_address+0x01E)
163 #define AVX512F 0x0000004000000000
164 #define AVX512CD 0x0000008000000000
165 #define AVX512DQ 0x0000010000000000
166 #define AVX512BW 0x0000020000000000
167 #define AVX512VL 0x0000100000000000
168 #define NEED_DARWIN_SUPPORT (AVX512F | AVX512DQ | AVX512CD | AVX512BW | AVX512VL)
169 #else
170 #define NEED_OS_SUPPORT_AX V4_OS_SUPPORT_AX
171 #endif
172
173 #endif
174
// rt0_go is the common bootstrap reached from _rt0_amd64, main and
// _rt0_amd64_lib_go with argc in DI and argv in SI. In order, it:
//   1. saves argc/argv on a 16-byte-aligned stack;
//   2. gives g0 a provisional stack range of 64 KiB below SP;
//   3. records the CPU vendor (isIntel) and version info via CPUID;
//   4. calls _cgo_init when present; otherwise (and, on Windows, also after
//      _cgo_init) sets up TLS via settls and self-tests it, except on the
//      platforms that skip TLS setup;
//   5. stores g0 in TLS and links g0 and m0 to each other;
//   6. verifies the GOAMD64 feature level, jumping to bad_cpu on failure;
//   7. calls check, args, osinit, schedinit, queues runtime·mainPC with
//      newproc and enters mstart, which must not return.
175 TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
176 // copy arguments forward on an even stack
177 MOVQ DI, AX // argc
178 MOVQ SI, BX // argv
179 SUBQ $(5*8), SP // 3args 2auto
180 ANDQ $~15, SP
181 MOVQ AX, 24(SP)
182 MOVQ BX, 32(SP)
183
184 // create istack out of the given (operating system) stack.
185 // _cgo_init may update stackguard.
186 MOVQ $runtime·g0(SB), DI
187 LEAQ (-64*1024)(SP), BX
188 MOVQ BX, g_stackguard0(DI)
189 MOVQ BX, g_stackguard1(DI)
190 MOVQ BX, (g_stack+stack_lo)(DI)
191 MOVQ SP, (g_stack+stack_hi)(DI)
192
193 // find out information about the processor we're on
194 MOVL $0, AX
195 CPUID
196 CMPL AX, $0
197 JE nocpuinfo
198
// CPUID leaf 0 returns the vendor string in BX, DX, CX (in that order).
199 CMPL BX, $0x756E6547 // "Genu"
200 JNE notintel
201 CMPL DX, $0x49656E69 // "ineI"
202 JNE notintel
203 CMPL CX, $0x6C65746E // "ntel"
204 JNE notintel
205 MOVB $1, runtime·isIntel(SB)
206
207 notintel:
208 // Load EAX=1 cpuid flags
209 MOVL $1, AX
210 CPUID
211 MOVL AX, runtime·processorVersionInfo(SB)
212
213 nocpuinfo:
214 // if there is an _cgo_init, call it.
215 MOVQ _cgo_init(SB), AX
216 TESTQ AX, AX
217 JZ needtls
218 // arg 1: g0, already in DI
219 MOVQ $setg_gcc<>(SB), SI // arg 2: setg_gcc
220 MOVQ $0, DX // arg 3, 4: not used when using platform's TLS
221 MOVQ $0, CX
222 #ifdef GOOS_android
223 MOVQ $runtime·tls_g(SB), DX // arg 3: &tls_g
224 // arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
225 // Compensate for tls_g (+16).
226 MOVQ -16(TLS), CX
227 #endif
228 #ifdef GOOS_windows
229 MOVQ $runtime·tls_g(SB), DX // arg 3: &tls_g
230 // Adjust for the Win64 calling convention.
231 MOVQ CX, R9 // arg 4
232 MOVQ DX, R8 // arg 3
233 MOVQ SI, DX // arg 2
234 MOVQ DI, CX // arg 1
235 #endif
236 CALL AX
237
238 // update stackguard after _cgo_init
239 MOVQ $runtime·g0(SB), CX
240 MOVQ (g_stack+stack_lo)(CX), AX
241 ADDQ $const_stackGuard, AX
242 MOVQ AX, g_stackguard0(CX)
243 MOVQ AX, g_stackguard1(CX)
244
// Everywhere except Windows the cgo path skips the TLS setup below; on
// Windows it falls through into needtls.
245 #ifndef GOOS_windows
246 JMP ok
247 #endif
248 needtls:
249 #ifdef GOOS_plan9
250 // skip TLS setup on Plan 9
251 JMP ok
252 #endif
253 #ifdef GOOS_solaris
254 // skip TLS setup on Solaris
255 JMP ok
256 #endif
257 #ifdef GOOS_illumos
258 // skip TLS setup on illumos
259 JMP ok
260 #endif
261 #ifdef GOOS_darwin
262 // skip TLS setup on Darwin
263 JMP ok
264 #endif
265 #ifdef GOOS_openbsd
266 // skip TLS setup on OpenBSD
267 JMP ok
268 #endif
269
270 #ifdef GOOS_windows
271 CALL runtime·wintls(SB)
272 #endif
273
274 LEAQ runtime·m0+m_tls(SB), DI
275 CALL runtime·settls(SB)
276
277 // store through it, to make sure it works
// Write a sentinel through the TLS g slot and read it back directly from
// m0.tls; abort if the two do not refer to the same memory.
278 get_tls(BX)
279 MOVQ $0x123, g(BX)
280 MOVQ runtime·m0+m_tls(SB), AX
281 CMPQ AX, $0x123
282 JEQ 2(PC)
283 CALL runtime·abort(SB)
284 ok:
285 // set the per-goroutine and per-mach "registers"
286 get_tls(BX)
287 LEAQ runtime·g0(SB), CX
288 MOVQ CX, g(BX)
289 LEAQ runtime·m0(SB), AX
290
291 // save m->g0 = g0
292 MOVQ CX, m_g0(AX)
293 // save m0 to g0->m
294 MOVQ AX, g_m(CX)
295
296 CLD // convention is D is always left cleared
297
298 // Check GOAMD64 requirements
299 // We need to do this after setting up TLS, so that
300 // we can report an error if there is a failure. See issue 49586.
301 #ifdef NEED_FEATURES_CX
302 MOVL $0, AX
303 CPUID
304 CMPL AX, $0
305 JE bad_cpu
306 MOVL $1, AX
307 CPUID
308 ANDL $NEED_FEATURES_CX, CX
309 CMPL CX, $NEED_FEATURES_CX
310 JNE bad_cpu
311 #endif
312
313 #ifdef NEED_MAX_CPUID
// Leaf 0x80000000 returns the highest supported extended leaf in AX.
// Leaf numbers are unsigned, so the comparison must be unsigned (JB):
// a CPU or hypervisor without extended leaves may return a small value
// here, which a signed compare against 0x80000001 (negative as int32)
// would wrongly accept.
314 MOVL $0x80000000, AX
315 CPUID
316 CMPL AX, $NEED_MAX_CPUID
317 JB bad_cpu
318 #endif
319
320 #ifdef NEED_EXT_FEATURES_BX
321 MOVL $7, AX
322 MOVL $0, CX
323 CPUID
324 ANDL $NEED_EXT_FEATURES_BX, BX
325 CMPL BX, $NEED_EXT_FEATURES_BX
326 JNE bad_cpu
327 #endif
328
329 #ifdef NEED_EXT_FEATURES_CX
330 MOVL $0x80000001, AX
331 CPUID
332 ANDL $NEED_EXT_FEATURES_CX, CX
333 CMPL CX, $NEED_EXT_FEATURES_CX
334 JNE bad_cpu
335 #endif
336
337 #ifdef NEED_OS_SUPPORT_AX
// XGETBV with CX=0; the required state-component bits must all be set in AX.
338 XORL CX, CX
339 XGETBV
340 ANDL $NEED_OS_SUPPORT_AX, AX
341 CMPL AX, $NEED_OS_SUPPORT_AX
342 JNE bad_cpu
343 #endif
344
345 #ifdef NEED_DARWIN_SUPPORT
346 MOVQ $commpage64_version, BX
347 CMPW (BX), $13 // cpu_capabilities64 undefined in versions < 13
348 JL bad_cpu
349 MOVQ $commpage64_cpu_capabilities64, BX
350 MOVQ (BX), BX
351 MOVQ $NEED_DARWIN_SUPPORT, CX
352 ANDQ CX, BX
353 CMPQ BX, CX
354 JNE bad_cpu
355 #endif
356
357 CALL runtime·check(SB)
358
359 MOVL 24(SP), AX // copy argc
360 MOVL AX, 0(SP)
361 MOVQ 32(SP), AX // copy argv
362 MOVQ AX, 8(SP)
363 CALL runtime·args(SB)
364 CALL runtime·osinit(SB)
365 CALL runtime·schedinit(SB)
366
367 // create a new goroutine to start program
368 MOVQ $runtime·mainPC(SB), AX // entry
369 PUSHQ AX
370 CALL runtime·newproc(SB)
371 POPQ AX
372
373 // start this M
374 CALL runtime·mstart(SB)
375
376 CALL runtime·abort(SB) // mstart should never return
377 RET
378
379 bad_cpu: // show that the program requires a certain microarchitecture level.
// write(2, bad_cpu_msg, 84) followed by exit(1); 84 is the message length
// declared for bad_cpu_msg.
380 MOVQ $2, 0(SP)
381 MOVQ $bad_cpu_msg<>(SB), AX
382 MOVQ AX, 8(SP)
383 MOVQ $84, 16(SP)
384 CALL runtime·write(SB)
385 MOVQ $1, 0(SP)
386 CALL runtime·exit(SB)
387 CALL runtime·abort(SB)
388 RET
389
390 // Prevent dead-code elimination of debugCallV2 and debugPinnerV1, which are
391 // intended to be called by debuggers.
// These instructions follow a RET and are never executed; they exist only
// to reference the two symbols.
392 MOVQ $runtime·debugPinnerV1<ABIInternal>(SB), AX
393 MOVQ $runtime·debugCallV2<ABIInternal>(SB), AX
394 RET
395
396 // mainPC is a function value for runtime.main, to be passed to newproc.
397 // The reference to runtime.main is made via ABIInternal, since the
398 // actual function (not the ABI0 wrapper) is needed by newproc.
// It is a single read-only 8-byte word holding the address of runtime·main;
// rt0_go passes the address of this word to newproc.
399 DATA runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
400 GLOBL runtime·mainPC(SB),RODATA,$8
401
// breakpoint executes a single one-byte x86 breakpoint instruction and returns.
402 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
403 BYTE $0xcc // opcode 0xCC = INT3
404 RET
405
// asminit is a no-op on amd64: it takes no arguments and returns immediately.
406 TEXT runtime·asminit(SB),NOSPLIT,$0-0
407 // No per-thread init.
408 RET
409
// mstart is the assembly entry point that calls runtime·mstart0. It is
// marked TOPFRAME and NOFRAME; the trailing RET is not expected to execute.
410 TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
411 CALL runtime·mstart0(SB)
412 RET // not reached
413
414 /*
415 * go-routine
416 */
417
418 // func gogo(buf *gobuf)
419 // restore state from Gobuf; longjmp
// Loads buf into BX and buf.g into DX, dereferences DX so that a nil g
// faults here, then jumps to gogo<> with BX = buf and DX = g.
420 TEXT runtime·gogo(SB), NOSPLIT, $0-8
421 MOVQ buf+0(FP), BX // gobuf
422 MOVQ gobuf_g(BX), DX
423 MOVQ 0(DX), CX // make sure g != nil
424 JMP gogo<>(SB)
425
// gogo<> completes the switch started by runtime·gogo.
// In: BX = *gobuf, DX = g to resume.
// It installs g in TLS and in R14, restores SP, the context register (DX)
// and BP from the gobuf, zeroes those three gobuf fields, and jumps to
// gobuf.pc. It does not return to its caller.
426 TEXT gogo<>(SB), NOSPLIT, $0
427 get_tls(CX)
428 MOVQ DX, g(CX)
429 MOVQ DX, R14 // set the g register
430 MOVQ gobuf_sp(BX), SP // restore SP
431 MOVQ gobuf_ctxt(BX), DX
432 MOVQ gobuf_bp(BX), BP
433 MOVQ $0, gobuf_sp(BX) // clear to help garbage collector
434 MOVQ $0, gobuf_ctxt(BX)
435 MOVQ $0, gobuf_bp(BX)
436 MOVQ gobuf_pc(BX), BX // BX is reused: from here on it holds the target PC
437 JMP BX
438
439 // func mcall(fn func(*g))
440 // Switch to m->g0's stack, call fn(g).
441 // Fn must never return. It should gogo(&g->sched)
442 // to keep running g.
//
// ABIInternal: fn arrives in AX and the current g is in R14. If the current
// g is already m->g0, control goes to runtime·badmcall; if fn returns,
// control goes to runtime·badmcall2.
443 TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT, $0-8
444 MOVQ AX, DX // DX = fn
445
446 // Save state in g->sched. The caller's SP and PC are restored by gogo to
447 // resume execution in the caller's frame (implicit return). The caller's BP
448 // is also restored to support frame pointer unwinding.
449 MOVQ SP, BX // hide (SP) reads from vet
450 MOVQ 8(BX), BX // caller's PC
451 MOVQ BX, (g_sched+gobuf_pc)(R14)
452 LEAQ fn+0(FP), BX // caller's SP
453 MOVQ BX, (g_sched+gobuf_sp)(R14)
454 // Get the caller's frame pointer by dereferencing BP. Storing BP as it is
455 // can cause a frame pointer cycle, see CL 476235.
456 MOVQ (BP), BX // caller's BP
457 MOVQ BX, (g_sched+gobuf_bp)(R14)
458
459 // switch to m->g0 & its stack, call fn
460 MOVQ g_m(R14), BX
461 MOVQ m_g0(BX), SI // SI = g.m.g0
462 CMPQ SI, R14 // if g == m->g0 call badmcall
463 JNE goodm
464 JMP runtime·badmcall(SB)
465 goodm:
466 MOVQ R14, AX // AX (and arg 0) = g
467 MOVQ SI, R14 // g = g.m.g0
468 get_tls(CX) // Set G in TLS
469 MOVQ R14, g(CX)
470 MOVQ (g_sched+gobuf_sp)(R14), SP // sp = g0.sched.sp
471 MOVQ $0, BP // clear frame pointer, as caller may execute on another M
472 PUSHQ AX // open up space for fn's arg spill slot
473 MOVQ 0(DX), R12 // R12 = code pointer stored in the first word of fn
474 CALL R12 // fn(g)
475 // The Windows native stack unwinder incorrectly classifies the next instruction
476 // as part of the function epilogue, producing a wrong call stack.
477 // Add a NOP to work around this issue. See go.dev/issue/67007.
478 BYTE $0x90
479 POPQ AX
480 JMP runtime·badmcall2(SB)
481 RET
482
483 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
484 // of the G stack. We need to distinguish the routine that
485 // lives at the bottom of the G stack from the one that lives
486 // at the top of the system stack because the one at the top of
487 // the system stack terminates the stack walk (see topofstack()).
488 // The frame layout needs to match systemstack
489 // so that it can pretend to be systemstack_switch.
//
// The body is not meant to run: it starts with UNDEF. Its address plus 8 is
// what gosave_systemstack_switch<> stores as the saved PC.
490 TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
491 UNDEF
492 // Make sure this function is not leaf,
493 // so the frame is saved.
494 CALL runtime·abort(SB)
495 RET
496
497 // func systemstack(fn func())
// Runs fn on the m's g0 stack.
//   - If the current g is m.gsignal or m.g0, fn is tail-called on the
//     current stack (noswitch).
//   - If the current g is m.curg, the g's state is saved, execution switches
//     to g0's stack, fn is called, and execution switches back to curg.
//   - Otherwise runtime·badsystemstack is called (bad).
498 TEXT runtime·systemstack(SB), NOSPLIT, $0-8
499 MOVQ fn+0(FP), DI // DI = fn
500 get_tls(CX)
501 MOVQ g(CX), AX // AX = g
502 MOVQ g_m(AX), BX // BX = m
503
504 CMPQ AX, m_gsignal(BX)
505 JEQ noswitch
506
507 MOVQ m_g0(BX), DX // DX = g0
508 CMPQ AX, DX
509 JEQ noswitch
510
511 CMPQ AX, m_curg(BX)
512 JNE bad
513
514 // Switch stacks.
515 // The original frame pointer is stored in BP,
516 // which is useful for stack unwinding.
517 // Save our state in g->sched. Pretend to
518 // be systemstack_switch if the G stack is scanned.
519 CALL gosave_systemstack_switch<>(SB)
520
521 // switch to g0
522 MOVQ DX, g(CX)
523 MOVQ DX, R14 // set the g register
524 MOVQ (g_sched+gobuf_sp)(DX), SP
525
526 // call target function
527 MOVQ DI, DX // DX = fn (context register for the call)
528 MOVQ 0(DI), DI // DI = code pointer stored in the first word of fn
529 CALL DI
530
531 // switch back to g
// TLS still holds g0 here; m.curg is the goroutine to resume. SP and BP
// come from the state saved before the switch, and the saved fields are
// then cleared.
532 get_tls(CX)
533 MOVQ g(CX), AX
534 MOVQ g_m(AX), BX
535 MOVQ m_curg(BX), AX
536 MOVQ AX, g(CX)
537 MOVQ (g_sched+gobuf_sp)(AX), SP
538 MOVQ (g_sched+gobuf_bp)(AX), BP
539 MOVQ $0, (g_sched+gobuf_sp)(AX)
540 MOVQ $0, (g_sched+gobuf_bp)(AX)
541 RET
542
543 noswitch:
544 // already on m stack; tail call the function
545 // Using a tail call here cleans up tracebacks since we won't stop
546 // at an intermediate systemstack.
547 MOVQ DI, DX
548 MOVQ 0(DI), DI
549 // The function epilogue is not called on a tail call.
550 // Pop BP from the stack to simulate it.
551 POPQ BP
552 JMP DI
553
554 bad:
555 // Bad: g is not gsignal, not g0, not curg. What is it?
556 MOVQ $runtime·badsystemstack(SB), AX
557 CALL AX
558 INT $3
559
560 // func switchToCrashStack0(fn func())
// ABIInternal: fn arrives in AX and the current g is in R14.
// Makes runtime·gcrash the current g (in R14 and TLS) and installs it as
// the current m's g0, moves SP to 4 words below gcrash's stack.hi, and
// calls fn. fn must not return; if it does, runtime·abort is called.
561 TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
562 MOVQ g_m(R14), BX // curm
563
564 // set g to gcrash
565 LEAQ runtime·gcrash(SB), R14 // g = &gcrash
566 MOVQ BX, g_m(R14) // g.m = curm
567 MOVQ R14, m_g0(BX) // curm.g0 = g
568 get_tls(CX)
569 MOVQ R14, g(CX)
570
571 // switch to crashstack
572 MOVQ (g_stack+stack_hi)(R14), BX
573 SUBQ $(4*8), BX
574 MOVQ BX, SP
575
576 // call target function
577 MOVQ AX, DX // DX = fn (context register for the call)
578 MOVQ 0(AX), AX // AX = code pointer stored in the first word of fn
579 CALL AX
580
581 // should never return
582 CALL runtime·abort(SB)
583 UNDEF
584
585 /*
586 * support for morestack
587 */
588
589 // Called during function prolog when more stack is needed.
590 //
591 // The traceback routines see morestack on a g0 as being
592 // the top of a stack (for example, morestack calling newstack
593 // calling the scheduler calling newm calling gc), so we must
594 // record an argument size. For that purpose, it has no arguments.
//
// On entry 0(SP) is the return PC into f (the function whose prologue
// called morestack), 8(SP) is f's return PC into its own caller, and DX is
// f's context. morestack records f's state in g.sched and f's caller in
// m.morebuf, switches to g0's stack and calls runtime·newstack. It aborts
// if the current g is g0 or gsignal.
595 TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
596 // Cannot grow scheduler stack (m->g0).
597 get_tls(CX)
598 MOVQ g(CX), DI // DI = g
599 MOVQ g_m(DI), BX // BX = m
600
601 // Set g->sched to context in f.
602 MOVQ 0(SP), AX // f's PC
603 MOVQ AX, (g_sched+gobuf_pc)(DI)
604 LEAQ 8(SP), AX // f's SP
605 MOVQ AX, (g_sched+gobuf_sp)(DI)
606 MOVQ BP, (g_sched+gobuf_bp)(DI)
607 MOVQ DX, (g_sched+gobuf_ctxt)(DI)
608
609 MOVQ m_g0(BX), SI // SI = m.g0
610 CMPQ DI, SI
611 JNE 3(PC) // g != g0: skip the two CALLs below
612 CALL runtime·badmorestackg0(SB)
613 CALL runtime·abort(SB)
614
615 // Cannot grow signal stack (m->gsignal).
616 MOVQ m_gsignal(BX), SI
617 CMPQ DI, SI
618 JNE 3(PC) // g != gsignal: skip the two CALLs below
619 CALL runtime·badmorestackgsignal(SB)
620 CALL runtime·abort(SB)
621
622 // Called from f.
623 // Set m->morebuf to f's caller.
624 NOP SP // tell vet SP changed - stop checking offsets
625 MOVQ 8(SP), AX // f's caller's PC
626 MOVQ AX, (m_morebuf+gobuf_pc)(BX)
627 LEAQ 16(SP), AX // f's caller's SP
628 MOVQ AX, (m_morebuf+gobuf_sp)(BX)
629 MOVQ DI, (m_morebuf+gobuf_g)(BX)
630
631 // Call newstack on m->g0's stack.
632 MOVQ m_g0(BX), BX
633 MOVQ BX, g(CX) // TLS g = g0 (CX still holds the TLS base loaded at entry)
634 MOVQ (g_sched+gobuf_sp)(BX), SP
635 MOVQ $0, BP // clear frame pointer, as caller may execute on another M
636 CALL runtime·newstack(SB)
637 CALL runtime·abort(SB) // crash if newstack returns
638 RET
639
640 // morestack but not preserving ctxt.
// Zeroes DX (the context register that morestack stores into g.sched.ctxt)
// and tail-jumps to runtime·morestack.
641 TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
642 MOVL $0, DX
643 JMP runtime·morestack(SB)
644
645 // spillArgs stores return values from registers to a *internal/abi.RegArgs in R12.
// Layout written here: nine integer registers at offsets 0-64 (AX, BX, CX,
// DI, SI, R8, R9, R10, R11), then X0-X14 at offsets 72-184. Each slot is 8
// bytes; the X registers are stored with MOVQ, i.e. only their low 64 bits.
// The offsets must stay in sync with unspillArgs.
646 TEXT ·spillArgs(SB),NOSPLIT,$0-0
647 MOVQ AX, 0(R12)
648 MOVQ BX, 8(R12)
649 MOVQ CX, 16(R12)
650 MOVQ DI, 24(R12)
651 MOVQ SI, 32(R12)
652 MOVQ R8, 40(R12)
653 MOVQ R9, 48(R12)
654 MOVQ R10, 56(R12)
655 MOVQ R11, 64(R12)
656 MOVQ X0, 72(R12)
657 MOVQ X1, 80(R12)
658 MOVQ X2, 88(R12)
659 MOVQ X3, 96(R12)
660 MOVQ X4, 104(R12)
661 MOVQ X5, 112(R12)
662 MOVQ X6, 120(R12)
663 MOVQ X7, 128(R12)
664 MOVQ X8, 136(R12)
665 MOVQ X9, 144(R12)
666 MOVQ X10, 152(R12)
667 MOVQ X11, 160(R12)
668 MOVQ X12, 168(R12)
669 MOVQ X13, 176(R12)
670 MOVQ X14, 184(R12)
671 RET
672
673 // unspillArgs loads args into registers from a *internal/abi.RegArgs in R12.
// It is the exact inverse of spillArgs: nine integer registers from offsets
// 0-64 (AX, BX, CX, DI, SI, R8, R9, R10, R11), then X0-X14 from offsets
// 72-184, 8 bytes per slot. R12 itself is left unchanged.
674 TEXT ·unspillArgs(SB),NOSPLIT,$0-0
675 MOVQ 0(R12), AX
676 MOVQ 8(R12), BX
677 MOVQ 16(R12), CX
678 MOVQ 24(R12), DI
679 MOVQ 32(R12), SI
680 MOVQ 40(R12), R8
681 MOVQ 48(R12), R9
682 MOVQ 56(R12), R10
683 MOVQ 64(R12), R11
684 MOVQ 72(R12), X0
685 MOVQ 80(R12), X1
686 MOVQ 88(R12), X2
687 MOVQ 96(R12), X3
688 MOVQ 104(R12), X4
689 MOVQ 112(R12), X5
690 MOVQ 120(R12), X6
691 MOVQ 128(R12), X7
692 MOVQ 136(R12), X8
693 MOVQ 144(R12), X9
694 MOVQ 152(R12), X10
695 MOVQ 160(R12), X11
696 MOVQ 168(R12), X12
697 MOVQ 176(R12), X13
698 MOVQ 184(R12), X14
699 RET
700
701 // reflectcall: call a function with the given argument list
702 // func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
703 // we don't have variable-sized frames, so we use a small number
704 // of constant-sized-frame functions to encode a few bits of size in the pc.
705 // Caution: ugly multiline assembly macros in your future!
706
// DISPATCH(NAME, MAXSIZE): if CX (the requested frame size) is <= MAXSIZE
// (unsigned), jump to NAME; otherwise "JA 3(PC)" skips the two-instruction
// jump and falls through to the next DISPATCH. Clobbers AX.
707 #define DISPATCH(NAME,MAXSIZE) \
708 CMPQ CX, $MAXSIZE; \
709 JA 3(PC); \
710 MOVQ $NAME(SB), AX; \
711 JMP AX
712 // Note: can't just "JMP NAME(SB)" - bad inlining results.
713
// reflectcall loads frameSize (zero-extended) into CX and jumps to the first
// runtime·callN whose N is >= frameSize, trying sizes in increasing powers
// of two from 16 to 1073741824. If frameSize exceeds the largest size it
// jumps to runtime·badreflectcall. The arguments are left in place for the
// chosen callN, which declares the same 48-byte argument frame.
714 TEXT ·reflectcall(SB), NOSPLIT, $0-48
715 MOVLQZX frameSize+32(FP), CX
716 DISPATCH(runtime·call16, 16)
717 DISPATCH(runtime·call32, 32)
718 DISPATCH(runtime·call64, 64)
719 DISPATCH(runtime·call128, 128)
720 DISPATCH(runtime·call256, 256)
721 DISPATCH(runtime·call512, 512)
722 DISPATCH(runtime·call1024, 1024)
723 DISPATCH(runtime·call2048, 2048)
724 DISPATCH(runtime·call4096, 4096)
725 DISPATCH(runtime·call8192, 8192)
726 DISPATCH(runtime·call16384, 16384)
727 DISPATCH(runtime·call32768, 32768)
728 DISPATCH(runtime·call65536, 65536)
729 DISPATCH(runtime·call131072, 131072)
730 DISPATCH(runtime·call262144, 262144)
731 DISPATCH(runtime·call524288, 524288)
732 DISPATCH(runtime·call1048576, 1048576)
733 DISPATCH(runtime·call2097152, 2097152)
734 DISPATCH(runtime·call4194304, 4194304)
735 DISPATCH(runtime·call8388608, 8388608)
736 DISPATCH(runtime·call16777216, 16777216)
737 DISPATCH(runtime·call33554432, 33554432)
738 DISPATCH(runtime·call67108864, 67108864)
739 DISPATCH(runtime·call134217728, 134217728)
740 DISPATCH(runtime·call268435456, 268435456)
741 DISPATCH(runtime·call536870912, 536870912)
742 DISPATCH(runtime·call1073741824, 1073741824)
743 MOVQ $runtime·badreflectcall(SB), AX
744 JMP AX
745
// CALLFN(NAME, MAXSIZE) defines one fixed-frame call stub with a local frame
// of MAXSIZE bytes and the same 48-byte argument layout as reflectcall.
// The stub:
//   1. copies stackArgsSize bytes from stackArgs to the bottom of its own
//      frame (REP MOVSB with SI=src, DI=SP, CX=count);
//   2. loads the register arguments from regArgs via unspillArgs (R12);
//   3. calls the code pointer stored in the first word of f, with DX = f;
//   4. stores the register results back into regArgs via spillArgs;
//   5. calls callRet<> with DX = stackArgsType, DI = stackArgs+stackRetOffset,
//      SI = SP+stackRetOffset, CX = stackArgsSize-stackRetOffset and
//      R12 = regArgs to copy the stack results back to the caller's buffer.
746 #define CALLFN(NAME,MAXSIZE) \
747 TEXT NAME(SB), WRAPPER, $MAXSIZE-48; \
748 NO_LOCAL_POINTERS; \
749 /* copy arguments to stack */ \
750 MOVQ stackArgs+16(FP), SI; \
751 MOVLQZX stackArgsSize+24(FP), CX; \
752 MOVQ SP, DI; \
753 REP;MOVSB; \
754 /* set up argument registers */ \
755 MOVQ regArgs+40(FP), R12; \
756 CALL ·unspillArgs(SB); \
757 /* call function */ \
758 MOVQ f+8(FP), DX; \
759 PCDATA $PCDATA_StackMapIndex, $0; \
760 MOVQ (DX), R12; \
761 CALL R12; \
762 /* copy register return values back */ \
763 MOVQ regArgs+40(FP), R12; \
764 CALL ·spillArgs(SB); \
765 MOVLQZX stackArgsSize+24(FP), CX; \
766 MOVLQZX stackRetOffset+28(FP), BX; \
767 MOVQ stackArgs+16(FP), DI; \
768 MOVQ stackArgsType+0(FP), DX; \
769 MOVQ SP, SI; \
770 ADDQ BX, DI; \
771 ADDQ BX, SI; \
772 SUBQ BX, CX; \
773 CALL callRet<>(SB); \
774 RET
775
776 // callRet copies return values back at the end of call*. This is a
777 // separate function so it can allocate stack space for the arguments
778 // to reflectcallmove. It does not follow the Go ABI; it expects its
779 // arguments in registers.
//
// In: DX, DI, SI, CX, R12, as set up at the end of CALLFN. They are stored
// to the five stack argument slots at 0(SP)..32(SP), in that order, before
// calling runtime·reflectcallmove.
780 TEXT callRet<>(SB), NOSPLIT, $40-0
781 NO_LOCAL_POINTERS
782 MOVQ DX, 0(SP)
783 MOVQ DI, 8(SP)
784 MOVQ SI, 16(SP)
785 MOVQ CX, 24(SP)
786 MOVQ R12, 32(SP)
787 CALL runtime·reflectcallmove(SB)
788 RET
789
// Instantiate one call stub per power-of-two frame size from 16 bytes to
// 1 GiB. This list must match the DISPATCH list in reflectcall one-for-one.
790 CALLFN(·call16, 16)
791 CALLFN(·call32, 32)
792 CALLFN(·call64, 64)
793 CALLFN(·call128, 128)
794 CALLFN(·call256, 256)
795 CALLFN(·call512, 512)
796 CALLFN(·call1024, 1024)
797 CALLFN(·call2048, 2048)
798 CALLFN(·call4096, 4096)
799 CALLFN(·call8192, 8192)
800 CALLFN(·call16384, 16384)
801 CALLFN(·call32768, 32768)
802 CALLFN(·call65536, 65536)
803 CALLFN(·call131072, 131072)
804 CALLFN(·call262144, 262144)
805 CALLFN(·call524288, 524288)
806 CALLFN(·call1048576, 1048576)
807 CALLFN(·call2097152, 2097152)
808 CALLFN(·call4194304, 4194304)
809 CALLFN(·call8388608, 8388608)
810 CALLFN(·call16777216, 16777216)
811 CALLFN(·call33554432, 33554432)
812 CALLFN(·call67108864, 67108864)
813 CALLFN(·call134217728, 134217728)
814 CALLFN(·call268435456, 268435456)
815 CALLFN(·call536870912, 536870912)
816 CALLFN(·call1073741824, 1073741824)
817
// procyieldAsm executes the PAUSE instruction `cycles` times, where cycles
// is the 32-bit value read from the first argument slot. It returns
// immediately when cycles is 0.
// NOTE(review): the TEXT directive declares a 0-byte argument frame ($0-0)
// although cycles+0(FP) is read - confirm against the Go declaration.
818 TEXT runtime·procyieldAsm(SB),NOSPLIT,$0-0
819 MOVL cycles+0(FP), AX
820 TESTL AX, AX
821 JZ done // zero iterations requested
822 again:
823 PAUSE
824 SUBL $1, AX
825 JNZ again // loop until the counter reaches zero
826 done:
827 RET
828
829
// publicationBarrier emits no instructions besides RET on amd64.
830 TEXT ·publicationBarrier<ABIInternal>(SB),NOSPLIT,$0-0
831 // Stores are already ordered on x86, so this is just a
832 // compile barrier.
833 RET
834
835 // Save state of caller into g->sched,
836 // but using fake PC from systemstack_switch.
837 // Must only be called from functions with frame pointer
838 // and without locals ($0) or else unwinding from
839 // systemstack_switch is incorrect.
840 // Smashes R9.
//
// In: R14 = current g. Writes g.sched.pc, g.sched.sp (the caller's SP, i.e.
// SP just above this function's return address) and g.sched.bp. Aborts if
// g.sched.ctxt is non-zero.
841 TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
842 // Take systemstack_switch PC and add 8 bytes to skip
843 // the prologue. The final location does not matter
844 // as long as we are between the prologue and the epilogue.
845 MOVQ $runtime·systemstack_switch+8(SB), R9
846 MOVQ R9, (g_sched+gobuf_pc)(R14)
847 LEAQ 8(SP), R9 // skip our own return address: R9 = caller's SP
848 MOVQ R9, (g_sched+gobuf_sp)(R14)
849 MOVQ BP, (g_sched+gobuf_bp)(R14)
850 // Assert ctxt is zero. See func save.
851 MOVQ (g_sched+gobuf_ctxt)(R14), R9
852 TESTQ R9, R9
853 JZ 2(PC) // ctxt == 0: skip the abort
854 CALL runtime·abort(SB)
855 RET
856
857 // func asmcgocall_no_g(fn, arg unsafe.Pointer)
858 // Call fn(arg) aligned appropriately for the gcc ABI.
859 // Called on a system stack, and there may be no g yet (during needm).
//
// The original SP is kept in the frame at 8(SP) across the call (after
// 16-byte alignment) and restored afterwards. arg is placed in both DI and
// CX so the same code serves the System V and Win64 conventions. Any
// return value from fn is ignored.
860 TEXT ·asmcgocall_no_g(SB),NOSPLIT,$32-16
861 MOVQ fn+0(FP), AX
862 MOVQ arg+8(FP), BX
863 MOVQ SP, DX
864 ANDQ $~15, SP // alignment
865 MOVQ DX, 8(SP) // save the pre-alignment SP
866 MOVQ BX, DI // DI = first argument in AMD64 ABI
867 MOVQ BX, CX // CX = first argument in Win64
868 CALL AX
869 MOVQ 8(SP), DX
870 MOVQ DX, SP // restore the pre-alignment SP
871 RET
872
873 // asmcgocall_landingpad calls AX with BX as argument.
874 // Must be called on the system stack.
//
// On Windows it makes a real call with 32 bytes of stack reserved below the
// return address and the argument in CX, then returns. On every other OS
// the Windows block is compiled out and the function tail-jumps to AX with
// the argument in DI.
875 TEXT ·asmcgocall_landingpad(SB),NOSPLIT,$0-0
876 #ifdef GOOS_windows
877 // Make sure we have enough room for 4 stack-backed fast-call
878 // registers as per Windows amd64 calling convention.
879 ADJSP $32
880 // On Windows, asmcgocall_landingpad acts as landing pad for exceptions
881 // thrown in the cgo call. Exceptions that reach this function will be
882 // handled by runtime.sehtramp thanks to the SEH metadata added
883 // by the compiler.
884 // Note that runtime.sehtramp can't be attached directly to asmcgocall
885 // because its initial stack pointer can be outside the system stack bounds,
886 // and Windows stops the stack unwinding without calling the exception handler
887 // when it reaches that point.
888 MOVQ BX, CX // CX = first argument in Win64
889 CALL AX
890 // The exception handler is not called if the next instruction is part of
891 // the epilogue, which includes the RET instruction, so we need to add a NOP here.
892 BYTE $0x90
893 ADJSP $-32
894 RET
895 #endif
896 // Tail call AX on non-Windows, as the extra stack frame is not needed.
897 MOVQ BX, DI // DI = first argument in AMD64 ABI
898 JMP AX
899
900 // func asmcgocall(fn, arg unsafe.Pointer) int32
901 // Call fn(arg) on the scheduler stack,
902 // aligned appropriately for the gcc ABI.
903 // See cgocall.go for more details.
//
// Register roles until the call: AX = fn, BX = arg (both consumed by
// asmcgocall_landingpad), DX = SP on entry.
// Two paths:
//   - g is a regular goroutine: save its state, switch to m.g0's stack, and
//     remember g plus the depth of the old SP below g.stack.hi so the SP
//     can be recomputed after the call.
//   - nosave (no g, or g is gsignal or g0): stay on the current stack and
//     simply remember the original SP.
// In both cases the low 32 bits of AX after the call become the result.
904 TEXT ·asmcgocall(SB),NOSPLIT,$0-20
905 MOVQ fn+0(FP), AX
906 MOVQ arg+8(FP), BX
907
908 MOVQ SP, DX
909
910 // Figure out if we need to switch to m->g0 stack.
911 // We get called to create new OS threads too, and those
912 // come in on the m->g0 stack already. Or we might already
913 // be on the m->gsignal stack.
914 get_tls(CX)
915 MOVQ g(CX), DI
916 CMPQ DI, $0
917 JEQ nosave
918 MOVQ g_m(DI), R8
919 MOVQ m_gsignal(R8), SI
920 CMPQ DI, SI
921 JEQ nosave
922 MOVQ m_g0(R8), SI
923 CMPQ DI, SI
924 JEQ nosave
925
926 // Switch to system stack.
927 // The original frame pointer is stored in BP,
928 // which is useful for stack unwinding.
929 CALL gosave_systemstack_switch<>(SB)
930 MOVQ SI, g(CX) // TLS g = m.g0 (SI was loaded just above)
931 MOVQ (g_sched+gobuf_sp)(SI), SP
932
933 // Now on a scheduling stack (a pthread-created stack).
934 SUBQ $16, SP
935 ANDQ $~15, SP // alignment for gcc ABI
936 MOVQ DI, 8(SP) // save g
937 MOVQ (g_stack+stack_hi)(DI), DI
938 SUBQ DX, DI
939 MOVQ DI, 0(SP) // save depth in stack (can't just save SP, as stack might be copied during a callback)
940 CALL runtime·asmcgocall_landingpad(SB)
941
942 // Restore registers, g, stack pointer.
// SP is recomputed as g.stack.hi - saved depth, using the current stack.hi.
943 get_tls(CX)
944 MOVQ 8(SP), DI
945 MOVQ (g_stack+stack_hi)(DI), SI
946 SUBQ 0(SP), SI
947 MOVQ DI, g(CX)
948 MOVQ SI, SP
949
950 MOVL AX, ret+16(FP)
951 RET
952
953 nosave:
954 // Running on a system stack, perhaps even without a g.
955 // Having no g can happen during thread creation or thread teardown
956 // (see needm/dropm on Solaris, for example).
957 // This code is like the above sequence but without saving/restoring g
958 // and without worrying about the stack moving out from under us
959 // (because we're on a system stack, not a goroutine stack).
960 // The above code could be used directly if already on a system stack,
961 // but then the only path through this code would be a rare case on Solaris.
962 // Using this code for all "already on system stack" calls exercises it more,
963 // which should help keep it correct.
964 SUBQ $16, SP
965 ANDQ $~15, SP
966 MOVQ $0, 8(SP) // where above code stores g, in case someone looks during debugging
967 MOVQ DX, 0(SP) // save original stack pointer
968 CALL runtime·asmcgocall_landingpad(SB)
969 MOVQ 0(SP), SI // restore original stack pointer
970 MOVQ SI, SP
971 MOVL AX, ret+16(FP)
972 RET
973
974 #ifdef GOOS_windows
975 // Dummy TLS that's used on Windows so that we don't crash trying
976 // to restore the G register in needm. needm and its callees are
977 // very careful never to actually use the G, the TLS just can't be
978 // unset since we're in Go code.
// It is a read-only, file-local block of const_tlsSize bytes with no DATA
// initializer; cgocallback passes its address to runtime·settls.
979 GLOBL zeroTLS<>(SB),RODATA,$const_tlsSize
980 #endif
981
982 // func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
983 // See cgocall.go for more details.
984 TEXT ·cgocallback(SB),NOSPLIT,$24-24
985 NO_LOCAL_POINTERS
986
987 // Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g.
988 // It is used to dropm while thread is exiting.
989 MOVQ fn+0(FP), AX
990 CMPQ AX, $0
991 JNE loadg
992 // Restore the g from frame.
993 get_tls(CX)
994 MOVQ frame+8(FP), BX
995 MOVQ BX, g(CX)
996 JMP dropm
997
998 loadg:
999 // If g is nil, either Go did not create the current thread,
1000 // or (on pthread platforms) this thread has never called into Go.
1001 // Call needm to obtain one m for temporary use.
1002 // In this case, we're running on the thread stack, so there's
1003 // lots of space, but the linker doesn't know. Hide the call from
1004 // the linker analysis by using an indirect call through AX.
1005 get_tls(CX)
1006 #ifdef GOOS_windows
1007 MOVL $0, BX
1008 CMPQ CX, $0
1009 JEQ 2(PC)
1010 #endif
1011 MOVQ g(CX), BX
1012 CMPQ BX, $0
1013 JEQ needm
1014 MOVQ g_m(BX), BX
1015 MOVQ BX, savedm-8(SP) // saved copy of oldm
1016 JMP havem
1017 needm:
1018 #ifdef GOOS_windows
1019 // Set up a dummy TLS value. needm is careful not to use it,
1020 // but it needs to be there to prevent autogenerated code from
1021 // crashing when it loads from it.
1022 // We don't need to clear it or anything later because needm
1023 // will set up TLS properly.
1024 MOVQ $zeroTLS<>(SB), DI
1025 CALL runtime·settls(SB)
1026 #endif
1027 // On some platforms (Windows) we cannot call needm through
1028 // an ABI wrapper because there's no TLS set up, and the ABI
1029 // wrapper will try to restore the G register (R14) from TLS.
1030 // Clear X15 because Go expects it and we're not calling
1031 // through a wrapper, but otherwise avoid setting the G
1032 // register in the wrapper and call needm directly. It
1033 // takes no arguments and doesn't return any values so
1034 // there's no need to handle that. Clear R14 so that there's
1035 // a bad value in there, in case needm tries to use it.
1036 XORPS X15, X15
1037 XORQ R14, R14
1038 MOVQ $runtime·needAndBindM<ABIInternal>(SB), AX
1039 CALL AX
1040 MOVQ $0, savedm-8(SP)
1041 get_tls(CX)
1042 MOVQ g(CX), BX
1043 MOVQ g_m(BX), BX
1044
1045 // Set m->sched.sp = SP, so that if a panic happens
1046 // during the function we are about to execute, it will
1047 // have a valid SP to run on the g0 stack.
1048 // The next few lines (after the havem label)
1049 // will save this SP onto the stack and then write
1050 // the same SP back to m->sched.sp. That seems redundant,
1051 // but if an unrecovered panic happens, unwindm will
1052 // restore the g->sched.sp from the stack location
1053 // and then systemstack will try to use it. If we don't set it here,
1054 // that restored SP will be uninitialized (typically 0) and
1055 // will not be usable.
1056 MOVQ m_g0(BX), SI
1057 MOVQ SP, (g_sched+gobuf_sp)(SI)
1058
1059 havem:
1060 // Now there's a valid m, and we're running on its m->g0.
1061 // Save current m->g0->sched.sp on stack and then set it to SP.
1062 // Save current sp in m->g0->sched.sp in preparation for
1063 // switch back to m->curg stack.
1064 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
1065 MOVQ m_g0(BX), SI
1066 MOVQ (g_sched+gobuf_sp)(SI), AX
1067 MOVQ AX, 0(SP)
1068 MOVQ SP, (g_sched+gobuf_sp)(SI)
1069
1070 // Switch to m->curg stack and call runtime.cgocallbackg.
1071 // Because we are taking over the execution of m->curg
1072 // but *not* resuming what had been running, we need to
1073 // save that information (m->curg->sched) so we can restore it.
1074 // We can restore m->curg->sched.sp easily, because calling
1075 // runtime.cgocallbackg leaves SP unchanged upon return.
1076 // To save m->curg->sched.pc, we push it onto the curg stack and
1077 // open a frame the same size as cgocallback's g0 frame.
1078 // Once we switch to the curg stack, the pushed PC will appear
1079 // to be the return PC of cgocallback, so that the traceback
1080 // will seamlessly trace back into the earlier calls.
1081 MOVQ m_curg(BX), SI
1082 MOVQ SI, g(CX)
1083 MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
1084 MOVQ (g_sched+gobuf_pc)(SI), BX
1085 MOVQ BX, -8(DI) // "push" return PC on the g stack
1086 // Gather our arguments into registers.
1087 MOVQ fn+0(FP), BX
1088 MOVQ frame+8(FP), CX
1089 MOVQ ctxt+16(FP), DX
1090 // Compute the size of the frame, including return PC and, if
1091 // GOEXPERIMENT=framepointer, the saved base pointer
1092 LEAQ fn+0(FP), AX
1093 SUBQ SP, AX // AX is our actual frame size
1094 SUBQ AX, DI // Allocate the same frame size on the g stack
1095 MOVQ DI, SP
1096
1097 MOVQ BX, 0(SP)
1098 MOVQ CX, 8(SP)
1099 MOVQ DX, 16(SP)
1100 MOVQ $runtime·cgocallbackg(SB), AX
1101 CALL AX // indirect call to bypass nosplit check. We're on a different stack now.
1102
1103 // Compute the size of the frame again. FP and SP have
1104 // completely different values here than they did above,
1105 // but only their difference matters.
1106 LEAQ fn+0(FP), AX
1107 SUBQ SP, AX
1108
1109 // Restore g->sched (== m->curg->sched) from saved values.
1110 get_tls(CX)
1111 MOVQ g(CX), SI
1112 MOVQ SP, DI
1113 ADDQ AX, DI
1114 MOVQ -8(DI), BX
1115 MOVQ BX, (g_sched+gobuf_pc)(SI)
1116 MOVQ DI, (g_sched+gobuf_sp)(SI)
1117
1118 // Switch back to m->g0's stack and restore m->g0->sched.sp.
1119 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
1120 // so we do not have to restore it.)
1121 MOVQ g(CX), BX
1122 MOVQ g_m(BX), BX
1123 MOVQ m_g0(BX), SI
1124 MOVQ SI, g(CX)
1125 MOVQ (g_sched+gobuf_sp)(SI), SP
1126 MOVQ 0(SP), AX
1127 MOVQ AX, (g_sched+gobuf_sp)(SI)
1128
1129 // If the m on entry was nil, we called needm above to borrow an m,
1130 // 1. for the duration of the call on non-pthread platforms,
1131 // 2. or the duration of the C thread alive on pthread platforms.
1132 // If the m on entry wasn't nil,
1133 // 1. the thread might be a Go thread,
1134 // 2. or it wasn't the first call from a C thread on pthread platforms,
1135 // since then we skip dropm to reuse the m in the first call.
1136 MOVQ savedm-8(SP), BX
1137 CMPQ BX, $0
1138 JNE done
1139
1140 // Skip dropm to reuse it in the next call, when a pthread key has been created.
1141 MOVQ _cgo_pthread_key_created(SB), AX
1142 // It means cgo is disabled when _cgo_pthread_key_created is a nil pointer, need dropm.
1143 CMPQ AX, $0
1144 JEQ dropm
1145 CMPQ (AX), $0
1146 JNE done
1147
1148 dropm:
1149 MOVQ $runtime·dropm(SB), AX
1150 CALL AX
1151 #ifdef GOOS_windows
1152 // We need to clear the TLS pointer in case the next
1153 // thread that comes into Go tries to reuse that space
1154 // but uses the same M.
1155 XORQ DI, DI
1156 CALL runtime·settls(SB)
1157 #endif
1158 done:
1159
1160 // Done!
1161 RET
1162
1163 // func setg(gg *g)
1164 // set g. for use by needm.
// Stores gg into the TLS g slot only; this routine does not write R14.
1165 TEXT runtime·setg(SB), NOSPLIT, $0-8
1166 MOVQ gg+0(FP), BX // BX = gg (stack argument)
1167 get_tls(CX)
1168 MOVQ BX, g(CX) // TLS g = gg
1169 RET
1170
1171 // void setg_gcc(G*); set g called from gcc.
// The G pointer arrives in DI and is written both to the TLS g slot
// and to R14.
1172 TEXT setg_gcc<>(SB),NOSPLIT,$0
1173 get_tls(AX)
1174 MOVQ DI, g(AX) // TLS g = argument
1175 MOVQ DI, R14 // set the g register
1176 RET
1177
// abort executes INT $3 (breakpoint trap). If execution ever continues
// past the trap, it spins in an infinite loop, so it never returns.
1178 TEXT runtime·abort(SB),NOSPLIT,$0-0
1179 INT $3
1180 loop:
1181 JMP loop
1182
1183 // check that SP lies within the bounds of g's stack; call abort if not.
// NOTE(review): both comparisons below are strict (JHI, unsigned), so
// the code as written accepts only g->stack.lo < SP < g->stack.hi;
// SP == g->stack.lo also aborts.
1184 TEXT runtime·stackcheck(SB), NOSPLIT|NOFRAME, $0-0
1185 get_tls(CX)
1186 MOVQ g(CX), AX // AX = current g from TLS
1187 CMPQ (g_stack+stack_hi)(AX), SP
1188 JHI 2(PC) // stack.hi > SP: OK, skip the abort
1189 CALL runtime·abort(SB)
1190 CMPQ SP, (g_stack+stack_lo)(AX)
1191 JHI 2(PC) // SP > stack.lo: OK, skip the abort
1192 CALL runtime·abort(SB)
1193 RET
1194
1195 // func cputicks() int64
// Returns the timestamp counter as (DX << 32) + AX. Uses RDTSCP when
// the internal/cpu X86 HasRDTSCP flag byte is 1, otherwise
// MFENCE; LFENCE; RDTSC.
1196 TEXT runtime·cputicks(SB),NOSPLIT,$0-0
1197 CMPB internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
1198 JNE fences
1199 // Instruction stream serializing RDTSCP is supported.
1200 // RDTSCP is supported by Intel Nehalem (2008) and
1201 // AMD K8 Rev. F (2006) and newer.
1202 RDTSCP
1203 done:
// Combine the two 32-bit halves (high in DX, low in AX) into one value.
1204 SHLQ $32, DX
1205 ADDQ DX, AX
1206 MOVQ AX, ret+0(FP)
1207 RET
1208 fences:
1209 // MFENCE is instruction stream serializing and flushes the
1210 // store buffers on AMD. The serialization semantics of LFENCE on AMD
1211 // are dependent on MSR C001_1029 and CPU generation.
1212 // LFENCE on Intel does wait for all previous instructions to have executed.
1213 // Intel recommends MFENCE;LFENCE in its manuals before RDTSC to have all
1214 // previous instructions executed and all previous loads and stores globally visible.
1215 // Using MFENCE;LFENCE here aligns the serializing properties without
1216 // runtime detection of CPU manufacturer.
1217 MFENCE
1218 LFENCE
1219 RDTSC
1220 JMP done
1221
1222 // func memhash(p unsafe.Pointer, h, s uintptr) uintptr
1223 // hash function using AES hardware instructions
// Dispatches on runtime·useAeshash: nonzero tail-jumps to aeshashbody
// with the argument registers untouched, zero tail-jumps to
// memhashFallback.
1224 TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT,$0-32
1225 // AX = ptr to data
1226 // BX = seed
1227 // CX = size
1228 CMPB runtime·useAeshash(SB), $0
1229 JEQ noaes
1230 JMP aeshashbody<>(SB)
1231 noaes:
1232 JMP runtime·memhashFallback<ABIInternal>(SB)
1233
1234 // func strhash(p unsafe.Pointer, h uintptr) uintptr
// When runtime·useAeshash is nonzero, unpacks the string struct at AX
// into (data pointer, length) and tail-jumps to aeshashbody; otherwise
// tail-jumps to strhashFallback with the registers untouched.
1235 TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT,$0-24
1236 // AX = ptr to string struct
1237 // BX = seed
1238 CMPB runtime·useAeshash(SB), $0
1239 JEQ noaes
1240 MOVQ 8(AX), CX // length of string
1241 MOVQ (AX), AX // string data
1242 JMP aeshashbody<>(SB)
1243 noaes:
1244 JMP runtime·strhashFallback<ABIInternal>(SB)
1245
1246 // AX: data
1247 // BX: hash seed
1248 // CX: length
1249 // At return: AX = return value
//
// The body branches on the length in CX to one of these paths:
// aes0 (0 bytes), aes0to15 (1-15), aes16, aes17to32, aes33to64,
// aes65to128 and aes129plus. Each path returns the low 64 bits of its
// final SSE register in AX. The two paths that load data into X15
// (aes65to128, aes129plus) zero X15 again before returning.
1250 TEXT aeshashbody<>(SB),NOSPLIT,$0-0
1251 // Fill an SSE register with our seeds.
1252 MOVQ BX, X0 // 64 bits of per-table hash seed
1253 PINSRW $4, CX, X0 // 16 bits of length
1254 PSHUFHW $0, X0, X0 // repeat length 4 times total
1255 MOVO X0, X1 // save unscrambled seed
1256 PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed
1257 AESENC X0, X0 // scramble seed
1258
// Dispatch on length (unsigned comparisons).
1259 CMPQ CX, $16
1260 JB aes0to15
1261 JE aes16
1262 CMPQ CX, $32
1263 JBE aes17to32
1264 CMPQ CX, $64
1265 JBE aes33to64
1266 CMPQ CX, $128
1267 JBE aes65to128
1268 JMP aes129plus
1269
1270 aes0to15:
1271 TESTQ CX, CX
1272 JE aes0
1273
1274 ADDQ $16, AX // AX = data + 16
1275 TESTW $0xff0, AX
1276 JE endofpage
1277
1278 // 16 bytes loaded at this address won't cross
1279 // a page boundary, so we can load it directly.
1280 MOVOU -16(AX), X1 // load 16 bytes starting at the original data pointer
1281 ADDQ CX, CX // CX = 2*len, so (AX)(CX*8) selects the 16-byte entry for len
1282 MOVQ $masks<>(SB), AX
1283 PAND (AX)(CX*8), X1 // apply the masks<> entry for len to the loaded bytes
1284 final1:
1285 PXOR X0, X1 // xor data with seed
1286 AESENC X1, X1 // scramble combo 3 times
1287 AESENC X1, X1
1288 AESENC X1, X1
1289 MOVQ X1, AX // return X1
1290 RET
1291
1292 endofpage:
1293 // address ends in 1111xxxx. Might be up against
1294 // a page boundary, so load ending at last byte.
1295 // Then shift bytes down using pshufb.
1296 MOVOU -32(AX)(CX*1), X1 // AX is data+16 here, so this loads data+len-16 .. data+len-1
1297 ADDQ CX, CX // CX = 2*len, so (AX)(CX*8) selects the 16-byte entry for len
1298 MOVQ $shifts<>(SB), AX
1299 PSHUFB (AX)(CX*8), X1
1300 JMP final1
1301
1302 aes0:
1303 // Return scrambled input seed
1304 AESENC X0, X0
1305 MOVQ X0, AX // return X0
1306 RET
1307
1308 aes16:
1309 MOVOU (AX), X1
1310 JMP final1
1311
1312 aes17to32:
1313 // make second starting seed
1314 PXOR runtime·aeskeysched+16(SB), X1
1315 AESENC X1, X1
1316
1317 // load data to be hashed
// (first 16 bytes and last 16 bytes; the two loads overlap when len < 32)
1318 MOVOU (AX), X2
1319 MOVOU -16(AX)(CX*1), X3
1320
1321 // xor with seed
1322 PXOR X0, X2
1323 PXOR X1, X3
1324
1325 // scramble 3 times
1326 AESENC X2, X2
1327 AESENC X3, X3
1328 AESENC X2, X2
1329 AESENC X3, X3
1330 AESENC X2, X2
1331 AESENC X3, X3
1332
1333 // combine results
1334 PXOR X3, X2
1335 MOVQ X2, AX // return X2
1336 RET
1337
1338 aes33to64:
1339 // make 3 more starting seeds
1340 MOVO X1, X2
1341 MOVO X1, X3
1342 PXOR runtime·aeskeysched+16(SB), X1
1343 PXOR runtime·aeskeysched+32(SB), X2
1344 PXOR runtime·aeskeysched+48(SB), X3
1345 AESENC X1, X1
1346 AESENC X2, X2
1347 AESENC X3, X3
1348
// load the first 32 and the last 32 bytes (overlapping when len < 64)
1349 MOVOU (AX), X4
1350 MOVOU 16(AX), X5
1351 MOVOU -32(AX)(CX*1), X6
1352 MOVOU -16(AX)(CX*1), X7
1353
// xor with seeds
1354 PXOR X0, X4
1355 PXOR X1, X5
1356 PXOR X2, X6
1357 PXOR X3, X7
1358
// scramble 3 times
1359 AESENC X4, X4
1360 AESENC X5, X5
1361 AESENC X6, X6
1362 AESENC X7, X7
1363
1364 AESENC X4, X4
1365 AESENC X5, X5
1366 AESENC X6, X6
1367 AESENC X7, X7
1368
1369 AESENC X4, X4
1370 AESENC X5, X5
1371 AESENC X6, X6
1372 AESENC X7, X7
1373
// combine results
1374 PXOR X6, X4
1375 PXOR X7, X5
1376 PXOR X5, X4
1377 MOVQ X4, AX // return X4
1378 RET
1379
1380 aes65to128:
1381 // make 7 more starting seeds
1382 MOVO X1, X2
1383 MOVO X1, X3
1384 MOVO X1, X4
1385 MOVO X1, X5
1386 MOVO X1, X6
1387 MOVO X1, X7
1388 PXOR runtime·aeskeysched+16(SB), X1
1389 PXOR runtime·aeskeysched+32(SB), X2
1390 PXOR runtime·aeskeysched+48(SB), X3
1391 PXOR runtime·aeskeysched+64(SB), X4
1392 PXOR runtime·aeskeysched+80(SB), X5
1393 PXOR runtime·aeskeysched+96(SB), X6
1394 PXOR runtime·aeskeysched+112(SB), X7
1395 AESENC X1, X1
1396 AESENC X2, X2
1397 AESENC X3, X3
1398 AESENC X4, X4
1399 AESENC X5, X5
1400 AESENC X6, X6
1401 AESENC X7, X7
1402
1403 // load data
// (first 64 and last 64 bytes; the two halves overlap when len < 128)
1404 MOVOU (AX), X8
1405 MOVOU 16(AX), X9
1406 MOVOU 32(AX), X10
1407 MOVOU 48(AX), X11
1408 MOVOU -64(AX)(CX*1), X12
1409 MOVOU -48(AX)(CX*1), X13
1410 MOVOU -32(AX)(CX*1), X14
1411 MOVOU -16(AX)(CX*1), X15
1412
1413 // xor with seed
1414 PXOR X0, X8
1415 PXOR X1, X9
1416 PXOR X2, X10
1417 PXOR X3, X11
1418 PXOR X4, X12
1419 PXOR X5, X13
1420 PXOR X6, X14
1421 PXOR X7, X15
1422
1423 // scramble 3 times
1424 AESENC X8, X8
1425 AESENC X9, X9
1426 AESENC X10, X10
1427 AESENC X11, X11
1428 AESENC X12, X12
1429 AESENC X13, X13
1430 AESENC X14, X14
1431 AESENC X15, X15
1432
1433 AESENC X8, X8
1434 AESENC X9, X9
1435 AESENC X10, X10
1436 AESENC X11, X11
1437 AESENC X12, X12
1438 AESENC X13, X13
1439 AESENC X14, X14
1440 AESENC X15, X15
1441
1442 AESENC X8, X8
1443 AESENC X9, X9
1444 AESENC X10, X10
1445 AESENC X11, X11
1446 AESENC X12, X12
1447 AESENC X13, X13
1448 AESENC X14, X14
1449 AESENC X15, X15
1450
1451 // combine results
1452 PXOR X12, X8
1453 PXOR X13, X9
1454 PXOR X14, X10
1455 PXOR X15, X11
1456 PXOR X10, X8
1457 PXOR X11, X9
1458 PXOR X9, X8
1459 // X15 must be zero on return
1460 PXOR X15, X15
1461 MOVQ X8, AX // return X8
1462 RET
1463
1464 aes129plus:
1465 // make 7 more starting seeds
1466 MOVO X1, X2
1467 MOVO X1, X3
1468 MOVO X1, X4
1469 MOVO X1, X5
1470 MOVO X1, X6
1471 MOVO X1, X7
1472 PXOR runtime·aeskeysched+16(SB), X1
1473 PXOR runtime·aeskeysched+32(SB), X2
1474 PXOR runtime·aeskeysched+48(SB), X3
1475 PXOR runtime·aeskeysched+64(SB), X4
1476 PXOR runtime·aeskeysched+80(SB), X5
1477 PXOR runtime·aeskeysched+96(SB), X6
1478 PXOR runtime·aeskeysched+112(SB), X7
1479 AESENC X1, X1
1480 AESENC X2, X2
1481 AESENC X3, X3
1482 AESENC X4, X4
1483 AESENC X5, X5
1484 AESENC X6, X6
1485 AESENC X7, X7
1486
1487 // start with last (possibly overlapping) block
1488 MOVOU -128(AX)(CX*1), X8
1489 MOVOU -112(AX)(CX*1), X9
1490 MOVOU -96(AX)(CX*1), X10
1491 MOVOU -80(AX)(CX*1), X11
1492 MOVOU -64(AX)(CX*1), X12
1493 MOVOU -48(AX)(CX*1), X13
1494 MOVOU -32(AX)(CX*1), X14
1495 MOVOU -16(AX)(CX*1), X15
1496
1497 // xor in seed
1498 PXOR X0, X8
1499 PXOR X1, X9
1500 PXOR X2, X10
1501 PXOR X3, X11
1502 PXOR X4, X12
1503 PXOR X5, X13
1504 PXOR X6, X14
1505 PXOR X7, X15
1506
1507 // compute number of remaining 128-byte blocks
// CX = (len-1) >> 7, which is at least 1 here because len >= 129.
1508 DECQ CX
1509 SHRQ $7, CX
1510
1511 PCALIGN $16
1512 aesloop:
1513 // scramble state
1514 AESENC X8, X8
1515 AESENC X9, X9
1516 AESENC X10, X10
1517 AESENC X11, X11
1518 AESENC X12, X12
1519 AESENC X13, X13
1520 AESENC X14, X14
1521 AESENC X15, X15
1522
1523 // scramble state, xor in a block
// (X0-X7 are reused as scratch for the 128 bytes at AX; the seeds they
// held were already folded into X8-X15 above)
1524 MOVOU (AX), X0
1525 MOVOU 16(AX), X1
1526 MOVOU 32(AX), X2
1527 MOVOU 48(AX), X3
1528 AESENC X0, X8
1529 AESENC X1, X9
1530 AESENC X2, X10
1531 AESENC X3, X11
1532 MOVOU 64(AX), X4
1533 MOVOU 80(AX), X5
1534 MOVOU 96(AX), X6
1535 MOVOU 112(AX), X7
1536 AESENC X4, X12
1537 AESENC X5, X13
1538 AESENC X6, X14
1539 AESENC X7, X15
1540
1541 ADDQ $128, AX // advance to the next 128-byte block
1542 DECQ CX
1543 JNE aesloop
1544
1545 // 3 more scrambles to finish
1546 AESENC X8, X8
1547 AESENC X9, X9
1548 AESENC X10, X10
1549 AESENC X11, X11
1550 AESENC X12, X12
1551 AESENC X13, X13
1552 AESENC X14, X14
1553 AESENC X15, X15
1554 AESENC X8, X8
1555 AESENC X9, X9
1556 AESENC X10, X10
1557 AESENC X11, X11
1558 AESENC X12, X12
1559 AESENC X13, X13
1560 AESENC X14, X14
1561 AESENC X15, X15
1562 AESENC X8, X8
1563 AESENC X9, X9
1564 AESENC X10, X10
1565 AESENC X11, X11
1566 AESENC X12, X12
1567 AESENC X13, X13
1568 AESENC X14, X14
1569 AESENC X15, X15
1570
// combine results
1571 PXOR X12, X8
1572 PXOR X13, X9
1573 PXOR X14, X10
1574 PXOR X15, X11
1575 PXOR X10, X8
1576 PXOR X11, X9
1577 PXOR X9, X8
1578 // X15 must be zero on return
1579 PXOR X15, X15
1580 MOVQ X8, AX // return X8
1581 RET
1582
1583 // func memhash32(p unsafe.Pointer, h uintptr) uintptr
1584 // ABIInternal for performance.
// With runtime·useAeshash nonzero: puts the seed in the low 64 bits of
// X0 and the 4 data bytes in dword 2, runs three AESENC rounds keyed
// from runtime·aeskeysched, and returns the low 64 bits of X0.
// Otherwise tail-jumps to memhash32Fallback.
1585 TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT,$0-24
1586 // AX = ptr to data
1587 // BX = seed
1588 CMPB runtime·useAeshash(SB), $0
1589 JEQ noaes
1590 MOVQ BX, X0 // X0 = seed
1591 PINSRD $2, (AX), X0 // data
1592 AESENC runtime·aeskeysched+0(SB), X0
1593 AESENC runtime·aeskeysched+16(SB), X0
1594 AESENC runtime·aeskeysched+32(SB), X0
1595 MOVQ X0, AX // return X0
1596 RET
1597 noaes:
1598 JMP runtime·memhash32Fallback<ABIInternal>(SB)
1599
1600 // func memhash64(p unsafe.Pointer, h uintptr) uintptr
1601 // ABIInternal for performance.
// With runtime·useAeshash nonzero: puts the seed in the low 64 bits of
// X0 and the 8 data bytes in the high 64 bits, runs three AESENC rounds
// keyed from runtime·aeskeysched, and returns the low 64 bits of X0.
// Otherwise tail-jumps to memhash64Fallback.
1602 TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT,$0-24
1603 // AX = ptr to data
1604 // BX = seed
1605 CMPB runtime·useAeshash(SB), $0
1606 JEQ noaes
1607 MOVQ BX, X0 // X0 = seed
1608 PINSRQ $1, (AX), X0 // data
1609 AESENC runtime·aeskeysched+0(SB), X0
1610 AESENC runtime·aeskeysched+16(SB), X0
1611 AESENC runtime·aeskeysched+32(SB), X0
1612 MOVQ X0, AX // return X0
1613 RET
1614 noaes:
1615 JMP runtime·memhash64Fallback<ABIInternal>(SB)
1616
1617 // simple mask to get rid of data in the high part of the register.
// The table holds 16 entries of 16 bytes each. Entry i (at offset 16*i)
// has its low i bytes set to 0xff and the remaining bytes zero, so
// PAND with entry i keeps only the low i bytes of a register.
// aeshashbody indexes it with twice the length scaled by 8.
1618 DATA masks<>+0x00(SB)/8, $0x0000000000000000
1619 DATA masks<>+0x08(SB)/8, $0x0000000000000000
1620 DATA masks<>+0x10(SB)/8, $0x00000000000000ff
1621 DATA masks<>+0x18(SB)/8, $0x0000000000000000
1622 DATA masks<>+0x20(SB)/8, $0x000000000000ffff
1623 DATA masks<>+0x28(SB)/8, $0x0000000000000000
1624 DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
1625 DATA masks<>+0x38(SB)/8, $0x0000000000000000
1626 DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
1627 DATA masks<>+0x48(SB)/8, $0x0000000000000000
1628 DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
1629 DATA masks<>+0x58(SB)/8, $0x0000000000000000
1630 DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
1631 DATA masks<>+0x68(SB)/8, $0x0000000000000000
1632 DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
1633 DATA masks<>+0x78(SB)/8, $0x0000000000000000
1634 DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
1635 DATA masks<>+0x88(SB)/8, $0x0000000000000000
1636 DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
1637 DATA masks<>+0x98(SB)/8, $0x00000000000000ff
1638 DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
1639 DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
1640 DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
1641 DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
1642 DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
1643 DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
1644 DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
1645 DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
1646 DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
1647 DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
1648 DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
1649 DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
1650 GLOBL masks<>(SB),RODATA,$256
1651
1652 // func checkASM() bool
// Returns true when the addresses of both masks<> and shifts<> have
// their low four bits clear, i.e. ((masks | shifts) & 15) == 0.
// NOTE(review): presumably required because aeshashbody uses these
// tables as PAND/PSHUFB memory operands — confirm.
1653 TEXT ·checkASM(SB),NOSPLIT,$0-1
1654 // check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
1655 MOVQ $masks<>(SB), AX
1656 MOVQ $shifts<>(SB), BX
1657 ORQ BX, AX // a low bit set in either address survives the OR
1658 TESTQ $15, AX
1659 SETEQ ret+0(FP)
1660 RET
1661
1662 // these are arguments to pshufb. They move data down from
1663 // the high bytes of the register to the low bytes of the register.
1664 // index is how many bytes to move.
// The table holds 16 entries of 16 bytes each. In entry i the low i
// control bytes select source bytes 16-i through 15, and every other
// control byte is 0xff (pshufb writes zero for a control byte with its
// high bit set). Entry 0 is all zero; aeshashbody handles length 0
// separately and never indexes it.
1665 DATA shifts<>+0x00(SB)/8, $0x0000000000000000
1666 DATA shifts<>+0x08(SB)/8, $0x0000000000000000
1667 DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
1668 DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
1669 DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
1670 DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
1671 DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
1672 DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
1673 DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
1674 DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
1675 DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
1676 DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
1677 DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
1678 DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
1679 DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
1680 DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
1681 DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
1682 DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
1683 DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
1684 DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
1685 DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
1686 DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
1687 DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
1688 DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
1689 DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
1690 DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
1691 DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
1692 DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
1693 DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
1694 DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
1695 DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
1696 DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
1697 GLOBL shifts<>(SB),RODATA,$256
1698
1699 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
1700 // Must obey the gcc calling convention.
// The result is returned in AX; only AX and CX are written.
1701 TEXT _cgo_topofstack(SB),NOSPLIT,$0
1702 get_tls(CX)
1703 MOVQ g(CX), AX // AX = g
1704 MOVQ g_m(AX), AX // AX = g->m
1705 MOVQ m_curg(AX), AX // AX = g->m->curg
1706 MOVQ (g_stack+stack_hi)(AX), AX // AX = curg.stack.hi
1707 RET
1708
1709 // The top-most function running on a goroutine
1710 // returns to goexit+PCQuantum.
// The leading one-byte NOP puts the CALL one byte past the function
// entry; the trailing NOP keeps the CALL's return address inside
// goexit's code range.
1711 TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME|NOFRAME,$0-0
1712 BYTE $0x90 // NOP
1713 CALL runtime·goexit1(SB) // does not return
1714 // traceback from goexit1 must hit code range of goexit
1715 BYTE $0x90 // NOP
1716
1717 // This is called from .init_array and follows the platform, not Go, ABI.
// DI holds the moduledata pointer to append: it is stored into the
// current last entry's next field and then becomes lastmoduledatap.
1718 TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
1719 PUSHQ R15 // The access to global variables below implicitly uses R15, which is callee-save
1720 MOVQ runtime·lastmoduledatap(SB), AX
1721 MOVQ DI, moduledata_next(AX) // lastmoduledatap.next = DI
1722 MOVQ DI, runtime·lastmoduledatap(SB) // lastmoduledatap = DI
1723 POPQ R15
1724 RET
1725
1726 // Initialize special registers then jump to sigpanic.
1727 // This function is injected from the signal handler for panicking
1728 // signals. It is quite painful to set X15 in the signal context,
1729 // so we do it here.
// The registers initialized are R14 (loaded with g from TLS) and X15
// (zeroed).
1730 TEXT ·sigpanic0(SB),NOSPLIT,$0-0
1731 get_tls(R14)
1732 MOVQ g(R14), R14 // R14 = g
1733 XORPS X15, X15 // X15 = 0
1734 JMP ·sigpanic<ABIInternal>(SB)
1735
1736 // gcWriteBarrier informs the GC about heap pointer writes.
1737 //
1738 // gcWriteBarrier returns space in a write barrier buffer which
1739 // should be filled in by the caller.
1740 // gcWriteBarrier does NOT follow the Go ABI. It accepts the
1741 // number of bytes of buffer needed in R11, and returns a pointer
1742 // to the buffer space in R11.
1743 // It clobbers FLAGS. It does not clobber any general-purpose registers,
1744 // but may clobber others (e.g., SSE registers).
1745 // Typical use would be, when doing *(CX+88) = AX
1746 // CMPL $0, runtime.writeBarrier(SB)
1747 // JEQ dowrite
1748 // CALL runtime.gcWriteBarrier2(SB)
1749 // MOVQ AX, (R11)
1750 // MOVQ 88(CX), DX
1751 // MOVQ DX, 8(R11)
1752 // dowrite:
1753 // MOVQ AX, 88(CX)
//
// Frame layout (112 bytes): 0(SP)..88(SP) hold the twelve registers
// saved around wbBufFlush on the slow path; 96(SP) and 104(SP) hold
// R12 and R13, which the fast path uses as scratch.
1754 TEXT gcWriteBarrier<>(SB),NOSPLIT,$112
1755 // Save the registers clobbered by the fast path. This is slightly
1756 // faster than having the caller spill these.
1757 MOVQ R12, 96(SP)
1758 MOVQ R13, 104(SP)
1759 retry:
1760 // TODO: Consider passing g.m.p in as an argument so they can be shared
1761 // across a sequence of write barriers.
1762 MOVQ g_m(R14), R13
1763 MOVQ m_p(R13), R13 // R13 = g.m.p
1764 // Get current buffer write position.
1765 MOVQ (p_wbBuf+wbBuf_next)(R13), R12 // original next position
1766 ADDQ R11, R12 // new next position
1767 // Is the buffer full?
1768 CMPQ R12, (p_wbBuf+wbBuf_end)(R13)
1769 JA flush // new next position would pass wbBuf.end: flush, then retry
1770 // Commit to the larger buffer.
1771 MOVQ R12, (p_wbBuf+wbBuf_next)(R13)
1772 // Make return value (the original next position)
1773 SUBQ R11, R12
1774 MOVQ R12, R11
1775 // Restore registers.
1776 MOVQ 96(SP), R12
1777 MOVQ 104(SP), R13
1778 RET
1779
1780 flush:
1781 // Save all general purpose registers since these could be
1782 // clobbered by wbBufFlush and were not saved by the caller.
1783 // It is possible for wbBufFlush to clobber other registers
1784 // (e.g., SSE registers), but the compiler takes care of saving
1785 // those in the caller if necessary. This strikes a balance
1786 // with registers that are likely to be used.
1787 //
1788 // We don't have type information for these, but all code under
1789 // here is NOSPLIT, so nothing will observe these.
1790 //
1791 // TODO: We could strike a different balance; e.g., saving X0
1792 // and not saving GP registers that are less likely to be used.
1793 MOVQ DI, 0(SP)
1794 MOVQ AX, 8(SP)
1795 MOVQ BX, 16(SP)
1796 MOVQ CX, 24(SP)
1797 MOVQ DX, 32(SP)
1798 // DI already saved
1799 MOVQ SI, 40(SP)
1800 MOVQ BP, 48(SP)
1801 MOVQ R8, 56(SP)
1802 MOVQ R9, 64(SP)
1803 MOVQ R10, 72(SP)
1804 MOVQ R11, 80(SP) // the requested byte count must survive the flush for the retry
1805 // R12 already saved
1806 // R13 already saved
1807 // R14 is g
1808 MOVQ R15, 88(SP)
1809
1810 CALL runtime·wbBufFlush(SB)
1811
// Reload everything saved above and retry the fast path.
1812 MOVQ 0(SP), DI
1813 MOVQ 8(SP), AX
1814 MOVQ 16(SP), BX
1815 MOVQ 24(SP), CX
1816 MOVQ 32(SP), DX
1817 MOVQ 40(SP), SI
1818 MOVQ 48(SP), BP
1819 MOVQ 56(SP), R8
1820 MOVQ 64(SP), R9
1821 MOVQ 72(SP), R10
1822 MOVQ 80(SP), R11
1823 MOVQ 88(SP), R15
1824 JMP retry
1825
// gcWriteBarrierN (N = 1..8) loads N*8, the number of buffer bytes
// requested, into R11 and tail-jumps to gcWriteBarrier<>, which returns
// the buffer pointer in R11.
1826 TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1827 MOVL $8, R11
1828 JMP gcWriteBarrier<>(SB)
1829 TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1830 MOVL $16, R11
1831 JMP gcWriteBarrier<>(SB)
1832 TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1833 MOVL $24, R11
1834 JMP gcWriteBarrier<>(SB)
1835 TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1836 MOVL $32, R11
1837 JMP gcWriteBarrier<>(SB)
1838 TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1839 MOVL $40, R11
1840 JMP gcWriteBarrier<>(SB)
1841 TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1842 MOVL $48, R11
1843 JMP gcWriteBarrier<>(SB)
1844 TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1845 MOVL $56, R11
1846 JMP gcWriteBarrier<>(SB)
1847 TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1848 MOVL $64, R11
1849 JMP gcWriteBarrier<>(SB)
1850
// Reason string that debugCallV2 reports to the debugger when the
// requested argument frame size exceeds 65536 bytes, the largest
// debugCall frame. Its length (20) is repeated as a literal there.
1851 DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
1852 GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below
1853
1854 // debugCallV2 is the entry point for debugger-injected function
1855 // calls on running goroutines. It informs the runtime that a
1856 // debug call has been injected and creates a call frame for the
1857 // debugger to fill in.
1858 //
1859 // To inject a function call, a debugger should:
1860 // 1. Check that the goroutine is in state _Grunning and that
1861 // there are at least 256 bytes free on the stack.
1862 // 2. Push the current PC on the stack (updating SP).
1863 // 3. Write the desired argument frame size at SP-16 (using the SP
1864 // after step 2).
1865 // 4. Save all machine registers (including flags and XMM registers)
1866 // so they can be restored later by the debugger.
1867 // 5. Set the PC to debugCallV2 and resume execution.
1868 //
1869 // If the goroutine is in state _Grunnable, then it's not generally
1870 // safe to inject a call because it may return out via other runtime
1871 // operations. Instead, the debugger should unwind the stack to find
1872 // the return to non-runtime code, add a temporary breakpoint there,
1873 // and inject the call once that breakpoint is hit.
1874 //
1875 // If the goroutine is in any other state, it's not safe to inject a call.
1876 //
1877 // This function communicates back to the debugger by setting R12 and
1878 // invoking INT3 to raise a breakpoint signal. See the comments in the
1879 // implementation for the protocol the debugger is expected to
1880 // follow. InjectDebugCall in the runtime tests demonstrates this protocol.
1881 //
// Summary of the R12 values used at each INT3 in this file:
//    0: the call frame is ready for the debugger to fill in
//    1: the injected function returned
//    2: the injected function panicked (the panic value is at SP)
//    8: the call cannot be injected (reason string pointer and length
//       are at 0(SP) and 8(SP))
//   16: the debugger should restore all registers except RIP and RSP
//
1882 // The debugger must ensure that any pointers passed to the function
1883 // obey escape analysis requirements. Specifically, it must not pass
1884 // a stack pointer to an escaping argument. debugCallV2 cannot check
1885 // this invariant.
1886 //
1887 // This is ABIInternal because Go code injects its PC directly into new
1888 // goroutine stacks.
1889 TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT,$152-0
1890 // Save all registers that may contain pointers so they can be
1891 // conservatively scanned.
1892 //
1893 // We can't do anything that might clobber any of these
1894 // registers before this.
1895 MOVQ R15, r15-(14*8+8)(SP)
1896 MOVQ R14, r14-(13*8+8)(SP)
1897 MOVQ R13, r13-(12*8+8)(SP)
1898 MOVQ R12, r12-(11*8+8)(SP)
1899 MOVQ R11, r11-(10*8+8)(SP)
1900 MOVQ R10, r10-(9*8+8)(SP)
1901 MOVQ R9, r9-(8*8+8)(SP)
1902 MOVQ R8, r8-(7*8+8)(SP)
1903 MOVQ DI, di-(6*8+8)(SP)
1904 MOVQ SI, si-(5*8+8)(SP)
1905 MOVQ BP, bp-(4*8+8)(SP)
1906 MOVQ BX, bx-(3*8+8)(SP)
1907 MOVQ DX, dx-(2*8+8)(SP)
1908 // Save the frame size before we clobber it. Either of the last
1909 // saves could clobber this depending on whether there's a saved BP.
1910 MOVQ frameSize-24(FP), DX // aka -16(RSP) before prologue
1911 MOVQ CX, cx-(1*8+8)(SP)
1912 MOVQ AX, ax-(0*8+8)(SP)
1913
1914 // Save the argument frame size.
1915 MOVQ DX, frameSize-128(SP)
1916
1917 // Perform a safe-point check.
1918 MOVQ retpc-8(FP), AX // Caller's PC
1919 MOVQ AX, 0(SP) // passed to debugCallCheck as its stack argument
1920 CALL runtime·debugCallCheck(SB)
1921 MOVQ 8(SP), AX // first result word: zero means the check passed
1922 TESTQ AX, AX
1923 JZ good
1924 // The safety check failed. Put the reason string at the top
1925 // of the stack.
1926 MOVQ AX, 0(SP)
1927 MOVQ 16(SP), AX // second result word (presumably the string length)
1928 MOVQ AX, 8(SP)
1929 // Set R12 to 8 and invoke INT3. The debugger should get the
1930 // reason a call can't be injected from the top of the stack
1931 // and resume execution.
1932 MOVQ $8, R12
1933 BYTE $0xcc
1934 JMP restore
1935
1936 good:
1937 // Registers are saved and it's safe to make a call.
1938 // Open up a call frame, moving the stack if necessary.
1939 //
1940 // Once the frame is allocated, this will set R12 to 0 and
1941 // invoke INT3. The debugger should write the argument
1942 // frame for the call at SP, set up argument registers, push
1943 // the trapping PC on the stack, set the PC to the function to
1944 // call, set RDX to point to the closure (if a closure call),
1945 // and resume execution.
1946 //
1947 // If the function returns, this will set R12 to 1 and invoke
1948 // INT3. The debugger can then inspect any return value saved
1949 // on the stack at SP and in registers and resume execution again.
1950 //
1951 // If the function panics, this will set R12 to 2 and invoke INT3.
1952 // The interface{} value of the panic will be at SP. The debugger
1953 // can inspect the panic value and resume execution again.
1954 #define DEBUG_CALL_DISPATCH(NAME,MAXSIZE) \
1955 CMPQ AX, $MAXSIZE; \
1956 JA 5(PC); \
1957 MOVQ $NAME(SB), AX; \
1958 MOVQ AX, 0(SP); \
1959 CALL runtime·debugCallWrap(SB); \
1960 JMP restore
1961
// Dispatch to the smallest debugCall<N> whose frame is at least as
// large as the requested size: each DEBUG_CALL_DISPATCH falls through
// to the next one when AX > MAXSIZE (unsigned).
1962 MOVQ frameSize-128(SP), AX
1963 DEBUG_CALL_DISPATCH(debugCall32<>, 32)
1964 DEBUG_CALL_DISPATCH(debugCall64<>, 64)
1965 DEBUG_CALL_DISPATCH(debugCall128<>, 128)
1966 DEBUG_CALL_DISPATCH(debugCall256<>, 256)
1967 DEBUG_CALL_DISPATCH(debugCall512<>, 512)
1968 DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
1969 DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
1970 DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
1971 DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
1972 DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
1973 DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
1974 DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
1975 // The frame size is too large. Report the error.
1976 MOVQ $debugCallFrameTooLarge<>(SB), AX
1977 MOVQ AX, 0(SP)
1978 MOVQ $20, 8(SP) // length of debugCallFrameTooLarge string
1979 MOVQ $8, R12
1980 BYTE $0xcc
1981 JMP restore
1982
1983 restore:
1984 // Calls and failures resume here.
1985 //
1986 // Set R12 to 16 and invoke INT3. The debugger should restore
1987 // all registers except RIP and RSP and resume execution.
1988 MOVQ $16, R12
1989 BYTE $0xcc
1990 // We must not modify flags after this point.
1991
1992 // Restore pointer-containing registers, which may have been
1993 // modified from the debugger's copy by stack copying.
1994 MOVQ ax-(0*8+8)(SP), AX
1995 MOVQ cx-(1*8+8)(SP), CX
1996 MOVQ dx-(2*8+8)(SP), DX
1997 MOVQ bx-(3*8+8)(SP), BX
1998 MOVQ bp-(4*8+8)(SP), BP
1999 MOVQ si-(5*8+8)(SP), SI
2000 MOVQ di-(6*8+8)(SP), DI
2001 MOVQ r8-(7*8+8)(SP), R8
2002 MOVQ r9-(8*8+8)(SP), R9
2003 MOVQ r10-(9*8+8)(SP), R10
2004 MOVQ r11-(10*8+8)(SP), R11
2005 MOVQ r12-(11*8+8)(SP), R12
2006 MOVQ r13-(12*8+8)(SP), R13
2007 MOVQ r14-(13*8+8)(SP), R14
2008 MOVQ r15-(14*8+8)(SP), R15
2009
2010 RET
2011
2012 // runtime.debugCallCheck assumes that functions defined with the
2013 // DEBUG_CALL_FN macro are safe points to inject calls.
// Each instantiation defines a WRAPPER function with a MAXSIZE-byte
// frame. Its body sets R12 to 0 and executes the byte 0xcc (INT3),
// then sets R12 to 1 and executes 0xcc again, then returns.
2014 #define DEBUG_CALL_FN(NAME,MAXSIZE) \
2015 TEXT NAME(SB),WRAPPER,$MAXSIZE-0; \
2016 NO_LOCAL_POINTERS; \
2017 MOVQ $0, R12; \
2018 BYTE $0xcc; \
2019 MOVQ $1, R12; \
2020 BYTE $0xcc; \
2021 RET
2022 DEBUG_CALL_FN(debugCall32<>, 32)
2023 DEBUG_CALL_FN(debugCall64<>, 64)
2024 DEBUG_CALL_FN(debugCall128<>, 128)
2025 DEBUG_CALL_FN(debugCall256<>, 256)
2026 DEBUG_CALL_FN(debugCall512<>, 512)
2027 DEBUG_CALL_FN(debugCall1024<>, 1024)
2028 DEBUG_CALL_FN(debugCall2048<>, 2048)
2029 DEBUG_CALL_FN(debugCall4096<>, 4096)
2030 DEBUG_CALL_FN(debugCall8192<>, 8192)
2031 DEBUG_CALL_FN(debugCall16384<>, 16384)
2032 DEBUG_CALL_FN(debugCall32768<>, 32768)
2033 DEBUG_CALL_FN(debugCall65536<>, 65536)
2034
2035 // func debugCallPanicked(val interface{})
//
// debugCallPanicked hands a panic value to the debugger. It copies the two
// words of val (type word, then data word) from the argument area into its
// own 16-byte frame at 0(SP) and 8(SP), sets R12 to 2, and traps with INT3.
// NOTE(review): status 2 presumably tells the debugger "the injected call
// panicked and the value is at the top of the stack"; confirm against the
// debugCallV2 protocol description, which is outside this section.
2036 TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
2037 // Copy the panic value to the top of stack.
2038 MOVQ val_type+0(FP), AX
2039 MOVQ AX, 0(SP)
2040 MOVQ val_data+8(FP), AX
2041 MOVQ AX, 8(SP)
2042 MOVQ $2, R12 // status code read by the debugger at the trap below
2043 BYTE $0xcc // INT3
2044 RET
2045
// panicBounds snapshots the integer registers into its frame and forwards
// to runtime·panicBounds64 with two register arguments: AX = the return PC
// of the call to panicBounds, BX = the address of the register snapshot.
//
// Snapshot layout: the register with hardware number i is stored at
// 16+8*i(SP) (AX=0 at 16(SP) ... R15=15 at 136(SP)). The slots for SP (i=4)
// and R14 (i=13) are reserved but never written, so their contents are
// whatever was already on the stack.
2046 TEXT runtime·panicBounds<ABIInternal>(SB),NOSPLIT,$144-0
2047 NO_LOCAL_POINTERS
2048 // Save all 14 int registers that could have an index in them.
2049 // They may be pointers, but if they are they are dead.
2050 MOVQ AX, 16(SP)
2051 MOVQ CX, 24(SP)
2052 MOVQ DX, 32(SP)
2053 MOVQ BX, 40(SP)
2054 // skip SP @ 48(SP)
2055 MOVQ BP, 56(SP)
2056 MOVQ SI, 64(SP)
2057 MOVQ DI, 72(SP)
2058 MOVQ R8, 80(SP)
2059 MOVQ R9, 88(SP)
2060 MOVQ R10, 96(SP)
2061 MOVQ R11, 104(SP)
2062 MOVQ R12, 112(SP)
2063 MOVQ R13, 120(SP)
2064 // skip R14 @ 128(SP) (aka G)
2065 MOVQ R15, 136(SP)
2066
// AX and BX are clobbered only from here on, after their original values
// have been saved above.
// NOTE(review): offset 152 is the 144-byte frame plus 8 bytes, presumably
// the saved-BP slot the assembler adds below the return address — confirm.
2067 MOVQ SP, AX // hide SP read from vet
2068 MOVQ 152(AX), AX // PC immediately after call to panicBounds
2069 LEAQ 16(SP), BX
2070 CALL runtime·panicBounds64<ABIInternal>(SB)
2071 RET
2072
// runtime·tls_g is declared here only for Android and Windows builds, as an
// 8-byte global with the NOPTR flag.
2073 #ifdef GOOS_android
2074 // Use the free TLS_SLOT_APP slot #2 on Android Q.
2075 // Earlier androids are set up in gcc_android.c.
// The initial value 16 is presumably the byte offset of slot #2 with
// 8-byte slots (2 * 8) — TODO confirm.
2076 DATA runtime·tls_g+0(SB)/8, $16
2077 GLOBL runtime·tls_g+0(SB), NOPTR, $8
2078 #endif
2079 #ifdef GOOS_windows
// No DATA directive on Windows: the symbol is only reserved here, with no
// initial value given in this file.
2080 GLOBL runtime·tls_g+0(SB), NOPTR, $8
2081 #endif
2082
2083 // The compiler and assembler's -spectre=ret mode rewrites
2084 // all indirect CALL AX / JMP AX instructions to be
2085 // CALL retpolineAX / JMP retpolineAX.
2086 // See https://support.google.com/faqs/answer/7625886.
//
// RETPOLINE(reg) emits the raw bytes of one thunk for hardware register
// number reg (0=AX ... 15=R15):
//   - CALL setup: opcode E8 with rel32 = 4, which skips the 2-byte PAUSE
//     and the 2-byte JMP that follow. The CALL pushes the address of nospec.
//   - nospec: PAUSE; JMP nospec (EB with rel8 = -4), a self-contained loop
//     sitting at the return address pushed by the CALL.
//   - setup: MOVQ reg, 0(SP) overwrites that pushed return address with the
//     value of reg, and RET then transfers control to it.
// The store is encoded as: REX.W prefix 0x48 with REX.R (0x04) set when
// reg >= 8, opcode 0x89, a ModRM byte with mod=00, reg field = reg&7 and
// rm=100 (SIB follows), and SIB byte 0x24 selecting base SP with no index.
// The inline "MOVQ AX, 0(SP)" comment shows the reg=0 instance.
2087 #define RETPOLINE(reg) \
2088 /* CALL setup */ BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0; \
2089 /* nospec: */ \
2090 /* PAUSE */ BYTE $0xF3; BYTE $0x90; \
2091 /* JMP nospec */ BYTE $0xEB; BYTE $-(2+2); \
2092 /* setup: */ \
2093 /* MOVQ AX, 0(SP) */ BYTE $0x48|((reg&8)>>1); BYTE $0x89; \
2094 BYTE $0x04|((reg&7)<<3); BYTE $0x24; \
2095 /* RET */ BYTE $0xC3
2096
// One frameless thunk per general-purpose register, numbered by hardware
// register encoding. Number 4 (SP) is deliberately absent.
2097 TEXT runtime·retpolineAX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(0)
2098 TEXT runtime·retpolineCX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(1)
2099 TEXT runtime·retpolineDX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(2)
2100 TEXT runtime·retpolineBX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(3)
2101 /* SP is 4, can't happen / magic encodings */
2102 TEXT runtime·retpolineBP(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(5)
2103 TEXT runtime·retpolineSI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(6)
2104 TEXT runtime·retpolineDI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(7)
2105 TEXT runtime·retpolineR8(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(8)
2106 TEXT runtime·retpolineR9(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(9)
2107 TEXT runtime·retpolineR10(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(10)
2108 TEXT runtime·retpolineR11(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(11)
2109 TEXT runtime·retpolineR12(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(12)
2110 TEXT runtime·retpolineR13(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(13)
2111 TEXT runtime·retpolineR14(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(14)
2112 TEXT runtime·retpolineR15(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(15)
2113
// getfp returns the current value of the BP register in AX.
// The function is NOFRAME with a zero-size frame, so the MOVQ below is its
// first instruction and BP still holds whatever the caller had in it.
// NOTE(review): presumably declared on the Go side as func getfp() uintptr,
// with AX as the ABIInternal result register — confirm.
2114 TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
2115 MOVQ BP, AX
2116 RET
2117
View as plain text