Text file
src/runtime/asm_arm64.s
1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "go_tls.h"
7 #include "tls_arm64.h"
8 #include "funcdata.h"
9 #include "textflag.h"
10 #include "cgo/abi_arm64.h"
11
12 // _rt0_arm64 is common startup code for most arm64 systems when using
13 // internal linking. This is the entry point for the program from the
14 // kernel for an ordinary -buildmode=exe program. The stack holds the
15 // number of arguments and the C-style argv.
16 TEXT _rt0_arm64(SB),NOSPLIT,$0
17 MOVD 0(RSP), R0 // argc (first stack slot in the kernel's process-entry layout)
18 ADD $8, RSP, R1 // argv begins immediately after argc on the stack
19 JMP runtime·rt0_go(SB) // tail call into the common runtime entry point
20
21 // main is common startup code for most arm64 systems when using
22 // external linking. The C startup code will call the symbol "main"
23 // passing argc and argv in the usual C ABI registers R0 and R1.
24 TEXT main(SB),NOSPLIT,$0
25 JMP runtime·rt0_go(SB) // argc/argv are already in R0/R1 per the C ABI, as rt0_go expects
26
27 // _rt0_arm64_lib is common startup code for most arm64 systems when
28 // using -buildmode=c-archive or -buildmode=c-shared. The linker will
29 // arrange to invoke this function as a global constructor (for
30 // c-archive) or when the shared library is loaded (for c-shared).
31 // We expect argc and argv to be passed in the usual C ABI registers
32 // R0 and R1.
33 TEXT _rt0_arm64_lib(SB),NOSPLIT,$184
34 // Preserve callee-save registers (we are called from foreign C code).
35 SAVE_R19_TO_R28(24)
36 SAVE_F8_TO_F15(104)
37
38 // Initialize g as null in case of using g later e.g. sigaction in cgo_sigaction.go
39 MOVD ZR, g
40
41 // Stash argc/argv so _rt0_arm64_lib_go can pick them up on the new thread.
41 MOVD R0, _rt0_arm64_lib_argc<>(SB)
42 MOVD R1, _rt0_arm64_lib_argv<>(SB)
43
44 // Synchronous initialization.
45 MOVD $runtime·libpreinit(SB), R4
46 BL (R4)
47
48 // Create a new thread to do the runtime initialization and return.
49 MOVD _cgo_sys_thread_create(SB), R4 // non-nil only when cgo is in use
50 CBZ R4, nocgo
51 MOVD $_rt0_arm64_lib_go(SB), R0
52 MOVD $0, R1
53 SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved.
54 BL (R4)
55 ADD $16, RSP
56 B restore
57
58 nocgo:
59 MOVD $0x800000, R0 // stacksize = 8192KB
60 MOVD $_rt0_arm64_lib_go(SB), R1
61 MOVD R0, 8(RSP) // arg: stack size for the new OS thread
62 MOVD R1, 16(RSP) // arg: thread entry point
63 MOVD $runtime·newosproc0(SB),R4
64 BL (R4)
65
66 restore:
67 // Restore callee-save registers.
68 RESTORE_R19_TO_R28(24)
69 RESTORE_F8_TO_F15(104)
70 RET
71
72 // _rt0_arm64_lib_go runs on the thread created by _rt0_arm64_lib and
72 // performs the full runtime initialization with the saved argc/argv.
72 TEXT _rt0_arm64_lib_go(SB),NOSPLIT,$0
73 MOVD _rt0_arm64_lib_argc<>(SB), R0
74 MOVD _rt0_arm64_lib_argv<>(SB), R1
75 MOVD $runtime·rt0_go(SB),R4
76 B (R4)
77
78 DATA _rt0_arm64_lib_argc<>(SB)/8, $0 // argc saved by _rt0_arm64_lib
79 GLOBL _rt0_arm64_lib_argc<>(SB),NOPTR, $8
80 DATA _rt0_arm64_lib_argv<>(SB)/8, $0 // argv saved by _rt0_arm64_lib
81 GLOBL _rt0_arm64_lib_argv<>(SB),NOPTR, $8
82
83 #ifdef GOARM64_LSE
84 // Message printed (and program aborted) when the binary was built for LSE
84 // atomics but the CPU does not report LSE support. Length must stay 64.
84 DATA no_lse_msg<>+0x00(SB)/64, $"This program can only run on ARM64 processors with LSE support.\n"
85 GLOBL no_lse_msg<>(SB), RODATA, $64
86 #endif
87
88 // We know for sure that Linux and FreeBSD allow reading the instruction set
89 // attribute registers (while some other OSes, like OpenBSD and Darwin,
90 // do not). Let's be conservative and allow code reading such registers
91 // only when we are sure this won't lead to SIGILL.
92 #ifdef GOOS_linux
93 #define ISA_REGS_READABLE
94 #endif
95 #ifdef GOOS_freebsd
96 #define ISA_REGS_READABLE
97 #endif
98
99 // CHECK_GOARM64_LSE is defined only when both conditions hold: the binary
99 // targets LSE atomics AND the OS permits reading the ISA registers, so the
99 // startup check in rt0_go cannot itself fault.
99 #ifdef GOARM64_LSE
100 #ifdef ISA_REGS_READABLE
101 #define CHECK_GOARM64_LSE
102 #endif
103 #endif
104
105 TEXT runtime·rt0_go(SB),NOSPLIT|TOPFRAME,$0
106 // SP = stack; R0 = argc; R1 = argv
107
108 SUB $32, RSP
109 MOVW R0, 8(RSP) // argc
110 MOVD R1, 16(RSP) // argv
111
112 #ifdef TLS_darwin
113 // Initialize TLS.
114 MOVD ZR, g // clear g, make sure it's not junk.
115 SUB $32, RSP
116 MRS_TPIDR_R0
117 AND $~7, R0 // align TLS base down to 8 bytes
118 MOVD R0, 16(RSP) // arg2: TLS base
119 MOVD $runtime·tls_g(SB), R2
120 MOVD R2, 8(RSP) // arg1: &tlsg
121 BL ·tlsinit(SB)
122 ADD $32, RSP
123 #endif
124
125 // create istack out of the given (operating system) stack.
126 // _cgo_init may update stackguard.
127 MOVD $runtime·g0(SB), g
128 MOVD RSP, R7
129 MOVD $(-64*1024)(R7), R0 // assume at least 64 KB of OS stack
130 MOVD R0, g_stackguard0(g)
131 MOVD R0, g_stackguard1(g)
132 MOVD R0, (g_stack+stack_lo)(g)
133 MOVD R7, (g_stack+stack_hi)(g)
134
135 // if there is a _cgo_init, call it using the gcc ABI.
136 MOVD _cgo_init(SB), R12
137 CBZ R12, nocgo
138
139 #ifdef GOOS_android
140 MRS_TPIDR_R0 // load TLS base pointer
141 MOVD R0, R3 // arg 3: TLS base pointer
142 MOVD $runtime·tls_g(SB), R2 // arg 2: &tls_g
143 #else
144 MOVD $0, R2 // arg 2: not used when using platform's TLS
145 #endif
146 MOVD $setg_gcc<>(SB), R1 // arg 1: setg
147 MOVD g, R0 // arg 0: G
148 SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved.
149 BL (R12)
150 ADD $16, RSP
151
152 nocgo:
153 BL runtime·save_g(SB)
154 // update stackguard after _cgo_init
155 MOVD (g_stack+stack_lo)(g), R0
156 ADD $const_stackGuard, R0
157 MOVD R0, g_stackguard0(g)
158 MOVD R0, g_stackguard1(g)
159
160 // set the per-goroutine and per-mach "registers"
161 MOVD $runtime·m0(SB), R0
162
163 // save m->g0 = g0
164 MOVD g, m_g0(R0)
165 // save m0 to g0->m
166 MOVD R0, g_m(g)
167
168 BL runtime·check(SB)
169
170 #ifdef GOOS_windows
171 BL runtime·wintls(SB)
172 #endif
173
174 // Check that CPU we use for execution supports instructions targeted during compile-time.
175 #ifdef CHECK_GOARM64_LSE
176 // Read the ID_AA64ISAR0_EL1 register
177 MRS ID_AA64ISAR0_EL1, R0
178
179 // Extract the LSE field (bits [23:20])
180 LSR $20, R0, R0
181 AND $0xf, R0, R0
182
183 // LSE support is indicated by a non-zero value
184 CBZ R0, no_lse
185 #endif
186
187 MOVW 8(RSP), R0 // copy argc
188 MOVW R0, -8(RSP)
189 MOVD 16(RSP), R0 // copy argv
190 MOVD R0, 0(RSP)
191 BL runtime·args(SB)
192 BL runtime·osinit(SB)
193 BL runtime·schedinit(SB)
194
195 // create a new goroutine to start program
196 MOVD $runtime·mainPC(SB), R0 // entry
197 SUB $16, RSP
198 MOVD R0, 8(RSP) // arg
199 MOVD $0, 0(RSP) // dummy LR
200 BL runtime·newproc(SB)
201 ADD $16, RSP
202
203 // start this M
204 BL runtime·mstart(SB)
205 UNDEF // mstart should never return
206
207 #ifdef CHECK_GOARM64_LSE
208 no_lse:
209 // Report the missing-LSE error on stderr and terminate.
209 MOVD $1, R0 // stderr
210 MOVD R0, 8(RSP)
211 MOVD $no_lse_msg<>(SB), R1 // message address
212 MOVD R1, 16(RSP)
213 MOVD $64, R2 // message length
214 MOVD R2, 24(RSP)
215 CALL runtime·write(SB)
216 CALL runtime·exit(SB)
217 CALL runtime·abort(SB)
218 RET
219 #endif
220
221 // Prevent dead-code elimination of debugCallV2 and debugPinnerV1, which are
222 // intended to be called by debuggers.
223 MOVD $runtime·debugPinnerV1<ABIInternal>(SB), R0
224 MOVD $runtime·debugCallV2<ABIInternal>(SB), R0
225
226 MOVD $0, R0
227 MOVD R0, (R0) // boom
228 UNDEF
229
230 // mainPC is the entry PC for the first goroutine: runtime.main.
230 DATA runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
231 GLOBL runtime·mainPC(SB),RODATA,$8
232
233 // BREAK expands to the platform's breakpoint instruction.
233 // Windows ARM64 needs an immediate 0xf000 argument so the OS treats the
233 // BRK as a debugger breakpoint. See go.dev/issues/53837.
234 #define BREAK \
235 #ifdef GOOS_windows \
236 BRK $0xf000 \
237 #else \
238 BRK \
239 #endif \
240
242
243 // breakpoint traps into an attached debugger, then continues.
243 TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
244 BREAK
245 RET
246
247 TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
248 RET // no per-architecture assembly initialization needed on arm64
249
250 // mstart is the TOPFRAME shim that enters the Go scheduler for this M.
250 TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
251 BL runtime·mstart0(SB)
252 RET // not reached
253
254 /*
255 * go-routine
256 */
257
258 // void gogo(Gobuf*)
259 // restore state from Gobuf; longjmp
260 TEXT runtime·gogo(SB), NOSPLIT|NOFRAME, $0-8
261 MOVD buf+0(FP), R5 // R5 = gobuf to restore from
262 MOVD gobuf_g(R5), R6
263 MOVD 0(R6), R4 // make sure g != nil
264 B gogo<>(SB)
265
266 // gogo<> installs the gobuf in R5 (goroutine in R6) as the current
266 // execution state and jumps to its saved PC. Never returns.
266 TEXT gogo<>(SB), NOSPLIT|NOFRAME, $0
267 MOVD R6, g
268 BL runtime·save_g(SB)
269
270 MOVD gobuf_sp(R5), R0
271 MOVD R0, RSP
272 MOVD gobuf_bp(R5), R29
273 MOVD gobuf_lr(R5), LR
274 MOVD gobuf_ctxt(R5), R26
275 MOVD $0, gobuf_sp(R5) // clear to help the garbage collector
276 MOVD $0, gobuf_bp(R5)
277 MOVD $0, gobuf_lr(R5)
278 MOVD $0, gobuf_ctxt(R5)
279 CMP ZR, ZR // set condition codes for == test, needed by stack split
280 MOVD gobuf_pc(R5), R6
281 B (R6)
282
283 // void mcall(fn func(*g))
284 // Switch to m->g0's stack, call fn(g).
285 // Fn must never return. It should gogo(&g->sched)
286 // to keep running g.
287 TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-8
288 MOVD R0, R26 // context
289
290 // Save caller state in g->sched
291 MOVD RSP, R0
292 MOVD R0, (g_sched+gobuf_sp)(g)
293 MOVD R29, (g_sched+gobuf_bp)(g)
294 MOVD LR, (g_sched+gobuf_pc)(g)
295 MOVD $0, (g_sched+gobuf_lr)(g)
296
297 // Switch to m->g0 & its stack, call fn.
298 MOVD g, R3 // R3 = old g (becomes fn's argument)
299 MOVD g_m(g), R8
300 MOVD m_g0(R8), g
301 BL runtime·save_g(SB)
302 CMP g, R3
303 BNE 2(PC) // mcall called on g0 is a fatal error
304 B runtime·badmcall(SB)
305
306 MOVD (g_sched+gobuf_sp)(g), R0
307 MOVD R0, RSP // sp = m->g0->sched.sp
308 MOVD $0, R29 // clear frame pointer, as caller may execute on another M
309 MOVD R3, R0 // arg = g
310 MOVD $0, -16(RSP) // dummy LR
311 SUB $16, RSP
312 MOVD 0(R26), R4 // code pointer
313 BL (R4)
314 B runtime·badmcall2(SB) // fn must not return
315
316 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
317 // of the G stack. We need to distinguish the routine that
318 // lives at the bottom of the G stack from the one that lives
319 // at the top of the system stack because the one at the top of
320 // the system stack terminates the stack walk (see topofstack()).
321 TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
322 UNDEF // never executed; only its PC is used as a traceback marker
323 BL (LR) // make sure this function is not leaf
324 RET
325
326 // func systemstack(fn func())
327 TEXT runtime·systemstack(SB), NOSPLIT, $0-8
328 MOVD fn+0(FP), R3 // R3 = fn
329 MOVD R3, R26 // context
330 MOVD g_m(g), R4 // R4 = m
331
332 MOVD m_gsignal(R4), R5 // R5 = gsignal
333 CMP g, R5
334 BEQ noswitch // already on the signal stack
335
336 MOVD m_g0(R4), R5 // R5 = g0
337 CMP g, R5
338 BEQ noswitch // already on the system stack
339
340 MOVD m_curg(R4), R6
341 CMP g, R6
342 BEQ switch
343
344 // Bad: g is not gsignal, not g0, not curg. What is it?
345 // Hide call from linker nosplit analysis.
346 MOVD $runtime·badsystemstack(SB), R3
347 BL (R3)
348 B runtime·abort(SB)
349
350 switch:
351 // Switch stacks.
352 // The original frame pointer is stored in R29,
353 // which is useful for stack unwinding.
354 // Save our state in g->sched. Pretend to
355 // be systemstack_switch if the G stack is scanned.
356 BL gosave_systemstack_switch<>(SB)
357
358 // switch to g0
359 MOVD R5, g
360 BL runtime·save_g(SB)
361 MOVD (g_sched+gobuf_sp)(g), R3
362 MOVD R3, RSP
363
364 // call target function
365 MOVD 0(R26), R3 // code pointer
366 BL (R3)
367
368 // switch back to g
369 MOVD g_m(g), R3
370 MOVD m_curg(R3), g
371 BL runtime·save_g(SB)
372 MOVD (g_sched+gobuf_sp)(g), R0
373 MOVD R0, RSP
374 MOVD (g_sched+gobuf_bp)(g), R29
375 MOVD $0, (g_sched+gobuf_sp)(g) // clear saved state to aid GC
376 MOVD $0, (g_sched+gobuf_bp)(g)
377 RET
378
379 noswitch:
380 // already on m stack, just call directly
381 // Using a tail call here cleans up tracebacks since we won't stop
382 // at an intermediate systemstack.
383 MOVD 0(R26), R3 // code pointer
384 MOVD.P 16(RSP), R30 // restore LR
385 SUB $8, RSP, R29 // restore FP
386 B (R3)
387
388 // func switchToCrashStack0(fn func())
389 TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
390 MOVD R0, R26 // context register
391 MOVD g_m(g), R1 // curm
392
393 // set g to gcrash
394 MOVD $runtime·gcrash(SB), g // g = &gcrash
395 BL runtime·save_g(SB) // clobbers R0
396 MOVD R1, g_m(g) // g.m = curm
397 MOVD g, m_g0(R1) // curm.g0 = g
398
399 // switch to crashstack
400 MOVD (g_stack+stack_hi)(g), R1
401 SUB $(4*8), R1 // leave room at the top of the crash stack
402 MOVD R1, RSP
403
404 // call target function
405 MOVD 0(R26), R0
406 CALL (R0)
407
408 // should never return
409 CALL runtime·abort(SB)
410 UNDEF
411
412 /*
413 * support for morestack
414 */
415
416 // Called during function prolog when more stack is needed.
417 // Caller has already loaded:
418 // R3 prolog's LR (R30)
419 //
420 // The traceback routines see morestack on a g0 as being
421 // the top of a stack (for example, morestack calling newstack
422 // calling the scheduler calling newm calling gc), so we must
423 // record an argument size. For that purpose, it has no arguments.
424 TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
425 // Cannot grow scheduler stack (m->g0).
426 MOVD g_m(g), R8
427 MOVD m_g0(R8), R4
428
429 // Called from f.
430 // Set g->sched to context in f
431 MOVD RSP, R0
432 MOVD R0, (g_sched+gobuf_sp)(g)
433 MOVD R29, (g_sched+gobuf_bp)(g)
434 MOVD LR, (g_sched+gobuf_pc)(g)
435 MOVD R3, (g_sched+gobuf_lr)(g)
436 MOVD R26, (g_sched+gobuf_ctxt)(g)
437
438 CMP g, R4
439 BNE 3(PC) // growing g0 is fatal
440 BL runtime·badmorestackg0(SB)
441 B runtime·abort(SB)
442
443 // Cannot grow signal stack (m->gsignal).
444 MOVD m_gsignal(R8), R4
445 CMP g, R4
446 BNE 3(PC) // growing gsignal is fatal
447 BL runtime·badmorestackgsignal(SB)
448 B runtime·abort(SB)
449
450 // Called from f.
451 // Set m->morebuf to f's callers.
452 MOVD R3, (m_morebuf+gobuf_pc)(R8) // f's caller's PC
453 MOVD RSP, R0
454 MOVD R0, (m_morebuf+gobuf_sp)(R8) // f's caller's RSP
455 MOVD g, (m_morebuf+gobuf_g)(R8)
456
457 // Call newstack on m->g0's stack.
458 MOVD m_g0(R8), g
459 BL runtime·save_g(SB)
460 MOVD (g_sched+gobuf_sp)(g), R0
461 MOVD R0, RSP
462 MOVD $0, R29 // clear frame pointer, as caller may execute on another M
463 MOVD.W $0, -16(RSP) // create a call frame on g0 (saved LR; keep 16-aligned)
464 BL runtime·newstack(SB)
465
466 // Not reached, but make sure the return PC from the call to newstack
467 // is still in this function, and not the beginning of the next.
468 UNDEF
469
470 TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
471 // Force SPWRITE. This function doesn't actually write SP,
472 // but it is called with a special calling convention where
473 // the caller doesn't save LR on stack but passes it as a
474 // register (R3), and the unwinder currently doesn't understand.
475 // Make it SPWRITE to stop unwinding. (See issue 54332)
476 MOVD RSP, RSP
477
478 MOVW $0, R26 // zero the context register
479 B runtime·morestack(SB)
480
481 // spillArgs stores return values from registers to a *internal/abi.RegArgs in R20.
482 TEXT ·spillArgs(SB),NOSPLIT,$0-0
483 STP (R0, R1), (0*8)(R20) // integer register args/results, pairwise
484 STP (R2, R3), (2*8)(R20)
485 STP (R4, R5), (4*8)(R20)
486 STP (R6, R7), (6*8)(R20)
487 STP (R8, R9), (8*8)(R20)
488 STP (R10, R11), (10*8)(R20)
489 STP (R12, R13), (12*8)(R20)
490 STP (R14, R15), (14*8)(R20)
491 FSTPD (F0, F1), (16*8)(R20) // floating-point register args/results
492 FSTPD (F2, F3), (18*8)(R20)
493 FSTPD (F4, F5), (20*8)(R20)
494 FSTPD (F6, F7), (22*8)(R20)
495 FSTPD (F8, F9), (24*8)(R20)
496 FSTPD (F10, F11), (26*8)(R20)
497 FSTPD (F12, F13), (28*8)(R20)
498 FSTPD (F14, F15), (30*8)(R20)
499 RET
500
501 // unspillArgs loads args into registers from a *internal/abi.RegArgs in R20.
502 TEXT ·unspillArgs(SB),NOSPLIT,$0-0
503 LDP (0*8)(R20), (R0, R1) // integer register args/results, pairwise
504 LDP (2*8)(R20), (R2, R3)
505 LDP (4*8)(R20), (R4, R5)
506 LDP (6*8)(R20), (R6, R7)
507 LDP (8*8)(R20), (R8, R9)
508 LDP (10*8)(R20), (R10, R11)
509 LDP (12*8)(R20), (R12, R13)
510 LDP (14*8)(R20), (R14, R15)
511 FLDPD (16*8)(R20), (F0, F1) // floating-point register args/results
512 FLDPD (18*8)(R20), (F2, F3)
513 FLDPD (20*8)(R20), (F4, F5)
514 FLDPD (22*8)(R20), (F6, F7)
515 FLDPD (24*8)(R20), (F8, F9)
516 FLDPD (26*8)(R20), (F10, F11)
517 FLDPD (28*8)(R20), (F12, F13)
518 FLDPD (30*8)(R20), (F14, F15)
519 RET
520
521 // reflectcall: call a function with the given argument list
522 // func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
523 // we don't have variable-sized frames, so we use a small number
524 // of constant-sized-frame functions to encode a few bits of size in the pc.
525 // Caution: ugly multiline assembly macros in your future!
526
527 // DISPATCH jumps to NAME if the requested frame size in R16 is <= MAXSIZE.
527 // R27 is used as scratch for both the size compare and the target address.
527 #define DISPATCH(NAME,MAXSIZE) \
528 MOVD $MAXSIZE, R27; \
529 CMP R27, R16; \
530 BGT 3(PC); \
531 MOVD $NAME(SB), R27; \
532 B (R27)
533 // Note: can't just "B NAME(SB)" - bad inlining results.
533 // Note: can't just "B NAME(SB)" - bad inlining results.
534
535 TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-48
536 MOVWU frameSize+32(FP), R16 // R16 = frame size, selects the call* variant
537 DISPATCH(runtime·call16, 16)
538 DISPATCH(runtime·call32, 32)
539 DISPATCH(runtime·call64, 64)
540 DISPATCH(runtime·call128, 128)
541 DISPATCH(runtime·call256, 256)
542 DISPATCH(runtime·call512, 512)
543 DISPATCH(runtime·call1024, 1024)
544 DISPATCH(runtime·call2048, 2048)
545 DISPATCH(runtime·call4096, 4096)
546 DISPATCH(runtime·call8192, 8192)
547 DISPATCH(runtime·call16384, 16384)
548 DISPATCH(runtime·call32768, 32768)
549 DISPATCH(runtime·call65536, 65536)
550 DISPATCH(runtime·call131072, 131072)
551 DISPATCH(runtime·call262144, 262144)
552 DISPATCH(runtime·call524288, 524288)
553 DISPATCH(runtime·call1048576, 1048576)
554 DISPATCH(runtime·call2097152, 2097152)
555 DISPATCH(runtime·call4194304, 4194304)
556 DISPATCH(runtime·call8388608, 8388608)
557 DISPATCH(runtime·call16777216, 16777216)
558 DISPATCH(runtime·call33554432, 33554432)
559 DISPATCH(runtime·call67108864, 67108864)
560 DISPATCH(runtime·call134217728, 134217728)
561 DISPATCH(runtime·call268435456, 268435456)
562 DISPATCH(runtime·call536870912, 536870912)
563 DISPATCH(runtime·call1073741824, 1073741824)
564 MOVD $runtime·badreflectcall(SB), R0 // frame larger than 1 GB: fatal
565 B (R0)
566
567 // CALLFN defines one fixed-frame-size reflectcall helper: it copies the
567 // stack arguments into its own frame, loads register arguments, calls f,
567 // then spills result registers and copies stack results back via callRet.
567 #define CALLFN(NAME,MAXSIZE) \
568 TEXT NAME(SB), WRAPPER, $MAXSIZE-48; \
569 NO_LOCAL_POINTERS; \
570 /* copy arguments to stack */ \
571 MOVD stackArgs+16(FP), R3; \
572 MOVWU stackArgsSize+24(FP), R4; \
573 ADD $8, RSP, R5; \
574 BIC $0xf, R4, R6; \
575 CBZ R6, 6(PC); \
576 /* if R6=(argsize&~15) != 0 */ \
577 ADD R6, R5, R6; \
578 /* copy 16 bytes a time */ \
579 LDP.P 16(R3), (R7, R8); \
580 STP.P (R7, R8), 16(R5); \
581 CMP R5, R6; \
582 BNE -3(PC); \
583 AND $0xf, R4, R6; \
584 CBZ R6, 6(PC); \
585 /* if R6=(argsize&15) != 0 */ \
586 ADD R6, R5, R6; \
587 /* copy 1 byte a time for the rest */ \
588 MOVBU.P 1(R3), R7; \
589 MOVBU.P R7, 1(R5); \
590 CMP R5, R6; \
591 BNE -3(PC); \
592 /* set up argument registers */ \
593 MOVD regArgs+40(FP), R20; \
594 CALL ·unspillArgs(SB); \
595 /* call function */ \
596 MOVD f+8(FP), R26; \
597 MOVD (R26), R20; \
598 PCDATA $PCDATA_StackMapIndex, $0; \
599 BL (R20); \
600 /* copy return values back */ \
601 MOVD regArgs+40(FP), R20; \
602 CALL ·spillArgs(SB); \
603 MOVD stackArgsType+0(FP), R7; \
604 MOVD stackArgs+16(FP), R3; \
605 MOVWU stackArgsSize+24(FP), R4; \
606 MOVWU stackRetOffset+28(FP), R6; \
607 ADD $8, RSP, R5; \
608 ADD R6, R5; \
609 ADD R6, R3; \
610 SUB R6, R4; \
611 BL callRet<>(SB); \
612 RET
613
614 // callRet copies return values back at the end of call*. This is a
615 // separate function so it can allocate stack space for the arguments
616 // to reflectcallmove. It does not follow the Go ABI; it expects its
617 // arguments in registers.
618 TEXT callRet<>(SB), NOSPLIT, $48-0
619 NO_LOCAL_POINTERS
620 STP (R7, R3), 8(RSP) // args: type, src
621 STP (R5, R4), 24(RSP) // args: dst, size
622 MOVD R20, 40(RSP) // arg: regArgs
623 BL runtime·reflectcallmove(SB)
624 RET
625
626 // Instantiate one call* helper per power-of-two frame size used by reflectcall.
626 CALLFN(·call16, 16)
627 CALLFN(·call32, 32)
628 CALLFN(·call64, 64)
629 CALLFN(·call128, 128)
630 CALLFN(·call256, 256)
631 CALLFN(·call512, 512)
632 CALLFN(·call1024, 1024)
633 CALLFN(·call2048, 2048)
634 CALLFN(·call4096, 4096)
635 CALLFN(·call8192, 8192)
636 CALLFN(·call16384, 16384)
637 CALLFN(·call32768, 32768)
638 CALLFN(·call65536, 65536)
639 CALLFN(·call131072, 131072)
640 CALLFN(·call262144, 262144)
641 CALLFN(·call524288, 524288)
642 CALLFN(·call1048576, 1048576)
643 CALLFN(·call2097152, 2097152)
644 CALLFN(·call4194304, 4194304)
645 CALLFN(·call8388608, 8388608)
646 CALLFN(·call16777216, 16777216)
647 CALLFN(·call33554432, 33554432)
648 CALLFN(·call67108864, 67108864)
649 CALLFN(·call134217728, 134217728)
650 CALLFN(·call268435456, 268435456)
651 CALLFN(·call536870912, 536870912)
652 CALLFN(·call1073741824, 1073741824)
653
654 // func memhash32(p unsafe.Pointer, h uintptr) uintptr
655 TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
656 MOVB runtime·useAeshash(SB), R10
657 CBZ R10, noaes // fall back when AES instructions are unavailable
658 MOVD $runtime·aeskeysched+0(SB), R3
659
660 VEOR V0.B16, V0.B16, V0.B16
661 VLD1 (R3), [V2.B16]
662 VLD1 (R0), V0.S[1] // 32-bit value to hash
663 VMOV R1, V0.S[0] // seed
664
665 AESE V2.B16, V0.B16
666 AESMC V0.B16, V0.B16
667 AESE V2.B16, V0.B16
668 AESMC V0.B16, V0.B16
669 AESE V2.B16, V0.B16
670
671 VMOV V0.D[0], R0 // result in R0
672 RET
673 noaes:
674 B runtime·memhash32Fallback<ABIInternal>(SB)
675
676 // func memhash64(p unsafe.Pointer, h uintptr) uintptr
677 TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
678 MOVB runtime·useAeshash(SB), R10
679 CBZ R10, noaes // fall back when AES instructions are unavailable
680 MOVD $runtime·aeskeysched+0(SB), R3
681
682 VEOR V0.B16, V0.B16, V0.B16
683 VLD1 (R3), [V2.B16]
684 VLD1 (R0), V0.D[1] // 64-bit value to hash
685 VMOV R1, V0.D[0] // seed
686
687 AESE V2.B16, V0.B16
688 AESMC V0.B16, V0.B16
689 AESE V2.B16, V0.B16
690 AESMC V0.B16, V0.B16
691 AESE V2.B16, V0.B16
692
693 VMOV V0.D[0], R0 // result in R0
694 RET
695 noaes:
696 B runtime·memhash64Fallback<ABIInternal>(SB)
697
698 // func memhash(p unsafe.Pointer, h, size uintptr) uintptr
699 TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
700 MOVB runtime·useAeshash(SB), R10
701 CBZ R10, noaes // fall back when AES instructions are unavailable
702 B aeshashbody<>(SB)
703 noaes:
704 B runtime·memhashFallback<ABIInternal>(SB)
705
706 // func strhash(p unsafe.Pointer, h uintptr) uintptr
707 TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
708 MOVB runtime·useAeshash(SB), R10
709 CBZ R10, noaes // fall back when AES instructions are unavailable
710 LDP (R0), (R0, R2) // string data / length
711 B aeshashbody<>(SB)
712 noaes:
713 B runtime·strhashFallback<ABIInternal>(SB)
714
715 // R0: data
716 // R1: seed data
717 // R2: length
718 // At return, R0 = return value
719 TEXT aeshashbody<>(SB),NOSPLIT|NOFRAME,$0
720 VEOR V30.B16, V30.B16, V30.B16
721 VMOV R1, V30.D[0] // seed
722 VMOV R2, V30.D[1] // load length into seed
723
724 MOVD $runtime·aeskeysched+0(SB), R4
725 VLD1.P 16(R4), [V0.B16]
726 AESE V30.B16, V0.B16
727 AESMC V0.B16, V0.B16
728 // Dispatch on input length; each range uses a wider parallel pipeline.
728 CMP $16, R2
729 BLO aes0to15
730 BEQ aes16
731 CMP $32, R2
732 BLS aes17to32
733 CMP $64, R2
734 BLS aes33to64
735 CMP $128, R2
736 BLS aes65to128
737 B aes129plus
738
739 // 1..15 bytes: assemble the partial block from 8/4/2/1-byte pieces
739 // selected by the bits of the length.
739 aes0to15:
740 CBZ R2, aes0
741 VEOR V2.B16, V2.B16, V2.B16
742 TBZ $3, R2, less_than_8
743 VLD1.P 8(R0), V2.D[0]
744
745 less_than_8:
746 TBZ $2, R2, less_than_4
747 VLD1.P 4(R0), V2.S[2]
748
749 less_than_4:
750 TBZ $1, R2, less_than_2
751 VLD1.P 2(R0), V2.H[6]
752
753 less_than_2:
754 TBZ $0, R2, done
755 VLD1 (R0), V2.B[14]
756 done:
757 AESE V0.B16, V2.B16
758 AESMC V2.B16, V2.B16
759 AESE V0.B16, V2.B16
760 AESMC V2.B16, V2.B16
761 AESE V0.B16, V2.B16
762 AESMC V2.B16, V2.B16
763
764 VMOV V2.D[0], R0
765 RET
766
767 // 0 bytes: hash is derived from the seed alone.
767 aes0:
768 VMOV V0.D[0], R0
769 RET
770
771 aes16:
772 VLD1 (R0), [V2.B16]
773 B done
774
775 // 17..32 bytes: hash two (possibly overlapping) 16-byte blocks.
775 aes17to32:
776 // make second seed
777 VLD1 (R4), [V1.B16]
778 AESE V30.B16, V1.B16
779 AESMC V1.B16, V1.B16
780 SUB $16, R2, R10
781 VLD1.P (R0)(R10), [V2.B16] // first 16 bytes (advances R0 by len-16)
782 VLD1 (R0), [V3.B16] // last 16 bytes
783
784 AESE V0.B16, V2.B16
785 AESMC V2.B16, V2.B16
786 AESE V1.B16, V3.B16
787 AESMC V3.B16, V3.B16
788
789 AESE V0.B16, V2.B16
790 AESMC V2.B16, V2.B16
791 AESE V1.B16, V3.B16
792 AESMC V3.B16, V3.B16
793
794 AESE V0.B16, V2.B16
795 AESE V1.B16, V3.B16
796
797 VEOR V3.B16, V2.B16, V2.B16
798
799 VMOV V2.D[0], R0
800 RET
801
802 // 33..64 bytes: four blocks hashed with four derived seeds.
802 aes33to64:
803 VLD1 (R4), [V1.B16, V2.B16, V3.B16]
804 AESE V30.B16, V1.B16
805 AESMC V1.B16, V1.B16
806 AESE V30.B16, V2.B16
807 AESMC V2.B16, V2.B16
808 AESE V30.B16, V3.B16
809 AESMC V3.B16, V3.B16
810 SUB $32, R2, R10
811
812 VLD1.P (R0)(R10), [V4.B16, V5.B16]
813 VLD1 (R0), [V6.B16, V7.B16]
814
815 AESE V0.B16, V4.B16
816 AESMC V4.B16, V4.B16
817 AESE V1.B16, V5.B16
818 AESMC V5.B16, V5.B16
819 AESE V2.B16, V6.B16
820 AESMC V6.B16, V6.B16
821 AESE V3.B16, V7.B16
822 AESMC V7.B16, V7.B16
823
824 AESE V0.B16, V4.B16
825 AESMC V4.B16, V4.B16
826 AESE V1.B16, V5.B16
827 AESMC V5.B16, V5.B16
828 AESE V2.B16, V6.B16
829 AESMC V6.B16, V6.B16
830 AESE V3.B16, V7.B16
831 AESMC V7.B16, V7.B16
832
833 AESE V0.B16, V4.B16
834 AESE V1.B16, V5.B16
835 AESE V2.B16, V6.B16
836 AESE V3.B16, V7.B16
837
838 VEOR V6.B16, V4.B16, V4.B16
839 VEOR V7.B16, V5.B16, V5.B16
840 VEOR V5.B16, V4.B16, V4.B16
841
842 VMOV V4.D[0], R0
843 RET
844
845 // 65..128 bytes: eight blocks hashed with eight derived seeds.
845 aes65to128:
846 VLD1.P 64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
847 VLD1 (R4), [V5.B16, V6.B16, V7.B16]
848 AESE V30.B16, V1.B16
849 AESMC V1.B16, V1.B16
850 AESE V30.B16, V2.B16
851 AESMC V2.B16, V2.B16
852 AESE V30.B16, V3.B16
853 AESMC V3.B16, V3.B16
854 AESE V30.B16, V4.B16
855 AESMC V4.B16, V4.B16
856 AESE V30.B16, V5.B16
857 AESMC V5.B16, V5.B16
858 AESE V30.B16, V6.B16
859 AESMC V6.B16, V6.B16
860 AESE V30.B16, V7.B16
861 AESMC V7.B16, V7.B16
862
863 SUB $64, R2, R10
864 VLD1.P (R0)(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
865 VLD1 (R0), [V12.B16, V13.B16, V14.B16, V15.B16]
866 AESE V0.B16, V8.B16
867 AESMC V8.B16, V8.B16
868 AESE V1.B16, V9.B16
869 AESMC V9.B16, V9.B16
870 AESE V2.B16, V10.B16
871 AESMC V10.B16, V10.B16
872 AESE V3.B16, V11.B16
873 AESMC V11.B16, V11.B16
874 AESE V4.B16, V12.B16
875 AESMC V12.B16, V12.B16
876 AESE V5.B16, V13.B16
877 AESMC V13.B16, V13.B16
878 AESE V6.B16, V14.B16
879 AESMC V14.B16, V14.B16
880 AESE V7.B16, V15.B16
881 AESMC V15.B16, V15.B16
882
883 AESE V0.B16, V8.B16
884 AESMC V8.B16, V8.B16
885 AESE V1.B16, V9.B16
886 AESMC V9.B16, V9.B16
887 AESE V2.B16, V10.B16
888 AESMC V10.B16, V10.B16
889 AESE V3.B16, V11.B16
890 AESMC V11.B16, V11.B16
891 AESE V4.B16, V12.B16
892 AESMC V12.B16, V12.B16
893 AESE V5.B16, V13.B16
894 AESMC V13.B16, V13.B16
895 AESE V6.B16, V14.B16
896 AESMC V14.B16, V14.B16
897 AESE V7.B16, V15.B16
898 AESMC V15.B16, V15.B16
899
900 AESE V0.B16, V8.B16
901 AESE V1.B16, V9.B16
902 AESE V2.B16, V10.B16
903 AESE V3.B16, V11.B16
904 AESE V4.B16, V12.B16
905 AESE V5.B16, V13.B16
906 AESE V6.B16, V14.B16
907 AESE V7.B16, V15.B16
908
909 VEOR V12.B16, V8.B16, V8.B16
910 VEOR V13.B16, V9.B16, V9.B16
911 VEOR V14.B16, V10.B16, V10.B16
912 VEOR V15.B16, V11.B16, V11.B16
913 VEOR V10.B16, V8.B16, V8.B16
914 VEOR V11.B16, V9.B16, V9.B16
915 VEOR V9.B16, V8.B16, V8.B16
916
917 VMOV V8.D[0], R0
918 RET
919
920 // >= 129 bytes: absorb the input 128 bytes per iteration across an
920 // 8-wide AES pipeline, then finish with three mixing rounds.
920 aes129plus:
921 PRFM (R0), PLDL1KEEP // prefetch the start of the data
922 VLD1.P 64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
923 VLD1 (R4), [V5.B16, V6.B16, V7.B16]
924 AESE V30.B16, V1.B16
925 AESMC V1.B16, V1.B16
926 AESE V30.B16, V2.B16
927 AESMC V2.B16, V2.B16
928 AESE V30.B16, V3.B16
929 AESMC V3.B16, V3.B16
930 AESE V30.B16, V4.B16
931 AESMC V4.B16, V4.B16
932 AESE V30.B16, V5.B16
933 AESMC V5.B16, V5.B16
934 AESE V30.B16, V6.B16
935 AESMC V6.B16, V6.B16
936 AESE V30.B16, V7.B16
937 AESMC V7.B16, V7.B16
938 ADD R0, R2, R10 // start with the trailing 128 bytes
939 SUB $128, R10, R10
940 VLD1.P 64(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
941 VLD1 (R10), [V12.B16, V13.B16, V14.B16, V15.B16]
942 SUB $1, R2, R2
943 LSR $7, R2, R2 // R2 = number of 128-byte iterations
944
945 aesloop:
946 AESE V8.B16, V0.B16
947 AESMC V0.B16, V0.B16
948 AESE V9.B16, V1.B16
949 AESMC V1.B16, V1.B16
950 AESE V10.B16, V2.B16
951 AESMC V2.B16, V2.B16
952 AESE V11.B16, V3.B16
953 AESMC V3.B16, V3.B16
954 AESE V12.B16, V4.B16
955 AESMC V4.B16, V4.B16
956 AESE V13.B16, V5.B16
957 AESMC V5.B16, V5.B16
958 AESE V14.B16, V6.B16
959 AESMC V6.B16, V6.B16
960 AESE V15.B16, V7.B16
961 AESMC V7.B16, V7.B16
962
963 VLD1.P 64(R0), [V8.B16, V9.B16, V10.B16, V11.B16]
964 AESE V8.B16, V0.B16
965 AESMC V0.B16, V0.B16
966 AESE V9.B16, V1.B16
967 AESMC V1.B16, V1.B16
968 AESE V10.B16, V2.B16
969 AESMC V2.B16, V2.B16
970 AESE V11.B16, V3.B16
971 AESMC V3.B16, V3.B16
972
973 VLD1.P 64(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
974 AESE V12.B16, V4.B16
975 AESMC V4.B16, V4.B16
976 AESE V13.B16, V5.B16
977 AESMC V5.B16, V5.B16
978 AESE V14.B16, V6.B16
979 AESMC V6.B16, V6.B16
980 AESE V15.B16, V7.B16
981 AESMC V7.B16, V7.B16
982 SUB $1, R2, R2
983 CBNZ R2, aesloop
984
985 // Final three mixing rounds over the trailing blocks.
985 AESE V8.B16, V0.B16
986 AESMC V0.B16, V0.B16
987 AESE V9.B16, V1.B16
988 AESMC V1.B16, V1.B16
989 AESE V10.B16, V2.B16
990 AESMC V2.B16, V2.B16
991 AESE V11.B16, V3.B16
992 AESMC V3.B16, V3.B16
993 AESE V12.B16, V4.B16
994 AESMC V4.B16, V4.B16
995 AESE V13.B16, V5.B16
996 AESMC V5.B16, V5.B16
997 AESE V14.B16, V6.B16
998 AESMC V6.B16, V6.B16
999 AESE V15.B16, V7.B16
1000 AESMC V7.B16, V7.B16
1001
1002 AESE V8.B16, V0.B16
1003 AESMC V0.B16, V0.B16
1004 AESE V9.B16, V1.B16
1005 AESMC V1.B16, V1.B16
1006 AESE V10.B16, V2.B16
1007 AESMC V2.B16, V2.B16
1008 AESE V11.B16, V3.B16
1009 AESMC V3.B16, V3.B16
1010 AESE V12.B16, V4.B16
1011 AESMC V4.B16, V4.B16
1012 AESE V13.B16, V5.B16
1013 AESMC V5.B16, V5.B16
1014 AESE V14.B16, V6.B16
1015 AESMC V6.B16, V6.B16
1016 AESE V15.B16, V7.B16
1017 AESMC V7.B16, V7.B16
1018
1019 AESE V8.B16, V0.B16
1020 AESE V9.B16, V1.B16
1021 AESE V10.B16, V2.B16
1022 AESE V11.B16, V3.B16
1023 AESE V12.B16, V4.B16
1024 AESE V13.B16, V5.B16
1025 AESE V14.B16, V6.B16
1026 AESE V15.B16, V7.B16
1027
1028 // Fold the 8 lanes down to one result.
1028 VEOR V0.B16, V1.B16, V0.B16
1029 VEOR V2.B16, V3.B16, V2.B16
1030 VEOR V4.B16, V5.B16, V4.B16
1031 VEOR V6.B16, V7.B16, V6.B16
1032 VEOR V0.B16, V2.B16, V0.B16
1033 VEOR V4.B16, V6.B16, V4.B16
1034 VEOR V4.B16, V0.B16, V0.B16
1035
1036 VMOV V0.D[0], R0
1037 RET
1038
1039 // The Arm architecture provides a user space accessible counter-timer which
1040 // is incremented at a fixed but machine-specific rate. Software can (spin)
1041 // wait until the counter-timer reaches some desired value.
1042 //
1043 // Armv8.7-A introduced the WFET (FEAT_WFxT) instruction, which allows the
1044 // processor to enter a low power state for a set time, or until an event is
1045 // received.
1046 //
1047 // However, WFET is not used here because it is only available on newer hardware,
1048 // and we aim to maintain compatibility with older Armv8-A platforms that do not
1049 // support this feature.
1050 //
1051 // As a fallback, we can instead use the ISB instruction to decrease processor
1052 // activity and thus power consumption between checks of the counter-timer.
1053 // Note that we do not depend on the latency of the ISB instruction which is
1054 // implementation specific. Actual delay comes from comparing against a fresh
1055 // read of the counter-timer value.
1056 //
1057 // Read more in this Arm blog post:
1058 // https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/multi-threaded-applications-arm
1059
1060 TEXT runtime·procyieldAsm(SB),NOSPLIT,$0-0
1061 MOVWU cycles+0(FP), R0 // requested delay in nanoseconds
1062 CBZ R0, done
1063 // Prevent speculation of subsequent counter/timer reads and memory accesses.
1064 ISB $15
1065 // If the delay is very short, just return.
1066 // Hardcode 18ns as the first ISB delay.
1067 CMP $18, R0
1068 BLS done
1069 // Adjust for overhead of initial ISB.
1070 SUB $18, R0, R0
1071 // Convert the delay from nanoseconds to counter/timer ticks.
1072 // Read the counter/timer frequency.
1073 // delay_ticks = (delay * CNTFRQ_EL0) / 1e9
1074 // With the below simplifications and adjustments,
1075 // we are usually within 2% of the correct value:
1076 // delay_ticks = (delay + delay / 16) * CNTFRQ_EL0 >> 30
1077 MRS CNTFRQ_EL0, R1
1078 ADD R0>>4, R0, R0
1079 MUL R1, R0, R0
1080 LSR $30, R0, R0
1081 CBZ R0, done
1082 // start = current counter/timer value
1083 MRS CNTVCT_EL0, R2
1084 delay:
1085 // Delay using ISB for all ticks.
1086 ISB $15
1087 // Subtract and compare to handle counter roll-over.
1088 // counter_read() - start < delay_ticks
1089 MRS CNTVCT_EL0, R1
1090 SUB R2, R1, R1
1091 CMP R0, R1
1092 BCC delay
1093 done:
1094 RET
1095
1096 // Save state of caller into g->sched,
1097 // but using fake PC from systemstack_switch.
1098 // Must only be called from functions with no locals ($0)
1099 // or else unwinding from systemstack_switch is incorrect.
1100 // Smashes R0.
1101 TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
1102 MOVD $runtime·systemstack_switch(SB), R0
1103 ADD $8, R0 // get past prologue
1104 MOVD R0, (g_sched+gobuf_pc)(g)
1105 MOVD RSP, R0
1106 MOVD R0, (g_sched+gobuf_sp)(g)
1107 MOVD R29, (g_sched+gobuf_bp)(g)
1108 MOVD $0, (g_sched+gobuf_lr)(g)
1109 // Assert ctxt is zero. See func save.
1110 MOVD (g_sched+gobuf_ctxt)(g), R0
1111 CBZ R0, 2(PC)
1112 CALL runtime·abort(SB)
1113 RET
1114
1115 // func asmcgocall_no_g(fn, arg unsafe.Pointer)
1116 // Call fn(arg) aligned appropriately for the gcc ABI.
1117 // Called on a system stack, and there may be no g yet (during needm).
1118 TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-16
1119 MOVD fn+0(FP), R1
1120 MOVD arg+8(FP), R0 // single argument in R0 per the C ABI
1121 SUB $16, RSP // skip over saved frame pointer below RSP
1122 BL (R1)
1123 ADD $16, RSP // skip over saved frame pointer below RSP
1124 RET
1125
// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-20
	MOVD	fn+0(FP), R1
	MOVD	arg+8(FP), R0

	MOVD	RSP, R2	// save original stack pointer
	CBZ	g, nosave	// no g at all: we are on a system/C thread stack
	MOVD	g, R4	// remember current g so we can restore it after the call

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already. Or we might already
	// be on the m->gsignal stack.
	MOVD	g_m(g), R8
	MOVD	m_gsignal(R8), R3
	CMP	R3, g
	BEQ	nosave
	MOVD	m_g0(R8), R3
	CMP	R3, g
	BEQ	nosave

	// Switch to system stack.
	MOVD	R0, R9	// gosave_systemstack_switch<> and save_g might clobber R0
	BL	gosave_systemstack_switch<>(SB)
	MOVD	R3, g	// g = m->g0
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP
	MOVD	(g_sched+gobuf_bp)(g), R29
	MOVD	R9, R0	// restore fn's argument

	// Now on a scheduling stack (a pthread-created stack).
	// Save room for two of our pointers /*, plus 32 bytes of callee
	// save area that lives on the caller stack. */
	MOVD	RSP, R13
	SUB	$16, R13
	MOVD	R13, RSP
	MOVD	R4, 0(RSP)	// save old g on stack
	MOVD	(g_stack+stack_hi)(R4), R4
	SUB	R2, R4
	MOVD	R4, 8(RSP)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
	BL	(R1)
	MOVD	R0, R9	// preserve C return value across save_g below

	// Restore g, stack pointer. R0 is errno, so don't touch it
	MOVD	0(RSP), g
	BL	runtime·save_g(SB)
	MOVD	(g_stack+stack_hi)(g), R5
	MOVD	8(RSP), R6
	SUB	R6, R5	// recompute SP as stack.hi minus saved depth (stack may have moved)
	MOVD	R9, R0
	MOVD	R5, RSP

	MOVW	R0, ret+16(FP)	// result is int32, so a 32-bit store
	RET

nosave:
	// Running on a system stack, perhaps even without a g.
	// Having no g can happen during thread creation or thread teardown
	// (see needm/dropm on Solaris, for example).
	// This code is like the above sequence but without saving/restoring g
	// and without worrying about the stack moving out from under us
	// (because we're on a system stack, not a goroutine stack).
	// The above code could be used directly if already on a system stack,
	// but then the only path through this code would be a rare case on Solaris.
	// Using this code for all "already on system stack" calls exercises it more,
	// which should help keep it correct.
	MOVD	RSP, R13
	SUB	$16, R13
	MOVD	R13, RSP
	MOVD	$0, R4
	MOVD	R4, 0(RSP)	// Where above code stores g, in case someone looks during debugging.
	MOVD	R2, 8(RSP)	// Save original stack pointer.
	BL	(R1)
	// Restore stack pointer.
	MOVD	8(RSP), R2
	MOVD	R2, RSP
	MOVD	R0, ret+16(FP)
	RET
1208
// cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
// See cgocall.go for more details.
TEXT ·cgocallback(SB),NOSPLIT,$24-24
	NO_LOCAL_POINTERS

	// Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g.
	// It is used to dropm while thread is exiting.
	MOVD	fn+0(FP), R1
	CBNZ	R1, loadg
	// Restore the g from frame.
	MOVD	frame+8(FP), g
	B	dropm

loadg:
	// Load g from thread-local storage.
	BL	runtime·load_g(SB)

	// If g is nil, Go did not create the current thread,
	// or if this thread never called into Go on pthread platforms.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call.
	CBZ	g, needm

	MOVD	g_m(g), R8
	MOVD	R8, savedm-8(SP)	// non-nil saved m means: skip dropm on the way out
	B	havem

needm:
	MOVD	g, savedm-8(SP) // g is zero, so is m.
	MOVD	$runtime·needAndBindM(SB), R0
	BL	(R0)

	// Set m->g0->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), R3
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(R3)
	MOVD	R29, (g_sched+gobuf_bp)(R3)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 16(RSP) aka savedsp-16(SP).
	// Beware that the frame size is actually 32+16.
	MOVD	m_g0(R8), R3
	MOVD	(g_sched+gobuf_sp)(R3), R4
	MOVD	R4, savedsp-16(SP)
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(R3)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the curg stack and
	// open a frame the same size as cgocallback's g0 frame.
	// Once we switch to the curg stack, the pushed PC will appear
	// to be the return PC of cgocallback, so that the traceback
	// will seamlessly trace back into the earlier calls.
	MOVD	m_curg(R8), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
	MOVD	(g_sched+gobuf_pc)(g), R5
	MOVD	R5, -48(R4)	// push curg's saved PC (fake return PC for traceback)
	MOVD	(g_sched+gobuf_bp)(g), R5
	MOVD	R5, -56(R4)	// push curg's saved frame pointer
	// Gather our arguments into registers.
	MOVD	fn+0(FP), R1
	MOVD	frame+8(FP), R2
	MOVD	ctxt+16(FP), R3
	MOVD	$-48(R4), R0 // maintain 16-byte SP alignment
	MOVD	R0, RSP	// switch stack
	MOVD	R1, 8(RSP)
	MOVD	R2, 16(RSP)
	MOVD	R3, 24(RSP)
	MOVD	$runtime·cgocallbackg(SB), R0
	CALL	(R0) // indirect call to bypass nosplit check. We're on a different stack now.

	// Restore g->sched (== m->curg->sched) from saved values.
	MOVD	0(RSP), R5	// the PC pushed at -48(R4) above, possibly updated by stack copying
	MOVD	R5, (g_sched+gobuf_pc)(g)
	MOVD	RSP, R4
	ADD	$48, R4, R4	// undo the $-48 frame-open above
	MOVD	R4, (g_sched+gobuf_sp)(g)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP
	MOVD	savedsp-16(SP), R4
	MOVD	R4, (g_sched+gobuf_sp)(g)

	// If the m on entry was nil, we called needm above to borrow an m,
	// 1. for the duration of the call on non-pthread platforms,
	// 2. or the duration of the C thread alive on pthread platforms.
	// If the m on entry wasn't nil,
	// 1. the thread might be a Go thread,
	// 2. or it wasn't the first call from a C thread on pthread platforms,
	// since then we skip dropm to reuse the m in the first call.
	MOVD	savedm-8(SP), R6
	CBNZ	R6, droppedm

	// Skip dropm to reuse it in the next call, when a pthread key has been created.
	MOVD	_cgo_pthread_key_created(SB), R6
	// It means cgo is disabled when _cgo_pthread_key_created is a nil pointer, need dropm.
	CBZ	R6, dropm
	MOVD	(R6), R6
	CBNZ	R6, droppedm

dropm:
	MOVD	$runtime·dropm(SB), R0
	BL	(R0)	// indirect call, same nosplit-bypass reason as above
droppedm:

	// Done!
	RET
1345
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
// In:  nothing (reads g from TLS via load_g)
// Out: R0 = m->curg->stack.hi
// g (R28) and R27 are callee-save in the C ABI, so they are saved and
// restored around load_g, which may clobber both.
TEXT _cgo_topofstack(SB),NOSPLIT,$24
	// g (R28) and REGTMP (R27) might be clobbered by load_g. They
	// are callee-save in the gcc calling convention, so save them.
	MOVD	R27, savedR27-8(SP)
	MOVD	g, saveG-16(SP)

	BL	runtime·load_g(SB)
	MOVD	g_m(g), R0
	MOVD	m_curg(R0), R0
	MOVD	(g_stack+stack_hi)(R0), R0

	MOVD	saveG-16(SP), g
	// Restore R27 from the same slot it was saved to above. The restore
	// previously named this slot savedR28, which was misleading: the slot
	// holds R27 (g/R28 lives in saveG-16(SP)). The -8(SP) offset is what
	// the assembler uses, so keep it, but use the matching name.
	MOVD	savedR27-8(SP), R27
	RET
1362
// void setg(G*); set g. for use by needm.
TEXT runtime·setg(SB), NOSPLIT, $0-8
	MOVD	gg+0(FP), g	// install new g in the g register (R28)
	// This only happens if iscgo, so jump straight to save_g
	BL	runtime·save_g(SB)
	RET
1369
// void setg_gcc(G*); set g called from gcc
TEXT setg_gcc<>(SB),NOSPLIT,$8
	MOVD	R0, g	// C ABI passes the new g in R0
	// R27 is callee-save in the C ABI but may be clobbered by save_g.
	MOVD	R27, savedR27-8(SP)
	BL	runtime·save_g(SB)
	MOVD	savedR27-8(SP), R27
	RET
1377
// emptyfunc does nothing; a plain call/return.
TEXT runtime·emptyfunc(SB),0,$0-0
	RET
1380
// abort crashes the process: a load through a nil pointer triggers a
// fault, and UNDEF is a backstop in case the fault somehow resumes.
TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	ZR, R0
	MOVD	(R0), R0	// deliberate nil dereference
	UNDEF
1385
// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0
	MOVD	R0, R0	// NOP; return address lands after this instruction (goexit+PCQuantum)
	BL	runtime·goexit1(SB)	// does not return
1391
// This is called from .init_array and follows the platform, not Go, ABI.
// In: R0 = pointer to the new moduledata; appends it to the
// runtime·lastmoduledatap linked list.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	SUB	$0x10, RSP
	MOVD	R27, 8(RSP)	// The access to global variables below implicitly uses R27, which is callee-save
	MOVD	runtime·lastmoduledatap(SB), R1
	MOVD	R0, moduledata_next(R1)	// lastmoduledatap->next = R0
	MOVD	R0, runtime·lastmoduledatap(SB)	// lastmoduledatap = R0
	MOVD	8(RSP), R27
	ADD	$0x10, RSP
	RET
1402
// func checkASM() bool
// Always reports true on arm64 (no assembly assumptions to verify here).
TEXT ·checkASM(SB),NOSPLIT,$0-1
	MOVW	$1, R3
	MOVB	R3, ret+0(FP)
	RET
1407
// gcWriteBarrier informs the GC about heap pointer writes.
//
// gcWriteBarrier does NOT follow the Go ABI. It accepts the
// number of bytes of buffer needed in R25, and returns a pointer
// to the buffer space in R25.
// It clobbers condition codes.
// It does not clobber any general-purpose registers except R27,
// but may clobber others (e.g., floating point registers)
// The act of CALLing gcWriteBarrier will clobber R30 (LR).
TEXT gcWriteBarrier<>(SB),NOSPLIT,$200
	// Save the registers clobbered by the fast path.
	STP	(R0, R1), 184(RSP)
retry:
	// Fast path: reserve R25 bytes in the per-P write barrier buffer.
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	(p_wbBuf+wbBuf_next)(R0), R1
	MOVD	(p_wbBuf+wbBuf_end)(R0), R27
	// Increment wbBuf.next position.
	ADD	R25, R1
	// Is the buffer full?
	CMP	R27, R1
	BHI	flush
	// Commit to the larger buffer.
	MOVD	R1, (p_wbBuf+wbBuf_next)(R0)
	// Make return value (the original next position)
	SUB	R25, R1, R25
	// Restore registers.
	LDP	184(RSP), (R0, R1)
	RET

flush:
	// Slow path: spill, flush the buffer via wbBufFlush, then retry
	// the reservation from the top.
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	// R0 and R1 already saved
	STP	(R2, R3), 1*8(RSP)
	STP	(R4, R5), 3*8(RSP)
	STP	(R6, R7), 5*8(RSP)
	STP	(R8, R9), 7*8(RSP)
	STP	(R10, R11), 9*8(RSP)
	STP	(R12, R13), 11*8(RSP)
	STP	(R14, R15), 13*8(RSP)
	// R16, R17 may be clobbered by linker trampoline
	// R18 is unused.
	STP	(R19, R20), 15*8(RSP)
	STP	(R21, R22), 17*8(RSP)
	STP	(R23, R24), 19*8(RSP)
	STP	(R25, R26), 21*8(RSP)
	// R27 is temp register.
	// R28 is g.
	// R29 is frame pointer (unused).
	// R30 is LR, which was saved by the prologue.
	// R31 is SP.

	CALL	runtime·wbBufFlush(SB)
	LDP	1*8(RSP), (R2, R3)
	LDP	3*8(RSP), (R4, R5)
	LDP	5*8(RSP), (R6, R7)
	LDP	7*8(RSP), (R8, R9)
	LDP	9*8(RSP), (R10, R11)
	LDP	11*8(RSP), (R12, R13)
	LDP	13*8(RSP), (R14, R15)
	LDP	15*8(RSP), (R19, R20)
	LDP	17*8(RSP), (R21, R22)
	LDP	19*8(RSP), (R23, R24)
	LDP	21*8(RSP), (R25, R26)
	JMP	retry
1474
// Compiler-called entry points: gcWriteBarrierN reserves space for N
// pointers (N*8 bytes) by loading the byte count into R25 and tail-calling
// the common gcWriteBarrier<> above.
TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$8, R25
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$16, R25
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$24, R25
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$32, R25
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$40, R25
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$48, R25
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$56, R25
	JMP	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$64, R25
	JMP	gcWriteBarrier<>(SB)
1499
// Error text handed to the debugger when an injected call's argument
// frame exceeds the largest DEBUG_CALL_FN size (65536).
DATA	debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $20	// Size duplicated below
1502
// debugCallV2 is the entry point for debugger-injected function
// calls on running goroutines. It informs the runtime that a
// debug call has been injected and creates a call frame for the
// debugger to fill in.
//
// To inject a function call, a debugger should:
// 1. Check that the goroutine is in state _Grunning and that
//    there are at least 288 bytes free on the stack.
// 2. Set SP as SP-16.
// 3. Store the current LR in (SP) (using the SP after step 2).
// 4. Store the current PC in the LR register.
// 5. Write the desired argument frame size at SP-16
// 6. Save all machine registers (including flags and fpsimd registers)
//    so they can be restored later by the debugger.
// 7. Set the PC to debugCallV2 and resume execution.
//
// If the goroutine is in state _Grunnable, then it's not generally
// safe to inject a call because it may return out via other runtime
// operations. Instead, the debugger should unwind the stack to find
// the return to non-runtime code, add a temporary breakpoint there,
// and inject the call once that breakpoint is hit.
//
// If the goroutine is in any other state, it's not safe to inject a call.
//
// This function communicates back to the debugger by setting R20 and
// invoking BRK to raise a breakpoint signal. Note that the signal PC of
// the signal triggered by the BRK instruction is the PC where the signal
// is trapped, not the next PC, so to resume execution, the debugger needs
// to set the signal PC to PC+4. See the comments in the implementation for
// the protocol the debugger is expected to follow. InjectDebugCall in the
// runtime tests demonstrates this protocol.
//
// The debugger must ensure that any pointers passed to the function
// obey escape analysis requirements. Specifically, it must not pass
// a stack pointer to an escaping argument. debugCallV2 cannot check
// this invariant.
//
// This is ABIInternal because Go code injects its PC directly into new
// goroutine stacks.
TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
	// Hand-rolled prologue: open a 272-byte frame and save FP/LR.
	STP	(R29, R30), -280(RSP)
	SUB	$272, RSP, RSP
	SUB	$8, RSP, R29
	// Save all registers that may contain pointers so they can be
	// conservatively scanned.
	//
	// We can't do anything that might clobber any of these
	// registers before this.
	STP	(R27, g), (30*8)(RSP)
	STP	(R25, R26), (28*8)(RSP)
	STP	(R23, R24), (26*8)(RSP)
	STP	(R21, R22), (24*8)(RSP)
	STP	(R19, R20), (22*8)(RSP)
	STP	(R16, R17), (20*8)(RSP)
	STP	(R14, R15), (18*8)(RSP)
	STP	(R12, R13), (16*8)(RSP)
	STP	(R10, R11), (14*8)(RSP)
	STP	(R8, R9), (12*8)(RSP)
	STP	(R6, R7), (10*8)(RSP)
	STP	(R4, R5), (8*8)(RSP)
	STP	(R2, R3), (6*8)(RSP)
	STP	(R0, R1), (4*8)(RSP)

	// Perform a safe-point check.
	MOVD	R30, 8(RSP) // Caller's PC
	CALL	runtime·debugCallCheck(SB)
	MOVD	16(RSP), R0	// nil means the check passed
	CBZ	R0, good

	// The safety check failed. Put the reason string at the top
	// of the stack.
	MOVD	R0, 8(RSP)	// reason string pointer
	MOVD	24(RSP), R0
	MOVD	R0, 16(RSP)	// reason string length

	// Set R20 to 8 and invoke BRK. The debugger should get the
	// reason a call can't be injected from SP+8 and resume execution.
	MOVD	$8, R20
	BREAK
	JMP	restore

good:
	// Registers are saved and it's safe to make a call.
	// Open up a call frame, moving the stack if necessary.
	//
	// Once the frame is allocated, this will set R20 to 0 and
	// invoke BRK. The debugger should write the argument
	// frame for the call at SP+8, set up argument registers,
	// set the LR as the signal PC + 4, set the PC to the function
	// to call, set R26 to point to the closure (if a closure call),
	// and resume execution.
	//
	// If the function returns, this will set R20 to 1 and invoke
	// BRK. The debugger can then inspect any return value saved
	// on the stack at SP+8 and in registers. To resume execution,
	// the debugger should restore the LR from (SP).
	//
	// If the function panics, this will set R20 to 2 and invoke BRK.
	// The interface{} value of the panic will be at SP+8. The debugger
	// can inspect the panic value and resume execution again.
#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
	CMP	$MAXSIZE, R0;			\
	BGT	5(PC);				\
	MOVD	$NAME(SB), R0;			\
	MOVD	R0, 8(RSP);			\
	CALL	runtime·debugCallWrap(SB);	\
	JMP	restore

	// Dispatch on the requested frame size to the smallest
	// debugCallNN that can hold it.
	MOVD	256(RSP), R0 // the argument frame size
	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
	// The frame size is too large. Report the error.
	MOVD	$debugCallFrameTooLarge<>(SB), R0
	MOVD	R0, 8(RSP)
	MOVD	$20, R0
	MOVD	R0, 16(RSP) // length of debugCallFrameTooLarge string
	MOVD	$8, R20
	BREAK
	JMP	restore

restore:
	// Calls and failures resume here.
	//
	// Set R20 to 16 and invoke BRK. The debugger should restore
	// all registers except for PC and RSP and resume execution.
	MOVD	$16, R20
	BREAK
	// We must not modify flags after this point.

	// Restore pointer-containing registers, which may have been
	// modified from the debugger's copy by stack copying.
	LDP	(30*8)(RSP), (R27, g)
	LDP	(28*8)(RSP), (R25, R26)
	LDP	(26*8)(RSP), (R23, R24)
	LDP	(24*8)(RSP), (R21, R22)
	LDP	(22*8)(RSP), (R19, R20)
	LDP	(20*8)(RSP), (R16, R17)
	LDP	(18*8)(RSP), (R14, R15)
	LDP	(16*8)(RSP), (R12, R13)
	LDP	(14*8)(RSP), (R10, R11)
	LDP	(12*8)(RSP), (R8, R9)
	LDP	(10*8)(RSP), (R6, R7)
	LDP	(8*8)(RSP), (R4, R5)
	LDP	(6*8)(RSP), (R2, R3)
	LDP	(4*8)(RSP), (R0, R1)

	// Hand-rolled epilogue, mirroring the injection protocol's step 2-3.
	LDP	-8(RSP), (R29, R27)
	ADD	$288, RSP, RSP // Add 16 more bytes, see saveSigContext
	MOVD	-16(RSP), R30 // restore old lr
	JMP	(R27)
1663
// runtime.debugCallCheck assumes that functions defined with the
// DEBUG_CALL_FN macro are safe points to inject calls.
// Protocol: R20=0 + BRK means the frame is ready for the debugger to
// make the call; R20=1 + BRK means the injected call has returned.
#define DEBUG_CALL_FN(NAME,MAXSIZE)		\
TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
	NO_LOCAL_POINTERS;			\
	MOVD	$0, R20;			\
	BREAK;					\
	MOVD	$1, R20;			\
	BREAK;					\
	RET
DEBUG_CALL_FN(debugCall32<>, 32)
DEBUG_CALL_FN(debugCall64<>, 64)
DEBUG_CALL_FN(debugCall128<>, 128)
DEBUG_CALL_FN(debugCall256<>, 256)
DEBUG_CALL_FN(debugCall512<>, 512)
DEBUG_CALL_FN(debugCall1024<>, 1024)
DEBUG_CALL_FN(debugCall2048<>, 2048)
DEBUG_CALL_FN(debugCall4096<>, 4096)
DEBUG_CALL_FN(debugCall8192<>, 8192)
DEBUG_CALL_FN(debugCall16384<>, 16384)
DEBUG_CALL_FN(debugCall32768<>, 32768)
DEBUG_CALL_FN(debugCall65536<>, 65536)
1686
// func debugCallPanicked(val interface{})
// Reports a panic in a debugger-injected call: R20=2 + BRK, with the
// panic's interface{} value placed at SP+8 for the debugger to inspect.
TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
	// Copy the panic value to the top of stack at SP+8.
	MOVD	val_type+0(FP), R0
	MOVD	R0, 8(RSP)
	MOVD	val_data+8(FP), R0
	MOVD	R0, 16(RSP)
	MOVD	$2, R20
	BREAK
	RET
1697
// panicBounds is called by compiler-generated bounds-check failures.
// It spills R0-R15 (any of which may hold the failing index/length) and
// forwards to panicBounds64 with the caller's PC and the save area.
TEXT runtime·panicBounds<ABIInternal>(SB),NOSPLIT,$144-0
	NO_LOCAL_POINTERS
	// Save all 16 int registers that could have an index in them.
	// They may be pointers, but if they are they are dead.
	STP	(R0, R1), 24(RSP)
	STP	(R2, R3), 40(RSP)
	STP	(R4, R5), 56(RSP)
	STP	(R6, R7), 72(RSP)
	STP	(R8, R9), 88(RSP)
	STP	(R10, R11), 104(RSP)
	STP	(R12, R13), 120(RSP)
	STP	(R14, R15), 136(RSP)
	MOVD	LR, R0	// PC immediately after call to panicBounds
	ADD	$24, RSP, R1	// pointer to save area
	CALL	runtime·panicBounds64<ABIInternal>(SB)
	RET
1714
// func getfp() uintptr
// Returns the current frame pointer (R29 on arm64).
TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVD	R29, R0
	RET
1718
View as plain text