Text file src/runtime/asm_arm64.s

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "tls_arm64.h"
     8  #include "funcdata.h"
     9  #include "textflag.h"
    10  #include "cgo/abi_arm64.h"
    11  
    12  // _rt0_arm64 is common startup code for most arm64 systems when using
    13  // internal linking. This is the entry point for the program from the
    14  // kernel for an ordinary -buildmode=exe program. The stack holds the
    15  // number of arguments and the C-style argv.
    16  TEXT _rt0_arm64(SB),NOSPLIT,$0
    17  	MOVD	(RSP), R0	// R0 = argc, the word at the top of the stack on entry
    18  	ADD	$8, RSP, R1	// R1 = argv, which begins one word above argc
    19  	B	runtime·rt0_go(SB)	// continue in rt0_go, which expects R0 = argc and R1 = argv
    20  
    21  // main is common startup code for most arm64 systems when using
    22  // external linking. The C startup code will call the symbol "main"
    23  // passing argc and argv in the usual C ABI registers R0 and R1.
    24  TEXT main(SB),NOSPLIT,$0
    25  	B	runtime·rt0_go(SB)	// R0 (argc) and R1 (argv) are passed through untouched
    26  
    27  // _rt0_arm64_lib is common startup code for most arm64 systems when
    28  // using -buildmode=c-archive or -buildmode=c-shared. The linker will
    29  // arrange to invoke this function as a global constructor (for
    30  // c-archive) or when the shared library is loaded (for c-shared).
    31  // We expect argc and argv to be passed in the usual C ABI registers
    32  // R0 and R1.
    33  TEXT _rt0_arm64_lib(SB),NOSPLIT,$184
    34  	// Preserve callee-save registers. (The macros are not defined in this file; the arguments 24 and 104 are presumably frame offsets.)
    35  	SAVE_R19_TO_R28(24)
    36  	SAVE_F8_TO_F15(104)
    37  
    38  	// Initialize g as null in case of using g later e.g. sigaction in cgo_sigaction.go
    39  	MOVD	ZR, g
    40  
    41  	MOVD	R0, _rt0_arm64_lib_argc<>(SB)	// stash argc; runtime·rt0_lib_go reloads it into R0
    42  	MOVD	R1, _rt0_arm64_lib_argv<>(SB)	// stash argv; runtime·rt0_lib_go reloads it into R1
    43  
    44  	MOVD	$runtime·libInit(SB), R4
    45  	BL	(R4)	// indirect call to runtime·libInit through R4
    46  
    47  	// Restore callee-save registers.
    48  	RESTORE_R19_TO_R28(24)
    49  	RESTORE_F8_TO_F15(104)
    50  	RET
    51  
    52  TEXT runtime·rt0_lib_go<ABIInternal>(SB),NOSPLIT,$0	// reload the argc/argv saved by _rt0_arm64_lib, then jump to rt0_go
    53  	MOVD	_rt0_arm64_lib_argc<>(SB), R0	// R0 = saved argc
    54  	MOVD	_rt0_arm64_lib_argv<>(SB), R1	// R1 = saved argv
    55  	MOVD	$runtime·rt0_go(SB),R4
    56  	B	(R4)	// indirect jump (not a call) to rt0_go with R0 = argc, R1 = argv
    57  
    58  DATA _rt0_arm64_lib_argc<>(SB)/8, $0	// 8-byte cell, initially 0; written by _rt0_arm64_lib, read by runtime·rt0_lib_go
    59  GLOBL _rt0_arm64_lib_argc<>(SB),NOPTR, $8
    60  DATA _rt0_arm64_lib_argv<>(SB)/8, $0	// 8-byte cell, initially 0; written by _rt0_arm64_lib, read by runtime·rt0_lib_go
    61  GLOBL _rt0_arm64_lib_argv<>(SB),NOPTR, $8
    62  
    63  #ifdef GOARM64_LSE
    64  DATA no_lse_msg<>+0x00(SB)/64, $"This program can only run on ARM64 processors with LSE support.\n"
    65  GLOBL no_lse_msg<>(SB), RODATA, $64	// 64 bytes: the message text including its trailing newline; printed by the no_lse path of rt0_go
    66  #endif
    67  
    68  // We know for sure that Linux and FreeBSD allow reading the instruction set
    69  // attribute registers (while some other OSes, like OpenBSD and Darwin,
    70  // do not). Let's be conservative and allow code to read such registers
    71  // only when we are sure this won't lead to SIGILL.
    72  #ifdef GOOS_linux
    73  #define ISA_REGS_READABLE
    74  #endif
    75  #ifdef GOOS_freebsd
    76  #define ISA_REGS_READABLE
    77  #endif
    78  
    79  #ifdef GOARM64_LSE
    80  #ifdef ISA_REGS_READABLE
    81  #define CHECK_GOARM64_LSE	// defined only when both GOARM64_LSE and ISA_REGS_READABLE are defined; enables the LSE check in rt0_go
    82  #endif
    83  #endif
    84  
    85  TEXT runtime·rt0_go(SB),NOSPLIT|TOPFRAME,$0	// common bootstrap: set up g0 and m0, run args/osinit/schedinit, queue runtime·main via newproc, then call mstart
    86  	// SP = stack; R0 = argc; R1 = argv
    87  
    88  	SUB	$32, RSP	// scratch area: holds argc/argv now and outgoing arguments later
    89  	MOVW	R0, 8(RSP) // argc
    90  	MOVD	R1, 16(RSP) // argv
    91  
    92  	// This is typically the entry point for Go programs.
    93  	// Call stack unwinding must not proceed past this frame.
    94  	// Set the frame pointer register to 0 so that frame pointer-based unwinders
    95  	// (which don't use debug info for performance reasons)
    96  	// won't attempt to unwind past this function.
    97  	// See go.dev/issue/63630
    98  	MOVD	$0, R29
    99  
   100  #ifdef TLS_darwin
   101  	// Initialize TLS.
   102  	MOVD	ZR, g // clear g, make sure it's not junk.
   103  	SUB	$32, RSP
   104  	MRS_TPIDR_R0
   105  	AND	$~7, R0
   106  	MOVD	R0, 16(RSP)             // arg2: TLS base
   107  	MOVD	$runtime·tls_g(SB), R2
   108  	MOVD	R2, 8(RSP)              // arg1: &tlsg
   109  	BL	·tlsinit(SB)
   110  	ADD	$32, RSP
   111  #endif
   112  
   113  	// create istack out of the given (operating system) stack.
   114  	// _cgo_init may update stackguard.
   115  	MOVD	$runtime·g0(SB), g
   116  	MOVD	RSP, R7
   117  	MOVD	$(-64*1024)(R7), R0	// R0 = RSP - 64 KiB: provisional stack guard and stack_lo for g0
   118  	MOVD	R0, g_stackguard0(g)
   119  	MOVD	R0, g_stackguard1(g)
   120  	MOVD	R0, (g_stack+stack_lo)(g)
   121  	MOVD	R7, (g_stack+stack_hi)(g)	// stack_hi = current RSP
   122  
   123  	// if there is a _cgo_init, call it using the gcc ABI.
   124  	MOVD	_cgo_init(SB), R12
   125  	CBZ	R12, nocgo
   126  
   127  #ifdef GOOS_android
   128  	MRS_TPIDR_R0			// load TLS base pointer
   129  	MOVD	R0, R3			// arg 3: TLS base pointer
   130  	MOVD	$runtime·tls_g(SB), R2 	// arg 2: &tls_g
   131  #else
   132  	MOVD	$0, R2		        // arg 2: not used when using platform's TLS
   133  #endif
   134  	MOVD	$setg_gcc<>(SB), R1	// arg 1: setg
   135  	MOVD	g, R0			// arg 0: G
   136  	SUB	$16, RSP		// reserve 16 bytes for sp-8 where fp may be saved.
   137  	BL	(R12)
   138  	ADD	$16, RSP
   139  
   140  nocgo:
   141  	BL	runtime·save_g(SB)
   142  	// update stackguard after _cgo_init
   143  	MOVD	(g_stack+stack_lo)(g), R0
   144  	ADD	$const_stackGuard, R0	// guard = stack_lo + stackGuard
   145  	MOVD	R0, g_stackguard0(g)
   146  	MOVD	R0, g_stackguard1(g)
   147  
   148  	// set the per-goroutine and per-mach "registers"
   149  	MOVD	$runtime·m0(SB), R0
   150  
   151  	// save m->g0 = g0
   152  	MOVD	g, m_g0(R0)
   153  	// save m0 to g0->m
   154  	MOVD	R0, g_m(g)
   155  
   156  	BL	runtime·check(SB)
   157  
   158  #ifdef GOOS_windows
   159  	BL	runtime·wintls(SB)
   160  #endif
   161  
   162  	// Check that the CPU we are running on supports the instructions targeted at compile time.
   163  #ifdef CHECK_GOARM64_LSE
   164  	// Read the ID_AA64ISAR0_EL1 register
   165  	MRS	ID_AA64ISAR0_EL1, R0
   166  
   167  	// Extract the LSE field (bits [23:20])
   168  	LSR	$20, R0, R0
   169  	AND	$0xf, R0, R0
   170  
   171  	// LSE support is indicated by a non-zero value
   172  	CBZ	R0, no_lse
   173  #endif
   174  
   175  	MOVW	8(RSP), R0	// copy argc
   176  	MOVW	R0, -8(RSP)
   177  	MOVD	16(RSP), R0		// copy argv
   178  	MOVD	R0, 0(RSP)
   179  	BL	runtime·args(SB)
   180  	BL	runtime·osinit(SB)
   181  	BL	runtime·schedinit(SB)
   182  
   183  	// create a new goroutine to start program
   184  	MOVD	$runtime·mainPC(SB), R0		// entry
   185  	SUB	$16, RSP
   186  	MOVD	R0, 8(RSP) // arg
   187  	MOVD	$0, 0(RSP) // dummy LR
   188  	BL	runtime·newproc(SB)
   189  	ADD	$16, RSP
   190  
   191  	// start this M
   192  	BL	runtime·mstart(SB)
   193  	UNDEF
   194  
   195  #ifdef CHECK_GOARM64_LSE
   196  no_lse:
   197  	MOVD	$2, R0 // stderr (file descriptor 2; this path previously passed 1, which is stdout)
   198  	MOVD	R0, 8(RSP)
   199  	MOVD	$no_lse_msg<>(SB), R1 // message address
   200  	MOVD	R1, 16(RSP)
   201  	MOVD	$64, R2 // message length
   202  	MOVD	R2, 24(RSP)
   203  	CALL	runtime·write(SB)
        	MOVD	$1, R0 // exit status; stored explicitly because exit takes its argument from the slot that held the fd
        	MOVD	R0, 8(RSP)
   204  	CALL	runtime·exit(SB)
   205  	CALL	runtime·abort(SB)
   206  	RET
   207  #endif
   208  
   209  	// Prevent dead-code elimination of debugCallV2 and debugPinnerV1, which are
   210  	// intended to be called by debuggers.
   211  	MOVD	$runtime·debugPinnerV1<ABIInternal>(SB), R0
   212  	MOVD	$runtime·debugCallV2<ABIInternal>(SB), R0
   213  
   214  	MOVD	$0, R0
   215  	MOVD	R0, (R0)	// boom
   216  	UNDEF
   217  
   218  DATA	runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)	// 8-byte cell holding the address of runtime·main; rt0_go passes this cell's address to newproc
   219  GLOBL	runtime·mainPC(SB),RODATA,$8
   220  
   221  // BREAK emits a BRK instruction. Windows ARM64 needs an immediate 0xf000
   222  // argument; other targets use a bare BRK. See go.dev/issues/53837.
   223  #define BREAK	\
   224  #ifdef GOOS_windows	\
   225  	BRK	$0xf000 	\
   226  #else 				\
   227  	BRK 			\
   228  #endif 				\
   229  
   230  
   231  TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0	// executes the BREAK macro (a BRK instruction), then returns
   232  	BREAK
   233  	RET
   234  
   235  TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0	// no work is done here on arm64
   236  	RET	// return immediately
   237  
   238  TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
   239  	// This is the root frame of new Go-created OS threads.
   240  	// Call stack unwinding must not proceed past this frame.
   241  	// Set the frame pointer register to 0 so that frame pointer-based unwinders
   242  	// (which don't use debug info for performance reasons)
   243  	// won't attempt to unwind past this function.
   244  	// See go.dev/issue/63630
   245  	MOVD	$0, R29
   246  	BL	runtime·mstart0(SB)	// the rest of the work is done in mstart0
   247  	RET // not reached
   248  
   249  /*
   250   *  go-routine
   251   */
   252  
   253  // void gogo(Gobuf*)
   254  // restore state from Gobuf; longjmp
   255  TEXT runtime·gogo(SB), NOSPLIT|NOFRAME, $0-8
   256  	MOVD	buf+0(FP), R5	// R5 = buf, the Gobuf pointer argument
   257  	MOVD	gobuf_g(R5), R6	// R6 = buf.g
   258  	MOVD	0(R6), R4	// make sure g != nil
   259  	B	gogo<>(SB)	// gogo<> takes buf in R5 and the new g in R6
   260  
   261  TEXT gogo<>(SB), NOSPLIT|NOFRAME, $0	// inputs: R5 = Gobuf pointer, R6 = g to switch to (both set up by runtime·gogo)
   262  	MOVD	R6, g	// install R6 as the current g
   263  	BL	runtime·save_g(SB)
   264  
   265  	MOVD	gobuf_sp(R5), R0
   266  	MOVD	R0, RSP	// restore the stack pointer (loaded via R0)
   267  	MOVD	gobuf_bp(R5), R29	// restore the frame pointer
   268  	MOVD	gobuf_lr(R5), LR	// restore the link register
   269  	MOVD	gobuf_ctxt(R5), R26	// restore the context register
   270  	MOVD	$0, gobuf_sp(R5)	// zero the fields that were just consumed
   271  	MOVD	$0, gobuf_bp(R5)
   272  	MOVD	$0, gobuf_lr(R5)
   273  	MOVD	$0, gobuf_ctxt(R5)
   274  	CMP	ZR, ZR // set condition codes for == test, needed by stack split
   275  	MOVD	gobuf_pc(R5), R6
   276  	B	(R6)	// resume execution at the saved PC
   277  
   278  // void mcall(fn func(*g))
   279  // Switch to m->g0's stack, call fn(g).
   280  // Fn must never return. It should gogo(&g->sched)
   281  // to keep running g.
   282  TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-8
   283  #ifdef GOEXPERIMENT_runtimesecret
   284  	MOVW	g_secret(g), R26	// R26 is used as scratch here; it is overwritten with the context below
   285  	CBZ 	R26, nosecret
   286  	// Use R26 as a secondary link register
   287  	// We purposefully don't erase it in secretEraseRegistersMcall
   288  	MOVD	LR, R26
   289  	BL 	runtime·secretEraseRegistersMcall(SB)
   290  	MOVD	R26, LR
   291  
   292  nosecret:
   293  #endif
   294  	MOVD	R0, R26				// context
   295  
   296  	// Save caller state in g->sched
   297  	MOVD	RSP, R0
   298  	MOVD	R0, (g_sched+gobuf_sp)(g)
   299  	MOVD	R29, (g_sched+gobuf_bp)(g)
   300  	MOVD	LR, (g_sched+gobuf_pc)(g)	// the saved PC is mcall's return address
   301  	MOVD	$0, (g_sched+gobuf_lr)(g)
   302  
   303  	// Switch to m->g0 & its stack, call fn.
   304  	MOVD	g, R3	// R3 = the g we are switching away from
   305  	MOVD	g_m(g), R8
   306  	MOVD	m_g0(R8), g
   307  	BL	runtime·save_g(SB)
   308  	CMP	g, R3
   309  	BNE	2(PC)	// the original g differs from g0: skip the badmcall jump
   310  	B	runtime·badmcall(SB)
   311  
   312  	MOVD	(g_sched+gobuf_sp)(g), R0
   313  	MOVD	R0, RSP	// sp = m->g0->sched.sp
   314  	MOVD	$0, R29				// clear frame pointer, as caller may execute on another M
   315  	MOVD	R3, R0				// arg = g
   316  	MOVD	$0, -16(RSP)			// dummy LR
   317  	SUB	$16, RSP
   318  	MOVD	0(R26), R4			// code pointer
   319  	BL	(R4)
   320  	B	runtime·badmcall2(SB)	// reached only if fn returns, which it must not do
   321  
   322  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   323  // of the G stack. We need to distinguish the routine that
   324  // lives at the bottom of the G stack from the one that lives
   325  // at the top of the system stack because the one at the top of
   326  // the system stack terminates the stack walk (see topofstack()).
   327  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   328  	UNDEF	// first instruction is UNDEF, so actually executing this routine faults
   329  	BL	(LR)	// make sure this function is not leaf
   330  	RET
   331  
   332  // func systemstack(fn func())
   333  TEXT runtime·systemstack(SB), NOSPLIT, $0-8	// runs fn directly when g is gsignal or g0; otherwise switches to g0's stack, calls fn, and switches back
   334  #ifdef GOEXPERIMENT_runtimesecret
   335  	MOVW	g_secret(g), R3
   336  	CBZ		R3, nosecret
   337  	BL 		·secretEraseRegisters(SB)
   338  
   339  nosecret:
   340  #endif
   341  	MOVD	fn+0(FP), R3	// R3 = fn
   342  	MOVD	R3, R26		// context
   343  	MOVD	g_m(g), R4	// R4 = m
   344  
   345  	MOVD	m_gsignal(R4), R5	// R5 = gsignal
   346  	CMP	g, R5
   347  	BEQ	noswitch
   348  
   349  	MOVD	m_g0(R4), R5	// R5 = g0
   350  	CMP	g, R5
   351  	BEQ	noswitch
   352  
   353  	MOVD	m_curg(R4), R6	// R6 = m.curg
   354  	CMP	g, R6
   355  	BEQ	switch
   356  
   357  	// Bad: g is not gsignal, not g0, not curg. What is it?
   358  	// Hide call from linker nosplit analysis.
   359  	MOVD	$runtime·badsystemstack(SB), R3
   360  	BL	(R3)
   361  	B	runtime·abort(SB)
   362  
   363  switch:
   364  	// Switch stacks.
   365  	// The original frame pointer is stored in R29,
   366  	// which is useful for stack unwinding.
   367  	// Save our state in g->sched. Pretend to
   368  	// be systemstack_switch if the G stack is scanned.
   369  	BL	gosave_systemstack_switch<>(SB)
   370  
   371  	// switch to g0
   372  	MOVD	R5, g	// R5 still holds g0 from the check above
   373  	BL	runtime·save_g(SB)
   374  	MOVD	(g_sched+gobuf_sp)(g), R3
   375  	MOVD	R3, RSP
   376  
   377  	// call target function
   378  	MOVD	0(R26), R3	// code pointer
   379  	BL	(R3)
   380  
   381  	// switch back to g
   382  	MOVD	g_m(g), R3
   383  	MOVD	m_curg(R3), g
   384  	BL	runtime·save_g(SB)
   385  	MOVD	(g_sched+gobuf_sp)(g), R0
   386  	MOVD	R0, RSP
   387  	MOVD	(g_sched+gobuf_bp)(g), R29
   388  	MOVD	$0, (g_sched+gobuf_sp)(g)	// clear the saved sp and bp now that they have been restored
   389  	MOVD	$0, (g_sched+gobuf_bp)(g)
   390  	RET
   391  
   392  noswitch:
   393  	// already on m stack, just call directly
   394  	// Using a tail call here cleans up tracebacks since we won't stop
   395  	// at an intermediate systemstack.
   396  	MOVD	0(R26), R3	// code pointer
   397  	MOVD.P	16(RSP), R30	// restore LR
   398  	SUB	$8, RSP, R29	// restore FP
   399  	B	(R3)
   400  
   401  // func switchToCrashStack0(fn func())
   402  TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8	// makes gcrash the current g (and curm.g0), moves RSP onto its stack, and calls fn
   403  	MOVD	R0, R26    // context register
   404  	MOVD	g_m(g), R1 // curm
   405  
   406  	// set g to gcrash
   407  	MOVD	$runtime·gcrash(SB), g // g = &gcrash
   408  	BL	runtime·save_g(SB)         // clobbers R0
   409  	MOVD	R1, g_m(g)             // g.m = curm
   410  	MOVD	g, m_g0(R1)            // curm.g0 = g
   411  
   412  	// switch to crashstack
   413  	MOVD	(g_stack+stack_hi)(g), R1
   414  	SUB	$(4*8), R1	// start 32 bytes below stack_hi
   415  	MOVD	R1, RSP
   416  
   417  	// call target function
   418  	MOVD	0(R26), R0	// R0 = code pointer loaded from the context in R26
   419  	CALL	(R0)
   420  
   421  	// should never return
   422  	CALL	runtime·abort(SB)
   423  	UNDEF
   424  
   425  /*
   426   * support for morestack
   427   */
   428  
   429  // Called during function prolog when more stack is needed.
   430  // Caller has already loaded:
   431  // R3 prolog's LR (R30)
   432  //
   433  // The traceback routines see morestack on a g0 as being
   434  // the top of a stack (for example, morestack calling newstack
   435  // calling the scheduler calling newm calling gc), so we must
   436  // record an argument size. For that purpose, it has no arguments.
   437  TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
   438  	// Cannot grow scheduler stack (m->g0).
   439  	MOVD	g_m(g), R8	// R8 = g.m
   440  	MOVD	m_g0(R8), R4	// R4 = m.g0, compared against g below
   441  
   442  	// Called from f.
   443  	// Set g->sched to context in f
   444  	MOVD	RSP, R0
   445  	MOVD	R0, (g_sched+gobuf_sp)(g)
   446  	MOVD	R29, (g_sched+gobuf_bp)(g)
   447  	MOVD	LR, (g_sched+gobuf_pc)(g)
   448  	MOVD	R3, (g_sched+gobuf_lr)(g)	// R3 = prolog's LR, loaded by the caller
   449  	MOVD	R26, (g_sched+gobuf_ctxt)(g)
   450  
   451  	CMP	g, R4
   452  	BNE	3(PC)	// g is not m.g0: skip the two-instruction failure path
   453  	BL	runtime·badmorestackg0(SB)
   454  	B	runtime·abort(SB)
   455  
   456  	// Cannot grow signal stack (m->gsignal).
   457  	MOVD	m_gsignal(R8), R4
   458  	CMP	g, R4
   459  	BNE	3(PC)	// g is not m.gsignal: skip the two-instruction failure path
   460  	BL	runtime·badmorestackgsignal(SB)
   461  	B	runtime·abort(SB)
   462  
   463  	// Called from f.
   464  	// Set m->morebuf to f's callers.
   465  	MOVD	R3, (m_morebuf+gobuf_pc)(R8)	// f's caller's PC
   466  	MOVD	RSP, R0
   467  	MOVD	R0, (m_morebuf+gobuf_sp)(R8)	// f's caller's RSP
   468  	MOVD	g, (m_morebuf+gobuf_g)(R8)
   469  
   470  	// If in secret mode, erase registers on transition
   471  	// from G stack to M stack,
   472  #ifdef GOEXPERIMENT_runtimesecret
   473  	MOVW	g_secret(g), R4
   474  	CBZ 	R4, nosecret
   475  	BL	·secretEraseRegisters(SB)
   476  	MOVD	g_m(g), R8	// reload R8 = g.m after the call
   477  nosecret:
   478  #endif
   479  
   480  	// Call newstack on m->g0's stack.
   481  	MOVD	m_g0(R8), g
   482  	BL	runtime·save_g(SB)
   483  	MOVD	(g_sched+gobuf_sp)(g), R0
   484  	MOVD	R0, RSP
   485  	MOVD	$0, R29		// clear frame pointer, as caller may execute on another M
   486  	MOVD.W	$0, -16(RSP)	// create a call frame on g0 (saved LR; keep 16-aligned)
   487  	BL	runtime·newstack(SB)
   488  
   489  	// Not reached, but make sure the return PC from the call to newstack
   490  	// is still in this function, and not the beginning of the next.
   491  	UNDEF
   492  
   493  TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0	// zeroes the context register R26, then jumps to morestack
   494  	// Force SPWRITE. This function doesn't actually write SP,
   495  	// but it is called with a special calling convention where
   496  	// the caller doesn't save LR on stack but passes it as a
   497  	// register (R3), and the unwinder currently doesn't understand.
   498  	// Make it SPWRITE to stop unwinding. (See issue 54332)
   499  	MOVD	RSP, RSP
   500  
   501  	MOVW	$0, R26	// no context: morestack saves R26 into g.sched.ctxt
   502  	B runtime·morestack(SB)
   503  
   504  // spillArgs stores return values from registers to a *internal/abi.RegArgs in R20.
   505  TEXT ·spillArgs(SB),NOSPLIT,$0-0
   506  	STP	(R0, R1), (0*8)(R20)	// integer registers R0-R15 fill 8-byte slots 0-15
   507  	STP	(R2, R3), (2*8)(R20)
   508  	STP	(R4, R5), (4*8)(R20)
   509  	STP	(R6, R7), (6*8)(R20)
   510  	STP	(R8, R9), (8*8)(R20)
   511  	STP	(R10, R11), (10*8)(R20)
   512  	STP	(R12, R13), (12*8)(R20)
   513  	STP	(R14, R15), (14*8)(R20)
   514  	FSTPD	(F0, F1), (16*8)(R20)	// floating-point registers F0-F15 fill 8-byte slots 16-31
   515  	FSTPD	(F2, F3), (18*8)(R20)
   516  	FSTPD	(F4, F5), (20*8)(R20)
   517  	FSTPD	(F6, F7), (22*8)(R20)
   518  	FSTPD	(F8, F9), (24*8)(R20)
   519  	FSTPD	(F10, F11), (26*8)(R20)
   520  	FSTPD	(F12, F13), (28*8)(R20)
   521  	FSTPD	(F14, F15), (30*8)(R20)
   522  	RET
   523  
   524  // unspillArgs loads args into registers from a *internal/abi.RegArgs in R20.
   525  TEXT ·unspillArgs(SB),NOSPLIT,$0-0
   526  	LDP	(0*8)(R20), (R0, R1)	// 8-byte slots 0-15 are loaded into integer registers R0-R15
   527  	LDP	(2*8)(R20), (R2, R3)
   528  	LDP	(4*8)(R20), (R4, R5)
   529  	LDP	(6*8)(R20), (R6, R7)
   530  	LDP	(8*8)(R20), (R8, R9)
   531  	LDP	(10*8)(R20), (R10, R11)
   532  	LDP	(12*8)(R20), (R12, R13)
   533  	LDP	(14*8)(R20), (R14, R15)
   534  	FLDPD	(16*8)(R20), (F0, F1)	// 8-byte slots 16-31 are loaded into floating-point registers F0-F15
   535  	FLDPD	(18*8)(R20), (F2, F3)
   536  	FLDPD	(20*8)(R20), (F4, F5)
   537  	FLDPD	(22*8)(R20), (F6, F7)
   538  	FLDPD	(24*8)(R20), (F8, F9)
   539  	FLDPD	(26*8)(R20), (F10, F11)
   540  	FLDPD	(28*8)(R20), (F12, F13)
   541  	FLDPD	(30*8)(R20), (F14, F15)
   542  	RET
   543  
   544  // reflectcall: call a function with the given argument list
   545  // func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
   546  // we don't have variable-sized frames, so we use a small number
   547  // of constant-sized-frame functions to encode a few bits of size in the pc.
   548  // Caution: ugly multiline assembly macros in your future!
   549  
   550  #define DISPATCH(NAME,MAXSIZE)		\
   551  	MOVD	$MAXSIZE, R27;		\
   552  	CMP	R27, R16;		\
   553  	BGT	3(PC);			\
   554  	MOVD	$NAME(SB), R27;	\
   555  	B	(R27)
   556  // DISPATCH: if R16 is greater than MAXSIZE, skip ahead; otherwise jump to NAME through R27. Note: can't just "B NAME(SB)" - bad inlining results.
   557  
   558  TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-48
   559  	MOVWU	frameSize+32(FP), R16	// R16 = frameSize; each DISPATCH below compares it with one size class
   560  	DISPATCH(runtime·call16, 16)
   561  	DISPATCH(runtime·call32, 32)
   562  	DISPATCH(runtime·call64, 64)
   563  	DISPATCH(runtime·call128, 128)
   564  	DISPATCH(runtime·call256, 256)
   565  	DISPATCH(runtime·call512, 512)
   566  	DISPATCH(runtime·call1024, 1024)
   567  	DISPATCH(runtime·call2048, 2048)
   568  	DISPATCH(runtime·call4096, 4096)
   569  	DISPATCH(runtime·call8192, 8192)
   570  	DISPATCH(runtime·call16384, 16384)
   571  	DISPATCH(runtime·call32768, 32768)
   572  	DISPATCH(runtime·call65536, 65536)
   573  	DISPATCH(runtime·call131072, 131072)
   574  	DISPATCH(runtime·call262144, 262144)
   575  	DISPATCH(runtime·call524288, 524288)
   576  	DISPATCH(runtime·call1048576, 1048576)
   577  	DISPATCH(runtime·call2097152, 2097152)
   578  	DISPATCH(runtime·call4194304, 4194304)
   579  	DISPATCH(runtime·call8388608, 8388608)
   580  	DISPATCH(runtime·call16777216, 16777216)
   581  	DISPATCH(runtime·call33554432, 33554432)
   582  	DISPATCH(runtime·call67108864, 67108864)
   583  	DISPATCH(runtime·call134217728, 134217728)
   584  	DISPATCH(runtime·call268435456, 268435456)
   585  	DISPATCH(runtime·call536870912, 536870912)
   586  	DISPATCH(runtime·call1073741824, 1073741824)
   587  	MOVD	$runtime·badreflectcall(SB), R0	// no size class was large enough
   588  	B	(R0)
   589  
   590  #define CALLFN(NAME,MAXSIZE)			\
   591  TEXT NAME(SB), WRAPPER, $MAXSIZE-48;		\
   592  	NO_LOCAL_POINTERS;			\
   593  	/* copy stackArgsSize bytes of arguments from stackArgs to the stack at 8(RSP) */		\
   594  	MOVD	stackArgs+16(FP), R3;			\
   595  	MOVWU	stackArgsSize+24(FP), R4;		\
   596  	ADD	$8, RSP, R5;			\
   597  	BIC	$0xf, R4, R6;			\
   598  	CBZ	R6, 6(PC);			\
   599  	/* if R6=(argsize&~15) != 0: R6 becomes the end address of the 16-byte-chunk region */		\
   600  	ADD	R6, R5, R6;			\
   601  	/* copy 16 bytes a time until R5 reaches R6 */		\
   602  	LDP.P	16(R3), (R7, R8);		\
   603  	STP.P	(R7, R8), 16(R5);		\
   604  	CMP	R5, R6;				\
   605  	BNE	-3(PC);				\
   606  	AND	$0xf, R4, R6;			\
   607  	CBZ	R6, 6(PC);			\
   608  	/* if R6=(argsize&15) != 0: R6 becomes the end address of the remaining tail bytes */		\
   609  	ADD	R6, R5, R6;			\
   610  	/* copy 1 byte a time for the rest */	\
   611  	MOVBU.P	1(R3), R7;			\
   612  	MOVBU.P	R7, 1(R5);			\
   613  	CMP	R5, R6;				\
   614  	BNE	-3(PC);				\
   615  	/* set up argument registers: R20 = regArgs, the pointer unspillArgs reads from */		\
   616  	MOVD	regArgs+40(FP), R20;		\
   617  	CALL	·unspillArgs(SB);		\
   618  	/* call function: R26 = f, R20 = code pointer loaded from f */			\
   619  	MOVD	f+8(FP), R26;			\
   620  	MOVD	(R26), R20;			\
   621  	PCDATA	$PCDATA_StackMapIndex, $0;	\
   622  	BL	(R20);				\
   623  	/* copy return values back: spill registers to regArgs, then pass the stack part to callRet */		\
   624  	MOVD	regArgs+40(FP), R20;		\
   625  	CALL	·spillArgs(SB);		\
   626  	MOVD	stackArgsType+0(FP), R7;		\
   627  	MOVD	stackArgs+16(FP), R3;			\
   628  	MOVWU	stackArgsSize+24(FP), R4;			\
   629  	MOVWU	stackRetOffset+28(FP), R6;		\
   630  	ADD	$8, RSP, R5;			\
   631  	ADD	R6, R5; 			\
   632  	ADD	R6, R3;				\
   633  	SUB	R6, R4;				\
   634  	BL	callRet<>(SB);			\
   635  	RET
   636  
   637  // callRet copies return values back at the end of call*. This is a
   638  // separate function so it can allocate stack space for the arguments
   639  // to reflectcallmove. It does not follow the Go ABI; it expects its
   640  // arguments in registers.
   641  TEXT callRet<>(SB), NOSPLIT, $48-0
   642  	NO_LOCAL_POINTERS
   643  	STP	(R7, R3), 8(RSP)	// as set by CALLFN: R7 = stackArgsType, R3 = stackArgs + stackRetOffset
   644  	STP	(R5, R4), 24(RSP)	// as set by CALLFN: R5 = 8(RSP) of the call* frame + stackRetOffset, R4 = stackArgsSize - stackRetOffset
   645  	MOVD	R20, 40(RSP)	// as set by CALLFN: R20 = regArgs
   646  	BL	runtime·reflectcallmove(SB)
   647  	RET
   648  
   649  CALLFN(·call16, 16)	// one fixed-frame function per size class, 16 through 1073741824; the same list reflectcall dispatches over
   650  CALLFN(·call32, 32)
   651  CALLFN(·call64, 64)
   652  CALLFN(·call128, 128)
   653  CALLFN(·call256, 256)
   654  CALLFN(·call512, 512)
   655  CALLFN(·call1024, 1024)
   656  CALLFN(·call2048, 2048)
   657  CALLFN(·call4096, 4096)
   658  CALLFN(·call8192, 8192)
   659  CALLFN(·call16384, 16384)
   660  CALLFN(·call32768, 32768)
   661  CALLFN(·call65536, 65536)
   662  CALLFN(·call131072, 131072)
   663  CALLFN(·call262144, 262144)
   664  CALLFN(·call524288, 524288)
   665  CALLFN(·call1048576, 1048576)
   666  CALLFN(·call2097152, 2097152)
   667  CALLFN(·call4194304, 4194304)
   668  CALLFN(·call8388608, 8388608)
   669  CALLFN(·call16777216, 16777216)
   670  CALLFN(·call33554432, 33554432)
   671  CALLFN(·call67108864, 67108864)
   672  CALLFN(·call134217728, 134217728)
   673  CALLFN(·call268435456, 268435456)
   674  CALLFN(·call536870912, 536870912)
   675  CALLFN(·call1073741824, 1073741824)
   676  
   677  // func memhash32(p unsafe.Pointer, h uintptr) uintptr
   678  TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
   679  	MOVB	runtime·useAeshash(SB), R10
   680  	CBZ	R10, noaes	// useAeshash == 0: use the fallback
   681  	MOVD	$runtime·aeskeysched+0(SB), R3
   682  
   683  	VEOR	V0.B16, V0.B16, V0.B16	// V0 = 0
   684  	VLD1	(R3), [V2.B16]	// V2 = first 16 bytes of aeskeysched
   685  	VLD1	(R0), V0.S[1]	// 32-bit lane 1 = the word at (R0); presumably R0 = p under ABIInternal
   686  	VMOV	R1, V0.S[0]	// 32-bit lane 0 = low 32 bits of R1; presumably R1 = h (the seed)
   687  
   688  	AESE	V2.B16, V0.B16
   689  	AESMC	V0.B16, V0.B16
   690  	AESE	V2.B16, V0.B16
   691  	AESMC	V0.B16, V0.B16
   692  	AESE	V2.B16, V0.B16
   693  
   694  	VMOV	V0.D[0], R0	// result = low 64 bits of V0
   695  	RET
   696  noaes:
   697  	B	runtime·memhash32Fallback<ABIInternal>(SB)
   698  
   699  // func memhash64(p unsafe.Pointer, h uintptr) uintptr
   700  TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
   701  	MOVB	runtime·useAeshash(SB), R10
   702  	CBZ	R10, noaes	// useAeshash == 0: use the fallback
   703  	MOVD	$runtime·aeskeysched+0(SB), R3
   704  
   705  	VEOR	V0.B16, V0.B16, V0.B16	// V0 = 0
   706  	VLD1	(R3), [V2.B16]	// V2 = first 16 bytes of aeskeysched
   707  	VLD1	(R0), V0.D[1]	// 64-bit lane 1 = the doubleword at (R0); presumably R0 = p under ABIInternal
   708  	VMOV	R1, V0.D[0]	// 64-bit lane 0 = R1; presumably R1 = h (the seed)
   709  
   710  	AESE	V2.B16, V0.B16
   711  	AESMC	V0.B16, V0.B16
   712  	AESE	V2.B16, V0.B16
   713  	AESMC	V0.B16, V0.B16
   714  	AESE	V2.B16, V0.B16
   715  
   716  	VMOV	V0.D[0], R0	// result = low 64 bits of V0
   717  	RET
   718  noaes:
   719  	B	runtime·memhash64Fallback<ABIInternal>(SB)
   720  
   721  // func memhash(p unsafe.Pointer, h, size uintptr) uintptr
   722  TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
   723  	MOVB	runtime·useAeshash(SB), R10	// R10 = useAeshash flag
   724  	CBZ	R10, fallback	// flag is zero: take the fallback path
   725  	JMP	runtime·aeshashbody<>(SB)	// argument registers are passed through unchanged
   726  fallback:
   727  	JMP	runtime·memhashFallback<ABIInternal>(SB)
   728  
   729  // func strhash(p unsafe.Pointer, h uintptr) uintptr
   730  TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
   731  	MOVB	runtime·useAeshash(SB), R10	// R10 = useAeshash flag
   732  	CBZ	R10, fallback	// flag is zero: take the fallback path
   733  	LDP	(R0), (R0, R2)	// R0 = string data pointer, R2 = string length, both loaded from the string header at (R0)
   734  	JMP	runtime·aeshashbody<>(SB)	// aeshashbody takes data in R0, seed in R1, length in R2
   735  fallback:
   736  	JMP	runtime·strhashFallback<ABIInternal>(SB)
   737  
   738  // R0: data
   739  // R1: seed data
   740  // R2: length
   741  // At return, R0 = return value
   742  TEXT runtime·aeshashbody<>(SB),NOSPLIT|NOFRAME,$0
   743  	VEOR	V30.B16, V30.B16, V30.B16
   744  	VMOV	R1, V30.D[0]
   745  	VMOV	R2, V30.D[1] // load length into seed
   746  
   747  	MOVD	$runtime·aeskeysched+0(SB), R4
   748  	VLD1.P	16(R4), [V0.B16]
   749  	AESE	V30.B16, V0.B16
   750  	AESMC	V0.B16, V0.B16
   751  	CMP	$16, R2
   752  	BLO	aes0to15
   753  	BEQ	aes16
   754  	CMP	$32, R2
   755  	BLS	aes17to32
   756  	CMP	$64, R2
   757  	BLS	aes33to64
   758  	CMP	$128, R2
   759  	BLS	aes65to128
   760  	B	aes129plus
   761  
   762  aes0to15:
   763  	CBZ	R2, aes0
   764  	VEOR	V2.B16, V2.B16, V2.B16
   765  	TBZ	$3, R2, less_than_8
   766  	VLD1.P	8(R0), V2.D[0]
   767  
   768  less_than_8:
   769  	TBZ	$2, R2, less_than_4
   770  	VLD1.P	4(R0), V2.S[2]
   771  
   772  less_than_4:
   773  	TBZ	$1, R2, less_than_2
   774  	VLD1.P	2(R0), V2.H[6]
   775  
   776  less_than_2:
   777  	TBZ	$0, R2, done
   778  	VLD1	(R0), V2.B[14]
   779  done:
   780  	AESE	V0.B16, V2.B16
   781  	AESMC	V2.B16, V2.B16
   782  	AESE	V0.B16, V2.B16
   783  	AESMC	V2.B16, V2.B16
   784  	AESE	V0.B16, V2.B16
   785  	AESMC	V2.B16, V2.B16
   786  
   787  	VMOV	V2.D[0], R0
   788  	RET
   789  
   790  aes0:
   791  	VMOV	V0.D[0], R0
   792  	RET
   793  
   794  aes16:
   795  	VLD1	(R0), [V2.B16]
   796  	B	done
   797  
   798  aes17to32:
   799  	// make second seed
   800  	VLD1	(R4), [V1.B16]
   801  	AESE	V30.B16, V1.B16
   802  	AESMC	V1.B16, V1.B16
   803  	SUB	$16, R2, R10
   804  	VLD1.P	(R0)(R10), [V2.B16]
   805  	VLD1	(R0), [V3.B16]
   806  
   807  	AESE	V0.B16, V2.B16
   808  	AESMC	V2.B16, V2.B16
   809  	AESE	V1.B16, V3.B16
   810  	AESMC	V3.B16, V3.B16
   811  
   812  	AESE	V0.B16, V2.B16
   813  	AESMC	V2.B16, V2.B16
   814  	AESE	V1.B16, V3.B16
   815  	AESMC	V3.B16, V3.B16
   816  
   817  	AESE	V0.B16, V2.B16
   818  	AESE	V1.B16, V3.B16
   819  
   820  	VEOR	V3.B16, V2.B16, V2.B16
   821  
   822  	VMOV	V2.D[0], R0
   823  	RET
   824  
   825  aes33to64:
   826  	VLD1	(R4), [V1.B16, V2.B16, V3.B16]
   827  	AESE	V30.B16, V1.B16
   828  	AESMC	V1.B16, V1.B16
   829  	AESE	V30.B16, V2.B16
   830  	AESMC	V2.B16, V2.B16
   831  	AESE	V30.B16, V3.B16
   832  	AESMC	V3.B16, V3.B16
   833  	SUB	$32, R2, R10
   834  
   835  	VLD1.P	(R0)(R10), [V4.B16, V5.B16]
   836  	VLD1	(R0), [V6.B16, V7.B16]
   837  
   838  	AESE	V0.B16, V4.B16
   839  	AESMC	V4.B16, V4.B16
   840  	AESE	V1.B16, V5.B16
   841  	AESMC	V5.B16, V5.B16
   842  	AESE	V2.B16, V6.B16
   843  	AESMC	V6.B16, V6.B16
   844  	AESE	V3.B16, V7.B16
   845  	AESMC	V7.B16, V7.B16
   846  
   847  	AESE	V0.B16, V4.B16
   848  	AESMC	V4.B16, V4.B16
   849  	AESE	V1.B16, V5.B16
   850  	AESMC	V5.B16, V5.B16
   851  	AESE	V2.B16, V6.B16
   852  	AESMC	V6.B16, V6.B16
   853  	AESE	V3.B16, V7.B16
   854  	AESMC	V7.B16, V7.B16
   855  
   856  	AESE	V0.B16, V4.B16
   857  	AESE	V1.B16, V5.B16
   858  	AESE	V2.B16, V6.B16
   859  	AESE	V3.B16, V7.B16
   860  
   861  	VEOR	V6.B16, V4.B16, V4.B16
   862  	VEOR	V7.B16, V5.B16, V5.B16
   863  	VEOR	V5.B16, V4.B16, V4.B16
   864  
   865  	VMOV	V4.D[0], R0
   866  	RET
   867  
   868  aes65to128:
   869  	VLD1.P	64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
   870  	VLD1	(R4), [V5.B16, V6.B16, V7.B16]
   871  	AESE	V30.B16, V1.B16
   872  	AESMC	V1.B16, V1.B16
   873  	AESE	V30.B16, V2.B16
   874  	AESMC	V2.B16, V2.B16
   875  	AESE	V30.B16, V3.B16
   876  	AESMC	V3.B16, V3.B16
   877  	AESE	V30.B16, V4.B16
   878  	AESMC	V4.B16, V4.B16
   879  	AESE	V30.B16, V5.B16
   880  	AESMC	V5.B16, V5.B16
   881  	AESE	V30.B16, V6.B16
   882  	AESMC	V6.B16, V6.B16
   883  	AESE	V30.B16, V7.B16
   884  	AESMC	V7.B16, V7.B16
   885  
   886  	SUB	$64, R2, R10
   887  	VLD1.P	(R0)(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
   888  	VLD1	(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
   889  	AESE	V0.B16,	 V8.B16
   890  	AESMC	V8.B16,  V8.B16
   891  	AESE	V1.B16,	 V9.B16
   892  	AESMC	V9.B16,  V9.B16
   893  	AESE	V2.B16, V10.B16
   894  	AESMC	V10.B16,  V10.B16
   895  	AESE	V3.B16, V11.B16
   896  	AESMC	V11.B16,  V11.B16
   897  	AESE	V4.B16, V12.B16
   898  	AESMC	V12.B16,  V12.B16
   899  	AESE	V5.B16, V13.B16
   900  	AESMC	V13.B16,  V13.B16
   901  	AESE	V6.B16, V14.B16
   902  	AESMC	V14.B16,  V14.B16
   903  	AESE	V7.B16, V15.B16
   904  	AESMC	V15.B16,  V15.B16
   905  
   906  	AESE	V0.B16,	 V8.B16
   907  	AESMC	V8.B16,  V8.B16
   908  	AESE	V1.B16,	 V9.B16
   909  	AESMC	V9.B16,  V9.B16
   910  	AESE	V2.B16, V10.B16
   911  	AESMC	V10.B16,  V10.B16
   912  	AESE	V3.B16, V11.B16
   913  	AESMC	V11.B16,  V11.B16
   914  	AESE	V4.B16, V12.B16
   915  	AESMC	V12.B16,  V12.B16
   916  	AESE	V5.B16, V13.B16
   917  	AESMC	V13.B16,  V13.B16
   918  	AESE	V6.B16, V14.B16
   919  	AESMC	V14.B16,  V14.B16
   920  	AESE	V7.B16, V15.B16
   921  	AESMC	V15.B16,  V15.B16
   922  
   923  	AESE	V0.B16,	 V8.B16
   924  	AESE	V1.B16,	 V9.B16
   925  	AESE	V2.B16, V10.B16
   926  	AESE	V3.B16, V11.B16
   927  	AESE	V4.B16, V12.B16
   928  	AESE	V5.B16, V13.B16
   929  	AESE	V6.B16, V14.B16
   930  	AESE	V7.B16, V15.B16
   931  
   932  	VEOR	V12.B16, V8.B16, V8.B16
   933  	VEOR	V13.B16, V9.B16, V9.B16
   934  	VEOR	V14.B16, V10.B16, V10.B16
   935  	VEOR	V15.B16, V11.B16, V11.B16
   936  	VEOR	V10.B16, V8.B16, V8.B16
   937  	VEOR	V11.B16, V9.B16, V9.B16
   938  	VEOR	V9.B16, V8.B16, V8.B16
   939  
   940  	VMOV	V8.D[0], R0
   941  	RET
   942  
   943  aes129plus:
   944  	PRFM (R0), PLDL1KEEP
   945  	VLD1.P	64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
   946  	VLD1	(R4), [V5.B16, V6.B16, V7.B16]
   947  	AESE	V30.B16, V1.B16
   948  	AESMC	V1.B16, V1.B16
   949  	AESE	V30.B16, V2.B16
   950  	AESMC	V2.B16, V2.B16
   951  	AESE	V30.B16, V3.B16
   952  	AESMC	V3.B16, V3.B16
   953  	AESE	V30.B16, V4.B16
   954  	AESMC	V4.B16, V4.B16
   955  	AESE	V30.B16, V5.B16
   956  	AESMC	V5.B16, V5.B16
   957  	AESE	V30.B16, V6.B16
   958  	AESMC	V6.B16, V6.B16
   959  	AESE	V30.B16, V7.B16
   960  	AESMC	V7.B16, V7.B16
   961  	ADD	R0, R2, R10
   962  	SUB	$128, R10, R10
   963  	VLD1.P	64(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
   964  	VLD1	(R10), [V12.B16, V13.B16, V14.B16, V15.B16]
   965  	SUB	$1, R2, R2
   966  	LSR	$7, R2, R2
   967  
   968  aesloop:
   969  	AESE	V8.B16,	 V0.B16
   970  	AESMC	V0.B16,  V0.B16
   971  	AESE	V9.B16,	 V1.B16
   972  	AESMC	V1.B16,  V1.B16
   973  	AESE	V10.B16, V2.B16
   974  	AESMC	V2.B16,  V2.B16
   975  	AESE	V11.B16, V3.B16
   976  	AESMC	V3.B16,  V3.B16
   977  	AESE	V12.B16, V4.B16
   978  	AESMC	V4.B16,  V4.B16
   979  	AESE	V13.B16, V5.B16
   980  	AESMC	V5.B16,  V5.B16
   981  	AESE	V14.B16, V6.B16
   982  	AESMC	V6.B16,  V6.B16
   983  	AESE	V15.B16, V7.B16
   984  	AESMC	V7.B16,  V7.B16
   985  
   986  	VLD1.P	64(R0), [V8.B16, V9.B16, V10.B16, V11.B16]
   987  	AESE	V8.B16,	 V0.B16
   988  	AESMC	V0.B16,  V0.B16
   989  	AESE	V9.B16,	 V1.B16
   990  	AESMC	V1.B16,  V1.B16
   991  	AESE	V10.B16, V2.B16
   992  	AESMC	V2.B16,  V2.B16
   993  	AESE	V11.B16, V3.B16
   994  	AESMC	V3.B16,  V3.B16
   995  
   996  	VLD1.P	64(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
   997  	AESE	V12.B16, V4.B16
   998  	AESMC	V4.B16,  V4.B16
   999  	AESE	V13.B16, V5.B16
  1000  	AESMC	V5.B16,  V5.B16
  1001  	AESE	V14.B16, V6.B16
  1002  	AESMC	V6.B16,  V6.B16
  1003  	AESE	V15.B16, V7.B16
  1004  	AESMC	V7.B16,  V7.B16
  1005  	SUB	$1, R2, R2
  1006  	CBNZ	R2, aesloop
  1007  
  1008  	AESE	V8.B16,	 V0.B16
  1009  	AESMC	V0.B16,  V0.B16
  1010  	AESE	V9.B16,	 V1.B16
  1011  	AESMC	V1.B16,  V1.B16
  1012  	AESE	V10.B16, V2.B16
  1013  	AESMC	V2.B16,  V2.B16
  1014  	AESE	V11.B16, V3.B16
  1015  	AESMC	V3.B16,  V3.B16
  1016  	AESE	V12.B16, V4.B16
  1017  	AESMC	V4.B16,  V4.B16
  1018  	AESE	V13.B16, V5.B16
  1019  	AESMC	V5.B16,  V5.B16
  1020  	AESE	V14.B16, V6.B16
  1021  	AESMC	V6.B16,  V6.B16
  1022  	AESE	V15.B16, V7.B16
  1023  	AESMC	V7.B16,  V7.B16
  1024  
  1025  	AESE	V8.B16,	 V0.B16
  1026  	AESMC	V0.B16,  V0.B16
  1027  	AESE	V9.B16,	 V1.B16
  1028  	AESMC	V1.B16,  V1.B16
  1029  	AESE	V10.B16, V2.B16
  1030  	AESMC	V2.B16,  V2.B16
  1031  	AESE	V11.B16, V3.B16
  1032  	AESMC	V3.B16,  V3.B16
  1033  	AESE	V12.B16, V4.B16
  1034  	AESMC	V4.B16,  V4.B16
  1035  	AESE	V13.B16, V5.B16
  1036  	AESMC	V5.B16,  V5.B16
  1037  	AESE	V14.B16, V6.B16
  1038  	AESMC	V6.B16,  V6.B16
  1039  	AESE	V15.B16, V7.B16
  1040  	AESMC	V7.B16,  V7.B16
  1041  
  1042  	AESE	V8.B16,	 V0.B16
  1043  	AESE	V9.B16,	 V1.B16
  1044  	AESE	V10.B16, V2.B16
  1045  	AESE	V11.B16, V3.B16
  1046  	AESE	V12.B16, V4.B16
  1047  	AESE	V13.B16, V5.B16
  1048  	AESE	V14.B16, V6.B16
  1049  	AESE	V15.B16, V7.B16
  1050  
  1051  	VEOR	V0.B16, V1.B16, V0.B16
  1052  	VEOR	V2.B16, V3.B16, V2.B16
  1053  	VEOR	V4.B16, V5.B16, V4.B16
  1054  	VEOR	V6.B16, V7.B16, V6.B16
  1055  	VEOR	V0.B16, V2.B16, V0.B16
  1056  	VEOR	V4.B16, V6.B16, V4.B16
  1057  	VEOR	V4.B16, V0.B16, V0.B16
  1058  
  1059  	VMOV	V0.D[0], R0
  1060  	RET
  1061  
  1062  // The Arm architecture provides a user space accessible counter-timer which
  1063  // is incremented at a fixed but machine-specific rate. Software can (spin)
  1064  // wait until the counter-timer reaches some desired value.
  1065  //
  1066  // Armv8.7-A introduced the WFET (FEAT_WFxT) instruction, which allows the
  1067  // processor to enter a low power state for a set time, or until an event is
  1068  // received.
  1069  //
  1070  // However, WFET is not used here because it is only available on newer hardware,
  1071  // and we aim to maintain compatibility with older Armv8-A platforms that do not
  1072  // support this feature.
  1073  //
  1074  // As a fallback, we can instead use the ISB instruction to decrease processor
  1075  // activity and thus power consumption between checks of the counter-timer.
  1076  // Note that we do not depend on the latency of the ISB instruction which is
  1077  // implementation specific. Actual delay comes from comparing against a fresh
  1078  // read of the counter-timer value.
  1079  //
  1080  // Read more in this Arm blog post:
  1081  // https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/multi-threaded-applications-arm
  1082  
  1083  TEXT runtime·procyieldAsm(SB),NOSPLIT,$0-0
        	// Spin for roughly the delay passed in cycles, which the code below
        	// treats as a nanosecond count. ISB is the spin primitive and the
        	// counter-timer (CNTVCT_EL0) is the clock.
        	// Register use: R0 = delay, later delay in ticks;
        	// R1 = counter frequency, later elapsed ticks; R2 = start tick.
  1084  	MOVWU	cycles+0(FP), R0	// 32-bit load, zero-extended
  1085  	CBZ	 R0, done	// zero delay: return immediately
  1086  	// Prevent speculation of subsequent counter/timer reads and memory accesses.
  1087  	ISB     $15
  1088  	// If the delay is very short, just return.
  1089  	// Hardcode 18ns as the first ISB delay.
  1090  	CMP     $18, R0
  1091  	BLS     done	// unsigned: delay <= 18
  1092  	// Adjust for overhead of initial ISB.
  1093  	SUB     $18, R0, R0
  1094  	// Convert the delay from nanoseconds to counter/timer ticks.
  1095  	// Read the counter/timer frequency.
  1096  	// delay_ticks = (delay * CNTFRQ_EL0) / 1e9
  1097  	// With the below simplifications and adjustments,
  1098  	// we are usually within 2% of the correct value:
  1099  	// delay_ticks = (delay + delay / 16) * CNTFRQ_EL0 >> 30
  1100  	MRS     CNTFRQ_EL0, R1	// R1 = counter frequency
  1101  	ADD     R0>>4, R0, R0	// R0 = delay + delay/16
  1102  	MUL     R1, R0, R0	// R0 *= frequency
  1103  	LSR     $30, R0, R0	// R0 = delay_ticks
  1104  	CBZ     R0, done	// delay rounds down to zero ticks
  1105  	// start = current counter/timer value
  1106  	MRS     CNTVCT_EL0, R2
  1107  delay:
  1108  	// Delay using ISB for all ticks.
  1109  	ISB     $15
  1110  	// Subtract and compare to handle counter roll-over.
  1111  	// counter_read() - start < delay_ticks
  1112  	MRS     CNTVCT_EL0, R1
  1113  	SUB     R2, R1, R1	// R1 = ticks elapsed since start
  1114  	CMP     R0, R1
  1115  	BCC     delay	// unsigned: keep spinning while elapsed < delay_ticks
  1116  done:
  1117  	RET
  1118  
  1119  // Save state of caller into g->sched,
  1120  // but using fake PC from systemstack_switch.
  1121  // Must only be called from functions with no locals ($0)
  1122  // or else unwinding from systemstack_switch is incorrect.
  1123  // Smashes R0.
        // Calls runtime·abort if g->sched.ctxt is not zero.
  1124  TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
  1125  	MOVD	$runtime·systemstack_switch(SB), R0
  1126  	ADD	$8, R0	// get past prologue
  1127  	MOVD	R0, (g_sched+gobuf_pc)(g)	// sched.pc = systemstack_switch+8
  1128  	MOVD	RSP, R0
  1129  	MOVD	R0, (g_sched+gobuf_sp)(g)	// sched.sp = current SP
  1130  	MOVD	R29, (g_sched+gobuf_bp)(g)	// sched.bp = current frame pointer
  1131  	MOVD	$0, (g_sched+gobuf_lr)(g)	// sched.lr = 0
  1132  	// Assert ctxt is zero. See func save.
  1133  	MOVD	(g_sched+gobuf_ctxt)(g), R0
  1134  	CBZ	R0, 2(PC)	// ctxt == 0: skip the abort call
  1135  	CALL	runtime·abort(SB)
  1136  	RET
  1137  
  1138  // func asmcgocall_no_g(fn, arg unsafe.Pointer)
  1139  // Call fn(arg) aligned appropriately for the gcc ABI.
  1140  // Called on a system stack, and there may be no g yet (during needm).
        // No result is stored; whatever fn leaves in R0 is ignored.
  1141  TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-16
  1142  	MOVD	fn+0(FP), R1	// R1 = function to call
  1143  	MOVD	arg+8(FP), R0	// R0 = its single argument
  1144  	SUB	$16, RSP	// skip over saved frame pointer below RSP
  1145  	BL	(R1)
  1146  	ADD	$16, RSP	// skip over saved frame pointer below RSP
  1147  	RET
  1148  
  1149  // func asmcgocall(fn, arg unsafe.Pointer) int32
  1150  // Call fn(arg) on the scheduler stack,
  1151  // aligned appropriately for the gcc ABI.
  1152  // See cgocall.go for more details.
        //
        // The low 32 bits of R0 after fn returns are stored as the int32 result.
        // If g is nil, or g is already m->gsignal or m->g0, fn is called on the
        // current stack (the nosave path) without switching.
  1153  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
  1154  	CBZ	g, nosave
  1155  
  1156  	// Figure out if we need to switch to m->g0 stack.
  1157  	// We get called to create new OS threads too, and those
  1158  	// come in on the m->g0 stack already. Or we might already
  1159  	// be on the m->gsignal stack.
  1160  	MOVD	g_m(g), R8
  1161  	MOVD	m_gsignal(R8), R3
  1162  	CMP	R3, g
  1163  	BEQ	nosave
  1164  	MOVD	m_g0(R8), R3	// R3 = m->g0, used below as the switch target
  1165  	CMP	R3, g
  1166  	BEQ	nosave
  1167  
  1168  	// running on a user stack. Figure out if we're running
  1169  	// secret code and clear our registers if so.
  1170  #ifdef GOEXPERIMENT_runtimesecret
  1171  	MOVW 	g_secret(g), R5
  1172  	CBZ		R5, nosecret
  1173  	BL 	·secretEraseRegisters(SB)
  1174  	// restore g0 back into R3
  1175  	MOVD	g_m(g), R3
  1176  	MOVD	m_g0(R3), R3
  1177  
  1178  nosecret:
  1179  #endif
  1180  	MOVD	fn+0(FP), R1
  1181  	MOVD	arg+8(FP), R0
  1182  	MOVD	RSP, R2	// R2 = SP on the calling goroutine's stack
  1183  	MOVD	g, R4	// R4 = calling g
  1184  
  1185  	// Switch to system stack.
  1186  	MOVD	R0, R9	// gosave_systemstack_switch<> and save_g might clobber R0
  1187  	BL	gosave_systemstack_switch<>(SB)
  1188  	MOVD	R3, g
  1189  	BL	runtime·save_g(SB)
  1190  	MOVD	(g_sched+gobuf_sp)(g), R0
  1191  	MOVD	R0, RSP
  1192  	MOVD	(g_sched+gobuf_bp)(g), R29
  1193  	MOVD	R9, R0	// R0 = arg again
  1194  
  1195  	// Now on a scheduling stack (a pthread-created stack).
  1196  	// Save room for two of our pointers /*, plus 32 bytes of callee
  1197  	// save area that lives on the caller stack. */
  1198  	MOVD	RSP, R13
  1199  	SUB	$16, R13
  1200  	MOVD	R13, RSP
  1201  	MOVD	R4, 0(RSP)	// save old g on stack
  1202  	MOVD	(g_stack+stack_hi)(R4), R4
  1203  	SUB	R2, R4	// R4 = old g's stack.hi - old SP
  1204  	MOVD	R4, 8(RSP)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
  1205  	BL	(R1)
  1206  	MOVD	R0, R9	// keep fn's result across save_g
  1207  
  1208  	// Restore g, stack pointer. R0 is errno, so don't touch it
  1209  	MOVD	0(RSP), g
  1210  	BL	runtime·save_g(SB)
  1211  	MOVD	(g_stack+stack_hi)(g), R5
  1212  	MOVD	8(RSP), R6
  1213  	SUB	R6, R5	// R5 = stack.hi - saved depth = SP on the goroutine stack
  1214  	MOVD	R9, R0
  1215  	MOVD	R5, RSP
  1216  
  1217  	MOVW	R0, ret+16(FP)
  1218  	RET
  1219  
  1220  nosave:
  1221  	// Running on a system stack, perhaps even without a g.
  1222  	// Having no g can happen during thread creation or thread teardown
  1223  	// (see needm/dropm on Solaris, for example).
  1224  	// This code is like the above sequence but without saving/restoring g
  1225  	// and without worrying about the stack moving out from under us
  1226  	// (because we're on a system stack, not a goroutine stack).
  1227  	MOVD	fn+0(FP), R1
  1228  	MOVD	arg+8(FP), R0
  1229  	MOVD	RSP, R2
  1230  	MOVD 	R2, R13
  1231  	SUB	$16, R13
  1232  	MOVD	R13, RSP
  1233  	MOVD	$0, R4
  1234  	MOVD	R4, 0(RSP)	// Where above code stores g, in case someone looks during debugging.
  1235  	MOVD	R2, 8(RSP)	// Save original stack pointer.
  1236  	BL	(R1)
  1237  	// Restore stack pointer.
  1238  	MOVD	8(RSP), R2
  1239  	MOVD	R2, RSP
        	// The result is an int32 at offset 16 of a 20-byte argument area, so
        	// store only 4 bytes, exactly as the stack-switching path above does.
  1240  	MOVW	R0, ret+16(FP)
  1241  	RET
  1242  
  1243  // cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
  1244  // See cgocall.go for more details.
        //
        // With a non-nil fn this switches to m->curg's stack and calls
        // runtime·cgocallbackg with (fn, frame, ctxt). With a nil fn, frame is
        // taken as a saved g and only runtime·dropm is called.
  1245  TEXT ·cgocallback(SB),NOSPLIT,$24-24
  1246  	NO_LOCAL_POINTERS
  1247  
  1248  	// Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g.
  1249  	// It is used to dropm while thread is exiting.
  1250  	MOVD	fn+0(FP), R1
  1251  	CBNZ	R1, loadg
  1252  	// Restore the g from frame.
  1253  	MOVD	frame+8(FP), g
  1254  	B	dropm
  1255  
  1256  loadg:
  1257  	// Load g from thread-local storage.
  1258  	BL	runtime·load_g(SB)
  1259  
  1260  	// If g is nil, either Go did not create the current thread,
  1261  	// or this thread has never called into Go on pthread platforms.
  1262  	// Call needm to obtain one for temporary use.
  1263  	// In this case, we're running on the thread stack, so there's
  1264  	// lots of space, but the linker doesn't know. Hide the call from
  1265  	// the linker analysis by using an indirect call.
  1266  	CBZ	g, needm
  1267  
  1268  	MOVD	g_m(g), R8
  1269  	MOVD	R8, savedm-8(SP)	// non-zero savedm: an m already existed on entry
  1270  	B	havem
  1271  
  1272  needm:
  1273  	MOVD	g, savedm-8(SP) // g is zero, so is m.
  1274  	MOVD	$runtime·needAndBindM(SB), R0
  1275  	BL	(R0)
  1276  
  1277  	// Set m->g0->sched.sp = SP, so that if a panic happens
  1278  	// during the function we are about to execute, it will
  1279  	// have a valid SP to run on the g0 stack.
  1280  	// The next few lines (after the havem label)
  1281  	// will save this SP onto the stack and then write
  1282  	// the same SP back to m->g0->sched.sp. That seems redundant,
  1283  	// but if an unrecovered panic happens, unwindm will
  1284  	// restore the g->sched.sp from the stack location
  1285  	// and then systemstack will try to use it. If we don't set it here,
  1286  	// that restored SP will be uninitialized (typically 0) and
  1287  	// will not be usable.
  1288  	MOVD	g_m(g), R8
  1289  	MOVD	m_g0(R8), R3
  1290  	MOVD	RSP, R0
  1291  	MOVD	R0, (g_sched+gobuf_sp)(R3)
  1292  	MOVD	R29, (g_sched+gobuf_bp)(R3)
  1293  
  1294  havem:
  1295  	// Now there's a valid m, and we're running on its m->g0.
  1296  	// Save current m->g0->sched.sp on stack and then set it to SP.
  1297  	// Save current sp in m->g0->sched.sp in preparation for
  1298  	// switch back to m->curg stack.
  1299  	// NOTE: unwindm knows that the saved g->sched.sp is at 16(RSP) aka savedsp-16(SP).
  1300  	// Beware that the frame size is actually 32+16.
  1301  	MOVD	m_g0(R8), R3
  1302  	MOVD	(g_sched+gobuf_sp)(R3), R4
  1303  	MOVD	R4, savedsp-16(SP)
  1304  	MOVD	RSP, R0
  1305  	MOVD	R0, (g_sched+gobuf_sp)(R3)
  1306  
  1307  	// Switch to m->curg stack and call runtime.cgocallbackg.
  1308  	// Because we are taking over the execution of m->curg
  1309  	// but *not* resuming what had been running, we need to
  1310  	// save that information (m->curg->sched) so we can restore it.
  1311  	// We can restore m->curg->sched.sp easily, because calling
  1312  	// runtime.cgocallbackg leaves SP unchanged upon return.
  1313  	// To save m->curg->sched.pc, we push it onto the curg stack and
  1314  	// open a frame the same size as cgocallback's g0 frame.
  1315  	// Once we switch to the curg stack, the pushed PC will appear
  1316  	// to be the return PC of cgocallback, so that the traceback
  1317  	// will seamlessly trace back into the earlier calls.
  1318  	MOVD	m_curg(R8), g
  1319  	BL	runtime·save_g(SB)
  1320  	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
  1321  	MOVD	(g_sched+gobuf_pc)(g), R5
  1322  	MOVD	R5, -48(R4)	// saved pc lands at 0(RSP) once RSP = R4-48
  1323  	MOVD	(g_sched+gobuf_bp)(g), R5
  1324  	MOVD	R5, -56(R4)	// saved bp goes in the word just below the new RSP
  1325  	// Gather our arguments into registers.
  1326  	MOVD	fn+0(FP), R1
  1327  	MOVD	frame+8(FP), R2
  1328  	MOVD	ctxt+16(FP), R3
  1329  	MOVD	$-48(R4), R0 // maintain 16-byte SP alignment
  1330  	MOVD	R0, RSP	// switch stack
  1331  	MOVD	R1, 8(RSP)	// fn
  1332  	MOVD	R2, 16(RSP)	// frame
  1333  	MOVD	R3, 24(RSP)	// ctxt
  1334  	MOVD	$runtime·cgocallbackg(SB), R0
  1335  	CALL	(R0) // indirect call to bypass nosplit check. We're on a different stack now.
  1336  
  1337  	// Restore g->sched (== m->curg->sched) from saved values.
  1338  	MOVD	0(RSP), R5	// the pc stored at -48(R4) above
  1339  	MOVD	R5, (g_sched+gobuf_pc)(g)
  1340  	MOVD	RSP, R4
  1341  	ADD	$48, R4, R4	// undo the 48-byte frame opened above
  1342  	MOVD	R4, (g_sched+gobuf_sp)(g)
  1343  
  1344  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
  1345  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
  1346  	// so we do not have to restore it.)
  1347  	MOVD	g_m(g), R8
  1348  	MOVD	m_g0(R8), g
  1349  	BL	runtime·save_g(SB)
  1350  	MOVD	(g_sched+gobuf_sp)(g), R0
  1351  	MOVD	R0, RSP
  1352  	MOVD	savedsp-16(SP), R4
  1353  	MOVD	R4, (g_sched+gobuf_sp)(g)
  1354  
  1355  	// If the m on entry was nil, we called needm above to borrow an m,
  1356  	// 1. for the duration of the call on non-pthread platforms,
  1357  	// 2. or the duration of the C thread alive on pthread platforms.
  1358  	// If the m on entry wasn't nil,
  1359  	// 1. the thread might be a Go thread,
  1360  	// 2. or it wasn't the first call from a C thread on pthread platforms,
  1361  	//    since then we skip dropm to reuse the m in the first call.
  1362  	MOVD	savedm-8(SP), R6
  1363  	CBNZ	R6, droppedm	// m existed on entry: nothing to drop
  1364  
  1365  	// Skip dropm to reuse it in the next call, when a pthread key has been created.
  1366  	MOVD	_cgo_pthread_key_created(SB), R6
  1367  	// It means cgo is disabled when _cgo_pthread_key_created is a nil pointer, need dropm.
  1368  	CBZ	R6, dropm
  1369  	MOVD	(R6), R6	// load the flag the pointer refers to
  1370  	CBNZ	R6, droppedm	// key created: keep the m for the next call
  1371  
  1372  dropm:
  1373  	MOVD	$runtime·dropm(SB), R0
  1374  	BL	(R0)
  1375  droppedm:
  1376  
  1377  	// Done!
  1378  	RET
  1379  
  1380  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1381  // Must obey the gcc calling convention.
        // The result is returned in R0. R27 and g are preserved across the call
        // through two frame slots: savedR27-8(SP) and saveG-16(SP).
  1382  TEXT _cgo_topofstack(SB),NOSPLIT,$24
  1383  	// g (R28) and REGTMP (R27)  might be clobbered by load_g. They
  1384  	// are callee-save in the gcc calling convention, so save them.
  1385  	MOVD	R27, savedR27-8(SP)
  1386  	MOVD	g, saveG-16(SP)
  1387  
  1388  	BL	runtime·load_g(SB)
  1389  	MOVD	g_m(g), R0
  1390  	MOVD	m_curg(R0), R0
  1391  	MOVD	(g_stack+stack_hi)(R0), R0	// R0 = g->m->curg.stack.hi
  1392  
  1393  	MOVD	saveG-16(SP), g
  1394  	MOVD	savedR27-8(SP), R27	// same slot and name as the save above
  1395  	RET
  1396  
  1397  // void setg(G*); set g. for use by needm.
        // Loads the argument into the g register and then calls save_g.
  1398  TEXT runtime·setg(SB), NOSPLIT, $0-8
  1399  	MOVD	gg+0(FP), g
  1400  	// This only happens if iscgo, so jump straight to save_g
  1401  	BL	runtime·save_g(SB)
  1402  	RET
  1403  
  1404  // void setg_gcc(G*); set g called from gcc
        // The new g arrives in R0. R27 is saved and restored around save_g.
  1405  TEXT setg_gcc<>(SB),NOSPLIT,$8
  1406  	MOVD	R0, g
  1407  	MOVD	R27, savedR27-8(SP)	// keep R27 across the save_g call
  1408  	BL	runtime·save_g(SB)
  1409  	MOVD	savedR27-8(SP), R27
  1410  	RET
  1411  
  1412  TEXT runtime·emptyfunc(SB),0,$0-0	// no flags, no frame, no arguments
  1413  	RET	// does nothing but return
  1414  
  1415  TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
        	// Deliberately faults: loads from address zero, with UNDEF behind it
        	// in case the load does not trap.
  1416  	MOVD	ZR, R0	// R0 = 0
  1417  	MOVD	(R0), R0	// load from address 0
  1418  	UNDEF
  1419  
  1420  // The top-most function running on a goroutine
  1421  // returns to goexit+PCQuantum.
        // The leading NOP is the instruction that such a return address skips.
  1422  TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0
  1423  	MOVD	R0, R0	// NOP
  1424  	BL	runtime·goexit1(SB)	// does not return
  1425  
  1426  // This is called from .init_array and follows the platform, not Go, ABI.
        // R0 holds the moduledata being added: it is linked after the current
        // runtime·lastmoduledatap and then becomes the new lastmoduledatap.
  1427  TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
  1428  	SUB	$0x10, RSP	// 16-byte scratch area for the R27 save
  1429  	MOVD	R27, 8(RSP) // The access to global variables below implicitly uses R27, which is callee-save
  1430  	MOVD	runtime·lastmoduledatap(SB), R1
  1431  	MOVD	R0, moduledata_next(R1)	// lastmoduledatap.next = R0
  1432  	MOVD	R0, runtime·lastmoduledatap(SB)	// lastmoduledatap = R0
  1433  	MOVD	8(RSP), R27
  1434  	ADD	$0x10, RSP
  1435  	RET
  1436  
  1437  TEXT ·checkASM(SB),NOSPLIT,$0-1
        	// Unconditionally stores 1 into the single-byte result.
  1438  	MOVW	$1, R3
  1439  	MOVB	R3, ret+0(FP)
  1440  	RET
  1441  
  1442  // gcWriteBarrier informs the GC about heap pointer writes.
  1443  //
  1444  // gcWriteBarrier does NOT follow the Go ABI. It accepts the
  1445  // number of bytes of buffer needed in R25, and returns a pointer
  1446  // to the buffer space in R25.
  1447  // It clobbers condition codes.
  1448  // It does not clobber any general-purpose registers except R27,
  1449  // but may clobber others (e.g., floating point registers)
  1450  // The act of CALLing gcWriteBarrier will clobber R30 (LR).
        //
        // Frame use: 184(RSP) holds R0/R1; on the flush path 1*8(RSP) through
        // 21*8(RSP) hold R2-R15 and R19-R26.
  1451  TEXT gcWriteBarrier<>(SB),NOSPLIT,$200
  1452  	// Save the registers clobbered by the fast path.
  1453  	STP	(R0, R1), 184(RSP)
  1454  retry:
  1455  	MOVD	g_m(g), R0
  1456  	MOVD	m_p(R0), R0	// R0 = g.m.p
  1457  	MOVD	(p_wbBuf+wbBuf_next)(R0), R1	// R1 = p.wbBuf.next
  1458  	MOVD	(p_wbBuf+wbBuf_end)(R0), R27	// R27 = p.wbBuf.end
  1459  	// Increment wbBuf.next position.
  1460  	ADD	R25, R1
  1461  	// Is the buffer full?
  1462  	CMP	R27, R1
  1463  	BHI	flush	// unsigned: new next > end
  1464  	// Commit to the larger buffer.
  1465  	MOVD	R1, (p_wbBuf+wbBuf_next)(R0)
  1466  	// Make return value (the original next position)
  1467  	SUB	R25, R1, R25
  1468  	// Restore registers.
  1469  	LDP	184(RSP), (R0, R1)
  1470  	RET
  1471  
  1472  flush:
  1473  	// Save all general purpose registers since these could be
  1474  	// clobbered by wbBufFlush and were not saved by the caller.
  1475  	// R0 and R1 already saved
  1476  	STP	(R2, R3), 1*8(RSP)
  1477  	STP	(R4, R5), 3*8(RSP)
  1478  	STP	(R6, R7), 5*8(RSP)
  1479  	STP	(R8, R9), 7*8(RSP)
  1480  	STP	(R10, R11), 9*8(RSP)
  1481  	STP	(R12, R13), 11*8(RSP)
  1482  	STP	(R14, R15), 13*8(RSP)
  1483  	// R16, R17 may be clobbered by linker trampoline
  1484  	// R18 is unused.
  1485  	STP	(R19, R20), 15*8(RSP)
  1486  	STP	(R21, R22), 17*8(RSP)
  1487  	STP	(R23, R24), 19*8(RSP)
  1488  	STP	(R25, R26), 21*8(RSP)
  1489  	// R27 is temp register.
  1490  	// R28 is g.
  1491  	// R29 is frame pointer (unused).
  1492  	// R30 is LR, which was saved by the prologue.
  1493  	// R31 is SP.
  1494  
  1495  	CALL	runtime·wbBufFlush(SB)
  1496  	LDP	1*8(RSP), (R2, R3)
  1497  	LDP	3*8(RSP), (R4, R5)
  1498  	LDP	5*8(RSP), (R6, R7)
  1499  	LDP	7*8(RSP), (R8, R9)
  1500  	LDP	9*8(RSP), (R10, R11)
  1501  	LDP	11*8(RSP), (R12, R13)
  1502  	LDP	13*8(RSP), (R14, R15)
  1503  	LDP	15*8(RSP), (R19, R20)
  1504  	LDP	17*8(RSP), (R21, R22)
  1505  	LDP	19*8(RSP), (R23, R24)
  1506  	LDP	21*8(RSP), (R25, R26)	// R25 is the requested byte count again
  1507  	JMP	retry	// R0/R1 are reloaded from wbBuf at retry
  1508  
  1509  TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT,$0
        	// gcWriteBarrier1 through gcWriteBarrier8 each load a fixed byte count
        	// (8, 16, ... 64) into R25 and tail-jump to gcWriteBarrier<>.
  1510  	MOVD	$8, R25	// request 8 bytes
  1511  	JMP	gcWriteBarrier<>(SB)
  1512  TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT,$0
  1513  	MOVD	$16, R25	// request 16 bytes
  1514  	JMP	gcWriteBarrier<>(SB)
  1515  TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT,$0
  1516  	MOVD	$24, R25	// request 24 bytes
  1517  	JMP	gcWriteBarrier<>(SB)
  1518  TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT,$0
  1519  	MOVD	$32, R25	// request 32 bytes
  1520  	JMP	gcWriteBarrier<>(SB)
  1521  TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT,$0
  1522  	MOVD	$40, R25	// request 40 bytes
  1523  	JMP	gcWriteBarrier<>(SB)
  1524  TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT,$0
  1525  	MOVD	$48, R25	// request 48 bytes
  1526  	JMP	gcWriteBarrier<>(SB)
  1527  TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT,$0
  1528  	MOVD	$56, R25	// request 56 bytes
  1529  	JMP	gcWriteBarrier<>(SB)
  1530  TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT,$0
  1531  	MOVD	$64, R25	// request 64 bytes
  1532  	JMP	gcWriteBarrier<>(SB)
  1533  
  1534  DATA	debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"	// reported by debugCallV2 when no debugCall size fits
  1535  GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $20	// Size duplicated below
  1536  
  1537  // debugCallV2 is the entry point for debugger-injected function
  1538  // calls on running goroutines. It informs the runtime that a
  1539  // debug call has been injected and creates a call frame for the
  1540  // debugger to fill in.
  1541  //
  1542  // To inject a function call, a debugger should:
  1543  // 1. Check that the goroutine is in state _Grunning and that
  1544  //    there are at least 288 bytes free on the stack.
  1545  // 2. Set SP as SP-16.
  1546  // 3. Store the current LR in (SP) (using the SP after step 2).
  1547  // 4. Store the current PC in the LR register.
  1548  // 5. Write the desired argument frame size at SP-16
  1549  // 6. Save all machine registers (including flags and fpsimd registers)
  1550  //    so they can be restored later by the debugger.
  1551  // 7. Set the PC to debugCallV2 and resume execution.
  1552  //
  1553  // If the goroutine is in state _Grunnable, then it's not generally
  1554  // safe to inject a call because it may return out via other runtime
  1555  // operations. Instead, the debugger should unwind the stack to find
  1556  // the return to non-runtime code, add a temporary breakpoint there,
  1557  // and inject the call once that breakpoint is hit.
  1558  //
  1559  // If the goroutine is in any other state, it's not safe to inject a call.
  1560  //
  1561  // This function communicates back to the debugger by setting R20 and
  1562  // invoking BRK to raise a breakpoint signal. Note that the signal PC of
  1563  // the signal triggered by the BRK instruction is the PC where the signal
  1564  // is trapped, not the next PC, so to resume execution, the debugger needs
  1565  // to set the signal PC to PC+4. See the comments in the implementation for
  1566  // the protocol the debugger is expected to follow. InjectDebugCall in the
  1567  // runtime tests demonstrates this protocol.
  1568  //
  1569  // The debugger must ensure that any pointers passed to the function
  1570  // obey escape analysis requirements. Specifically, it must not pass
  1571  // a stack pointer to an escaping argument. debugCallV2 cannot check
  1572  // this invariant.
  1573  //
  1574  // This is ABIInternal because Go code injects its PC directly into new
  1575  // goroutine stacks.
  1576  TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
        	// Frame layout after the two SUBs below (offsets from the new RSP):
        	//   -8(RSP)   saved R29
        	//   0(RSP)    saved R30 on entry (reloaded into R27 and jumped to at the end)
        	//   8(RSP)..  argument/result words for the calls made here
        	//   4*8..31*8 saved R0-R17, R19-R27 and g
        	//   256(RSP)  argument frame size written by the debugger
  1577  	STP	(R29, R30), -280(RSP)	// lands at -8(RSP) and 0(RSP) once RSP drops by 272
  1578  	SUB	$272, RSP, RSP
  1579  	SUB	$8, RSP, R29	// R29 = address of the saved R29
  1580  	// Save all registers that may contain pointers so they can be
  1581  	// conservatively scanned.
  1582  	//
  1583  	// We can't do anything that might clobber any of these
  1584  	// registers before this.
  1585  	STP	(R27, g), (30*8)(RSP)
  1586  	STP	(R25, R26), (28*8)(RSP)
  1587  	STP	(R23, R24), (26*8)(RSP)
  1588  	STP	(R21, R22), (24*8)(RSP)
  1589  	STP	(R19, R20), (22*8)(RSP)
  1590  	STP	(R16, R17), (20*8)(RSP)
  1591  	STP	(R14, R15), (18*8)(RSP)
  1592  	STP	(R12, R13), (16*8)(RSP)
  1593  	STP	(R10, R11), (14*8)(RSP)
  1594  	STP	(R8, R9), (12*8)(RSP)
  1595  	STP	(R6, R7), (10*8)(RSP)
  1596  	STP	(R4, R5), (8*8)(RSP)
  1597  	STP	(R2, R3), (6*8)(RSP)
  1598  	STP	(R0, R1), (4*8)(RSP)
  1599  
  1600  	// Perform a safe-point check.
  1601  	MOVD	R30, 8(RSP) // Caller's PC
  1602  	CALL	runtime·debugCallCheck(SB)
  1603  	MOVD	16(RSP), R0	// first result word; zero means the check passed
  1604  	CBZ	R0, good
  1605  
  1606  	// The safety check failed. Put the reason string at the top
  1607  	// of the stack.
  1608  	MOVD	R0, 8(RSP)
  1609  	MOVD	24(RSP), R0	// second result word
  1610  	MOVD	R0, 16(RSP)
  1611  
  1612  	// Set R20 to 8 and invoke BRK. The debugger should get the
  1613  	// reason a call can't be injected from SP+8 and resume execution.
  1614  	MOVD	$8, R20
  1615  	BREAK
  1616  	JMP	restore
  1617  
  1618  good:
  1619  	// Registers are saved and it's safe to make a call.
  1620  	// Open up a call frame, moving the stack if necessary.
  1621  	//
  1622  	// Once the frame is allocated, this will set R20 to 0 and
  1623  	// invoke BRK. The debugger should write the argument
  1624  	// frame for the call at SP+8, set up argument registers,
  1625  	// set the LR as the signal PC + 4, set the PC to the function
  1626  	// to call, set R26 to point to the closure (if a closure call),
  1627  	// and resume execution.
  1628  	//
  1629  	// If the function returns, this will set R20 to 1 and invoke
  1630  	// BRK. The debugger can then inspect any return value saved
  1631  	// on the stack at SP+8 and in registers. To resume execution,
  1632  	// the debugger should restore the LR from (SP).
  1633  	//
  1634  	// If the function panics, this will set R20 to 2 and invoke BRK.
  1635  	// The interface{} value of the panic will be at SP+8. The debugger
  1636  	// can inspect the panic value and resume execution again.
        	//
        	// DEBUG_CALL_DISPATCH: when the frame size in R0 is at most MAXSIZE
        	// (signed compare), pass NAME to debugCallWrap and jump to restore;
        	// otherwise skip ahead to the instruction following the macro.
  1637  #define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
  1638  	CMP	$MAXSIZE, R0;			\
  1639  	BGT	5(PC);				\
  1640  	MOVD	$NAME(SB), R0;			\
  1641  	MOVD	R0, 8(RSP);			\
  1642  	CALL	runtime·debugCallWrap(SB);	\
  1643  	JMP	restore
  1644  
  1645  	MOVD	256(RSP), R0 // the argument frame size
  1646  	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
  1647  	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
  1648  	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
  1649  	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
  1650  	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
  1651  	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
  1652  	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
  1653  	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
  1654  	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
  1655  	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
  1656  	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
  1657  	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
  1658  	// The frame size is too large. Report the error.
  1659  	MOVD	$debugCallFrameTooLarge<>(SB), R0
  1660  	MOVD	R0, 8(RSP)
  1661  	MOVD	$20, R0
  1662  	MOVD	R0, 16(RSP) // length of debugCallFrameTooLarge string
  1663  	MOVD	$8, R20
  1664  	BREAK
  1665  	JMP	restore
  1666  
  1667  restore:
  1668  	// Calls and failures resume here.
  1669  	//
  1670  	// Set R20 to 16 and invoke BRK. The debugger should restore
  1671  	// all registers except for PC and RSP and resume execution.
  1672  	MOVD	$16, R20
  1673  	BREAK
  1674  	// We must not modify flags after this point.
  1675  
  1676  	// Restore pointer-containing registers, which may have been
  1677  	// modified from the debugger's copy by stack copying.
  1678  	LDP	(30*8)(RSP), (R27, g)
  1679  	LDP	(28*8)(RSP), (R25, R26)
  1680  	LDP	(26*8)(RSP), (R23, R24)
  1681  	LDP	(24*8)(RSP), (R21, R22)
  1682  	LDP	(22*8)(RSP), (R19, R20)
  1683  	LDP	(20*8)(RSP), (R16, R17)
  1684  	LDP	(18*8)(RSP), (R14, R15)
  1685  	LDP	(16*8)(RSP), (R12, R13)
  1686  	LDP	(14*8)(RSP), (R10, R11)
  1687  	LDP	(12*8)(RSP), (R8, R9)
  1688  	LDP	(10*8)(RSP), (R6, R7)
  1689  	LDP	(8*8)(RSP), (R4, R5)
  1690  	LDP	(6*8)(RSP), (R2, R3)
  1691  	LDP	(4*8)(RSP), (R0, R1)
  1692  
  1693  	LDP	-8(RSP), (R29, R27)	// R29 = saved frame pointer, R27 = R30 saved on entry
  1694  	ADD	$288, RSP, RSP // Add 16 more bytes, see saveSigContext
  1695  	MOVD	-16(RSP), R30 // restore old lr
  1696  	JMP	(R27)	// resume at the entry R30 value
  1697  
  1698  // runtime.debugCallCheck assumes that functions defined with the
  1699  // DEBUG_CALL_FN macro are safe points to inject calls.
        //
        // Each generated function has a MAXSIZE-byte frame. It sets R20 to 0 and
        // executes BREAK, then sets R20 to 1 and executes BREAK again, then returns.
  1700  #define DEBUG_CALL_FN(NAME,MAXSIZE)		\
  1701  TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
  1702  	NO_LOCAL_POINTERS;		\
  1703  	MOVD	$0, R20;		\
  1704  	BREAK;		\
  1705  	MOVD	$1, R20;		\
  1706  	BREAK;		\
  1707  	RET
        // One function per size class used by DEBUG_CALL_DISPATCH in debugCallV2.
  1708  DEBUG_CALL_FN(debugCall32<>, 32)
  1709  DEBUG_CALL_FN(debugCall64<>, 64)
  1710  DEBUG_CALL_FN(debugCall128<>, 128)
  1711  DEBUG_CALL_FN(debugCall256<>, 256)
  1712  DEBUG_CALL_FN(debugCall512<>, 512)
  1713  DEBUG_CALL_FN(debugCall1024<>, 1024)
  1714  DEBUG_CALL_FN(debugCall2048<>, 2048)
  1715  DEBUG_CALL_FN(debugCall4096<>, 4096)
  1716  DEBUG_CALL_FN(debugCall8192<>, 8192)
  1717  DEBUG_CALL_FN(debugCall16384<>, 16384)
  1718  DEBUG_CALL_FN(debugCall32768<>, 32768)
  1719  DEBUG_CALL_FN(debugCall65536<>, 65536)
  1720  
  1721  // func debugCallPanicked(val interface{})
        // Copies both words of val to 8(RSP) and 16(RSP), sets R20 to 2 and
        // executes BREAK.
  1722  TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
  1723  	// Copy the panic value to the top of stack at SP+8.
  1724  	MOVD	val_type+0(FP), R0
  1725  	MOVD	R0, 8(RSP)	// type word
  1726  	MOVD	val_data+8(FP), R0
  1727  	MOVD	R0, 16(RSP)	// data word
  1728  	MOVD	$2, R20
  1729  	BREAK
  1730  	RET
  1731  
  1732  TEXT runtime·panicBounds<ABIInternal>(SB),NOSPLIT,$144-0
        	// Spills R0-R15 to a save area starting at 24(RSP), then calls
        	// panicBounds64 with R0 = LR and R1 = address of that save area.
  1733  	NO_LOCAL_POINTERS
  1734  	// Save all 16 int registers that could have an index in them.
  1735  	// They may be pointers, but if they are they are dead.
  1736  	STP	(R0, R1), 24(RSP)
  1737  	STP	(R2, R3), 40(RSP)
  1738  	STP	(R4, R5), 56(RSP)
  1739  	STP	(R6, R7), 72(RSP)
  1740  	STP	(R8, R9), 88(RSP)
  1741  	STP	(R10, R11), 104(RSP)
  1742  	STP	(R12, R13), 120(RSP)
  1743  	STP	(R14, R15), 136(RSP)
  1744  	MOVD	LR, R0		// PC immediately after call to panicBounds
  1745  	ADD	$24, RSP, R1	// pointer to save area
  1746  	CALL	runtime·panicBounds64<ABIInternal>(SB)
  1747  	RET
  1748  
  1749  TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
  1750  	MOVD R29, R0	// result (R0) = current value of R29, the frame pointer register
  1751  	RET
  1752  

View as plain text