Text file src/runtime/asm_arm64.s

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "tls_arm64.h"
     8  #include "funcdata.h"
     9  #include "textflag.h"
    10  #include "cgo/abi_arm64.h"
    11  
    12  // _rt0_arm64 is common startup code for most arm64 systems when using
    13  // internal linking. This is the entry point for the program from the
    14  // kernel for an ordinary -buildmode=exe program. The stack holds the
    15  // number of arguments and the C-style argv.
    16  TEXT _rt0_arm64(SB),NOSPLIT,$0
    17  	MOVD	0(RSP), R0	// argc
    18  	ADD	$8, RSP, R1	// argv
    19  	JMP	runtime·rt0_go(SB)
    20  
    21  // main is common startup code for most arm64 systems when using
    22  // external linking. The C startup code will call the symbol "main"
    23  // passing argc and argv in the usual C ABI registers R0 and R1.
    24  TEXT main(SB),NOSPLIT,$0
    25  	JMP	runtime·rt0_go(SB)
    26  
    27  // _rt0_arm64_lib is common startup code for most arm64 systems when
    28  // using -buildmode=c-archive or -buildmode=c-shared. The linker will
    29  // arrange to invoke this function as a global constructor (for
    30  // c-archive) or when the shared library is loaded (for c-shared).
    31  // We expect argc and argv to be passed in the usual C ABI registers
    32  // R0 and R1.
    33  TEXT _rt0_arm64_lib(SB),NOSPLIT,$184
    34  	// Preserve callee-save registers.
    35  	SAVE_R19_TO_R28(24)
    36  	SAVE_F8_TO_F15(104)
    37  
    38  	// Initialize g as null in case of using g later e.g. sigaction in cgo_sigaction.go
    39  	MOVD	ZR, g
    40  
    41  	MOVD	R0, _rt0_arm64_lib_argc<>(SB)
    42  	MOVD	R1, _rt0_arm64_lib_argv<>(SB)
    43  
    44  	// Synchronous initialization.
    45  	MOVD	$runtime·libpreinit(SB), R4
    46  	BL	(R4)
    47  
    48  	// Create a new thread to do the runtime initialization and return.
    49  	MOVD	_cgo_sys_thread_create(SB), R4
    50  	CBZ	R4, nocgo
    51  	MOVD	$_rt0_arm64_lib_go(SB), R0
    52  	MOVD	$0, R1
    53  	SUB	$16, RSP		// reserve 16 bytes for sp-8 where fp may be saved.
    54  	BL	(R4)
    55  	ADD	$16, RSP
    56  	B	restore
    57  
    58  nocgo:
    59  	MOVD	$0x800000, R0                     // stacksize = 8192KB
    60  	MOVD	$_rt0_arm64_lib_go(SB), R1
    61  	MOVD	R0, 8(RSP)
    62  	MOVD	R1, 16(RSP)
    63  	MOVD	$runtime·newosproc0(SB),R4
    64  	BL	(R4)
    65  
    66  restore:
    67  	// Restore callee-save registers.
    68  	RESTORE_R19_TO_R28(24)
    69  	RESTORE_F8_TO_F15(104)
    70  	RET
    71  
    72  TEXT _rt0_arm64_lib_go(SB),NOSPLIT,$0
    73  	MOVD	_rt0_arm64_lib_argc<>(SB), R0
    74  	MOVD	_rt0_arm64_lib_argv<>(SB), R1
    75  	MOVD	$runtime·rt0_go(SB),R4
    76  	B	(R4)
    77  
    78  DATA _rt0_arm64_lib_argc<>(SB)/8, $0
    79  GLOBL _rt0_arm64_lib_argc<>(SB),NOPTR, $8
    80  DATA _rt0_arm64_lib_argv<>(SB)/8, $0
    81  GLOBL _rt0_arm64_lib_argv<>(SB),NOPTR, $8
    82  
    83  #ifdef GOARM64_LSE
    84  DATA no_lse_msg<>+0x00(SB)/64, $"This program can only run on ARM64 processors with LSE support.\n"
    85  GLOBL no_lse_msg<>(SB), RODATA, $64
    86  #endif
    87  
    88  // We know for sure that Linux and FreeBSD allow reading the instruction
    89  // set attribute registers (while some other OSes, like OpenBSD and
    90  // Darwin, do not). Let's be conservative and read such registers only
    91  // when we are sure this won't lead to SIGILL.
    92  #ifdef GOOS_linux
    93  #define ISA_REGS_READABLE
    94  #endif
    95  #ifdef GOOS_freebsd
    96  #define ISA_REGS_READABLE
    97  #endif
    98  
    99  #ifdef GOARM64_LSE
   100  #ifdef ISA_REGS_READABLE
   101  #define CHECK_GOARM64_LSE
   102  #endif
   103  #endif
   104  
   105  TEXT runtime·rt0_go(SB),NOSPLIT|TOPFRAME,$0
   106  	// SP = stack; R0 = argc; R1 = argv
   107  
   108  	SUB	$32, RSP
   109  	MOVW	R0, 8(RSP) // argc
   110  	MOVD	R1, 16(RSP) // argv
   111  
   112  #ifdef TLS_darwin
   113  	// Initialize TLS.
   114  	MOVD	ZR, g // clear g, make sure it's not junk.
   115  	SUB	$32, RSP
   116  	MRS_TPIDR_R0
   117  	AND	$~7, R0
   118  	MOVD	R0, 16(RSP)             // arg2: TLS base
   119  	MOVD	$runtime·tls_g(SB), R2
   120  	MOVD	R2, 8(RSP)              // arg1: &tlsg
   121  	BL	·tlsinit(SB)
   122  	ADD	$32, RSP
   123  #endif
   124  
   125  	// create istack out of the given (operating system) stack.
   126  	// _cgo_init may update stackguard.
   127  	MOVD	$runtime·g0(SB), g
   128  	MOVD	RSP, R7
   129  	MOVD	$(-64*1024)(R7), R0
   130  	MOVD	R0, g_stackguard0(g)
   131  	MOVD	R0, g_stackguard1(g)
   132  	MOVD	R0, (g_stack+stack_lo)(g)
   133  	MOVD	R7, (g_stack+stack_hi)(g)
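        	// (Descriptive note, not in the original: the 64 KiB below the entry
        	// SP is used as a provisional g0 stack here; the guards are recomputed
        	// from stack_lo plus stackGuard after _cgo_init runs, see nocgo below.)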
   134  
   135  	// if there is a _cgo_init, call it using the gcc ABI.
   136  	MOVD	_cgo_init(SB), R12
   137  	CBZ	R12, nocgo
   138  
   139  #ifdef GOOS_android
   140  	MRS_TPIDR_R0			// load TLS base pointer
   141  	MOVD	R0, R3			// arg 3: TLS base pointer
   142  	MOVD	$runtime·tls_g(SB), R2 	// arg 2: &tls_g
   143  #else
   144  	MOVD	$0, R2		        // arg 2: not used when using platform's TLS
   145  #endif
   146  	MOVD	$setg_gcc<>(SB), R1	// arg 1: setg
   147  	MOVD	g, R0			// arg 0: G
   148  	SUB	$16, RSP		// reserve 16 bytes for sp-8 where fp may be saved.
   149  	BL	(R12)
   150  	ADD	$16, RSP
   151  
   152  nocgo:
   153  	BL	runtime·save_g(SB)
   154  	// update stackguard after _cgo_init
   155  	MOVD	(g_stack+stack_lo)(g), R0
   156  	ADD	$const_stackGuard, R0
   157  	MOVD	R0, g_stackguard0(g)
   158  	MOVD	R0, g_stackguard1(g)
   159  
   160  	// set the per-goroutine and per-mach "registers"
   161  	MOVD	$runtime·m0(SB), R0
   162  
   163  	// save m->g0 = g0
   164  	MOVD	g, m_g0(R0)
   165  	// save m0 to g0->m
   166  	MOVD	R0, g_m(g)
   167  
   168  	BL	runtime·check(SB)
   169  
   170  #ifdef GOOS_windows
   171  	BL	runtime·wintls(SB)
   172  #endif
   173  
   174  	// Check that the CPU we are running on supports the instructions targeted at compile time.
   175  #ifdef CHECK_GOARM64_LSE
   176  	// Read the ID_AA64ISAR0_EL1 register
   177  	MRS	ID_AA64ISAR0_EL1, R0
   178  
   179  	// Extract the LSE field (bits [23:20])
   180  	LSR	$20, R0, R0
   181  	AND	$0xf, R0, R0
   182  
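        	// (Descriptive note, not in the original: the Atomic field of
        	// ID_AA64ISAR0_EL1 reads as 0 when the LSE atomic instructions are not
        	// implemented and as a non-zero value when they are, hence the simple
        	// non-zero test below.)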
   183  	// LSE support is indicated by a non-zero value
   184  	CBZ	R0, no_lse
   185  #endif
   186  
   187  	MOVW	8(RSP), R0	// copy argc
   188  	MOVW	R0, -8(RSP)
   189  	MOVD	16(RSP), R0		// copy argv
   190  	MOVD	R0, 0(RSP)
   191  	BL	runtime·args(SB)
   192  	BL	runtime·osinit(SB)
   193  	BL	runtime·schedinit(SB)
   194  
   195  	// create a new goroutine to start program
   196  	MOVD	$runtime·mainPC(SB), R0		// entry
   197  	SUB	$16, RSP
   198  	MOVD	R0, 8(RSP) // arg
   199  	MOVD	$0, 0(RSP) // dummy LR
   200  	BL	runtime·newproc(SB)
   201  	ADD	$16, RSP
   202  
   203  	// start this M
   204  	BL	runtime·mstart(SB)
   205  	UNDEF
   206  
   207  #ifdef CHECK_GOARM64_LSE
   208  no_lse:
   209  	MOVD	$1, R0 // stderr
   210  	MOVD	R0, 8(RSP)
   211  	MOVD	$no_lse_msg<>(SB), R1 // message address
   212  	MOVD	R1, 16(RSP)
   213  	MOVD	$64, R2 // message length
   214  	MOVD	R2, 24(RSP)
   215  	CALL	runtime·write(SB)
   216  	CALL	runtime·exit(SB)
   217  	CALL	runtime·abort(SB)
   218  	RET
   219  #endif
   220  
   221  	// Prevent dead-code elimination of debugCallV2 and debugPinnerV1, which are
   222  	// intended to be called by debuggers.
   223  	MOVD	$runtime·debugPinnerV1<ABIInternal>(SB), R0
   224  	MOVD	$runtime·debugCallV2<ABIInternal>(SB), R0
   225  
   226  	MOVD	$0, R0
   227  	MOVD	R0, (R0)	// boom
   228  	UNDEF
   229  
   230  DATA	runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
   231  GLOBL	runtime·mainPC(SB),RODATA,$8
   232  
   233  // Windows ARM64 needs an immediate 0xf000 argument.
   234  // See go.dev/issues/53837.
   235  #define BREAK	\
   236  #ifdef GOOS_windows	\
   237  	BRK	$0xf000 	\
   238  #else 				\
   239  	BRK 			\
   240  #endif 				\
   241  
   242  
   243  TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
   244  	BREAK
   245  	RET
   246  
   247  TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
   248  	RET
   249  
   250  TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
   251  	BL	runtime·mstart0(SB)
   252  	RET // not reached
   253  
   254  /*
   255   *  go-routine
   256   */
   257  
   258  // void gogo(Gobuf*)
   259  // restore state from Gobuf; longjmp
   260  TEXT runtime·gogo(SB), NOSPLIT|NOFRAME, $0-8
   261  	MOVD	buf+0(FP), R5
   262  	MOVD	gobuf_g(R5), R6
   263  	MOVD	0(R6), R4	// make sure g != nil
   264  	B	gogo<>(SB)
   265  
   266  TEXT gogo<>(SB), NOSPLIT|NOFRAME, $0
   267  	MOVD	R6, g
   268  	BL	runtime·save_g(SB)
   269  
   270  	MOVD	gobuf_sp(R5), R0
   271  	MOVD	R0, RSP
   272  	MOVD	gobuf_bp(R5), R29
   273  	MOVD	gobuf_lr(R5), LR
   274  	MOVD	gobuf_ctxt(R5), R26
   275  	MOVD	$0, gobuf_sp(R5)
   276  	MOVD	$0, gobuf_bp(R5)
   277  	MOVD	$0, gobuf_lr(R5)
   278  	MOVD	$0, gobuf_ctxt(R5)
   279  	CMP	ZR, ZR // set condition codes for == test, needed by stack split
   280  	MOVD	gobuf_pc(R5), R6
   281  	B	(R6)
   282  
   283  // void mcall(fn func(*g))
   284  // Switch to m->g0's stack, call fn(g).
   285  // Fn must never return. It should gogo(&g->sched)
   286  // to keep running g.
   287  TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-8
   288  	MOVD	R0, R26				// context
   289  
   290  	// Save caller state in g->sched
   291  	MOVD	RSP, R0
   292  	MOVD	R0, (g_sched+gobuf_sp)(g)
   293  	MOVD	R29, (g_sched+gobuf_bp)(g)
   294  	MOVD	LR, (g_sched+gobuf_pc)(g)
   295  	MOVD	$0, (g_sched+gobuf_lr)(g)
   296  
   297  	// Switch to m->g0 & its stack, call fn.
   298  	MOVD	g, R3
   299  	MOVD	g_m(g), R8
   300  	MOVD	m_g0(R8), g
   301  	BL	runtime·save_g(SB)
   302  	CMP	g, R3
   303  	BNE	2(PC)
   304  	B	runtime·badmcall(SB)
   305  
   306  	MOVD	(g_sched+gobuf_sp)(g), R0
   307  	MOVD	R0, RSP	// sp = m->g0->sched.sp
   308  	MOVD	$0, R29				// clear frame pointer, as caller may execute on another M
   309  	MOVD	R3, R0				// arg = g
   310  	MOVD	$0, -16(RSP)			// dummy LR
   311  	SUB	$16, RSP
   312  	MOVD	0(R26), R4			// code pointer
   313  	BL	(R4)
   314  	B	runtime·badmcall2(SB)
   315  
   316  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   317  // of the G stack. We need to distinguish the routine that
   318  // lives at the bottom of the G stack from the one that lives
   319  // at the top of the system stack because the one at the top of
   320  // the system stack terminates the stack walk (see topofstack()).
   321  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   322  	UNDEF
   323  	BL	(LR)	// make sure this function is not leaf
   324  	RET
   325  
   326  // func systemstack(fn func())
   327  TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   328  	MOVD	fn+0(FP), R3	// R3 = fn
   329  	MOVD	R3, R26		// context
   330  	MOVD	g_m(g), R4	// R4 = m
   331  
   332  	MOVD	m_gsignal(R4), R5	// R5 = gsignal
   333  	CMP	g, R5
   334  	BEQ	noswitch
   335  
   336  	MOVD	m_g0(R4), R5	// R5 = g0
   337  	CMP	g, R5
   338  	BEQ	noswitch
   339  
   340  	MOVD	m_curg(R4), R6
   341  	CMP	g, R6
   342  	BEQ	switch
   343  
   344  	// Bad: g is not gsignal, not g0, not curg. What is it?
   345  	// Hide call from linker nosplit analysis.
   346  	MOVD	$runtime·badsystemstack(SB), R3
   347  	BL	(R3)
   348  	B	runtime·abort(SB)
   349  
   350  switch:
   351  	// Switch stacks.
   352  	// The original frame pointer is stored in R29,
   353  	// which is useful for stack unwinding.
   354  	// Save our state in g->sched. Pretend to
   355  	// be systemstack_switch if the G stack is scanned.
   356  	BL	gosave_systemstack_switch<>(SB)
   357  
   358  	// switch to g0
   359  	MOVD	R5, g
   360  	BL	runtime·save_g(SB)
   361  	MOVD	(g_sched+gobuf_sp)(g), R3
   362  	MOVD	R3, RSP
   363  
   364  	// call target function
   365  	MOVD	0(R26), R3	// code pointer
   366  	BL	(R3)
   367  
   368  	// switch back to g
   369  	MOVD	g_m(g), R3
   370  	MOVD	m_curg(R3), g
   371  	BL	runtime·save_g(SB)
   372  	MOVD	(g_sched+gobuf_sp)(g), R0
   373  	MOVD	R0, RSP
   374  	MOVD	(g_sched+gobuf_bp)(g), R29
   375  	MOVD	$0, (g_sched+gobuf_sp)(g)
   376  	MOVD	$0, (g_sched+gobuf_bp)(g)
   377  	RET
   378  
   379  noswitch:
   380  	// already on m stack, just call directly
   381  	// Using a tail call here cleans up tracebacks since we won't stop
   382  	// at an intermediate systemstack.
   383  	MOVD	0(R26), R3	// code pointer
   384  	MOVD.P	16(RSP), R30	// restore LR
   385  	SUB	$8, RSP, R29	// restore FP
   386  	B	(R3)
   387  
   388  // func switchToCrashStack0(fn func())
   389  TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
   390  	MOVD	R0, R26    // context register
   391  	MOVD	g_m(g), R1 // curm
   392  
   393  	// set g to gcrash
   394  	MOVD	$runtime·gcrash(SB), g // g = &gcrash
   395  	BL	runtime·save_g(SB)         // clobbers R0
   396  	MOVD	R1, g_m(g)             // g.m = curm
   397  	MOVD	g, m_g0(R1)            // curm.g0 = g
   398  
   399  	// switch to crashstack
   400  	MOVD	(g_stack+stack_hi)(g), R1
   401  	SUB	$(4*8), R1
   402  	MOVD	R1, RSP
   403  
   404  	// call target function
   405  	MOVD	0(R26), R0
   406  	CALL	(R0)
   407  
   408  	// should never return
   409  	CALL	runtime·abort(SB)
   410  	UNDEF
   411  
   412  /*
   413   * support for morestack
   414   */
   415  
   416  // Called during function prolog when more stack is needed.
   417  // Caller has already loaded:
   418  // R3 prolog's LR (R30)
   419  //
   420  // The traceback routines see morestack on a g0 as being
   421  // the top of a stack (for example, morestack calling newstack
   422  // calling the scheduler calling newm calling gc), so we must
   423  // record an argument size. For that purpose, it has no arguments.
   424  TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
   425  	// Cannot grow scheduler stack (m->g0).
   426  	MOVD	g_m(g), R8
   427  	MOVD	m_g0(R8), R4
   428  
   429  	// Called from f.
   430  	// Set g->sched to context in f
   431  	MOVD	RSP, R0
   432  	MOVD	R0, (g_sched+gobuf_sp)(g)
   433  	MOVD	R29, (g_sched+gobuf_bp)(g)
   434  	MOVD	LR, (g_sched+gobuf_pc)(g)
   435  	MOVD	R3, (g_sched+gobuf_lr)(g)
   436  	MOVD	R26, (g_sched+gobuf_ctxt)(g)
   437  
   438  	CMP	g, R4
   439  	BNE	3(PC)
   440  	BL	runtime·badmorestackg0(SB)
   441  	B	runtime·abort(SB)
   442  
   443  	// Cannot grow signal stack (m->gsignal).
   444  	MOVD	m_gsignal(R8), R4
   445  	CMP	g, R4
   446  	BNE	3(PC)
   447  	BL	runtime·badmorestackgsignal(SB)
   448  	B	runtime·abort(SB)
   449  
   450  	// Called from f.
   451  	// Set m->morebuf to f's callers.
   452  	MOVD	R3, (m_morebuf+gobuf_pc)(R8)	// f's caller's PC
   453  	MOVD	RSP, R0
   454  	MOVD	R0, (m_morebuf+gobuf_sp)(R8)	// f's caller's RSP
   455  	MOVD	g, (m_morebuf+gobuf_g)(R8)
   456  
   457  	// Call newstack on m->g0's stack.
   458  	MOVD	m_g0(R8), g
   459  	BL	runtime·save_g(SB)
   460  	MOVD	(g_sched+gobuf_sp)(g), R0
   461  	MOVD	R0, RSP
   462  	MOVD	$0, R29		// clear frame pointer, as caller may execute on another M
   463  	MOVD.W	$0, -16(RSP)	// create a call frame on g0 (saved LR; keep 16-aligned)
   464  	BL	runtime·newstack(SB)
   465  
   466  	// Not reached, but make sure the return PC from the call to newstack
   467  	// is still in this function, and not the beginning of the next.
   468  	UNDEF
   469  
   470  TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
   471  	// Force SPWRITE. This function doesn't actually write SP,
   472  	// but it is called with a special calling convention where
   473  	// the caller doesn't save LR on stack but passes it as a
   474  	// register (R3), which the unwinder currently doesn't understand.
   475  	// Make it SPWRITE to stop unwinding. (See issue 54332)
   476  	MOVD	RSP, RSP
   477  
   478  	MOVW	$0, R26
   479  	B runtime·morestack(SB)
   480  
   481  // spillArgs stores return values from registers to a *internal/abi.RegArgs in R20.
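        // A descriptive note (assuming the arm64 internal/abi.RegArgs layout of
        // 16 integer register slots followed by 16 floating-point register slots,
        // 8 bytes each): R0-R15 land at offsets 0-120 and F0-F15 at offsets
        // 128-248 of the struct pointed to by R20.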
   482  TEXT ·spillArgs(SB),NOSPLIT,$0-0
   483  	STP	(R0, R1), (0*8)(R20)
   484  	STP	(R2, R3), (2*8)(R20)
   485  	STP	(R4, R5), (4*8)(R20)
   486  	STP	(R6, R7), (6*8)(R20)
   487  	STP	(R8, R9), (8*8)(R20)
   488  	STP	(R10, R11), (10*8)(R20)
   489  	STP	(R12, R13), (12*8)(R20)
   490  	STP	(R14, R15), (14*8)(R20)
   491  	FSTPD	(F0, F1), (16*8)(R20)
   492  	FSTPD	(F2, F3), (18*8)(R20)
   493  	FSTPD	(F4, F5), (20*8)(R20)
   494  	FSTPD	(F6, F7), (22*8)(R20)
   495  	FSTPD	(F8, F9), (24*8)(R20)
   496  	FSTPD	(F10, F11), (26*8)(R20)
   497  	FSTPD	(F12, F13), (28*8)(R20)
   498  	FSTPD	(F14, F15), (30*8)(R20)
   499  	RET
   500  
   501  // unspillArgs loads args into registers from a *internal/abi.RegArgs in R20.
   502  TEXT ·unspillArgs(SB),NOSPLIT,$0-0
   503  	LDP	(0*8)(R20), (R0, R1)
   504  	LDP	(2*8)(R20), (R2, R3)
   505  	LDP	(4*8)(R20), (R4, R5)
   506  	LDP	(6*8)(R20), (R6, R7)
   507  	LDP	(8*8)(R20), (R8, R9)
   508  	LDP	(10*8)(R20), (R10, R11)
   509  	LDP	(12*8)(R20), (R12, R13)
   510  	LDP	(14*8)(R20), (R14, R15)
   511  	FLDPD	(16*8)(R20), (F0, F1)
   512  	FLDPD	(18*8)(R20), (F2, F3)
   513  	FLDPD	(20*8)(R20), (F4, F5)
   514  	FLDPD	(22*8)(R20), (F6, F7)
   515  	FLDPD	(24*8)(R20), (F8, F9)
   516  	FLDPD	(26*8)(R20), (F10, F11)
   517  	FLDPD	(28*8)(R20), (F12, F13)
   518  	FLDPD	(30*8)(R20), (F14, F15)
   519  	RET
   520  
   521  // reflectcall: call a function with the given argument list
   522  // func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
   523  // we don't have variable-sized frames, so we use a small number
   524  // of constant-sized-frame functions to encode a few bits of size in the pc.
   525  // Caution: ugly multiline assembly macros in your future!
   526  
   527  #define DISPATCH(NAME,MAXSIZE)		\
   528  	MOVD	$MAXSIZE, R27;		\
   529  	CMP	R27, R16;		\
   530  	BGT	3(PC);			\
   531  	MOVD	$NAME(SB), R27;	\
   532  	B	(R27)
   533  // Note: can't just "B NAME(SB)" - bad inlining results.
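        // For example (illustrative only): a reflectcall with frameSize = 100
        // fails the "frame size <= MAXSIZE" test for call16, call32 and call64,
        // then dispatches to runtime·call128, whose fixed 128-byte frame is the
        // smallest that fits the 100-byte argument frame.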
   534  
   535  TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-48
   536  	MOVWU	frameSize+32(FP), R16
   537  	DISPATCH(runtime·call16, 16)
   538  	DISPATCH(runtime·call32, 32)
   539  	DISPATCH(runtime·call64, 64)
   540  	DISPATCH(runtime·call128, 128)
   541  	DISPATCH(runtime·call256, 256)
   542  	DISPATCH(runtime·call512, 512)
   543  	DISPATCH(runtime·call1024, 1024)
   544  	DISPATCH(runtime·call2048, 2048)
   545  	DISPATCH(runtime·call4096, 4096)
   546  	DISPATCH(runtime·call8192, 8192)
   547  	DISPATCH(runtime·call16384, 16384)
   548  	DISPATCH(runtime·call32768, 32768)
   549  	DISPATCH(runtime·call65536, 65536)
   550  	DISPATCH(runtime·call131072, 131072)
   551  	DISPATCH(runtime·call262144, 262144)
   552  	DISPATCH(runtime·call524288, 524288)
   553  	DISPATCH(runtime·call1048576, 1048576)
   554  	DISPATCH(runtime·call2097152, 2097152)
   555  	DISPATCH(runtime·call4194304, 4194304)
   556  	DISPATCH(runtime·call8388608, 8388608)
   557  	DISPATCH(runtime·call16777216, 16777216)
   558  	DISPATCH(runtime·call33554432, 33554432)
   559  	DISPATCH(runtime·call67108864, 67108864)
   560  	DISPATCH(runtime·call134217728, 134217728)
   561  	DISPATCH(runtime·call268435456, 268435456)
   562  	DISPATCH(runtime·call536870912, 536870912)
   563  	DISPATCH(runtime·call1073741824, 1073741824)
   564  	MOVD	$runtime·badreflectcall(SB), R0
   565  	B	(R0)
   566  
   567  #define CALLFN(NAME,MAXSIZE)			\
   568  TEXT NAME(SB), WRAPPER, $MAXSIZE-48;		\
   569  	NO_LOCAL_POINTERS;			\
   570  	/* copy arguments to stack */		\
   571  	MOVD	stackArgs+16(FP), R3;			\
   572  	MOVWU	stackArgsSize+24(FP), R4;		\
   573  	ADD	$8, RSP, R5;			\
   574  	BIC	$0xf, R4, R6;			\
   575  	CBZ	R6, 6(PC);			\
   576  	/* if R6=(argsize&~15) != 0 */		\
   577  	ADD	R6, R5, R6;			\
   578  	/* copy 16 bytes a time */		\
   579  	LDP.P	16(R3), (R7, R8);		\
   580  	STP.P	(R7, R8), 16(R5);		\
   581  	CMP	R5, R6;				\
   582  	BNE	-3(PC);				\
   583  	AND	$0xf, R4, R6;			\
   584  	CBZ	R6, 6(PC);			\
   585  	/* if R6=(argsize&15) != 0 */		\
   586  	ADD	R6, R5, R6;			\
   587  	/* copy 1 byte a time for the rest */	\
   588  	MOVBU.P	1(R3), R7;			\
   589  	MOVBU.P	R7, 1(R5);			\
   590  	CMP	R5, R6;				\
   591  	BNE	-3(PC);				\
   592  	/* set up argument registers */		\
   593  	MOVD	regArgs+40(FP), R20;		\
   594  	CALL	·unspillArgs(SB);		\
   595  	/* call function */			\
   596  	MOVD	f+8(FP), R26;			\
   597  	MOVD	(R26), R20;			\
   598  	PCDATA	$PCDATA_StackMapIndex, $0;	\
   599  	BL	(R20);				\
   600  	/* copy return values back */		\
   601  	MOVD	regArgs+40(FP), R20;		\
   602  	CALL	·spillArgs(SB);		\
   603  	MOVD	stackArgsType+0(FP), R7;		\
   604  	MOVD	stackArgs+16(FP), R3;			\
   605  	MOVWU	stackArgsSize+24(FP), R4;			\
   606  	MOVWU	stackRetOffset+28(FP), R6;		\
   607  	ADD	$8, RSP, R5;			\
   608  	ADD	R6, R5; 			\
   609  	ADD	R6, R3;				\
   610  	SUB	R6, R4;				\
   611  	BL	callRet<>(SB);			\
   612  	RET
   613  
   614  // callRet copies return values back at the end of call*. This is a
   615  // separate function so it can allocate stack space for the arguments
   616  // to reflectcallmove. It does not follow the Go ABI; it expects its
   617  // arguments in registers.
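        // A descriptive note (assuming the signature
        // reflectcallmove(typ *_type, dst, src unsafe.Pointer, size uintptr, regs *abi.RegArgs)):
        // R7 is the stack args type, R3 the destination (the caller's stackArgs
        // buffer plus stackRetOffset), R5 the source (the call* frame's argument
        // area plus the same offset), R4 the number of result bytes to copy, and
        // R20 the *abi.RegArgs holding register results.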
   618  TEXT callRet<>(SB), NOSPLIT, $48-0
   619  	NO_LOCAL_POINTERS
   620  	STP	(R7, R3), 8(RSP)
   621  	STP	(R5, R4), 24(RSP)
   622  	MOVD	R20, 40(RSP)
   623  	BL	runtime·reflectcallmove(SB)
   624  	RET
   625  
   626  CALLFN(·call16, 16)
   627  CALLFN(·call32, 32)
   628  CALLFN(·call64, 64)
   629  CALLFN(·call128, 128)
   630  CALLFN(·call256, 256)
   631  CALLFN(·call512, 512)
   632  CALLFN(·call1024, 1024)
   633  CALLFN(·call2048, 2048)
   634  CALLFN(·call4096, 4096)
   635  CALLFN(·call8192, 8192)
   636  CALLFN(·call16384, 16384)
   637  CALLFN(·call32768, 32768)
   638  CALLFN(·call65536, 65536)
   639  CALLFN(·call131072, 131072)
   640  CALLFN(·call262144, 262144)
   641  CALLFN(·call524288, 524288)
   642  CALLFN(·call1048576, 1048576)
   643  CALLFN(·call2097152, 2097152)
   644  CALLFN(·call4194304, 4194304)
   645  CALLFN(·call8388608, 8388608)
   646  CALLFN(·call16777216, 16777216)
   647  CALLFN(·call33554432, 33554432)
   648  CALLFN(·call67108864, 67108864)
   649  CALLFN(·call134217728, 134217728)
   650  CALLFN(·call268435456, 268435456)
   651  CALLFN(·call536870912, 536870912)
   652  CALLFN(·call1073741824, 1073741824)
   653  
   654  // func memhash32(p unsafe.Pointer, h uintptr) uintptr
   655  TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
   656  	MOVB	runtime·useAeshash(SB), R10
   657  	CBZ	R10, noaes
   658  	MOVD	$runtime·aeskeysched+0(SB), R3
   659  
   660  	VEOR	V0.B16, V0.B16, V0.B16
   661  	VLD1	(R3), [V2.B16]
   662  	VLD1	(R0), V0.S[1]
   663  	VMOV	R1, V0.S[0]
   664  
   665  	AESE	V2.B16, V0.B16
   666  	AESMC	V0.B16, V0.B16
   667  	AESE	V2.B16, V0.B16
   668  	AESMC	V0.B16, V0.B16
   669  	AESE	V2.B16, V0.B16
   670  
   671  	VMOV	V0.D[0], R0
   672  	RET
   673  noaes:
   674  	B	runtime·memhash32Fallback<ABIInternal>(SB)
   675  
   676  // func memhash64(p unsafe.Pointer, h uintptr) uintptr
   677  TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
   678  	MOVB	runtime·useAeshash(SB), R10
   679  	CBZ	R10, noaes
   680  	MOVD	$runtime·aeskeysched+0(SB), R3
   681  
   682  	VEOR	V0.B16, V0.B16, V0.B16
   683  	VLD1	(R3), [V2.B16]
   684  	VLD1	(R0), V0.D[1]
   685  	VMOV	R1, V0.D[0]
   686  
   687  	AESE	V2.B16, V0.B16
   688  	AESMC	V0.B16, V0.B16
   689  	AESE	V2.B16, V0.B16
   690  	AESMC	V0.B16, V0.B16
   691  	AESE	V2.B16, V0.B16
   692  
   693  	VMOV	V0.D[0], R0
   694  	RET
   695  noaes:
   696  	B	runtime·memhash64Fallback<ABIInternal>(SB)
   697  
   698  // func memhash(p unsafe.Pointer, h, size uintptr) uintptr
   699  TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
   700  	MOVB	runtime·useAeshash(SB), R10
   701  	CBZ	R10, noaes
   702  	B	aeshashbody<>(SB)
   703  noaes:
   704  	B	runtime·memhashFallback<ABIInternal>(SB)
   705  
   706  // func strhash(p unsafe.Pointer, h uintptr) uintptr
   707  TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
   708  	MOVB	runtime·useAeshash(SB), R10
   709  	CBZ	R10, noaes
   710  	LDP	(R0), (R0, R2)	// string data / length
   711  	B	aeshashbody<>(SB)
   712  noaes:
   713  	B	runtime·strhashFallback<ABIInternal>(SB)
   714  
   715  // R0: data
   716  // R1: seed data
   717  // R2: length
   718  // At return, R0 = return value
   719  TEXT aeshashbody<>(SB),NOSPLIT|NOFRAME,$0
   720  	VEOR	V30.B16, V30.B16, V30.B16
   721  	VMOV	R1, V30.D[0]
   722  	VMOV	R2, V30.D[1] // load length into seed
   723  
   724  	MOVD	$runtime·aeskeysched+0(SB), R4
   725  	VLD1.P	16(R4), [V0.B16]
   726  	AESE	V30.B16, V0.B16
   727  	AESMC	V0.B16, V0.B16
   728  	CMP	$16, R2
   729  	BLO	aes0to15
   730  	BEQ	aes16
   731  	CMP	$32, R2
   732  	BLS	aes17to32
   733  	CMP	$64, R2
   734  	BLS	aes33to64
   735  	CMP	$128, R2
   736  	BLS	aes65to128
   737  	B	aes129plus
   738  
   739  aes0to15:
   740  	CBZ	R2, aes0
   741  	VEOR	V2.B16, V2.B16, V2.B16
   742  	TBZ	$3, R2, less_than_8
   743  	VLD1.P	8(R0), V2.D[0]
   744  
   745  less_than_8:
   746  	TBZ	$2, R2, less_than_4
   747  	VLD1.P	4(R0), V2.S[2]
   748  
   749  less_than_4:
   750  	TBZ	$1, R2, less_than_2
   751  	VLD1.P	2(R0), V2.H[6]
   752  
   753  less_than_2:
   754  	TBZ	$0, R2, done
   755  	VLD1	(R0), V2.B[14]
   756  done:
   757  	AESE	V0.B16, V2.B16
   758  	AESMC	V2.B16, V2.B16
   759  	AESE	V0.B16, V2.B16
   760  	AESMC	V2.B16, V2.B16
   761  	AESE	V0.B16, V2.B16
   762  	AESMC	V2.B16, V2.B16
   763  
   764  	VMOV	V2.D[0], R0
   765  	RET
   766  
   767  aes0:
   768  	VMOV	V0.D[0], R0
   769  	RET
   770  
   771  aes16:
   772  	VLD1	(R0), [V2.B16]
   773  	B	done
   774  
   775  aes17to32:
   776  	// make second seed
   777  	VLD1	(R4), [V1.B16]
   778  	AESE	V30.B16, V1.B16
   779  	AESMC	V1.B16, V1.B16
   780  	SUB	$16, R2, R10
   781  	VLD1.P	(R0)(R10), [V2.B16]
   782  	VLD1	(R0), [V3.B16]
   783  
   784  	AESE	V0.B16, V2.B16
   785  	AESMC	V2.B16, V2.B16
   786  	AESE	V1.B16, V3.B16
   787  	AESMC	V3.B16, V3.B16
   788  
   789  	AESE	V0.B16, V2.B16
   790  	AESMC	V2.B16, V2.B16
   791  	AESE	V1.B16, V3.B16
   792  	AESMC	V3.B16, V3.B16
   793  
   794  	AESE	V0.B16, V2.B16
   795  	AESE	V1.B16, V3.B16
   796  
   797  	VEOR	V3.B16, V2.B16, V2.B16
   798  
   799  	VMOV	V2.D[0], R0
   800  	RET
   801  
   802  aes33to64:
   803  	VLD1	(R4), [V1.B16, V2.B16, V3.B16]
   804  	AESE	V30.B16, V1.B16
   805  	AESMC	V1.B16, V1.B16
   806  	AESE	V30.B16, V2.B16
   807  	AESMC	V2.B16, V2.B16
   808  	AESE	V30.B16, V3.B16
   809  	AESMC	V3.B16, V3.B16
   810  	SUB	$32, R2, R10
   811  
   812  	VLD1.P	(R0)(R10), [V4.B16, V5.B16]
   813  	VLD1	(R0), [V6.B16, V7.B16]
   814  
   815  	AESE	V0.B16, V4.B16
   816  	AESMC	V4.B16, V4.B16
   817  	AESE	V1.B16, V5.B16
   818  	AESMC	V5.B16, V5.B16
   819  	AESE	V2.B16, V6.B16
   820  	AESMC	V6.B16, V6.B16
   821  	AESE	V3.B16, V7.B16
   822  	AESMC	V7.B16, V7.B16
   823  
   824  	AESE	V0.B16, V4.B16
   825  	AESMC	V4.B16, V4.B16
   826  	AESE	V1.B16, V5.B16
   827  	AESMC	V5.B16, V5.B16
   828  	AESE	V2.B16, V6.B16
   829  	AESMC	V6.B16, V6.B16
   830  	AESE	V3.B16, V7.B16
   831  	AESMC	V7.B16, V7.B16
   832  
   833  	AESE	V0.B16, V4.B16
   834  	AESE	V1.B16, V5.B16
   835  	AESE	V2.B16, V6.B16
   836  	AESE	V3.B16, V7.B16
   837  
   838  	VEOR	V6.B16, V4.B16, V4.B16
   839  	VEOR	V7.B16, V5.B16, V5.B16
   840  	VEOR	V5.B16, V4.B16, V4.B16
   841  
   842  	VMOV	V4.D[0], R0
   843  	RET
   844  
   845  aes65to128:
   846  	VLD1.P	64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
   847  	VLD1	(R4), [V5.B16, V6.B16, V7.B16]
   848  	AESE	V30.B16, V1.B16
   849  	AESMC	V1.B16, V1.B16
   850  	AESE	V30.B16, V2.B16
   851  	AESMC	V2.B16, V2.B16
   852  	AESE	V30.B16, V3.B16
   853  	AESMC	V3.B16, V3.B16
   854  	AESE	V30.B16, V4.B16
   855  	AESMC	V4.B16, V4.B16
   856  	AESE	V30.B16, V5.B16
   857  	AESMC	V5.B16, V5.B16
   858  	AESE	V30.B16, V6.B16
   859  	AESMC	V6.B16, V6.B16
   860  	AESE	V30.B16, V7.B16
   861  	AESMC	V7.B16, V7.B16
   862  
   863  	SUB	$64, R2, R10
   864  	VLD1.P	(R0)(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
   865  	VLD1	(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
   866  	AESE	V0.B16,	 V8.B16
   867  	AESMC	V8.B16,  V8.B16
   868  	AESE	V1.B16,	 V9.B16
   869  	AESMC	V9.B16,  V9.B16
   870  	AESE	V2.B16, V10.B16
   871  	AESMC	V10.B16,  V10.B16
   872  	AESE	V3.B16, V11.B16
   873  	AESMC	V11.B16,  V11.B16
   874  	AESE	V4.B16, V12.B16
   875  	AESMC	V12.B16,  V12.B16
   876  	AESE	V5.B16, V13.B16
   877  	AESMC	V13.B16,  V13.B16
   878  	AESE	V6.B16, V14.B16
   879  	AESMC	V14.B16,  V14.B16
   880  	AESE	V7.B16, V15.B16
   881  	AESMC	V15.B16,  V15.B16
   882  
   883  	AESE	V0.B16,	 V8.B16
   884  	AESMC	V8.B16,  V8.B16
   885  	AESE	V1.B16,	 V9.B16
   886  	AESMC	V9.B16,  V9.B16
   887  	AESE	V2.B16, V10.B16
   888  	AESMC	V10.B16,  V10.B16
   889  	AESE	V3.B16, V11.B16
   890  	AESMC	V11.B16,  V11.B16
   891  	AESE	V4.B16, V12.B16
   892  	AESMC	V12.B16,  V12.B16
   893  	AESE	V5.B16, V13.B16
   894  	AESMC	V13.B16,  V13.B16
   895  	AESE	V6.B16, V14.B16
   896  	AESMC	V14.B16,  V14.B16
   897  	AESE	V7.B16, V15.B16
   898  	AESMC	V15.B16,  V15.B16
   899  
   900  	AESE	V0.B16,	 V8.B16
   901  	AESE	V1.B16,	 V9.B16
   902  	AESE	V2.B16, V10.B16
   903  	AESE	V3.B16, V11.B16
   904  	AESE	V4.B16, V12.B16
   905  	AESE	V5.B16, V13.B16
   906  	AESE	V6.B16, V14.B16
   907  	AESE	V7.B16, V15.B16
   908  
   909  	VEOR	V12.B16, V8.B16, V8.B16
   910  	VEOR	V13.B16, V9.B16, V9.B16
   911  	VEOR	V14.B16, V10.B16, V10.B16
   912  	VEOR	V15.B16, V11.B16, V11.B16
   913  	VEOR	V10.B16, V8.B16, V8.B16
   914  	VEOR	V11.B16, V9.B16, V9.B16
   915  	VEOR	V9.B16, V8.B16, V8.B16
   916  
   917  	VMOV	V8.D[0], R0
   918  	RET
   919  
   920  aes129plus:
   921  	PRFM (R0), PLDL1KEEP
   922  	VLD1.P	64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
   923  	VLD1	(R4), [V5.B16, V6.B16, V7.B16]
   924  	AESE	V30.B16, V1.B16
   925  	AESMC	V1.B16, V1.B16
   926  	AESE	V30.B16, V2.B16
   927  	AESMC	V2.B16, V2.B16
   928  	AESE	V30.B16, V3.B16
   929  	AESMC	V3.B16, V3.B16
   930  	AESE	V30.B16, V4.B16
   931  	AESMC	V4.B16, V4.B16
   932  	AESE	V30.B16, V5.B16
   933  	AESMC	V5.B16, V5.B16
   934  	AESE	V30.B16, V6.B16
   935  	AESMC	V6.B16, V6.B16
   936  	AESE	V30.B16, V7.B16
   937  	AESMC	V7.B16, V7.B16
   938  	ADD	R0, R2, R10
   939  	SUB	$128, R10, R10
   940  	VLD1.P	64(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
   941  	VLD1	(R10), [V12.B16, V13.B16, V14.B16, V15.B16]
   942  	SUB	$1, R2, R2
   943  	LSR	$7, R2, R2
   944  
   945  aesloop:
   946  	AESE	V8.B16,	 V0.B16
   947  	AESMC	V0.B16,  V0.B16
   948  	AESE	V9.B16,	 V1.B16
   949  	AESMC	V1.B16,  V1.B16
   950  	AESE	V10.B16, V2.B16
   951  	AESMC	V2.B16,  V2.B16
   952  	AESE	V11.B16, V3.B16
   953  	AESMC	V3.B16,  V3.B16
   954  	AESE	V12.B16, V4.B16
   955  	AESMC	V4.B16,  V4.B16
   956  	AESE	V13.B16, V5.B16
   957  	AESMC	V5.B16,  V5.B16
   958  	AESE	V14.B16, V6.B16
   959  	AESMC	V6.B16,  V6.B16
   960  	AESE	V15.B16, V7.B16
   961  	AESMC	V7.B16,  V7.B16
   962  
   963  	VLD1.P	64(R0), [V8.B16, V9.B16, V10.B16, V11.B16]
   964  	AESE	V8.B16,	 V0.B16
   965  	AESMC	V0.B16,  V0.B16
   966  	AESE	V9.B16,	 V1.B16
   967  	AESMC	V1.B16,  V1.B16
   968  	AESE	V10.B16, V2.B16
   969  	AESMC	V2.B16,  V2.B16
   970  	AESE	V11.B16, V3.B16
   971  	AESMC	V3.B16,  V3.B16
   972  
   973  	VLD1.P	64(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
   974  	AESE	V12.B16, V4.B16
   975  	AESMC	V4.B16,  V4.B16
   976  	AESE	V13.B16, V5.B16
   977  	AESMC	V5.B16,  V5.B16
   978  	AESE	V14.B16, V6.B16
   979  	AESMC	V6.B16,  V6.B16
   980  	AESE	V15.B16, V7.B16
   981  	AESMC	V7.B16,  V7.B16
   982  	SUB	$1, R2, R2
   983  	CBNZ	R2, aesloop
   984  
   985  	AESE	V8.B16,	 V0.B16
   986  	AESMC	V0.B16,  V0.B16
   987  	AESE	V9.B16,	 V1.B16
   988  	AESMC	V1.B16,  V1.B16
   989  	AESE	V10.B16, V2.B16
   990  	AESMC	V2.B16,  V2.B16
   991  	AESE	V11.B16, V3.B16
   992  	AESMC	V3.B16,  V3.B16
   993  	AESE	V12.B16, V4.B16
   994  	AESMC	V4.B16,  V4.B16
   995  	AESE	V13.B16, V5.B16
   996  	AESMC	V5.B16,  V5.B16
   997  	AESE	V14.B16, V6.B16
   998  	AESMC	V6.B16,  V6.B16
   999  	AESE	V15.B16, V7.B16
  1000  	AESMC	V7.B16,  V7.B16
  1001  
  1002  	AESE	V8.B16,	 V0.B16
  1003  	AESMC	V0.B16,  V0.B16
  1004  	AESE	V9.B16,	 V1.B16
  1005  	AESMC	V1.B16,  V1.B16
  1006  	AESE	V10.B16, V2.B16
  1007  	AESMC	V2.B16,  V2.B16
  1008  	AESE	V11.B16, V3.B16
  1009  	AESMC	V3.B16,  V3.B16
  1010  	AESE	V12.B16, V4.B16
  1011  	AESMC	V4.B16,  V4.B16
  1012  	AESE	V13.B16, V5.B16
  1013  	AESMC	V5.B16,  V5.B16
  1014  	AESE	V14.B16, V6.B16
  1015  	AESMC	V6.B16,  V6.B16
  1016  	AESE	V15.B16, V7.B16
  1017  	AESMC	V7.B16,  V7.B16
  1018  
  1019  	AESE	V8.B16,	 V0.B16
  1020  	AESE	V9.B16,	 V1.B16
  1021  	AESE	V10.B16, V2.B16
  1022  	AESE	V11.B16, V3.B16
  1023  	AESE	V12.B16, V4.B16
  1024  	AESE	V13.B16, V5.B16
  1025  	AESE	V14.B16, V6.B16
  1026  	AESE	V15.B16, V7.B16
  1027  
  1028  	VEOR	V0.B16, V1.B16, V0.B16
  1029  	VEOR	V2.B16, V3.B16, V2.B16
  1030  	VEOR	V4.B16, V5.B16, V4.B16
  1031  	VEOR	V6.B16, V7.B16, V6.B16
  1032  	VEOR	V0.B16, V2.B16, V0.B16
  1033  	VEOR	V4.B16, V6.B16, V4.B16
  1034  	VEOR	V4.B16, V0.B16, V0.B16
  1035  
  1036  	VMOV	V0.D[0], R0
  1037  	RET
  1038  
  1039  // The Arm architecture provides a user space accessible counter-timer which
  1040  // is incremented at a fixed but machine-specific rate. Software can (spin)
  1041  // wait until the counter-timer reaches some desired value.
  1042  //
  1043  // Armv8.7-A introduced the WFET (FEAT_WFxT) instruction, which allows the
  1044  // processor to enter a low power state for a set time, or until an event is
  1045  // received.
  1046  //
  1047  // However, WFET is not used here because it is only available on newer hardware,
  1048  // and we aim to maintain compatibility with older Armv8-A platforms that do not
  1049  // support this feature.
  1050  //
  1051  // As a fallback, we can instead use the ISB instruction to decrease processor
  1052  // activity and thus power consumption between checks of the counter-timer.
  1053  // Note that we do not depend on the latency of the ISB instruction which is
  1054  // implementation specific. Actual delay comes from comparing against a fresh
  1055  // read of the counter-timer value.
  1056  //
  1057  // Read more in this Arm blog post:
  1058  // https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/multi-threaded-applications-arm
  1059  
  1060  TEXT runtime·procyieldAsm(SB),NOSPLIT,$0-0
  1061  	MOVWU	cycles+0(FP), R0
  1062  	CBZ	 R0, done
  1063  	// Prevent speculation of subsequent counter/timer reads and memory accesses.
  1064  	ISB     $15
  1065  	// If the delay is very short, just return.
  1066  	// Hardcode 18ns as the first ISB delay.
  1067  	CMP     $18, R0
  1068  	BLS     done
  1069  	// Adjust for overhead of initial ISB.
  1070  	SUB     $18, R0, R0
  1071  	// Convert the delay from nanoseconds to counter/timer ticks.
  1072  	// Read the counter/timer frequency.
  1073  	// delay_ticks = (delay * CNTFRQ_EL0) / 1e9
  1074  	// With the below simplifications and adjustments,
  1075  	// we are usually within 2% of the correct value:
  1076  	// delay_ticks = (delay + delay / 16) * CNTFRQ_EL0 >> 30
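        	// For example (illustrative only): with CNTFRQ_EL0 = 1 GHz and a
        	// remaining delay of 82ns, the exact count is 82 ticks, while the
        	// approximation gives (82 + 82/16) * 1e9 >> 30 = 87e9 / 2^30 ≈ 81.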
  1077  	MRS     CNTFRQ_EL0, R1
  1078  	ADD     R0>>4, R0, R0
  1079  	MUL     R1, R0, R0
  1080  	LSR     $30, R0, R0
  1081  	CBZ     R0, done
  1082  	// start = current counter/timer value
  1083  	MRS     CNTVCT_EL0, R2
  1084  delay:
  1085  	// Delay using ISB for all ticks.
  1086  	ISB     $15
  1087  	// Subtract and compare to handle counter roll-over.
  1088  	// counter_read() - start < delay_ticks
  1089  	MRS     CNTVCT_EL0, R1
  1090  	SUB     R2, R1, R1
  1091  	CMP     R0, R1
  1092  	BCC     delay
  1093  done:
  1094  	RET
  1095  
  1096  // Save state of caller into g->sched,
  1097  // but using fake PC from systemstack_switch.
  1098  // Must only be called from functions with no locals ($0)
  1099  // or else unwinding from systemstack_switch is incorrect.
  1100  // Smashes R0.
  1101  TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
  1102  	MOVD	$runtime·systemstack_switch(SB), R0
  1103  	ADD	$8, R0	// get past prologue
  1104  	MOVD	R0, (g_sched+gobuf_pc)(g)
  1105  	MOVD	RSP, R0
  1106  	MOVD	R0, (g_sched+gobuf_sp)(g)
  1107  	MOVD	R29, (g_sched+gobuf_bp)(g)
  1108  	MOVD	$0, (g_sched+gobuf_lr)(g)
  1109  	// Assert ctxt is zero. See func save.
  1110  	MOVD	(g_sched+gobuf_ctxt)(g), R0
  1111  	CBZ	R0, 2(PC)
  1112  	CALL	runtime·abort(SB)
  1113  	RET
  1114  
  1115  // func asmcgocall_no_g(fn, arg unsafe.Pointer)
  1116  // Call fn(arg) aligned appropriately for the gcc ABI.
  1117  // Called on a system stack, and there may be no g yet (during needm).
  1118  TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-16
  1119  	MOVD	fn+0(FP), R1
  1120  	MOVD	arg+8(FP), R0
  1121  	SUB	$16, RSP	// skip over saved frame pointer below RSP
  1122  	BL	(R1)
  1123  	ADD	$16, RSP	// skip over saved frame pointer below RSP
  1124  	RET
  1125  
  1126  // func asmcgocall(fn, arg unsafe.Pointer) int32
  1127  // Call fn(arg) on the scheduler stack,
  1128  // aligned appropriately for the gcc ABI.
  1129  // See cgocall.go for more details.
  1130  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
  1131  	MOVD	fn+0(FP), R1
  1132  	MOVD	arg+8(FP), R0
  1133  
  1134  	MOVD	RSP, R2		// save original stack pointer
  1135  	CBZ	g, nosave
  1136  	MOVD	g, R4
  1137  
  1138  	// Figure out if we need to switch to m->g0 stack.
  1139  	// We get called to create new OS threads too, and those
  1140  	// come in on the m->g0 stack already. Or we might already
  1141  	// be on the m->gsignal stack.
  1142  	MOVD	g_m(g), R8
  1143  	MOVD	m_gsignal(R8), R3
  1144  	CMP	R3, g
  1145  	BEQ	nosave
  1146  	MOVD	m_g0(R8), R3
  1147  	CMP	R3, g
  1148  	BEQ	nosave
  1149  
  1150  	// Switch to system stack.
  1151  	MOVD	R0, R9	// gosave_systemstack_switch<> and save_g might clobber R0
  1152  	BL	gosave_systemstack_switch<>(SB)
  1153  	MOVD	R3, g
  1154  	BL	runtime·save_g(SB)
  1155  	MOVD	(g_sched+gobuf_sp)(g), R0
  1156  	MOVD	R0, RSP
  1157  	MOVD	(g_sched+gobuf_bp)(g), R29
  1158  	MOVD	R9, R0
  1159  
  1160  	// Now on a scheduling stack (a pthread-created stack).
  1161  	// Save room for two of our pointers /*, plus 32 bytes of callee
  1162  	// save area that lives on the caller stack. */
  1163  	MOVD	RSP, R13
  1164  	SUB	$16, R13
  1165  	MOVD	R13, RSP
  1166  	MOVD	R4, 0(RSP)	// save old g on stack
  1167  	MOVD	(g_stack+stack_hi)(R4), R4
  1168  	SUB	R2, R4
  1169  	MOVD	R4, 8(RSP)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
  1170  	BL	(R1)
  1171  	MOVD	R0, R9
  1172  
  1173  	// Restore g, stack pointer. R0 is errno, so don't touch it
  1174  	MOVD	0(RSP), g
  1175  	BL	runtime·save_g(SB)
  1176  	MOVD	(g_stack+stack_hi)(g), R5
  1177  	MOVD	8(RSP), R6
  1178  	SUB	R6, R5
  1179  	MOVD	R9, R0
  1180  	MOVD	R5, RSP
  1181  
  1182  	MOVW	R0, ret+16(FP)
  1183  	RET
  1184  
  1185  nosave:
  1186  	// Running on a system stack, perhaps even without a g.
  1187  	// Having no g can happen during thread creation or thread teardown
  1188  	// (see needm/dropm on Solaris, for example).
  1189  	// This code is like the above sequence but without saving/restoring g
  1190  	// and without worrying about the stack moving out from under us
  1191  	// (because we're on a system stack, not a goroutine stack).
  1192  	// The above code could be used directly if already on a system stack,
  1193  	// but then the only path through this code would be a rare case on Solaris.
  1194  	// Using this code for all "already on system stack" calls exercises it more,
  1195  	// which should help keep it correct.
  1196  	MOVD	RSP, R13
  1197  	SUB	$16, R13
  1198  	MOVD	R13, RSP
  1199  	MOVD	$0, R4
  1200  	MOVD	R4, 0(RSP)	// Where above code stores g, in case someone looks during debugging.
  1201  	MOVD	R2, 8(RSP)	// Save original stack pointer.
  1202  	BL	(R1)
  1203  	// Restore stack pointer.
  1204  	MOVD	8(RSP), R2
  1205  	MOVD	R2, RSP
  1206  	MOVD	R0, ret+16(FP)
  1207  	RET
  1208  
  1209  // cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
  1210  // See cgocall.go for more details.
  1211  TEXT ·cgocallback(SB),NOSPLIT,$24-24
  1212  	NO_LOCAL_POINTERS
  1213  
  1214  	// When fn is nil, skip cgocallbackg and just dropm; frame is the saved g.
  1215  	// This path is used to dropm while the thread is exiting.
  1216  	MOVD	fn+0(FP), R1
  1217  	CBNZ	R1, loadg
  1218  	// Restore the g from frame.
  1219  	MOVD	frame+8(FP), g
  1220  	B	dropm
  1221  
  1222  loadg:
  1223  	// Load g from thread-local storage.
  1224  	BL	runtime·load_g(SB)
  1225  
  1226  	// If g is nil, Go did not create the current thread,
  1227  	// If g is nil, Go did not create the current thread, or this
  1228  	// thread has never called into Go on pthread platforms.
  1229  	// In this case, we're running on the thread stack, so there's
  1230  	// lots of space, but the linker doesn't know. Hide the call from
  1231  	// the linker analysis by using an indirect call.
  1232  	CBZ	g, needm
  1233  
  1234  	MOVD	g_m(g), R8
  1235  	MOVD	R8, savedm-8(SP)
  1236  	B	havem
  1237  
  1238  needm:
  1239  	MOVD	g, savedm-8(SP) // g is zero, so is m.
  1240  	MOVD	$runtime·needAndBindM(SB), R0
  1241  	BL	(R0)
  1242  
  1243  	// Set m->g0->sched.sp = SP, so that if a panic happens
  1244  	// during the function we are about to execute, it will
  1245  	// have a valid SP to run on the g0 stack.
  1246  	// The next few lines (after the havem label)
  1247  	// will save this SP onto the stack and then write
  1248  	// the same SP back to m->sched.sp. That seems redundant,
  1249  	// but if an unrecovered panic happens, unwindm will
  1250  	// restore the g->sched.sp from the stack location
  1251  	// and then systemstack will try to use it. If we don't set it here,
  1252  	// that restored SP will be uninitialized (typically 0) and
  1253  	// will not be usable.
  1254  	MOVD	g_m(g), R8
  1255  	MOVD	m_g0(R8), R3
  1256  	MOVD	RSP, R0
  1257  	MOVD	R0, (g_sched+gobuf_sp)(R3)
  1258  	MOVD	R29, (g_sched+gobuf_bp)(R3)
  1259  
  1260  havem:
  1261  	// Now there's a valid m, and we're running on its m->g0.
  1262  	// Save current m->g0->sched.sp on stack and then set it to SP.
  1263  	// Save current sp in m->g0->sched.sp in preparation for
  1264  	// switch back to m->curg stack.
  1265  	// NOTE: unwindm knows that the saved g->sched.sp is at 16(RSP) aka savedsp-16(SP).
  1266  	// Beware that the frame size is actually 32+16.
  1267  	MOVD	m_g0(R8), R3
  1268  	MOVD	(g_sched+gobuf_sp)(R3), R4
  1269  	MOVD	R4, savedsp-16(SP)
  1270  	MOVD	RSP, R0
  1271  	MOVD	R0, (g_sched+gobuf_sp)(R3)
  1272  
  1273  	// Switch to m->curg stack and call runtime.cgocallbackg.
  1274  	// Because we are taking over the execution of m->curg
  1275  	// but *not* resuming what had been running, we need to
  1276  	// save that information (m->curg->sched) so we can restore it.
  1277  	// We can restore m->curg->sched.sp easily, because calling
  1278  	// runtime.cgocallbackg leaves SP unchanged upon return.
  1279  	// To save m->curg->sched.pc, we push it onto the curg stack and
  1280  	// open a frame the same size as cgocallback's g0 frame.
  1281  	// Once we switch to the curg stack, the pushed PC will appear
  1282  	// to be the return PC of cgocallback, so that the traceback
  1283  	// will seamlessly trace back into the earlier calls.
  1284  	MOVD	m_curg(R8), g
  1285  	BL	runtime·save_g(SB)
  1286  	MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
  1287  	MOVD	(g_sched+gobuf_pc)(g), R5
  1288  	MOVD	R5, -48(R4)
  1289  	MOVD	(g_sched+gobuf_bp)(g), R5
  1290  	MOVD	R5, -56(R4)
  1291  	// Gather our arguments into registers.
  1292  	MOVD	fn+0(FP), R1
  1293  	MOVD	frame+8(FP), R2
  1294  	MOVD	ctxt+16(FP), R3
  1295  	MOVD	$-48(R4), R0 // maintain 16-byte SP alignment
  1296  	MOVD	R0, RSP	// switch stack
  1297  	MOVD	R1, 8(RSP)
  1298  	MOVD	R2, 16(RSP)
  1299  	MOVD	R3, 24(RSP)
  1300  	MOVD	$runtime·cgocallbackg(SB), R0
  1301  	CALL	(R0) // indirect call to bypass nosplit check. We're on a different stack now.
  1302  
  1303  	// Restore g->sched (== m->curg->sched) from saved values.
  1304  	MOVD	0(RSP), R5
  1305  	MOVD	R5, (g_sched+gobuf_pc)(g)
  1306  	MOVD	RSP, R4
  1307  	ADD	$48, R4, R4
  1308  	MOVD	R4, (g_sched+gobuf_sp)(g)
  1309  
  1310  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
  1311  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
  1312  	// so we do not have to restore it.)
  1313  	MOVD	g_m(g), R8
  1314  	MOVD	m_g0(R8), g
  1315  	BL	runtime·save_g(SB)
  1316  	MOVD	(g_sched+gobuf_sp)(g), R0
  1317  	MOVD	R0, RSP
  1318  	MOVD	savedsp-16(SP), R4
  1319  	MOVD	R4, (g_sched+gobuf_sp)(g)
  1320  
  1321  	// If the m on entry was nil, we called needm above to borrow an m,
  1322  	// 1. for the duration of the call on non-pthread platforms,
  1323  	// 2. or for the lifetime of the C thread on pthread platforms.
  1324  	// If the m on entry wasn't nil,
  1325  	// 1. the thread might be a Go thread,
  1326  	// 2. or it wasn't the first call from a C thread on pthread platforms,
  1327  	//    in which case we skip dropm and keep reusing the m obtained by the first call.
  1328  	MOVD	savedm-8(SP), R6
  1329  	CBNZ	R6, droppedm
  1330  
  1331  	// Skip dropm to reuse it in the next call, when a pthread key has been created.
  1332  	MOVD	_cgo_pthread_key_created(SB), R6
  1333  	// A nil _cgo_pthread_key_created pointer means cgo is disabled, so we need dropm.
  1334  	CBZ	R6, dropm
  1335  	MOVD	(R6), R6
  1336  	CBNZ	R6, droppedm
  1337  
  1338  dropm:
  1339  	MOVD	$runtime·dropm(SB), R0
  1340  	BL	(R0)
  1341  droppedm:
  1342  
  1343  	// Done!
  1344  	RET
  1345  
  1346  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1347  // Must obey the gcc calling convention.
  1348  TEXT _cgo_topofstack(SB),NOSPLIT,$24
  1349  	// g (R28) and REGTMP (R27)  might be clobbered by load_g. They
  1350  	// are callee-save in the gcc calling convention, so save them.
  1351  	MOVD	R27, savedR27-8(SP)
  1352  	MOVD	g, saveG-16(SP)
  1353  
  1354  	BL	runtime·load_g(SB)
  1355  	MOVD	g_m(g), R0
  1356  	MOVD	m_curg(R0), R0
  1357  	MOVD	(g_stack+stack_hi)(R0), R0
  1358  
  1359  	MOVD	saveG-16(SP), g
  1360  	MOVD	savedR27-8(SP), R27
  1361  	RET
  1362  
  1363  // void setg(G*); set g. for use by needm.
  1364  TEXT runtime·setg(SB), NOSPLIT, $0-8
  1365  	MOVD	gg+0(FP), g
  1366  	// This only happens if iscgo, so jump straight to save_g
  1367  	BL	runtime·save_g(SB)
  1368  	RET
  1369  
  1370  // void setg_gcc(G*); set g called from gcc
  1371  TEXT setg_gcc<>(SB),NOSPLIT,$8
  1372  	MOVD	R0, g
  1373  	MOVD	R27, savedR27-8(SP)
  1374  	BL	runtime·save_g(SB)
  1375  	MOVD	savedR27-8(SP), R27
  1376  	RET
  1377  
  1378  TEXT runtime·emptyfunc(SB),0,$0-0
  1379  	RET
  1380  
  1381  TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
  1382  	MOVD	ZR, R0
  1383  	MOVD	(R0), R0
  1384  	UNDEF
  1385  
  1386  // The top-most function running on a goroutine
  1387  // returns to goexit+PCQuantum.
  1388  TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0
  1389  	MOVD	R0, R0	// NOP
  1390  	BL	runtime·goexit1(SB)	// does not return
  1391  
  1392  // This is called from .init_array and follows the platform, not Go, ABI.
  1393  TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
  1394  	SUB	$0x10, RSP
  1395  	MOVD	R27, 8(RSP) // The access to global variables below implicitly uses R27, which is callee-save
  1396  	MOVD	runtime·lastmoduledatap(SB), R1
  1397  	MOVD	R0, moduledata_next(R1)
  1398  	MOVD	R0, runtime·lastmoduledatap(SB)
  1399  	MOVD	8(RSP), R27
  1400  	ADD	$0x10, RSP
  1401  	RET
  1402  
  1403  TEXT ·checkASM(SB),NOSPLIT,$0-1
  1404  	MOVW	$1, R3
  1405  	MOVB	R3, ret+0(FP)
  1406  	RET
  1407  
  1408  // gcWriteBarrier informs the GC about heap pointer writes.
  1409  //
  1410  // gcWriteBarrier does NOT follow the Go ABI. It accepts the
  1411  // number of bytes of buffer needed in R25, and returns a pointer
  1412  // to the buffer space in R25.
  1413  // It clobbers condition codes.
  1414  // It does not clobber any general-purpose registers except R27,
  1415  // but may clobber others (e.g., floating point registers)
  1416  // The act of CALLing gcWriteBarrier will clobber R30 (LR).
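        // As a hedged illustration (not part of the original comment): compiled
        // code needing, say, two buffer slots calls runtime·gcWriteBarrier2 below,
        // which sets R25 = $16 and tail-calls here; on return R25 points at the
        // reserved space in the current P's write barrier buffer, and the caller
        // is expected to store the relevant pointers (typically the value being
        // written and the value being overwritten) into those slots.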
  1417  TEXT gcWriteBarrier<>(SB),NOSPLIT,$200
  1418  	// Save the registers clobbered by the fast path.
  1419  	STP	(R0, R1), 184(RSP)
  1420  retry:
  1421  	MOVD	g_m(g), R0
  1422  	MOVD	m_p(R0), R0
  1423  	MOVD	(p_wbBuf+wbBuf_next)(R0), R1
  1424  	MOVD	(p_wbBuf+wbBuf_end)(R0), R27
  1425  	// Increment wbBuf.next position.
  1426  	ADD	R25, R1
  1427  	// Is the buffer full?
  1428  	CMP	R27, R1
  1429  	BHI	flush
  1430  	// Commit to the larger buffer.
  1431  	MOVD	R1, (p_wbBuf+wbBuf_next)(R0)
  1432  	// Make return value (the original next position)
  1433  	SUB	R25, R1, R25
  1434  	// Restore registers.
  1435  	LDP	184(RSP), (R0, R1)
  1436  	RET
  1437  
  1438  flush:
  1439  	// Save all general purpose registers since these could be
  1440  	// clobbered by wbBufFlush and were not saved by the caller.
  1441  	// R0 and R1 already saved
  1442  	STP	(R2, R3), 1*8(RSP)
  1443  	STP	(R4, R5), 3*8(RSP)
  1444  	STP	(R6, R7), 5*8(RSP)
  1445  	STP	(R8, R9), 7*8(RSP)
  1446  	STP	(R10, R11), 9*8(RSP)
  1447  	STP	(R12, R13), 11*8(RSP)
  1448  	STP	(R14, R15), 13*8(RSP)
  1449  	// R16, R17 may be clobbered by linker trampoline
  1450  	// R18 is unused.
  1451  	STP	(R19, R20), 15*8(RSP)
  1452  	STP	(R21, R22), 17*8(RSP)
  1453  	STP	(R23, R24), 19*8(RSP)
  1454  	STP	(R25, R26), 21*8(RSP)
  1455  	// R27 is temp register.
  1456  	// R28 is g.
  1457  	// R29 is frame pointer (unused).
  1458  	// R30 is LR, which was saved by the prologue.
  1459  	// R31 is SP.
  1460  
  1461  	CALL	runtime·wbBufFlush(SB)
  1462  	LDP	1*8(RSP), (R2, R3)
  1463  	LDP	3*8(RSP), (R4, R5)
  1464  	LDP	5*8(RSP), (R6, R7)
  1465  	LDP	7*8(RSP), (R8, R9)
  1466  	LDP	9*8(RSP), (R10, R11)
  1467  	LDP	11*8(RSP), (R12, R13)
  1468  	LDP	13*8(RSP), (R14, R15)
  1469  	LDP	15*8(RSP), (R19, R20)
  1470  	LDP	17*8(RSP), (R21, R22)
  1471  	LDP	19*8(RSP), (R23, R24)
  1472  	LDP	21*8(RSP), (R25, R26)
  1473  	JMP	retry
  1474  
  1475  TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT,$0
  1476  	MOVD	$8, R25
  1477  	JMP	gcWriteBarrier<>(SB)
  1478  TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT,$0
  1479  	MOVD	$16, R25
  1480  	JMP	gcWriteBarrier<>(SB)
  1481  TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT,$0
  1482  	MOVD	$24, R25
  1483  	JMP	gcWriteBarrier<>(SB)
  1484  TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT,$0
  1485  	MOVD	$32, R25
  1486  	JMP	gcWriteBarrier<>(SB)
  1487  TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT,$0
  1488  	MOVD	$40, R25
  1489  	JMP	gcWriteBarrier<>(SB)
  1490  TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT,$0
  1491  	MOVD	$48, R25
  1492  	JMP	gcWriteBarrier<>(SB)
  1493  TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT,$0
  1494  	MOVD	$56, R25
  1495  	JMP	gcWriteBarrier<>(SB)
  1496  TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT,$0
  1497  	MOVD	$64, R25
  1498  	JMP	gcWriteBarrier<>(SB)
  1499  
  1500  DATA	debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
  1501  GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $20	// Size duplicated below
  1502  
  1503  // debugCallV2 is the entry point for debugger-injected function
  1504  // calls on running goroutines. It informs the runtime that a
  1505  // debug call has been injected and creates a call frame for the
  1506  // debugger to fill in.
  1507  //
  1508  // To inject a function call, a debugger should:
  1509  // 1. Check that the goroutine is in state _Grunning and that
  1510  //    there are at least 288 bytes free on the stack.
  1511  // 2. Set SP as SP-16.
  1512  // 3. Store the current LR in (SP) (using the SP after step 2).
  1513  // 4. Store the current PC in the LR register.
  1514  // 5. Write the desired argument frame size at SP-16
  1515  // 6. Save all machine registers (including flags and fpsimd registers)
  1516  //    so they can be restored later by the debugger.
  1517  // 7. Set the PC to debugCallV2 and resume execution.
  1518  //
  1519  // If the goroutine is in state _Grunnable, then it's not generally
  1520  // safe to inject a call because it may return out via other runtime
  1521  // operations. Instead, the debugger should unwind the stack to find
  1522  // the return to non-runtime code, add a temporary breakpoint there,
  1523  // and inject the call once that breakpoint is hit.
  1524  //
  1525  // If the goroutine is in any other state, it's not safe to inject a call.
  1526  //
  1527  // This function communicates back to the debugger by setting R20 and
  1528  // invoking BRK to raise a breakpoint signal. Note that the signal PC of
  1529  // the signal triggered by the BRK instruction is the PC where the signal
  1530  // is trapped, not the next PC, so to resume execution, the debugger needs
  1531  // to set the signal PC to PC+4. See the comments in the implementation for
  1532  // the protocol the debugger is expected to follow. InjectDebugCall in the
  1533  // runtime tests demonstrates this protocol.
  1534  //
  1535  // The debugger must ensure that any pointers passed to the function
  1536  // obey escape analysis requirements. Specifically, it must not pass
  1537  // a stack pointer to an escaping argument. debugCallV2 cannot check
  1538  // this invariant.
  1539  //
  1540  // This is ABIInternal because Go code injects its PC directly into new
  1541  // goroutine stacks.
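        // Summary of the R20 values used before each BRK below (derived from
        // this file): 8 = the call cannot be injected and a reason string is at
        // SP+8; 0 = the argument frame is ready for the debugger to fill in;
        // 1 = the injected call returned; 2 = the injected call panicked and the
        // panic value is at SP+8; 16 = the debugger should restore registers and
        // resume execution.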
  1542  TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
  1543  	STP	(R29, R30), -280(RSP)
  1544  	SUB	$272, RSP, RSP
  1545  	SUB	$8, RSP, R29
  1546  	// Save all registers that may contain pointers so they can be
  1547  	// conservatively scanned.
  1548  	//
  1549  	// We can't do anything that might clobber any of these
  1550  	// registers before this.
  1551  	STP	(R27, g), (30*8)(RSP)
  1552  	STP	(R25, R26), (28*8)(RSP)
  1553  	STP	(R23, R24), (26*8)(RSP)
  1554  	STP	(R21, R22), (24*8)(RSP)
  1555  	STP	(R19, R20), (22*8)(RSP)
  1556  	STP	(R16, R17), (20*8)(RSP)
  1557  	STP	(R14, R15), (18*8)(RSP)
  1558  	STP	(R12, R13), (16*8)(RSP)
  1559  	STP	(R10, R11), (14*8)(RSP)
  1560  	STP	(R8, R9), (12*8)(RSP)
  1561  	STP	(R6, R7), (10*8)(RSP)
  1562  	STP	(R4, R5), (8*8)(RSP)
  1563  	STP	(R2, R3), (6*8)(RSP)
  1564  	STP	(R0, R1), (4*8)(RSP)
  1565  
  1566  	// Perform a safe-point check.
  1567  	MOVD	R30, 8(RSP) // Caller's PC
  1568  	CALL	runtime·debugCallCheck(SB)
  1569  	MOVD	16(RSP), R0
  1570  	CBZ	R0, good
  1571  
  1572  	// The safety check failed. Put the reason string at the top
  1573  	// of the stack.
  1574  	MOVD	R0, 8(RSP)
  1575  	MOVD	24(RSP), R0
  1576  	MOVD	R0, 16(RSP)
  1577  
  1578  	// Set R20 to 8 and invoke BRK. The debugger should get the
  1579  	// reason a call can't be injected from SP+8 and resume execution.
  1580  	MOVD	$8, R20
  1581  	BREAK
  1582  	JMP	restore
  1583  
  1584  good:
  1585  	// Registers are saved and it's safe to make a call.
  1586  	// Open up a call frame, moving the stack if necessary.
  1587  	//
  1588  	// Once the frame is allocated, this will set R20 to 0 and
  1589  	// invoke BRK. The debugger should write the argument
  1590  	// frame for the call at SP+8, set up argument registers,
  1591  	// set the LR as the signal PC + 4, set the PC to the function
  1592  	// to call, set R26 to point to the closure (if a closure call),
  1593  	// and resume execution.
  1594  	//
  1595  	// If the function returns, this will set R20 to 1 and invoke
  1596  	// BRK. The debugger can then inspect any return value saved
  1597  	// on the stack at SP+8 and in registers. To resume execution,
  1598  	// the debugger should restore the LR from (SP).
  1599  	//
  1600  	// If the function panics, this will set R20 to 2 and invoke BRK.
  1601  	// The interface{} value of the panic will be at SP+8. The debugger
  1602  	// can inspect the panic value and resume execution again.
  1603  #define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
  1604  	CMP	$MAXSIZE, R0;			\
  1605  	BGT	5(PC);				\
  1606  	MOVD	$NAME(SB), R0;			\
  1607  	MOVD	R0, 8(RSP);			\
  1608  	CALL	runtime·debugCallWrap(SB);	\
  1609  	JMP	restore
  1610  
  1611  	MOVD	256(RSP), R0 // the argument frame size
  1612  	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
  1613  	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
  1614  	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
  1615  	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
  1616  	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
  1617  	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
  1618  	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
  1619  	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
  1620  	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
  1621  	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
  1622  	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
  1623  	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
  1624  	// The frame size is too large. Report the error.
  1625  	MOVD	$debugCallFrameTooLarge<>(SB), R0
  1626  	MOVD	R0, 8(RSP)
  1627  	MOVD	$20, R0
  1628  	MOVD	R0, 16(RSP) // length of debugCallFrameTooLarge string
  1629  	MOVD	$8, R20
  1630  	BREAK
  1631  	JMP	restore
  1632  
  1633  restore:
  1634  	// Calls and failures resume here.
  1635  	//
  1636  	// Set R20 to 16 and invoke BRK. The debugger should restore
  1637  	// all registers except for PC and RSP and resume execution.
  1638  	MOVD	$16, R20
  1639  	BREAK
  1640  	// We must not modify flags after this point.
  1641  
  1642  	// Restore pointer-containing registers, which may have been
  1643  	// modified from the debugger's copy by stack copying.
  1644  	LDP	(30*8)(RSP), (R27, g)
  1645  	LDP	(28*8)(RSP), (R25, R26)
  1646  	LDP	(26*8)(RSP), (R23, R24)
  1647  	LDP	(24*8)(RSP), (R21, R22)
  1648  	LDP	(22*8)(RSP), (R19, R20)
  1649  	LDP	(20*8)(RSP), (R16, R17)
  1650  	LDP	(18*8)(RSP), (R14, R15)
  1651  	LDP	(16*8)(RSP), (R12, R13)
  1652  	LDP	(14*8)(RSP), (R10, R11)
  1653  	LDP	(12*8)(RSP), (R8, R9)
  1654  	LDP	(10*8)(RSP), (R6, R7)
  1655  	LDP	(8*8)(RSP), (R4, R5)
  1656  	LDP	(6*8)(RSP), (R2, R3)
  1657  	LDP	(4*8)(RSP), (R0, R1)
  1658  
  1659  	LDP	-8(RSP), (R29, R27)
  1660  	ADD	$288, RSP, RSP // Add 16 more bytes, see saveSigContext
  1661  	MOVD	-16(RSP), R30 // restore old lr
  1662  	JMP	(R27)
  1663  
  1664  // runtime.debugCallCheck assumes that functions defined with the
  1665  // DEBUG_CALL_FN macro are safe points to inject calls.
  1666  #define DEBUG_CALL_FN(NAME,MAXSIZE)		\
  1667  TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
  1668  	NO_LOCAL_POINTERS;		\
  1669  	MOVD	$0, R20;		\
  1670  	BREAK;		\
  1671  	MOVD	$1, R20;		\
  1672  	BREAK;		\
  1673  	RET
  1674  DEBUG_CALL_FN(debugCall32<>, 32)
  1675  DEBUG_CALL_FN(debugCall64<>, 64)
  1676  DEBUG_CALL_FN(debugCall128<>, 128)
  1677  DEBUG_CALL_FN(debugCall256<>, 256)
  1678  DEBUG_CALL_FN(debugCall512<>, 512)
  1679  DEBUG_CALL_FN(debugCall1024<>, 1024)
  1680  DEBUG_CALL_FN(debugCall2048<>, 2048)
  1681  DEBUG_CALL_FN(debugCall4096<>, 4096)
  1682  DEBUG_CALL_FN(debugCall8192<>, 8192)
  1683  DEBUG_CALL_FN(debugCall16384<>, 16384)
  1684  DEBUG_CALL_FN(debugCall32768<>, 32768)
  1685  DEBUG_CALL_FN(debugCall65536<>, 65536)
  1686  
  1687  // func debugCallPanicked(val interface{})
  1688  TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
  1689  	// Copy the panic value to the top of stack at SP+8.
  1690  	MOVD	val_type+0(FP), R0
  1691  	MOVD	R0, 8(RSP)
  1692  	MOVD	val_data+8(FP), R0
  1693  	MOVD	R0, 16(RSP)
  1694  	MOVD	$2, R20
  1695  	BREAK
  1696  	RET
  1697  
  1698  TEXT runtime·panicBounds<ABIInternal>(SB),NOSPLIT,$144-0
  1699  	NO_LOCAL_POINTERS
  1700  	// Save all 16 int registers that could have an index in them.
  1701  	// They may be pointers, but if they are they are dead.
  1702  	STP	(R0, R1), 24(RSP)
  1703  	STP	(R2, R3), 40(RSP)
  1704  	STP	(R4, R5), 56(RSP)
  1705  	STP	(R6, R7), 72(RSP)
  1706  	STP	(R8, R9), 88(RSP)
  1707  	STP	(R10, R11), 104(RSP)
  1708  	STP	(R12, R13), 120(RSP)
  1709  	STP	(R14, R15), 136(RSP)
  1710  	MOVD	LR, R0		// PC immediately after call to panicBounds
  1711  	ADD	$24, RSP, R1	// pointer to save area
  1712  	CALL	runtime·panicBounds64<ABIInternal>(SB)
  1713  	RET
  1714  
  1715  TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
  1716  	MOVD R29, R0
  1717  	RET
  1718  
