src/runtime/asm_amd64.s

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "go_tls.h"
     7  #include "funcdata.h"
     8  #include "textflag.h"
     9  #include "cgo/abi_amd64.h"
    10  
    11  // _rt0_amd64 is common startup code for most amd64 systems when using
    12  // internal linking. This is the entry point for the program from the
    13  // kernel for an ordinary -buildmode=exe program. The stack holds the
    14  // number of arguments and the C-style argv.
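         // At process entry on System V AMD64 systems the kernel leaves argc at
         // 0(SP), the argv pointers starting at 8(SP), a NULL terminator, and then
         // the environment pointers (a sketch of the ABI entry layout; only argc
         // and argv are read here).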
    15  TEXT _rt0_amd64(SB),NOSPLIT,$-8
    16  	MOVQ	0(SP), DI	// argc
    17  	LEAQ	8(SP), SI	// argv
    18  	JMP	runtime·rt0_go(SB)
    19  
    20  // main is common startup code for most amd64 systems when using
    21  // external linking. The C startup code will call the symbol "main"
    22  // passing argc and argv in the usual C ABI registers DI and SI.
    23  TEXT main(SB),NOSPLIT,$-8
    24  	JMP	runtime·rt0_go(SB)
    25  
    26  // _rt0_amd64_lib is common startup code for most amd64 systems when
    27  // using -buildmode=c-archive or -buildmode=c-shared. The linker will
    28  // arrange to invoke this function as a global constructor (for
    29  // c-archive) or when the shared library is loaded (for c-shared).
    30  // We expect argc and argv to be passed in the usual C ABI registers
    31  // DI and SI.
    32  TEXT _rt0_amd64_lib(SB),NOSPLIT|NOFRAME,$0
    33  	// Transition from C ABI to Go ABI.
    34  	PUSH_REGS_HOST_TO_ABI0()
    35  
    36  	MOVQ	DI, _rt0_amd64_lib_argc<>(SB)
    37  	MOVQ	SI, _rt0_amd64_lib_argv<>(SB)
    38  
    39  	// Synchronous initialization.
    40  #ifndef GOOS_windows
    41  	// Avoid calling it on Windows because it is not used
    42  	// and it would crash the application due to the autogenerated
    43  	// ABI wrapper trying to access a non-existent TLS slot.
    44  	CALL	runtime·libpreinit(SB)
    45  #endif
    46  
    47  	// Create a new thread to finish Go runtime initialization.
    48  	MOVQ	_cgo_sys_thread_create(SB), AX
    49  	TESTQ	AX, AX
    50  	JZ	nocgo
    51  
    52  	// We're calling back to C.
    53  	// Align stack per C ABI requirements.
    54  	MOVQ	SP, BX  // Callee-save in C ABI
    55  	ANDQ	$~15, SP
    56  	MOVQ	$_rt0_amd64_lib_go(SB), DI
    57  	MOVQ	$0, SI
    58  #ifdef GOOS_windows
    59  	// For Windows ABI
    60  	MOVQ	DI, CX
    61  	MOVQ	SI, DX
    62  	// Leave space for four words on the stack as required
    63  	// by the Windows amd64 calling convention.
    64  	ADJSP	$32
    65  #endif
    66  	CALL	AX
    67  #ifdef GOOS_windows
    68  	ADJSP	$-32 // just to make the assembler not complain about unbalanced stack
    69  #endif
    70  	MOVQ	BX, SP
    71  	JMP	restore
    72  
    73  nocgo:
    74  	ADJSP	$16
    75  	MOVQ	$0x800000, 0(SP)		// stacksize
    76  	MOVQ	$_rt0_amd64_lib_go(SB), AX
    77  	MOVQ	AX, 8(SP)			// fn
    78  	CALL	runtime·newosproc0(SB)
    79  	ADJSP	$-16
    80  
    81  restore:
    82  	POP_REGS_HOST_TO_ABI0()
    83  	RET
    84  
    85  // _rt0_amd64_lib_go initializes the Go runtime.
    86  // This is started in a separate thread by _rt0_amd64_lib.
    87  TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
    88  	MOVQ	_rt0_amd64_lib_argc<>(SB), DI
    89  	MOVQ	_rt0_amd64_lib_argv<>(SB), SI
    90  	JMP	runtime·rt0_go(SB)
    91  
    92  DATA _rt0_amd64_lib_argc<>(SB)/8, $0
    93  GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
    94  DATA _rt0_amd64_lib_argv<>(SB)/8, $0
    95  GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
    96  
    97  #ifdef GOAMD64_v2
    98  DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v2 microarchitecture support.\n"
    99  #endif
   100  
   101  #ifdef GOAMD64_v3
   102  DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v3 microarchitecture support.\n"
   103  #endif
   104  
   105  #ifdef GOAMD64_v4
   106  DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v4 microarchitecture support.\n"
   107  #endif
   108  
   109  GLOBL bad_cpu_msg<>(SB), RODATA, $84
   110  
   111  // Define a list of AMD64 microarchitecture level features
   112  // https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
   113  
    114                       // SSE3     SSSE3    CMPXCHG16B SSE4.1    SSE4.2    POPCNT
   115  #define V2_FEATURES_CX (1 << 0 | 1 << 9 | 1 << 13  | 1 << 19 | 1 << 20 | 1 << 23)
   116                           // LAHF/SAHF
   117  #define V2_EXT_FEATURES_CX (1 << 0)
   118                                        // FMA       MOVBE     OSXSAVE   AVX       F16C
   119  #define V3_FEATURES_CX (V2_FEATURES_CX | 1 << 12 | 1 << 22 | 1 << 27 | 1 << 28 | 1 << 29)
    120                                                // ABM (FOR LZCNT)
   121  #define V3_EXT_FEATURES_CX (V2_EXT_FEATURES_CX | 1 << 5)
   122                           // BMI1     AVX2     BMI2
   123  #define V3_EXT_FEATURES_BX (1 << 3 | 1 << 5 | 1 << 8)
   124                         // XMM      YMM
   125  #define V3_OS_SUPPORT_AX (1 << 1 | 1 << 2)
   126  
   127  #define V4_FEATURES_CX V3_FEATURES_CX
   128  
   129  #define V4_EXT_FEATURES_CX V3_EXT_FEATURES_CX
   130                                                // AVX512F   AVX512DQ  AVX512CD  AVX512BW  AVX512VL
   131  #define V4_EXT_FEATURES_BX (V3_EXT_FEATURES_BX | 1 << 16 | 1 << 17 | 1 << 28 | 1 << 30 | 1 << 31)
   132                                            // OPMASK   ZMM
   133  #define V4_OS_SUPPORT_AX (V3_OS_SUPPORT_AX | 1 << 5 | (1 << 6 | 1 << 7))
   134  
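         // As a worked example (reading the macros above together with the checks
         // in rt0_go below): building with GOAMD64=v3 requires that
         //   CPUID.(EAX=1):ECX           contains every bit in V3_FEATURES_CX,
         //   CPUID.(EAX=7,ECX=0):EBX     contains every bit in V3_EXT_FEATURES_BX,
         //   CPUID.(EAX=0x80000001):ECX  contains every bit in V3_EXT_FEATURES_CX, and
         //   XCR0 (read via XGETBV)      reports XMM and YMM state enabled
         //                               (V3_OS_SUPPORT_AX).
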
   135  #ifdef GOAMD64_v2
   136  #define NEED_MAX_CPUID 0x80000001
   137  #define NEED_FEATURES_CX V2_FEATURES_CX
   138  #define NEED_EXT_FEATURES_CX V2_EXT_FEATURES_CX
   139  #endif
   140  
   141  #ifdef GOAMD64_v3
   142  #define NEED_MAX_CPUID 0x80000001
   143  #define NEED_FEATURES_CX V3_FEATURES_CX
   144  #define NEED_EXT_FEATURES_CX V3_EXT_FEATURES_CX
   145  #define NEED_EXT_FEATURES_BX V3_EXT_FEATURES_BX
   146  #define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
   147  #endif
   148  
   149  #ifdef GOAMD64_v4
   150  #define NEED_MAX_CPUID 0x80000001
   151  #define NEED_FEATURES_CX V4_FEATURES_CX
   152  #define NEED_EXT_FEATURES_CX V4_EXT_FEATURES_CX
   153  #define NEED_EXT_FEATURES_BX V4_EXT_FEATURES_BX
   154  
   155  // Darwin requires a different approach to check AVX512 support, see CL 285572.
   156  #ifdef GOOS_darwin
   157  #define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
   158  // These values are from:
   159  // https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
   160  #define commpage64_base_address         0x00007fffffe00000
   161  #define commpage64_cpu_capabilities64   (commpage64_base_address+0x010)
   162  #define commpage64_version              (commpage64_base_address+0x01E)
   163  #define AVX512F                         0x0000004000000000
   164  #define AVX512CD                        0x0000008000000000
   165  #define AVX512DQ                        0x0000010000000000
   166  #define AVX512BW                        0x0000020000000000
   167  #define AVX512VL                        0x0000100000000000
   168  #define NEED_DARWIN_SUPPORT             (AVX512F | AVX512DQ | AVX512CD | AVX512BW | AVX512VL)
   169  #else
   170  #define NEED_OS_SUPPORT_AX V4_OS_SUPPORT_AX
   171  #endif
   172  
   173  #endif
   174  
   175  TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
   176  	// copy arguments forward on an even stack
   177  	MOVQ	DI, AX		// argc
   178  	MOVQ	SI, BX		// argv
   179  	SUBQ	$(5*8), SP		// 3args 2auto
   180  	ANDQ	$~15, SP
   181  	MOVQ	AX, 24(SP)
   182  	MOVQ	BX, 32(SP)
   183  
   184  	// This is typically the entry point for Go programs.
   185  	// Call stack unwinding must not proceed past this frame.
   186  	// Set the frame pointer register to 0 so that frame pointer-based unwinders
   187  	// (which don't use debug info for performance reasons)
   188  	// won't attempt to unwind past this function.
   189  	// See go.dev/issue/63630
   190  	MOVQ	$0, BP
   191  
   192  	// create istack out of the given (operating system) stack.
   193  	// _cgo_init may update stackguard.
   194  	MOVQ	$runtime·g0(SB), DI
   195  	LEAQ	(-64*1024)(SP), BX
   196  	MOVQ	BX, g_stackguard0(DI)
   197  	MOVQ	BX, g_stackguard1(DI)
   198  	MOVQ	BX, (g_stack+stack_lo)(DI)
   199  	MOVQ	SP, (g_stack+stack_hi)(DI)
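         	// Resulting g0 stack bounds (sketch): g0.stack.hi is the SP at
         	// entry, g0.stack.lo is 64 KiB below it, and g0.stackguard0/1
         	// start at g0.stack.lo (the cgo path below raises them by
         	// const_stackGuard after _cgo_init).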
   200  
   201  	// find out information about the processor we're on
   202  	MOVL	$0, AX
   203  	CPUID
   204  	CMPL	AX, $0
   205  	JE	nocpuinfo
   206  
   207  	CMPL	BX, $0x756E6547  // "Genu"
   208  	JNE	notintel
   209  	CMPL	DX, $0x49656E69  // "ineI"
   210  	JNE	notintel
   211  	CMPL	CX, $0x6C65746E  // "ntel"
   212  	JNE	notintel
   213  	MOVB	$1, runtime·isIntel(SB)
   214  
   215  notintel:
   216  	// Load EAX=1 cpuid flags
   217  	MOVL	$1, AX
   218  	CPUID
   219  	MOVL	AX, runtime·processorVersionInfo(SB)
   220  
   221  nocpuinfo:
   222  	// if there is an _cgo_init, call it.
   223  	MOVQ	_cgo_init(SB), AX
   224  	TESTQ	AX, AX
   225  	JZ	needtls
   226  	// arg 1: g0, already in DI
   227  	MOVQ	$setg_gcc<>(SB), SI // arg 2: setg_gcc
   228  	MOVQ	$0, DX	// arg 3, 4: not used when using platform's TLS
   229  	MOVQ	$0, CX
   230  #ifdef GOOS_android
   231  	MOVQ	$runtime·tls_g(SB), DX 	// arg 3: &tls_g
   232  	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
   233  	// Compensate for tls_g (+16).
   234  	MOVQ	-16(TLS), CX
   235  #endif
   236  #ifdef GOOS_windows
   237  	MOVQ	$runtime·tls_g(SB), DX 	// arg 3: &tls_g
   238  	// Adjust for the Win64 calling convention.
   239  	MOVQ	CX, R9 // arg 4
   240  	MOVQ	DX, R8 // arg 3
   241  	MOVQ	SI, DX // arg 2
   242  	MOVQ	DI, CX // arg 1
   243  #endif
   244  	CALL	AX
   245  
   246  	// update stackguard after _cgo_init
   247  	MOVQ	$runtime·g0(SB), CX
   248  	MOVQ	(g_stack+stack_lo)(CX), AX
   249  	ADDQ	$const_stackGuard, AX
   250  	MOVQ	AX, g_stackguard0(CX)
   251  	MOVQ	AX, g_stackguard1(CX)
   252  
   253  #ifndef GOOS_windows
   254  	JMP ok
   255  #endif
   256  needtls:
   257  #ifdef GOOS_plan9
   258  	// skip TLS setup on Plan 9
   259  	JMP ok
   260  #endif
   261  #ifdef GOOS_solaris
   262  	// skip TLS setup on Solaris
   263  	JMP ok
   264  #endif
   265  #ifdef GOOS_illumos
   266  	// skip TLS setup on illumos
   267  	JMP ok
   268  #endif
   269  #ifdef GOOS_darwin
   270  	// skip TLS setup on Darwin
   271  	JMP ok
   272  #endif
   273  #ifdef GOOS_openbsd
   274  	// skip TLS setup on OpenBSD
   275  	JMP ok
   276  #endif
   277  
   278  #ifdef GOOS_windows
   279  	CALL	runtime·wintls(SB)
   280  #endif
   281  
   282  	LEAQ	runtime·m0+m_tls(SB), DI
   283  	CALL	runtime·settls(SB)
   284  
   285  	// store through it, to make sure it works
   286  	get_tls(BX)
   287  	MOVQ	$0x123, g(BX)
   288  	MOVQ	runtime·m0+m_tls(SB), AX
   289  	CMPQ	AX, $0x123
   290  	JEQ 2(PC)
   291  	CALL	runtime·abort(SB)
   292  ok:
   293  	// set the per-goroutine and per-mach "registers"
   294  	get_tls(BX)
   295  	LEAQ	runtime·g0(SB), CX
   296  	MOVQ	CX, g(BX)
   297  	LEAQ	runtime·m0(SB), AX
   298  
   299  	// save m->g0 = g0
   300  	MOVQ	CX, m_g0(AX)
   301  	// save m0 to g0->m
   302  	MOVQ	AX, g_m(CX)
   303  
   304  	CLD				// convention is D is always left cleared
   305  
   306  	// Check GOAMD64 requirements
   307  	// We need to do this after setting up TLS, so that
   308  	// we can report an error if there is a failure. See issue 49586.
   309  #ifdef NEED_FEATURES_CX
   310  	MOVL	$0, AX
   311  	CPUID
   312  	CMPL	AX, $0
   313  	JE	bad_cpu
   314  	MOVL	$1, AX
   315  	CPUID
   316  	ANDL	$NEED_FEATURES_CX, CX
   317  	CMPL	CX, $NEED_FEATURES_CX
   318  	JNE	bad_cpu
   319  #endif
   320  
   321  #ifdef NEED_MAX_CPUID
   322  	MOVL	$0x80000000, AX
   323  	CPUID
   324  	CMPL	AX, $NEED_MAX_CPUID
   325  	JL	bad_cpu
   326  #endif
   327  
   328  #ifdef NEED_EXT_FEATURES_BX
   329  	MOVL	$7, AX
   330  	MOVL	$0, CX
   331  	CPUID
   332  	ANDL	$NEED_EXT_FEATURES_BX, BX
   333  	CMPL	BX, $NEED_EXT_FEATURES_BX
   334  	JNE	bad_cpu
   335  #endif
   336  
   337  #ifdef NEED_EXT_FEATURES_CX
   338  	MOVL	$0x80000001, AX
   339  	CPUID
   340  	ANDL	$NEED_EXT_FEATURES_CX, CX
   341  	CMPL	CX, $NEED_EXT_FEATURES_CX
   342  	JNE	bad_cpu
   343  #endif
   344  
   345  #ifdef NEED_OS_SUPPORT_AX
   346  	XORL    CX, CX
   347  	XGETBV
   348  	ANDL	$NEED_OS_SUPPORT_AX, AX
   349  	CMPL	AX, $NEED_OS_SUPPORT_AX
   350  	JNE	bad_cpu
   351  #endif
   352  
   353  #ifdef NEED_DARWIN_SUPPORT
   354  	MOVQ	$commpage64_version, BX
   355  	CMPW	(BX), $13  // cpu_capabilities64 undefined in versions < 13
   356  	JL	bad_cpu
   357  	MOVQ	$commpage64_cpu_capabilities64, BX
   358  	MOVQ	(BX), BX
   359  	MOVQ	$NEED_DARWIN_SUPPORT, CX
   360  	ANDQ	CX, BX
   361  	CMPQ	BX, CX
   362  	JNE	bad_cpu
   363  #endif
   364  
   365  	CALL	runtime·check(SB)
   366  
   367  	MOVL	24(SP), AX		// copy argc
   368  	MOVL	AX, 0(SP)
   369  	MOVQ	32(SP), AX		// copy argv
   370  	MOVQ	AX, 8(SP)
   371  	CALL	runtime·args(SB)
   372  	CALL	runtime·osinit(SB)
   373  	CALL	runtime·schedinit(SB)
   374  
   375  	// create a new goroutine to start program
   376  	MOVQ	$runtime·mainPC(SB), AX		// entry
   377  	PUSHQ	AX
   378  	CALL	runtime·newproc(SB)
   379  	POPQ	AX
   380  
   381  	// start this M
   382  	CALL	runtime·mstart(SB)
   383  
   384  	CALL	runtime·abort(SB)	// mstart should never return
   385  	RET
   386  
   387  bad_cpu: // show that the program requires a certain microarchitecture level.
   388  	MOVQ	$2, 0(SP)
   389  	MOVQ	$bad_cpu_msg<>(SB), AX
   390  	MOVQ	AX, 8(SP)
   391  	MOVQ	$84, 16(SP)
   392  	CALL	runtime·write(SB)
   393  	MOVQ	$1, 0(SP)
   394  	CALL	runtime·exit(SB)
   395  	CALL	runtime·abort(SB)
   396  	RET
   397  
   398  	// Prevent dead-code elimination of debugCallV2 and debugPinnerV1, which are
   399  	// intended to be called by debuggers.
   400  	MOVQ	$runtime·debugPinnerV1<ABIInternal>(SB), AX
   401  	MOVQ	$runtime·debugCallV2<ABIInternal>(SB), AX
   402  	RET
   403  
   404  // mainPC is a function value for runtime.main, to be passed to newproc.
   405  // The reference to runtime.main is made via ABIInternal, since the
   406  // actual function (not the ABI0 wrapper) is needed by newproc.
   407  DATA	runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
   408  GLOBL	runtime·mainPC(SB),RODATA,$8
   409  
   410  TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
   411  	BYTE	$0xcc
   412  	RET
   413  
   414  TEXT runtime·asminit(SB),NOSPLIT,$0-0
   415  	// No per-thread init.
   416  	RET
   417  
   418  TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
   419  	// This is the root frame of new Go-created OS threads.
   420  	// Call stack unwinding must not proceed past this frame.
   421  	// Set the frame pointer register to 0 so that frame pointer-based unwinders
   422  	// (which don't use debug info for performance reasons)
   423  	// won't attempt to unwind past this function.
   424  	// See go.dev/issue/63630
    425  	MOVQ	$0, BP
   426  	CALL	runtime·mstart0(SB)
   427  	RET // not reached
   428  
   429  /*
   430   *  go-routine
   431   */
   432  
   433  // func gogo(buf *gobuf)
   434  // restore state from Gobuf; longjmp
   435  TEXT runtime·gogo(SB), NOSPLIT, $0-8
   436  	MOVQ	buf+0(FP), BX		// gobuf
   437  	MOVQ	gobuf_g(BX), DX
   438  	MOVQ	0(DX), CX		// make sure g != nil
   439  	JMP	gogo<>(SB)
   440  
   441  TEXT gogo<>(SB), NOSPLIT, $0
   442  	get_tls(CX)
   443  	MOVQ	DX, g(CX)
   444  	MOVQ	DX, R14		// set the g register
   445  	MOVQ	gobuf_sp(BX), SP	// restore SP
   446  	MOVQ	gobuf_ctxt(BX), DX
   447  	MOVQ	gobuf_bp(BX), BP
   448  	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
   449  	MOVQ	$0, gobuf_ctxt(BX)
   450  	MOVQ	$0, gobuf_bp(BX)
   451  	MOVQ	gobuf_pc(BX), BX
   452  	JMP	BX
   453  
   454  // func mcall(fn func(*g))
   455  // Switch to m->g0's stack, call fn(g).
   456  // Fn must never return. It should gogo(&g->sched)
   457  // to keep running g.
   458  TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT, $0-8
   459  	MOVQ	AX, DX	// DX = fn
   460  
   461  	// Save state in g->sched. The caller's SP and PC are restored by gogo to
   462  	// resume execution in the caller's frame (implicit return). The caller's BP
   463  	// is also restored to support frame pointer unwinding.
   464  	MOVQ	SP, BX	// hide (SP) reads from vet
   465  	MOVQ	8(BX), BX	// caller's PC
   466  	MOVQ	BX, (g_sched+gobuf_pc)(R14)
   467  	LEAQ	fn+0(FP), BX	// caller's SP
   468  	MOVQ	BX, (g_sched+gobuf_sp)(R14)
   469  	// Get the caller's frame pointer by dereferencing BP. Storing BP as it is
   470  	// can cause a frame pointer cycle, see CL 476235.
   471  	MOVQ	(BP), BX // caller's BP
   472  	MOVQ	BX, (g_sched+gobuf_bp)(R14)
   473  
   474  	// switch to m->g0 & its stack, call fn
   475  	MOVQ	g_m(R14), BX
   476  	MOVQ	m_g0(BX), SI	// SI = g.m.g0
   477  	CMPQ	SI, R14	// if g == m->g0 call badmcall
   478  	JNE	goodm
   479  	JMP	runtime·badmcall(SB)
   480  goodm:
   481  	MOVQ	R14, AX		// AX (and arg 0) = g
   482  	MOVQ	SI, R14		// g = g.m.g0
   483  	get_tls(CX)		// Set G in TLS
   484  	MOVQ	R14, g(CX)
   485  	MOVQ	(g_sched+gobuf_sp)(R14), SP	// sp = g0.sched.sp
   486  	MOVQ	$0, BP	// clear frame pointer, as caller may execute on another M
   487  	PUSHQ	AX	// open up space for fn's arg spill slot
   488  	MOVQ	0(DX), R12
   489  	CALL	R12		// fn(g)
   490  	// The Windows native stack unwinder incorrectly classifies the next instruction
   491  	// as part of the function epilogue, producing a wrong call stack.
   492  	// Add a NOP to work around this issue. See go.dev/issue/67007.
   493  	BYTE	$0x90
   494  	POPQ	AX
   495  	JMP	runtime·badmcall2(SB)
   496  	RET
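         // One representative Go-side caller (a sketch of how the scheduler uses
         // this): gopark parks the current goroutine with mcall(park_m); park_m
         // runs on g0 and never returns to this frame, and the goroutine is
         // resumed later via gogo(&gp.sched).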
   497  
   498  // systemstack_switch is a dummy routine that systemstack leaves at the bottom
   499  // of the G stack. We need to distinguish the routine that
   500  // lives at the bottom of the G stack from the one that lives
   501  // at the top of the system stack because the one at the top of
   502  // the system stack terminates the stack walk (see topofstack()).
   503  // The frame layout needs to match systemstack
   504  // so that it can pretend to be systemstack_switch.
   505  TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   506  	UNDEF
   507  	// Make sure this function is not leaf,
   508  	// so the frame is saved.
   509  	CALL	runtime·abort(SB)
   510  	RET
   511  
   512  // func systemstack(fn func())
   513  TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   514  	MOVQ	fn+0(FP), DI	// DI = fn
   515  	get_tls(CX)
   516  	MOVQ	g(CX), AX	// AX = g
   517  	MOVQ	g_m(AX), BX	// BX = m
   518  
   519  	CMPQ	AX, m_gsignal(BX)
   520  	JEQ	noswitch
   521  
   522  	MOVQ	m_g0(BX), DX	// DX = g0
   523  	CMPQ	AX, DX
   524  	JEQ	noswitch
   525  
   526  	CMPQ	AX, m_curg(BX)
   527  	JNE	bad
   528  
   529  	// Switch stacks.
   530  	// The original frame pointer is stored in BP,
   531  	// which is useful for stack unwinding.
   532  	// Save our state in g->sched. Pretend to
   533  	// be systemstack_switch if the G stack is scanned.
   534  	CALL	gosave_systemstack_switch<>(SB)
   535  
   536  	// switch to g0
   537  	MOVQ	DX, g(CX)
   538  	MOVQ	DX, R14 // set the g register
   539  	MOVQ	(g_sched+gobuf_sp)(DX), SP
   540  
   541  	// call target function
   542  	MOVQ	DI, DX
   543  	MOVQ	0(DI), DI
   544  	CALL	DI
   545  
   546  	// switch back to g
   547  	get_tls(CX)
   548  	MOVQ	g(CX), AX
   549  	MOVQ	g_m(AX), BX
   550  	MOVQ	m_curg(BX), AX
   551  	MOVQ	AX, g(CX)
   552  	MOVQ	(g_sched+gobuf_sp)(AX), SP
   553  	MOVQ	(g_sched+gobuf_bp)(AX), BP
   554  	MOVQ	$0, (g_sched+gobuf_sp)(AX)
   555  	MOVQ	$0, (g_sched+gobuf_bp)(AX)
   556  	RET
   557  
   558  noswitch:
   559  	// already on m stack; tail call the function
   560  	// Using a tail call here cleans up tracebacks since we won't stop
   561  	// at an intermediate systemstack.
   562  	MOVQ	DI, DX
   563  	MOVQ	0(DI), DI
   564  	// The function epilogue is not called on a tail call.
   565  	// Pop BP from the stack to simulate it.
   566  	POPQ	BP
   567  	JMP	DI
   568  
   569  bad:
   570  	// Bad: g is not gsignal, not g0, not curg. What is it?
   571  	MOVQ	$runtime·badsystemstack(SB), AX
   572  	CALL	AX
   573  	INT	$3
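         // Go-side usage sketch: runtime code passes the work as a closure, e.g.
         //
         //	systemstack(func() {
         //		// runs on m->g0, with the larger system stack
         //	})
         //
         // and execution resumes on the original goroutine stack when fn returns.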
   574  
   575  // func switchToCrashStack0(fn func())
   576  TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
   577  	MOVQ	g_m(R14), BX // curm
   578  
   579  	// set g to gcrash
   580  	LEAQ	runtime·gcrash(SB), R14 // g = &gcrash
   581  	MOVQ	BX, g_m(R14)            // g.m = curm
   582  	MOVQ	R14, m_g0(BX)           // curm.g0 = g
   583  	get_tls(CX)
   584  	MOVQ	R14, g(CX)
   585  
   586  	// switch to crashstack
   587  	MOVQ	(g_stack+stack_hi)(R14), BX
   588  	SUBQ	$(4*8), BX
   589  	MOVQ	BX, SP
   590  
   591  	// call target function
   592  	MOVQ	AX, DX
   593  	MOVQ	0(AX), AX
   594  	CALL	AX
   595  
   596  	// should never return
   597  	CALL	runtime·abort(SB)
   598  	UNDEF
   599  
   600  /*
   601   * support for morestack
   602   */
   603  
   604  // Called during function prolog when more stack is needed.
   605  //
   606  // The traceback routines see morestack on a g0 as being
   607  // the top of a stack (for example, morestack calling newstack
   608  // calling the scheduler calling newm calling gc), so we must
   609  // record an argument size. For that purpose, it has no arguments.
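         //
         // Rough sketch of the stack-check prologue the compiler emits in functions
         // that may need more stack (illustrative only, not the exact generated code):
         //
         //	CMPQ	SP, g_stackguard0(R14)	// g is kept in R14 under the register ABI
         //	JBE	growstack
         //	// ... function body ...
         // growstack:
         //	CALL	runtime·morestack_noctxt(SB)
         //	JMP	entry			// re-run the check with the grown stack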
   610  TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
   611  	// Cannot grow scheduler stack (m->g0).
   612  	get_tls(CX)
   613  	MOVQ	g(CX), DI     // DI = g
   614  	MOVQ	g_m(DI), BX   // BX = m
   615  
   616  	// Set g->sched to context in f.
   617  	MOVQ	0(SP), AX // f's PC
   618  	MOVQ	AX, (g_sched+gobuf_pc)(DI)
   619  	LEAQ	8(SP), AX // f's SP
   620  	MOVQ	AX, (g_sched+gobuf_sp)(DI)
   621  	MOVQ	BP, (g_sched+gobuf_bp)(DI)
   622  	MOVQ	DX, (g_sched+gobuf_ctxt)(DI)
   623  
   624  	MOVQ	m_g0(BX), SI  // SI = m.g0
   625  	CMPQ	DI, SI
   626  	JNE	3(PC)
   627  	CALL	runtime·badmorestackg0(SB)
   628  	CALL	runtime·abort(SB)
   629  
   630  	// Cannot grow signal stack (m->gsignal).
   631  	MOVQ	m_gsignal(BX), SI
   632  	CMPQ	DI, SI
   633  	JNE	3(PC)
   634  	CALL	runtime·badmorestackgsignal(SB)
   635  	CALL	runtime·abort(SB)
   636  
   637  	// Called from f.
   638  	// Set m->morebuf to f's caller.
   639  	NOP	SP	// tell vet SP changed - stop checking offsets
   640  	MOVQ	8(SP), AX	// f's caller's PC
   641  	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
   642  	LEAQ	16(SP), AX	// f's caller's SP
   643  	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
   644  	MOVQ	DI, (m_morebuf+gobuf_g)(BX)
   645  
   646  	// Call newstack on m->g0's stack.
   647  	MOVQ	m_g0(BX), BX
   648  	MOVQ	BX, g(CX)
   649  	MOVQ	(g_sched+gobuf_sp)(BX), SP
   650  	MOVQ	$0, BP			// clear frame pointer, as caller may execute on another M
   651  	CALL	runtime·newstack(SB)
   652  	CALL	runtime·abort(SB)	// crash if newstack returns
   653  	RET
   654  
   655  // morestack but not preserving ctxt.
   656  TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
   657  	MOVL	$0, DX
   658  	JMP	runtime·morestack(SB)
   659  
   660  // spillArgs stores return values from registers to a *internal/abi.RegArgs in R12.
   661  TEXT ·spillArgs(SB),NOSPLIT,$0-0
   662  	MOVQ AX, 0(R12)
   663  	MOVQ BX, 8(R12)
   664  	MOVQ CX, 16(R12)
   665  	MOVQ DI, 24(R12)
   666  	MOVQ SI, 32(R12)
   667  	MOVQ R8, 40(R12)
   668  	MOVQ R9, 48(R12)
   669  	MOVQ R10, 56(R12)
   670  	MOVQ R11, 64(R12)
   671  	MOVQ X0, 72(R12)
   672  	MOVQ X1, 80(R12)
   673  	MOVQ X2, 88(R12)
   674  	MOVQ X3, 96(R12)
   675  	MOVQ X4, 104(R12)
   676  	MOVQ X5, 112(R12)
   677  	MOVQ X6, 120(R12)
   678  	MOVQ X7, 128(R12)
   679  	MOVQ X8, 136(R12)
   680  	MOVQ X9, 144(R12)
   681  	MOVQ X10, 152(R12)
   682  	MOVQ X11, 160(R12)
   683  	MOVQ X12, 168(R12)
   684  	MOVQ X13, 176(R12)
   685  	MOVQ X14, 184(R12)
   686  	RET
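         // The offsets above are expected to mirror the amd64 layout of
         // internal/abi.RegArgs (an assumption based on the current register ABI):
         // nine integer register slots at offsets 0-64, then fifteen 8-byte slots
         // for X0-X14 at 72-184; only the low 64 bits of each X register are
         // spilled here.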
   687  
   688  // unspillArgs loads args into registers from a *internal/abi.RegArgs in R12.
   689  TEXT ·unspillArgs(SB),NOSPLIT,$0-0
   690  	MOVQ 0(R12), AX
   691  	MOVQ 8(R12), BX
   692  	MOVQ 16(R12), CX
   693  	MOVQ 24(R12), DI
   694  	MOVQ 32(R12), SI
   695  	MOVQ 40(R12), R8
   696  	MOVQ 48(R12), R9
   697  	MOVQ 56(R12), R10
   698  	MOVQ 64(R12), R11
   699  	MOVQ 72(R12), X0
   700  	MOVQ 80(R12), X1
   701  	MOVQ 88(R12), X2
   702  	MOVQ 96(R12), X3
   703  	MOVQ 104(R12), X4
   704  	MOVQ 112(R12), X5
   705  	MOVQ 120(R12), X6
   706  	MOVQ 128(R12), X7
   707  	MOVQ 136(R12), X8
   708  	MOVQ 144(R12), X9
   709  	MOVQ 152(R12), X10
   710  	MOVQ 160(R12), X11
   711  	MOVQ 168(R12), X12
   712  	MOVQ 176(R12), X13
   713  	MOVQ 184(R12), X14
   714  	RET
   715  
   716  // reflectcall: call a function with the given argument list
   717  // func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
   718  // we don't have variable-sized frames, so we use a small number
   719  // of constant-sized-frame functions to encode a few bits of size in the pc.
   720  // Caution: ugly multiline assembly macros in your future!
   721  
   722  #define DISPATCH(NAME,MAXSIZE)		\
   723  	CMPQ	CX, $MAXSIZE;		\
   724  	JA	3(PC);			\
   725  	MOVQ	$NAME(SB), AX;		\
   726  	JMP	AX
   727  // Note: can't just "JMP NAME(SB)" - bad inlining results.
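         // For example, a frameSize of 1000 bytes is above MAXSIZE for call16
         // through call512, so those DISPATCH entries are skipped, and the call is
         // handled by runtime·call1024, the first entry whose MAXSIZE is at least
         // the requested frame size.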
   728  
   729  TEXT ·reflectcall(SB), NOSPLIT, $0-48
   730  	MOVLQZX frameSize+32(FP), CX
   731  	DISPATCH(runtime·call16, 16)
   732  	DISPATCH(runtime·call32, 32)
   733  	DISPATCH(runtime·call64, 64)
   734  	DISPATCH(runtime·call128, 128)
   735  	DISPATCH(runtime·call256, 256)
   736  	DISPATCH(runtime·call512, 512)
   737  	DISPATCH(runtime·call1024, 1024)
   738  	DISPATCH(runtime·call2048, 2048)
   739  	DISPATCH(runtime·call4096, 4096)
   740  	DISPATCH(runtime·call8192, 8192)
   741  	DISPATCH(runtime·call16384, 16384)
   742  	DISPATCH(runtime·call32768, 32768)
   743  	DISPATCH(runtime·call65536, 65536)
   744  	DISPATCH(runtime·call131072, 131072)
   745  	DISPATCH(runtime·call262144, 262144)
   746  	DISPATCH(runtime·call524288, 524288)
   747  	DISPATCH(runtime·call1048576, 1048576)
   748  	DISPATCH(runtime·call2097152, 2097152)
   749  	DISPATCH(runtime·call4194304, 4194304)
   750  	DISPATCH(runtime·call8388608, 8388608)
   751  	DISPATCH(runtime·call16777216, 16777216)
   752  	DISPATCH(runtime·call33554432, 33554432)
   753  	DISPATCH(runtime·call67108864, 67108864)
   754  	DISPATCH(runtime·call134217728, 134217728)
   755  	DISPATCH(runtime·call268435456, 268435456)
   756  	DISPATCH(runtime·call536870912, 536870912)
   757  	DISPATCH(runtime·call1073741824, 1073741824)
   758  	MOVQ	$runtime·badreflectcall(SB), AX
   759  	JMP	AX
   760  
   761  #define CALLFN(NAME,MAXSIZE)			\
   762  TEXT NAME(SB), WRAPPER, $MAXSIZE-48;		\
   763  	NO_LOCAL_POINTERS;			\
   764  	/* copy arguments to stack */		\
   765  	MOVQ	stackArgs+16(FP), SI;		\
   766  	MOVLQZX stackArgsSize+24(FP), CX;		\
   767  	MOVQ	SP, DI;				\
   768  	REP;MOVSB;				\
   769  	/* set up argument registers */		\
   770  	MOVQ    regArgs+40(FP), R12;		\
   771  	CALL    ·unspillArgs(SB);		\
   772  	/* call function */			\
   773  	MOVQ	f+8(FP), DX;			\
   774  	PCDATA  $PCDATA_StackMapIndex, $0;	\
   775  	MOVQ	(DX), R12;			\
   776  	CALL	R12;				\
   777  	/* copy register return values back */		\
   778  	MOVQ    regArgs+40(FP), R12;		\
   779  	CALL    ·spillArgs(SB);		\
   780  	MOVLQZX	stackArgsSize+24(FP), CX;		\
   781  	MOVLQZX	stackRetOffset+28(FP), BX;		\
   782  	MOVQ	stackArgs+16(FP), DI;		\
   783  	MOVQ	stackArgsType+0(FP), DX;		\
   784  	MOVQ	SP, SI;				\
   785  	ADDQ	BX, DI;				\
   786  	ADDQ	BX, SI;				\
   787  	SUBQ	BX, CX;				\
   788  	CALL	callRet<>(SB);			\
   789  	RET
   790  
   791  // callRet copies return values back at the end of call*. This is a
   792  // separate function so it can allocate stack space for the arguments
   793  // to reflectcallmove. It does not follow the Go ABI; it expects its
   794  // arguments in registers.
   795  TEXT callRet<>(SB), NOSPLIT, $40-0
   796  	NO_LOCAL_POINTERS
   797  	MOVQ	DX, 0(SP)
   798  	MOVQ	DI, 8(SP)
   799  	MOVQ	SI, 16(SP)
   800  	MOVQ	CX, 24(SP)
   801  	MOVQ	R12, 32(SP)
   802  	CALL	runtime·reflectcallmove(SB)
   803  	RET
   804  
   805  CALLFN(·call16, 16)
   806  CALLFN(·call32, 32)
   807  CALLFN(·call64, 64)
   808  CALLFN(·call128, 128)
   809  CALLFN(·call256, 256)
   810  CALLFN(·call512, 512)
   811  CALLFN(·call1024, 1024)
   812  CALLFN(·call2048, 2048)
   813  CALLFN(·call4096, 4096)
   814  CALLFN(·call8192, 8192)
   815  CALLFN(·call16384, 16384)
   816  CALLFN(·call32768, 32768)
   817  CALLFN(·call65536, 65536)
   818  CALLFN(·call131072, 131072)
   819  CALLFN(·call262144, 262144)
   820  CALLFN(·call524288, 524288)
   821  CALLFN(·call1048576, 1048576)
   822  CALLFN(·call2097152, 2097152)
   823  CALLFN(·call4194304, 4194304)
   824  CALLFN(·call8388608, 8388608)
   825  CALLFN(·call16777216, 16777216)
   826  CALLFN(·call33554432, 33554432)
   827  CALLFN(·call67108864, 67108864)
   828  CALLFN(·call134217728, 134217728)
   829  CALLFN(·call268435456, 268435456)
   830  CALLFN(·call536870912, 536870912)
   831  CALLFN(·call1073741824, 1073741824)
   832  
   833  TEXT runtime·procyieldAsm(SB),NOSPLIT,$0-0
   834  	MOVL	cycles+0(FP), AX
   835  	TESTL	AX, AX
   836  	JZ	done
   837  again:
   838  	PAUSE
   839  	SUBL	$1, AX
   840  	JNZ	again
   841  done:
   842  	RET
   843  
   844  
   845  TEXT ·publicationBarrier<ABIInternal>(SB),NOSPLIT,$0-0
   846  	// Stores are already ordered on x86, so this is just a
   847  	// compile barrier.
   848  	RET
   849  
   850  // Save state of caller into g->sched,
   851  // but using fake PC from systemstack_switch.
   852  // Must only be called from functions with frame pointer
   853  // and without locals ($0) or else unwinding from
   854  // systemstack_switch is incorrect.
   855  // Smashes R9.
   856  TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
   857  	// Take systemstack_switch PC and add 8 bytes to skip
   858  	// the prologue. The final location does not matter
   859  	// as long as we are between the prologue and the epilogue.
   860  	MOVQ	$runtime·systemstack_switch+8(SB), R9
   861  	MOVQ	R9, (g_sched+gobuf_pc)(R14)
   862  	LEAQ	8(SP), R9
   863  	MOVQ	R9, (g_sched+gobuf_sp)(R14)
   864  	MOVQ	BP, (g_sched+gobuf_bp)(R14)
   865  	// Assert ctxt is zero. See func save.
   866  	MOVQ	(g_sched+gobuf_ctxt)(R14), R9
   867  	TESTQ	R9, R9
   868  	JZ	2(PC)
   869  	CALL	runtime·abort(SB)
   870  	RET
   871  
   872  // func asmcgocall_no_g(fn, arg unsafe.Pointer)
   873  // Call fn(arg) aligned appropriately for the gcc ABI.
   874  // Called on a system stack, and there may be no g yet (during needm).
   875  TEXT ·asmcgocall_no_g(SB),NOSPLIT,$32-16
   876  	MOVQ	fn+0(FP), AX
   877  	MOVQ	arg+8(FP), BX
   878  	MOVQ	SP, DX
   879  	ANDQ	$~15, SP	// alignment
   880  	MOVQ	DX, 8(SP)
   881  	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
   882  	MOVQ	BX, CX		// CX = first argument in Win64
   883  	CALL	AX
   884  	MOVQ	8(SP), DX
   885  	MOVQ	DX, SP
   886  	RET
   887  
   888  // asmcgocall_landingpad calls AX with BX as argument.
   889  // Must be called on the system stack.
   890  TEXT ·asmcgocall_landingpad(SB),NOSPLIT,$0-0
   891  #ifdef GOOS_windows
   892  	// Make sure we have enough room for 4 stack-backed fast-call
   893  	// registers as per Windows amd64 calling convention.
   894  	ADJSP	$32
   895  	// On Windows, asmcgocall_landingpad acts as landing pad for exceptions
   896  	// thrown in the cgo call. Exceptions that reach this function will be
   897  	// handled by runtime.sehtramp thanks to the SEH metadata added
   898  	// by the compiler.
   899  	// Note that runtime.sehtramp can't be attached directly to asmcgocall
   900  	// because its initial stack pointer can be outside the system stack bounds,
   901  	// and Windows stops the stack unwinding without calling the exception handler
   902  	// when it reaches that point.
   903  	MOVQ	BX, CX		// CX = first argument in Win64
   904  	CALL	AX
   905  	// The exception handler is not called if the next instruction is part of
   906  	// the epilogue, which includes the RET instruction, so we need to add a NOP here.
   907  	BYTE	$0x90
   908  	ADJSP	$-32
   909  	RET
   910  #endif
   911  	// Tail call AX on non-Windows, as the extra stack frame is not needed.
   912  	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
   913  	JMP	AX
   914  
   915  // func asmcgocall(fn, arg unsafe.Pointer) int32
   916  // Call fn(arg) on the scheduler stack,
   917  // aligned appropriately for the gcc ABI.
   918  // See cgocall.go for more details.
   919  TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   920  	MOVQ	fn+0(FP), AX
   921  	MOVQ	arg+8(FP), BX
   922  
   923  	MOVQ	SP, DX
   924  
   925  	// Figure out if we need to switch to m->g0 stack.
   926  	// We get called to create new OS threads too, and those
   927  	// come in on the m->g0 stack already. Or we might already
   928  	// be on the m->gsignal stack.
   929  	get_tls(CX)
   930  	MOVQ	g(CX), DI
   931  	CMPQ	DI, $0
   932  	JEQ	nosave
   933  	MOVQ	g_m(DI), R8
   934  	MOVQ	m_gsignal(R8), SI
   935  	CMPQ	DI, SI
   936  	JEQ	nosave
   937  	MOVQ	m_g0(R8), SI
   938  	CMPQ	DI, SI
   939  	JEQ	nosave
   940  
   941  	// Switch to system stack.
   942  	// The original frame pointer is stored in BP,
   943  	// which is useful for stack unwinding.
   944  	CALL	gosave_systemstack_switch<>(SB)
   945  	MOVQ	SI, g(CX)
   946  	MOVQ	(g_sched+gobuf_sp)(SI), SP
   947  
   948  	// Now on a scheduling stack (a pthread-created stack).
   949  	SUBQ	$16, SP
   950  	ANDQ	$~15, SP	// alignment for gcc ABI
   951  	MOVQ	DI, 8(SP)	// save g
   952  	MOVQ	(g_stack+stack_hi)(DI), DI
   953  	SUBQ	DX, DI
   954  	MOVQ	DI, 0(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
   955  	CALL	runtime·asmcgocall_landingpad(SB)
   956  
   957  	// Restore registers, g, stack pointer.
   958  	get_tls(CX)
   959  	MOVQ	8(SP), DI
   960  	MOVQ	(g_stack+stack_hi)(DI), SI
   961  	SUBQ	0(SP), SI
   962  	MOVQ	DI, g(CX)
   963  	MOVQ	SI, SP
   964  
   965  	MOVL	AX, ret+16(FP)
   966  	RET
   967  
   968  nosave:
   969  	// Running on a system stack, perhaps even without a g.
   970  	// Having no g can happen during thread creation or thread teardown
   971  	// (see needm/dropm on Solaris, for example).
   972  	// This code is like the above sequence but without saving/restoring g
   973  	// and without worrying about the stack moving out from under us
   974  	// (because we're on a system stack, not a goroutine stack).
   975  	// The above code could be used directly if already on a system stack,
   976  	// but then the only path through this code would be a rare case on Solaris.
   977  	// Using this code for all "already on system stack" calls exercises it more,
   978  	// which should help keep it correct.
   979  	SUBQ	$16, SP
   980  	ANDQ	$~15, SP
   981  	MOVQ	$0, 8(SP)		// where above code stores g, in case someone looks during debugging
   982  	MOVQ	DX, 0(SP)	// save original stack pointer
   983  	CALL	runtime·asmcgocall_landingpad(SB)
   984  	MOVQ	0(SP), SI	// restore original stack pointer
   985  	MOVQ	SI, SP
   986  	MOVL	AX, ret+16(FP)
   987  	RET
   988  
   989  #ifdef GOOS_windows
   990  // Dummy TLS that's used on Windows so that we don't crash trying
   991  // to restore the G register in needm. needm and its callees are
   992  // very careful never to actually use the G, the TLS just can't be
   993  // unset since we're in Go code.
   994  GLOBL zeroTLS<>(SB),RODATA,$const_tlsSize
   995  #endif
   996  
   997  // func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
   998  // See cgocall.go for more details.
   999  TEXT ·cgocallback(SB),NOSPLIT,$24-24
  1000  	NO_LOCAL_POINTERS
  1001  
  1002  	// Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g.
  1003  	// It is used to dropm while thread is exiting.
  1004  	MOVQ	fn+0(FP), AX
  1005  	CMPQ	AX, $0
  1006  	JNE	loadg
  1007  	// Restore the g from frame.
  1008  	get_tls(CX)
  1009  	MOVQ	frame+8(FP), BX
  1010  	MOVQ	BX, g(CX)
  1011  	JMP	dropm
  1012  
  1013  loadg:
   1014  	// g is nil if Go did not create the current thread,
   1015  	// or if this thread never called into Go on pthread platforms.
  1016  	// Call needm to obtain one m for temporary use.
  1017  	// In this case, we're running on the thread stack, so there's
  1018  	// lots of space, but the linker doesn't know. Hide the call from
  1019  	// the linker analysis by using an indirect call through AX.
  1020  	get_tls(CX)
  1021  #ifdef GOOS_windows
  1022  	MOVL	$0, BX
  1023  	CMPQ	CX, $0
  1024  	JEQ	2(PC)
  1025  #endif
  1026  	MOVQ	g(CX), BX
  1027  	CMPQ	BX, $0
  1028  	JEQ	needm
  1029  	MOVQ	g_m(BX), BX
  1030  	MOVQ	BX, savedm-8(SP)	// saved copy of oldm
  1031  	JMP	havem
  1032  needm:
  1033  #ifdef GOOS_windows
  1034  	// Set up a dummy TLS value. needm is careful not to use it,
  1035  	// but it needs to be there to prevent autogenerated code from
  1036  	// crashing when it loads from it.
  1037  	// We don't need to clear it or anything later because needm
  1038  	// will set up TLS properly.
  1039  	MOVQ	$zeroTLS<>(SB), DI
  1040  	CALL	runtime·settls(SB)
  1041  #endif
  1042  	// On some platforms (Windows) we cannot call needm through
  1043  	// an ABI wrapper because there's no TLS set up, and the ABI
  1044  	// wrapper will try to restore the G register (R14) from TLS.
  1045  	// Clear X15 because Go expects it and we're not calling
  1046  	// through a wrapper, but otherwise avoid setting the G
  1047  	// register in the wrapper and call needm directly. It
  1048  	// takes no arguments and doesn't return any values so
  1049  	// there's no need to handle that. Clear R14 so that there's
  1050  	// a bad value in there, in case needm tries to use it.
  1051  	XORPS	X15, X15
  1052  	CMPB	internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
  1053  	JNE	2(PC)
  1054  	VXORPS	X15, X15, X15
  1055  	XORQ    R14, R14
  1056  	MOVQ	$runtime·needAndBindM<ABIInternal>(SB), AX
  1057  	CALL	AX
  1058  	MOVQ	$0, savedm-8(SP)
  1059  	get_tls(CX)
  1060  	MOVQ	g(CX), BX
  1061  	MOVQ	g_m(BX), BX
  1062  
  1063  	// Set m->sched.sp = SP, so that if a panic happens
  1064  	// during the function we are about to execute, it will
  1065  	// have a valid SP to run on the g0 stack.
  1066  	// The next few lines (after the havem label)
  1067  	// will save this SP onto the stack and then write
  1068  	// the same SP back to m->sched.sp. That seems redundant,
  1069  	// but if an unrecovered panic happens, unwindm will
  1070  	// restore the g->sched.sp from the stack location
  1071  	// and then systemstack will try to use it. If we don't set it here,
  1072  	// that restored SP will be uninitialized (typically 0) and
  1073  	// will not be usable.
  1074  	MOVQ	m_g0(BX), SI
  1075  	MOVQ	SP, (g_sched+gobuf_sp)(SI)
  1076  
  1077  havem:
  1078  	// Now there's a valid m, and we're running on its m->g0.
  1079  	// Save current m->g0->sched.sp on stack and then set it to SP.
  1080  	// Save current sp in m->g0->sched.sp in preparation for
  1081  	// switch back to m->curg stack.
  1082  	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
  1083  	MOVQ	m_g0(BX), SI
  1084  	MOVQ	(g_sched+gobuf_sp)(SI), AX
  1085  	MOVQ	AX, 0(SP)
  1086  	MOVQ	SP, (g_sched+gobuf_sp)(SI)
  1087  
  1088  	// Switch to m->curg stack and call runtime.cgocallbackg.
  1089  	// Because we are taking over the execution of m->curg
  1090  	// but *not* resuming what had been running, we need to
  1091  	// save that information (m->curg->sched) so we can restore it.
  1092  	// We can restore m->curg->sched.sp easily, because calling
  1093  	// runtime.cgocallbackg leaves SP unchanged upon return.
  1094  	// To save m->curg->sched.pc, we push it onto the curg stack and
  1095  	// open a frame the same size as cgocallback's g0 frame.
  1096  	// Once we switch to the curg stack, the pushed PC will appear
  1097  	// to be the return PC of cgocallback, so that the traceback
  1098  	// will seamlessly trace back into the earlier calls.
  1099  	MOVQ	m_curg(BX), SI
  1100  	MOVQ	SI, g(CX)
  1101  	MOVQ	(g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
  1102  	MOVQ	(g_sched+gobuf_pc)(SI), BX
  1103  	MOVQ	BX, -8(DI)  // "push" return PC on the g stack
  1104  	// Gather our arguments into registers.
  1105  	MOVQ	fn+0(FP), BX
  1106  	MOVQ	frame+8(FP), CX
  1107  	MOVQ	ctxt+16(FP), DX
  1108  	// Compute the size of the frame, including return PC and, if
  1109  	// GOEXPERIMENT=framepointer, the saved base pointer
  1110  	LEAQ	fn+0(FP), AX
  1111  	SUBQ	SP, AX   // AX is our actual frame size
  1112  	SUBQ	AX, DI   // Allocate the same frame size on the g stack
  1113  	MOVQ	DI, SP
  1114  
  1115  	MOVQ	BX, 0(SP)
  1116  	MOVQ	CX, 8(SP)
  1117  	MOVQ	DX, 16(SP)
  1118  	MOVQ	$runtime·cgocallbackg(SB), AX
  1119  	CALL	AX	// indirect call to bypass nosplit check. We're on a different stack now.
  1120  
  1121  	// Compute the size of the frame again. FP and SP have
  1122  	// completely different values here than they did above,
  1123  	// but only their difference matters.
  1124  	LEAQ	fn+0(FP), AX
  1125  	SUBQ	SP, AX
  1126  
  1127  	// Restore g->sched (== m->curg->sched) from saved values.
  1128  	get_tls(CX)
  1129  	MOVQ	g(CX), SI
  1130  	MOVQ	SP, DI
  1131  	ADDQ	AX, DI
  1132  	MOVQ	-8(DI), BX
  1133  	MOVQ	BX, (g_sched+gobuf_pc)(SI)
  1134  	MOVQ	DI, (g_sched+gobuf_sp)(SI)
  1135  
  1136  	// Switch back to m->g0's stack and restore m->g0->sched.sp.
  1137  	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
  1138  	// so we do not have to restore it.)
  1139  	MOVQ	g(CX), BX
  1140  	MOVQ	g_m(BX), BX
  1141  	MOVQ	m_g0(BX), SI
  1142  	MOVQ	SI, g(CX)
  1143  	MOVQ	(g_sched+gobuf_sp)(SI), SP
  1144  	MOVQ	0(SP), AX
  1145  	MOVQ	AX, (g_sched+gobuf_sp)(SI)
  1146  
  1147  	// If the m on entry was nil, we called needm above to borrow an m,
  1148  	// 1. for the duration of the call on non-pthread platforms,
   1149  	// 2. or for as long as the C thread stays alive on pthread platforms.
  1150  	// If the m on entry wasn't nil,
  1151  	// 1. the thread might be a Go thread,
  1152  	// 2. or it wasn't the first call from a C thread on pthread platforms,
  1153  	//    since then we skip dropm to reuse the m in the first call.
  1154  	MOVQ	savedm-8(SP), BX
  1155  	CMPQ	BX, $0
  1156  	JNE	done
  1157  
  1158  	// Skip dropm to reuse it in the next call, when a pthread key has been created.
  1159  	MOVQ	_cgo_pthread_key_created(SB), AX
   1160  	// A nil _cgo_pthread_key_created pointer means cgo is disabled, so dropm is needed.
  1161  	CMPQ	AX, $0
  1162  	JEQ	dropm
  1163  	CMPQ	(AX), $0
  1164  	JNE	done
  1165  
  1166  dropm:
  1167  	MOVQ	$runtime·dropm(SB), AX
  1168  	CALL	AX
  1169  #ifdef GOOS_windows
  1170  	// We need to clear the TLS pointer in case the next
  1171  	// thread that comes into Go tries to reuse that space
  1172  	// but uses the same M.
  1173  	XORQ	DI, DI
  1174  	CALL	runtime·settls(SB)
  1175  #endif
  1176  done:
  1177  
  1178  	// Done!
  1179  	RET
  1180  
  1181  // func setg(gg *g)
  1182  // set g. for use by needm.
  1183  TEXT runtime·setg(SB), NOSPLIT, $0-8
  1184  	MOVQ	gg+0(FP), BX
  1185  	get_tls(CX)
  1186  	MOVQ	BX, g(CX)
  1187  	RET
  1188  
  1189  // void setg_gcc(G*); set g called from gcc.
  1190  TEXT setg_gcc<>(SB),NOSPLIT,$0
  1191  	get_tls(AX)
  1192  	MOVQ	DI, g(AX)
  1193  	MOVQ	DI, R14 // set the g register
  1194  	RET
  1195  
  1196  TEXT runtime·abort(SB),NOSPLIT,$0-0
  1197  	INT	$3
  1198  loop:
  1199  	JMP	loop
  1200  
  1201  // check that SP is in range [g->stack.lo, g->stack.hi)
  1202  TEXT runtime·stackcheck(SB), NOSPLIT|NOFRAME, $0-0
  1203  	get_tls(CX)
  1204  	MOVQ	g(CX), AX
  1205  	CMPQ	(g_stack+stack_hi)(AX), SP
  1206  	JHI	2(PC)
  1207  	CALL	runtime·abort(SB)
  1208  	CMPQ	SP, (g_stack+stack_lo)(AX)
  1209  	JHI	2(PC)
  1210  	CALL	runtime·abort(SB)
  1211  	RET
  1212  
  1213  // func cputicks() int64
  1214  TEXT runtime·cputicks(SB),NOSPLIT,$0-0
  1215  	CMPB	internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
  1216  	JNE	fences
  1217  	// Instruction stream serializing RDTSCP is supported.
  1218  	// RDTSCP is supported by Intel Nehalem (2008) and
  1219  	// AMD K8 Rev. F (2006) and newer.
  1220  	RDTSCP
  1221  done:
  1222  	SHLQ	$32, DX
  1223  	ADDQ	DX, AX
  1224  	MOVQ	AX, ret+0(FP)
  1225  	RET
  1226  fences:
  1227  	// MFENCE is instruction stream serializing and flushes the
  1228  	// store buffers on AMD. The serialization semantics of LFENCE on AMD
  1229  	// are dependent on MSR C001_1029 and CPU generation.
  1230  	// LFENCE on Intel does wait for all previous instructions to have executed.
  1231  	// Intel recommends MFENCE;LFENCE in its manuals before RDTSC to have all
   1232  	// previous instructions executed and all previous loads and stores to be globally visible.
  1233  	// Using MFENCE;LFENCE here aligns the serializing properties without
  1234  	// runtime detection of CPU manufacturer.
  1235  	MFENCE
  1236  	LFENCE
  1237  	RDTSC
  1238  	JMP done
  1239  
  1240  // func memhash(p unsafe.Pointer, h, s uintptr) uintptr
  1241  // hash function using AES hardware instructions
  1242  TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT,$0-32
  1243  	// AX = ptr to data
  1244  	// BX = seed
  1245  	// CX = size
  1246  	CMPB	runtime·useAeshash(SB), $0
  1247  	JEQ	noaes
  1248  	JMP	aeshashbody<>(SB)
  1249  noaes:
  1250  	JMP	runtime·memhashFallback<ABIInternal>(SB)
  1251  
  1252  // func strhash(p unsafe.Pointer, h uintptr) uintptr
  1253  TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT,$0-24
  1254  	// AX = ptr to string struct
  1255  	// BX = seed
  1256  	CMPB	runtime·useAeshash(SB), $0
  1257  	JEQ	noaes
  1258  	MOVQ	8(AX), CX	// length of string
  1259  	MOVQ	(AX), AX	// string data
  1260  	JMP	aeshashbody<>(SB)
  1261  noaes:
  1262  	JMP	runtime·strhashFallback<ABIInternal>(SB)
  1263  
  1264  // AX: data
  1265  // BX: hash seed
  1266  // CX: length
  1267  // At return: AX = return value
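         // Size classes handled below (length in CX):
         //   0-15 bytes    aes0to15   (one masked 16-byte load)
         //   16 bytes      aes16
         //   17-32 bytes   aes17to32
         //   33-64 bytes   aes33to64
         //   65-128 bytes  aes65to128
         //   129+ bytes    aes129plus (128-byte blocks in aesloop)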
  1268  TEXT aeshashbody<>(SB),NOSPLIT,$0-0
  1269  	// Fill an SSE register with our seeds.
  1270  	MOVQ	BX, X0				// 64 bits of per-table hash seed
  1271  	PINSRW	$4, CX, X0			// 16 bits of length
  1272  	PSHUFHW $0, X0, X0			// repeat length 4 times total
  1273  	MOVO	X0, X1				// save unscrambled seed
  1274  	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
  1275  	AESENC	X0, X0				// scramble seed
  1276  
  1277  	CMPQ	CX, $16
  1278  	JB	aes0to15
  1279  	JE	aes16
  1280  	CMPQ	CX, $32
  1281  	JBE	aes17to32
  1282  	CMPQ	CX, $64
  1283  	JBE	aes33to64
  1284  	CMPQ	CX, $128
  1285  	JBE	aes65to128
  1286  	JMP	aes129plus
  1287  
  1288  aes0to15:
  1289  	TESTQ	CX, CX
  1290  	JE	aes0
  1291  
  1292  	ADDQ	$16, AX
  1293  	TESTW	$0xff0, AX
  1294  	JE	endofpage
  1295  
  1296  	// 16 bytes loaded at this address won't cross
  1297  	// a page boundary, so we can load it directly.
  1298  	MOVOU	-16(AX), X1
  1299  	ADDQ	CX, CX
  1300  	MOVQ	$masks<>(SB), AX
  1301  	PAND	(AX)(CX*8), X1
  1302  final1:
  1303  	PXOR	X0, X1	// xor data with seed
  1304  	AESENC	X1, X1	// scramble combo 3 times
  1305  	AESENC	X1, X1
  1306  	AESENC	X1, X1
  1307  	MOVQ	X1, AX	// return X1
  1308  	RET
  1309  
  1310  endofpage:
  1311  	// address ends in 1111xxxx. Might be up against
  1312  	// a page boundary, so load ending at last byte.
  1313  	// Then shift bytes down using pshufb.
  1314  	MOVOU	-32(AX)(CX*1), X1
  1315  	ADDQ	CX, CX
  1316  	MOVQ	$shifts<>(SB), AX
  1317  	PSHUFB	(AX)(CX*8), X1
  1318  	JMP	final1
  1319  
  1320  aes0:
  1321  	// Return scrambled input seed
  1322  	AESENC	X0, X0
  1323  	MOVQ	X0, AX	// return X0
  1324  	RET
  1325  
  1326  aes16:
  1327  	MOVOU	(AX), X1
  1328  	JMP	final1
  1329  
  1330  aes17to32:
  1331  	// make second starting seed
  1332  	PXOR	runtime·aeskeysched+16(SB), X1
  1333  	AESENC	X1, X1
  1334  
  1335  	// load data to be hashed
  1336  	MOVOU	(AX), X2
  1337  	MOVOU	-16(AX)(CX*1), X3
  1338  
  1339  	// xor with seed
  1340  	PXOR	X0, X2
  1341  	PXOR	X1, X3
  1342  
  1343  	// scramble 3 times
  1344  	AESENC	X2, X2
  1345  	AESENC	X3, X3
  1346  	AESENC	X2, X2
  1347  	AESENC	X3, X3
  1348  	AESENC	X2, X2
  1349  	AESENC	X3, X3
  1350  
  1351  	// combine results
  1352  	PXOR	X3, X2
  1353  	MOVQ	X2, AX	// return X2
  1354  	RET
  1355  
  1356  aes33to64:
  1357  	// make 3 more starting seeds
  1358  	MOVO	X1, X2
  1359  	MOVO	X1, X3
  1360  	PXOR	runtime·aeskeysched+16(SB), X1
  1361  	PXOR	runtime·aeskeysched+32(SB), X2
  1362  	PXOR	runtime·aeskeysched+48(SB), X3
  1363  	AESENC	X1, X1
  1364  	AESENC	X2, X2
  1365  	AESENC	X3, X3
  1366  
  1367  	MOVOU	(AX), X4
  1368  	MOVOU	16(AX), X5
  1369  	MOVOU	-32(AX)(CX*1), X6
  1370  	MOVOU	-16(AX)(CX*1), X7
  1371  
  1372  	PXOR	X0, X4
  1373  	PXOR	X1, X5
  1374  	PXOR	X2, X6
  1375  	PXOR	X3, X7
  1376  
  1377  	AESENC	X4, X4
  1378  	AESENC	X5, X5
  1379  	AESENC	X6, X6
  1380  	AESENC	X7, X7
  1381  
  1382  	AESENC	X4, X4
  1383  	AESENC	X5, X5
  1384  	AESENC	X6, X6
  1385  	AESENC	X7, X7
  1386  
  1387  	AESENC	X4, X4
  1388  	AESENC	X5, X5
  1389  	AESENC	X6, X6
  1390  	AESENC	X7, X7
  1391  
  1392  	PXOR	X6, X4
  1393  	PXOR	X7, X5
  1394  	PXOR	X5, X4
  1395  	MOVQ	X4, AX	// return X4
  1396  	RET
  1397  
  1398  aes65to128:
  1399  	// make 7 more starting seeds
  1400  	MOVO	X1, X2
  1401  	MOVO	X1, X3
  1402  	MOVO	X1, X4
  1403  	MOVO	X1, X5
  1404  	MOVO	X1, X6
  1405  	MOVO	X1, X7
  1406  	PXOR	runtime·aeskeysched+16(SB), X1
  1407  	PXOR	runtime·aeskeysched+32(SB), X2
  1408  	PXOR	runtime·aeskeysched+48(SB), X3
  1409  	PXOR	runtime·aeskeysched+64(SB), X4
  1410  	PXOR	runtime·aeskeysched+80(SB), X5
  1411  	PXOR	runtime·aeskeysched+96(SB), X6
  1412  	PXOR	runtime·aeskeysched+112(SB), X7
  1413  	AESENC	X1, X1
  1414  	AESENC	X2, X2
  1415  	AESENC	X3, X3
  1416  	AESENC	X4, X4
  1417  	AESENC	X5, X5
  1418  	AESENC	X6, X6
  1419  	AESENC	X7, X7
  1420  
  1421  	// load data
  1422  	MOVOU	(AX), X8
  1423  	MOVOU	16(AX), X9
  1424  	MOVOU	32(AX), X10
  1425  	MOVOU	48(AX), X11
  1426  	MOVOU	-64(AX)(CX*1), X12
  1427  	MOVOU	-48(AX)(CX*1), X13
  1428  	MOVOU	-32(AX)(CX*1), X14
  1429  	MOVOU	-16(AX)(CX*1), X15
  1430  
  1431  	// xor with seed
  1432  	PXOR	X0, X8
  1433  	PXOR	X1, X9
  1434  	PXOR	X2, X10
  1435  	PXOR	X3, X11
  1436  	PXOR	X4, X12
  1437  	PXOR	X5, X13
  1438  	PXOR	X6, X14
  1439  	PXOR	X7, X15
  1440  
  1441  	// scramble 3 times
  1442  	AESENC	X8, X8
  1443  	AESENC	X9, X9
  1444  	AESENC	X10, X10
  1445  	AESENC	X11, X11
  1446  	AESENC	X12, X12
  1447  	AESENC	X13, X13
  1448  	AESENC	X14, X14
  1449  	AESENC	X15, X15
  1450  
  1451  	AESENC	X8, X8
  1452  	AESENC	X9, X9
  1453  	AESENC	X10, X10
  1454  	AESENC	X11, X11
  1455  	AESENC	X12, X12
  1456  	AESENC	X13, X13
  1457  	AESENC	X14, X14
  1458  	AESENC	X15, X15
  1459  
  1460  	AESENC	X8, X8
  1461  	AESENC	X9, X9
  1462  	AESENC	X10, X10
  1463  	AESENC	X11, X11
  1464  	AESENC	X12, X12
  1465  	AESENC	X13, X13
  1466  	AESENC	X14, X14
  1467  	AESENC	X15, X15
  1468  
  1469  	// combine results
  1470  	PXOR	X12, X8
  1471  	PXOR	X13, X9
  1472  	PXOR	X14, X10
  1473  	PXOR	X15, X11
  1474  	PXOR	X10, X8
  1475  	PXOR	X11, X9
  1476  	PXOR	X9, X8
  1477  	// X15 must be zero on return
  1478  	PXOR	X15, X15
  1479  	MOVQ	X8, AX	// return X8
  1480  	RET
  1481  
  1482  aes129plus:
  1483  	// make 7 more starting seeds
  1484  	MOVO	X1, X2
  1485  	MOVO	X1, X3
  1486  	MOVO	X1, X4
  1487  	MOVO	X1, X5
  1488  	MOVO	X1, X6
  1489  	MOVO	X1, X7
  1490  	PXOR	runtime·aeskeysched+16(SB), X1
  1491  	PXOR	runtime·aeskeysched+32(SB), X2
  1492  	PXOR	runtime·aeskeysched+48(SB), X3
  1493  	PXOR	runtime·aeskeysched+64(SB), X4
  1494  	PXOR	runtime·aeskeysched+80(SB), X5
  1495  	PXOR	runtime·aeskeysched+96(SB), X6
  1496  	PXOR	runtime·aeskeysched+112(SB), X7
  1497  	AESENC	X1, X1
  1498  	AESENC	X2, X2
  1499  	AESENC	X3, X3
  1500  	AESENC	X4, X4
  1501  	AESENC	X5, X5
  1502  	AESENC	X6, X6
  1503  	AESENC	X7, X7
  1504  
  1505  	// start with last (possibly overlapping) block
  1506  	MOVOU	-128(AX)(CX*1), X8
  1507  	MOVOU	-112(AX)(CX*1), X9
  1508  	MOVOU	-96(AX)(CX*1), X10
  1509  	MOVOU	-80(AX)(CX*1), X11
  1510  	MOVOU	-64(AX)(CX*1), X12
  1511  	MOVOU	-48(AX)(CX*1), X13
  1512  	MOVOU	-32(AX)(CX*1), X14
  1513  	MOVOU	-16(AX)(CX*1), X15
  1514  
  1515  	// xor in seed
  1516  	PXOR	X0, X8
  1517  	PXOR	X1, X9
  1518  	PXOR	X2, X10
  1519  	PXOR	X3, X11
  1520  	PXOR	X4, X12
  1521  	PXOR	X5, X13
  1522  	PXOR	X6, X14
  1523  	PXOR	X7, X15
  1524  
  1525  	// compute number of remaining 128-byte blocks
  1526  	DECQ	CX
  1527  	SHRQ	$7, CX
  1528  
  1529  	PCALIGN $16
  1530  aesloop:
  1531  	// scramble state
  1532  	AESENC	X8, X8
  1533  	AESENC	X9, X9
  1534  	AESENC	X10, X10
  1535  	AESENC	X11, X11
  1536  	AESENC	X12, X12
  1537  	AESENC	X13, X13
  1538  	AESENC	X14, X14
  1539  	AESENC	X15, X15
  1540  
  1541  	// scramble state, xor in a block
  1542  	MOVOU	(AX), X0
  1543  	MOVOU	16(AX), X1
  1544  	MOVOU	32(AX), X2
  1545  	MOVOU	48(AX), X3
  1546  	AESENC	X0, X8
  1547  	AESENC	X1, X9
  1548  	AESENC	X2, X10
  1549  	AESENC	X3, X11
  1550  	MOVOU	64(AX), X4
  1551  	MOVOU	80(AX), X5
  1552  	MOVOU	96(AX), X6
  1553  	MOVOU	112(AX), X7
  1554  	AESENC	X4, X12
  1555  	AESENC	X5, X13
  1556  	AESENC	X6, X14
  1557  	AESENC	X7, X15
  1558  
  1559  	ADDQ	$128, AX
  1560  	DECQ	CX
  1561  	JNE	aesloop
  1562  
  1563  	// 3 more scrambles to finish
  1564  	AESENC	X8, X8
  1565  	AESENC	X9, X9
  1566  	AESENC	X10, X10
  1567  	AESENC	X11, X11
  1568  	AESENC	X12, X12
  1569  	AESENC	X13, X13
  1570  	AESENC	X14, X14
  1571  	AESENC	X15, X15
  1572  	AESENC	X8, X8
  1573  	AESENC	X9, X9
  1574  	AESENC	X10, X10
  1575  	AESENC	X11, X11
  1576  	AESENC	X12, X12
  1577  	AESENC	X13, X13
  1578  	AESENC	X14, X14
  1579  	AESENC	X15, X15
  1580  	AESENC	X8, X8
  1581  	AESENC	X9, X9
  1582  	AESENC	X10, X10
  1583  	AESENC	X11, X11
  1584  	AESENC	X12, X12
  1585  	AESENC	X13, X13
  1586  	AESENC	X14, X14
  1587  	AESENC	X15, X15
  1588  
  1589  	PXOR	X12, X8
  1590  	PXOR	X13, X9
  1591  	PXOR	X14, X10
  1592  	PXOR	X15, X11
  1593  	PXOR	X10, X8
  1594  	PXOR	X11, X9
  1595  	PXOR	X9, X8
  1596  	// X15 must be zero on return
  1597  	PXOR	X15, X15
  1598  	MOVQ	X8, AX	// return X8
  1599  	RET
  1600  
  1601  // func memhash32(p unsafe.Pointer, h uintptr) uintptr
  1602  // ABIInternal for performance.
  1603  TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT,$0-24
  1604  	// AX = ptr to data
  1605  	// BX = seed
  1606  	CMPB	runtime·useAeshash(SB), $0
  1607  	JEQ	noaes
  1608  	MOVQ	BX, X0	// X0 = seed
  1609  	PINSRD	$2, (AX), X0	// data
  1610  	AESENC	runtime·aeskeysched+0(SB), X0
  1611  	AESENC	runtime·aeskeysched+16(SB), X0
  1612  	AESENC	runtime·aeskeysched+32(SB), X0
  1613  	MOVQ	X0, AX	// return X0
  1614  	RET
  1615  noaes:
  1616  	JMP	runtime·memhash32Fallback<ABIInternal>(SB)
  1617  
  1618  // func memhash64(p unsafe.Pointer, h uintptr) uintptr
  1619  // ABIInternal for performance.
  1620  TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT,$0-24
  1621  	// AX = ptr to data
  1622  	// BX = seed
  1623  	CMPB	runtime·useAeshash(SB), $0
  1624  	JEQ	noaes
  1625  	MOVQ	BX, X0	// X0 = seed
  1626  	PINSRQ	$1, (AX), X0	// data
  1627  	AESENC	runtime·aeskeysched+0(SB), X0
  1628  	AESENC	runtime·aeskeysched+16(SB), X0
  1629  	AESENC	runtime·aeskeysched+32(SB), X0
  1630  	MOVQ	X0, AX	// return X0
  1631  	RET
  1632  noaes:
  1633  	JMP	runtime·memhash64Fallback<ABIInternal>(SB)
  1634  
  1635  // simple mask to get rid of data in the high part of the register.
  1636  DATA masks<>+0x00(SB)/8, $0x0000000000000000
  1637  DATA masks<>+0x08(SB)/8, $0x0000000000000000
  1638  DATA masks<>+0x10(SB)/8, $0x00000000000000ff
  1639  DATA masks<>+0x18(SB)/8, $0x0000000000000000
  1640  DATA masks<>+0x20(SB)/8, $0x000000000000ffff
  1641  DATA masks<>+0x28(SB)/8, $0x0000000000000000
  1642  DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
  1643  DATA masks<>+0x38(SB)/8, $0x0000000000000000
  1644  DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
  1645  DATA masks<>+0x48(SB)/8, $0x0000000000000000
  1646  DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
  1647  DATA masks<>+0x58(SB)/8, $0x0000000000000000
  1648  DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
  1649  DATA masks<>+0x68(SB)/8, $0x0000000000000000
  1650  DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
  1651  DATA masks<>+0x78(SB)/8, $0x0000000000000000
  1652  DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
  1653  DATA masks<>+0x88(SB)/8, $0x0000000000000000
  1654  DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
  1655  DATA masks<>+0x98(SB)/8, $0x00000000000000ff
  1656  DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
  1657  DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
  1658  DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
  1659  DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
  1660  DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
  1661  DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
  1662  DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
  1663  DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
  1664  DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
  1665  DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
  1666  DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
  1667  DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
  1668  GLOBL masks<>(SB),RODATA,$256
  1669  
  1670  // func checkASM() bool
  1671  TEXT ·checkASM(SB),NOSPLIT,$0-1
  1672  	// check that masks<>(SB) and shifts<>(SB) are 16-byte aligned
  1673  	MOVQ	$masks<>(SB), AX
  1674  	MOVQ	$shifts<>(SB), BX
  1675  	ORQ	BX, AX
  1676  	TESTQ	$15, AX
  1677  	SETEQ	ret+0(FP)
  1678  	RET
  1679  
  1680  // these are arguments to pshufb. They move data down from
  1681  // the high bytes of the register to the low bytes of the register.
  1682  // index is how many bytes to move.
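        // For example, the 16-byte entry at shifts<>+0x30 (index 3) is
        // 0d 0e 0f ff ... ff, so PSHUFB with it moves the top 3 bytes of the
        // register down to its low 3 bytes and zeroes everything else
        // (control bytes with the high bit set select zero).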
  1683  DATA shifts<>+0x00(SB)/8, $0x0000000000000000
  1684  DATA shifts<>+0x08(SB)/8, $0x0000000000000000
  1685  DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
  1686  DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
  1687  DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
  1688  DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
  1689  DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
  1690  DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
  1691  DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
  1692  DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
  1693  DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
  1694  DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
  1695  DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
  1696  DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
  1697  DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
  1698  DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
  1699  DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
  1700  DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
  1701  DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
  1702  DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
  1703  DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
  1704  DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
  1705  DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
  1706  DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
  1707  DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
  1708  DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
  1709  DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
  1710  DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
  1711  DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
  1712  DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
  1713  DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
  1714  DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
  1715  GLOBL shifts<>(SB),RODATA,$256
  1716  
  1717  // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1718  // Must obey the gcc calling convention.
  1719  TEXT _cgo_topofstack(SB),NOSPLIT,$0
  1720  	get_tls(CX)
  1721  	MOVQ	g(CX), AX
  1722  	MOVQ	g_m(AX), AX
  1723  	MOVQ	m_curg(AX), AX
  1724  	MOVQ	(g_stack+stack_hi)(AX), AX
  1725  	RET
  1726  
  1727  // The top-most function running on a goroutine
  1728  // returns to goexit+PCQuantum.
  1729  TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME|NOFRAME,$0-0
  1730  	BYTE	$0x90	// NOP
  1731  	CALL	runtime·goexit1(SB)	// does not return
  1732  	// traceback from goexit1 must hit code range of goexit
  1733  	BYTE	$0x90	// NOP
  1734  
  1735  // This is called from .init_array and follows the platform, not Go, ABI.
  1736  TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
  1737  	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
  1738  	MOVQ	runtime·lastmoduledatap(SB), AX
  1739  	MOVQ	DI, moduledata_next(AX)
  1740  	MOVQ	DI, runtime·lastmoduledatap(SB)
  1741  	POPQ	R15
  1742  	RET
  1743  
  1744  // Initialize special registers then jump to sigpanic.
  1745  // This function is injected from the signal handler for panicking
  1746  // signals. It is quite painful to set X15 in the signal context,
  1747  // so we do it here.
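        // (In the Go internal ABI on amd64, R14 holds the current g and X15
        // is the fixed zero register; both are established here before
        // jumping to sigpanic.)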
  1748  TEXT ·sigpanic0(SB),NOSPLIT,$0-0
  1749  	get_tls(R14)
  1750  	MOVQ	g(R14), R14
  1751  	XORPS	X15, X15
  1752  	CMPB	internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
  1753  	JNE	2(PC)
  1754  	VXORPS	X15, X15, X15
  1755  	JMP	·sigpanic<ABIInternal>(SB)
  1756  
  1757  // gcWriteBarrier informs the GC about heap pointer writes.
  1758  //
  1759  // gcWriteBarrier returns space in a write barrier buffer which
  1760  // should be filled in by the caller.
  1761  // gcWriteBarrier does NOT follow the Go ABI. It accepts the
  1762  // number of bytes of buffer needed in R11, and returns a pointer
  1763  // to the buffer space in R11.
  1764  // It clobbers FLAGS. It does not clobber any general-purpose registers,
  1765  // but may clobber others (e.g., SSE registers).
  1766  // Typical use would be, when doing *(CX+88) = AX
  1767  //     CMPL    $0, runtime.writeBarrier(SB)
  1768  //     JEQ     dowrite
  1769  //     CALL    runtime.gcWriteBarrier2(SB)
  1770  //     MOVQ    AX, (R11)
  1771  //     MOVQ    88(CX), DX
  1772  //     MOVQ    DX, 8(R11)
  1773  // dowrite:
  1774  //     MOVQ    AX, 88(CX)
  1775  TEXT gcWriteBarrier<>(SB),NOSPLIT,$112
  1776  	// Save the registers clobbered by the fast path. This is slightly
  1777  	// faster than having the caller spill these.
  1778  	MOVQ	R12, 96(SP)
  1779  	MOVQ	R13, 104(SP)
  1780  retry:
  1781  	// TODO: Consider passing g.m.p in as an argument so the loads can be shared
  1782  	// across a sequence of write barriers.
  1783  	MOVQ	g_m(R14), R13
  1784  	MOVQ	m_p(R13), R13
  1785  	// Get current buffer write position.
  1786  	MOVQ	(p_wbBuf+wbBuf_next)(R13), R12	// original next position
  1787  	ADDQ	R11, R12			// new next position
  1788  	// Is the buffer full?
  1789  	CMPQ	R12, (p_wbBuf+wbBuf_end)(R13)
  1790  	JA	flush
  1791  	// Commit to the larger buffer.
  1792  	MOVQ	R12, (p_wbBuf+wbBuf_next)(R13)
  1793  	// Make return value (the original next position)
  1794  	SUBQ	R11, R12
  1795  	MOVQ	R12, R11
  1796  	// Restore registers.
  1797  	MOVQ	96(SP), R12
  1798  	MOVQ	104(SP), R13
  1799  	RET
  1800  
  1801  flush:
  1802  	// Save all general purpose registers since these could be
  1803  	// clobbered by wbBufFlush and were not saved by the caller.
  1804  	// It is possible for wbBufFlush to clobber other registers
  1805  	// (e.g., SSE registers), but the compiler takes care of saving
  1806  	// those in the caller if necessary. This strikes a balance
  1807  	// with registers that are likely to be used.
  1808  	//
  1809  	// We don't have type information for these, but all code under
  1810  	// here is NOSPLIT, so nothing will observe these.
  1811  	//
  1812  	// TODO: We could strike a different balance; e.g., saving X0
  1813  	// and not saving GP registers that are less likely to be used.
  1814  	MOVQ	DI, 0(SP)
  1815  	MOVQ	AX, 8(SP)
  1816  	MOVQ	BX, 16(SP)
  1817  	MOVQ	CX, 24(SP)
  1818  	MOVQ	DX, 32(SP)
  1819  	// DI already saved
  1820  	MOVQ	SI, 40(SP)
  1821  	MOVQ	BP, 48(SP)
  1822  	MOVQ	R8, 56(SP)
  1823  	MOVQ	R9, 64(SP)
  1824  	MOVQ	R10, 72(SP)
  1825  	MOVQ	R11, 80(SP)
  1826  	// R12 already saved
  1827  	// R13 already saved
  1828  	// R14 is g
  1829  	MOVQ	R15, 88(SP)
  1830  
  1831  	CALL	runtime·wbBufFlush(SB)
  1832  
  1833  	MOVQ	0(SP), DI
  1834  	MOVQ	8(SP), AX
  1835  	MOVQ	16(SP), BX
  1836  	MOVQ	24(SP), CX
  1837  	MOVQ	32(SP), DX
  1838  	MOVQ	40(SP), SI
  1839  	MOVQ	48(SP), BP
  1840  	MOVQ	56(SP), R8
  1841  	MOVQ	64(SP), R9
  1842  	MOVQ	72(SP), R10
  1843  	MOVQ	80(SP), R11
  1844  	MOVQ	88(SP), R15
  1845  	JMP	retry
  1846  
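        // Each gcWriteBarrierN wrapper below requests room for N pointer
        // slots (N*8 bytes) in R11 and tail-calls the common gcWriteBarrier<>
        // path above.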
  1847  TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
  1848  	MOVL   $8, R11
  1849  	JMP     gcWriteBarrier<>(SB)
  1850  TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
  1851  	MOVL   $16, R11
  1852  	JMP     gcWriteBarrier<>(SB)
  1853  TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
  1854  	MOVL   $24, R11
  1855  	JMP     gcWriteBarrier<>(SB)
  1856  TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
  1857  	MOVL   $32, R11
  1858  	JMP     gcWriteBarrier<>(SB)
  1859  TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
  1860  	MOVL   $40, R11
  1861  	JMP     gcWriteBarrier<>(SB)
  1862  TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
  1863  	MOVL   $48, R11
  1864  	JMP     gcWriteBarrier<>(SB)
  1865  TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
  1866  	MOVL   $56, R11
  1867  	JMP     gcWriteBarrier<>(SB)
  1868  TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
  1869  	MOVL   $64, R11
  1870  	JMP     gcWriteBarrier<>(SB)
  1871  
  1872  DATA	debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
  1873  GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $20	// Size duplicated below
  1874  
  1875  // debugCallV2 is the entry point for debugger-injected function
  1876  // calls on running goroutines. It informs the runtime that a
  1877  // debug call has been injected and creates a call frame for the
  1878  // debugger to fill in.
  1879  //
  1880  // To inject a function call, a debugger should:
  1881  // 1. Check that the goroutine is in state _Grunning and that
  1882  //    there are at least 256 bytes free on the stack.
  1883  // 2. Push the current PC on the stack (updating SP).
  1884  // 3. Write the desired argument frame size at SP-16 (using the SP
  1885  //    after step 2).
  1886  // 4. Save all machine registers (including flags and XMM registers)
  1887  //    so they can be restored later by the debugger.
  1888  // 5. Set the PC to debugCallV2 and resume execution.
  1889  //
  1890  // If the goroutine is in state _Grunnable, then it's not generally
  1891  // safe to inject a call because it may return out via other runtime
  1892  // operations. Instead, the debugger should unwind the stack to find
  1893  // the return to non-runtime code, add a temporary breakpoint there,
  1894  // and inject the call once that breakpoint is hit.
  1895  //
  1896  // If the goroutine is in any other state, it's not safe to inject a call.
  1897  //
  1898  // This function communicates back to the debugger by setting R12 and
  1899  // invoking INT3 to raise a breakpoint signal. See the comments in the
  1900  // implementation for the protocol the debugger is expected to
  1901  // follow. InjectDebugCall in the runtime tests demonstrates this protocol.
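        //
        // As a quick reference, the R12 values used below are: 8 = the call
        // was rejected or the requested frame is too large (a reason string
        // pointer and length are at 0(SP) and 8(SP)), 0 = the call frame is
        // ready at SP, 1 = the injected call returned, 2 = the injected call
        // panicked (the panic value is at SP), and 16 = restore registers
        // and resume.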
  1902  //
  1903  // The debugger must ensure that any pointers passed to the function
  1904  // obey escape analysis requirements. Specifically, it must not pass
  1905  // a stack pointer to an escaping argument. debugCallV2 cannot check
  1906  // this invariant.
  1907  //
  1908  // This is ABIInternal because Go code injects its PC directly into new
  1909  // goroutine stacks.
  1910  TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT,$152-0
  1911  	// Save all registers that may contain pointers so they can be
  1912  	// conservatively scanned.
  1913  	//
  1914  	// We can't do anything that might clobber any of these
  1915  	// registers before this.
  1916  	MOVQ	R15, r15-(14*8+8)(SP)
  1917  	MOVQ	R14, r14-(13*8+8)(SP)
  1918  	MOVQ	R13, r13-(12*8+8)(SP)
  1919  	MOVQ	R12, r12-(11*8+8)(SP)
  1920  	MOVQ	R11, r11-(10*8+8)(SP)
  1921  	MOVQ	R10, r10-(9*8+8)(SP)
  1922  	MOVQ	R9, r9-(8*8+8)(SP)
  1923  	MOVQ	R8, r8-(7*8+8)(SP)
  1924  	MOVQ	DI, di-(6*8+8)(SP)
  1925  	MOVQ	SI, si-(5*8+8)(SP)
  1926  	MOVQ	BP, bp-(4*8+8)(SP)
  1927  	MOVQ	BX, bx-(3*8+8)(SP)
  1928  	MOVQ	DX, dx-(2*8+8)(SP)
  1929  	// Save the frame size before we clobber it. Either of the last
  1930  	// saves could clobber this depending on whether there's a saved BP.
  1931  	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
  1932  	MOVQ	CX, cx-(1*8+8)(SP)
  1933  	MOVQ	AX, ax-(0*8+8)(SP)
  1934  
  1935  	// Save the argument frame size.
  1936  	MOVQ	DX, frameSize-128(SP)
  1937  
  1938  	// Perform a safe-point check.
  1939  	MOVQ	retpc-8(FP), AX	// Caller's PC
  1940  	MOVQ	AX, 0(SP)
  1941  	CALL	runtime·debugCallCheck(SB)
  1942  	MOVQ	8(SP), AX
  1943  	TESTQ	AX, AX
  1944  	JZ	good
  1945  	// The safety check failed. Put the reason string at the top
  1946  	// of the stack.
  1947  	MOVQ	AX, 0(SP)
  1948  	MOVQ	16(SP), AX
  1949  	MOVQ	AX, 8(SP)
  1950  	// Set R12 to 8 and invoke INT3. The debugger should get the
  1951  	// reason a call can't be injected from the top of the stack
  1952  	// and resume execution.
  1953  	MOVQ	$8, R12
  1954  	BYTE	$0xcc
  1955  	JMP	restore
  1956  
  1957  good:
  1958  	// Registers are saved and it's safe to make a call.
  1959  	// Open up a call frame, moving the stack if necessary.
  1960  	//
  1961  	// Once the frame is allocated, this will set R12 to 0 and
  1962  	// invoke INT3. The debugger should write the argument
  1963  	// frame for the call at SP, set up argument registers, push
  1964  	// the trapping PC on the stack, set the PC to the function to
  1965  	// call, set RDX to point to the closure (if a closure call),
  1966  	// and resume execution.
  1967  	//
  1968  	// If the function returns, this will set R12 to 1 and invoke
  1969  	// INT3. The debugger can then inspect any return value saved
  1970  	// on the stack at SP and in registers and resume execution again.
  1971  	//
  1972  	// If the function panics, this will set R12 to 2 and invoke INT3.
  1973  	// The interface{} value of the panic will be at SP. The debugger
  1974  	// can inspect the panic value and resume execution again.
  1975  #define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
  1976  	CMPQ	AX, $MAXSIZE;			\
  1977  	JA	5(PC);				\
  1978  	MOVQ	$NAME(SB), AX;			\
  1979  	MOVQ	AX, 0(SP);			\
  1980  	CALL	runtime·debugCallWrap(SB);	\
  1981  	JMP	restore
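
        	// Each DEBUG_CALL_DISPATCH arm below checks whether the requested
        	// frame size fits in MAXSIZE and, if so, passes the matching
        	// debugCallN frame function to runtime·debugCallWrap, then jumps
        	// to restore.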
  1982  
  1983  	MOVQ	frameSize-128(SP), AX
  1984  	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
  1985  	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
  1986  	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
  1987  	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
  1988  	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
  1989  	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
  1990  	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
  1991  	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
  1992  	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
  1993  	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
  1994  	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
  1995  	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
  1996  	// The frame size is too large. Report the error.
  1997  	MOVQ	$debugCallFrameTooLarge<>(SB), AX
  1998  	MOVQ	AX, 0(SP)
  1999  	MOVQ	$20, 8(SP) // length of debugCallFrameTooLarge string
  2000  	MOVQ	$8, R12
  2001  	BYTE	$0xcc
  2002  	JMP	restore
  2003  
  2004  restore:
  2005  	// Calls and failures resume here.
  2006  	//
  2007  	// Set R12 to 16 and invoke INT3. The debugger should restore
  2008  	// all registers except RIP and RSP and resume execution.
  2009  	MOVQ	$16, R12
  2010  	BYTE	$0xcc
  2011  	// We must not modify flags after this point.
  2012  
  2013  	// Restore pointer-containing registers, which may have been
  2014  	// modified from the debugger's copy by stack copying.
  2015  	MOVQ	ax-(0*8+8)(SP), AX
  2016  	MOVQ	cx-(1*8+8)(SP), CX
  2017  	MOVQ	dx-(2*8+8)(SP), DX
  2018  	MOVQ	bx-(3*8+8)(SP), BX
  2019  	MOVQ	bp-(4*8+8)(SP), BP
  2020  	MOVQ	si-(5*8+8)(SP), SI
  2021  	MOVQ	di-(6*8+8)(SP), DI
  2022  	MOVQ	r8-(7*8+8)(SP), R8
  2023  	MOVQ	r9-(8*8+8)(SP), R9
  2024  	MOVQ	r10-(9*8+8)(SP), R10
  2025  	MOVQ	r11-(10*8+8)(SP), R11
  2026  	MOVQ	r12-(11*8+8)(SP), R12
  2027  	MOVQ	r13-(12*8+8)(SP), R13
  2028  	MOVQ	r14-(13*8+8)(SP), R14
  2029  	MOVQ	r15-(14*8+8)(SP), R15
  2030  
  2031  	RET
  2032  
  2033  // runtime.debugCallCheck assumes that functions defined with the
  2034  // DEBUG_CALL_FN macro are safe points to inject calls.
  2035  #define DEBUG_CALL_FN(NAME,MAXSIZE)		\
  2036  TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
  2037  	NO_LOCAL_POINTERS;			\
  2038  	MOVQ	$0, R12;				\
  2039  	BYTE	$0xcc;				\
  2040  	MOVQ	$1, R12;				\
  2041  	BYTE	$0xcc;				\
  2042  	RET
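        // Each debugCallN function below simply reserves a MAXSIZE-byte frame
        // for the injected call's arguments and raises the R12=0 ("frame
        // ready") and R12=1 ("call returned") breakpoints described above.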
  2043  DEBUG_CALL_FN(debugCall32<>, 32)
  2044  DEBUG_CALL_FN(debugCall64<>, 64)
  2045  DEBUG_CALL_FN(debugCall128<>, 128)
  2046  DEBUG_CALL_FN(debugCall256<>, 256)
  2047  DEBUG_CALL_FN(debugCall512<>, 512)
  2048  DEBUG_CALL_FN(debugCall1024<>, 1024)
  2049  DEBUG_CALL_FN(debugCall2048<>, 2048)
  2050  DEBUG_CALL_FN(debugCall4096<>, 4096)
  2051  DEBUG_CALL_FN(debugCall8192<>, 8192)
  2052  DEBUG_CALL_FN(debugCall16384<>, 16384)
  2053  DEBUG_CALL_FN(debugCall32768<>, 32768)
  2054  DEBUG_CALL_FN(debugCall65536<>, 65536)
  2055  
  2056  // func debugCallPanicked(val interface{})
  2057  TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
  2058  	// Copy the panic value to the top of stack.
  2059  	MOVQ	val_type+0(FP), AX
  2060  	MOVQ	AX, 0(SP)
  2061  	MOVQ	val_data+8(FP), AX
  2062  	MOVQ	AX, 8(SP)
  2063  	MOVQ	$2, R12
  2064  	BYTE	$0xcc
  2065  	RET
  2066  
  2067  TEXT runtime·panicBounds<ABIInternal>(SB),NOSPLIT,$144-0
  2068  	NO_LOCAL_POINTERS
  2069  	// Save all 14 int registers that could have an index in them.
  2070  	// They may be pointers, but if they are, they are dead.
  2071  	MOVQ	AX, 16(SP)
  2072  	MOVQ	CX, 24(SP)
  2073  	MOVQ	DX, 32(SP)
  2074  	MOVQ	BX, 40(SP)
  2075  	// skip SP @ 48(SP)
  2076  	MOVQ	BP, 56(SP)
  2077  	MOVQ	SI, 64(SP)
  2078  	MOVQ	DI, 72(SP)
  2079  	MOVQ	R8, 80(SP)
  2080  	MOVQ	R9, 88(SP)
  2081  	MOVQ	R10, 96(SP)
  2082  	MOVQ	R11, 104(SP)
  2083  	MOVQ	R12, 112(SP)
  2084  	MOVQ	R13, 120(SP)
  2085  	// skip R14 @ 128(SP) (aka G)
  2086  	MOVQ	R15, 136(SP)
  2087  
  2088  	MOVQ	SP, AX		// hide SP read from vet
  2089  	MOVQ	152(AX), AX	// PC immediately after call to panicBounds
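        	// (152 = the 144-byte local frame plus 8 bytes for the saved frame
        	// pointer, so 152(SP) holds the return address pushed by our
        	// caller's CALL.)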
  2090  	LEAQ	16(SP), BX
  2091  	CALL	runtime·panicBounds64<ABIInternal>(SB)
  2092  	RET
  2093  
  2094  #ifdef GOOS_android
  2095  // Use the free TLS_SLOT_APP slot #2 on Android Q.
  2096  // Earlier Android versions are set up in gcc_android.c.
  2097  DATA runtime·tls_g+0(SB)/8, $16
  2098  GLOBL runtime·tls_g+0(SB), NOPTR, $8
  2099  #endif
  2100  #ifdef GOOS_windows
  2101  GLOBL runtime·tls_g+0(SB), NOPTR, $8
  2102  #endif
  2103  
  2104  // The compiler and assembler's -spectre=ret mode rewrites
  2105  // all indirect CALL AX / JMP AX instructions to be
  2106  // CALL retpolineAX / JMP retpolineAX.
  2107  // See https://support.google.com/faqs/answer/7625886.
  2108  #define RETPOLINE(reg) \
  2109  	/*   CALL setup */     BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0;	\
  2110  	/* nospec: */									\
  2111  	/*   PAUSE */           BYTE $0xF3; BYTE $0x90;					\
  2112  	/*   JMP nospec */      BYTE $0xEB; BYTE $-(2+2);				\
  2113  	/* setup: */									\
  2114  	/*   MOVQ AX, 0(SP) */  BYTE $0x48|((reg&8)>>1); BYTE $0x89;			\
  2115  	                        BYTE $0x04|((reg&7)<<3); BYTE $0x24;			\
  2116  	/*   RET */             BYTE $0xC3
  2117  
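        // For example, with reg=0 (retpolineAX) the MOVQ bytes above assemble
        // to 48 89 04 24, i.e. MOVQ AX, 0(SP), and with reg=8 (retpolineR8)
        // they assemble to 4C 89 04 24, i.e. MOVQ R8, 0(SP). The RET then
        // "returns" to the value of the target register, while the speculated
        // return path spins harmlessly in the PAUSE/JMP loop.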
  2118  TEXT runtime·retpolineAX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(0)
  2119  TEXT runtime·retpolineCX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(1)
  2120  TEXT runtime·retpolineDX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(2)
  2121  TEXT runtime·retpolineBX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(3)
  2122  /* SP is 4, can't happen / magic encodings */
  2123  TEXT runtime·retpolineBP(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(5)
  2124  TEXT runtime·retpolineSI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(6)
  2125  TEXT runtime·retpolineDI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(7)
  2126  TEXT runtime·retpolineR8(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(8)
  2127  TEXT runtime·retpolineR9(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(9)
  2128  TEXT runtime·retpolineR10(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(10)
  2129  TEXT runtime·retpolineR11(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(11)
  2130  TEXT runtime·retpolineR12(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(12)
  2131  TEXT runtime·retpolineR13(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(13)
  2132  TEXT runtime·retpolineR14(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(14)
  2133  TEXT runtime·retpolineR15(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(15)
  2134  
  2135  TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
  2136  	MOVQ BP, AX
  2137  	RET
  2138  
