// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race

#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"
#include "tls_arm64.h"
#include "cgo/abi_arm64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow us to remove the cyclic runtime/race dependency on cmd/go.

// A brief recap of the arm64 calling convention.
// Arguments are passed in R0...R7; the rest go on the stack.
// Callee-saved registers are R19...R28.
// Temporary registers are R9...R15.
// SP must be 16-byte aligned.
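
// For example (an illustrative note, not generated code): a call such as
//	__tsan_read(thr, addr, pc)
// passes thr in R0, addr in R1, and pc in R2, which is exactly how the
// thunks below stage their arguments before jumping to racecalladdr.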

// When calling racecalladdr, R9 is the call target address.

// The race ctx, ThreadState *thr below, is passed in R0 and loaded in racecalladdr.

// Darwin may return an unaligned thread pointer. Align it. (See tls_arm64.s.)
// No-op on other OSes.
#ifdef TLS_darwin
#define TP_ALIGN	AND	$~7, R0
#else
#define TP_ALIGN
#endif

// Load g from TLS. (See tls_arm64.s.)
#define load_g \
	MRS_TPIDR_R0 \
	TP_ALIGN \
	MOVD    runtime·tls_g(SB), R11 \
	MOVD    (R0)(R11), g
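
// In C-like pseudocode (an illustrative sketch), load_g does roughly:
//	tp = thread pointer register;       // MRS_TPIDR_R0, aligned on Darwin
//	g  = *(g**)(tp + runtime.tls_g);    // tls_g is the TLS slot offset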

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R1	// addr
	MOVD	LR, R2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_read(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads the caller's pc.
	JMP	runtime·raceread(SB)

// func runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_read_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R1	// addr
	MOVD	LR, R2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_write(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads the caller's pc.
	JMP	runtime·racewrite(SB)

// func runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_write_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVD	R1, R2	// size
	MOVD	R0, R1	// addr
	MOVD	LR, R3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads the caller's pc.
	JMP	runtime·racereadrange(SB)

// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is the function start; tsan wants a return address.
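	// (arm64 instructions are a fixed 4 bytes, so pc+4 is the address of
	// the instruction following a hypothetical call at pc, i.e. a plausible
	// return address for tsan to symbolize.)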
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVD	R1, R2	// size
	MOVD	R0, R1	// addr
	MOVD	LR, R3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads the caller's pc.
	JMP	runtime·racewriterange(SB)

// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is the function start; tsan wants a return address (see racereadrangepc1).
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// If addr (R1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R1
	BLT	data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R1
	BLT	call
data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R1
	BLT	ret
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R1
	BGT	ret
call:
	JMP	racecall<>(SB)
ret:
	RET
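
// The range check above is, in C-like pseudocode (illustrative only):
//	if (racearenastart <= addr && addr < racearenaend) goto call;
//	if (racedatastart <= addr && addr <= racedataend) goto call;  // note: BGT leaves the data check inclusive of racedataend
//	return;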

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R9	// callpc
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter.
// R9 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// goroutine racectx
	MOVD	R9, R1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVD	$__tsan_func_enter(SB), R9
	BL	racecall<>(SB)
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit<ABIInternal>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// race context
	// void __tsan_func_exit(ThreadState *thr);
	MOVD	$__tsan_func_exit(SB), R9
	JMP	racecall<>(SB)

// Atomic operations for the sync/atomic package.
// R3 = address of the arguments passed to this function; after two BLs
// it can be fetched at 40(RSP) in racecallatomic.
// R0, R1, R2 are set in racecallatomic.
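//
// The tsan Go entry points have roughly this shape (an assumed signature,
// shown for illustration; see tsan's Go build of tsan_interface_atomic):
//	void __tsan_go_atomic32_load(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
// where a points at the Go argument frame: the address, then the operands,
// then the result slot.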

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
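// __tsan_go_atomic*_fetch_add returns the old value (fetch_add semantics),
// while sync/atomic.Add* must return the new value, so after the call the
// thunks below add the addend to the fetched result in place.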
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVW	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVW	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVW	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVD	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVD	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVD	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// And
TEXT	sync∕atomic·AndInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_and(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·AndInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_and(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·AndUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AndInt32(SB)

TEXT	sync∕atomic·AndUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AndInt64(SB)

TEXT	sync∕atomic·AndUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AndInt64(SB)

// Or
TEXT	sync∕atomic·OrInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_or(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·OrInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_or(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·OrUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·OrInt32(SB)

TEXT	sync∕atomic·OrUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·OrInt64(SB)

TEXT	sync∕atomic·OrUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·OrInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
	GO_ARGS
	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
	GO_ARGS
	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// R9 = addr of target function
TEXT	racecallatomic<>(SB), NOSPLIT, $0
	// Set up these registers:
	// R0 = *ThreadState
	// R1 = caller pc
	// R2 = pc
	// R3 = addr of incoming arg list

	// Trigger SIGSEGV early.
	MOVD	40(RSP), R3	// the first argument is addr; after two BLs it is at 40(RSP)
	MOVB	(R3), R13	// segv here if addr is bad
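	// (A plausible rationale, not stated in the source: faulting here,
	// while still in Go code, yields a clean Go crash report rather than
	// a fault deep inside the C race runtime.)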
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ok
racecallatomic_data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ignore
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R3
	BGE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range; call the atomic function.
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
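	//
	// In effect (an illustrative sketch of the sequence below):
	//	__tsan_go_ignore_sync_begin(racectx);
	//	__tsan_go_atomicXX_op(racectx, callerpc, pc, args);
	//	__tsan_go_ignore_sync_end(racectx);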
	MOVD	R9, R21	// remember the original function
	MOVD	$__tsan_go_ignore_sync_begin(SB), R9
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	MOVD	R21, R9	// restore the original function
	// Call the atomic function.
	// racecall will call LLVM race code which might clobber R28 (g).
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3	// arguments
	BL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVD	$__tsan_go_ignore_sync_end(SB), R9
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	RET

// func runtime·racecall(void(*f)(...), ...)
// Calls the C function f from the race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVD	fn+0(FP), R9
	MOVD	arg0+8(FP), R0
	MOVD	arg1+16(FP), R1
	MOVD	arg2+24(FP), R2
	MOVD	arg3+32(FP), R3
	JMP	racecall<>(SB)

// Switches SP to the g0 stack and calls (R9). Arguments are already set.
// Clobbers R19, R20.
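//
// Roughly (an illustrative sketch):
//	if (g != g->m->g0)
//		SP = g->m->g0->sched.sp;  // switch to the g0 stack
//	SP -= 16;                         // keep the saved-FP slot clear
//	(*R9)(R0, R1, R2, R3);
//	// then restore SP from R19 and return via R20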
TEXT	racecall<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVD	g_m(g), R10
	// Switch to g0 stack.
	MOVD	RSP, R19	// callee-saved, preserved across the CALL
	MOVD	R30, R20	// callee-saved, preserved across the CALL
	MOVD	m_g0(R10), R11
	CMP	R11, g
	BEQ	call	// already on g0
	MOVD	(g_sched+gobuf_sp)(R11), R12
	MOVD	R12, RSP
call:
	// Decrement SP past where the frame pointer is saved in the Go arm64
	// ABI (one word below the stack pointer) so the race detector library
	// code doesn't clobber it.
	SUB	$16, RSP
	BL	R9
	MOVD	R19, RSP
	JMP	(R20)

// C->Go callback thunk that allows C code to call runtime·racecallback.
// A direct Go->C race call has only switched SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// R0 contains the command code. R1 contains command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
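	//
	// The fast path implements, in effect (illustrative):
	//	if (op == raceGetProcCmd) {      // op == 0, tested by CBNZ
	//		*(uintptr*)ctx = g->m->p->raceprocctx;
	//		return;
	//	}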
	CBNZ	R0, rest
	MOVD	g, R13
#ifdef TLS_darwin
	MOVD	R27, R12 // save R27 a.k.a. REGTMP (callee-saved in C); load_g clobbers it
#endif
	load_g
#ifdef TLS_darwin
	MOVD	R12, R27
#endif
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	p_raceprocctx(R0), R0
	MOVD	R0, (R1)
	MOVD	R13, g
	JMP	(LR)
rest:
	// Save callee-saved registers (Go code won't preserve them).
	// 8(RSP) and 16(RSP) are for the args passed through racecallback.
	SUB	$176, RSP
	MOVD	LR, 0(RSP)

	SAVE_R19_TO_R28(8*3)
	SAVE_F8_TO_F15(8*13)
	MOVD	R29, (8*21)(RSP)
	// Set g = g0.
	// load_g will clobber R0, so save it first.
	MOVD	R0, R13
	load_g
	// Restore R0.
	MOVD	R13, R0
	MOVD	g_m(g), R13
	MOVD	m_g0(R13), R14
	CMP	R14, g
	BEQ	noswitch	// branch if already on g0
	MOVD	R14, g

	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)

	// All registers are smashed after Go code; reload.
	MOVD	g_m(g), R13
	MOVD	m_curg(R13), g	// g = m->curg
ret:
	// Restore callee-saved registers.
	MOVD	0(RSP), LR
	MOVD	(8*21)(RSP), R29
	RESTORE_F8_TO_F15(8*13)
	RESTORE_R19_TO_R28(8*3)
	ADD	$176, RSP
	JMP	(LR)

noswitch:
	// already on g0
	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)
	JMP	ret

#ifndef TLSG_IS_VARIABLE
// tls_g, the per-thread g value, stored in TLS.
GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8
#endif
