Text file src/runtime/race_s390x.s

     1  // Copyright 2021 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build race
     6  
     7  #include "go_asm.h"
     8  #include "funcdata.h"
     9  #include "textflag.h"
    10  
    11  // The following thunks allow calling the gcc-compiled race runtime directly
    12  // from Go code without going all the way through cgo.
    13  // First, it's much faster (up to 50% speedup for real Go programs).
    14  // Second, it eliminates race-related special cases from cgocall and scheduler.
    15  // Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.
    16  
    17  // A brief recap of the s390x C calling convention.
    18  // Arguments are passed in R2...R6, the rest is on stack.
    19  // Callee-saved registers are: R6...R13, R15.
    20  // Temporary registers are: R0...R5, R14.
    21  
    22  // When calling racecalladdr, R1 is the call target address.
    23  
    24  // The race ctx, ThreadState *thr below, is passed in R2 and loaded in racecalladdr.
    25  
    26  // func runtime·raceread(addr uintptr)
    27  // Called from instrumented code. Hands a read of addr to __tsan_read, using the return address in R14 as the pc.
    28  TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
    29  	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
    30  	MOVD	$__tsan_read(SB), R1	// R1 = call target expected by racecalladdr.
    31  	MOVD	R2, R3	// arg1: addr, which arrives in R2.
    32  	MOVD	R14, R4	// arg2: pc = our return address.
    33  	JMP	racecalladdr<>(SB)	// Tail call; racecalladdr range-checks R3 and loads thr into R2.
    34  
    35  // func runtime·RaceRead(addr uintptr) - stack-argument entry point that forwards to raceread.
    36  TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
    37  	// This needs to be a tail call, because raceread reads caller pc. NOTE(review): raceread<ABIInternal> takes addr from R2, but nothing here loads addr+0(FP) into R2 - confirm R2 is valid when the JMP lands.
    38  	JMP	runtime·raceread(SB)
    39  
    40  // func runtime·racereadpc(void *addr, void *callpc, void *pc) - like raceread, but both pcs are supplied by the caller.
    41  TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
    42  	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
    43  	MOVD	$__tsan_read_pc(SB), R1	// R1 = call target expected by racecalladdr.
    44  	LMG	addr+0(FP), R3, R5	// Load the three stack arguments: R3 = addr, R4 = callpc, R5 = pc.
    45  	JMP	racecalladdr<>(SB)	// Tail call; racecalladdr range-checks R3 and loads thr into R2.
    46  
    47  // func runtime·racewrite(addr uintptr)
    48  // Called from instrumented code. Hands a write of addr to __tsan_write, using the return address in R14 as the pc.
    49  TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
    50  	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
    51  	MOVD	$__tsan_write(SB), R1	// R1 = call target expected by racecalladdr.
    52  	MOVD	R2, R3	// arg1: addr, which arrives in R2.
    53  	MOVD	R14, R4	// arg2: pc = our return address.
    54  	JMP	racecalladdr<>(SB)	// Tail call; racecalladdr range-checks R3 and loads thr into R2.
    55  
    56  // func runtime·RaceWrite(addr uintptr) - stack-argument entry point that forwards to racewrite.
    57  TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
    58  	// This needs to be a tail call, because racewrite reads caller pc. NOTE(review): racewrite<ABIInternal> takes addr from R2, but nothing here loads addr+0(FP) into R2 - confirm R2 is valid when the JMP lands.
    59  	JMP	runtime·racewrite(SB)
    60  
    61  // func runtime·racewritepc(void *addr, void *callpc, void *pc) - like racewrite, but both pcs are supplied by the caller.
    62  TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
    63  	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
    64  	MOVD	$__tsan_write_pc(SB), R1	// R1 = call target expected by racecalladdr.
    65  	LMG	addr+0(FP), R3, R5	// Load the three stack arguments: R3 = addr, R4 = callpc, R5 = pc.
    66  	JMP	racecalladdr<>(SB)	// Tail call; racecalladdr range-checks R3 and loads thr into R2.
    67  
    68  // func runtime·racereadrange(addr, size uintptr)
    69  // Called from instrumented code. Hands a read of size bytes at addr to __tsan_read_range, using the return address in R14 as the pc.
    70  TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
    71  	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
    72  	MOVD	$__tsan_read_range(SB), R1	// R1 = call target expected by racecalladdr.
    73  	MOVD	R3, R4	// arg2: size. Moved first, because the next instruction overwrites R3.
    74  	MOVD	R2, R3	// arg1: addr.
    75  	MOVD	R14, R5	// arg3: pc = our return address.
    76  	JMP	racecalladdr<>(SB)	// Tail call; racecalladdr range-checks R3 and loads thr into R2.
    77  
    78  // func runtime·RaceReadRange(addr, size uintptr) - stack-argument entry point that forwards to racereadrange.
    79  TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
    80  	// This needs to be a tail call, because racereadrange reads caller pc. NOTE(review): racereadrange<ABIInternal> takes addr/size from R2/R3, but nothing here loads them from the stack arguments - confirm both registers are valid when the JMP lands.
    81  	JMP	runtime·racereadrange(SB)
    82  
    83  // func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc) - range read with a caller-supplied pc.
    84  TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
    85  	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
    86  	MOVD	$__tsan_read_range(SB), R1	// R1 = call target expected by racecalladdr.
    87  	LMG	addr+0(FP), R3, R5	// Load the three stack arguments: R3 = addr, R4 = sz, R5 = pc.
    88  	// pc is an interceptor address, but TSan expects it to point to the
    89  	// middle of an interceptor (see LLVM's SCOPED_INTERCEPTOR_RAW).
    90  	ADD	$2, R5	// Advance pc by 2 bytes so it no longer points at the interceptor's first byte.
    91  	JMP	racecalladdr<>(SB)	// Tail call; racecalladdr range-checks R3 and loads thr into R2.
    92  
    93  // func runtime·racewriterange(addr, size uintptr)
    94  // Called from instrumented code. Hands a write of size bytes at addr to __tsan_write_range, using the return address in R14 as the pc.
    95  TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
    96  	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
    97  	MOVD	$__tsan_write_range(SB), R1	// R1 = call target expected by racecalladdr.
    98  	MOVD	R3, R4	// arg2: size. Moved first, because the next instruction overwrites R3.
    99  	MOVD	R2, R3	// arg1: addr.
   100  	MOVD	R14, R5	// arg3: pc = our return address.
   101  	JMP	racecalladdr<>(SB)	// Tail call; racecalladdr range-checks R3 and loads thr into R2.
   102  
   103  // func runtime·RaceWriteRange(addr, size uintptr) - stack-argument entry point that forwards to racewriterange.
   104  TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
   105  	// This needs to be a tail call, because racewriterange reads caller pc. NOTE(review): racewriterange<ABIInternal> takes addr/size from R2/R3, but nothing here loads them from the stack arguments - confirm both registers are valid when the JMP lands.
   106  	JMP	runtime·racewriterange(SB)
   107  
   108  // func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc) - range write with a caller-supplied pc.
   109  TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
   110  	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
   111  	MOVD	$__tsan_write_range(SB), R1	// R1 = call target expected by racecalladdr.
   112  	LMG	addr+0(FP), R3, R5	// Load the three stack arguments: R3 = addr, R4 = sz, R5 = pc.
   113  	// pc is an interceptor address, but TSan expects it to point to the
   114  	// middle of an interceptor (see LLVM's SCOPED_INTERCEPTOR_RAW).
   115  	ADD	$2, R5	// Advance pc by 2 bytes so it no longer points at the interceptor's first byte.
   116  	JMP	racecalladdr<>(SB)	// Tail call; racecalladdr range-checks R3 and loads thr into R2.
   117  
   118  // If R3 is out of range, do nothing. Otherwise, setup goroutine context and
   119  // invoke racecall. Other arguments are already set. In range means racearenastart <= R3 < racearenaend or racedatastart <= R3 < racedataend. In: R1 = C function, R3 = address, R4/R5 = further C arguments. R0 is scratch.
   120  TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
   121  	MOVD	runtime·racearenastart(SB), R0
   122  	CMPUBLT	R3, R0, data			// Before racearena start?
   123  	MOVD	runtime·racearenaend(SB), R0
   124  	CMPUBLT	R3, R0, call			// Before racearena end?
   125  data:
   126  	MOVD	runtime·racedatastart(SB), R0	// Not in the arena; try the data range instead.
   127  	CMPUBLT	R3, R0, ret			// Before racedata start?
   128  	MOVD	runtime·racedataend(SB), R0
   129  	CMPUBGE	R3, R0, ret			// At or after racedata end?
   130  call:
   131  	MOVD	g_racectx(g), R2	// arg0: thr, the race context of the current g.
   132  	JMP	racecall<>(SB)	// Tail call; racecall returns straight to our caller.
   133  ret:
   134  	RET	// Address is not tracked: return without calling into TSan.
   135  
   136  // func runtime·racefuncenter(pc uintptr)
   137  // Called from instrumented code. Loads the stack argument into R3 and continues in racefuncenter<>.
   138  TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
   139  	MOVD	callpc+0(FP), R3	// R3 = callpc, the register racefuncenter<> expects it in.
   140  	JMP	racefuncenter<>(SB)
   141  
   142  // Common code for racefuncenter
   143  // R3 = caller's return address, passed to __tsan_func_enter as pc.
   144  TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
   145  	// void __tsan_func_enter(ThreadState *thr, void *pc);
   146  	MOVD	$__tsan_func_enter(SB), R1	// R1 = call target expected by racecall.
   147  	MOVD	g_racectx(g), R2	// arg0: thr, the race context of the current g.
   148  	BL	racecall<>(SB)	// Regular call, followed by our own RET.
   149  	RET
   150  
   151  // func runtime·racefuncexit()
   152  // Called from instrumented code. Calls __tsan_func_exit with the current g's race context.
   153  TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
   154  	// void __tsan_func_exit(ThreadState *thr);
   155  	MOVD	$__tsan_func_exit(SB), R1	// R1 = call target expected by racecall.
   156  	MOVD	g_racectx(g), R2	// arg0: thr, the race context of the current g.
   157  	JMP	racecall<>(SB)	// Tail call; racecall returns straight to our caller.
   158  
   159  // Atomic operations for sync/atomic package.
   160  
   161  // Load
   162  
   163  TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
   164  	GO_ARGS
   165  	MOVD	$__tsan_go_atomic32_load(SB), R1	// R1 = TSan routine that racecallatomic will call.
   166  	BL	racecallatomic<>(SB)	// Must be BL: racecallatomic finds our arguments at a fixed offset from R15.
   167  	RET
   168  
   169  TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
   170  	GO_ARGS
   171  	MOVD	$__tsan_go_atomic64_load(SB), R1	// R1 = TSan routine that racecallatomic will call.
   172  	BL	racecallatomic<>(SB)	// Must be BL: racecallatomic finds our arguments at a fixed offset from R15.
   173  	RET
   174  
   175  TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
   176  	GO_ARGS
   177  	JMP	sync∕atomic·LoadInt32(SB)	// Same frame layout as the Int32 form; reuse its body.
   178  
   179  TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
   180  	GO_ARGS
   181  	JMP	sync∕atomic·LoadInt64(SB)	// Same frame layout as the Int64 form; reuse its body.
   182  
   183  TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
   184  	GO_ARGS
   185  	JMP	sync∕atomic·LoadInt64(SB)	// Handled as a 64-bit value here; reuse the Int64 body.
   186  
   187  TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
   188  	GO_ARGS
   189  	JMP	sync∕atomic·LoadInt64(SB)	// Handled as a 64-bit value here; reuse the Int64 body.
   190  
   191  // Store
   192  
   193  TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
   194  	GO_ARGS
   195  	MOVD	$__tsan_go_atomic32_store(SB), R1	// R1 = TSan routine that racecallatomic will call.
   196  	BL	racecallatomic<>(SB)	// Must be BL: racecallatomic finds our arguments at a fixed offset from R15.
   197  	RET
   198  
   199  TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
   200  	GO_ARGS
   201  	MOVD	$__tsan_go_atomic64_store(SB), R1	// R1 = TSan routine that racecallatomic will call.
   202  	BL	racecallatomic<>(SB)	// Must be BL: racecallatomic finds our arguments at a fixed offset from R15.
   203  	RET
   204  
   205  TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
   206  	GO_ARGS
   207  	JMP	sync∕atomic·StoreInt32(SB)	// Same frame layout as the Int32 form; reuse its body.
   208  
   209  TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
   210  	GO_ARGS
   211  	JMP	sync∕atomic·StoreInt64(SB)	// Same frame layout as the Int64 form; reuse its body.
   212  
   213  TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
   214  	GO_ARGS
   215  	JMP	sync∕atomic·StoreInt64(SB)	// Handled as a 64-bit value here; reuse the Int64 body.
   216  
   217  // Swap
   218  
   219  TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
   220  	GO_ARGS
   221  	MOVD	$__tsan_go_atomic32_exchange(SB), R1	// R1 = TSan routine that racecallatomic will call.
   222  	BL	racecallatomic<>(SB)	// Must be BL: racecallatomic finds our arguments at a fixed offset from R15.
   223  	RET
   224  
   225  TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
   226  	GO_ARGS
   227  	MOVD	$__tsan_go_atomic64_exchange(SB), R1	// R1 = TSan routine that racecallatomic will call.
   228  	BL	racecallatomic<>(SB)	// Must be BL: racecallatomic finds our arguments at a fixed offset from R15.
   229  	RET
   230  
   231  TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
   232  	GO_ARGS
   233  	JMP	sync∕atomic·SwapInt32(SB)	// Same frame layout as the Int32 form; reuse its body.
   234  
   235  TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
   236  	GO_ARGS
   237  	JMP	sync∕atomic·SwapInt64(SB)	// Same frame layout as the Int64 form; reuse its body.
   238  
   239  TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
   240  	GO_ARGS
   241  	JMP	sync∕atomic·SwapInt64(SB)	// Handled as a 64-bit value here; reuse the Int64 body.
   242  
   243  // Add
   244  
   245  TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
   246  	GO_ARGS
   247  	MOVD	$__tsan_go_atomic32_fetch_add(SB), R1	// R1 = TSan routine that racecallatomic will call.
   248  	BL	racecallatomic<>(SB)	// Must be BL: racecallatomic finds our arguments at a fixed offset from R15.
   249  	// TSan performed fetch_add, but Go needs add_fetch. Convert the result slot in place: ret = ret + add.
   250  	MOVW	add+8(FP), R0	// R0 = delta.
   251  	MOVW	ret+16(FP), R1	// R1 = value left in the result slot by the call above.
   252  	ADD	R0, R1, R0	// R0 = R0 + R1.
   253  	MOVW	R0, ret+16(FP)	// Store the 32-bit sum back as the result.
   254  	RET
   255  
   256  TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
   257  	GO_ARGS
   258  	MOVD	$__tsan_go_atomic64_fetch_add(SB), R1	// R1 = TSan routine that racecallatomic will call.
   259  	BL	racecallatomic<>(SB)	// Must be BL: racecallatomic finds our arguments at a fixed offset from R15.
   260  	// TSan performed fetch_add, but Go needs add_fetch. Convert the result slot in place: ret = ret + add.
   261  	MOVD	add+8(FP), R0	// R0 = delta.
   262  	MOVD	ret+16(FP), R1	// R1 = value left in the result slot by the call above.
   263  	ADD	R0, R1, R0	// R0 = R0 + R1.
   264  	MOVD	R0, ret+16(FP)	// Store the 64-bit sum back as the result.
   265  	RET
   266  
   267  TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
   268  	GO_ARGS
   269  	JMP	sync∕atomic·AddInt32(SB)	// Same frame layout as the Int32 form; reuse its body.
   270  
   271  TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
   272  	GO_ARGS
   273  	JMP	sync∕atomic·AddInt64(SB)	// Same frame layout as the Int64 form; reuse its body.
   274  
   275  TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
   276  	GO_ARGS
   277  	JMP	sync∕atomic·AddInt64(SB)	// Handled as a 64-bit value here; reuse the Int64 body.
   278  
   279  // And
   280  TEXT	sync∕atomic·AndInt32(SB), NOSPLIT, $0-20
   281  	GO_ARGS
   282  	MOVD	$__tsan_go_atomic32_fetch_and(SB), R1	// R1 = TSan routine that racecallatomic will call.
   283  	BL	racecallatomic<>(SB)	// Must be BL: racecallatomic finds our arguments at a fixed offset from R15.
   284  	RET	// Unlike Add, the fetch_and result is returned without adjustment.
   285  
   286  TEXT	sync∕atomic·AndInt64(SB), NOSPLIT, $0-24
   287  	GO_ARGS
   288  	MOVD	$__tsan_go_atomic64_fetch_and(SB), R1	// R1 = TSan routine that racecallatomic will call.
   289  	BL	racecallatomic<>(SB)	// Must be BL: racecallatomic finds our arguments at a fixed offset from R15.
   290  	RET	// Unlike Add, the fetch_and result is returned without adjustment.
   291  
   292  TEXT	sync∕atomic·AndUint32(SB), NOSPLIT, $0-20
   293  	GO_ARGS
   294  	JMP	sync∕atomic·AndInt32(SB)	// Same frame layout as the Int32 form; reuse its body.
   295  
   296  TEXT	sync∕atomic·AndUint64(SB), NOSPLIT, $0-24
   297  	GO_ARGS
   298  	JMP	sync∕atomic·AndInt64(SB)	// Same frame layout as the Int64 form; reuse its body.
   299  
   300  TEXT	sync∕atomic·AndUintptr(SB), NOSPLIT, $0-24
   301  	GO_ARGS
   302  	JMP	sync∕atomic·AndInt64(SB)	// Handled as a 64-bit value here; reuse the Int64 body.
   303  
   304  // Or
   305  TEXT	sync∕atomic·OrInt32(SB), NOSPLIT, $0-20
   306  	GO_ARGS
   307  	MOVD	$__tsan_go_atomic32_fetch_or(SB), R1	// R1 = TSan routine that racecallatomic will call.
   308  	BL	racecallatomic<>(SB)	// Must be BL: racecallatomic finds our arguments at a fixed offset from R15.
   309  	RET	// Unlike Add, the fetch_or result is returned without adjustment.
   310  
   311  TEXT	sync∕atomic·OrInt64(SB), NOSPLIT, $0-24
   312  	GO_ARGS
   313  	MOVD	$__tsan_go_atomic64_fetch_or(SB), R1	// R1 = TSan routine that racecallatomic will call.
   314  	BL	racecallatomic<>(SB)	// Must be BL: racecallatomic finds our arguments at a fixed offset from R15.
   315  	RET	// Unlike Add, the fetch_or result is returned without adjustment.
   316  
   317  TEXT	sync∕atomic·OrUint32(SB), NOSPLIT, $0-20
   318  	GO_ARGS
   319  	JMP	sync∕atomic·OrInt32(SB)	// Same frame layout as the Int32 form; reuse its body.
   320  
   321  TEXT	sync∕atomic·OrUint64(SB), NOSPLIT, $0-24
   322  	GO_ARGS
   323  	JMP	sync∕atomic·OrInt64(SB)	// Same frame layout as the Int64 form; reuse its body.
   324  
   325  TEXT	sync∕atomic·OrUintptr(SB), NOSPLIT, $0-24
   326  	GO_ARGS
   327  	JMP	sync∕atomic·OrInt64(SB)	// Handled as a 64-bit value here; reuse the Int64 body.
   328  
   329  // CompareAndSwap
   330  
   331  TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
   332  	GO_ARGS
   333  	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R1	// R1 = TSan routine that racecallatomic will call.
   334  	BL	racecallatomic<>(SB)	// Must be BL: racecallatomic finds our arguments at a fixed offset from R15.
   335  	RET
   336  
   337  TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
   338  	GO_ARGS
   339  	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R1	// R1 = TSan routine that racecallatomic will call.
   340  	BL	racecallatomic<>(SB)	// Must be BL: racecallatomic finds our arguments at a fixed offset from R15.
   341  	RET
   342  
   343  TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
   344  	GO_ARGS
   345  	JMP	sync∕atomic·CompareAndSwapInt32(SB)	// Same frame layout as the Int32 form; reuse its body.
   346  
   347  TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
   348  	GO_ARGS
   349  	JMP	sync∕atomic·CompareAndSwapInt64(SB)	// Same frame layout as the Int64 form; reuse its body.
   350  
   351  TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
   352  	GO_ARGS
   353  	JMP	sync∕atomic·CompareAndSwapInt64(SB)	// Handled as a 64-bit value here; reuse the Int64 body.
   354  
   355  // Common code for atomic operations. Calls R1 with (thr, caller pc, pc, pointer to the Go argument block). Must be reached via BL from a sync∕atomic thunk: the R15 offsets below assume two saved return addresses (ours at 0(R15), the thunk's at 8(R15)) sit below the arguments. R0 is scratch; the ignore path also uses R6.
   356  TEXT	racecallatomic<>(SB), NOSPLIT, $0
   357  	MOVD	24(R15), R5			// Address (arg1, after 2xBL).
   358  	// If we pass an invalid pointer to the TSan runtime, it will cause a
   359  	// "fatal error: unknown caller pc". So trigger a SEGV here instead.
   360  	MOVB	(R5), R0
   361  	MOVD	runtime·racearenastart(SB), R0
   362  	CMPUBLT	R5, R0, racecallatomic_data	// Before racearena start?
   363  	MOVD	runtime·racearenaend(SB), R0
   364  	CMPUBLT	R5, R0, racecallatomic_ok	// Before racearena end?
   365  racecallatomic_data:
   366  	MOVD	runtime·racedatastart(SB), R0
   367  	CMPUBLT	R5, R0, racecallatomic_ignore	// Before racedata start?
   368  	MOVD	runtime·racedataend(SB), R0
   369  	CMPUBGE	R5, R0,	racecallatomic_ignore	// At or after racedata end?
   370  racecallatomic_ok:
   371  	MOVD	g_racectx(g), R2		// ThreadState *.
   372  	MOVD	8(R15), R3			// Caller PC.
   373  	MOVD	R14, R4				// PC.
   374  	ADD	$24, R15, R5			// Arguments.
   375  	// Tail call fails to restore R15, so use a normal one.
   376  	BL	racecall<>(SB)
   377  	RET
   378  racecallatomic_ignore:
   379  	// Call __tsan_go_ignore_sync_begin to ignore synchronization during
   380  	// the atomic op. An attempt to synchronize on the address would cause
   381  	// a crash.
   382  	MOVD	R1, R6				// Save target function.
   383  	// The PC is not parked in R7: racecall keeps the Go SP in R7 and would overwrite it. It is re-read from 0(R15) below instead.
   384  	MOVD	$__tsan_go_ignore_sync_begin(SB), R1
   385  	MOVD	g_racectx(g), R2		// ThreadState *.
   386  	BL	racecall<>(SB)
   387  	MOVD	R6, R1				// Restore target function.
   388  	MOVD	g_racectx(g), R2		// ThreadState *.
   389  	MOVD	8(R15), R3			// Caller PC.
   390  	MOVD	0(R15), R4			// PC: our own saved return address, one slot below the caller's at 8(R15).
   391  	ADD	$24, R15, R5			// Arguments.
   392  	BL	racecall<>(SB)
   393  	MOVD	$__tsan_go_ignore_sync_end(SB), R1
   394  	MOVD	g_racectx(g), R2		// ThreadState *.
   395  	BL	racecall<>(SB)
   396  	RET
   397  
   398  // func runtime·racecall(void(*f)(...), ...)
   399  // Calls C function f from race runtime and passes up to 4 arguments to it.
   400  // The arguments are never heap-object-preserving pointers, so we pretend there
   401  // are no arguments. That is why the frame is declared $0-0 even though five stack words are read below.
   402  TEXT	runtime·racecall(SB), NOSPLIT, $0-0
   403  	MOVD	fn+0(FP), R1	// R1 = C function, the register racecall<> calls through.
   404  	MOVD	arg0+8(FP), R2	// First C argument register.
   405  	MOVD	arg1+16(FP), R3	// Second C argument register.
   406  	MOVD	arg2+24(FP), R4	// Third C argument register.
   407  	MOVD	arg3+32(FP), R5	// Fourth C argument register.
   408  	JMP	racecall<>(SB)	// Tail call; racecall<> returns straight to our caller.
   409  
   410  // Switches SP to g0 stack and calls R1. Arguments are already set. Overwrites R7, R8 and R9, so callers must not keep live values in them across this call.
   411  TEXT	racecall<>(SB), NOSPLIT, $0-0
   412  	BL	runtime·save_g(SB)		// Save g for callbacks.
   413  	MOVD	R15, R7				// Save SP.
   414  	MOVD	g_m(g), R8			// R8 = thread.
   415  
   416  	// Switch to g0 stack if we aren't already on g0 or gsignal.
   417  	MOVD	m_gsignal(R8), R9	// R9 = this thread's gsignal.
   418  	CMPBEQ	R9, g, call	// Running on gsignal: keep the current stack.
   419  
   420  	MOVD	m_g0(R8), R9	// R9 = this thread's g0.
   421  	CMPBEQ	R9, g, call	// Running on g0: keep the current stack.
   422  
   423  	MOVD	(g_sched+gobuf_sp)(R9), R15	// Switch SP to g0.
   424  
   425  call:	SUB	$160, R15			// Allocate C frame.
   426  	BL	R1				// Call C code.
   427  	MOVD	R7, R15				// Restore SP.
   428  	RET					// Return to Go.
   429  
   430  // C->Go callback thunk that allows to call runtime·racesymbolize from C
   431  // code. racecall has only switched SP, finish g->g0 switch by setting correct
   432  // g. R2 contains command code, R3 contains command-specific context. See
   433  // racecallback for command codes. Command 0 is answered inline without entering Go; every other command is forwarded to runtime·racecallback on g0.
   434  TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
   435  	STMG	R6, R15, 48(R15)		// Save non-volatile regs.
   436  	BL	runtime·load_g(SB)		// Saved by racecall.
   437  	CMPBNE	R2, $0, rest			// raceGetProcCmd?
   438  	MOVD	g_m(g), R2			// R2 = thread.
   439  	MOVD	m_p(R2), R2			// R2 = processor.
   440  	MVC	$8, p_raceprocctx(R2), (R3)	// *R3 = ThreadState *.
   441  	LMG	48(R15), R6, R15		// Restore non-volatile regs.
   442  	BR	R14				// Return to C.
   443  rest:	MOVD	g_m(g), R4			// R4 = current thread.
   444  	MOVD	m_g0(R4), g			// Switch to g0.
   445  	SUB	$24, R15			// Allocate Go argument slots.
   446  	STMG	R2, R3, 8(R15)			// Fill Go frame.
   447  	BL	runtime·racecallback(SB)	// Call Go code.
   448  	LMG	72(R15), R6, R15		// Restore non-volatile regs. Offset is 48 + 24 because R15 was lowered by 24 above; reloading R15 also undoes that adjustment.
   449  	BR	R14				// Return to C.
   450  

View as plain text