Text file src/runtime/memmove_ppc64x.s

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build ppc64 || ppc64le
     6  
     7  #include "textflag.h"
     8  
     9  // See memmove Go doc for important implementation constraints.
    10  
    11  // func memmove(to, from unsafe.Pointer, n uintptr)
    12  
    13  // target address
    14  #define TGT R3
    15  // source address
    16  #define SRC R4
    17  // length to move
    18  #define LEN R5
    19  // number of doublewords (LEN >> 3)
    20  #define DWORDS R6
    21  // number of bytes < 8 (LEN & 7)
    22  #define BYTES R7
    23  // const 16 used as index
    24  #define IDX16 R8
    25  // temp used for copies, etc.
    26  #define TMP R9
    27  // number of 32 byte chunks (DWORDS >> 2), used by the backward copy
    28  #define QWORDS R10
    29  // const 32 and 48 used as indexes by the forward 64 byte loop
    30  #define IDX32 R14
    31  #define IDX48 R15
        // number of 64 byte chunks (DWORDS >> 3), used by the forward copy
    32  #define OCTWORDS R16
    33  
    34  TEXT runtime·memmove<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-24
    35  	// R3 = TGT = to
    36  	// R4 = SRC = from
    37  	// R5 = LEN = n
        	//
        	// Copies LEN bytes from SRC to TGT. The copy runs backward (from the
        	// end of both buffers) when src <= dest < src + len, and forward
        	// otherwise.
        	//
        	// Condition register fields used below:
        	//   CR0  scratch, set by the most recent ANDCC/SRDCC or CMP without
        	//        an explicit CR field
        	//   CR1  DWORDS compared with 0
        	//   CR2  (dest - src) compared unsigned with LEN
        	//   CR3  copy of CR0 taken right after ANDCC $7, LEN, BYTES, so
        	//        CR3[EQ] means there are no tail bytes
    38  
    39  	// Determine if there are doublewords to
    40  	// copy so a more efficient move can be done
    41  check:
    42  #ifdef GOPPC64_power10
        	// Lengths up to 16 bytes are handled with one LXVL/STXVL pair.
        	// The byte count is shifted into the high-order byte of TMP,
        	// which is where these instructions read the length. The load
        	// is issued before the store, so no direction check is done here.
    43  	CMP	LEN, $16
    44  	BGT	mcopy
    45  	SLD	$56, LEN, TMP
    46  	LXVL	SRC, TMP, V0
    47  	STXVL	V0, TGT, TMP
    48  	RET
    49  #endif
    50  mcopy:
    51  	ANDCC	$7, LEN, BYTES	// R7: tail bytes to copy (LEN & 7), sets CR0
    52  	SRD	$3, LEN, DWORDS	// R6: double words to copy
    53  	MOVFL	CR0, CR3	// save CR from ANDCC
    54  	CMP	DWORDS, $0, CR1	// CR1[EQ] set if no double words to copy
    55  
    56  	// Determine overlap by subtracting dest - src and comparing against the
    57  	// length.  This catches the cases where src and dest are in different types
    58  	// of storage such as stack and static to avoid doing backward move when not
    59  	// necessary.
        	// The compare is unsigned, so dest below src gives a large value
        	// and selects the forward copy.
    60  
    61  	SUB	SRC, TGT, TMP	// dest - src
    62  	CMPU	TMP, LEN, CR2	// < len?
    63  	BC	12, 8, backward // BLT CR2 backward: src <= dest < src + len
    64  
    65  	// Copying forward if no overlap.
    66  
    67  	BC	12, 6, checkbytes	// BEQ CR1, checkbytes (no doublewords)
    68  	SRDCC	$3, DWORDS, OCTWORDS	// 64 byte chunks?
    69  	MOVD	$16, IDX16
    70  	BEQ	lt64gt8			// < 64 bytes
    71  
    72  	// Prepare for moves of 64 bytes at a time.
    73  
    74  forward64setup:
    75  	DCBTST	(TGT)			// prepare data cache
    76  	DCBT	(SRC)
    77  	MOVD	OCTWORDS, CTR		// Number of 64 byte chunks
    78  	MOVD	$32, IDX32
    79  	MOVD	$48, IDX48
    80  	PCALIGN	$16
    81  
    82  forward64:
    83  	LXVD2X	(R0)(SRC), VS32		// load 64 bytes
    84  	LXVD2X	(IDX16)(SRC), VS33
    85  	LXVD2X	(IDX32)(SRC), VS34
    86  	LXVD2X	(IDX48)(SRC), VS35
    87  	ADD	$64, SRC
    88  	STXVD2X	VS32, (R0)(TGT)		// store 64 bytes
    89  	STXVD2X	VS33, (IDX16)(TGT)
    90  	STXVD2X	VS34, (IDX32)(TGT)
    91  	STXVD2X VS35, (IDX48)(TGT)
    92  	ADD	$64,TGT			// bump up for next set
    93  	BC	16, 0, forward64	// bdnz: continue while 64 byte chunks remain
    94  	ANDCC	$7, DWORDS		// remaining doublewords (DWORDS & 7)
    95  	BEQ	checkbytes		// only bytes remain
    96  
    97  lt64gt8:
        	// DWORDS is 1-7 here. Move 32 bytes if possible.
    98  	CMP	DWORDS, $4
    99  	BLT	lt32gt8
   100  	LXVD2X	(R0)(SRC), VS32
   101  	LXVD2X	(IDX16)(SRC), VS33
   102  	ADD	$-4, DWORDS
   103  	STXVD2X	VS32, (R0)(TGT)
   104  	STXVD2X	VS33, (IDX16)(TGT)
   105  	ADD	$32, SRC
   106  	ADD	$32, TGT
   107  
   108  lt32gt8:
   109  	// At this point fewer than 4 doublewords (< 32 bytes) remain,
        	// possibly none.
   110  	// Move 16 bytes if possible
   111  	CMP     DWORDS, $2
   112  	BLT     lt16
   113  	LXVD2X	(R0)(SRC), VS32
   114  	ADD	$-2, DWORDS
   115  	STXVD2X	VS32, (R0)(TGT)
   116  	ADD     $16, SRC
   117  	ADD     $16, TGT
   118  
   119  lt16:	// Move 8 bytes if possible
   120  	CMP     DWORDS, $1
   121  	BLT     checkbytes
   122  #ifdef GOPPC64_power10
        	// Copy the last doubleword together with the tail bytes
        	// (8 + BYTES in total) using one LXVL/STXVL pair, then return.
   123  	ADD	$8, BYTES
   124  	SLD	$56, BYTES, TMP
   125  	LXVL	SRC, TMP, V0
   126  	STXVL	V0, TGT, TMP
   127  	RET
   128  #endif
   129  
        	// The four instructions below are not reached when
        	// GOPPC64_power10 is defined, because the block above returns.
   130  	MOVD    0(SRC), TMP
   131  	ADD	$8, SRC
   132  	MOVD    TMP, 0(TGT)
   133  	ADD     $8, TGT
   134  checkbytes:
   135  	BC	12, 14, LR		// BEQ CR3 lr: return if no tail bytes (BYTES == 0)
   136  #ifdef GOPPC64_power10
        	// Copy the 1-7 tail bytes with one LXVL/STXVL pair, then return.
   137  	SLD	$56, BYTES, TMP
   138  	LXVL	SRC, TMP, V0
   139  	STXVL	V0, TGT, TMP
   140  	RET
   141  #endif
        	// lt8 is entered by fall-through only, with BYTES between 1 and 7.
   142  lt8:	// Move word if possible
   143  	CMP BYTES, $4
   144  	BLT lt4
   145  	MOVWZ 0(SRC), TMP
   146  	ADD $-4, BYTES
   147  	MOVW TMP, 0(TGT)
   148  	ADD $4, SRC
   149  	ADD $4, TGT
   150  lt4:	// Move halfword if possible
   151  	CMP BYTES, $2
   152  	BLT lt2
   153  	MOVHZ 0(SRC), TMP
   154  	ADD $-2, BYTES
   155  	MOVH TMP, 0(TGT)
   156  	ADD $2, SRC
   157  	ADD $2, TGT
   158  lt2:	// Move last byte if 1 left
   159  	CMP BYTES, $1
   160  	BC 12, 0, LR	// bltlr, return if BYTES == 0
   161  	MOVBZ 0(SRC), TMP
   162  	MOVBZ TMP, 0(TGT)
   163  	RET
   164  
   165  backward:
   166  	// Copying backwards proceeds by copying R7 bytes then copying R6 double words.
   167  	// R3 and R4 are advanced to the end of the destination/source buffers
   168  	// respectively and moved back as we copy.
   169  
   170  	ADD	LEN, SRC, SRC		// end of source
   171  	ADD	TGT, LEN, TGT		// end of dest
   172  
        	// CR0 still holds the result of ANDCC $7, LEN, BYTES from mcopy;
        	// nothing on the path to here has written CR0 since.
   173  	BEQ	nobackwardtail		// BYTES == 0, no tail bytes to move
   174  
   175  	MOVD	BYTES, CTR			// bytes to move
   176  
   177  backwardtailloop:
   178  	MOVBZ 	-1(SRC), TMP		// load the byte just below SRC
   179  	SUB	$1,SRC
   180  	MOVBZ 	TMP, -1(TGT)
   181  	SUB	$1,TGT
   182  	BDNZ	backwardtailloop
   183  
   184  nobackwardtail:
   185  	BC	4, 5, LR		// blelr cr1, return if DWORDS == 0
   186  	SRDCC	$2,DWORDS,QWORDS	// Compute number of 32B blocks and compare to 0
   187  	BNE	backward32setup		// If QWORDS != 0, start the 32B copy loop.
   188  
   189  backward24:
   190  	// DWORDS is a value between 1-3.
        	// The single CMP below sets CR0 for both conditional returns;
        	// the MOVD loads and stores in between leave CR0 unchanged.
   191  	CMP	DWORDS, $2
   192  
   193  	MOVD 	-8(SRC), TMP
   194  	MOVD 	TMP, -8(TGT)
   195  	BC	12, 0, LR		// bltlr, return if DWORDS == 1
   196  
   197  	MOVD 	-16(SRC), TMP
   198  	MOVD 	TMP, -16(TGT)
   199  	BC	12, 2, LR		// beqlr, return if DWORDS == 2
   200  
   201  	MOVD 	-24(SRC), TMP
   202  	MOVD 	TMP, -24(TGT)
   203  	RET
   204  
   205  backward32setup:
        	// CR0 from the ANDCC below is tested only after the loop finishes.
   206  	ANDCC   $3,DWORDS		// Compute remaining DWORDS and compare to 0
   207  	MOVD	QWORDS, CTR		// set up loop ctr
   208  	MOVD	$16, IDX16		// 32 bytes at a time
   209  	PCALIGN	$16
   210  
   211  backward32loop:
   212  	SUB	$32, TGT
   213  	SUB	$32, SRC
   214  	LXVD2X	(R0)(SRC), VS32		// load 16x2 bytes
   215  	LXVD2X	(IDX16)(SRC), VS33
   216  	STXVD2X	VS32, (R0)(TGT)		// store 16x2 bytes
   217  	STXVD2X	VS33, (IDX16)(TGT)
   218  	BDNZ	backward32loop
   219  	BC	12, 2, LR		// beqlr, return if DWORDS == 0
   220  	BR	backward24		// 1-3 doublewords remain
   221  

View as plain text