// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !purego

#include "textflag.h"

// SMALL_TAIL dispatches a short input (n < 16) directly to the matching
// tail handler defined by the SMALL macro expanded later in the same
// function body: n in [1,2) -> xor_1, [2,4) -> xor_2, [4,8) -> xor_4,
// [8,16) -> xor_8. For n >= 16 execution falls through to the wide
// loops that follow the macro.
// Registers on entry: R4 = dst, R5 = a, R6 = b, R7 = n; R8 is scratch.
// SGTU $k, R7, R8 sets R8 = (k > n); BNE R8, label branches when R8 != 0.
// NOTE(review): n == 0 would fall into xor_1 and touch one byte —
// assumes callers guarantee n > 0 (Go's XORBytes returns early on
// zero length); confirm at call sites.
#define SMALL_TAIL \
	SGTU	$2, R7, R8; \
	BNE	R8, xor_1; \
	SGTU	$4, R7, R8; \
	BNE	R8, xor_2; \
	SGTU	$8, R7, R8; \
	BNE	R8, xor_4; \
	SGTU	$16, R7, R8; \
	BNE	R8, xor_8; \

// SMALL xors the remaining 0 < n < 16 bytes in descending
// power-of-two chunks (8, 4, 2, 1). Each xor_N_check tests whether
// n >= N; each xor_N body consumes N bytes, advances all three
// pointers, jumps to the function-local "end" label when n reaches 0,
// and otherwise falls through to the next smaller size. The final
// xor_1 step handles the last byte without a length check and does
// not advance the pointers (nothing follows it).
// The xor_N labels double as the entry points targeted by SMALL_TAIL.
// Registers: R4 = dst, R5 = a, R6 = b, R7 = n; R8, R10, R11 scratch.
#define SMALL \
xor_8_check:; \
	SGTU	$8, R7, R8; \
	BNE	R8, xor_4_check; \
xor_8:; \
	SUBV	$8, R7; \
	MOVV	(R5), R10; \
	MOVV	(R6), R11; \
	XOR	R10, R11; \
	MOVV	R11, (R4); \
	ADDV	$8, R5; \
	ADDV	$8, R6; \
	ADDV	$8, R4; \
	BEQ	R7, R0, end; \
xor_4_check:; \
	SGTU	$4, R7, R8; \
	BNE	R8, xor_2_check; \
xor_4:; \
	SUBV	$4, R7; \
	MOVW	(R5), R10; \
	MOVW	(R6), R11; \
	XOR	R10, R11; \
	MOVW	R11, (R4); \
	ADDV	$4, R5; \
	ADDV	$4, R6; \
	ADDV	$4, R4; \
	BEQ	R7, R0, end; \
xor_2_check:; \
	SGTU	$2, R7, R8; \
	BNE	R8, xor_1; \
xor_2:; \
	SUBV	$2, R7; \
	MOVH	(R5), R10; \
	MOVH	(R6), R11; \
	XOR	R10, R11; \
	MOVH	R11, (R4); \
	ADDV	$2, R5; \
	ADDV	$2, R6; \
	ADDV	$2, R4; \
	BEQ	R7, R0, end; \
xor_1:; \
	MOVB	(R5), R10; \
	MOVB	(R6), R11; \
	XOR	R10, R11; \
	MOVB	R11, (R4); \

// func xorBytesBasic(dst, a, b *byte, n int)
//
// Scalar (non-SIMD) implementation: stores dst[i] = a[i] XOR b[i] for
// 0 <= i < n using only general-purpose registers. The main loop moves
// 64 bytes per iteration (two 32-byte halves, four 8-byte lanes each),
// followed by single 32- and 16-byte steps, then the shared SMALL tail
// for the final n < 16 bytes.
// NOTE(review): assumes n > 0 (see SMALL_TAIL) — confirm at call sites.
TEXT ·xorBytesBasic(SB), NOSPLIT, $0
	MOVV	dst+0(FP), R4		// R4 = dst
	MOVV	a+8(FP), R5		// R5 = a
	MOVV	b+16(FP), R6		// R6 = b
	MOVV	n+24(FP), R7		// R7 = n, bytes remaining

	SMALL_TAIL			// n < 16: jump straight to the right tail handler

xor_64_check:
	SGTU	$64, R7, R8		// R8 = (64 > n)
	BNE	R8, xor_32_check
xor_64_loop:				// 64 bytes per iteration
	SUBV	$64, R7
	// first 32 bytes: loads of both sources complete before the stores
	MOVV	(R5), R10
	MOVV	8(R5), R11
	MOVV	16(R5), R12
	MOVV	24(R5), R13
	MOVV	(R6), R14
	MOVV	8(R6), R15
	MOVV	16(R6), R16
	MOVV	24(R6), R17
	XOR	R10, R14
	XOR	R11, R15
	XOR	R12, R16
	XOR	R13, R17
	MOVV	R14, (R4)
	MOVV	R15, 8(R4)
	MOVV	R16, 16(R4)
	MOVV	R17, 24(R4)
	// second 32 bytes
	MOVV	32(R5), R10
	MOVV	40(R5), R11
	MOVV	48(R5), R12
	MOVV	56(R5), R13
	MOVV	32(R6), R14
	MOVV	40(R6), R15
	MOVV	48(R6), R16
	MOVV	56(R6), R17
	XOR	R10, R14
	XOR	R11, R15
	XOR	R12, R16
	XOR	R13, R17
	MOVV	R14, 32(R4)
	MOVV	R15, 40(R4)
	MOVV	R16, 48(R4)
	MOVV	R17, 56(R4)
	SGTU	$64, R7, R8		// R8 = (64 > n) for the next round
	ADDV	$64, R5
	ADDV	$64, R6
	ADDV	$64, R4
	BEQ	R8, xor_64_loop		// loop while n >= 64 (R8 == 0)
	BEQ	R7, end			// n == 0: length consumed exactly

xor_32_check:
	SGTU	$32, R7, R8		// R8 = (32 > n)
	BNE	R8, xor_16_check
xor_32:					// one 32-byte step
	SUBV	$32, R7
	MOVV	(R5), R10
	MOVV	8(R5), R11
	MOVV	16(R5), R12
	MOVV	24(R5), R13
	MOVV	(R6), R14
	MOVV	8(R6), R15
	MOVV	16(R6), R16
	MOVV	24(R6), R17
	XOR	R10, R14
	XOR	R11, R15
	XOR	R12, R16
	XOR	R13, R17
	MOVV	R14, (R4)
	MOVV	R15, 8(R4)
	MOVV	R16, 16(R4)
	MOVV	R17, 24(R4)
	ADDV	$32, R5
	ADDV	$32, R6
	ADDV	$32, R4
	BEQ	R7, R0, end

xor_16_check:
	SGTU	$16, R7, R8		// R8 = (16 > n)
	BNE	R8, xor_8_check		// xor_8_check is defined by SMALL below
xor_16:					// one 16-byte step
	SUBV	$16, R7
	MOVV	(R5), R10
	MOVV	8(R5), R11
	MOVV	(R6), R12
	MOVV	8(R6), R13
	XOR	R10, R12
	XOR	R11, R13
	MOVV	R12, (R4)
	MOVV	R13, 8(R4)
	ADDV	$16, R5
	ADDV	$16, R6
	ADDV	$16, R4
	BEQ	R7, R0, end

	SMALL				// final n < 16 bytes
end:
	RET

// func xorBytesLSX(dst, a, b *byte, n int)
//
// LSX (128-bit SIMD) implementation: stores dst[i] = a[i] XOR b[i]
// for 0 <= i < n. The main loop moves 128 bytes per iteration using
// eight V registers per source, followed by single 64/32/16-byte
// vector steps, then the shared SMALL scalar tail for n < 16.
// NOTE(review): assumes n > 0 and that the caller has verified LSX
// support — confirm at call sites.
TEXT ·xorBytesLSX(SB), NOSPLIT, $0
	MOVV	dst+0(FP), R4		// R4 = dst
	MOVV	a+8(FP), R5		// R5 = a
	MOVV	b+16(FP), R6		// R6 = b
	MOVV	n+24(FP), R7		// R7 = n, bytes remaining

	SMALL_TAIL			// n < 16: handle entirely in the scalar tail

xor_128_lsx_check:
	SGTU	$128, R7, R8		// R8 = (128 > n)
	BNE	R8, xor_64_lsx_check
xor_128_lsx_loop:			// 128 bytes per iteration
	SUBV	$128, R7
	VMOVQ	(R5), V0
	VMOVQ	16(R5), V1
	VMOVQ	32(R5), V2
	VMOVQ	48(R5), V3
	VMOVQ	64(R5), V4
	VMOVQ	80(R5), V5
	VMOVQ	96(R5), V6
	VMOVQ	112(R5), V7
	VMOVQ	(R6), V8
	VMOVQ	16(R6), V9
	VMOVQ	32(R6), V10
	VMOVQ	48(R6), V11
	VMOVQ	64(R6), V12
	VMOVQ	80(R6), V13
	VMOVQ	96(R6), V14
	VMOVQ	112(R6), V15
	VXORV	V0, V8, V8
	VXORV	V1, V9, V9
	VXORV	V2, V10, V10
	VXORV	V3, V11, V11
	VXORV	V4, V12, V12
	VXORV	V5, V13, V13
	VXORV	V6, V14, V14
	VXORV	V7, V15, V15
	VMOVQ	V8, (R4)
	VMOVQ	V9, 16(R4)
	VMOVQ	V10, 32(R4)
	VMOVQ	V11, 48(R4)
	VMOVQ	V12, 64(R4)
	VMOVQ	V13, 80(R4)
	VMOVQ	V14, 96(R4)
	VMOVQ	V15, 112(R4)
	SGTU	$128, R7, R8		// R8 = (128 > n) for the next round
	ADDV	$128, R5
	ADDV	$128, R6
	ADDV	$128, R4
	BEQ	R8, xor_128_lsx_loop	// loop while n >= 128 (R8 == 0)
	BEQ	R7, end			// n == 0: length consumed exactly

xor_64_lsx_check:
	SGTU	$64, R7, R8		// R8 = (64 > n)
	BNE	R8, xor_32_lsx_check
xor_64_lsx:				// one 64-byte step
	SUBV	$64, R7
	VMOVQ	(R5), V0
	VMOVQ	16(R5), V1
	VMOVQ	32(R5), V2
	VMOVQ	48(R5), V3
	VMOVQ	(R6), V4
	VMOVQ	16(R6), V5
	VMOVQ	32(R6), V6
	VMOVQ	48(R6), V7
	VXORV	V0, V4, V4
	VXORV	V1, V5, V5
	VXORV	V2, V6, V6
	VXORV	V3, V7, V7
	VMOVQ	V4, (R4)
	VMOVQ	V5, 16(R4)
	VMOVQ	V6, 32(R4)
	VMOVQ	V7, 48(R4)
	ADDV	$64, R5
	ADDV	$64, R6
	ADDV	$64, R4
	BEQ	R7, end

xor_32_lsx_check:
	SGTU	$32, R7, R8		// R8 = (32 > n)
	BNE	R8, xor_16_lsx_check
xor_32_lsx:				// one 32-byte step
	SUBV	$32, R7
	VMOVQ	(R5), V0
	VMOVQ	16(R5), V1
	VMOVQ	(R6), V2
	VMOVQ	16(R6), V3
	VXORV	V0, V2, V2
	VXORV	V1, V3, V3
	VMOVQ	V2, (R4)
	VMOVQ	V3, 16(R4)
	ADDV	$32, R5
	ADDV	$32, R6
	ADDV	$32, R4
	BEQ	R7, end

xor_16_lsx_check:
	SGTU	$16, R7, R8		// R8 = (16 > n)
	BNE	R8, xor_8_check		// xor_8_check is defined by SMALL below
xor_16_lsx:				// one 16-byte vector step
	SUBV	$16, R7
	VMOVQ	(R5), V0
	VMOVQ	(R6), V1
	VXORV	V0, V1, V1
	VMOVQ	V1, (R4)
	ADDV	$16, R5
	ADDV	$16, R6
	ADDV	$16, R4
	BEQ	R7, end

	SMALL				// final n < 16 bytes, scalar
end:
	RET

// func xorBytesLASX(dst, a, b *byte, n int)
//
// LASX (256-bit SIMD) implementation: stores dst[i] = a[i] XOR b[i]
// for 0 <= i < n. The main loop moves 256 bytes per iteration using
// eight X registers per source, followed by single 128/64/32-byte
// LASX steps, one 16-byte LSX step, then the shared SMALL scalar
// tail for n < 16.
// NOTE(review): assumes n > 0 and that the caller has verified LASX
// (and LSX, for the 16-byte step) support — confirm at call sites.
TEXT ·xorBytesLASX(SB), NOSPLIT, $0
	MOVV	dst+0(FP), R4		// R4 = dst
	MOVV	a+8(FP), R5		// R5 = a
	MOVV	b+16(FP), R6		// R6 = b
	MOVV	n+24(FP), R7		// R7 = n, bytes remaining

	SMALL_TAIL			// n < 16: handle entirely in the scalar tail

xor_256_lasx_check:
	SGTU	$256, R7, R8		// R8 = (256 > n)
	BNE	R8, xor_128_lasx_check
xor_256_lasx_loop:			// 256 bytes per iteration
	SUBV	$256, R7
	XVMOVQ	(R5), X0
	XVMOVQ	32(R5), X1
	XVMOVQ	64(R5), X2
	XVMOVQ	96(R5), X3
	XVMOVQ	128(R5), X4
	XVMOVQ	160(R5), X5
	XVMOVQ	192(R5), X6
	XVMOVQ	224(R5), X7
	XVMOVQ	(R6), X8
	XVMOVQ	32(R6), X9
	XVMOVQ	64(R6), X10
	XVMOVQ	96(R6), X11
	XVMOVQ	128(R6), X12
	XVMOVQ	160(R6), X13
	XVMOVQ	192(R6), X14
	XVMOVQ	224(R6), X15
	XVXORV	X0, X8, X8
	XVXORV	X1, X9, X9
	XVXORV	X2, X10, X10
	XVXORV	X3, X11, X11
	XVXORV	X4, X12, X12
	XVXORV	X5, X13, X13
	XVXORV	X6, X14, X14
	XVXORV	X7, X15, X15
	XVMOVQ	X8, (R4)
	XVMOVQ	X9, 32(R4)
	XVMOVQ	X10, 64(R4)
	XVMOVQ	X11, 96(R4)
	XVMOVQ	X12, 128(R4)
	XVMOVQ	X13, 160(R4)
	XVMOVQ	X14, 192(R4)
	XVMOVQ	X15, 224(R4)
	SGTU	$256, R7, R8		// R8 = (256 > n) for the next round
	ADDV	$256, R5
	ADDV	$256, R6
	ADDV	$256, R4
	BEQ	R8, xor_256_lasx_loop	// loop while n >= 256 (R8 == 0)
	BEQ	R7, end			// n == 0: length consumed exactly

xor_128_lasx_check:
	SGTU	$128, R7, R8		// R8 = (128 > n)
	BNE	R8, xor_64_lasx_check
xor_128_lasx:				// one 128-byte step
	SUBV	$128, R7
	XVMOVQ	(R5), X0
	XVMOVQ	32(R5), X1
	XVMOVQ	64(R5), X2
	XVMOVQ	96(R5), X3
	XVMOVQ	(R6), X4
	XVMOVQ	32(R6), X5
	XVMOVQ	64(R6), X6
	XVMOVQ	96(R6), X7
	XVXORV	X0, X4, X4
	XVXORV	X1, X5, X5
	XVXORV	X2, X6, X6
	XVXORV	X3, X7, X7
	XVMOVQ	X4, (R4)
	XVMOVQ	X5, 32(R4)
	XVMOVQ	X6, 64(R4)
	XVMOVQ	X7, 96(R4)
	ADDV	$128, R5
	ADDV	$128, R6
	ADDV	$128, R4
	BEQ	R7, end

xor_64_lasx_check:
	SGTU	$64, R7, R8		// R8 = (64 > n)
	BNE	R8, xor_32_lasx_check
xor_64_lasx:				// one 64-byte step
	SUBV	$64, R7
	XVMOVQ	(R5), X0
	XVMOVQ	32(R5), X1
	XVMOVQ	(R6), X2
	XVMOVQ	32(R6), X3
	XVXORV	X0, X2, X2
	XVXORV	X1, X3, X3
	XVMOVQ	X2, (R4)
	XVMOVQ	X3, 32(R4)
	ADDV	$64, R5
	ADDV	$64, R6
	ADDV	$64, R4
	BEQ	R7, end

xor_32_lasx_check:
	SGTU	$32, R7, R8		// R8 = (32 > n)
	BNE	R8, xor_16_lasx_check
xor_32_lasx:				// one 32-byte step
	SUBV	$32, R7
	XVMOVQ	(R5), X0
	XVMOVQ	(R6), X1
	XVXORV	X0, X1, X1
	XVMOVQ	X1, (R4)
	ADDV	$32, R5
	ADDV	$32, R6
	ADDV	$32, R4
	BEQ	R7, end

xor_16_lasx_check:
	SGTU	$16, R7, R8		// R8 = (16 > n)
	BNE	R8, xor_8_check		// xor_8_check is defined by SMALL below
xor_16_lasx:				// one 16-byte step using a 128-bit LSX register
	SUBV	$16, R7
	VMOVQ	(R5), V0
	VMOVQ	(R6), V1
	VXORV	V0, V1, V1
	VMOVQ	V1, (R4)
	ADDV	$16, R5
	ADDV	$16, R6
	ADDV	$16, R4
	BEQ	R7, end

	SMALL				// final n < 16 bytes, scalar
end:
	RET
