1 // Copyright 2025 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "textflag.h"
7
// Index is the entry point used for the two-slice form of the search.
// It slides the separator pointer and length from R7/R8 down into R6/R7,
// which is the register layout indexbody documents for its inputs, and
// then tail-jumps so that indexbody returns straight to Index's caller.
// The previous contents of R6 are discarded.
TEXT ·Index<ABIInternal>(SB),NOSPLIT,$0-56
	// Order matters: R7 must be copied out before it is overwritten.
	MOVV	R7, R6			// R6 <- separator pointer
	MOVV	R8, R7			// R7 <- separator length
	JMP	indexbody<>(SB)
12
// IndexString is the entry point used for the string form of the search.
// No register shuffling is done here: control is handed to indexbody with
// R4-R7 exactly as they were on entry, and indexbody returns straight to
// IndexString's caller.
TEXT ·IndexString<ABIInternal>(SB),NOSPLIT,$0-40
	JMP	indexbody<>(SB)
15
// indexbody scans the haystack for the first occurrence of the separator.
//
// The separator is loaded into registers once, before the scan starts, so
// that the per-position comparison never has to re-read it from memory.
// When the separator length is not an exact sum of the load widths used,
// the final load is anchored at the end of the separator and is allowed to
// overlap the previous load.
//
// input:
//   R4 = haystack pointer
//   R5 = haystack length
//   R6 = separator pointer
//   R7 = separator length (2 <= len <= 64)
// output:
//   R4 = byte offset of the first match, or -1 when there is none
// scratch:
//   R5, R7-R17, V0-V7, X0-X5, FCC0-FCC3
//
// Inside every scan loop R4 is advanced right after the first load, so the
// follow-up loads address the candidate through R4 with offsets that are
// one smaller than the matching separator offsets.
TEXT indexbody<>(SB),NOSPLIT,$0
	SUBV	R7, R5, R8
	ADDV	R4, R8			// R8 = address of the last possible match start
	ADDV	$1, R4, R9		// R9 = base + 1, subtracted from R4 on a match

	MOVV	$8, R5
	BGE	R7, R5, sep_ge_8

	// len is below 8 here: bits 2, 1 and 0 of the length pick the variant.
	AND	$0x4, R7, R5
	BNE	R5, sep_4_7
	AND	$0x1, R7, R5
	BNE	R5, sep_3

	// len == 2
	MOVHU	(R6), R10
scan_2:
	BLT	R8, R4, no_match
	MOVHU	(R4), R11
	ADDV	$1, R4
	BNE	R10, R11, scan_2
	JMP	match

sep_3:
	MOVHU	(R6), R10
	MOVBU	2(R6), R11
scan_3:
	BLT	R8, R4, no_match
	MOVHU	(R4), R12
	ADDV	$1, R4
	BNE	R10, R12, scan_3
	MOVBU	1(R4), R13
	BNE	R11, R13, scan_3
	JMP	match

sep_4_7:
	AND	$0x2, R7, R5
	BNE	R5, sep_6_7
	AND	$0x1, R7, R5
	BNE	R5, sep_5

	// len == 4
	MOVWU	(R6), R10
scan_4:
	BLT	R8, R4, no_match
	MOVWU	(R4), R11
	ADDV	$1, R4
	BNE	R10, R11, scan_4
	JMP	match

sep_5:
	MOVWU	(R6), R10
	MOVBU	4(R6), R11
scan_5:
	BLT	R8, R4, no_match
	MOVWU	(R4), R12
	ADDV	$1, R4
	BNE	R10, R12, scan_5
	MOVBU	3(R4), R13
	BNE	R11, R13, scan_5
	JMP	match

sep_6_7:
	AND	$0x1, R7, R5
	BNE	R5, sep_7

	// len == 6
	MOVWU	(R6), R10
	MOVHU	4(R6), R11
scan_6:
	BLT	R8, R4, no_match
	MOVWU	(R4), R12
	ADDV	$1, R4
	BNE	R10, R12, scan_6
	MOVHU	3(R4), R13
	BNE	R11, R13, scan_6
	JMP	match

sep_7:
	MOVWU	(R6), R10
	MOVWU	3(R6), R11		// bytes 3..6, overlapping byte 3 of the first word
scan_7:
	BLT	R8, R4, no_match
	MOVWU	(R4), R12
	ADDV	$1, R4
	BNE	R10, R12, scan_7
	MOVWU	2(R4), R13
	BNE	R11, R13, scan_7
	JMP	match

sep_ge_8:
	BEQ	R5, R7, sep_8		// R5 still holds 8 from the first dispatch
	MOVV	$17, R5
	BGE	R7, R5, sep_ge_17

	// 9 <= len <= 16: head doubleword plus an end-anchored doubleword.
	MOVV	(R6), R10
	SUBV	$8, R7
	MOVV	(R6)(R7), R11		// last 8 bytes of the separator
	SUBV	$1, R7			// R7 = len - 9, tail offset relative to candidate + 1
scan_9_16:
	BLT	R8, R4, no_match
	MOVV	(R4), R12
	ADDV	$1, R4
	BNE	R10, R12, scan_9_16
	MOVV	(R4)(R7), R13
	BNE	R11, R13, scan_9_16
	JMP	match

sep_8:
	MOVV	(R6), R10
scan_8:
	BLT	R8, R4, no_match
	MOVV	(R4), R11
	ADDV	$1, R4
	BNE	R10, R11, scan_8
	JMP	match

sep_ge_17:
	MOVV	$25, R5
	BGE	R7, R5, sep_ge_25

	// 17 <= len <= 24: two head doublewords plus an end-anchored doubleword.
	MOVV	(R6), R10
	MOVV	8(R6), R11
	SUBV	$8, R7
	MOVV	(R6)(R7), R12
	SUBV	$1, R7			// R7 = len - 9
scan_17_24:
	BLT	R8, R4, no_match
	MOVV	(R4), R13
	ADDV	$1, R4
	BNE	R10, R13, scan_17_24
	MOVV	7(R4), R14
	BNE	R11, R14, scan_17_24
	MOVV	(R4)(R7), R15
	BNE	R12, R15, scan_17_24
	JMP	match

sep_ge_25:
	MOVV	$33, R5
	BGE	R7, R5, sep_ge_33
	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R10
	BNE	R10, lsx_25_32

	// 25 <= len <= 32 without LSX: three head doublewords plus an
	// end-anchored doubleword.
	MOVV	(R6), R10
	MOVV	8(R6), R11
	MOVV	16(R6), R12
	SUBV	$8, R7
	MOVV	(R6)(R7), R13
	SUBV	$1, R7			// R7 = len - 9
scan_25_32:
	BLT	R8, R4, no_match
	MOVV	(R4), R14
	ADDV	$1, R4
	BNE	R10, R14, scan_25_32
	MOVV	7(R4), R15
	BNE	R11, R15, scan_25_32
	MOVV	15(R4), R16
	BNE	R12, R16, scan_25_32
	MOVV	(R4)(R7), R17
	BNE	R13, R17, scan_25_32
	JMP	match

	// 25 <= len <= 32 with LSX: one head vector plus an end-anchored vector.
	// On loong64, LSX is included if LASX is supported, so this range has
	// no separate LASX variant.
lsx_25_32:
	VMOVQ	(R6), V0
	SUBV	$16, R7
	VMOVQ	(R6)(R7), V1		// last 16 bytes of the separator
	SUBV	$1, R7			// R7 = len - 17
lsx_scan_25_32:
	BLT	R8, R4, no_match
	VMOVQ	(R4), V2
	ADDV	$1, R4
	VSEQV	V0, V2, V2
	VSETANYEQV	V2, FCC0
	BFPT	FCC0, lsx_scan_25_32	// head vector differs: next position

	VMOVQ	(R4)(R7), V3
	VSEQV	V1, V3, V3
	VSETANYEQV	V3, FCC1
	BFPT	FCC1, lsx_scan_25_32	// tail vector differs: next position
	JMP	match

sep_ge_33:
	// LASX handles the whole 33..64 range with one code path, so it is
	// tested first; only the LSX fallback needs to split at 49.
	// NOTE(review): HasLSX is not tested on this path; presumably callers
	// only pass len >= 33 when LSX or LASX is available - confirm.
	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R10
	BNE	R10, lasx_33_64
	MOVV	$49, R5
	BGE	R7, R5, lsx_49_64

	// 33 <= len <= 48 with LSX (falls through from the dispatch above):
	// two head vectors plus an end-anchored vector.
	VMOVQ	(R6), V0
	VMOVQ	16(R6), V1
	SUBV	$16, R7
	VMOVQ	(R6)(R7), V2
	SUBV	$1, R7			// R7 = len - 17
lsx_scan_33_48:
	BLT	R8, R4, no_match
	VMOVQ	(R4), V3
	ADDV	$1, R4
	VSEQV	V0, V3, V3
	VSETANYEQV	V3, FCC0
	BFPT	FCC0, lsx_scan_33_48

	VMOVQ	15(R4), V4
	VSEQV	V1, V4, V4
	VSETANYEQV	V4, FCC1
	BFPT	FCC1, lsx_scan_33_48

	VMOVQ	(R4)(R7), V5
	VSEQV	V2, V5, V5
	VSETANYEQV	V5, FCC2
	BFPT	FCC2, lsx_scan_33_48
	JMP	match

	// 49 <= len <= 64 with LSX: three head vectors plus an end-anchored vector.
lsx_49_64:
	VMOVQ	(R6), V0
	VMOVQ	16(R6), V1
	VMOVQ	32(R6), V2
	SUBV	$16, R7
	VMOVQ	(R6)(R7), V3
	SUBV	$1, R7			// R7 = len - 17
lsx_scan_49_64:
	BLT	R8, R4, no_match
	VMOVQ	(R4), V4
	ADDV	$1, R4
	VSEQV	V0, V4, V4
	VSETANYEQV	V4, FCC0
	BFPT	FCC0, lsx_scan_49_64

	VMOVQ	15(R4), V5
	VSEQV	V1, V5, V5
	VSETANYEQV	V5, FCC1
	BFPT	FCC1, lsx_scan_49_64

	VMOVQ	31(R4), V6
	VSEQV	V2, V6, V6
	VSETANYEQV	V6, FCC2
	BFPT	FCC2, lsx_scan_49_64

	VMOVQ	(R4)(R7), V7
	VSEQV	V3, V7, V7
	VSETANYEQV	V7, FCC3
	BFPT	FCC3, lsx_scan_49_64
	JMP	match

	// 33 <= len <= 64 with LASX: one 32-byte head vector plus an
	// end-anchored 32-byte vector cover every length in the range.
lasx_33_64:
	XVMOVQ	(R6), X0
	SUBV	$32, R7
	XVMOVQ	(R6)(R7), X1		// last 32 bytes of the separator
	SUBV	$1, R7			// R7 = len - 33
lasx_scan_33_64:
	BLT	R8, R4, no_match
	XVMOVQ	(R4), X2
	ADDV	$1, R4
	XVSEQV	X0, X2, X3
	XVSETANYEQV	X3, FCC0
	BFPT	FCC0, lasx_scan_33_64

	XVMOVQ	(R4)(R7), X4
	XVSEQV	X1, X4, X5
	XVSETANYEQV	X5, FCC1
	BFPT	FCC1, lasx_scan_33_64
	// both halves matched: fall through to match

match:
	SUBV	R9, R4			// R4 = (candidate + 1) - (base + 1)
	RET

no_match:
	MOVV	$-1, R4
	RET
304
View as plain text