Source file
src/simd/archsimd/shuffles_amd64.go
1
2
3
4
5
6
7 package archsimd
8
9
10
11
12
13
14
15
16
17
18
19
// Source patterns for the four-selector SelectFromPair methods. Each letter
// of a name stands for one selector, in the order a, b, c, d: L means the
// selector's source bit is clear and H means it is set. The value holds a's
// source bit in bit 0, b's in bit 1, c's in bit 2 and d's in bit 3, so the
// bit pattern reads right-to-left relative to the name.
const (
	_LLLL = 0b0000
	_HLLL = 0b0001
	_LHLL = 0b0010
	_HHLL = 0b0011
	_LLHL = 0b0100
	_HLHL = 0b0101
	_LHHL = 0b0110
	_HHHL = 0b0111
	_LLLH = 0b1000
	_HLLH = 0b1001
	_LHLH = 0b1010
	_HHLH = 0b1011
	_LLHH = 0b1100
	_HLHH = 0b1101
	_LHHH = 0b1110
	_HHHH = 0b1111
)
38
39
40
41
// Source patterns for the two-selector SelectFromPair methods. The first
// letter describes selector a and the second selector b: L means the
// selector's source bit is clear and H means it is set. The value holds a's
// source bit in bit 0 and b's in bit 1.
const (
	_LL = 0b00
	_HL = 0b01
	_LH = 0b10
	_HH = 0b11
)
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66 func (x Int32x4) SelectFromPair(a, b, c, d uint8, y Int32x4) Int32x4 {
67
68
69
70
71 pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
72
73
74
75
76 a, b, c, d = a&3, b&3, c&3, d&3
77
78 switch pattern {
79 case _LLLL:
80 return x.concatSelectedConstant(cscimm4(a, b, c, d), x)
81 case _HHHH:
82 return y.concatSelectedConstant(cscimm4(a, b, c, d), y)
83 case _LLHH:
84 return x.concatSelectedConstant(cscimm4(a, b, c, d), y)
85 case _HHLL:
86 return y.concatSelectedConstant(cscimm4(a, b, c, d), x)
87
88 case _HLLL:
89 z := y.concatSelectedConstant(cscimm4(a, a, b, b), x)
90 return z.concatSelectedConstant(cscimm4(0, 2, c, d), x)
91 case _LHLL:
92 z := x.concatSelectedConstant(cscimm4(a, a, b, b), y)
93 return z.concatSelectedConstant(cscimm4(0, 2, c, d), x)
94
95 case _HLHH:
96 z := y.concatSelectedConstant(cscimm4(a, a, b, b), x)
97 return z.concatSelectedConstant(cscimm4(0, 2, c, d), y)
98 case _LHHH:
99 z := x.concatSelectedConstant(cscimm4(a, a, b, b), y)
100 return z.concatSelectedConstant(cscimm4(0, 2, c, d), y)
101
102 case _LLLH:
103 z := x.concatSelectedConstant(cscimm4(c, c, d, d), y)
104 return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
105 case _LLHL:
106 z := y.concatSelectedConstant(cscimm4(c, c, d, d), x)
107 return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
108 case _HHLH:
109 z := x.concatSelectedConstant(cscimm4(c, c, d, d), y)
110 return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
111 case _HHHL:
112 z := y.concatSelectedConstant(cscimm4(c, c, d, d), x)
113 return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
114
115 case _LHLH:
116 z := x.concatSelectedConstant(cscimm4(a, c, b, d), y)
117 return z.concatSelectedConstant(0b11_01_10_00 , z)
118 case _HLHL:
119 z := x.concatSelectedConstant(cscimm4(b, d, a, c), y)
120 return z.concatSelectedConstant(0b01_11_00_10 , z)
121 case _HLLH:
122 z := x.concatSelectedConstant(cscimm4(b, c, a, d), y)
123 return z.concatSelectedConstant(0b11_01_00_10 , z)
124 case _LHHL:
125 z := x.concatSelectedConstant(cscimm4(a, d, b, c), y)
126 return z.concatSelectedConstant(0b01_11_10_00 , z)
127 }
128 panic("missing case, switch should be exhaustive")
129 }
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148 func (x Uint32x4) SelectFromPair(a, b, c, d uint8, y Uint32x4) Uint32x4 {
149 pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
150
151 a, b, c, d = a&3, b&3, c&3, d&3
152
153 switch pattern {
154 case _LLLL:
155 return x.concatSelectedConstant(cscimm4(a, b, c, d), x)
156 case _HHHH:
157 return y.concatSelectedConstant(cscimm4(a, b, c, d), y)
158 case _LLHH:
159 return x.concatSelectedConstant(cscimm4(a, b, c, d), y)
160 case _HHLL:
161 return y.concatSelectedConstant(cscimm4(a, b, c, d), x)
162
163 case _HLLL:
164 z := y.concatSelectedConstant(cscimm4(a, a, b, b), x)
165 return z.concatSelectedConstant(cscimm4(0, 2, c, d), x)
166 case _LHLL:
167 z := x.concatSelectedConstant(cscimm4(a, a, b, b), y)
168 return z.concatSelectedConstant(cscimm4(0, 2, c, d), x)
169
170 case _HLHH:
171 z := y.concatSelectedConstant(cscimm4(a, a, b, b), x)
172 return z.concatSelectedConstant(cscimm4(0, 2, c, d), y)
173 case _LHHH:
174 z := x.concatSelectedConstant(cscimm4(a, a, b, b), y)
175 return z.concatSelectedConstant(cscimm4(0, 2, c, d), y)
176
177 case _LLLH:
178 z := x.concatSelectedConstant(cscimm4(c, c, d, d), y)
179 return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
180 case _LLHL:
181 z := y.concatSelectedConstant(cscimm4(c, c, d, d), x)
182 return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
183 case _HHLH:
184 z := x.concatSelectedConstant(cscimm4(c, c, d, d), y)
185 return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
186 case _HHHL:
187 z := y.concatSelectedConstant(cscimm4(c, c, d, d), x)
188 return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
189
190 case _LHLH:
191 z := x.concatSelectedConstant(cscimm4(a, c, b, d), y)
192 return z.concatSelectedConstant(0b11_01_10_00 , z)
193 case _HLHL:
194 z := x.concatSelectedConstant(cscimm4(b, d, a, c), y)
195 return z.concatSelectedConstant(0b01_11_00_10 , z)
196 case _HLLH:
197 z := x.concatSelectedConstant(cscimm4(b, c, a, d), y)
198 return z.concatSelectedConstant(0b11_01_00_10 , z)
199 case _LHHL:
200 z := x.concatSelectedConstant(cscimm4(a, d, b, c), y)
201 return z.concatSelectedConstant(0b01_11_10_00 , z)
202 }
203 panic("missing case, switch should be exhaustive")
204 }
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223 func (x Float32x4) SelectFromPair(a, b, c, d uint8, y Float32x4) Float32x4 {
224 pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
225
226 a, b, c, d = a&3, b&3, c&3, d&3
227
228 switch pattern {
229 case _LLLL:
230 return x.concatSelectedConstant(cscimm4(a, b, c, d), x)
231 case _HHHH:
232 return y.concatSelectedConstant(cscimm4(a, b, c, d), y)
233 case _LLHH:
234 return x.concatSelectedConstant(cscimm4(a, b, c, d), y)
235 case _HHLL:
236 return y.concatSelectedConstant(cscimm4(a, b, c, d), x)
237
238 case _HLLL:
239 z := y.concatSelectedConstant(cscimm4(a, a, b, b), x)
240 return z.concatSelectedConstant(cscimm4(0, 2, c, d), x)
241 case _LHLL:
242 z := x.concatSelectedConstant(cscimm4(a, a, b, b), y)
243 return z.concatSelectedConstant(cscimm4(0, 2, c, d), x)
244
245 case _HLHH:
246 z := y.concatSelectedConstant(cscimm4(a, a, b, b), x)
247 return z.concatSelectedConstant(cscimm4(0, 2, c, d), y)
248 case _LHHH:
249 z := x.concatSelectedConstant(cscimm4(a, a, b, b), y)
250 return z.concatSelectedConstant(cscimm4(0, 2, c, d), y)
251
252 case _LLLH:
253 z := x.concatSelectedConstant(cscimm4(c, c, d, d), y)
254 return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
255 case _LLHL:
256 z := y.concatSelectedConstant(cscimm4(c, c, d, d), x)
257 return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
258 case _HHLH:
259 z := x.concatSelectedConstant(cscimm4(c, c, d, d), y)
260 return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
261 case _HHHL:
262 z := y.concatSelectedConstant(cscimm4(c, c, d, d), x)
263 return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
264
265 case _LHLH:
266 z := x.concatSelectedConstant(cscimm4(a, c, b, d), y)
267 return z.concatSelectedConstant(0b11_01_10_00 , z)
268 case _HLHL:
269 z := x.concatSelectedConstant(cscimm4(b, d, a, c), y)
270 return z.concatSelectedConstant(0b01_11_00_10 , z)
271 case _HLLH:
272 z := x.concatSelectedConstant(cscimm4(b, c, a, d), y)
273 return z.concatSelectedConstant(0b11_01_00_10 , z)
274 case _LHHL:
275 z := x.concatSelectedConstant(cscimm4(a, d, b, c), y)
276 return z.concatSelectedConstant(0b01_11_10_00 , z)
277 }
278 panic("missing case, switch should be exhaustive")
279 }
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299 func (x Int32x8) SelectFromPairGrouped(a, b, c, d uint8, y Int32x8) Int32x8 {
300 pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
301
302 a, b, c, d = a&3, b&3, c&3, d&3
303
304 switch pattern {
305 case _LLLL:
306 return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
307 case _HHHH:
308 return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
309 case _LLHH:
310 return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
311 case _HHLL:
312 return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
313
314 case _HLLL:
315 z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
316 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
317 case _LHLL:
318 z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
319 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
320
321 case _HLHH:
322 z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
323 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
324 case _LHHH:
325 z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
326 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
327
328 case _LLLH:
329 z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
330 return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
331 case _LLHL:
332 z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
333 return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
334 case _HHLH:
335 z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
336 return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
337 case _HHHL:
338 z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
339 return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
340
341 case _LHLH:
342 z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y)
343 return z.concatSelectedConstantGrouped(0b11_01_10_00 , z)
344 case _HLHL:
345 z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y)
346 return z.concatSelectedConstantGrouped(0b01_11_00_10 , z)
347 case _HLLH:
348 z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y)
349 return z.concatSelectedConstantGrouped(0b11_01_00_10 , z)
350 case _LHHL:
351 z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y)
352 return z.concatSelectedConstantGrouped(0b01_11_10_00 , z)
353 }
354 panic("missing case, switch should be exhaustive")
355 }
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375 func (x Uint32x8) SelectFromPairGrouped(a, b, c, d uint8, y Uint32x8) Uint32x8 {
376 pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
377
378 a, b, c, d = a&3, b&3, c&3, d&3
379
380 switch pattern {
381 case _LLLL:
382 return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
383 case _HHHH:
384 return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
385 case _LLHH:
386 return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
387 case _HHLL:
388 return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
389
390 case _HLLL:
391 z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
392 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
393 case _LHLL:
394 z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
395 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
396
397 case _HLHH:
398 z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
399 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
400 case _LHHH:
401 z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
402 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
403
404 case _LLLH:
405 z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
406 return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
407 case _LLHL:
408 z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
409 return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
410 case _HHLH:
411 z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
412 return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
413 case _HHHL:
414 z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
415 return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
416
417 case _LHLH:
418 z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y)
419 return z.concatSelectedConstantGrouped(0b11_01_10_00 , z)
420 case _HLHL:
421 z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y)
422 return z.concatSelectedConstantGrouped(0b01_11_00_10 , z)
423 case _HLLH:
424 z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y)
425 return z.concatSelectedConstantGrouped(0b11_01_00_10 , z)
426 case _LHHL:
427 z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y)
428 return z.concatSelectedConstantGrouped(0b01_11_10_00 , z)
429 }
430 panic("missing case, switch should be exhaustive")
431 }
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451 func (x Float32x8) SelectFromPairGrouped(a, b, c, d uint8, y Float32x8) Float32x8 {
452 pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
453
454 a, b, c, d = a&3, b&3, c&3, d&3
455
456 switch pattern {
457 case _LLLL:
458 return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
459 case _HHHH:
460 return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
461 case _LLHH:
462 return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
463 case _HHLL:
464 return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
465
466 case _HLLL:
467 z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
468 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
469 case _LHLL:
470 z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
471 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
472
473 case _HLHH:
474 z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
475 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
476 case _LHHH:
477 z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
478 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
479
480 case _LLLH:
481 z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
482 return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
483 case _LLHL:
484 z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
485 return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
486 case _HHLH:
487 z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
488 return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
489 case _HHHL:
490 z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
491 return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
492
493 case _LHLH:
494 z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y)
495 return z.concatSelectedConstantGrouped(0b11_01_10_00 , z)
496 case _HLHL:
497 z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y)
498 return z.concatSelectedConstantGrouped(0b01_11_00_10 , z)
499 case _HLLH:
500 z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y)
501 return z.concatSelectedConstantGrouped(0b11_01_00_10 , z)
502 case _LHHL:
503 z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y)
504 return z.concatSelectedConstantGrouped(0b01_11_10_00 , z)
505 }
506 panic("missing case, switch should be exhaustive")
507 }
508
509
510
511
512
513
514
515
516
517
518
519
520
521 func (x Int32x16) SelectFromPairGrouped(a, b, c, d uint8, y Int32x16) Int32x16 {
522 pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
523
524 a, b, c, d = a&3, b&3, c&3, d&3
525
526 switch pattern {
527 case _LLLL:
528 return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
529 case _HHHH:
530 return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
531 case _LLHH:
532 return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
533 case _HHLL:
534 return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
535
536 case _HLLL:
537 z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
538 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
539 case _LHLL:
540 z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
541 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
542
543 case _HLHH:
544 z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
545 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
546 case _LHHH:
547 z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
548 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
549
550 case _LLLH:
551 z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
552 return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
553 case _LLHL:
554 z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
555 return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
556 case _HHLH:
557 z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
558 return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
559 case _HHHL:
560 z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
561 return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
562
563 case _LHLH:
564 z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y)
565 return z.concatSelectedConstantGrouped(0b11_01_10_00 , z)
566 case _HLHL:
567 z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y)
568 return z.concatSelectedConstantGrouped(0b01_11_00_10 , z)
569 case _HLLH:
570 z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y)
571 return z.concatSelectedConstantGrouped(0b11_01_00_10 , z)
572 case _LHHL:
573 z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y)
574 return z.concatSelectedConstantGrouped(0b01_11_10_00 , z)
575 }
576 panic("missing case, switch should be exhaustive")
577 }
578
579
580
581
582
583
584
585
586
587
588
589
590
591 func (x Uint32x16) SelectFromPairGrouped(a, b, c, d uint8, y Uint32x16) Uint32x16 {
592 pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
593
594 a, b, c, d = a&3, b&3, c&3, d&3
595
596 switch pattern {
597 case _LLLL:
598 return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
599 case _HHHH:
600 return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
601 case _LLHH:
602 return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
603 case _HHLL:
604 return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
605
606 case _HLLL:
607 z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
608 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
609 case _LHLL:
610 z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
611 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
612
613 case _HLHH:
614 z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
615 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
616 case _LHHH:
617 z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
618 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
619
620 case _LLLH:
621 z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
622 return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
623 case _LLHL:
624 z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
625 return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
626 case _HHLH:
627 z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
628 return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
629 case _HHHL:
630 z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
631 return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
632
633 case _LHLH:
634 z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y)
635 return z.concatSelectedConstantGrouped(0b11_01_10_00 , z)
636 case _HLHL:
637 z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y)
638 return z.concatSelectedConstantGrouped(0b01_11_00_10 , z)
639 case _HLLH:
640 z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y)
641 return z.concatSelectedConstantGrouped(0b11_01_00_10 , z)
642 case _LHHL:
643 z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y)
644 return z.concatSelectedConstantGrouped(0b01_11_10_00 , z)
645 }
646 panic("missing case, switch should be exhaustive")
647 }
648
649
650
651
652
653
654
655
656
657
658
659
660
661 func (x Float32x16) SelectFromPairGrouped(a, b, c, d uint8, y Float32x16) Float32x16 {
662 pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
663
664 a, b, c, d = a&3, b&3, c&3, d&3
665
666 switch pattern {
667 case _LLLL:
668 return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
669 case _HHHH:
670 return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
671 case _LLHH:
672 return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
673 case _HHLL:
674 return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
675
676 case _HLLL:
677 z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
678 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
679 case _LHLL:
680 z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
681 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
682
683 case _HLHH:
684 z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
685 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
686 case _LHHH:
687 z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
688 return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
689
690 case _LLLH:
691 z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
692 return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
693 case _LLHL:
694 z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
695 return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
696 case _HHLH:
697 z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
698 return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
699 case _HHHL:
700 z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
701 return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
702
703 case _LHLH:
704 z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y)
705 return z.concatSelectedConstantGrouped(0b11_01_10_00 , z)
706 case _HLHL:
707 z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y)
708 return z.concatSelectedConstantGrouped(0b01_11_00_10 , z)
709 case _HLLH:
710 z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y)
711 return z.concatSelectedConstantGrouped(0b11_01_00_10 , z)
712 case _LHHL:
713 z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y)
714 return z.concatSelectedConstantGrouped(0b01_11_10_00 , z)
715 }
716 panic("missing case, switch should be exhaustive")
717 }
718
719
720
// cscimm4 packs four selectors into one immediate byte: a occupies the two
// lowest bits, followed by b at bit 2, c at bit 4 and d at bit 6. The
// selectors are not masked here; the terms are summed in uint8 arithmetic,
// so callers are expected to pass values in 0-3.
func cscimm4(a, b, c, d uint8) uint8 {
	return d<<6 + c<<4 + b<<2 + a
}
724
725
726
// cscimm2 packs two selectors into an immediate: a lands in bit 0 and b in
// bit 1. The selectors are not masked here; the terms are summed in uint8
// arithmetic, so callers are expected to pass values in 0-1.
func cscimm2(a, b uint8) uint8 {
	return b<<1 + a
}
730
731
732
733
734 func cscimm2g2(a, b uint8) uint8 {
735 g := cscimm2(a, b)
736 return g + g<<2
737 }
738
739
740
741
742 func cscimm2g4(a, b uint8) uint8 {
743 g := cscimm2g2(a, b)
744 return g + g<<4
745 }
746
747
748
749
750
751
752
753
754
755
756
757 func (x Uint64x2) SelectFromPair(a, b uint8, y Uint64x2) Uint64x2 {
758 pattern := (a&2)>>1 + (b & 2)
759
760 a, b = a&1, b&1
761
762 switch pattern {
763 case _LL:
764 return x.concatSelectedConstant(cscimm2(a, b), x)
765 case _HH:
766 return y.concatSelectedConstant(cscimm2(a, b), y)
767 case _LH:
768 return x.concatSelectedConstant(cscimm2(a, b), y)
769 case _HL:
770 return y.concatSelectedConstant(cscimm2(a, b), x)
771 }
772 panic("missing case, switch should be exhaustive")
773 }
774
775
776
777
778
779
780
781
782
783
784
785
786 func (x Uint64x4) SelectFromPairGrouped(a, b uint8, y Uint64x4) Uint64x4 {
787 pattern := (a&2)>>1 + (b & 2)
788
789 a, b = a&1, b&1
790
791 switch pattern {
792 case _LL:
793 return x.concatSelectedConstantGrouped(cscimm2g2(a, b), x)
794 case _HH:
795 return y.concatSelectedConstantGrouped(cscimm2g2(a, b), y)
796 case _LH:
797 return x.concatSelectedConstantGrouped(cscimm2g2(a, b), y)
798 case _HL:
799 return y.concatSelectedConstantGrouped(cscimm2g2(a, b), x)
800 }
801 panic("missing case, switch should be exhaustive")
802 }
803
804
805
806
807
808
809
810
811
812
813
814
815 func (x Uint64x8) SelectFromPairGrouped(a, b uint8, y Uint64x8) Uint64x8 {
816 pattern := (a&2)>>1 + (b & 2)
817
818 a, b = a&1, b&1
819
820 switch pattern {
821 case _LL:
822 return x.concatSelectedConstantGrouped(cscimm2g4(a, b), x)
823 case _HH:
824 return y.concatSelectedConstantGrouped(cscimm2g4(a, b), y)
825 case _LH:
826 return x.concatSelectedConstantGrouped(cscimm2g4(a, b), y)
827 case _HL:
828 return y.concatSelectedConstantGrouped(cscimm2g4(a, b), x)
829 }
830 panic("missing case, switch should be exhaustive")
831 }
832
833
834
835
836
837
838
839
840
841
842
843 func (x Float64x2) SelectFromPair(a, b uint8, y Float64x2) Float64x2 {
844 pattern := (a&2)>>1 + (b & 2)
845
846 a, b = a&1, b&1
847
848 switch pattern {
849 case _LL:
850 return x.concatSelectedConstant(cscimm2(a, b), x)
851 case _HH:
852 return y.concatSelectedConstant(cscimm2(a, b), y)
853 case _LH:
854 return x.concatSelectedConstant(cscimm2(a, b), y)
855 case _HL:
856 return y.concatSelectedConstant(cscimm2(a, b), x)
857 }
858 panic("missing case, switch should be exhaustive")
859 }
860
861
862
863
864
865
866
867
868
869
870
871
872 func (x Float64x4) SelectFromPairGrouped(a, b uint8, y Float64x4) Float64x4 {
873 pattern := (a&2)>>1 + (b & 2)
874
875 a, b = a&1, b&1
876
877 switch pattern {
878 case _LL:
879 return x.concatSelectedConstantGrouped(cscimm2g2(a, b), x)
880 case _HH:
881 return y.concatSelectedConstantGrouped(cscimm2g2(a, b), y)
882 case _LH:
883 return x.concatSelectedConstantGrouped(cscimm2g2(a, b), y)
884 case _HL:
885 return y.concatSelectedConstantGrouped(cscimm2g2(a, b), x)
886 }
887 panic("missing case, switch should be exhaustive")
888 }
889
890
891
892
893
894
895
896
897
898
899
900
901 func (x Float64x8) SelectFromPairGrouped(a, b uint8, y Float64x8) Float64x8 {
902 pattern := (a&2)>>1 + (b & 2)
903
904 a, b = a&1, b&1
905
906 switch pattern {
907 case _LL:
908 return x.concatSelectedConstantGrouped(cscimm2g4(a, b), x)
909 case _HH:
910 return y.concatSelectedConstantGrouped(cscimm2g4(a, b), y)
911 case _LH:
912 return x.concatSelectedConstantGrouped(cscimm2g4(a, b), y)
913 case _HL:
914 return y.concatSelectedConstantGrouped(cscimm2g4(a, b), x)
915 }
916 panic("missing case, switch should be exhaustive")
917 }
918
919
920
921
922
923
924
925
926
927
928
929 func (x Int64x2) SelectFromPair(a, b uint8, y Int64x2) Int64x2 {
930 pattern := (a&2)>>1 + (b & 2)
931
932 a, b = a&1, b&1
933
934 switch pattern {
935 case _LL:
936 return x.concatSelectedConstant(cscimm2(a, b), x)
937 case _HH:
938 return y.concatSelectedConstant(cscimm2(a, b), y)
939 case _LH:
940 return x.concatSelectedConstant(cscimm2(a, b), y)
941 case _HL:
942 return y.concatSelectedConstant(cscimm2(a, b), x)
943 }
944 panic("missing case, switch should be exhaustive")
945 }
946
947
948
949
950
951
952
953
954
955
956
957
958 func (x Int64x4) SelectFromPairGrouped(a, b uint8, y Int64x4) Int64x4 {
959 pattern := (a&2)>>1 + (b & 2)
960
961 a, b = a&1, b&1
962
963 switch pattern {
964 case _LL:
965 return x.concatSelectedConstantGrouped(cscimm2g2(a, b), x)
966 case _HH:
967 return y.concatSelectedConstantGrouped(cscimm2g2(a, b), y)
968 case _LH:
969 return x.concatSelectedConstantGrouped(cscimm2g2(a, b), y)
970 case _HL:
971 return y.concatSelectedConstantGrouped(cscimm2g2(a, b), x)
972 }
973 panic("missing case, switch should be exhaustive")
974 }
975
976
977
978
979
980
981
982
983
984
985
986
987 func (x Int64x8) SelectFromPairGrouped(a, b uint8, y Int64x8) Int64x8 {
988 pattern := (a&2)>>1 + (b & 2)
989
990 a, b = a&1, b&1
991
992 switch pattern {
993 case _LL:
994 return x.concatSelectedConstantGrouped(cscimm2g4(a, b), x)
995 case _HH:
996 return y.concatSelectedConstantGrouped(cscimm2g4(a, b), y)
997 case _LH:
998 return x.concatSelectedConstantGrouped(cscimm2g4(a, b), y)
999 case _HL:
1000 return y.concatSelectedConstantGrouped(cscimm2g4(a, b), x)
1001 }
1002 panic("missing case, switch should be exhaustive")
1003 }
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016 func (x Int32x4) PermuteScalars(a, b, c, d uint8) Int32x4 {
1017 return x.permuteScalars(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
1018 }
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029 func (x Uint32x4) PermuteScalars(a, b, c, d uint8) Uint32x4 {
1030 return x.permuteScalars(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
1031 }
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044 func (x Int32x8) PermuteScalarsGrouped(a, b, c, d uint8) Int32x8 {
1045 return x.permuteScalarsGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
1046 }
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059 func (x Int32x16) PermuteScalarsGrouped(a, b, c, d uint8) Int32x16 {
1060 return x.permuteScalarsGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
1061 }
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072 func (x Uint32x8) PermuteScalarsGrouped(a, b, c, d uint8) Uint32x8 {
1073 return x.permuteScalarsGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
1074 }
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087 func (x Uint32x16) PermuteScalarsGrouped(a, b, c, d uint8) Uint32x16 {
1088 return x.permuteScalarsGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
1089 }
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102 func (x Int16x8) PermuteScalarsHi(a, b, c, d uint8) Int16x8 {
1103 return x.permuteScalarsHi(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
1104 }
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115 func (x Uint16x8) PermuteScalarsHi(a, b, c, d uint8) Uint16x8 {
1116 return x.permuteScalarsHi(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
1117 }
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132 func (x Int16x16) PermuteScalarsHiGrouped(a, b, c, d uint8) Int16x16 {
1133 return x.permuteScalarsHiGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
1134 }
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149 func (x Int16x32) PermuteScalarsHiGrouped(a, b, c, d uint8) Int16x32 {
1150 return x.permuteScalarsHiGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
1151 }
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166 func (x Uint16x16) PermuteScalarsHiGrouped(a, b, c, d uint8) Uint16x16 {
1167 return x.permuteScalarsHiGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
1168 }
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183 func (x Uint16x32) PermuteScalarsHiGrouped(a, b, c, d uint8) Uint16x32 {
1184 return x.permuteScalarsHiGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
1185 }
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198 func (x Int16x8) PermuteScalarsLo(a, b, c, d uint8) Int16x8 {
1199 return x.permuteScalarsLo(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
1200 }
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211 func (x Uint16x8) PermuteScalarsLo(a, b, c, d uint8) Uint16x8 {
1212 return x.permuteScalarsLo(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
1213 }
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228 func (x Int16x16) PermuteScalarsLoGrouped(a, b, c, d uint8) Int16x16 {
1229 return x.permuteScalarsLoGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
1230 }
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245 func (x Int16x32) PermuteScalarsLoGrouped(a, b, c, d uint8) Int16x32 {
1246 return x.permuteScalarsLoGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
1247 }
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259 func (x Uint16x16) PermuteScalarsLoGrouped(a, b, c, d uint8) Uint16x16 {
1260 return x.permuteScalarsLoGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
1261 }
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278 func (x Uint16x32) PermuteScalarsLoGrouped(a, b, c, d uint8) Uint16x32 {
1279 return x.permuteScalarsLoGrouped(a&3 | (b&3)<<2 | (c&3)<<4 | d<<6)
1280 }
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303 func (x Uint64x2) CarrylessMultiply(a, b uint8, y Uint64x2) Uint64x2 {
1304 return x.carrylessMultiply(a&1+((b&1)<<4), y)
1305 }
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328 func (x Uint64x4) CarrylessMultiplyGrouped(a, b uint8, y Uint64x4) Uint64x4 {
1329 return x.carrylessMultiply(a&1+((b&1)<<4), y)
1330 }
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353 func (x Uint64x8) CarrylessMultiplyGrouped(a, b uint8, y Uint64x8) Uint64x8 {
1354 return x.carrylessMultiply(a&1+((b&1)<<4), y)
1355 }
1356
View as plain text