package simd_test

import (
	"fmt"
	"os"
	"reflect"
	"simd/archsimd"
	"slices"
	"testing"
	"unsafe"
)

func TestMain(m *testing.M) {
	if !archsimd.X86.AVX() {
		fmt.Fprintln(os.Stderr, "Skipping tests: AVX is not available")
		os.Exit(0)
	}
	os.Exit(m.Run())
}

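// sink is assigned from tests below so the compiler cannot
// dead-code-eliminate the values stored into it.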
var sink any

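// TestType checks that SIMD vector and mask values work as named types,
// type aliases, struct fields, and pointer targets, and that masked
// arithmetic through those forms produces the expected lanes.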
func TestType(t *testing.T) {
	type alias = archsimd.Int32x4
	type maskT archsimd.Mask32x4
	type myStruct struct {
		x alias
		y *archsimd.Int32x4
		z maskT
	}
	vals := [4]int32{1, 2, 3, 4}
	v := myStruct{x: archsimd.LoadInt32x4(&vals)}

	want := []int32{2, 4, 0, 0}
	y := archsimd.LoadInt32x4(&vals)
	v.y = &y
	sink = y

	if !archsimd.X86.AVX512GFNI() {
		t.Skip("Test requires X86.AVX512GFNI, not available on this hardware")
		return
	}
	v.z = maskT(archsimd.Mask32x4FromBits(0b0011))
	*v.y = v.y.Add(v.x).Masked(archsimd.Mask32x4(v.z))

	got := [4]int32{}
	v.y.Store(&got)
	checkSlices(t, got[:], want)
}

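// TestUncomparable checks that comparing SIMD vectors through interface
// values panics, since vector types are not comparable.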
func TestUncomparable(t *testing.T) {
	var x, y any = archsimd.LoadUint32x4(&[4]uint32{1, 2, 3, 4}), archsimd.LoadUint32x4(&[4]uint32{5, 6, 7, 8})
	shouldPanic := func(fn func()) {
		defer func() {
			if recover() == nil {
				panic("did not panic")
			}
		}()
		fn()
	}
	shouldPanic(func() { _ = x == y })
}

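// TestFuncValue calls Add through a method expression stored in a variable,
// so the method is exercised as a first-class function value.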
func TestFuncValue(t *testing.T) {
	xv := [4]int32{1, 2, 3, 4}
	yv := [4]int32{5, 6, 7, 8}
	want := []int32{6, 8, 10, 12}
	x := archsimd.LoadInt32x4(&xv)
	y := archsimd.LoadInt32x4(&yv)
	fn := archsimd.Int32x4.Add
	sink = fn
	x = fn(x, y)
	got := [4]int32{}
	x.Store(&got)
	checkSlices(t, got[:], want)
}

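// TestReflectMethod looks up and invokes Add via the reflect API.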
func TestReflectMethod(t *testing.T) {
	xv := [4]int32{1, 2, 3, 4}
	yv := [4]int32{5, 6, 7, 8}
	want := []int32{6, 8, 10, 12}
	x := archsimd.LoadInt32x4(&xv)
	y := archsimd.LoadInt32x4(&yv)
	m, ok := reflect.TypeOf(x).MethodByName("Add")
	if !ok {
		t.Fatal("Add method not found")
	}
	fn := m.Func.Interface().(func(x, y archsimd.Int32x4) archsimd.Int32x4)
	x = fn(x, y)
	got := [4]int32{}
	x.Store(&got)
	checkSlices(t, got[:], want)
}

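// TestVectorConversion round-trips a vector through the bit-preserving
// reinterpretations AsInt64x2 and AsInt32x4.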
func TestVectorConversion(t *testing.T) {
	if !archsimd.X86.AVX512GFNI() {
		t.Skip("Test requires X86.AVX512GFNI, not available on this hardware")
		return
	}
	xv := [4]int32{1, 2, 3, 4}
	x := archsimd.LoadInt32x4(&xv)
	xPromoted := x.AsInt64x2()
	xPromotedDemoted := xPromoted.AsInt32x4()
	got := [4]int32{}
	xPromotedDemoted.Store(&got)
	for i := range 4 {
		if xv[i] != got[i] {
			t.Errorf("Result at %d incorrect: want %d, got %d", i, xv[i], got[i])
		}
	}
}

func TestMaskConversion(t *testing.T) {
	if !archsimd.X86.AVX512GFNI() {
		t.Skip("Test requires X86.AVX512GFNI, not available on this hardware")
		return
	}
	x := archsimd.LoadInt32x4Slice([]int32{5, 0, 7, 0})
	mask := archsimd.Int32x4{}.Sub(x).ToMask()
	y := archsimd.LoadInt32x4Slice([]int32{1, 2, 3, 4}).Add(x).Masked(mask)
	want := [4]int32{6, 0, 10, 0}
	got := make([]int32, 4)
	y.StoreSlice(got)
	checkSlices(t, got[:], want[:])
}

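// TestPermute selects lanes by a vector of indices; here it reverses the vector.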
func TestPermute(t *testing.T) {
	if !archsimd.X86.AVX512() {
		t.Skip("Test requires X86.AVX512, not available on this hardware")
		return
	}
	x := []int64{1, 2, 3, 4, 5, 6, 7, 8}
	indices := []uint64{7, 6, 5, 4, 3, 2, 1, 0}
	want := []int64{8, 7, 6, 5, 4, 3, 2, 1}
	got := make([]int64, 8)
	archsimd.LoadInt64x8Slice(x).Permute(archsimd.LoadUint64x8Slice(indices)).StoreSlice(got)
	checkSlices(t, got, want)
}

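// TestPermuteOrZero is like Permute, except a negative index zeroes the
// destination lane instead of selecting a source lane.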
func TestPermuteOrZero(t *testing.T) {
	x := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
	indices := []int8{7, 6, 5, 4, 3, 2, 1, 0, -1, 8, -1, 9, -1, 10, -1, 11}
	want := []uint8{8, 7, 6, 5, 4, 3, 2, 1, 0, 9, 0, 10, 0, 11, 0, 12}
	got := make([]uint8, len(x))
	archsimd.LoadUint8x16Slice(x).PermuteOrZero(archsimd.LoadInt8x16Slice(indices)).StoreSlice(got)
	checkSlices(t, got, want)
}

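// TestConcatPermute indexes the 16-lane concatenation of x and y:
// indices 0-7 select from x, indices 8-15 select from y.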
func TestConcatPermute(t *testing.T) {
	if !archsimd.X86.AVX512() {
		t.Skip("Test requires X86.AVX512, not available on this hardware")
		return
	}
	x := []int64{1, 2, 3, 4, 5, 6, 7, 8}
	y := []int64{-1, -2, -3, -4, -5, -6, -7, -8}
	indices := []uint64{7 + 8, 6, 5 + 8, 4, 3 + 8, 2, 1 + 8, 0}
	want := []int64{-8, 7, -6, 5, -4, 3, -2, 1}
	got := make([]int64, 8)
	archsimd.LoadInt64x8Slice(x).ConcatPermute(archsimd.LoadInt64x8Slice(y), archsimd.LoadUint64x8Slice(indices)).StoreSlice(got)
	checkSlices(t, got, want)
}

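// TestCompress packs the mask-selected lanes (bits 1 and 3 of 0b1010) into
// the low lanes of the result and zeroes the rest.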
func TestCompress(t *testing.T) {
	if !archsimd.X86.AVX512() {
		t.Skip("Test requires X86.AVX512, not available on this hardware")
		return
	}
	v1234 := archsimd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
	v2400 := v1234.Compress(archsimd.Mask32x4FromBits(0b1010))
	got := make([]int32, 4)
	v2400.StoreSlice(got)
	want := []int32{2, 4, 0, 0}
	if !slices.Equal(got, want) {
		t.Errorf("want and got differ, want=%v, got=%v", want, got)
	}
}

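// TestExpand is the inverse of Compress: source lanes are distributed, in
// order, into the mask-selected lanes, and unselected lanes are zeroed.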
func TestExpand(t *testing.T) {
	if !archsimd.X86.AVX512() {
		t.Skip("Test requires X86.AVX512, not available on this hardware")
		return
	}
	v3400 := archsimd.LoadInt32x4Slice([]int32{3, 4, 0, 0})
	v0304 := v3400.Expand(archsimd.Mask32x4FromBits(0b1010))
	got := make([]int32, 4)
	v0304.StoreSlice(got)
	want := []int32{0, 3, 0, 4}
	if !slices.Equal(got, want) {
		t.Errorf("want and got differ, want=%v, got=%v", want, got)
	}
}

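// testShiftAllVal is a package-level variable so TestShiftAll also exercises
// ShiftAllLeft with a shift count that is not a compile-time constant.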
var testShiftAllVal uint64 = 3

func TestShiftAll(t *testing.T) {
	got := make([]int32, 4)
	archsimd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(2).StoreSlice(got)
	for _, v := range got {
		if v != 0b1100 {
			t.Errorf("expect 0b1100, got %b", v)
		}
	}
	archsimd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(testShiftAllVal).StoreSlice(got)
	for _, v := range got {
		if v != 0b11000 {
			t.Errorf("expect 0b11000, got %b", v)
		}
	}
}

func TestSlicesInt8(t *testing.T) {
	if !archsimd.X86.AVX2() {
		t.Skip("Test requires X86.AVX2, not available on this hardware")
		return
	}
	a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
	v := archsimd.LoadInt8x32Slice(a)
	b := make([]int8, 32, 32)
	v.StoreSlice(b)
	checkSlices(t, a, b)
}

func TestSlicesInt8SetElem(t *testing.T) {
	a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
	v := archsimd.LoadInt8x16Slice(a)

	v = v.SetElem(3, 13)
	a[3] = 13

	b := make([]int8, 16, 16)
	v.StoreSlice(b)
	checkSlices(t, a, b)
}

func TestSlicesInt8GetElem(t *testing.T) {
	a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
	v := archsimd.LoadInt8x16Slice(a)
	e := v.GetElem(2)
	if e != a[2] {
		t.Errorf("GetElem(2) = %d != a[2] = %d", e, a[2])
	}
}

func TestSlicesInt8TooShortLoad(t *testing.T) {
	if !archsimd.X86.AVX2() {
		t.Skip("Test requires X86.AVX2, not available on this hardware")
		return
	}
	defer func() {
		if r := recover(); r != nil {
			t.Logf("Saw EXPECTED panic %v", r)
		} else {
			t.Errorf("Did not see expected panic")
		}
	}()
	a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}
	v := archsimd.LoadInt8x32Slice(a)
	b := make([]int8, 32, 32)
	v.StoreSlice(b)
	checkSlices(t, a, b)
}

func TestSlicesInt8TooShortStore(t *testing.T) {
	if !archsimd.X86.AVX2() {
		t.Skip("Test requires X86.AVX2, not available on this hardware")
		return
	}
	defer func() {
		if r := recover(); r != nil {
			t.Logf("Saw EXPECTED panic %v", r)
		} else {
			t.Errorf("Did not see expected panic")
		}
	}()
	a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
	v := archsimd.LoadInt8x32Slice(a)
	b := make([]int8, 31)
	v.StoreSlice(b)
	checkSlices(t, a, b)
}

func TestSlicesFloat64(t *testing.T) {
	a := []float64{1, 2, 3, 4, 5, 6, 7, 8}
	v := archsimd.LoadFloat64x4Slice(a)
	b := make([]float64, 4, 4)
	v.StoreSlice(b)
	for i := range b {
		if a[i] != b[i] {
			t.Errorf("a and b differ at index %d, a=%f, b=%f", i, a[i], b[i])
		}
	}
}

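// TestMergeLocals checks that SIMD-typed locals survive being spilled to the
// stack and reloaded across a function call.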
func TestMergeLocals(t *testing.T) {
	if !archsimd.X86.AVX2() {
		t.Skip("Test requires X86.AVX2, not available on this hardware")
		return
	}
	testMergeLocalswrapper(t, archsimd.Int64x4.Add)
}

// forceSpill is a call boundary intended to force the caller's live SIMD
// values to be spilled and reloaded. The noinline directive is assumed:
// without it, the empty function would be inlined and force nothing.
//
//go:noinline
func forceSpill() {}

func testMergeLocalswrapper(t *testing.T, op func(archsimd.Int64x4, archsimd.Int64x4) archsimd.Int64x4) {
	t.Helper()
	s0 := []int64{0, 1, 2, 3}
	s1 := []int64{-1, 0, -1, 0}
	want := []int64{-1, 1, 1, 3}
	v := archsimd.LoadInt64x4Slice(s0)
	m := archsimd.LoadInt64x4Slice(s1)
	forceSpill()
	got := make([]int64, 4)
	gotv := op(v, m)
	gotv.StoreSlice(got)
	for i := range len(want) {
		if got[i] != want[i] {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
		}
	}
}

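// TestBitMaskFromBits builds a mask from a constant bit pattern; bit i of the
// pattern selects lane i, so 0b10 keeps only lane 1 of the sum.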
func TestBitMaskFromBits(t *testing.T) {
	if !archsimd.X86.AVX512() {
		t.Skip("Test requires X86.AVX512, not available on this hardware")
		return
	}
	results := [2]int64{}
	want := [2]int64{0, 6}
	m := archsimd.Mask64x2FromBits(0b10)
	archsimd.LoadInt64x2Slice([]int64{1, 2}).Add(archsimd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results)
	for i := range 2 {
		if results[i] != want[i] {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])
		}
	}
}

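// maskForTestBitMaskFromBitsLoad is a package-level variable so the mask bits
// below must be loaded at run time rather than encoded as an immediate.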
var maskForTestBitMaskFromBitsLoad = uint8(0b10)

func TestBitMaskFromBitsLoad(t *testing.T) {
	if !archsimd.X86.AVX512() {
		t.Skip("Test requires X86.AVX512, not available on this hardware")
		return
	}
	results := [2]int64{}
	want := [2]int64{0, 6}
	m := archsimd.Mask64x2FromBits(maskForTestBitMaskFromBitsLoad)
	archsimd.LoadInt64x2Slice([]int64{1, 2}).Add(archsimd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results)
	for i := range 2 {
		if results[i] != want[i] {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])
		}
	}
}

func TestBitMaskToBits(t *testing.T) {
	int8s := []int8{
		0, 1, 1, 0, 0, 1, 0, 1,
		1, 0, 1, 1, 0, 0, 1, 0,
		1, 0, 0, 1, 1, 0, 1, 0,
		0, 1, 1, 0, 0, 1, 0, 1,
		1, 0, 0, 1, 0, 1, 1, 0,
		0, 1, 0, 1, 1, 0, 0, 1,
		1, 0, 1, 0, 0, 1, 1, 0,
		0, 1, 1, 0, 1, 0, 0, 1,
	}
	int16s := make([]int16, 32)
	for i := range int16s {
		int16s[i] = int16(int8s[i])
	}
	int32s := make([]int32, 16)
	for i := range int32s {
		int32s[i] = int32(int8s[i])
	}
	int64s := make([]int64, 8)
	for i := range int64s {
		int64s[i] = int64(int8s[i])
	}
	want64 := uint64(0)
	for i := range int8s {
		want64 |= uint64(int8s[i]) << i
	}
	want32 := uint32(want64)
	want16 := uint16(want64)
	want8 := uint8(want64)
	want4 := want8 & 0b1111
	want2 := want4 & 0b11

	if v := archsimd.LoadInt8x16Slice(int8s[:16]).ToMask().ToBits(); v != want16 {
		t.Errorf("want %b, got %b", want16, v)
	}
	if v := archsimd.LoadInt32x4Slice(int32s[:4]).ToMask().ToBits(); v != want4 {
		t.Errorf("want %b, got %b", want4, v)
	}
	if v := archsimd.LoadInt32x8Slice(int32s[:8]).ToMask().ToBits(); v != want8 {
		t.Errorf("want %b, got %b", want8, v)
	}
	if v := archsimd.LoadInt64x2Slice(int64s[:2]).ToMask().ToBits(); v != want2 {
		t.Errorf("want %b, got %b", want2, v)
	}
	if v := archsimd.LoadInt64x4Slice(int64s[:4]).ToMask().ToBits(); v != want4 {
		t.Errorf("want %b, got %b", want4, v)
	}

	if archsimd.X86.AVX2() {
		if v := archsimd.LoadInt8x32Slice(int8s[:32]).ToMask().ToBits(); v != want32 {
			t.Errorf("want %b, got %b", want32, v)
		}
	}

	if archsimd.X86.AVX512() {
		if v := archsimd.LoadInt8x64Slice(int8s).ToMask().ToBits(); v != want64 {
			t.Errorf("want %b, got %b", want64, v)
		}
		if v := archsimd.LoadInt16x8Slice(int16s[:8]).ToMask().ToBits(); v != want8 {
			t.Errorf("want %b, got %b", want8, v)
		}
		if v := archsimd.LoadInt16x16Slice(int16s[:16]).ToMask().ToBits(); v != want16 {
			t.Errorf("want %b, got %b", want16, v)
		}
		if v := archsimd.LoadInt16x32Slice(int16s).ToMask().ToBits(); v != want32 {
			t.Errorf("want %b, got %b", want32, v)
		}
		if v := archsimd.LoadInt32x16Slice(int32s).ToMask().ToBits(); v != want16 {
			t.Errorf("want %b, got %b", want16, v)
		}
		if v := archsimd.LoadInt64x8Slice(int64s).ToMask().ToBits(); v != want8 {
			t.Errorf("want %b, got %b", want8, v)
		}
	}
}

var maskForTestBitMaskToBitsStore uint8

func TestBitMaskToBitsStore(t *testing.T) {
	if !archsimd.X86.AVX512() {
		t.Skip("Test requires X86.AVX512, not available on this hardware")
		return
	}
	maskForTestBitMaskToBitsStore = archsimd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits()
	if maskForTestBitMaskToBitsStore != 0b101 {
		t.Errorf("Want 0b101, got %b", maskForTestBitMaskToBitsStore)
	}
}

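// TestMergeFloat checks Merge and mask extraction: Merge(y, mask) keeps the
// receiver's lane where the mask is set and takes y's lane where it is clear.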
func TestMergeFloat(t *testing.T) {
	if !archsimd.X86.AVX2() {
		t.Skip("Test requires X86.AVX2, not available on this hardware")
		return
	}
	k := make([]int64, 4, 4)
	s := make([]float64, 4, 4)

	a := archsimd.LoadFloat64x4Slice([]float64{1, 2, 3, 4})
	b := archsimd.LoadFloat64x4Slice([]float64{4, 2, 3, 1})
	g := a.Greater(b)
	g.ToInt64x4().StoreSlice(k)
	c := a.Merge(b, g)

	c.StoreSlice(s)

	checkSlices[int64](t, k, []int64{0, 0, 0, -1})
	checkSlices[float64](t, s, []float64{4, 2, 3, 4})
}

func TestMergeFloat512(t *testing.T) {
	if !archsimd.X86.AVX512() {
		t.Skip("Test requires X86.AVX512, not available on this hardware")
		return
	}

	k := make([]int64, 8, 8)
	s := make([]float64, 8, 8)

	a := archsimd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8})
	b := archsimd.LoadFloat64x8Slice([]float64{8, 7, 6, 5, 4, 2, 3, 1})
	g := a.Greater(b)
	g.ToInt64x8().StoreSlice(k)
	c := a.Merge(b, g)
	d := a.Masked(g)

	checkSlices[int64](t, k, []int64{0, 0, 0, 0, -1, -1, -1, -1})

	c.StoreSlice(s)
	checkSlices[float64](t, s, []float64{8, 7, 6, 5, 5, 6, 7, 8})

	d.StoreSlice(s)
	checkSlices[float64](t, s, []float64{0, 0, 0, 0, 5, 6, 7, 8})
}

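// ro is a package-level variable so RotateAllLeft is exercised with a
// non-constant rotate count.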
var ro uint8 = 2

func TestRotateAllVariable(t *testing.T) {
	if !archsimd.X86.AVX512() {
		t.Skip("Test requires X86.AVX512, not available on this hardware")
		return
	}
	got := make([]int32, 4)
	archsimd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).RotateAllLeft(ro).StoreSlice(got)
	for _, v := range got {
		if v != 0b1100 {
			t.Errorf("Want 0b1100, got %b", v)
		}
	}
}

func TestBroadcastUint32x4(t *testing.T) {
	s := make([]uint32, 4, 4)
	archsimd.BroadcastUint32x4(123456789).StoreSlice(s)
	checkSlices(t, s, []uint32{123456789, 123456789, 123456789, 123456789})
}

func TestBroadcastFloat32x8(t *testing.T) {
	s := make([]float32, 8, 8)
	archsimd.BroadcastFloat32x8(123456789).StoreSlice(s)
	checkSlices(t, s, []float32{123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789})
}

func TestBroadcastFloat64x2(t *testing.T) {
	s := make([]float64, 2, 2)
	archsimd.BroadcastFloat64x2(123456789).StoreSlice(s)
	checkSlices(t, s, []float64{123456789, 123456789})
}

func TestBroadcastUint64x2(t *testing.T) {
	s := make([]uint64, 2, 2)
	archsimd.BroadcastUint64x2(123456789).StoreSlice(s)
	checkSlices(t, s, []uint64{123456789, 123456789})
}

func TestBroadcastUint16x8(t *testing.T) {
	s := make([]uint16, 8, 8)
	archsimd.BroadcastUint16x8(12345).StoreSlice(s)
	checkSlices(t, s, []uint16{12345, 12345, 12345, 12345, 12345, 12345, 12345, 12345})
}

func TestBroadcastInt8x32(t *testing.T) {
	if !archsimd.X86.AVX2() {
		t.Skip("Test requires X86.AVX2, not available on this hardware")
		return
	}
	s := make([]int8, 32, 32)
	archsimd.BroadcastInt8x32(-123).StoreSlice(s)
	checkSlices(t, s, []int8{-123, -123, -123, -123, -123, -123, -123, -123,
		-123, -123, -123, -123, -123, -123, -123, -123,
		-123, -123, -123, -123, -123, -123, -123, -123,
		-123, -123, -123, -123, -123, -123, -123, -123,
	})
}

func TestMaskOpt512(t *testing.T) {
	if !archsimd.X86.AVX512() {
		t.Skip("Test requires X86.AVX512, not available on this hardware")
		return
	}

	k := make([]int64, 8, 8)
	s := make([]float64, 8, 8)

	a := archsimd.LoadFloat64x8Slice([]float64{2, 0, 2, 0, 2, 0, 2, 0})
	b := archsimd.LoadFloat64x8Slice([]float64{1, 1, 1, 1, 1, 1, 1, 1})
	c := archsimd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8})
	d := archsimd.LoadFloat64x8Slice([]float64{2, 4, 6, 8, 10, 12, 14, 16})
	g := a.Greater(b)
	e := c.Add(d).Masked(g)
	e.StoreSlice(s)
	g.ToInt64x8().StoreSlice(k)
	checkSlices[int64](t, k, []int64{-1, 0, -1, 0, -1, 0, -1, 0})
	checkSlices[float64](t, s, []float64{3, 0, 9, 0, 15, 0, 21, 0})
}

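// flattenedTranspose interleaves x and y: viewing x and y as the two rows of
// a 2x4 matrix, the two results hold its 4x2 transpose flattened in
// row-major order.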
func flattenedTranspose(x, y archsimd.Int32x4) (a, b archsimd.Int32x4) {
	return x.InterleaveLo(y), x.InterleaveHi(y)
}

func TestFlattenedTranspose(t *testing.T) {
	r := make([]int32, 4, 4)
	s := make([]int32, 4, 4)

	x := archsimd.LoadInt32x4Slice([]int32{0xA, 0xB, 0xC, 0xD})
	y := archsimd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
	a, b := flattenedTranspose(x, y)

	a.StoreSlice(r)
	b.StoreSlice(s)

	checkSlices[int32](t, r, []int32{0xA, 1, 0xB, 2})
	checkSlices[int32](t, s, []int32{0xC, 3, 0xD, 4})
}

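// TestClearAVXUpperBits checks that live 256-bit vector values still compute
// correctly when ClearAVXUpperBits is called between operations.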
func TestClearAVXUpperBits(t *testing.T) {
	if !archsimd.X86.AVX2() {
		t.Skip("Test requires X86.AVX2, not available on this hardware")
		return
	}

	r := make([]int64, 4)
	s := make([]int64, 4)

	x := archsimd.LoadInt64x4Slice([]int64{10, 20, 30, 40})
	y := archsimd.LoadInt64x4Slice([]int64{1, 2, 3, 4})

	x.Add(y).StoreSlice(r)
	archsimd.ClearAVXUpperBits()
	x.Sub(y).StoreSlice(s)

	checkSlices[int64](t, r, []int64{11, 22, 33, 44})
	checkSlices[int64](t, s, []int64{9, 18, 27, 36})
}

func TestLeadingZeros(t *testing.T) {
	if !archsimd.X86.AVX512() {
		t.Skip("Test requires X86.AVX512, not available on this hardware")
		return
	}

	src := []uint64{0b1111, 0}
	want := []uint64{60, 64}
	got := make([]uint64, 2)
	archsimd.LoadUint64x2Slice(src).LeadingZeros().StoreSlice(got)
	for i := range 2 {
		if want[i] != got[i] {
			t.Errorf("Result incorrect at %d: want %d, got %d", i, want[i], got[i])
		}
	}
}

func TestIsZero(t *testing.T) {
	v1 := archsimd.LoadUint64x2Slice([]uint64{0, 1})
	v2 := archsimd.LoadUint64x2Slice([]uint64{0, 0})
	if v1.IsZero() {
		t.Errorf("Result incorrect, want false, got true")
	}
	if !v2.IsZero() {
		t.Errorf("Result incorrect, want true, got false")
	}
	if !v1.And(v2).IsZero() {
		t.Errorf("Result incorrect, want true, got false")
	}
	if v1.AndNot(v2).IsZero() {
		t.Errorf("Result incorrect, want false, got true")
	}
	if !v2.And(v1).IsZero() {
		t.Errorf("Result incorrect, want true, got false")
	}
	if !v2.AndNot(v1).IsZero() {
		t.Errorf("Result incorrect, want true, got false")
	}
}

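// TestSelect4FromPairConst builds vectors from the lane pair (x, y): selector
// values 0-3 pick lanes of x and 4-7 pick lanes of y. All selectors here are
// compile-time constants.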
func TestSelect4FromPairConst(t *testing.T) {
	x := archsimd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
	y := archsimd.LoadInt32x4Slice([]int32{4, 5, 6, 7})

	llll := x.SelectFromPair(0, 1, 2, 3, y)
	hhhh := x.SelectFromPair(4, 5, 6, 7, y)
	llhh := x.SelectFromPair(0, 1, 6, 7, y)
	hhll := x.SelectFromPair(6, 7, 0, 1, y)

	lllh := x.SelectFromPair(0, 1, 2, 7, y)
	llhl := x.SelectFromPair(0, 1, 7, 2, y)
	lhll := x.SelectFromPair(0, 7, 1, 2, y)
	hlll := x.SelectFromPair(7, 0, 1, 2, y)

	hhhl := x.SelectFromPair(4, 5, 6, 0, y)
	hhlh := x.SelectFromPair(4, 5, 0, 6, y)
	hlhh := x.SelectFromPair(4, 0, 5, 6, y)
	lhhh := x.SelectFromPair(0, 4, 5, 6, y)

	lhlh := x.SelectFromPair(0, 4, 1, 5, y)
	hlhl := x.SelectFromPair(4, 0, 5, 1, y)
	lhhl := x.SelectFromPair(0, 4, 5, 1, y)
	hllh := x.SelectFromPair(4, 0, 1, 5, y)

	r := make([]int32, 4, 4)

	foo := func(v archsimd.Int32x4, a, b, c, d int32) {
		v.StoreSlice(r)
		checkSlices[int32](t, r, []int32{a, b, c, d})
	}

	foo(llll, 0, 1, 2, 3)
	foo(hhhh, 4, 5, 6, 7)
	foo(llhh, 0, 1, 6, 7)
	foo(hhll, 6, 7, 0, 1)

	foo(lllh, 0, 1, 2, 7)
	foo(llhl, 0, 1, 7, 2)
	foo(lhll, 0, 7, 1, 2)
	foo(hlll, 7, 0, 1, 2)

	foo(hhhl, 4, 5, 6, 0)
	foo(hhlh, 4, 5, 0, 6)
	foo(hlhh, 4, 0, 5, 6)
	foo(lhhh, 0, 4, 5, 6)

	foo(lhlh, 0, 4, 1, 5)
	foo(hlhl, 4, 0, 5, 1)
	foo(lhhl, 0, 4, 5, 1)
	foo(hllh, 4, 0, 1, 5)
}

// selectFromPairInt32x4 passes the selector values through a call so they are
// not compile-time constants at the SelectFromPair call site. The noinline
// directive is assumed here; without it, inlining would fold the selectors
// back into constants.
//
//go:noinline
func selectFromPairInt32x4(x archsimd.Int32x4, a, b, c, d uint8, y archsimd.Int32x4) archsimd.Int32x4 {
	return x.SelectFromPair(a, b, c, d, y)
}

func TestSelect4FromPairVar(t *testing.T) {
	x := archsimd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
	y := archsimd.LoadInt32x4Slice([]int32{4, 5, 6, 7})

	llll := selectFromPairInt32x4(x, 0, 1, 2, 3, y)
	hhhh := selectFromPairInt32x4(x, 4, 5, 6, 7, y)
	llhh := selectFromPairInt32x4(x, 0, 1, 6, 7, y)
	hhll := selectFromPairInt32x4(x, 6, 7, 0, 1, y)

	lllh := selectFromPairInt32x4(x, 0, 1, 2, 7, y)
	llhl := selectFromPairInt32x4(x, 0, 1, 7, 2, y)
	lhll := selectFromPairInt32x4(x, 0, 7, 1, 2, y)
	hlll := selectFromPairInt32x4(x, 7, 0, 1, 2, y)

	hhhl := selectFromPairInt32x4(x, 4, 5, 6, 0, y)
	hhlh := selectFromPairInt32x4(x, 4, 5, 0, 6, y)
	hlhh := selectFromPairInt32x4(x, 4, 0, 5, 6, y)
	lhhh := selectFromPairInt32x4(x, 0, 4, 5, 6, y)

	lhlh := selectFromPairInt32x4(x, 0, 4, 1, 5, y)
	hlhl := selectFromPairInt32x4(x, 4, 0, 5, 1, y)
	lhhl := selectFromPairInt32x4(x, 0, 4, 5, 1, y)
	hllh := selectFromPairInt32x4(x, 4, 0, 1, 5, y)

	r := make([]int32, 4, 4)

	foo := func(v archsimd.Int32x4, a, b, c, d int32) {
		v.StoreSlice(r)
		checkSlices[int32](t, r, []int32{a, b, c, d})
	}

	foo(llll, 0, 1, 2, 3)
	foo(hhhh, 4, 5, 6, 7)
	foo(llhh, 0, 1, 6, 7)
	foo(hhll, 6, 7, 0, 1)

	foo(lllh, 0, 1, 2, 7)
	foo(llhl, 0, 1, 7, 2)
	foo(lhll, 0, 7, 1, 2)
	foo(hlll, 7, 0, 1, 2)

	foo(hhhl, 4, 5, 6, 0)
	foo(hhlh, 4, 5, 0, 6)
	foo(hlhh, 4, 0, 5, 6)
	foo(lhhh, 0, 4, 5, 6)

	foo(lhlh, 0, 4, 1, 5)
	foo(hlhl, 4, 0, 5, 1)
	foo(lhhl, 0, 4, 5, 1)
	foo(hllh, 4, 0, 1, 5)
}

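// The Grouped variants below apply the same four-selector pattern
// independently within each 128-bit group of a wider vector.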
func TestSelect4FromPairConstGrouped(t *testing.T) {
	x := archsimd.LoadFloat32x8Slice([]float32{0, 1, 2, 3, 10, 11, 12, 13})
	y := archsimd.LoadFloat32x8Slice([]float32{4, 5, 6, 7, 14, 15, 16, 17})

	llll := x.SelectFromPairGrouped(0, 1, 2, 3, y)
	hhhh := x.SelectFromPairGrouped(4, 5, 6, 7, y)
	llhh := x.SelectFromPairGrouped(0, 1, 6, 7, y)
	hhll := x.SelectFromPairGrouped(6, 7, 0, 1, y)

	lllh := x.SelectFromPairGrouped(0, 1, 2, 7, y)
	llhl := x.SelectFromPairGrouped(0, 1, 7, 2, y)
	lhll := x.SelectFromPairGrouped(0, 7, 1, 2, y)
	hlll := x.SelectFromPairGrouped(7, 0, 1, 2, y)

	hhhl := x.SelectFromPairGrouped(4, 5, 6, 0, y)
	hhlh := x.SelectFromPairGrouped(4, 5, 0, 6, y)
	hlhh := x.SelectFromPairGrouped(4, 0, 5, 6, y)
	lhhh := x.SelectFromPairGrouped(0, 4, 5, 6, y)

	lhlh := x.SelectFromPairGrouped(0, 4, 1, 5, y)
	hlhl := x.SelectFromPairGrouped(4, 0, 5, 1, y)
	lhhl := x.SelectFromPairGrouped(0, 4, 5, 1, y)
	hllh := x.SelectFromPairGrouped(4, 0, 1, 5, y)

	r := make([]float32, 8, 8)

	foo := func(v archsimd.Float32x8, a, b, c, d float32) {
		v.StoreSlice(r)
		checkSlices[float32](t, r, []float32{a, b, c, d, 10 + a, 10 + b, 10 + c, 10 + d})
	}

	foo(llll, 0, 1, 2, 3)
	foo(hhhh, 4, 5, 6, 7)
	foo(llhh, 0, 1, 6, 7)
	foo(hhll, 6, 7, 0, 1)

	foo(lllh, 0, 1, 2, 7)
	foo(llhl, 0, 1, 7, 2)
	foo(lhll, 0, 7, 1, 2)
	foo(hlll, 7, 0, 1, 2)

	foo(hhhl, 4, 5, 6, 0)
	foo(hhlh, 4, 5, 0, 6)
	foo(hlhh, 4, 0, 5, 6)
	foo(lhhh, 0, 4, 5, 6)

	foo(lhlh, 0, 4, 1, 5)
	foo(hlhl, 4, 0, 5, 1)
	foo(lhhl, 0, 4, 5, 1)
	foo(hllh, 4, 0, 1, 5)
}

func TestSelectFromPairConstGroupedUint32x16(t *testing.T) {
	if !archsimd.X86.AVX512() {
		t.Skip("Test requires X86.AVX512, not available on this hardware")
		return
	}
	x := archsimd.LoadUint32x16Slice([]uint32{0, 1, 2, 3, 10, 11, 12, 13, 20, 21, 22, 23, 30, 31, 32, 33})
	y := archsimd.LoadUint32x16Slice([]uint32{4, 5, 6, 7, 14, 15, 16, 17, 24, 25, 26, 27, 34, 35, 36, 37})

	llll := x.SelectFromPairGrouped(0, 1, 2, 3, y)
	hhhh := x.SelectFromPairGrouped(4, 5, 6, 7, y)
	llhh := x.SelectFromPairGrouped(0, 1, 6, 7, y)
	hhll := x.SelectFromPairGrouped(6, 7, 0, 1, y)

	lllh := x.SelectFromPairGrouped(0, 1, 2, 7, y)
	llhl := x.SelectFromPairGrouped(0, 1, 7, 2, y)
	lhll := x.SelectFromPairGrouped(0, 7, 1, 2, y)
	hlll := x.SelectFromPairGrouped(7, 0, 1, 2, y)

	hhhl := x.SelectFromPairGrouped(4, 5, 6, 0, y)
	hhlh := x.SelectFromPairGrouped(4, 5, 0, 6, y)
	hlhh := x.SelectFromPairGrouped(4, 0, 5, 6, y)
	lhhh := x.SelectFromPairGrouped(0, 4, 5, 6, y)

	lhlh := x.SelectFromPairGrouped(0, 4, 1, 5, y)
	hlhl := x.SelectFromPairGrouped(4, 0, 5, 1, y)
	lhhl := x.SelectFromPairGrouped(0, 4, 5, 1, y)
	hllh := x.SelectFromPairGrouped(4, 0, 1, 5, y)

	r := make([]uint32, 16, 16)

	foo := func(v archsimd.Uint32x16, a, b, c, d uint32) {
		v.StoreSlice(r)
		checkSlices[uint32](t, r, []uint32{a, b, c, d,
			10 + a, 10 + b, 10 + c, 10 + d,
			20 + a, 20 + b, 20 + c, 20 + d,
			30 + a, 30 + b, 30 + c, 30 + d,
		})
	}

	foo(llll, 0, 1, 2, 3)
	foo(hhhh, 4, 5, 6, 7)
	foo(llhh, 0, 1, 6, 7)
	foo(hhll, 6, 7, 0, 1)

	foo(lllh, 0, 1, 2, 7)
	foo(llhl, 0, 1, 7, 2)
	foo(lhll, 0, 7, 1, 2)
	foo(hlll, 7, 0, 1, 2)

	foo(hhhl, 4, 5, 6, 0)
	foo(hhlh, 4, 5, 0, 6)
	foo(hlhh, 4, 0, 5, 6)
	foo(lhhh, 0, 4, 5, 6)

	foo(lhlh, 0, 4, 1, 5)
	foo(hlhl, 4, 0, 5, 1)
	foo(lhhl, 0, 4, 5, 1)
	foo(hllh, 4, 0, 1, 5)
}

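// Select128FromPair picks two 128-bit halves from the pair (x, y): selector
// values 0-1 index the halves of x, 2-3 the halves of y.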
func TestSelect128FromPair(t *testing.T) {
	x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
	y := archsimd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})

	aa := x.Select128FromPair(0, 0, y)
	ab := x.Select128FromPair(0, 1, y)
	bc := x.Select128FromPair(1, 2, y)
	cd := x.Select128FromPair(2, 3, y)
	da := x.Select128FromPair(3, 0, y)
	dc := x.Select128FromPair(3, 2, y)

	r := make([]uint64, 4, 4)

	foo := func(v archsimd.Uint64x4, a, b uint64) {
		a, b = 2*a, 2*b
		v.StoreSlice(r)
		checkSlices[uint64](t, r, []uint64{a, a + 1, b, b + 1})
	}

	foo(aa, 0, 0)
	foo(ab, 0, 1)
	foo(bc, 1, 2)
	foo(cd, 2, 3)
	foo(da, 3, 0)
	foo(dc, 3, 2)
}

func TestSelect128FromPairError(t *testing.T) {
	x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
	y := archsimd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})

	defer func() {
		if r := recover(); r != nil {
			t.Logf("Saw expected panic %v", r)
		}
	}()
	_ = x.Select128FromPair(0, 4, y)

	t.Errorf("Should have panicked")
}

// select128FromPair, like selectFromPairInt32x4 above, launders the selector
// values through a call so they are not compile-time constants. The noinline
// directive is assumed for the same reason.
//
//go:noinline
func select128FromPair(x archsimd.Uint64x4, lo, hi uint8, y archsimd.Uint64x4) archsimd.Uint64x4 {
	return x.Select128FromPair(lo, hi, y)
}

func TestSelect128FromPairVar(t *testing.T) {
	x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
	y := archsimd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})

	aa := select128FromPair(x, 0, 0, y)
	ab := select128FromPair(x, 0, 1, y)
	bc := select128FromPair(x, 1, 2, y)
	cd := select128FromPair(x, 2, 3, y)
	da := select128FromPair(x, 3, 0, y)
	dc := select128FromPair(x, 3, 2, y)

	r := make([]uint64, 4, 4)

	foo := func(v archsimd.Uint64x4, a, b uint64) {
		a, b = 2*a, 2*b
		v.StoreSlice(r)
		checkSlices[uint64](t, r, []uint64{a, a + 1, b, b + 1})
	}

	foo(aa, 0, 0)
	foo(ab, 0, 1)
	foo(bc, 1, 2)
	foo(cd, 2, 3)
	foo(da, 3, 0)
	foo(dc, 3, 2)
}

func TestSelect2FromPairConst(t *testing.T) {
	x := archsimd.LoadUint64x2Slice([]uint64{0, 1})
	y := archsimd.LoadUint64x2Slice([]uint64{2, 3})

	ll := x.SelectFromPair(0, 1, y)
	hh := x.SelectFromPair(3, 2, y)
	lh := x.SelectFromPair(0, 3, y)
	hl := x.SelectFromPair(2, 1, y)

	r := make([]uint64, 2, 2)

	foo := func(v archsimd.Uint64x2, a, b uint64) {
		v.StoreSlice(r)
		checkSlices[uint64](t, r, []uint64{a, b})
	}

	foo(ll, 0, 1)
	foo(hh, 3, 2)
	foo(lh, 0, 3)
	foo(hl, 2, 1)
}

func TestSelect2FromPairConstGroupedUint(t *testing.T) {
	x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 10, 11})
	y := archsimd.LoadUint64x4Slice([]uint64{2, 3, 12, 13})

	ll := x.SelectFromPairGrouped(0, 1, y)
	hh := x.SelectFromPairGrouped(3, 2, y)
	lh := x.SelectFromPairGrouped(0, 3, y)
	hl := x.SelectFromPairGrouped(2, 1, y)

	r := make([]uint64, 4, 4)

	foo := func(v archsimd.Uint64x4, a, b uint64) {
		v.StoreSlice(r)
		checkSlices[uint64](t, r, []uint64{a, b, a + 10, b + 10})
	}

	foo(ll, 0, 1)
	foo(hh, 3, 2)
	foo(lh, 0, 3)
	foo(hl, 2, 1)
}

func TestSelect2FromPairConstGroupedFloat(t *testing.T) {
	x := archsimd.LoadFloat64x4Slice([]float64{0, 1, 10, 11})
	y := archsimd.LoadFloat64x4Slice([]float64{2, 3, 12, 13})

	ll := x.SelectFromPairGrouped(0, 1, y)
	hh := x.SelectFromPairGrouped(3, 2, y)
	lh := x.SelectFromPairGrouped(0, 3, y)
	hl := x.SelectFromPairGrouped(2, 1, y)

	r := make([]float64, 4, 4)

	foo := func(v archsimd.Float64x4, a, b float64) {
		v.StoreSlice(r)
		checkSlices[float64](t, r, []float64{a, b, a + 10, b + 10})
	}

	foo(ll, 0, 1)
	foo(hh, 3, 2)
	foo(lh, 0, 3)
	foo(hl, 2, 1)
}

func TestSelect2FromPairConstGroupedInt(t *testing.T) {
	x := archsimd.LoadInt64x4Slice([]int64{0, 1, 10, 11})
	y := archsimd.LoadInt64x4Slice([]int64{2, 3, 12, 13})

	ll := x.SelectFromPairGrouped(0, 1, y)
	hh := x.SelectFromPairGrouped(3, 2, y)
	lh := x.SelectFromPairGrouped(0, 3, y)
	hl := x.SelectFromPairGrouped(2, 1, y)

	r := make([]int64, 4, 4)

	foo := func(v archsimd.Int64x4, a, b int64) {
		v.StoreSlice(r)
		checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10})
	}

	foo(ll, 0, 1)
	foo(hh, 3, 2)
	foo(lh, 0, 3)
	foo(hl, 2, 1)
}

func TestSelect2FromPairConstGroupedInt512(t *testing.T) {
	if !archsimd.X86.AVX512() {
		t.Skip("Test requires X86.AVX512, not available on this hardware")
		return
	}

	x := archsimd.LoadInt64x8Slice([]int64{0, 1, 10, 11, 20, 21, 30, 31})
	y := archsimd.LoadInt64x8Slice([]int64{2, 3, 12, 13, 22, 23, 32, 33})

	ll := x.SelectFromPairGrouped(0, 1, y)
	hh := x.SelectFromPairGrouped(3, 2, y)
	lh := x.SelectFromPairGrouped(0, 3, y)
	hl := x.SelectFromPairGrouped(2, 1, y)

	r := make([]int64, 8, 8)

	foo := func(v archsimd.Int64x8, a, b int64) {
		v.StoreSlice(r)
		checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10, a + 20, b + 20, a + 30, b + 30})
	}

	foo(ll, 0, 1)
	foo(hh, 3, 2)
	foo(lh, 0, 3)
	foo(hl, 2, 1)
}

func TestString(t *testing.T) {
	x := archsimd.LoadUint32x4Slice([]uint32{0, 1, 2, 3})
	y := archsimd.LoadInt64x4Slice([]int64{-4, -5, -6, -7})
	z := archsimd.LoadFloat32x4Slice([]float32{0.5, 1.5, -2.5, 3.5e9})
	w := archsimd.LoadFloat64x4Slice([]float64{0.5, 1.5, -2.5, 3.5e9})

	sx := "{0,1,2,3}"
	sy := "{-4,-5,-6,-7}"
	sz := "{0.5,1.5,-2.5,3.5e+09}"
	sw := sz

	if x.String() != sx {
		t.Errorf("x=%s wanted %s", x, sx)
	}
	if y.String() != sy {
		t.Errorf("y=%s wanted %s", y, sy)
	}
	if z.String() != sz {
		t.Errorf("z=%s wanted %s", z, sz)
	}
	if w.String() != sw {
		t.Errorf("w=%s wanted %s", w, sw)
	}
	t.Logf("w=%s", w)
	t.Logf("x=%s", x)
	t.Logf("y=%s", y)
	t.Logf("z=%s", z)
}

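// a allocates a fresh 16-element int32 slice for the Int32x16 helpers below.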
func a() []int32 {
	return make([]int32, 16, 16)
}

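// applyTo3 is the scalar reference for three-operand vector expressions: it
// applies f lane-wise to x, y, and z.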
func applyTo3(x, y, z archsimd.Int32x16, f func(x, y, z int32) int32) []int32 {
	ax, ay, az := a(), a(), a()
	x.StoreSlice(ax)
	y.StoreSlice(ay)
	z.StoreSlice(az)

	r := a()
	for i := range r {
		r[i] = f(ax[i], ay[i], az[i])
	}
	return r
}

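// applyTo4 is the four-operand analogue of applyTo3.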
func applyTo4(x, y, z, w archsimd.Int32x16, f func(x, y, z, w int32) int32) []int32 {
	ax, ay, az, aw := a(), a(), a(), a()
	x.StoreSlice(ax)
	y.StoreSlice(ay)
	z.StoreSlice(az)
	w.StoreSlice(aw)

	r := make([]int32, len(ax), len(ax))
	for i := range r {
		r[i] = f(ax[i], ay[i], az[i], aw[i])
	}
	return r
}

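// TestSelectTernOptInt32x16 compares three- and four-input boolean lane
// expressions against their scalar references; on AVX512 the compiler may
// fuse such expressions into ternary-logic instructions.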
func TestSelectTernOptInt32x16(t *testing.T) {
	if !archsimd.X86.AVX512() {
		t.Skip("Test requires X86.AVX512, not available on this hardware")
		return
	}
	ax := []int32{0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1}
	ay := []int32{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1}
	az := []int32{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}
	aw := []int32{0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1}
	am := []int32{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}

	x := archsimd.LoadInt32x16Slice(ax)
	y := archsimd.LoadInt32x16Slice(ay)
	z := archsimd.LoadInt32x16Slice(az)
	w := archsimd.LoadInt32x16Slice(aw)
	m := archsimd.LoadInt32x16Slice(am)

	foo := func(v archsimd.Int32x16, s []int32) {
		r := make([]int32, 16, 16)
		v.StoreSlice(r)
		checkSlices[int32](t, r, s)
	}

	t0 := w.Xor(y).Xor(z)
	ft0 := func(w, y, z int32) int32 {
		return w ^ y ^ z
	}
	foo(t0, applyTo3(w, y, z, ft0))

	t1 := m.And(w.Xor(y).Xor(z.Not()))
	ft1 := func(m, w, y, z int32) int32 {
		return m & (w ^ y ^ ^z)
	}
	foo(t1, applyTo4(m, w, y, z, ft1))

	t2 := x.Xor(y).Xor(z).And(x.Xor(y).Xor(z.Not()))
	ft2 := func(x, y, z int32) int32 {
		return (x ^ y ^ z) & (x ^ y ^ ^z)
	}
	foo(t2, applyTo3(x, y, z, ft2))
}

func TestMaskedMerge(t *testing.T) {
	if !archsimd.X86.AVX2() {
		t.Skip("Test requires X86.AVX2, not available on this hardware")
		return
	}
	x := archsimd.LoadInt64x4Slice([]int64{1, 2, 3, 4})
	y := archsimd.LoadInt64x4Slice([]int64{5, 6, 1, 1})
	z := archsimd.LoadInt64x4Slice([]int64{-1, -2, -3, -4})
	res := make([]int64, 4)
	expected := []int64{6, 8, -3, -4}
	mask := x.Less(y)
	x.Add(y).Merge(z, mask).StoreSlice(res)
	for i := range 4 {
		if res[i] != expected[i] {
			t.Errorf("got %d wanted %d", res[i], expected[i])
		}
	}
}

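// The PermuteScalars tests rearrange lanes by four constant indices. The
// Hi/Lo variants permute only the high or low four 16-bit lanes, and the
// Grouped variants repeat the pattern in each 128-bit group.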
func TestPermuteScalars(t *testing.T) {
	x := []int32{11, 12, 13, 14}
	want := []int32{12, 13, 14, 11}
	got := make([]int32, 4)
	archsimd.LoadInt32x4Slice(x).PermuteScalars(1, 2, 3, 0).StoreSlice(got)
	checkSlices(t, got, want)
}

func TestPermuteScalarsGrouped(t *testing.T) {
	if !archsimd.X86.AVX2() {
		t.Skip("Test requires X86.AVX2, not available on this hardware")
		return
	}
	x := []int32{11, 12, 13, 14, 21, 22, 23, 24}
	want := []int32{12, 13, 14, 11, 22, 23, 24, 21}
	got := make([]int32, 8)
	archsimd.LoadInt32x8Slice(x).PermuteScalarsGrouped(1, 2, 3, 0).StoreSlice(got)
	checkSlices(t, got, want)
}

func TestPermuteScalarsHi(t *testing.T) {
	x := []int16{-1, -2, -3, -4, 11, 12, 13, 14}
	want := []int16{-1, -2, -3, -4, 12, 13, 14, 11}
	got := make([]int16, len(x))
	archsimd.LoadInt16x8Slice(x).PermuteScalarsHi(1, 2, 3, 0).StoreSlice(got)
	checkSlices(t, got, want)
}

func TestPermuteScalarsLo(t *testing.T) {
	x := []int16{11, 12, 13, 14, 4, 5, 6, 7}
	want := []int16{12, 13, 14, 11, 4, 5, 6, 7}
	got := make([]int16, len(x))
	archsimd.LoadInt16x8Slice(x).PermuteScalarsLo(1, 2, 3, 0).StoreSlice(got)
	checkSlices(t, got, want)
}

func TestPermuteScalarsHiGrouped(t *testing.T) {
	if !archsimd.X86.AVX2() {
		t.Skip("Test requires X86.AVX2, not available on this hardware")
		return
	}
	x := []int16{-1, -2, -3, -4, 11, 12, 13, 14, -11, -12, -13, -14, 111, 112, 113, 114}
	want := []int16{-1, -2, -3, -4, 12, 13, 14, 11, -11, -12, -13, -14, 112, 113, 114, 111}
	got := make([]int16, len(x))
	archsimd.LoadInt16x16Slice(x).PermuteScalarsHiGrouped(1, 2, 3, 0).StoreSlice(got)
	checkSlices(t, got, want)
}

func TestPermuteScalarsLoGrouped(t *testing.T) {
	if !archsimd.X86.AVX2() {
		t.Skip("Test requires X86.AVX2, not available on this hardware")
		return
	}
	x := []int16{11, 12, 13, 14, 4, 5, 6, 7, 111, 112, 113, 114, 14, 15, 16, 17}
	want := []int16{12, 13, 14, 11, 4, 5, 6, 7, 112, 113, 114, 111, 14, 15, 16, 17}
	got := make([]int16, len(x))
	archsimd.LoadInt16x16Slice(x).PermuteScalarsLoGrouped(1, 2, 3, 0).StoreSlice(got)
	checkSlices(t, got, want)
}

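// TestClMul checks carryless multiplication: CarrylessMultiply(i, j, y)
// multiplies lane i of the receiver by lane j of y as polynomials over GF(2).
// The 128-bit product spans both lanes; the high lane is zero for these
// small inputs.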
func TestClMul(t *testing.T) {
	var x = archsimd.LoadUint64x2Slice([]uint64{1, 5})
	var y = archsimd.LoadUint64x2Slice([]uint64{3, 9})

	foo := func(v archsimd.Uint64x2, s []uint64) {
		r := make([]uint64, 2, 2)
		v.StoreSlice(r)
		checkSlices[uint64](t, r, s)
	}

	foo(x.CarrylessMultiply(0, 0, y), []uint64{3, 0})
	foo(x.CarrylessMultiply(0, 1, y), []uint64{9, 0})
	foo(x.CarrylessMultiply(1, 0, y), []uint64{15, 0})
	foo(x.CarrylessMultiply(1, 1, y), []uint64{45, 0})
	foo(y.CarrylessMultiply(0, 0, y), []uint64{5, 0})
}

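// addPairsSlice is the scalar reference for AddPairs: the low half of the
// result holds the pairwise sums of a, the high half the pairwise sums of b.
// subPairsSlice and the Grouped variants below follow the same layout.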
func addPairsSlice[T number](a, b []T) []T {
	r := make([]T, len(a))
	for i := range len(a) / 2 {
		r[i] = a[2*i] + a[2*i+1]
		r[i+len(a)/2] = b[2*i] + b[2*i+1]
	}
	return r
}

func subPairsSlice[T number](a, b []T) []T {
	r := make([]T, len(a))
	for i := range len(a) / 2 {
		r[i] = a[2*i] - a[2*i+1]
		r[i+len(a)/2] = b[2*i] - b[2*i+1]
	}
	return r
}

func addPairsGroupedSlice[T number](a, b []T) []T {
	group := int(16 / unsafe.Sizeof(a[0])) // number of lanes in a 128-bit (16-byte) group
	r := make([]T, 0, len(a))
	for i := range len(a) / group {
		r = append(r, addPairsSlice(a[i*group:(i+1)*group], b[i*group:(i+1)*group])...)
	}
	return r
}

func subPairsGroupedSlice[T number](a, b []T) []T {
	group := int(16 / unsafe.Sizeof(a[0])) // number of lanes in a 128-bit (16-byte) group
	r := make([]T, 0, len(a))
	for i := range len(a) / group {
		r = append(r, subPairsSlice(a[i*group:(i+1)*group], b[i*group:(i+1)*group])...)
	}
	return r
}

func TestAddSubPairs(t *testing.T) {
	testInt16x8Binary(t, archsimd.Int16x8.AddPairs, addPairsSlice[int16])
	testInt16x8Binary(t, archsimd.Int16x8.SubPairs, subPairsSlice[int16])
	testUint16x8Binary(t, archsimd.Uint16x8.AddPairs, addPairsSlice[uint16])
	testUint16x8Binary(t, archsimd.Uint16x8.SubPairs, subPairsSlice[uint16])
	testInt32x4Binary(t, archsimd.Int32x4.AddPairs, addPairsSlice[int32])
	testInt32x4Binary(t, archsimd.Int32x4.SubPairs, subPairsSlice[int32])
	testUint32x4Binary(t, archsimd.Uint32x4.AddPairs, addPairsSlice[uint32])
	testUint32x4Binary(t, archsimd.Uint32x4.SubPairs, subPairsSlice[uint32])
	testFloat32x4Binary(t, archsimd.Float32x4.AddPairs, addPairsSlice[float32])
	testFloat32x4Binary(t, archsimd.Float32x4.SubPairs, subPairsSlice[float32])
	testFloat64x2Binary(t, archsimd.Float64x2.AddPairs, addPairsSlice[float64])
	testFloat64x2Binary(t, archsimd.Float64x2.SubPairs, subPairsSlice[float64])

	if archsimd.X86.AVX2() {
		testInt16x16Binary(t, archsimd.Int16x16.AddPairsGrouped, addPairsGroupedSlice[int16])
		testInt16x16Binary(t, archsimd.Int16x16.SubPairsGrouped, subPairsGroupedSlice[int16])
		testUint16x16Binary(t, archsimd.Uint16x16.AddPairsGrouped, addPairsGroupedSlice[uint16])
		testUint16x16Binary(t, archsimd.Uint16x16.SubPairsGrouped, subPairsGroupedSlice[uint16])
		testInt32x8Binary(t, archsimd.Int32x8.AddPairsGrouped, addPairsGroupedSlice[int32])
		testInt32x8Binary(t, archsimd.Int32x8.SubPairsGrouped, subPairsGroupedSlice[int32])
		testUint32x8Binary(t, archsimd.Uint32x8.AddPairsGrouped, addPairsGroupedSlice[uint32])
		testUint32x8Binary(t, archsimd.Uint32x8.SubPairsGrouped, subPairsGroupedSlice[uint32])
		testFloat32x8Binary(t, archsimd.Float32x8.AddPairsGrouped, addPairsGroupedSlice[float32])
		testFloat32x8Binary(t, archsimd.Float32x8.SubPairsGrouped, subPairsGroupedSlice[float32])
		testFloat64x4Binary(t, archsimd.Float64x4.AddPairsGrouped, addPairsGroupedSlice[float64])
		testFloat64x4Binary(t, archsimd.Float64x4.SubPairsGrouped, subPairsGroupedSlice[float64])
	}
}

func convConcatSlice[T, U number](a, b []T, conv func(T) U) []U {
	r := make([]U, len(a)+len(b))
	for i, v := range a {
		r[i] = conv(v)
	}
	for i, v := range b {
		r[len(a)+i] = conv(v)
	}
	return r
}

func convConcatGroupedSlice[T, U number](a, b []T, conv func(T) U) []U {
	group := int(16 / unsafe.Sizeof(a[0])) // number of lanes in a 128-bit (16-byte) group
	r := make([]U, 0, len(a)+len(b))
	for i := 0; i < len(a)/group; i++ {
		r = append(r, convConcatSlice(a[i*group:(i+1)*group], b[i*group:(i+1)*group], conv)...)
	}
	return r
}

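// TestSaturateConcat narrows pairs of int32 vectors to int16 or uint16 with
// saturation and concatenates the results, comparing against a scalar
// reference over many input pairs.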
func TestSaturateConcat(t *testing.T) {
	forSlicePair(t, int32s, 4, func(x, y []int32) bool {
		a, b := archsimd.LoadInt32x4Slice(x), archsimd.LoadInt32x4Slice(y)
		var out [8]int16
		a.SaturateToInt16Concat(b).Store(&out)
		want := convConcatSlice(x, y, satToInt16)
		return checkSlicesLogInput(t, out[:], want, 0, func() { t.Logf("x=%v, y=%v", x, y) })
	})

	forSlicePair(t, int32s, 4, func(x, y []int32) bool {
		a, b := archsimd.LoadInt32x4Slice(x), archsimd.LoadInt32x4Slice(y)
		var out [8]uint16
		a.SaturateToUint16Concat(b).Store(&out)
		want := convConcatSlice(x, y, satToUint16)
		return checkSlicesLogInput(t, out[:], want, 0, func() { t.Logf("x=%v, y=%v", x, y) })
	})

	if archsimd.X86.AVX2() {
		forSlicePair(t, int32s, 8, func(x, y []int32) bool {
			a, b := archsimd.LoadInt32x8Slice(x), archsimd.LoadInt32x8Slice(y)
			var out [16]int16
			a.SaturateToInt16ConcatGrouped(b).Store(&out)
			want := convConcatGroupedSlice(x, y, satToInt16)
			return checkSlicesLogInput(t, out[:], want, 0, func() { t.Logf("x=%v, y=%v", x, y) })
		})

		forSlicePair(t, int32s, 8, func(x, y []int32) bool {
			a, b := archsimd.LoadInt32x8Slice(x), archsimd.LoadInt32x8Slice(y)
			var out [16]uint16
			a.SaturateToUint16ConcatGrouped(b).Store(&out)
			want := convConcatGroupedSlice(x, y, satToUint16)
			return checkSlicesLogInput(t, out[:], want, 0, func() { t.Logf("x=%v, y=%v", x, y) })
		})
	}

	if archsimd.X86.AVX512() {
		forSlicePair(t, int32s, 16, func(x, y []int32) bool {
			a, b := archsimd.LoadInt32x16Slice(x), archsimd.LoadInt32x16Slice(y)
			var out [32]int16
			a.SaturateToInt16ConcatGrouped(b).Store(&out)
			want := convConcatGroupedSlice(x, y, satToInt16)
			return checkSlicesLogInput(t, out[:], want, 0, func() { t.Logf("x=%v, y=%v", x, y) })
		})

		forSlicePair(t, int32s, 16, func(x, y []int32) bool {
			a, b := archsimd.LoadInt32x16Slice(x), archsimd.LoadInt32x16Slice(y)
			var out [32]uint16
			a.SaturateToUint16ConcatGrouped(b).Store(&out)
			want := convConcatGroupedSlice(x, y, satToUint16)
			return checkSlicesLogInput(t, out[:], want, 0, func() { t.Logf("x=%v, y=%v", x, y) })
		})
	}
}