1
2
3
4
5
6
7 package simd_test
8
9 import (
10 "reflect"
11 "simd"
12 "slices"
13 "testing"
14 )
15
16 var sink any
17
18 func TestType(t *testing.T) {
19
20
21
22
23
24
25
26 type alias = simd.Int32x4
27 type maskT simd.Mask32x4
28 type myStruct struct {
29 x alias
30 y *simd.Int32x4
31 z maskT
32 }
33 vals := [4]int32{1, 2, 3, 4}
34 v := myStruct{x: simd.LoadInt32x4(&vals)}
35
36 want := []int32{2, 4, 0, 0}
37 y := simd.LoadInt32x4(&vals)
38 v.y = &y
39 sink = y
40
41 if !simd.X86.AVX512GFNI() {
42 t.Skip("Test requires X86.AVX512, not available on this hardware")
43 return
44 }
45 v.z = maskT(simd.Mask32x4FromBits(0b0011))
46 *v.y = v.y.Add(v.x).Masked(simd.Mask32x4(v.z))
47
48 got := [4]int32{}
49 v.y.Store(&got)
50 for i := range 4 {
51 if want[i] != got[i] {
52 t.Errorf("Result at %d incorrect: want %d, got %d", i, want[i], got[i])
53 }
54 }
55 }
56
57 func TestUncomparable(t *testing.T) {
58
59 var x, y any = simd.LoadUint32x4(&[4]uint32{1, 2, 3, 4}), simd.LoadUint32x4(&[4]uint32{5, 6, 7, 8})
60 shouldPanic := func(fn func()) {
61 defer func() {
62 if recover() == nil {
63 panic("did not panic")
64 }
65 }()
66 fn()
67 }
68 shouldPanic(func() { _ = x == y })
69 }
70
71 func TestFuncValue(t *testing.T) {
72
73 xv := [4]int32{1, 2, 3, 4}
74 yv := [4]int32{5, 6, 7, 8}
75 want := []int32{6, 8, 10, 12}
76 x := simd.LoadInt32x4(&xv)
77 y := simd.LoadInt32x4(&yv)
78 fn := simd.Int32x4.Add
79 sink = fn
80 x = fn(x, y)
81 got := [4]int32{}
82 x.Store(&got)
83 for i := range 4 {
84 if want[i] != got[i] {
85 t.Errorf("Result at %d incorrect: want %d, got %d", i, want[i], got[i])
86 }
87 }
88 }
89
90 func TestReflectMethod(t *testing.T) {
91
92
93 xv := [4]int32{1, 2, 3, 4}
94 yv := [4]int32{5, 6, 7, 8}
95 want := []int32{6, 8, 10, 12}
96 x := simd.LoadInt32x4(&xv)
97 y := simd.LoadInt32x4(&yv)
98 m, ok := reflect.TypeOf(x).MethodByName("Add")
99 if !ok {
100 t.Fatal("Add method not found")
101 }
102 fn := m.Func.Interface().(func(x, y simd.Int32x4) simd.Int32x4)
103 x = fn(x, y)
104 got := [4]int32{}
105 x.Store(&got)
106 for i := range 4 {
107 if want[i] != got[i] {
108 t.Errorf("Result at %d incorrect: want %d, got %d", i, want[i], got[i])
109 }
110 }
111 }
112
113 func TestVectorConversion(t *testing.T) {
114 if !simd.X86.AVX512GFNI() {
115 t.Skip("Test requires X86.AVX512, not available on this hardware")
116 return
117 }
118 xv := [4]int32{1, 2, 3, 4}
119 x := simd.LoadInt32x4(&xv)
120 xPromoted := x.AsInt64x2()
121 xPromotedDemoted := xPromoted.AsInt32x4()
122 got := [4]int32{}
123 xPromotedDemoted.Store(&got)
124 for i := range 4 {
125 if xv[i] != got[i] {
126 t.Errorf("Result at %d incorrect: want %d, got %d", i, xv[i], got[i])
127 }
128 }
129 }
130
131 func TestMaskConversion(t *testing.T) {
132 if !simd.X86.AVX512GFNI() {
133 t.Skip("Test requires X86.AVX512, not available on this hardware")
134 return
135 }
136 x := simd.LoadInt32x4Slice([]int32{5, 0, 7, 0})
137 mask := simd.Int32x4{}.Sub(x).ToMask()
138 y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}).Add(x).Masked(mask)
139 want := [4]int32{6, 0, 10, 0}
140 got := make([]int32, 4)
141 y.StoreSlice(got)
142 for i := range 4 {
143 if want[i] != got[i] {
144 t.Errorf("Result at %d incorrect: want %d, got %d", i, want[i], got[i])
145 }
146 }
147 }
148
149 func TestPermute(t *testing.T) {
150 if !simd.X86.AVX512() {
151 t.Skip("Test requires X86.AVX512, not available on this hardware")
152 return
153 }
154 x := []int64{1, 2, 3, 4, 5, 6, 7, 8}
155 indices := []uint64{7, 6, 5, 4, 3, 2, 1, 0}
156 want := []int64{8, 7, 6, 5, 4, 3, 2, 1}
157 got := make([]int64, 8)
158 simd.LoadInt64x8Slice(x).Permute(simd.LoadUint64x8Slice(indices)).StoreSlice(got)
159 for i := range 8 {
160 if want[i] != got[i] {
161 t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
162 }
163 }
164 }
165
166 func TestPermuteOrZero(t *testing.T) {
167 x := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
168 indices := []int8{7, 6, 5, 4, 3, 2, 1, 0, -1, 8, -1, 9, -1, 10, -1, 11}
169 want := []uint8{8, 7, 6, 5, 4, 3, 2, 1, 0, 9, 0, 10, 0, 11, 0, 12}
170 got := make([]uint8, len(x))
171 simd.LoadUint8x16Slice(x).PermuteOrZero(simd.LoadInt8x16Slice(indices)).StoreSlice(got)
172 for i := range 8 {
173 if want[i] != got[i] {
174 t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
175 }
176 }
177 }
178
179 func TestConcatPermute(t *testing.T) {
180 if !simd.X86.AVX512() {
181 t.Skip("Test requires X86.AVX512, not available on this hardware")
182 return
183 }
184 x := []int64{1, 2, 3, 4, 5, 6, 7, 8}
185 y := []int64{-1, -2, -3, -4, -5, -6, -7, -8}
186 indices := []uint64{7 + 8, 6, 5 + 8, 4, 3 + 8, 2, 1 + 8, 0}
187 want := []int64{-8, 7, -6, 5, -4, 3, -2, 1}
188 got := make([]int64, 8)
189 simd.LoadInt64x8Slice(x).ConcatPermute(simd.LoadInt64x8Slice(y), simd.LoadUint64x8Slice(indices)).StoreSlice(got)
190 for i := range 8 {
191 if want[i] != got[i] {
192 t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
193 }
194 }
195 }
196
197 func TestCompress(t *testing.T) {
198 if !simd.X86.AVX512() {
199 t.Skip("Test requires X86.AVX512, not available on this hardware")
200 return
201 }
202 v1234 := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
203 v2400 := v1234.Compress(simd.Mask32x4FromBits(0b1010))
204 got := make([]int32, 4)
205 v2400.StoreSlice(got)
206 want := []int32{2, 4, 0, 0}
207 if !slices.Equal(got, want) {
208 t.Errorf("want and got differ, want=%v, got=%v", want, got)
209 }
210 }
211
212 func TestExpand(t *testing.T) {
213 if !simd.X86.AVX512() {
214 t.Skip("Test requires X86.AVX512, not available on this hardware")
215 return
216 }
217 v3400 := simd.LoadInt32x4Slice([]int32{3, 4, 0, 0})
218 v2400 := v3400.Expand(simd.Mask32x4FromBits(0b1010))
219 got := make([]int32, 4)
220 v2400.StoreSlice(got)
221 want := []int32{0, 3, 0, 4}
222 if !slices.Equal(got, want) {
223 t.Errorf("want and got differ, want=%v, got=%v", want, got)
224 }
225 }
226
227 var testShiftAllVal uint64 = 3
228
229 func TestShiftAll(t *testing.T) {
230 got := make([]int32, 4)
231 simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(2).StoreSlice(got)
232 for _, v := range got {
233 if v != 0b1100 {
234 t.Errorf("expect 0b1100, got %b", v)
235 }
236 }
237 simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(testShiftAllVal).StoreSlice(got)
238 for _, v := range got {
239 if v != 0b11000 {
240 t.Errorf("expect 0b11000, got %b", v)
241 }
242 }
243 }
244
245 func TestSlicesInt8(t *testing.T) {
246 a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
247 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
248 v := simd.LoadInt8x32Slice(a)
249 b := make([]int8, 32, 32)
250 v.StoreSlice(b)
251 checkSlices(t, a, b)
252 }
253
254 func TestSlicesInt8SetElem(t *testing.T) {
255 a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
256 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
257 v := simd.LoadInt8x16Slice(a)
258
259 v = v.SetElem(3, 13)
260 a[3] = 13
261
262 b := make([]int8, 16, 16)
263 v.StoreSlice(b)
264 checkSlices(t, a, b)
265 }
266
267 func TestSlicesInt8GetElem(t *testing.T) {
268 a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
269 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
270 v := simd.LoadInt8x16Slice(a)
271 e := v.GetElem(2)
272 if e != a[2] {
273 t.Errorf("GetElem(2) = %d != a[2] = %d", e, a[2])
274 }
275
276 }
277
278 func TestSlicesInt8TooShortLoad(t *testing.T) {
279 defer func() {
280 if r := recover(); r != nil {
281 t.Logf("Saw EXPECTED panic %v", r)
282 } else {
283 t.Errorf("Did not see expected panic")
284 }
285 }()
286 a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
287 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}
288 v := simd.LoadInt8x32Slice(a)
289 b := make([]int8, 32, 32)
290 v.StoreSlice(b)
291 checkSlices(t, a, b)
292 }
293
294 func TestSlicesInt8TooShortStore(t *testing.T) {
295 defer func() {
296 if r := recover(); r != nil {
297 t.Logf("Saw EXPECTED panic %v", r)
298 } else {
299 t.Errorf("Did not see expected panic")
300 }
301 }()
302 a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
303 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
304 v := simd.LoadInt8x32Slice(a)
305 b := make([]int8, 31)
306 v.StoreSlice(b)
307 checkSlices(t, a, b)
308 }
309
310 func TestSlicesFloat64(t *testing.T) {
311 a := []float64{1, 2, 3, 4, 5, 6, 7, 8}
312 v := simd.LoadFloat64x4Slice(a)
313 b := make([]float64, 4, 4)
314 v.StoreSlice(b)
315 for i := range b {
316 if a[i] != b[i] {
317 t.Errorf("a and b differ at index %d, a=%f, b=%f", i, a[i], b[i])
318 }
319 }
320 }
321
322
323 func TestMergeLocals(t *testing.T) {
324 testMergeLocalswrapper(t, simd.Int64x4.Add)
325 }
326
327
// forceSpill is an intentionally empty function that testMergeLocalswrapper
// calls between loading its vectors and using them. It is marked noinline so
// the call is not inlined away and remains a real function call at that point.
//
//go:noinline
func forceSpill() {}
329
330 func testMergeLocalswrapper(t *testing.T, op func(simd.Int64x4, simd.Int64x4) simd.Int64x4) {
331 t.Helper()
332 s0 := []int64{0, 1, 2, 3}
333 s1 := []int64{-1, 0, -1, 0}
334 want := []int64{-1, 1, 1, 3}
335 v := simd.LoadInt64x4Slice(s0)
336 m := simd.LoadInt64x4Slice(s1)
337 forceSpill()
338 got := make([]int64, 4)
339 gotv := op(v, m)
340 gotv.StoreSlice(got)
341 for i := range len(want) {
342 if !(got[i] == want[i]) {
343 t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
344 }
345 }
346 }
347
348 func TestBitMaskFromBits(t *testing.T) {
349 if !simd.X86.AVX512() {
350 t.Skip("Test requires X86.AVX512, not available on this hardware")
351 return
352 }
353 results := [2]int64{}
354 want := [2]int64{0, 6}
355 m := simd.Mask64x2FromBits(0b10)
356 simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results)
357 for i := range 2 {
358 if results[i] != want[i] {
359 t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])
360 }
361 }
362 }
363
364 var maskForTestBitMaskFromBitsLoad = uint8(0b10)
365
366 func TestBitMaskFromBitsLoad(t *testing.T) {
367 if !simd.X86.AVX512() {
368 t.Skip("Test requires X86.AVX512, not available on this hardware")
369 return
370 }
371 results := [2]int64{}
372 want := [2]int64{0, 6}
373 m := simd.Mask64x2FromBits(maskForTestBitMaskFromBitsLoad)
374 simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results)
375 for i := range 2 {
376 if results[i] != want[i] {
377 t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])
378 }
379 }
380 }
381
382 func TestBitMaskToBits(t *testing.T) {
383 if !simd.X86.AVX512() {
384 t.Skip("Test requires X86.AVX512, not available on this hardware")
385 return
386 }
387 if v := simd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits(); v != 0b101 {
388 t.Errorf("Want 0b101, got %b", v)
389 }
390 }
391
392 var maskForTestBitMaskFromBitsStore uint8
393
394 func TestBitMaskToBitsStore(t *testing.T) {
395 if !simd.X86.AVX512() {
396 t.Skip("Test requires X86.AVX512, not available on this hardware")
397 return
398 }
399 maskForTestBitMaskFromBitsStore = simd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits()
400 if maskForTestBitMaskFromBitsStore != 0b101 {
401 t.Errorf("Want 0b101, got %b", maskForTestBitMaskFromBitsStore)
402 }
403 }
404
405 func TestMergeFloat(t *testing.T) {
406 k := make([]int64, 4, 4)
407 s := make([]float64, 4, 4)
408
409 a := simd.LoadFloat64x4Slice([]float64{1, 2, 3, 4})
410 b := simd.LoadFloat64x4Slice([]float64{4, 2, 3, 1})
411 g := a.Greater(b)
412 g.AsInt64x4().StoreSlice(k)
413 c := a.Merge(b, g)
414
415 c.StoreSlice(s)
416
417 checkSlices[int64](t, k, []int64{0, 0, 0, -1})
418 checkSlices[float64](t, s, []float64{4, 2, 3, 4})
419 }
420
421 func TestMergeFloat512(t *testing.T) {
422 if !simd.X86.AVX512() {
423 t.Skip("Test requires X86.AVX512, not available on this hardware")
424 return
425 }
426
427 k := make([]int64, 8, 8)
428 s := make([]float64, 8, 8)
429
430 a := simd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8})
431 b := simd.LoadFloat64x8Slice([]float64{8, 7, 6, 5, 4, 2, 3, 1})
432 g := a.Greater(b)
433 g.AsInt64x8().StoreSlice(k)
434 c := a.Merge(b, g)
435 d := a.Masked(g)
436
437 checkSlices[int64](t, k, []int64{0, 0, 0, 0, -1, -1, -1, -1})
438
439 c.StoreSlice(s)
440 checkSlices[float64](t, s, []float64{8, 7, 6, 5, 5, 6, 7, 8})
441
442 d.StoreSlice(s)
443 checkSlices[float64](t, s, []float64{0, 0, 0, 0, 5, 6, 7, 8})
444 }
445
446 var ro uint8 = 2
447
448 func TestRotateAllVariable(t *testing.T) {
449 if !simd.X86.AVX512() {
450 t.Skip("Test requires X86.AVX512, not available on this hardware")
451 return
452 }
453 got := make([]int32, 4)
454 simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).RotateAllLeft(ro).StoreSlice(got)
455 for _, v := range got {
456 if v != 0b1100 {
457 t.Errorf("Want 0b1100, got %b", v)
458 }
459 }
460 }
461
462 func TestBroadcastUint32x4(t *testing.T) {
463 s := make([]uint32, 4, 4)
464 simd.BroadcastUint32x4(123456789).StoreSlice(s)
465 checkSlices(t, s, []uint32{123456789, 123456789, 123456789, 123456789})
466 }
467
468 func TestBroadcastFloat32x8(t *testing.T) {
469 s := make([]float32, 8, 8)
470 simd.BroadcastFloat32x8(123456789).StoreSlice(s)
471 checkSlices(t, s, []float32{123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789})
472 }
473
474 func TestBroadcastFloat64x2(t *testing.T) {
475 s := make([]float64, 2, 2)
476 simd.BroadcastFloat64x2(123456789).StoreSlice(s)
477 checkSlices(t, s, []float64{123456789, 123456789})
478 }
479
480 func TestBroadcastUint64x2(t *testing.T) {
481 s := make([]uint64, 2, 2)
482 simd.BroadcastUint64x2(123456789).StoreSlice(s)
483 checkSlices(t, s, []uint64{123456789, 123456789})
484 }
485
486 func TestBroadcastUint16x8(t *testing.T) {
487 s := make([]uint16, 8, 8)
488 simd.BroadcastUint16x8(12345).StoreSlice(s)
489 checkSlices(t, s, []uint16{12345, 12345, 12345, 12345})
490 }
491
492 func TestBroadcastInt8x32(t *testing.T) {
493 s := make([]int8, 32, 32)
494 simd.BroadcastInt8x32(-123).StoreSlice(s)
495 checkSlices(t, s, []int8{-123, -123, -123, -123, -123, -123, -123, -123,
496 -123, -123, -123, -123, -123, -123, -123, -123,
497 -123, -123, -123, -123, -123, -123, -123, -123,
498 -123, -123, -123, -123, -123, -123, -123, -123,
499 })
500 }
501
502 func TestMaskOpt512(t *testing.T) {
503 if !simd.X86.AVX512() {
504 t.Skip("Test requires X86.AVX512, not available on this hardware")
505 return
506 }
507
508 k := make([]int64, 8, 8)
509 s := make([]float64, 8, 8)
510
511 a := simd.LoadFloat64x8Slice([]float64{2, 0, 2, 0, 2, 0, 2, 0})
512 b := simd.LoadFloat64x8Slice([]float64{1, 1, 1, 1, 1, 1, 1, 1})
513 c := simd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8})
514 d := simd.LoadFloat64x8Slice([]float64{2, 4, 6, 8, 10, 12, 14, 16})
515 g := a.Greater(b)
516 e := c.Add(d).Masked(g)
517 e.StoreSlice(s)
518 g.AsInt64x8().StoreSlice(k)
519 checkSlices[int64](t, k, []int64{-1, 0, -1, 0, -1, 0, -1, 0})
520 checkSlices[float64](t, s, []float64{3, 0, 9, 0, 15, 0, 21, 0})
521 }
522
523
524
525
526
527 func flattenedTranspose(x, y simd.Int32x4) (a, b simd.Int32x4) {
528 return x.InterleaveLo(y), x.InterleaveHi(y)
529 }
530
531 func TestFlattenedTranspose(t *testing.T) {
532 r := make([]int32, 4, 4)
533 s := make([]int32, 4, 4)
534
535 x := simd.LoadInt32x4Slice([]int32{0xA, 0xB, 0xC, 0xD})
536 y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
537 a, b := flattenedTranspose(x, y)
538
539 a.StoreSlice(r)
540 b.StoreSlice(s)
541
542 checkSlices[int32](t, r, []int32{0xA, 1, 0xB, 2})
543 checkSlices[int32](t, s, []int32{0xC, 3, 0xD, 4})
544
545 }
546
547 func TestClearAVXUpperBits(t *testing.T) {
548
549
550 if !simd.X86.AVX2() {
551 t.Skip("Test requires X86.AVX2, not available on this hardware")
552 return
553 }
554
555 r := make([]int64, 4)
556 s := make([]int64, 4)
557
558 x := simd.LoadInt64x4Slice([]int64{10, 20, 30, 40})
559 y := simd.LoadInt64x4Slice([]int64{1, 2, 3, 4})
560
561 x.Add(y).StoreSlice(r)
562 simd.ClearAVXUpperBits()
563 x.Sub(y).StoreSlice(s)
564
565 checkSlices[int64](t, r, []int64{11, 22, 33, 44})
566 checkSlices[int64](t, s, []int64{9, 18, 27, 36})
567 }
568
569 func TestLeadingZeros(t *testing.T) {
570 if !simd.X86.AVX512() {
571 t.Skip("Test requires X86.AVX512, not available on this hardware")
572 return
573 }
574
575 src := []uint64{0b1111, 0}
576 want := []uint64{60, 64}
577 got := make([]uint64, 2)
578 simd.LoadUint64x2Slice(src).LeadingZeros().StoreSlice(got)
579 for i := range 2 {
580 if want[i] != got[i] {
581 t.Errorf("Result incorrect at %d: want %d, got %d", i, want[i], got[i])
582 }
583 }
584 }
585
586 func TestIsZero(t *testing.T) {
587 v1 := simd.LoadUint64x2Slice([]uint64{0, 1})
588 v2 := simd.LoadUint64x2Slice([]uint64{0, 0})
589 if v1.IsZero() {
590 t.Errorf("Result incorrect, want false, got true")
591 }
592 if !v2.IsZero() {
593 t.Errorf("Result incorrect, want true, got false")
594 }
595 if !v1.And(v2).IsZero() {
596 t.Errorf("Result incorrect, want true, got false")
597 }
598 if v1.AndNot(v2).IsZero() {
599 t.Errorf("Result incorrect, want false, got true")
600 }
601 if !v2.And(v1).IsZero() {
602 t.Errorf("Result incorrect, want true, got false")
603 }
604 if !v2.AndNot(v1).IsZero() {
605 t.Errorf("Result incorrect, want true, got false")
606 }
607 }
608
609 func TestSelect4FromPairConst(t *testing.T) {
610 x := simd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
611 y := simd.LoadInt32x4Slice([]int32{4, 5, 6, 7})
612
613 llll := x.SelectFromPair(0, 1, 2, 3, y)
614 hhhh := x.SelectFromPair(4, 5, 6, 7, y)
615 llhh := x.SelectFromPair(0, 1, 6, 7, y)
616 hhll := x.SelectFromPair(6, 7, 0, 1, y)
617
618 lllh := x.SelectFromPair(0, 1, 2, 7, y)
619 llhl := x.SelectFromPair(0, 1, 7, 2, y)
620 lhll := x.SelectFromPair(0, 7, 1, 2, y)
621 hlll := x.SelectFromPair(7, 0, 1, 2, y)
622
623 hhhl := x.SelectFromPair(4, 5, 6, 0, y)
624 hhlh := x.SelectFromPair(4, 5, 0, 6, y)
625 hlhh := x.SelectFromPair(4, 0, 5, 6, y)
626 lhhh := x.SelectFromPair(0, 4, 5, 6, y)
627
628 lhlh := x.SelectFromPair(0, 4, 1, 5, y)
629 hlhl := x.SelectFromPair(4, 0, 5, 1, y)
630 lhhl := x.SelectFromPair(0, 4, 5, 1, y)
631 hllh := x.SelectFromPair(4, 0, 1, 5, y)
632
633 r := make([]int32, 4, 4)
634
635 foo := func(v simd.Int32x4, a, b, c, d int32) {
636 v.StoreSlice(r)
637 checkSlices[int32](t, r, []int32{a, b, c, d})
638 }
639
640 foo(llll, 0, 1, 2, 3)
641 foo(hhhh, 4, 5, 6, 7)
642 foo(llhh, 0, 1, 6, 7)
643 foo(hhll, 6, 7, 0, 1)
644
645 foo(lllh, 0, 1, 2, 7)
646 foo(llhl, 0, 1, 7, 2)
647 foo(lhll, 0, 7, 1, 2)
648 foo(hlll, 7, 0, 1, 2)
649
650 foo(hhhl, 4, 5, 6, 0)
651 foo(hhlh, 4, 5, 0, 6)
652 foo(hlhh, 4, 0, 5, 6)
653 foo(lhhh, 0, 4, 5, 6)
654
655 foo(lhlh, 0, 4, 1, 5)
656 foo(hlhl, 4, 0, 5, 1)
657 foo(lhhl, 0, 4, 5, 1)
658 foo(hllh, 4, 0, 1, 5)
659 }
660
661
662 func selectFromPairInt32x4(x simd.Int32x4, a, b, c, d uint8, y simd.Int32x4) simd.Int32x4 {
663 return x.SelectFromPair(a, b, c, d, y)
664 }
665
666 func TestSelect4FromPairVar(t *testing.T) {
667 x := simd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
668 y := simd.LoadInt32x4Slice([]int32{4, 5, 6, 7})
669
670 llll := selectFromPairInt32x4(x, 0, 1, 2, 3, y)
671 hhhh := selectFromPairInt32x4(x, 4, 5, 6, 7, y)
672 llhh := selectFromPairInt32x4(x, 0, 1, 6, 7, y)
673 hhll := selectFromPairInt32x4(x, 6, 7, 0, 1, y)
674
675 lllh := selectFromPairInt32x4(x, 0, 1, 2, 7, y)
676 llhl := selectFromPairInt32x4(x, 0, 1, 7, 2, y)
677 lhll := selectFromPairInt32x4(x, 0, 7, 1, 2, y)
678 hlll := selectFromPairInt32x4(x, 7, 0, 1, 2, y)
679
680 hhhl := selectFromPairInt32x4(x, 4, 5, 6, 0, y)
681 hhlh := selectFromPairInt32x4(x, 4, 5, 0, 6, y)
682 hlhh := selectFromPairInt32x4(x, 4, 0, 5, 6, y)
683 lhhh := selectFromPairInt32x4(x, 0, 4, 5, 6, y)
684
685 lhlh := selectFromPairInt32x4(x, 0, 4, 1, 5, y)
686 hlhl := selectFromPairInt32x4(x, 4, 0, 5, 1, y)
687 lhhl := selectFromPairInt32x4(x, 0, 4, 5, 1, y)
688 hllh := selectFromPairInt32x4(x, 4, 0, 1, 5, y)
689
690 r := make([]int32, 4, 4)
691
692 foo := func(v simd.Int32x4, a, b, c, d int32) {
693 v.StoreSlice(r)
694 checkSlices[int32](t, r, []int32{a, b, c, d})
695 }
696
697 foo(llll, 0, 1, 2, 3)
698 foo(hhhh, 4, 5, 6, 7)
699 foo(llhh, 0, 1, 6, 7)
700 foo(hhll, 6, 7, 0, 1)
701
702 foo(lllh, 0, 1, 2, 7)
703 foo(llhl, 0, 1, 7, 2)
704 foo(lhll, 0, 7, 1, 2)
705 foo(hlll, 7, 0, 1, 2)
706
707 foo(hhhl, 4, 5, 6, 0)
708 foo(hhlh, 4, 5, 0, 6)
709 foo(hlhh, 4, 0, 5, 6)
710 foo(lhhh, 0, 4, 5, 6)
711
712 foo(lhlh, 0, 4, 1, 5)
713 foo(hlhl, 4, 0, 5, 1)
714 foo(lhhl, 0, 4, 5, 1)
715 foo(hllh, 4, 0, 1, 5)
716 }
717
718 func TestSelect4FromPairConstGrouped(t *testing.T) {
719 x := simd.LoadFloat32x8Slice([]float32{0, 1, 2, 3, 10, 11, 12, 13})
720 y := simd.LoadFloat32x8Slice([]float32{4, 5, 6, 7, 14, 15, 16, 17})
721
722 llll := x.SelectFromPairGrouped(0, 1, 2, 3, y)
723 hhhh := x.SelectFromPairGrouped(4, 5, 6, 7, y)
724 llhh := x.SelectFromPairGrouped(0, 1, 6, 7, y)
725 hhll := x.SelectFromPairGrouped(6, 7, 0, 1, y)
726
727 lllh := x.SelectFromPairGrouped(0, 1, 2, 7, y)
728 llhl := x.SelectFromPairGrouped(0, 1, 7, 2, y)
729 lhll := x.SelectFromPairGrouped(0, 7, 1, 2, y)
730 hlll := x.SelectFromPairGrouped(7, 0, 1, 2, y)
731
732 hhhl := x.SelectFromPairGrouped(4, 5, 6, 0, y)
733 hhlh := x.SelectFromPairGrouped(4, 5, 0, 6, y)
734 hlhh := x.SelectFromPairGrouped(4, 0, 5, 6, y)
735 lhhh := x.SelectFromPairGrouped(0, 4, 5, 6, y)
736
737 lhlh := x.SelectFromPairGrouped(0, 4, 1, 5, y)
738 hlhl := x.SelectFromPairGrouped(4, 0, 5, 1, y)
739 lhhl := x.SelectFromPairGrouped(0, 4, 5, 1, y)
740 hllh := x.SelectFromPairGrouped(4, 0, 1, 5, y)
741
742 r := make([]float32, 8, 8)
743
744 foo := func(v simd.Float32x8, a, b, c, d float32) {
745 v.StoreSlice(r)
746 checkSlices[float32](t, r, []float32{a, b, c, d, 10 + a, 10 + b, 10 + c, 10 + d})
747 }
748
749 foo(llll, 0, 1, 2, 3)
750 foo(hhhh, 4, 5, 6, 7)
751 foo(llhh, 0, 1, 6, 7)
752 foo(hhll, 6, 7, 0, 1)
753
754 foo(lllh, 0, 1, 2, 7)
755 foo(llhl, 0, 1, 7, 2)
756 foo(lhll, 0, 7, 1, 2)
757 foo(hlll, 7, 0, 1, 2)
758
759 foo(hhhl, 4, 5, 6, 0)
760 foo(hhlh, 4, 5, 0, 6)
761 foo(hlhh, 4, 0, 5, 6)
762 foo(lhhh, 0, 4, 5, 6)
763
764 foo(lhlh, 0, 4, 1, 5)
765 foo(hlhl, 4, 0, 5, 1)
766 foo(lhhl, 0, 4, 5, 1)
767 foo(hllh, 4, 0, 1, 5)
768 }
769
770 func TestSelectFromPairConstGroupedUint32x16(t *testing.T) {
771 if !simd.X86.AVX512() {
772 t.Skip("Test requires X86.AVX512, not available on this hardware")
773 return
774 }
775 x := simd.LoadUint32x16Slice([]uint32{0, 1, 2, 3, 10, 11, 12, 13, 20, 21, 22, 23, 30, 31, 32, 33})
776 y := simd.LoadUint32x16Slice([]uint32{4, 5, 6, 7, 14, 15, 16, 17, 24, 25, 26, 27, 34, 35, 36, 37})
777
778 llll := x.SelectFromPairGrouped(0, 1, 2, 3, y)
779 hhhh := x.SelectFromPairGrouped(4, 5, 6, 7, y)
780 llhh := x.SelectFromPairGrouped(0, 1, 6, 7, y)
781 hhll := x.SelectFromPairGrouped(6, 7, 0, 1, y)
782
783 lllh := x.SelectFromPairGrouped(0, 1, 2, 7, y)
784 llhl := x.SelectFromPairGrouped(0, 1, 7, 2, y)
785 lhll := x.SelectFromPairGrouped(0, 7, 1, 2, y)
786 hlll := x.SelectFromPairGrouped(7, 0, 1, 2, y)
787
788 hhhl := x.SelectFromPairGrouped(4, 5, 6, 0, y)
789 hhlh := x.SelectFromPairGrouped(4, 5, 0, 6, y)
790 hlhh := x.SelectFromPairGrouped(4, 0, 5, 6, y)
791 lhhh := x.SelectFromPairGrouped(0, 4, 5, 6, y)
792
793 lhlh := x.SelectFromPairGrouped(0, 4, 1, 5, y)
794 hlhl := x.SelectFromPairGrouped(4, 0, 5, 1, y)
795 lhhl := x.SelectFromPairGrouped(0, 4, 5, 1, y)
796 hllh := x.SelectFromPairGrouped(4, 0, 1, 5, y)
797
798 r := make([]uint32, 16, 16)
799
800 foo := func(v simd.Uint32x16, a, b, c, d uint32) {
801 v.StoreSlice(r)
802 checkSlices[uint32](t, r, []uint32{a, b, c, d,
803 10 + a, 10 + b, 10 + c, 10 + d,
804 20 + a, 20 + b, 20 + c, 20 + d,
805 30 + a, 30 + b, 30 + c, 30 + d,
806 })
807 }
808
809 foo(llll, 0, 1, 2, 3)
810 foo(hhhh, 4, 5, 6, 7)
811 foo(llhh, 0, 1, 6, 7)
812 foo(hhll, 6, 7, 0, 1)
813
814 foo(lllh, 0, 1, 2, 7)
815 foo(llhl, 0, 1, 7, 2)
816 foo(lhll, 0, 7, 1, 2)
817 foo(hlll, 7, 0, 1, 2)
818
819 foo(hhhl, 4, 5, 6, 0)
820 foo(hhlh, 4, 5, 0, 6)
821 foo(hlhh, 4, 0, 5, 6)
822 foo(lhhh, 0, 4, 5, 6)
823
824 foo(lhlh, 0, 4, 1, 5)
825 foo(hlhl, 4, 0, 5, 1)
826 foo(lhhl, 0, 4, 5, 1)
827 foo(hllh, 4, 0, 1, 5)
828 }
829
830 func TestSelect128FromPair(t *testing.T) {
831 x := simd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
832 y := simd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})
833
834 aa := x.Select128FromPair(0, 0, y)
835 ab := x.Select128FromPair(0, 1, y)
836 bc := x.Select128FromPair(1, 2, y)
837 cd := x.Select128FromPair(2, 3, y)
838 da := x.Select128FromPair(3, 0, y)
839 dc := x.Select128FromPair(3, 2, y)
840
841 r := make([]uint64, 4, 4)
842
843 foo := func(v simd.Uint64x4, a, b uint64) {
844 a, b = 2*a, 2*b
845 v.StoreSlice(r)
846 checkSlices[uint64](t, r, []uint64{a, a + 1, b, b + 1})
847 }
848
849 foo(aa, 0, 0)
850 foo(ab, 0, 1)
851 foo(bc, 1, 2)
852 foo(cd, 2, 3)
853 foo(da, 3, 0)
854 foo(dc, 3, 2)
855 }
856
857 func TestSelect128FromPairError(t *testing.T) {
858 x := simd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
859 y := simd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})
860
861 defer func() {
862 if r := recover(); r != nil {
863 t.Logf("Saw expected panic %v", r)
864 }
865 }()
866 _ = x.Select128FromPair(0, 4, y)
867
868 t.Errorf("Should have panicked")
869 }
870
871
872 func select128FromPair(x simd.Uint64x4, lo, hi uint8, y simd.Uint64x4) simd.Uint64x4 {
873 return x.Select128FromPair(lo, hi, y)
874 }
875
876 func TestSelect128FromPairVar(t *testing.T) {
877 x := simd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
878 y := simd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})
879
880 aa := select128FromPair(x, 0, 0, y)
881 ab := select128FromPair(x, 0, 1, y)
882 bc := select128FromPair(x, 1, 2, y)
883 cd := select128FromPair(x, 2, 3, y)
884 da := select128FromPair(x, 3, 0, y)
885 dc := select128FromPair(x, 3, 2, y)
886
887 r := make([]uint64, 4, 4)
888
889 foo := func(v simd.Uint64x4, a, b uint64) {
890 a, b = 2*a, 2*b
891 v.StoreSlice(r)
892 checkSlices[uint64](t, r, []uint64{a, a + 1, b, b + 1})
893 }
894
895 foo(aa, 0, 0)
896 foo(ab, 0, 1)
897 foo(bc, 1, 2)
898 foo(cd, 2, 3)
899 foo(da, 3, 0)
900 foo(dc, 3, 2)
901 }
902
903 func TestSelect2FromPairConst(t *testing.T) {
904 x := simd.LoadUint64x2Slice([]uint64{0, 1})
905 y := simd.LoadUint64x2Slice([]uint64{2, 3})
906
907 ll := x.SelectFromPair(0, 1, y)
908 hh := x.SelectFromPair(3, 2, y)
909 lh := x.SelectFromPair(0, 3, y)
910 hl := x.SelectFromPair(2, 1, y)
911
912 r := make([]uint64, 2, 2)
913
914 foo := func(v simd.Uint64x2, a, b uint64) {
915 v.StoreSlice(r)
916 checkSlices[uint64](t, r, []uint64{a, b})
917 }
918
919 foo(ll, 0, 1)
920 foo(hh, 3, 2)
921 foo(lh, 0, 3)
922 foo(hl, 2, 1)
923 }
924
925 func TestSelect2FromPairConstGroupedUint(t *testing.T) {
926 x := simd.LoadUint64x4Slice([]uint64{0, 1, 10, 11})
927 y := simd.LoadUint64x4Slice([]uint64{2, 3, 12, 13})
928
929 ll := x.SelectFromPairGrouped(0, 1, y)
930 hh := x.SelectFromPairGrouped(3, 2, y)
931 lh := x.SelectFromPairGrouped(0, 3, y)
932 hl := x.SelectFromPairGrouped(2, 1, y)
933
934 r := make([]uint64, 4, 4)
935
936 foo := func(v simd.Uint64x4, a, b uint64) {
937 v.StoreSlice(r)
938 checkSlices[uint64](t, r, []uint64{a, b, a + 10, b + 10})
939 }
940
941 foo(ll, 0, 1)
942 foo(hh, 3, 2)
943 foo(lh, 0, 3)
944 foo(hl, 2, 1)
945 }
946
947 func TestSelect2FromPairConstGroupedFloat(t *testing.T) {
948 x := simd.LoadFloat64x4Slice([]float64{0, 1, 10, 11})
949 y := simd.LoadFloat64x4Slice([]float64{2, 3, 12, 13})
950
951 ll := x.SelectFromPairGrouped(0, 1, y)
952 hh := x.SelectFromPairGrouped(3, 2, y)
953 lh := x.SelectFromPairGrouped(0, 3, y)
954 hl := x.SelectFromPairGrouped(2, 1, y)
955
956 r := make([]float64, 4, 4)
957
958 foo := func(v simd.Float64x4, a, b float64) {
959 v.StoreSlice(r)
960 checkSlices[float64](t, r, []float64{a, b, a + 10, b + 10})
961 }
962
963 foo(ll, 0, 1)
964 foo(hh, 3, 2)
965 foo(lh, 0, 3)
966 foo(hl, 2, 1)
967 }
968
969 func TestSelect2FromPairConstGroupedInt(t *testing.T) {
970 x := simd.LoadInt64x4Slice([]int64{0, 1, 10, 11})
971 y := simd.LoadInt64x4Slice([]int64{2, 3, 12, 13})
972
973 ll := x.SelectFromPairGrouped(0, 1, y)
974 hh := x.SelectFromPairGrouped(3, 2, y)
975 lh := x.SelectFromPairGrouped(0, 3, y)
976 hl := x.SelectFromPairGrouped(2, 1, y)
977
978 r := make([]int64, 4, 4)
979
980 foo := func(v simd.Int64x4, a, b int64) {
981 v.StoreSlice(r)
982 checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10})
983 }
984
985 foo(ll, 0, 1)
986 foo(hh, 3, 2)
987 foo(lh, 0, 3)
988 foo(hl, 2, 1)
989 }
990
991 func TestSelect2FromPairConstGroupedInt512(t *testing.T) {
992 if !simd.X86.AVX512() {
993 t.Skip("Test requires X86.AVX512, not available on this hardware")
994 return
995 }
996
997 x := simd.LoadInt64x8Slice([]int64{0, 1, 10, 11, 20, 21, 30, 31})
998 y := simd.LoadInt64x8Slice([]int64{2, 3, 12, 13, 22, 23, 32, 33})
999
1000 ll := x.SelectFromPairGrouped(0, 1, y)
1001 hh := x.SelectFromPairGrouped(3, 2, y)
1002 lh := x.SelectFromPairGrouped(0, 3, y)
1003 hl := x.SelectFromPairGrouped(2, 1, y)
1004
1005 r := make([]int64, 8, 8)
1006
1007 foo := func(v simd.Int64x8, a, b int64) {
1008 v.StoreSlice(r)
1009 checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10, a + 20, b + 20, a + 30, b + 30})
1010 }
1011
1012 foo(ll, 0, 1)
1013 foo(hh, 3, 2)
1014 foo(lh, 0, 3)
1015 foo(hl, 2, 1)
1016 }
1017
1018 func TestString(t *testing.T) {
1019 x := simd.LoadUint32x4Slice([]uint32{0, 1, 2, 3})
1020 y := simd.LoadInt64x4Slice([]int64{-4, -5, -6, -7})
1021 z := simd.LoadFloat32x4Slice([]float32{0.5, 1.5, -2.5, 3.5e9})
1022 w := simd.LoadFloat64x4Slice([]float64{0.5, 1.5, -2.5, 3.5e9})
1023
1024 sx := "{0,1,2,3}"
1025 sy := "{-4,-5,-6,-7}"
1026 sz := "{0.5,1.5,-2.5,3.5e+09}"
1027 sw := sz
1028
1029 if x.String() != sx {
1030 t.Errorf("x=%s wanted %s", x, sx)
1031 }
1032 if y.String() != sy {
1033 t.Errorf("y=%s wanted %s", y, sy)
1034 }
1035 if z.String() != sz {
1036 t.Errorf("z=%s wanted %s", z, sz)
1037 }
1038 if w.String() != sw {
1039 t.Errorf("w=%s wanted %s", w, sw)
1040 }
1041 t.Logf("w=%s", w)
1042 t.Logf("x=%s", x)
1043 t.Logf("y=%s", y)
1044 t.Logf("z=%s", z)
1045 }
1046
1047
// a returns a freshly allocated, zeroed []int32 with length and capacity 16.
func a() []int32 {
	buf := make([]int32, 16)
	return buf
}
1051
1052
1053
1054 func applyTo3(x, y, z simd.Int32x16, f func(x, y, z int32) int32) []int32 {
1055 ax, ay, az := a(), a(), a()
1056 x.StoreSlice(ax)
1057 y.StoreSlice(ay)
1058 z.StoreSlice(az)
1059
1060 r := a()
1061 for i := range r {
1062 r[i] = f(ax[i], ay[i], az[i])
1063 }
1064 return r
1065 }
1066
1067
1068
1069 func applyTo4(x, y, z, w simd.Int32x16, f func(x, y, z, w int32) int32) []int32 {
1070 ax, ay, az, aw := a(), a(), a(), a()
1071 x.StoreSlice(ax)
1072 y.StoreSlice(ay)
1073 z.StoreSlice(az)
1074 w.StoreSlice(aw)
1075
1076 r := make([]int32, len(ax), len(ax))
1077 for i := range r {
1078 r[i] = f(ax[i], ay[i], az[i], aw[i])
1079 }
1080 return r
1081 }
1082
1083 func TestSelectTernOptInt32x16(t *testing.T) {
1084 if !simd.X86.AVX512() {
1085 t.Skip("Test requires X86.AVX512, not available on this hardware")
1086 return
1087 }
1088 ax := []int32{0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1}
1089 ay := []int32{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1}
1090 az := []int32{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}
1091 aw := []int32{0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1}
1092 am := []int32{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
1093
1094 x := simd.LoadInt32x16Slice(ax)
1095 y := simd.LoadInt32x16Slice(ay)
1096 z := simd.LoadInt32x16Slice(az)
1097 w := simd.LoadInt32x16Slice(aw)
1098 m := simd.LoadInt32x16Slice(am)
1099
1100 foo := func(v simd.Int32x16, s []int32) {
1101 r := make([]int32, 16, 16)
1102 v.StoreSlice(r)
1103 checkSlices[int32](t, r, s)
1104 }
1105
1106 t0 := w.Xor(y).Xor(z)
1107 ft0 := func(w, y, z int32) int32 {
1108 return w ^ y ^ z
1109 }
1110 foo(t0, applyTo3(w, y, z, ft0))
1111
1112 t1 := m.And(w.Xor(y).Xor(z.Not()))
1113 ft1 := func(m, w, y, z int32) int32 {
1114 return m & (w ^ y ^ ^z)
1115 }
1116 foo(t1, applyTo4(m, w, y, z, ft1))
1117
1118 t2 := x.Xor(y).Xor(z).And(x.Xor(y).Xor(z.Not()))
1119 ft2 := func(x, y, z int32) int32 {
1120 return (x ^ y ^ z) & (x ^ y ^ ^z)
1121 }
1122 foo(t2, applyTo3(x, y, z, ft2))
1123 }
1124
1125 func TestMaskedMerge(t *testing.T) {
1126 x := simd.LoadInt64x4Slice([]int64{1, 2, 3, 4})
1127 y := simd.LoadInt64x4Slice([]int64{5, 6, 1, 1})
1128 z := simd.LoadInt64x4Slice([]int64{-1, -2, -3, -4})
1129 res := make([]int64, 4)
1130 expected := []int64{6, 8, -3, -4}
1131 mask := x.Less(y)
1132 if simd.X86.AVX512() {
1133 x.Add(y).Merge(z, mask).StoreSlice(res)
1134 } else {
1135 x.Add(y).Merge(z, mask).StoreSlice(res)
1136 }
1137 for i := range 4 {
1138 if res[i] != expected[i] {
1139 t.Errorf("got %d wanted %d", res[i], expected[i])
1140 }
1141 }
1142 }
1143
1144 func TestDotProductQuadruple(t *testing.T) {
1145 if !simd.X86.AVXVNNI() {
1146 t.Skip("Test requires X86.AVXVNNI, not available on this hardware")
1147 return
1148 }
1149 xd := make([]int8, 16)
1150 yd := make([]uint8, 16)
1151 zd := make([]int32, 4)
1152 wanted1 := make([]int32, 4)
1153 wanted2 := make([]int32, 4)
1154 res1 := make([]int32, 4)
1155 res2 := make([]int32, 4)
1156 for i := range 4 {
1157 xd[i] = 5
1158 yd[i] = 6
1159 zd[i] = 3
1160 wanted1[i] = 30
1161 wanted2[i] = 30
1162 }
1163 x := simd.LoadInt8x16Slice(xd)
1164 y := simd.LoadUint8x16Slice(yd)
1165 z := simd.LoadInt32x4Slice(zd)
1166 x.DotProductQuadruple(y).StoreSlice(res1)
1167 x.DotProductQuadruple(y).Add(z).StoreSlice(res1)
1168 for i := range 4 {
1169 if res1[i] != wanted1[i] {
1170 t.Errorf("got %d wanted %d", res1[i], wanted1[i])
1171 }
1172 if res2[i] != wanted2[i] {
1173 t.Errorf("got %d wanted %d", res2[i], wanted2[i])
1174 }
1175 }
1176 }
1177
1178 func TestPermuteScalars(t *testing.T) {
1179 x := []int32{11, 12, 13, 14}
1180 want := []int32{12, 13, 14, 11}
1181 got := make([]int32, 4)
1182 simd.LoadInt32x4Slice(x).PermuteScalars(1, 2, 3, 0).StoreSlice(got)
1183 for i := range 4 {
1184 if want[i] != got[i] {
1185 t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
1186 }
1187 }
1188 }
1189
1190 func TestPermuteScalarsGrouped(t *testing.T) {
1191 x := []int32{11, 12, 13, 14, 21, 22, 23, 24}
1192 want := []int32{12, 13, 14, 11, 22, 23, 24, 21}
1193 got := make([]int32, 8)
1194 simd.LoadInt32x8Slice(x).PermuteScalarsGrouped(1, 2, 3, 0).StoreSlice(got)
1195 for i := range 8 {
1196 if want[i] != got[i] {
1197 t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
1198 }
1199 }
1200 }
1201
1202 func TestPermuteScalarsHi(t *testing.T) {
1203 x := []int16{-1, -2, -3, -4, 11, 12, 13, 14}
1204 want := []int16{-1, -2, -3, -4, 12, 13, 14, 11}
1205 got := make([]int16, len(x))
1206 simd.LoadInt16x8Slice(x).PermuteScalarsHi(1, 2, 3, 0).StoreSlice(got)
1207 for i := range got {
1208 if want[i] != got[i] {
1209 t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
1210 }
1211 }
1212 }
1213
1214 func TestPermuteScalarsLo(t *testing.T) {
1215 x := []int16{11, 12, 13, 14, 4, 5, 6, 7}
1216 want := []int16{12, 13, 14, 11, 4, 5, 6, 7}
1217 got := make([]int16, len(x))
1218 simd.LoadInt16x8Slice(x).PermuteScalarsLo(1, 2, 3, 0).StoreSlice(got)
1219 for i := range got {
1220 if want[i] != got[i] {
1221 t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
1222 }
1223 }
1224 }
1225
1226 func TestPermuteScalarsHiGrouped(t *testing.T) {
1227 x := []int16{-1, -2, -3, -4, 11, 12, 13, 14, -11, -12, -13, -14, 111, 112, 113, 114}
1228 want := []int16{-1, -2, -3, -4, 12, 13, 14, 11, -11, -12, -13, -14, 112, 113, 114, 111}
1229 got := make([]int16, len(x))
1230 simd.LoadInt16x16Slice(x).PermuteScalarsHiGrouped(1, 2, 3, 0).StoreSlice(got)
1231 for i := range got {
1232 if want[i] != got[i] {
1233 t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
1234 }
1235 }
1236 }
1237
1238 func TestPermuteScalarsLoGrouped(t *testing.T) {
1239 x := []int16{11, 12, 13, 14, 4, 5, 6, 7, 111, 112, 113, 114, 14, 15, 16, 17}
1240 want := []int16{12, 13, 14, 11, 4, 5, 6, 7, 112, 113, 114, 111, 14, 15, 16, 17}
1241 got := make([]int16, len(x))
1242 simd.LoadInt16x16Slice(x).PermuteScalarsLoGrouped(1, 2, 3, 0).StoreSlice(got)
1243 for i := range got {
1244 if want[i] != got[i] {
1245 t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
1246 }
1247 }
1248 }
1249
View as plain text