// Copyright 2025 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. //go:build goexperiment.simd && amd64 package simd_test import ( "reflect" "simd" "slices" "testing" ) var sink any func TestType(t *testing.T) { // Testing: // - Defined as another struct's field is ok // - Pointer is ok // - Type defition is ok // - Type alias is ok // - Type conversion is ok // - Conversion to interface is ok type alias = simd.Int32x4 type maskT simd.Mask32x4 type myStruct struct { x alias y *simd.Int32x4 z maskT } vals := [4]int32{1, 2, 3, 4} v := myStruct{x: simd.LoadInt32x4(&vals)} // masking elements 1 and 2. want := []int32{2, 4, 0, 0} y := simd.LoadInt32x4(&vals) v.y = &y sink = y if !simd.X86.AVX512GFNI() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } v.z = maskT(simd.Mask32x4FromBits(0b0011)) *v.y = v.y.Add(v.x).Masked(simd.Mask32x4(v.z)) got := [4]int32{} v.y.Store(&got) for i := range 4 { if want[i] != got[i] { t.Errorf("Result at %d incorrect: want %d, got %d", i, want[i], got[i]) } } } func TestUncomparable(t *testing.T) { // Test that simd vectors are not comparable var x, y any = simd.LoadUint32x4(&[4]uint32{1, 2, 3, 4}), simd.LoadUint32x4(&[4]uint32{5, 6, 7, 8}) shouldPanic := func(fn func()) { defer func() { if recover() == nil { panic("did not panic") } }() fn() } shouldPanic(func() { _ = x == y }) } func TestFuncValue(t *testing.T) { // Test that simd intrinsic can be used as a function value. xv := [4]int32{1, 2, 3, 4} yv := [4]int32{5, 6, 7, 8} want := []int32{6, 8, 10, 12} x := simd.LoadInt32x4(&xv) y := simd.LoadInt32x4(&yv) fn := simd.Int32x4.Add sink = fn x = fn(x, y) got := [4]int32{} x.Store(&got) for i := range 4 { if want[i] != got[i] { t.Errorf("Result at %d incorrect: want %d, got %d", i, want[i], got[i]) } } } func TestReflectMethod(t *testing.T) { // Test that simd intrinsic can be accessed via reflection. // NOTE: we don't yet support reflect method.Call. xv := [4]int32{1, 2, 3, 4} yv := [4]int32{5, 6, 7, 8} want := []int32{6, 8, 10, 12} x := simd.LoadInt32x4(&xv) y := simd.LoadInt32x4(&yv) m, ok := reflect.TypeOf(x).MethodByName("Add") if !ok { t.Fatal("Add method not found") } fn := m.Func.Interface().(func(x, y simd.Int32x4) simd.Int32x4) x = fn(x, y) got := [4]int32{} x.Store(&got) for i := range 4 { if want[i] != got[i] { t.Errorf("Result at %d incorrect: want %d, got %d", i, want[i], got[i]) } } } func TestVectorConversion(t *testing.T) { if !simd.X86.AVX512GFNI() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } xv := [4]int32{1, 2, 3, 4} x := simd.LoadInt32x4(&xv) xPromoted := x.AsInt64x2() xPromotedDemoted := xPromoted.AsInt32x4() got := [4]int32{} xPromotedDemoted.Store(&got) for i := range 4 { if xv[i] != got[i] { t.Errorf("Result at %d incorrect: want %d, got %d", i, xv[i], got[i]) } } } func TestMaskConversion(t *testing.T) { if !simd.X86.AVX512GFNI() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } x := simd.LoadInt32x4Slice([]int32{5, 0, 7, 0}) mask := simd.Int32x4{}.Sub(x).ToMask() y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}).Add(x).Masked(mask) want := [4]int32{6, 0, 10, 0} got := make([]int32, 4) y.StoreSlice(got) for i := range 4 { if want[i] != got[i] { t.Errorf("Result at %d incorrect: want %d, got %d", i, want[i], got[i]) } } } func TestPermute(t *testing.T) { if !simd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } x := []int64{1, 2, 3, 4, 5, 6, 7, 8} indices := []uint64{7, 6, 5, 4, 3, 2, 1, 0} want := []int64{8, 7, 6, 5, 4, 3, 2, 1} got := make([]int64, 8) simd.LoadInt64x8Slice(x).Permute(simd.LoadUint64x8Slice(indices)).StoreSlice(got) for i := range 8 { if want[i] != got[i] { t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i]) } } } func TestPermuteOrZero(t *testing.T) { x := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} indices := []int8{7, 6, 5, 4, 3, 2, 1, 0, -1, 8, -1, 9, -1, 10, -1, 11} want := []uint8{8, 7, 6, 5, 4, 3, 2, 1, 0, 9, 0, 10, 0, 11, 0, 12} got := make([]uint8, len(x)) simd.LoadUint8x16Slice(x).PermuteOrZero(simd.LoadInt8x16Slice(indices)).StoreSlice(got) for i := range 8 { if want[i] != got[i] { t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i]) } } } func TestConcatPermute(t *testing.T) { if !simd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } x := []int64{1, 2, 3, 4, 5, 6, 7, 8} y := []int64{-1, -2, -3, -4, -5, -6, -7, -8} indices := []uint64{7 + 8, 6, 5 + 8, 4, 3 + 8, 2, 1 + 8, 0} want := []int64{-8, 7, -6, 5, -4, 3, -2, 1} got := make([]int64, 8) simd.LoadInt64x8Slice(x).ConcatPermute(simd.LoadInt64x8Slice(y), simd.LoadUint64x8Slice(indices)).StoreSlice(got) for i := range 8 { if want[i] != got[i] { t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i]) } } } func TestCompress(t *testing.T) { if !simd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } v1234 := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}) v2400 := v1234.Compress(simd.Mask32x4FromBits(0b1010)) got := make([]int32, 4) v2400.StoreSlice(got) want := []int32{2, 4, 0, 0} if !slices.Equal(got, want) { t.Errorf("want and got differ, want=%v, got=%v", want, got) } } func TestExpand(t *testing.T) { if !simd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } v3400 := simd.LoadInt32x4Slice([]int32{3, 4, 0, 0}) v2400 := v3400.Expand(simd.Mask32x4FromBits(0b1010)) got := make([]int32, 4) v2400.StoreSlice(got) want := []int32{0, 3, 0, 4} if !slices.Equal(got, want) { t.Errorf("want and got differ, want=%v, got=%v", want, got) } } var testShiftAllVal uint64 = 3 func TestShiftAll(t *testing.T) { got := make([]int32, 4) simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(2).StoreSlice(got) for _, v := range got { if v != 0b1100 { t.Errorf("expect 0b1100, got %b", v) } } simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(testShiftAllVal).StoreSlice(got) for _, v := range got { if v != 0b11000 { t.Errorf("expect 0b11000, got %b", v) } } } func TestSlicesInt8(t *testing.T) { a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} v := simd.LoadInt8x32Slice(a) b := make([]int8, 32, 32) v.StoreSlice(b) checkSlices(t, a, b) } func TestSlicesInt8SetElem(t *testing.T) { a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} v := simd.LoadInt8x16Slice(a) v = v.SetElem(3, 13) a[3] = 13 b := make([]int8, 16, 16) v.StoreSlice(b) checkSlices(t, a, b) } func TestSlicesInt8GetElem(t *testing.T) { a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} v := simd.LoadInt8x16Slice(a) e := v.GetElem(2) if e != a[2] { t.Errorf("GetElem(2) = %d != a[2] = %d", e, a[2]) } } func TestSlicesInt8TooShortLoad(t *testing.T) { defer func() { if r := recover(); r != nil { t.Logf("Saw EXPECTED panic %v", r) } else { t.Errorf("Did not see expected panic") } }() a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31} // TOO SHORT, should panic v := simd.LoadInt8x32Slice(a) b := make([]int8, 32, 32) v.StoreSlice(b) checkSlices(t, a, b) } func TestSlicesInt8TooShortStore(t *testing.T) { defer func() { if r := recover(); r != nil { t.Logf("Saw EXPECTED panic %v", r) } else { t.Errorf("Did not see expected panic") } }() a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} v := simd.LoadInt8x32Slice(a) b := make([]int8, 31) // TOO SHORT, should panic v.StoreSlice(b) checkSlices(t, a, b) } func TestSlicesFloat64(t *testing.T) { a := []float64{1, 2, 3, 4, 5, 6, 7, 8} // too long, should be fine v := simd.LoadFloat64x4Slice(a) b := make([]float64, 4, 4) v.StoreSlice(b) for i := range b { if a[i] != b[i] { t.Errorf("a and b differ at index %d, a=%f, b=%f", i, a[i], b[i]) } } } // TODO: try to reduce this test to be smaller. func TestMergeLocals(t *testing.T) { testMergeLocalswrapper(t, simd.Int64x4.Add) } //go:noinline func forceSpill() {} func testMergeLocalswrapper(t *testing.T, op func(simd.Int64x4, simd.Int64x4) simd.Int64x4) { t.Helper() s0 := []int64{0, 1, 2, 3} s1 := []int64{-1, 0, -1, 0} want := []int64{-1, 1, 1, 3} v := simd.LoadInt64x4Slice(s0) m := simd.LoadInt64x4Slice(s1) forceSpill() got := make([]int64, 4) gotv := op(v, m) gotv.StoreSlice(got) for i := range len(want) { if !(got[i] == want[i]) { t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) } } } func TestBitMaskFromBits(t *testing.T) { if !simd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } results := [2]int64{} want := [2]int64{0, 6} m := simd.Mask64x2FromBits(0b10) simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results) for i := range 2 { if results[i] != want[i] { t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i]) } } } var maskForTestBitMaskFromBitsLoad = uint8(0b10) func TestBitMaskFromBitsLoad(t *testing.T) { if !simd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } results := [2]int64{} want := [2]int64{0, 6} m := simd.Mask64x2FromBits(maskForTestBitMaskFromBitsLoad) simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results) for i := range 2 { if results[i] != want[i] { t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i]) } } } func TestBitMaskToBits(t *testing.T) { if !simd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } if v := simd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits(); v != 0b101 { t.Errorf("Want 0b101, got %b", v) } } var maskForTestBitMaskFromBitsStore uint8 func TestBitMaskToBitsStore(t *testing.T) { if !simd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } maskForTestBitMaskFromBitsStore = simd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits() if maskForTestBitMaskFromBitsStore != 0b101 { t.Errorf("Want 0b101, got %b", maskForTestBitMaskFromBitsStore) } } func TestMergeFloat(t *testing.T) { k := make([]int64, 4, 4) s := make([]float64, 4, 4) a := simd.LoadFloat64x4Slice([]float64{1, 2, 3, 4}) b := simd.LoadFloat64x4Slice([]float64{4, 2, 3, 1}) g := a.Greater(b) g.AsInt64x4().StoreSlice(k) c := a.Merge(b, g) c.StoreSlice(s) checkSlices[int64](t, k, []int64{0, 0, 0, -1}) checkSlices[float64](t, s, []float64{4, 2, 3, 4}) } func TestMergeFloat512(t *testing.T) { if !simd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } k := make([]int64, 8, 8) s := make([]float64, 8, 8) a := simd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8}) b := simd.LoadFloat64x8Slice([]float64{8, 7, 6, 5, 4, 2, 3, 1}) g := a.Greater(b) g.AsInt64x8().StoreSlice(k) c := a.Merge(b, g) d := a.Masked(g) checkSlices[int64](t, k, []int64{0, 0, 0, 0, -1, -1, -1, -1}) c.StoreSlice(s) checkSlices[float64](t, s, []float64{8, 7, 6, 5, 5, 6, 7, 8}) d.StoreSlice(s) checkSlices[float64](t, s, []float64{0, 0, 0, 0, 5, 6, 7, 8}) } var ro uint8 = 2 func TestRotateAllVariable(t *testing.T) { if !simd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } got := make([]int32, 4) simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).RotateAllLeft(ro).StoreSlice(got) for _, v := range got { if v != 0b1100 { t.Errorf("Want 0b1100, got %b", v) } } } func TestBroadcastUint32x4(t *testing.T) { s := make([]uint32, 4, 4) simd.BroadcastUint32x4(123456789).StoreSlice(s) checkSlices(t, s, []uint32{123456789, 123456789, 123456789, 123456789}) } func TestBroadcastFloat32x8(t *testing.T) { s := make([]float32, 8, 8) simd.BroadcastFloat32x8(123456789).StoreSlice(s) checkSlices(t, s, []float32{123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789}) } func TestBroadcastFloat64x2(t *testing.T) { s := make([]float64, 2, 2) simd.BroadcastFloat64x2(123456789).StoreSlice(s) checkSlices(t, s, []float64{123456789, 123456789}) } func TestBroadcastUint64x2(t *testing.T) { s := make([]uint64, 2, 2) simd.BroadcastUint64x2(123456789).StoreSlice(s) checkSlices(t, s, []uint64{123456789, 123456789}) } func TestBroadcastUint16x8(t *testing.T) { s := make([]uint16, 8, 8) simd.BroadcastUint16x8(12345).StoreSlice(s) checkSlices(t, s, []uint16{12345, 12345, 12345, 12345}) } func TestBroadcastInt8x32(t *testing.T) { s := make([]int8, 32, 32) simd.BroadcastInt8x32(-123).StoreSlice(s) checkSlices(t, s, []int8{-123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, -123, }) } func TestMaskOpt512(t *testing.T) { if !simd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } k := make([]int64, 8, 8) s := make([]float64, 8, 8) a := simd.LoadFloat64x8Slice([]float64{2, 0, 2, 0, 2, 0, 2, 0}) b := simd.LoadFloat64x8Slice([]float64{1, 1, 1, 1, 1, 1, 1, 1}) c := simd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8}) d := simd.LoadFloat64x8Slice([]float64{2, 4, 6, 8, 10, 12, 14, 16}) g := a.Greater(b) e := c.Add(d).Masked(g) e.StoreSlice(s) g.AsInt64x8().StoreSlice(k) checkSlices[int64](t, k, []int64{-1, 0, -1, 0, -1, 0, -1, 0}) checkSlices[float64](t, s, []float64{3, 0, 9, 0, 15, 0, 21, 0}) } // flattenedTranspose tranposes x and y, regarded as a pair of 2x2 // matrices, but then flattens the rows in order, i.e // x: ABCD ==> a: A1B2 // y: 1234 b: C3D4 func flattenedTranspose(x, y simd.Int32x4) (a, b simd.Int32x4) { return x.InterleaveLo(y), x.InterleaveHi(y) } func TestFlattenedTranspose(t *testing.T) { r := make([]int32, 4, 4) s := make([]int32, 4, 4) x := simd.LoadInt32x4Slice([]int32{0xA, 0xB, 0xC, 0xD}) y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}) a, b := flattenedTranspose(x, y) a.StoreSlice(r) b.StoreSlice(s) checkSlices[int32](t, r, []int32{0xA, 1, 0xB, 2}) checkSlices[int32](t, s, []int32{0xC, 3, 0xD, 4}) } func TestClearAVXUpperBits(t *testing.T) { // Test that ClearAVXUpperBits is safe even if there are SIMD values // alive (although usually one should not do this). if !simd.X86.AVX2() { t.Skip("Test requires X86.AVX2, not available on this hardware") return } r := make([]int64, 4) s := make([]int64, 4) x := simd.LoadInt64x4Slice([]int64{10, 20, 30, 40}) y := simd.LoadInt64x4Slice([]int64{1, 2, 3, 4}) x.Add(y).StoreSlice(r) simd.ClearAVXUpperBits() x.Sub(y).StoreSlice(s) checkSlices[int64](t, r, []int64{11, 22, 33, 44}) checkSlices[int64](t, s, []int64{9, 18, 27, 36}) } func TestLeadingZeros(t *testing.T) { if !simd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } src := []uint64{0b1111, 0} want := []uint64{60, 64} got := make([]uint64, 2) simd.LoadUint64x2Slice(src).LeadingZeros().StoreSlice(got) for i := range 2 { if want[i] != got[i] { t.Errorf("Result incorrect at %d: want %d, got %d", i, want[i], got[i]) } } } func TestIsZero(t *testing.T) { v1 := simd.LoadUint64x2Slice([]uint64{0, 1}) v2 := simd.LoadUint64x2Slice([]uint64{0, 0}) if v1.IsZero() { t.Errorf("Result incorrect, want false, got true") } if !v2.IsZero() { t.Errorf("Result incorrect, want true, got false") } if !v1.And(v2).IsZero() { t.Errorf("Result incorrect, want true, got false") } if v1.AndNot(v2).IsZero() { t.Errorf("Result incorrect, want false, got true") } if !v2.And(v1).IsZero() { t.Errorf("Result incorrect, want true, got false") } if !v2.AndNot(v1).IsZero() { t.Errorf("Result incorrect, want true, got false") } } func TestSelect4FromPairConst(t *testing.T) { x := simd.LoadInt32x4Slice([]int32{0, 1, 2, 3}) y := simd.LoadInt32x4Slice([]int32{4, 5, 6, 7}) llll := x.SelectFromPair(0, 1, 2, 3, y) hhhh := x.SelectFromPair(4, 5, 6, 7, y) llhh := x.SelectFromPair(0, 1, 6, 7, y) hhll := x.SelectFromPair(6, 7, 0, 1, y) lllh := x.SelectFromPair(0, 1, 2, 7, y) llhl := x.SelectFromPair(0, 1, 7, 2, y) lhll := x.SelectFromPair(0, 7, 1, 2, y) hlll := x.SelectFromPair(7, 0, 1, 2, y) hhhl := x.SelectFromPair(4, 5, 6, 0, y) hhlh := x.SelectFromPair(4, 5, 0, 6, y) hlhh := x.SelectFromPair(4, 0, 5, 6, y) lhhh := x.SelectFromPair(0, 4, 5, 6, y) lhlh := x.SelectFromPair(0, 4, 1, 5, y) hlhl := x.SelectFromPair(4, 0, 5, 1, y) lhhl := x.SelectFromPair(0, 4, 5, 1, y) hllh := x.SelectFromPair(4, 0, 1, 5, y) r := make([]int32, 4, 4) foo := func(v simd.Int32x4, a, b, c, d int32) { v.StoreSlice(r) checkSlices[int32](t, r, []int32{a, b, c, d}) } foo(llll, 0, 1, 2, 3) foo(hhhh, 4, 5, 6, 7) foo(llhh, 0, 1, 6, 7) foo(hhll, 6, 7, 0, 1) foo(lllh, 0, 1, 2, 7) foo(llhl, 0, 1, 7, 2) foo(lhll, 0, 7, 1, 2) foo(hlll, 7, 0, 1, 2) foo(hhhl, 4, 5, 6, 0) foo(hhlh, 4, 5, 0, 6) foo(hlhh, 4, 0, 5, 6) foo(lhhh, 0, 4, 5, 6) foo(lhlh, 0, 4, 1, 5) foo(hlhl, 4, 0, 5, 1) foo(lhhl, 0, 4, 5, 1) foo(hllh, 4, 0, 1, 5) } //go:noinline func selectFromPairInt32x4(x simd.Int32x4, a, b, c, d uint8, y simd.Int32x4) simd.Int32x4 { return x.SelectFromPair(a, b, c, d, y) } func TestSelect4FromPairVar(t *testing.T) { x := simd.LoadInt32x4Slice([]int32{0, 1, 2, 3}) y := simd.LoadInt32x4Slice([]int32{4, 5, 6, 7}) llll := selectFromPairInt32x4(x, 0, 1, 2, 3, y) hhhh := selectFromPairInt32x4(x, 4, 5, 6, 7, y) llhh := selectFromPairInt32x4(x, 0, 1, 6, 7, y) hhll := selectFromPairInt32x4(x, 6, 7, 0, 1, y) lllh := selectFromPairInt32x4(x, 0, 1, 2, 7, y) llhl := selectFromPairInt32x4(x, 0, 1, 7, 2, y) lhll := selectFromPairInt32x4(x, 0, 7, 1, 2, y) hlll := selectFromPairInt32x4(x, 7, 0, 1, 2, y) hhhl := selectFromPairInt32x4(x, 4, 5, 6, 0, y) hhlh := selectFromPairInt32x4(x, 4, 5, 0, 6, y) hlhh := selectFromPairInt32x4(x, 4, 0, 5, 6, y) lhhh := selectFromPairInt32x4(x, 0, 4, 5, 6, y) lhlh := selectFromPairInt32x4(x, 0, 4, 1, 5, y) hlhl := selectFromPairInt32x4(x, 4, 0, 5, 1, y) lhhl := selectFromPairInt32x4(x, 0, 4, 5, 1, y) hllh := selectFromPairInt32x4(x, 4, 0, 1, 5, y) r := make([]int32, 4, 4) foo := func(v simd.Int32x4, a, b, c, d int32) { v.StoreSlice(r) checkSlices[int32](t, r, []int32{a, b, c, d}) } foo(llll, 0, 1, 2, 3) foo(hhhh, 4, 5, 6, 7) foo(llhh, 0, 1, 6, 7) foo(hhll, 6, 7, 0, 1) foo(lllh, 0, 1, 2, 7) foo(llhl, 0, 1, 7, 2) foo(lhll, 0, 7, 1, 2) foo(hlll, 7, 0, 1, 2) foo(hhhl, 4, 5, 6, 0) foo(hhlh, 4, 5, 0, 6) foo(hlhh, 4, 0, 5, 6) foo(lhhh, 0, 4, 5, 6) foo(lhlh, 0, 4, 1, 5) foo(hlhl, 4, 0, 5, 1) foo(lhhl, 0, 4, 5, 1) foo(hllh, 4, 0, 1, 5) } func TestSelect4FromPairConstGrouped(t *testing.T) { x := simd.LoadFloat32x8Slice([]float32{0, 1, 2, 3, 10, 11, 12, 13}) y := simd.LoadFloat32x8Slice([]float32{4, 5, 6, 7, 14, 15, 16, 17}) llll := x.SelectFromPairGrouped(0, 1, 2, 3, y) hhhh := x.SelectFromPairGrouped(4, 5, 6, 7, y) llhh := x.SelectFromPairGrouped(0, 1, 6, 7, y) hhll := x.SelectFromPairGrouped(6, 7, 0, 1, y) lllh := x.SelectFromPairGrouped(0, 1, 2, 7, y) llhl := x.SelectFromPairGrouped(0, 1, 7, 2, y) lhll := x.SelectFromPairGrouped(0, 7, 1, 2, y) hlll := x.SelectFromPairGrouped(7, 0, 1, 2, y) hhhl := x.SelectFromPairGrouped(4, 5, 6, 0, y) hhlh := x.SelectFromPairGrouped(4, 5, 0, 6, y) hlhh := x.SelectFromPairGrouped(4, 0, 5, 6, y) lhhh := x.SelectFromPairGrouped(0, 4, 5, 6, y) lhlh := x.SelectFromPairGrouped(0, 4, 1, 5, y) hlhl := x.SelectFromPairGrouped(4, 0, 5, 1, y) lhhl := x.SelectFromPairGrouped(0, 4, 5, 1, y) hllh := x.SelectFromPairGrouped(4, 0, 1, 5, y) r := make([]float32, 8, 8) foo := func(v simd.Float32x8, a, b, c, d float32) { v.StoreSlice(r) checkSlices[float32](t, r, []float32{a, b, c, d, 10 + a, 10 + b, 10 + c, 10 + d}) } foo(llll, 0, 1, 2, 3) foo(hhhh, 4, 5, 6, 7) foo(llhh, 0, 1, 6, 7) foo(hhll, 6, 7, 0, 1) foo(lllh, 0, 1, 2, 7) foo(llhl, 0, 1, 7, 2) foo(lhll, 0, 7, 1, 2) foo(hlll, 7, 0, 1, 2) foo(hhhl, 4, 5, 6, 0) foo(hhlh, 4, 5, 0, 6) foo(hlhh, 4, 0, 5, 6) foo(lhhh, 0, 4, 5, 6) foo(lhlh, 0, 4, 1, 5) foo(hlhl, 4, 0, 5, 1) foo(lhhl, 0, 4, 5, 1) foo(hllh, 4, 0, 1, 5) } func TestSelectFromPairConstGroupedUint32x16(t *testing.T) { if !simd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } x := simd.LoadUint32x16Slice([]uint32{0, 1, 2, 3, 10, 11, 12, 13, 20, 21, 22, 23, 30, 31, 32, 33}) y := simd.LoadUint32x16Slice([]uint32{4, 5, 6, 7, 14, 15, 16, 17, 24, 25, 26, 27, 34, 35, 36, 37}) llll := x.SelectFromPairGrouped(0, 1, 2, 3, y) hhhh := x.SelectFromPairGrouped(4, 5, 6, 7, y) llhh := x.SelectFromPairGrouped(0, 1, 6, 7, y) hhll := x.SelectFromPairGrouped(6, 7, 0, 1, y) lllh := x.SelectFromPairGrouped(0, 1, 2, 7, y) llhl := x.SelectFromPairGrouped(0, 1, 7, 2, y) lhll := x.SelectFromPairGrouped(0, 7, 1, 2, y) hlll := x.SelectFromPairGrouped(7, 0, 1, 2, y) hhhl := x.SelectFromPairGrouped(4, 5, 6, 0, y) hhlh := x.SelectFromPairGrouped(4, 5, 0, 6, y) hlhh := x.SelectFromPairGrouped(4, 0, 5, 6, y) lhhh := x.SelectFromPairGrouped(0, 4, 5, 6, y) lhlh := x.SelectFromPairGrouped(0, 4, 1, 5, y) hlhl := x.SelectFromPairGrouped(4, 0, 5, 1, y) lhhl := x.SelectFromPairGrouped(0, 4, 5, 1, y) hllh := x.SelectFromPairGrouped(4, 0, 1, 5, y) r := make([]uint32, 16, 16) foo := func(v simd.Uint32x16, a, b, c, d uint32) { v.StoreSlice(r) checkSlices[uint32](t, r, []uint32{a, b, c, d, 10 + a, 10 + b, 10 + c, 10 + d, 20 + a, 20 + b, 20 + c, 20 + d, 30 + a, 30 + b, 30 + c, 30 + d, }) } foo(llll, 0, 1, 2, 3) foo(hhhh, 4, 5, 6, 7) foo(llhh, 0, 1, 6, 7) foo(hhll, 6, 7, 0, 1) foo(lllh, 0, 1, 2, 7) foo(llhl, 0, 1, 7, 2) foo(lhll, 0, 7, 1, 2) foo(hlll, 7, 0, 1, 2) foo(hhhl, 4, 5, 6, 0) foo(hhlh, 4, 5, 0, 6) foo(hlhh, 4, 0, 5, 6) foo(lhhh, 0, 4, 5, 6) foo(lhlh, 0, 4, 1, 5) foo(hlhl, 4, 0, 5, 1) foo(lhhl, 0, 4, 5, 1) foo(hllh, 4, 0, 1, 5) } func TestSelect128FromPair(t *testing.T) { x := simd.LoadUint64x4Slice([]uint64{0, 1, 2, 3}) y := simd.LoadUint64x4Slice([]uint64{4, 5, 6, 7}) aa := x.Select128FromPair(0, 0, y) ab := x.Select128FromPair(0, 1, y) bc := x.Select128FromPair(1, 2, y) cd := x.Select128FromPair(2, 3, y) da := x.Select128FromPair(3, 0, y) dc := x.Select128FromPair(3, 2, y) r := make([]uint64, 4, 4) foo := func(v simd.Uint64x4, a, b uint64) { a, b = 2*a, 2*b v.StoreSlice(r) checkSlices[uint64](t, r, []uint64{a, a + 1, b, b + 1}) } foo(aa, 0, 0) foo(ab, 0, 1) foo(bc, 1, 2) foo(cd, 2, 3) foo(da, 3, 0) foo(dc, 3, 2) } func TestSelect128FromPairError(t *testing.T) { x := simd.LoadUint64x4Slice([]uint64{0, 1, 2, 3}) y := simd.LoadUint64x4Slice([]uint64{4, 5, 6, 7}) defer func() { if r := recover(); r != nil { t.Logf("Saw expected panic %v", r) } }() _ = x.Select128FromPair(0, 4, y) t.Errorf("Should have panicked") } //go:noinline func select128FromPair(x simd.Uint64x4, lo, hi uint8, y simd.Uint64x4) simd.Uint64x4 { return x.Select128FromPair(lo, hi, y) } func TestSelect128FromPairVar(t *testing.T) { x := simd.LoadUint64x4Slice([]uint64{0, 1, 2, 3}) y := simd.LoadUint64x4Slice([]uint64{4, 5, 6, 7}) aa := select128FromPair(x, 0, 0, y) ab := select128FromPair(x, 0, 1, y) bc := select128FromPair(x, 1, 2, y) cd := select128FromPair(x, 2, 3, y) da := select128FromPair(x, 3, 0, y) dc := select128FromPair(x, 3, 2, y) r := make([]uint64, 4, 4) foo := func(v simd.Uint64x4, a, b uint64) { a, b = 2*a, 2*b v.StoreSlice(r) checkSlices[uint64](t, r, []uint64{a, a + 1, b, b + 1}) } foo(aa, 0, 0) foo(ab, 0, 1) foo(bc, 1, 2) foo(cd, 2, 3) foo(da, 3, 0) foo(dc, 3, 2) } func TestSelect2FromPairConst(t *testing.T) { x := simd.LoadUint64x2Slice([]uint64{0, 1}) y := simd.LoadUint64x2Slice([]uint64{2, 3}) ll := x.SelectFromPair(0, 1, y) hh := x.SelectFromPair(3, 2, y) lh := x.SelectFromPair(0, 3, y) hl := x.SelectFromPair(2, 1, y) r := make([]uint64, 2, 2) foo := func(v simd.Uint64x2, a, b uint64) { v.StoreSlice(r) checkSlices[uint64](t, r, []uint64{a, b}) } foo(ll, 0, 1) foo(hh, 3, 2) foo(lh, 0, 3) foo(hl, 2, 1) } func TestSelect2FromPairConstGroupedUint(t *testing.T) { x := simd.LoadUint64x4Slice([]uint64{0, 1, 10, 11}) y := simd.LoadUint64x4Slice([]uint64{2, 3, 12, 13}) ll := x.SelectFromPairGrouped(0, 1, y) hh := x.SelectFromPairGrouped(3, 2, y) lh := x.SelectFromPairGrouped(0, 3, y) hl := x.SelectFromPairGrouped(2, 1, y) r := make([]uint64, 4, 4) foo := func(v simd.Uint64x4, a, b uint64) { v.StoreSlice(r) checkSlices[uint64](t, r, []uint64{a, b, a + 10, b + 10}) } foo(ll, 0, 1) foo(hh, 3, 2) foo(lh, 0, 3) foo(hl, 2, 1) } func TestSelect2FromPairConstGroupedFloat(t *testing.T) { x := simd.LoadFloat64x4Slice([]float64{0, 1, 10, 11}) y := simd.LoadFloat64x4Slice([]float64{2, 3, 12, 13}) ll := x.SelectFromPairGrouped(0, 1, y) hh := x.SelectFromPairGrouped(3, 2, y) lh := x.SelectFromPairGrouped(0, 3, y) hl := x.SelectFromPairGrouped(2, 1, y) r := make([]float64, 4, 4) foo := func(v simd.Float64x4, a, b float64) { v.StoreSlice(r) checkSlices[float64](t, r, []float64{a, b, a + 10, b + 10}) } foo(ll, 0, 1) foo(hh, 3, 2) foo(lh, 0, 3) foo(hl, 2, 1) } func TestSelect2FromPairConstGroupedInt(t *testing.T) { x := simd.LoadInt64x4Slice([]int64{0, 1, 10, 11}) y := simd.LoadInt64x4Slice([]int64{2, 3, 12, 13}) ll := x.SelectFromPairGrouped(0, 1, y) hh := x.SelectFromPairGrouped(3, 2, y) lh := x.SelectFromPairGrouped(0, 3, y) hl := x.SelectFromPairGrouped(2, 1, y) r := make([]int64, 4, 4) foo := func(v simd.Int64x4, a, b int64) { v.StoreSlice(r) checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10}) } foo(ll, 0, 1) foo(hh, 3, 2) foo(lh, 0, 3) foo(hl, 2, 1) } func TestSelect2FromPairConstGroupedInt512(t *testing.T) { if !simd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } x := simd.LoadInt64x8Slice([]int64{0, 1, 10, 11, 20, 21, 30, 31}) y := simd.LoadInt64x8Slice([]int64{2, 3, 12, 13, 22, 23, 32, 33}) ll := x.SelectFromPairGrouped(0, 1, y) hh := x.SelectFromPairGrouped(3, 2, y) lh := x.SelectFromPairGrouped(0, 3, y) hl := x.SelectFromPairGrouped(2, 1, y) r := make([]int64, 8, 8) foo := func(v simd.Int64x8, a, b int64) { v.StoreSlice(r) checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10, a + 20, b + 20, a + 30, b + 30}) } foo(ll, 0, 1) foo(hh, 3, 2) foo(lh, 0, 3) foo(hl, 2, 1) } func TestString(t *testing.T) { x := simd.LoadUint32x4Slice([]uint32{0, 1, 2, 3}) y := simd.LoadInt64x4Slice([]int64{-4, -5, -6, -7}) z := simd.LoadFloat32x4Slice([]float32{0.5, 1.5, -2.5, 3.5e9}) w := simd.LoadFloat64x4Slice([]float64{0.5, 1.5, -2.5, 3.5e9}) sx := "{0,1,2,3}" sy := "{-4,-5,-6,-7}" sz := "{0.5,1.5,-2.5,3.5e+09}" sw := sz if x.String() != sx { t.Errorf("x=%s wanted %s", x, sx) } if y.String() != sy { t.Errorf("y=%s wanted %s", y, sy) } if z.String() != sz { t.Errorf("z=%s wanted %s", z, sz) } if w.String() != sw { t.Errorf("w=%s wanted %s", w, sw) } t.Logf("w=%s", w) t.Logf("x=%s", x) t.Logf("y=%s", y) t.Logf("z=%s", z) } // a returns an slice of 16 int32 func a() []int32 { return make([]int32, 16, 16) } // applyTo3 returns a 16-element slice of the results of // applying f to the respective elements of vectors x, y, and z. func applyTo3(x, y, z simd.Int32x16, f func(x, y, z int32) int32) []int32 { ax, ay, az := a(), a(), a() x.StoreSlice(ax) y.StoreSlice(ay) z.StoreSlice(az) r := a() for i := range r { r[i] = f(ax[i], ay[i], az[i]) } return r } // applyTo3 returns a 16-element slice of the results of // applying f to the respective elements of vectors x, y, z, and w. func applyTo4(x, y, z, w simd.Int32x16, f func(x, y, z, w int32) int32) []int32 { ax, ay, az, aw := a(), a(), a(), a() x.StoreSlice(ax) y.StoreSlice(ay) z.StoreSlice(az) w.StoreSlice(aw) r := make([]int32, len(ax), len(ax)) for i := range r { r[i] = f(ax[i], ay[i], az[i], aw[i]) } return r } func TestSelectTernOptInt32x16(t *testing.T) { if !simd.X86.AVX512() { t.Skip("Test requires X86.AVX512, not available on this hardware") return } ax := []int32{0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1} ay := []int32{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1} az := []int32{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1} aw := []int32{0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1} am := []int32{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} x := simd.LoadInt32x16Slice(ax) y := simd.LoadInt32x16Slice(ay) z := simd.LoadInt32x16Slice(az) w := simd.LoadInt32x16Slice(aw) m := simd.LoadInt32x16Slice(am) foo := func(v simd.Int32x16, s []int32) { r := make([]int32, 16, 16) v.StoreSlice(r) checkSlices[int32](t, r, s) } t0 := w.Xor(y).Xor(z) ft0 := func(w, y, z int32) int32 { return w ^ y ^ z } foo(t0, applyTo3(w, y, z, ft0)) t1 := m.And(w.Xor(y).Xor(z.Not())) ft1 := func(m, w, y, z int32) int32 { return m & (w ^ y ^ ^z) } foo(t1, applyTo4(m, w, y, z, ft1)) t2 := x.Xor(y).Xor(z).And(x.Xor(y).Xor(z.Not())) ft2 := func(x, y, z int32) int32 { return (x ^ y ^ z) & (x ^ y ^ ^z) } foo(t2, applyTo3(x, y, z, ft2)) } func TestMaskedMerge(t *testing.T) { x := simd.LoadInt64x4Slice([]int64{1, 2, 3, 4}) y := simd.LoadInt64x4Slice([]int64{5, 6, 1, 1}) z := simd.LoadInt64x4Slice([]int64{-1, -2, -3, -4}) res := make([]int64, 4) expected := []int64{6, 8, -3, -4} mask := x.Less(y) if simd.X86.AVX512() { x.Add(y).Merge(z, mask).StoreSlice(res) } else { x.Add(y).Merge(z, mask).StoreSlice(res) } for i := range 4 { if res[i] != expected[i] { t.Errorf("got %d wanted %d", res[i], expected[i]) } } } func TestDotProductQuadruple(t *testing.T) { if !simd.X86.AVXVNNI() { t.Skip("Test requires X86.AVXVNNI, not available on this hardware") return } xd := make([]int8, 16) yd := make([]uint8, 16) zd := make([]int32, 4) wanted1 := make([]int32, 4) wanted2 := make([]int32, 4) res1 := make([]int32, 4) res2 := make([]int32, 4) for i := range 4 { xd[i] = 5 yd[i] = 6 zd[i] = 3 wanted1[i] = 30 wanted2[i] = 30 } x := simd.LoadInt8x16Slice(xd) y := simd.LoadUint8x16Slice(yd) z := simd.LoadInt32x4Slice(zd) x.DotProductQuadruple(y).StoreSlice(res1) x.DotProductQuadruple(y).Add(z).StoreSlice(res1) for i := range 4 { if res1[i] != wanted1[i] { t.Errorf("got %d wanted %d", res1[i], wanted1[i]) } if res2[i] != wanted2[i] { t.Errorf("got %d wanted %d", res2[i], wanted2[i]) } } } func TestPermuteScalars(t *testing.T) { x := []int32{11, 12, 13, 14} want := []int32{12, 13, 14, 11} got := make([]int32, 4) simd.LoadInt32x4Slice(x).PermuteScalars(1, 2, 3, 0).StoreSlice(got) for i := range 4 { if want[i] != got[i] { t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i]) } } } func TestPermuteScalarsGrouped(t *testing.T) { x := []int32{11, 12, 13, 14, 21, 22, 23, 24} want := []int32{12, 13, 14, 11, 22, 23, 24, 21} got := make([]int32, 8) simd.LoadInt32x8Slice(x).PermuteScalarsGrouped(1, 2, 3, 0).StoreSlice(got) for i := range 8 { if want[i] != got[i] { t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i]) } } } func TestPermuteScalarsHi(t *testing.T) { x := []int16{-1, -2, -3, -4, 11, 12, 13, 14} want := []int16{-1, -2, -3, -4, 12, 13, 14, 11} got := make([]int16, len(x)) simd.LoadInt16x8Slice(x).PermuteScalarsHi(1, 2, 3, 0).StoreSlice(got) for i := range got { if want[i] != got[i] { t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i]) } } } func TestPermuteScalarsLo(t *testing.T) { x := []int16{11, 12, 13, 14, 4, 5, 6, 7} want := []int16{12, 13, 14, 11, 4, 5, 6, 7} got := make([]int16, len(x)) simd.LoadInt16x8Slice(x).PermuteScalarsLo(1, 2, 3, 0).StoreSlice(got) for i := range got { if want[i] != got[i] { t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i]) } } } func TestPermuteScalarsHiGrouped(t *testing.T) { x := []int16{-1, -2, -3, -4, 11, 12, 13, 14, -11, -12, -13, -14, 111, 112, 113, 114} want := []int16{-1, -2, -3, -4, 12, 13, 14, 11, -11, -12, -13, -14, 112, 113, 114, 111} got := make([]int16, len(x)) simd.LoadInt16x16Slice(x).PermuteScalarsHiGrouped(1, 2, 3, 0).StoreSlice(got) for i := range got { if want[i] != got[i] { t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i]) } } } func TestPermuteScalarsLoGrouped(t *testing.T) { x := []int16{11, 12, 13, 14, 4, 5, 6, 7, 111, 112, 113, 114, 14, 15, 16, 17} want := []int16{12, 13, 14, 11, 4, 5, 6, 7, 112, 113, 114, 111, 14, 15, 16, 17} got := make([]int16, len(x)) simd.LoadInt16x16Slice(x).PermuteScalarsLoGrouped(1, 2, 3, 0).StoreSlice(got) for i := range got { if want[i] != got[i] { t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i]) } } }