Source file src/simd/internal/simd_test/simd_test.go

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build goexperiment.simd && amd64
     6  
     7  package simd_test
     8  
     9  import (
    10  	"reflect"
    11  	"simd"
    12  	"slices"
    13  	"testing"
    14  )
    15  
// sink is a package-level variable of interface type. Tests in this file
// assign SIMD vectors and intrinsic function values to it (see TestType and
// TestFuncValue).
var sink any
    17  
    18  func TestType(t *testing.T) {
    19  	// Testing:
    20  	// - Defined as another struct's field is ok
    21  	// - Pointer is ok
    22  	// - Type defition is ok
    23  	// - Type alias is ok
    24  	// - Type conversion is ok
    25  	// - Conversion to interface is ok
    26  	type alias = simd.Int32x4
    27  	type maskT simd.Mask32x4
    28  	type myStruct struct {
    29  		x alias
    30  		y *simd.Int32x4
    31  		z maskT
    32  	}
    33  	vals := [4]int32{1, 2, 3, 4}
    34  	v := myStruct{x: simd.LoadInt32x4(&vals)}
    35  	// masking elements 1 and 2.
    36  	want := []int32{2, 4, 0, 0}
    37  	y := simd.LoadInt32x4(&vals)
    38  	v.y = &y
    39  	sink = y
    40  
    41  	if !simd.X86.AVX512GFNI() {
    42  		t.Skip("Test requires X86.AVX512, not available on this hardware")
    43  		return
    44  	}
    45  	v.z = maskT(simd.Mask32x4FromBits(0b0011))
    46  	*v.y = v.y.Add(v.x).Masked(simd.Mask32x4(v.z))
    47  
    48  	got := [4]int32{}
    49  	v.y.Store(&got)
    50  	for i := range 4 {
    51  		if want[i] != got[i] {
    52  			t.Errorf("Result at %d incorrect: want %d, got %d", i, want[i], got[i])
    53  		}
    54  	}
    55  }
    56  
    57  func TestUncomparable(t *testing.T) {
    58  	// Test that simd vectors are not comparable
    59  	var x, y any = simd.LoadUint32x4(&[4]uint32{1, 2, 3, 4}), simd.LoadUint32x4(&[4]uint32{5, 6, 7, 8})
    60  	shouldPanic := func(fn func()) {
    61  		defer func() {
    62  			if recover() == nil {
    63  				panic("did not panic")
    64  			}
    65  		}()
    66  		fn()
    67  	}
    68  	shouldPanic(func() { _ = x == y })
    69  }
    70  
    71  func TestFuncValue(t *testing.T) {
    72  	// Test that simd intrinsic can be used as a function value.
    73  	xv := [4]int32{1, 2, 3, 4}
    74  	yv := [4]int32{5, 6, 7, 8}
    75  	want := []int32{6, 8, 10, 12}
    76  	x := simd.LoadInt32x4(&xv)
    77  	y := simd.LoadInt32x4(&yv)
    78  	fn := simd.Int32x4.Add
    79  	sink = fn
    80  	x = fn(x, y)
    81  	got := [4]int32{}
    82  	x.Store(&got)
    83  	for i := range 4 {
    84  		if want[i] != got[i] {
    85  			t.Errorf("Result at %d incorrect: want %d, got %d", i, want[i], got[i])
    86  		}
    87  	}
    88  }
    89  
    90  func TestReflectMethod(t *testing.T) {
    91  	// Test that simd intrinsic can be accessed via reflection.
    92  	// NOTE: we don't yet support reflect method.Call.
    93  	xv := [4]int32{1, 2, 3, 4}
    94  	yv := [4]int32{5, 6, 7, 8}
    95  	want := []int32{6, 8, 10, 12}
    96  	x := simd.LoadInt32x4(&xv)
    97  	y := simd.LoadInt32x4(&yv)
    98  	m, ok := reflect.TypeOf(x).MethodByName("Add")
    99  	if !ok {
   100  		t.Fatal("Add method not found")
   101  	}
   102  	fn := m.Func.Interface().(func(x, y simd.Int32x4) simd.Int32x4)
   103  	x = fn(x, y)
   104  	got := [4]int32{}
   105  	x.Store(&got)
   106  	for i := range 4 {
   107  		if want[i] != got[i] {
   108  			t.Errorf("Result at %d incorrect: want %d, got %d", i, want[i], got[i])
   109  		}
   110  	}
   111  }
   112  
   113  func TestVectorConversion(t *testing.T) {
   114  	if !simd.X86.AVX512GFNI() {
   115  		t.Skip("Test requires X86.AVX512, not available on this hardware")
   116  		return
   117  	}
   118  	xv := [4]int32{1, 2, 3, 4}
   119  	x := simd.LoadInt32x4(&xv)
   120  	xPromoted := x.AsInt64x2()
   121  	xPromotedDemoted := xPromoted.AsInt32x4()
   122  	got := [4]int32{}
   123  	xPromotedDemoted.Store(&got)
   124  	for i := range 4 {
   125  		if xv[i] != got[i] {
   126  			t.Errorf("Result at %d incorrect: want %d, got %d", i, xv[i], got[i])
   127  		}
   128  	}
   129  }
   130  
   131  func TestMaskConversion(t *testing.T) {
   132  	if !simd.X86.AVX512GFNI() {
   133  		t.Skip("Test requires X86.AVX512, not available on this hardware")
   134  		return
   135  	}
   136  	x := simd.LoadInt32x4Slice([]int32{5, 0, 7, 0})
   137  	mask := simd.Int32x4{}.Sub(x).ToMask()
   138  	y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}).Add(x).Masked(mask)
   139  	want := [4]int32{6, 0, 10, 0}
   140  	got := make([]int32, 4)
   141  	y.StoreSlice(got)
   142  	for i := range 4 {
   143  		if want[i] != got[i] {
   144  			t.Errorf("Result at %d incorrect: want %d, got %d", i, want[i], got[i])
   145  		}
   146  	}
   147  }
   148  
   149  func TestPermute(t *testing.T) {
   150  	if !simd.X86.AVX512() {
   151  		t.Skip("Test requires X86.AVX512, not available on this hardware")
   152  		return
   153  	}
   154  	x := []int64{1, 2, 3, 4, 5, 6, 7, 8}
   155  	indices := []uint64{7, 6, 5, 4, 3, 2, 1, 0}
   156  	want := []int64{8, 7, 6, 5, 4, 3, 2, 1}
   157  	got := make([]int64, 8)
   158  	simd.LoadInt64x8Slice(x).Permute(simd.LoadUint64x8Slice(indices)).StoreSlice(got)
   159  	for i := range 8 {
   160  		if want[i] != got[i] {
   161  			t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
   162  		}
   163  	}
   164  }
   165  
   166  func TestPermuteOrZero(t *testing.T) {
   167  	x := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
   168  	indices := []int8{7, 6, 5, 4, 3, 2, 1, 0, -1, 8, -1, 9, -1, 10, -1, 11}
   169  	want := []uint8{8, 7, 6, 5, 4, 3, 2, 1, 0, 9, 0, 10, 0, 11, 0, 12}
   170  	got := make([]uint8, len(x))
   171  	simd.LoadUint8x16Slice(x).PermuteOrZero(simd.LoadInt8x16Slice(indices)).StoreSlice(got)
   172  	for i := range 8 {
   173  		if want[i] != got[i] {
   174  			t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
   175  		}
   176  	}
   177  }
   178  
   179  func TestConcatPermute(t *testing.T) {
   180  	if !simd.X86.AVX512() {
   181  		t.Skip("Test requires X86.AVX512, not available on this hardware")
   182  		return
   183  	}
   184  	x := []int64{1, 2, 3, 4, 5, 6, 7, 8}
   185  	y := []int64{-1, -2, -3, -4, -5, -6, -7, -8}
   186  	indices := []uint64{7 + 8, 6, 5 + 8, 4, 3 + 8, 2, 1 + 8, 0}
   187  	want := []int64{-8, 7, -6, 5, -4, 3, -2, 1}
   188  	got := make([]int64, 8)
   189  	simd.LoadInt64x8Slice(x).ConcatPermute(simd.LoadInt64x8Slice(y), simd.LoadUint64x8Slice(indices)).StoreSlice(got)
   190  	for i := range 8 {
   191  		if want[i] != got[i] {
   192  			t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
   193  		}
   194  	}
   195  }
   196  
   197  func TestCompress(t *testing.T) {
   198  	if !simd.X86.AVX512() {
   199  		t.Skip("Test requires X86.AVX512, not available on this hardware")
   200  		return
   201  	}
   202  	v1234 := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
   203  	v2400 := v1234.Compress(simd.Mask32x4FromBits(0b1010))
   204  	got := make([]int32, 4)
   205  	v2400.StoreSlice(got)
   206  	want := []int32{2, 4, 0, 0}
   207  	if !slices.Equal(got, want) {
   208  		t.Errorf("want and got differ, want=%v, got=%v", want, got)
   209  	}
   210  }
   211  
   212  func TestExpand(t *testing.T) {
   213  	if !simd.X86.AVX512() {
   214  		t.Skip("Test requires X86.AVX512, not available on this hardware")
   215  		return
   216  	}
   217  	v3400 := simd.LoadInt32x4Slice([]int32{3, 4, 0, 0})
   218  	v2400 := v3400.Expand(simd.Mask32x4FromBits(0b1010))
   219  	got := make([]int32, 4)
   220  	v2400.StoreSlice(got)
   221  	want := []int32{0, 3, 0, 4}
   222  	if !slices.Equal(got, want) {
   223  		t.Errorf("want and got differ, want=%v, got=%v", want, got)
   224  	}
   225  }
   226  
   227  var testShiftAllVal uint64 = 3
   228  
   229  func TestShiftAll(t *testing.T) {
   230  	got := make([]int32, 4)
   231  	simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(2).StoreSlice(got)
   232  	for _, v := range got {
   233  		if v != 0b1100 {
   234  			t.Errorf("expect 0b1100, got %b", v)
   235  		}
   236  	}
   237  	simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(testShiftAllVal).StoreSlice(got)
   238  	for _, v := range got {
   239  		if v != 0b11000 {
   240  			t.Errorf("expect 0b11000, got %b", v)
   241  		}
   242  	}
   243  }
   244  
   245  func TestSlicesInt8(t *testing.T) {
   246  	a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
   247  		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
   248  	v := simd.LoadInt8x32Slice(a)
   249  	b := make([]int8, 32, 32)
   250  	v.StoreSlice(b)
   251  	checkSlices(t, a, b)
   252  }
   253  
   254  func TestSlicesInt8SetElem(t *testing.T) {
   255  	a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
   256  		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
   257  	v := simd.LoadInt8x16Slice(a)
   258  
   259  	v = v.SetElem(3, 13)
   260  	a[3] = 13
   261  
   262  	b := make([]int8, 16, 16)
   263  	v.StoreSlice(b)
   264  	checkSlices(t, a, b)
   265  }
   266  
   267  func TestSlicesInt8GetElem(t *testing.T) {
   268  	a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
   269  		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
   270  	v := simd.LoadInt8x16Slice(a)
   271  	e := v.GetElem(2)
   272  	if e != a[2] {
   273  		t.Errorf("GetElem(2) = %d != a[2] = %d", e, a[2])
   274  	}
   275  
   276  }
   277  
   278  func TestSlicesInt8TooShortLoad(t *testing.T) {
   279  	defer func() {
   280  		if r := recover(); r != nil {
   281  			t.Logf("Saw EXPECTED panic %v", r)
   282  		} else {
   283  			t.Errorf("Did not see expected panic")
   284  		}
   285  	}()
   286  	a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
   287  		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31} // TOO SHORT, should panic
   288  	v := simd.LoadInt8x32Slice(a)
   289  	b := make([]int8, 32, 32)
   290  	v.StoreSlice(b)
   291  	checkSlices(t, a, b)
   292  }
   293  
   294  func TestSlicesInt8TooShortStore(t *testing.T) {
   295  	defer func() {
   296  		if r := recover(); r != nil {
   297  			t.Logf("Saw EXPECTED panic %v", r)
   298  		} else {
   299  			t.Errorf("Did not see expected panic")
   300  		}
   301  	}()
   302  	a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
   303  		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
   304  	v := simd.LoadInt8x32Slice(a)
   305  	b := make([]int8, 31) // TOO SHORT, should panic
   306  	v.StoreSlice(b)
   307  	checkSlices(t, a, b)
   308  }
   309  
   310  func TestSlicesFloat64(t *testing.T) {
   311  	a := []float64{1, 2, 3, 4, 5, 6, 7, 8} // too long, should be fine
   312  	v := simd.LoadFloat64x4Slice(a)
   313  	b := make([]float64, 4, 4)
   314  	v.StoreSlice(b)
   315  	for i := range b {
   316  		if a[i] != b[i] {
   317  			t.Errorf("a and b differ at index %d, a=%f, b=%f", i, a[i], b[i])
   318  		}
   319  	}
   320  }
   321  
// TestMergeLocals runs testMergeLocalswrapper with simd.Int64x4.Add as the
// operation under test.
//
// TODO: try to reduce this test to be smaller.
func TestMergeLocals(t *testing.T) {
	testMergeLocalswrapper(t, simd.Int64x4.Add)
}
   326  
// forceSpill is an empty function marked noinline, so a call to it remains
// a real function call.
// NOTE(review): presumably called to force live SIMD values to be spilled
// around the call — confirm.
//
//go:noinline
func forceSpill() {}
   329  
// testMergeLocalswrapper loads the vectors {0, 1, 2, 3} and {-1, 0, -1, 0},
// calls forceSpill, applies op to the two vectors, and checks the stored
// result against {-1, 1, 1, 3} (their element-wise sum).
func testMergeLocalswrapper(t *testing.T, op func(simd.Int64x4, simd.Int64x4) simd.Int64x4) {
	t.Helper()
	s0 := []int64{0, 1, 2, 3}
	s1 := []int64{-1, 0, -1, 0}
	want := []int64{-1, 1, 1, 3}
	v := simd.LoadInt64x4Slice(s0)
	m := simd.LoadInt64x4Slice(s1)
	// v and m are loaded before this call and used after it.
	forceSpill()
	got := make([]int64, 4)
	gotv := op(v, m)
	gotv.StoreSlice(got)
	for i := range len(want) {
		if !(got[i] == want[i]) {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
		}
	}
}
   347  
   348  func TestBitMaskFromBits(t *testing.T) {
   349  	if !simd.X86.AVX512() {
   350  		t.Skip("Test requires X86.AVX512, not available on this hardware")
   351  		return
   352  	}
   353  	results := [2]int64{}
   354  	want := [2]int64{0, 6}
   355  	m := simd.Mask64x2FromBits(0b10)
   356  	simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results)
   357  	for i := range 2 {
   358  		if results[i] != want[i] {
   359  			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])
   360  		}
   361  	}
   362  }
   363  
   364  var maskForTestBitMaskFromBitsLoad = uint8(0b10)
   365  
   366  func TestBitMaskFromBitsLoad(t *testing.T) {
   367  	if !simd.X86.AVX512() {
   368  		t.Skip("Test requires X86.AVX512, not available on this hardware")
   369  		return
   370  	}
   371  	results := [2]int64{}
   372  	want := [2]int64{0, 6}
   373  	m := simd.Mask64x2FromBits(maskForTestBitMaskFromBitsLoad)
   374  	simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results)
   375  	for i := range 2 {
   376  		if results[i] != want[i] {
   377  			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])
   378  		}
   379  	}
   380  }
   381  
   382  func TestBitMaskToBits(t *testing.T) {
   383  	if !simd.X86.AVX512() {
   384  		t.Skip("Test requires X86.AVX512, not available on this hardware")
   385  		return
   386  	}
   387  	if v := simd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits(); v != 0b101 {
   388  		t.Errorf("Want 0b101, got %b", v)
   389  	}
   390  }
   391  
   392  var maskForTestBitMaskFromBitsStore uint8
   393  
   394  func TestBitMaskToBitsStore(t *testing.T) {
   395  	if !simd.X86.AVX512() {
   396  		t.Skip("Test requires X86.AVX512, not available on this hardware")
   397  		return
   398  	}
   399  	maskForTestBitMaskFromBitsStore = simd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits()
   400  	if maskForTestBitMaskFromBitsStore != 0b101 {
   401  		t.Errorf("Want 0b101, got %b", maskForTestBitMaskFromBitsStore)
   402  	}
   403  }
   404  
   405  func TestMergeFloat(t *testing.T) {
   406  	k := make([]int64, 4, 4)
   407  	s := make([]float64, 4, 4)
   408  
   409  	a := simd.LoadFloat64x4Slice([]float64{1, 2, 3, 4})
   410  	b := simd.LoadFloat64x4Slice([]float64{4, 2, 3, 1})
   411  	g := a.Greater(b)
   412  	g.AsInt64x4().StoreSlice(k)
   413  	c := a.Merge(b, g)
   414  
   415  	c.StoreSlice(s)
   416  
   417  	checkSlices[int64](t, k, []int64{0, 0, 0, -1})
   418  	checkSlices[float64](t, s, []float64{4, 2, 3, 4})
   419  }
   420  
   421  func TestMergeFloat512(t *testing.T) {
   422  	if !simd.X86.AVX512() {
   423  		t.Skip("Test requires X86.AVX512, not available on this hardware")
   424  		return
   425  	}
   426  
   427  	k := make([]int64, 8, 8)
   428  	s := make([]float64, 8, 8)
   429  
   430  	a := simd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8})
   431  	b := simd.LoadFloat64x8Slice([]float64{8, 7, 6, 5, 4, 2, 3, 1})
   432  	g := a.Greater(b)
   433  	g.AsInt64x8().StoreSlice(k)
   434  	c := a.Merge(b, g)
   435  	d := a.Masked(g)
   436  
   437  	checkSlices[int64](t, k, []int64{0, 0, 0, 0, -1, -1, -1, -1})
   438  
   439  	c.StoreSlice(s)
   440  	checkSlices[float64](t, s, []float64{8, 7, 6, 5, 5, 6, 7, 8})
   441  
   442  	d.StoreSlice(s)
   443  	checkSlices[float64](t, s, []float64{0, 0, 0, 0, 5, 6, 7, 8})
   444  }
   445  
   446  var ro uint8 = 2
   447  
   448  func TestRotateAllVariable(t *testing.T) {
   449  	if !simd.X86.AVX512() {
   450  		t.Skip("Test requires X86.AVX512, not available on this hardware")
   451  		return
   452  	}
   453  	got := make([]int32, 4)
   454  	simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).RotateAllLeft(ro).StoreSlice(got)
   455  	for _, v := range got {
   456  		if v != 0b1100 {
   457  			t.Errorf("Want 0b1100, got %b", v)
   458  		}
   459  	}
   460  }
   461  
   462  func TestBroadcastUint32x4(t *testing.T) {
   463  	s := make([]uint32, 4, 4)
   464  	simd.BroadcastUint32x4(123456789).StoreSlice(s)
   465  	checkSlices(t, s, []uint32{123456789, 123456789, 123456789, 123456789})
   466  }
   467  
   468  func TestBroadcastFloat32x8(t *testing.T) {
   469  	s := make([]float32, 8, 8)
   470  	simd.BroadcastFloat32x8(123456789).StoreSlice(s)
   471  	checkSlices(t, s, []float32{123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789})
   472  }
   473  
   474  func TestBroadcastFloat64x2(t *testing.T) {
   475  	s := make([]float64, 2, 2)
   476  	simd.BroadcastFloat64x2(123456789).StoreSlice(s)
   477  	checkSlices(t, s, []float64{123456789, 123456789})
   478  }
   479  
   480  func TestBroadcastUint64x2(t *testing.T) {
   481  	s := make([]uint64, 2, 2)
   482  	simd.BroadcastUint64x2(123456789).StoreSlice(s)
   483  	checkSlices(t, s, []uint64{123456789, 123456789})
   484  }
   485  
   486  func TestBroadcastUint16x8(t *testing.T) {
   487  	s := make([]uint16, 8, 8)
   488  	simd.BroadcastUint16x8(12345).StoreSlice(s)
   489  	checkSlices(t, s, []uint16{12345, 12345, 12345, 12345})
   490  }
   491  
   492  func TestBroadcastInt8x32(t *testing.T) {
   493  	s := make([]int8, 32, 32)
   494  	simd.BroadcastInt8x32(-123).StoreSlice(s)
   495  	checkSlices(t, s, []int8{-123, -123, -123, -123, -123, -123, -123, -123,
   496  		-123, -123, -123, -123, -123, -123, -123, -123,
   497  		-123, -123, -123, -123, -123, -123, -123, -123,
   498  		-123, -123, -123, -123, -123, -123, -123, -123,
   499  	})
   500  }
   501  
   502  func TestMaskOpt512(t *testing.T) {
   503  	if !simd.X86.AVX512() {
   504  		t.Skip("Test requires X86.AVX512, not available on this hardware")
   505  		return
   506  	}
   507  
   508  	k := make([]int64, 8, 8)
   509  	s := make([]float64, 8, 8)
   510  
   511  	a := simd.LoadFloat64x8Slice([]float64{2, 0, 2, 0, 2, 0, 2, 0})
   512  	b := simd.LoadFloat64x8Slice([]float64{1, 1, 1, 1, 1, 1, 1, 1})
   513  	c := simd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8})
   514  	d := simd.LoadFloat64x8Slice([]float64{2, 4, 6, 8, 10, 12, 14, 16})
   515  	g := a.Greater(b)
   516  	e := c.Add(d).Masked(g)
   517  	e.StoreSlice(s)
   518  	g.AsInt64x8().StoreSlice(k)
   519  	checkSlices[int64](t, k, []int64{-1, 0, -1, 0, -1, 0, -1, 0})
   520  	checkSlices[float64](t, s, []float64{3, 0, 9, 0, 15, 0, 21, 0})
   521  }
   522  
   523  // flattenedTranspose tranposes x and y, regarded as a pair of 2x2
   524  // matrices, but then flattens the rows in order, i.e
   525  // x: ABCD ==> a: A1B2
   526  // y: 1234     b: C3D4
   527  func flattenedTranspose(x, y simd.Int32x4) (a, b simd.Int32x4) {
   528  	return x.InterleaveLo(y), x.InterleaveHi(y)
   529  }
   530  
   531  func TestFlattenedTranspose(t *testing.T) {
   532  	r := make([]int32, 4, 4)
   533  	s := make([]int32, 4, 4)
   534  
   535  	x := simd.LoadInt32x4Slice([]int32{0xA, 0xB, 0xC, 0xD})
   536  	y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
   537  	a, b := flattenedTranspose(x, y)
   538  
   539  	a.StoreSlice(r)
   540  	b.StoreSlice(s)
   541  
   542  	checkSlices[int32](t, r, []int32{0xA, 1, 0xB, 2})
   543  	checkSlices[int32](t, s, []int32{0xC, 3, 0xD, 4})
   544  
   545  }
   546  
   547  func TestClearAVXUpperBits(t *testing.T) {
   548  	// Test that ClearAVXUpperBits is safe even if there are SIMD values
   549  	// alive (although usually one should not do this).
   550  	if !simd.X86.AVX2() {
   551  		t.Skip("Test requires X86.AVX2, not available on this hardware")
   552  		return
   553  	}
   554  
   555  	r := make([]int64, 4)
   556  	s := make([]int64, 4)
   557  
   558  	x := simd.LoadInt64x4Slice([]int64{10, 20, 30, 40})
   559  	y := simd.LoadInt64x4Slice([]int64{1, 2, 3, 4})
   560  
   561  	x.Add(y).StoreSlice(r)
   562  	simd.ClearAVXUpperBits()
   563  	x.Sub(y).StoreSlice(s)
   564  
   565  	checkSlices[int64](t, r, []int64{11, 22, 33, 44})
   566  	checkSlices[int64](t, s, []int64{9, 18, 27, 36})
   567  }
   568  
   569  func TestLeadingZeros(t *testing.T) {
   570  	if !simd.X86.AVX512() {
   571  		t.Skip("Test requires X86.AVX512, not available on this hardware")
   572  		return
   573  	}
   574  
   575  	src := []uint64{0b1111, 0}
   576  	want := []uint64{60, 64}
   577  	got := make([]uint64, 2)
   578  	simd.LoadUint64x2Slice(src).LeadingZeros().StoreSlice(got)
   579  	for i := range 2 {
   580  		if want[i] != got[i] {
   581  			t.Errorf("Result incorrect at %d: want %d, got %d", i, want[i], got[i])
   582  		}
   583  	}
   584  }
   585  
   586  func TestIsZero(t *testing.T) {
   587  	v1 := simd.LoadUint64x2Slice([]uint64{0, 1})
   588  	v2 := simd.LoadUint64x2Slice([]uint64{0, 0})
   589  	if v1.IsZero() {
   590  		t.Errorf("Result incorrect, want false, got true")
   591  	}
   592  	if !v2.IsZero() {
   593  		t.Errorf("Result incorrect, want true, got false")
   594  	}
   595  	if !v1.And(v2).IsZero() {
   596  		t.Errorf("Result incorrect, want true, got false")
   597  	}
   598  	if v1.AndNot(v2).IsZero() {
   599  		t.Errorf("Result incorrect, want false, got true")
   600  	}
   601  	if !v2.And(v1).IsZero() {
   602  		t.Errorf("Result incorrect, want true, got false")
   603  	}
   604  	if !v2.AndNot(v1).IsZero() {
   605  		t.Errorf("Result incorrect, want true, got false")
   606  	}
   607  }
   608  
   609  func TestSelect4FromPairConst(t *testing.T) {
   610  	x := simd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
   611  	y := simd.LoadInt32x4Slice([]int32{4, 5, 6, 7})
   612  
   613  	llll := x.SelectFromPair(0, 1, 2, 3, y)
   614  	hhhh := x.SelectFromPair(4, 5, 6, 7, y)
   615  	llhh := x.SelectFromPair(0, 1, 6, 7, y)
   616  	hhll := x.SelectFromPair(6, 7, 0, 1, y)
   617  
   618  	lllh := x.SelectFromPair(0, 1, 2, 7, y)
   619  	llhl := x.SelectFromPair(0, 1, 7, 2, y)
   620  	lhll := x.SelectFromPair(0, 7, 1, 2, y)
   621  	hlll := x.SelectFromPair(7, 0, 1, 2, y)
   622  
   623  	hhhl := x.SelectFromPair(4, 5, 6, 0, y)
   624  	hhlh := x.SelectFromPair(4, 5, 0, 6, y)
   625  	hlhh := x.SelectFromPair(4, 0, 5, 6, y)
   626  	lhhh := x.SelectFromPair(0, 4, 5, 6, y)
   627  
   628  	lhlh := x.SelectFromPair(0, 4, 1, 5, y)
   629  	hlhl := x.SelectFromPair(4, 0, 5, 1, y)
   630  	lhhl := x.SelectFromPair(0, 4, 5, 1, y)
   631  	hllh := x.SelectFromPair(4, 0, 1, 5, y)
   632  
   633  	r := make([]int32, 4, 4)
   634  
   635  	foo := func(v simd.Int32x4, a, b, c, d int32) {
   636  		v.StoreSlice(r)
   637  		checkSlices[int32](t, r, []int32{a, b, c, d})
   638  	}
   639  
   640  	foo(llll, 0, 1, 2, 3)
   641  	foo(hhhh, 4, 5, 6, 7)
   642  	foo(llhh, 0, 1, 6, 7)
   643  	foo(hhll, 6, 7, 0, 1)
   644  
   645  	foo(lllh, 0, 1, 2, 7)
   646  	foo(llhl, 0, 1, 7, 2)
   647  	foo(lhll, 0, 7, 1, 2)
   648  	foo(hlll, 7, 0, 1, 2)
   649  
   650  	foo(hhhl, 4, 5, 6, 0)
   651  	foo(hhlh, 4, 5, 0, 6)
   652  	foo(hlhh, 4, 0, 5, 6)
   653  	foo(lhhh, 0, 4, 5, 6)
   654  
   655  	foo(lhlh, 0, 4, 1, 5)
   656  	foo(hlhl, 4, 0, 5, 1)
   657  	foo(lhhl, 0, 4, 5, 1)
   658  	foo(hllh, 4, 0, 1, 5)
   659  }
   660  
   661  //go:noinline
   662  func selectFromPairInt32x4(x simd.Int32x4, a, b, c, d uint8, y simd.Int32x4) simd.Int32x4 {
   663  	return x.SelectFromPair(a, b, c, d, y)
   664  }
   665  
   666  func TestSelect4FromPairVar(t *testing.T) {
   667  	x := simd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
   668  	y := simd.LoadInt32x4Slice([]int32{4, 5, 6, 7})
   669  
   670  	llll := selectFromPairInt32x4(x, 0, 1, 2, 3, y)
   671  	hhhh := selectFromPairInt32x4(x, 4, 5, 6, 7, y)
   672  	llhh := selectFromPairInt32x4(x, 0, 1, 6, 7, y)
   673  	hhll := selectFromPairInt32x4(x, 6, 7, 0, 1, y)
   674  
   675  	lllh := selectFromPairInt32x4(x, 0, 1, 2, 7, y)
   676  	llhl := selectFromPairInt32x4(x, 0, 1, 7, 2, y)
   677  	lhll := selectFromPairInt32x4(x, 0, 7, 1, 2, y)
   678  	hlll := selectFromPairInt32x4(x, 7, 0, 1, 2, y)
   679  
   680  	hhhl := selectFromPairInt32x4(x, 4, 5, 6, 0, y)
   681  	hhlh := selectFromPairInt32x4(x, 4, 5, 0, 6, y)
   682  	hlhh := selectFromPairInt32x4(x, 4, 0, 5, 6, y)
   683  	lhhh := selectFromPairInt32x4(x, 0, 4, 5, 6, y)
   684  
   685  	lhlh := selectFromPairInt32x4(x, 0, 4, 1, 5, y)
   686  	hlhl := selectFromPairInt32x4(x, 4, 0, 5, 1, y)
   687  	lhhl := selectFromPairInt32x4(x, 0, 4, 5, 1, y)
   688  	hllh := selectFromPairInt32x4(x, 4, 0, 1, 5, y)
   689  
   690  	r := make([]int32, 4, 4)
   691  
   692  	foo := func(v simd.Int32x4, a, b, c, d int32) {
   693  		v.StoreSlice(r)
   694  		checkSlices[int32](t, r, []int32{a, b, c, d})
   695  	}
   696  
   697  	foo(llll, 0, 1, 2, 3)
   698  	foo(hhhh, 4, 5, 6, 7)
   699  	foo(llhh, 0, 1, 6, 7)
   700  	foo(hhll, 6, 7, 0, 1)
   701  
   702  	foo(lllh, 0, 1, 2, 7)
   703  	foo(llhl, 0, 1, 7, 2)
   704  	foo(lhll, 0, 7, 1, 2)
   705  	foo(hlll, 7, 0, 1, 2)
   706  
   707  	foo(hhhl, 4, 5, 6, 0)
   708  	foo(hhlh, 4, 5, 0, 6)
   709  	foo(hlhh, 4, 0, 5, 6)
   710  	foo(lhhh, 0, 4, 5, 6)
   711  
   712  	foo(lhlh, 0, 4, 1, 5)
   713  	foo(hlhl, 4, 0, 5, 1)
   714  	foo(lhhl, 0, 4, 5, 1)
   715  	foo(hllh, 4, 0, 1, 5)
   716  }
   717  
   718  func TestSelect4FromPairConstGrouped(t *testing.T) {
   719  	x := simd.LoadFloat32x8Slice([]float32{0, 1, 2, 3, 10, 11, 12, 13})
   720  	y := simd.LoadFloat32x8Slice([]float32{4, 5, 6, 7, 14, 15, 16, 17})
   721  
   722  	llll := x.SelectFromPairGrouped(0, 1, 2, 3, y)
   723  	hhhh := x.SelectFromPairGrouped(4, 5, 6, 7, y)
   724  	llhh := x.SelectFromPairGrouped(0, 1, 6, 7, y)
   725  	hhll := x.SelectFromPairGrouped(6, 7, 0, 1, y)
   726  
   727  	lllh := x.SelectFromPairGrouped(0, 1, 2, 7, y)
   728  	llhl := x.SelectFromPairGrouped(0, 1, 7, 2, y)
   729  	lhll := x.SelectFromPairGrouped(0, 7, 1, 2, y)
   730  	hlll := x.SelectFromPairGrouped(7, 0, 1, 2, y)
   731  
   732  	hhhl := x.SelectFromPairGrouped(4, 5, 6, 0, y)
   733  	hhlh := x.SelectFromPairGrouped(4, 5, 0, 6, y)
   734  	hlhh := x.SelectFromPairGrouped(4, 0, 5, 6, y)
   735  	lhhh := x.SelectFromPairGrouped(0, 4, 5, 6, y)
   736  
   737  	lhlh := x.SelectFromPairGrouped(0, 4, 1, 5, y)
   738  	hlhl := x.SelectFromPairGrouped(4, 0, 5, 1, y)
   739  	lhhl := x.SelectFromPairGrouped(0, 4, 5, 1, y)
   740  	hllh := x.SelectFromPairGrouped(4, 0, 1, 5, y)
   741  
   742  	r := make([]float32, 8, 8)
   743  
   744  	foo := func(v simd.Float32x8, a, b, c, d float32) {
   745  		v.StoreSlice(r)
   746  		checkSlices[float32](t, r, []float32{a, b, c, d, 10 + a, 10 + b, 10 + c, 10 + d})
   747  	}
   748  
   749  	foo(llll, 0, 1, 2, 3)
   750  	foo(hhhh, 4, 5, 6, 7)
   751  	foo(llhh, 0, 1, 6, 7)
   752  	foo(hhll, 6, 7, 0, 1)
   753  
   754  	foo(lllh, 0, 1, 2, 7)
   755  	foo(llhl, 0, 1, 7, 2)
   756  	foo(lhll, 0, 7, 1, 2)
   757  	foo(hlll, 7, 0, 1, 2)
   758  
   759  	foo(hhhl, 4, 5, 6, 0)
   760  	foo(hhlh, 4, 5, 0, 6)
   761  	foo(hlhh, 4, 0, 5, 6)
   762  	foo(lhhh, 0, 4, 5, 6)
   763  
   764  	foo(lhlh, 0, 4, 1, 5)
   765  	foo(hlhl, 4, 0, 5, 1)
   766  	foo(lhhl, 0, 4, 5, 1)
   767  	foo(hllh, 4, 0, 1, 5)
   768  }
   769  
   770  func TestSelectFromPairConstGroupedUint32x16(t *testing.T) {
   771  	if !simd.X86.AVX512() {
   772  		t.Skip("Test requires X86.AVX512, not available on this hardware")
   773  		return
   774  	}
   775  	x := simd.LoadUint32x16Slice([]uint32{0, 1, 2, 3, 10, 11, 12, 13, 20, 21, 22, 23, 30, 31, 32, 33})
   776  	y := simd.LoadUint32x16Slice([]uint32{4, 5, 6, 7, 14, 15, 16, 17, 24, 25, 26, 27, 34, 35, 36, 37})
   777  
   778  	llll := x.SelectFromPairGrouped(0, 1, 2, 3, y)
   779  	hhhh := x.SelectFromPairGrouped(4, 5, 6, 7, y)
   780  	llhh := x.SelectFromPairGrouped(0, 1, 6, 7, y)
   781  	hhll := x.SelectFromPairGrouped(6, 7, 0, 1, y)
   782  
   783  	lllh := x.SelectFromPairGrouped(0, 1, 2, 7, y)
   784  	llhl := x.SelectFromPairGrouped(0, 1, 7, 2, y)
   785  	lhll := x.SelectFromPairGrouped(0, 7, 1, 2, y)
   786  	hlll := x.SelectFromPairGrouped(7, 0, 1, 2, y)
   787  
   788  	hhhl := x.SelectFromPairGrouped(4, 5, 6, 0, y)
   789  	hhlh := x.SelectFromPairGrouped(4, 5, 0, 6, y)
   790  	hlhh := x.SelectFromPairGrouped(4, 0, 5, 6, y)
   791  	lhhh := x.SelectFromPairGrouped(0, 4, 5, 6, y)
   792  
   793  	lhlh := x.SelectFromPairGrouped(0, 4, 1, 5, y)
   794  	hlhl := x.SelectFromPairGrouped(4, 0, 5, 1, y)
   795  	lhhl := x.SelectFromPairGrouped(0, 4, 5, 1, y)
   796  	hllh := x.SelectFromPairGrouped(4, 0, 1, 5, y)
   797  
   798  	r := make([]uint32, 16, 16)
   799  
   800  	foo := func(v simd.Uint32x16, a, b, c, d uint32) {
   801  		v.StoreSlice(r)
   802  		checkSlices[uint32](t, r, []uint32{a, b, c, d,
   803  			10 + a, 10 + b, 10 + c, 10 + d,
   804  			20 + a, 20 + b, 20 + c, 20 + d,
   805  			30 + a, 30 + b, 30 + c, 30 + d,
   806  		})
   807  	}
   808  
   809  	foo(llll, 0, 1, 2, 3)
   810  	foo(hhhh, 4, 5, 6, 7)
   811  	foo(llhh, 0, 1, 6, 7)
   812  	foo(hhll, 6, 7, 0, 1)
   813  
   814  	foo(lllh, 0, 1, 2, 7)
   815  	foo(llhl, 0, 1, 7, 2)
   816  	foo(lhll, 0, 7, 1, 2)
   817  	foo(hlll, 7, 0, 1, 2)
   818  
   819  	foo(hhhl, 4, 5, 6, 0)
   820  	foo(hhlh, 4, 5, 0, 6)
   821  	foo(hlhh, 4, 0, 5, 6)
   822  	foo(lhhh, 0, 4, 5, 6)
   823  
   824  	foo(lhlh, 0, 4, 1, 5)
   825  	foo(hlhl, 4, 0, 5, 1)
   826  	foo(lhhl, 0, 4, 5, 1)
   827  	foo(hllh, 4, 0, 1, 5)
   828  }
   829  
   830  func TestSelect128FromPair(t *testing.T) {
   831  	x := simd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
   832  	y := simd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})
   833  
   834  	aa := x.Select128FromPair(0, 0, y)
   835  	ab := x.Select128FromPair(0, 1, y)
   836  	bc := x.Select128FromPair(1, 2, y)
   837  	cd := x.Select128FromPair(2, 3, y)
   838  	da := x.Select128FromPair(3, 0, y)
   839  	dc := x.Select128FromPair(3, 2, y)
   840  
   841  	r := make([]uint64, 4, 4)
   842  
   843  	foo := func(v simd.Uint64x4, a, b uint64) {
   844  		a, b = 2*a, 2*b
   845  		v.StoreSlice(r)
   846  		checkSlices[uint64](t, r, []uint64{a, a + 1, b, b + 1})
   847  	}
   848  
   849  	foo(aa, 0, 0)
   850  	foo(ab, 0, 1)
   851  	foo(bc, 1, 2)
   852  	foo(cd, 2, 3)
   853  	foo(da, 3, 0)
   854  	foo(dc, 3, 2)
   855  }
   856  
   857  func TestSelect128FromPairError(t *testing.T) {
   858  	x := simd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
   859  	y := simd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})
   860  
   861  	defer func() {
   862  		if r := recover(); r != nil {
   863  			t.Logf("Saw expected panic %v", r)
   864  		}
   865  	}()
   866  	_ = x.Select128FromPair(0, 4, y)
   867  
   868  	t.Errorf("Should have panicked")
   869  }
   870  
   871  //go:noinline
   872  func select128FromPair(x simd.Uint64x4, lo, hi uint8, y simd.Uint64x4) simd.Uint64x4 {
   873  	return x.Select128FromPair(lo, hi, y)
   874  }
   875  
   876  func TestSelect128FromPairVar(t *testing.T) {
   877  	x := simd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
   878  	y := simd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})
   879  
   880  	aa := select128FromPair(x, 0, 0, y)
   881  	ab := select128FromPair(x, 0, 1, y)
   882  	bc := select128FromPair(x, 1, 2, y)
   883  	cd := select128FromPair(x, 2, 3, y)
   884  	da := select128FromPair(x, 3, 0, y)
   885  	dc := select128FromPair(x, 3, 2, y)
   886  
   887  	r := make([]uint64, 4, 4)
   888  
   889  	foo := func(v simd.Uint64x4, a, b uint64) {
   890  		a, b = 2*a, 2*b
   891  		v.StoreSlice(r)
   892  		checkSlices[uint64](t, r, []uint64{a, a + 1, b, b + 1})
   893  	}
   894  
   895  	foo(aa, 0, 0)
   896  	foo(ab, 0, 1)
   897  	foo(bc, 1, 2)
   898  	foo(cd, 2, 3)
   899  	foo(da, 3, 0)
   900  	foo(dc, 3, 2)
   901  }
   902  
   903  func TestSelect2FromPairConst(t *testing.T) {
   904  	x := simd.LoadUint64x2Slice([]uint64{0, 1})
   905  	y := simd.LoadUint64x2Slice([]uint64{2, 3})
   906  
   907  	ll := x.SelectFromPair(0, 1, y)
   908  	hh := x.SelectFromPair(3, 2, y)
   909  	lh := x.SelectFromPair(0, 3, y)
   910  	hl := x.SelectFromPair(2, 1, y)
   911  
   912  	r := make([]uint64, 2, 2)
   913  
   914  	foo := func(v simd.Uint64x2, a, b uint64) {
   915  		v.StoreSlice(r)
   916  		checkSlices[uint64](t, r, []uint64{a, b})
   917  	}
   918  
   919  	foo(ll, 0, 1)
   920  	foo(hh, 3, 2)
   921  	foo(lh, 0, 3)
   922  	foo(hl, 2, 1)
   923  }
   924  
   925  func TestSelect2FromPairConstGroupedUint(t *testing.T) {
   926  	x := simd.LoadUint64x4Slice([]uint64{0, 1, 10, 11})
   927  	y := simd.LoadUint64x4Slice([]uint64{2, 3, 12, 13})
   928  
   929  	ll := x.SelectFromPairGrouped(0, 1, y)
   930  	hh := x.SelectFromPairGrouped(3, 2, y)
   931  	lh := x.SelectFromPairGrouped(0, 3, y)
   932  	hl := x.SelectFromPairGrouped(2, 1, y)
   933  
   934  	r := make([]uint64, 4, 4)
   935  
   936  	foo := func(v simd.Uint64x4, a, b uint64) {
   937  		v.StoreSlice(r)
   938  		checkSlices[uint64](t, r, []uint64{a, b, a + 10, b + 10})
   939  	}
   940  
   941  	foo(ll, 0, 1)
   942  	foo(hh, 3, 2)
   943  	foo(lh, 0, 3)
   944  	foo(hl, 2, 1)
   945  }
   946  
   947  func TestSelect2FromPairConstGroupedFloat(t *testing.T) {
   948  	x := simd.LoadFloat64x4Slice([]float64{0, 1, 10, 11})
   949  	y := simd.LoadFloat64x4Slice([]float64{2, 3, 12, 13})
   950  
   951  	ll := x.SelectFromPairGrouped(0, 1, y)
   952  	hh := x.SelectFromPairGrouped(3, 2, y)
   953  	lh := x.SelectFromPairGrouped(0, 3, y)
   954  	hl := x.SelectFromPairGrouped(2, 1, y)
   955  
   956  	r := make([]float64, 4, 4)
   957  
   958  	foo := func(v simd.Float64x4, a, b float64) {
   959  		v.StoreSlice(r)
   960  		checkSlices[float64](t, r, []float64{a, b, a + 10, b + 10})
   961  	}
   962  
   963  	foo(ll, 0, 1)
   964  	foo(hh, 3, 2)
   965  	foo(lh, 0, 3)
   966  	foo(hl, 2, 1)
   967  }
   968  
   969  func TestSelect2FromPairConstGroupedInt(t *testing.T) {
   970  	x := simd.LoadInt64x4Slice([]int64{0, 1, 10, 11})
   971  	y := simd.LoadInt64x4Slice([]int64{2, 3, 12, 13})
   972  
   973  	ll := x.SelectFromPairGrouped(0, 1, y)
   974  	hh := x.SelectFromPairGrouped(3, 2, y)
   975  	lh := x.SelectFromPairGrouped(0, 3, y)
   976  	hl := x.SelectFromPairGrouped(2, 1, y)
   977  
   978  	r := make([]int64, 4, 4)
   979  
   980  	foo := func(v simd.Int64x4, a, b int64) {
   981  		v.StoreSlice(r)
   982  		checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10})
   983  	}
   984  
   985  	foo(ll, 0, 1)
   986  	foo(hh, 3, 2)
   987  	foo(lh, 0, 3)
   988  	foo(hl, 2, 1)
   989  }
   990  
   991  func TestSelect2FromPairConstGroupedInt512(t *testing.T) {
   992  	if !simd.X86.AVX512() {
   993  		t.Skip("Test requires X86.AVX512, not available on this hardware")
   994  		return
   995  	}
   996  
   997  	x := simd.LoadInt64x8Slice([]int64{0, 1, 10, 11, 20, 21, 30, 31})
   998  	y := simd.LoadInt64x8Slice([]int64{2, 3, 12, 13, 22, 23, 32, 33})
   999  
  1000  	ll := x.SelectFromPairGrouped(0, 1, y)
  1001  	hh := x.SelectFromPairGrouped(3, 2, y)
  1002  	lh := x.SelectFromPairGrouped(0, 3, y)
  1003  	hl := x.SelectFromPairGrouped(2, 1, y)
  1004  
  1005  	r := make([]int64, 8, 8)
  1006  
  1007  	foo := func(v simd.Int64x8, a, b int64) {
  1008  		v.StoreSlice(r)
  1009  		checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10, a + 20, b + 20, a + 30, b + 30})
  1010  	}
  1011  
  1012  	foo(ll, 0, 1)
  1013  	foo(hh, 3, 2)
  1014  	foo(lh, 0, 3)
  1015  	foo(hl, 2, 1)
  1016  }
  1017  
  1018  func TestString(t *testing.T) {
  1019  	x := simd.LoadUint32x4Slice([]uint32{0, 1, 2, 3})
  1020  	y := simd.LoadInt64x4Slice([]int64{-4, -5, -6, -7})
  1021  	z := simd.LoadFloat32x4Slice([]float32{0.5, 1.5, -2.5, 3.5e9})
  1022  	w := simd.LoadFloat64x4Slice([]float64{0.5, 1.5, -2.5, 3.5e9})
  1023  
  1024  	sx := "{0,1,2,3}"
  1025  	sy := "{-4,-5,-6,-7}"
  1026  	sz := "{0.5,1.5,-2.5,3.5e+09}"
  1027  	sw := sz
  1028  
  1029  	if x.String() != sx {
  1030  		t.Errorf("x=%s wanted %s", x, sx)
  1031  	}
  1032  	if y.String() != sy {
  1033  		t.Errorf("y=%s wanted %s", y, sy)
  1034  	}
  1035  	if z.String() != sz {
  1036  		t.Errorf("z=%s wanted %s", z, sz)
  1037  	}
  1038  	if w.String() != sw {
  1039  		t.Errorf("w=%s wanted %s", w, sw)
  1040  	}
  1041  	t.Logf("w=%s", w)
  1042  	t.Logf("x=%s", x)
  1043  	t.Logf("y=%s", y)
  1044  	t.Logf("z=%s", z)
  1045  }
  1046  
  1047  // a returns an slice of 16 int32
  1048  func a() []int32 {
  1049  	return make([]int32, 16, 16)
  1050  }
  1051  
  1052  // applyTo3 returns a 16-element slice of the results of
  1053  // applying f to the respective elements of vectors x, y, and z.
  1054  func applyTo3(x, y, z simd.Int32x16, f func(x, y, z int32) int32) []int32 {
  1055  	ax, ay, az := a(), a(), a()
  1056  	x.StoreSlice(ax)
  1057  	y.StoreSlice(ay)
  1058  	z.StoreSlice(az)
  1059  
  1060  	r := a()
  1061  	for i := range r {
  1062  		r[i] = f(ax[i], ay[i], az[i])
  1063  	}
  1064  	return r
  1065  }
  1066  
  1067  // applyTo3 returns a 16-element slice of the results of
  1068  // applying f to the respective elements of vectors x, y, z, and w.
  1069  func applyTo4(x, y, z, w simd.Int32x16, f func(x, y, z, w int32) int32) []int32 {
  1070  	ax, ay, az, aw := a(), a(), a(), a()
  1071  	x.StoreSlice(ax)
  1072  	y.StoreSlice(ay)
  1073  	z.StoreSlice(az)
  1074  	w.StoreSlice(aw)
  1075  
  1076  	r := make([]int32, len(ax), len(ax))
  1077  	for i := range r {
  1078  		r[i] = f(ax[i], ay[i], az[i], aw[i])
  1079  	}
  1080  	return r
  1081  }
  1082  
  1083  func TestSelectTernOptInt32x16(t *testing.T) {
  1084  	if !simd.X86.AVX512() {
  1085  		t.Skip("Test requires X86.AVX512, not available on this hardware")
  1086  		return
  1087  	}
  1088  	ax := []int32{0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1}
  1089  	ay := []int32{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1}
  1090  	az := []int32{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}
  1091  	aw := []int32{0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1}
  1092  	am := []int32{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
  1093  
  1094  	x := simd.LoadInt32x16Slice(ax)
  1095  	y := simd.LoadInt32x16Slice(ay)
  1096  	z := simd.LoadInt32x16Slice(az)
  1097  	w := simd.LoadInt32x16Slice(aw)
  1098  	m := simd.LoadInt32x16Slice(am)
  1099  
  1100  	foo := func(v simd.Int32x16, s []int32) {
  1101  		r := make([]int32, 16, 16)
  1102  		v.StoreSlice(r)
  1103  		checkSlices[int32](t, r, s)
  1104  	}
  1105  
  1106  	t0 := w.Xor(y).Xor(z)
  1107  	ft0 := func(w, y, z int32) int32 {
  1108  		return w ^ y ^ z
  1109  	}
  1110  	foo(t0, applyTo3(w, y, z, ft0))
  1111  
  1112  	t1 := m.And(w.Xor(y).Xor(z.Not()))
  1113  	ft1 := func(m, w, y, z int32) int32 {
  1114  		return m & (w ^ y ^ ^z)
  1115  	}
  1116  	foo(t1, applyTo4(m, w, y, z, ft1))
  1117  
  1118  	t2 := x.Xor(y).Xor(z).And(x.Xor(y).Xor(z.Not()))
  1119  	ft2 := func(x, y, z int32) int32 {
  1120  		return (x ^ y ^ z) & (x ^ y ^ ^z)
  1121  	}
  1122  	foo(t2, applyTo3(x, y, z, ft2))
  1123  }
  1124  
  1125  func TestMaskedMerge(t *testing.T) {
  1126  	x := simd.LoadInt64x4Slice([]int64{1, 2, 3, 4})
  1127  	y := simd.LoadInt64x4Slice([]int64{5, 6, 1, 1})
  1128  	z := simd.LoadInt64x4Slice([]int64{-1, -2, -3, -4})
  1129  	res := make([]int64, 4)
  1130  	expected := []int64{6, 8, -3, -4}
  1131  	mask := x.Less(y)
  1132  	if simd.X86.AVX512() {
  1133  		x.Add(y).Merge(z, mask).StoreSlice(res)
  1134  	} else {
  1135  		x.Add(y).Merge(z, mask).StoreSlice(res)
  1136  	}
  1137  	for i := range 4 {
  1138  		if res[i] != expected[i] {
  1139  			t.Errorf("got %d wanted %d", res[i], expected[i])
  1140  		}
  1141  	}
  1142  }
  1143  
  1144  func TestDotProductQuadruple(t *testing.T) {
  1145  	if !simd.X86.AVXVNNI() {
  1146  		t.Skip("Test requires X86.AVXVNNI, not available on this hardware")
  1147  		return
  1148  	}
  1149  	xd := make([]int8, 16)
  1150  	yd := make([]uint8, 16)
  1151  	zd := make([]int32, 4)
  1152  	wanted1 := make([]int32, 4)
  1153  	wanted2 := make([]int32, 4)
  1154  	res1 := make([]int32, 4)
  1155  	res2 := make([]int32, 4)
  1156  	for i := range 4 {
  1157  		xd[i] = 5
  1158  		yd[i] = 6
  1159  		zd[i] = 3
  1160  		wanted1[i] = 30
  1161  		wanted2[i] = 30
  1162  	}
  1163  	x := simd.LoadInt8x16Slice(xd)
  1164  	y := simd.LoadUint8x16Slice(yd)
  1165  	z := simd.LoadInt32x4Slice(zd)
  1166  	x.DotProductQuadruple(y).StoreSlice(res1)
  1167  	x.DotProductQuadruple(y).Add(z).StoreSlice(res1)
  1168  	for i := range 4 {
  1169  		if res1[i] != wanted1[i] {
  1170  			t.Errorf("got %d wanted %d", res1[i], wanted1[i])
  1171  		}
  1172  		if res2[i] != wanted2[i] {
  1173  			t.Errorf("got %d wanted %d", res2[i], wanted2[i])
  1174  		}
  1175  	}
  1176  }
  1177  
  1178  func TestPermuteScalars(t *testing.T) {
  1179  	x := []int32{11, 12, 13, 14}
  1180  	want := []int32{12, 13, 14, 11}
  1181  	got := make([]int32, 4)
  1182  	simd.LoadInt32x4Slice(x).PermuteScalars(1, 2, 3, 0).StoreSlice(got)
  1183  	for i := range 4 {
  1184  		if want[i] != got[i] {
  1185  			t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
  1186  		}
  1187  	}
  1188  }
  1189  
  1190  func TestPermuteScalarsGrouped(t *testing.T) {
  1191  	x := []int32{11, 12, 13, 14, 21, 22, 23, 24}
  1192  	want := []int32{12, 13, 14, 11, 22, 23, 24, 21}
  1193  	got := make([]int32, 8)
  1194  	simd.LoadInt32x8Slice(x).PermuteScalarsGrouped(1, 2, 3, 0).StoreSlice(got)
  1195  	for i := range 8 {
  1196  		if want[i] != got[i] {
  1197  			t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
  1198  		}
  1199  	}
  1200  }
  1201  
  1202  func TestPermuteScalarsHi(t *testing.T) {
  1203  	x := []int16{-1, -2, -3, -4, 11, 12, 13, 14}
  1204  	want := []int16{-1, -2, -3, -4, 12, 13, 14, 11}
  1205  	got := make([]int16, len(x))
  1206  	simd.LoadInt16x8Slice(x).PermuteScalarsHi(1, 2, 3, 0).StoreSlice(got)
  1207  	for i := range got {
  1208  		if want[i] != got[i] {
  1209  			t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
  1210  		}
  1211  	}
  1212  }
  1213  
  1214  func TestPermuteScalarsLo(t *testing.T) {
  1215  	x := []int16{11, 12, 13, 14, 4, 5, 6, 7}
  1216  	want := []int16{12, 13, 14, 11, 4, 5, 6, 7}
  1217  	got := make([]int16, len(x))
  1218  	simd.LoadInt16x8Slice(x).PermuteScalarsLo(1, 2, 3, 0).StoreSlice(got)
  1219  	for i := range got {
  1220  		if want[i] != got[i] {
  1221  			t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
  1222  		}
  1223  	}
  1224  }
  1225  
  1226  func TestPermuteScalarsHiGrouped(t *testing.T) {
  1227  	x := []int16{-1, -2, -3, -4, 11, 12, 13, 14, -11, -12, -13, -14, 111, 112, 113, 114}
  1228  	want := []int16{-1, -2, -3, -4, 12, 13, 14, 11, -11, -12, -13, -14, 112, 113, 114, 111}
  1229  	got := make([]int16, len(x))
  1230  	simd.LoadInt16x16Slice(x).PermuteScalarsHiGrouped(1, 2, 3, 0).StoreSlice(got)
  1231  	for i := range got {
  1232  		if want[i] != got[i] {
  1233  			t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
  1234  		}
  1235  	}
  1236  }
  1237  
  1238  func TestPermuteScalarsLoGrouped(t *testing.T) {
  1239  	x := []int16{11, 12, 13, 14, 4, 5, 6, 7, 111, 112, 113, 114, 14, 15, 16, 17}
  1240  	want := []int16{12, 13, 14, 11, 4, 5, 6, 7, 112, 113, 114, 111, 14, 15, 16, 17}
  1241  	got := make([]int16, len(x))
  1242  	simd.LoadInt16x16Slice(x).PermuteScalarsLoGrouped(1, 2, 3, 0).StoreSlice(got)
  1243  	for i := range got {
  1244  		if want[i] != got[i] {
  1245  			t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i])
  1246  		}
  1247  	}
  1248  }
  1249  

View as plain text