Source file src/simd/ops_internal_amd64.go

     1  // Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
     2  
     3  //go:build goexperiment.simd
     4  
     5  package simd
     6  
     7  /* blend */
     8  
     9  // blend selects between x and y under control of mask: the result takes
    10  // x (the first vector) where mask is false and y (the second) where it is true.
    11  //
    12  // Asm: VPBLENDVB, CPU Feature: AVX
    13  func (x Int8x16) blend(y Int8x16, mask Int8x16) Int8x16
    14  
    15  // blend selects between x and y under control of mask: the result takes
    16  // x (the first vector) where mask is false and y (the second) where it is true.
    17  //
    18  // Asm: VPBLENDVB, CPU Feature: AVX2
    19  func (x Int8x32) blend(y Int8x32, mask Int8x32) Int8x32
    20  
    21  /* blendMasked */
    22  
    23  // blendMasked selects between x and y under control of mask: the result takes
    24  // x (the first vector) where mask is false and y (the second) where it is true.
    25  //
    26  // This operation is applied selectively under a write mask.
    27  //
    28  // Asm: VPBLENDMB, CPU Feature: AVX512
    29  func (x Int8x64) blendMasked(y Int8x64, mask Mask8x64) Int8x64
    30  
    31  // blendMasked selects between x and y under control of mask: the result takes
    32  // x (the first vector) where mask is false and y (the second) where it is true.
    33  //
    34  // This operation is applied selectively under a write mask.
    35  //
    36  // Asm: VPBLENDMW, CPU Feature: AVX512
    37  func (x Int16x32) blendMasked(y Int16x32, mask Mask16x32) Int16x32
    38  
    39  // blendMasked selects between x and y under control of mask: the result takes
    40  // x (the first vector) where mask is false and y (the second) where it is true.
    41  //
    42  // This operation is applied selectively under a write mask.
    43  //
    44  // Asm: VPBLENDMD, CPU Feature: AVX512
    45  func (x Int32x16) blendMasked(y Int32x16, mask Mask32x16) Int32x16
    46  
    47  // blendMasked selects between x and y under control of mask: the result takes
    48  // x (the first vector) where mask is false and y (the second) where it is true.
    49  //
    50  // This operation is applied selectively under a write mask.
    51  //
    52  // Asm: VPBLENDMQ, CPU Feature: AVX512
    53  func (x Int64x8) blendMasked(y Int64x8, mask Mask64x8) Int64x8
    54  
    55  /* concatSelectedConstant */
    56  
    57  // concatSelectedConstant concatenates elements selected from x and y into the lower and upper
    58  // halves of the output, respectively.  The selection is given by the constant h1h0l1l0,
    59  // where each of h1, h0, l1 and l0 is a two-bit index: l0 and l1 select from x, h0 and h1 from y.
    60  // For example, {0,1,2,3}.concatSelectedConstant(0b_11_01_00_10, {4,5,6,7}) returns
    61  // {2, 0, 5, 7} (the binary constant is written most significant bits first, so l0 is 0b10).
    62  //
    63  // h1h0l1l0 should be a constant for best performance; a non-constant value is translated into a jump table.
    64  //
    65  // Asm: VSHUFPS, CPU Feature: AVX
    66  func (x Float32x4) concatSelectedConstant(h1h0l1l0 uint8, y Float32x4) Float32x4
    67  
    68  // concatSelectedConstant concatenates elements selected from x and y into the lower and upper
    69  // halves of the output, respectively.  The selection is given by the constant hilo,
    70  // where hi and lo are each one bit specifying which 64-bit element to select
    71  // from y and x, respectively.  For example, {4,5}.concatSelectedConstant(0b10, {6,7})
    72  // returns {4,7}: bit 0 (lo), selecting from x, is 0 and selects 4; bit 1 (hi),
    73  // selecting from y, is 1 and selects 7.
    74  //
    75  // hilo should be a constant for best performance; a non-constant value is translated into a jump table.
    76  //
    77  // Asm: VSHUFPD, CPU Feature: AVX
    78  func (x Float64x2) concatSelectedConstant(hilo uint8, y Float64x2) Float64x2
    79  
    80  // concatSelectedConstant concatenates elements selected from x and y into the lower and upper
    81  // halves of the output, respectively.  The selection is given by the constant h1h0l1l0,
    82  // where each of h1, h0, l1 and l0 is a two-bit index: l0 and l1 select from x, h0 and h1 from y.
    83  // For example, {0,1,2,3}.concatSelectedConstant(0b_11_01_00_10, {4,5,6,7}) returns
    84  // {2, 0, 5, 7} (the binary constant is written most significant bits first, so l0 is 0b10).
    85  //
    86  // h1h0l1l0 should be a constant for best performance; a non-constant value is translated into a jump table.
    87  //
    88  // Asm: VSHUFPS, CPU Feature: AVX
    89  func (x Int32x4) concatSelectedConstant(h1h0l1l0 uint8, y Int32x4) Int32x4
    90  
    91  // concatSelectedConstant concatenates elements selected from x and y into the lower and upper
    92  // halves of the output, respectively.  The selection is given by the constant hilo,
    93  // where hi and lo are each one bit specifying which 64-bit element to select
    94  // from y and x, respectively.  For example, {4,5}.concatSelectedConstant(0b10, {6,7})
    95  // returns {4,7}: bit 0 (lo), selecting from x, is 0 and selects 4; bit 1 (hi),
    96  // selecting from y, is 1 and selects 7.
    97  //
    98  // hilo should be a constant for best performance; a non-constant value is translated into a jump table.
    99  //
   100  // Asm: VSHUFPD, CPU Feature: AVX
   101  func (x Int64x2) concatSelectedConstant(hilo uint8, y Int64x2) Int64x2
   102  
   103  // concatSelectedConstant concatenates elements selected from x and y into the lower and upper
   104  // halves of the output, respectively.  The selection is given by the constant h1h0l1l0,
   105  // where each of h1, h0, l1 and l0 is a two-bit index: l0 and l1 select from x, h0 and h1 from y.
   106  // For example, {0,1,2,3}.concatSelectedConstant(0b_11_01_00_10, {4,5,6,7}) returns
   107  // {2, 0, 5, 7} (the binary constant is written most significant bits first, so l0 is 0b10).
   108  //
   109  // h1h0l1l0 should be a constant for best performance; a non-constant value is translated into a jump table.
   110  //
   111  // Asm: VSHUFPS, CPU Feature: AVX
   112  func (x Uint32x4) concatSelectedConstant(h1h0l1l0 uint8, y Uint32x4) Uint32x4
   113  
   114  // concatSelectedConstant concatenates elements selected from x and y into the lower and upper
   115  // halves of the output, respectively.  The selection is given by the constant hilo,
   116  // where hi and lo are each one bit specifying which 64-bit element to select
   117  // from y and x, respectively.  For example, {4,5}.concatSelectedConstant(0b10, {6,7})
   118  // returns {4,7}: bit 0 (lo), selecting from x, is 0 and selects 4; bit 1 (hi),
   119  // selecting from y, is 1 and selects 7.
   120  //
   121  // hilo should be a constant for best performance; a non-constant value is translated into a jump table.
   122  //
   123  // Asm: VSHUFPD, CPU Feature: AVX
   124  func (x Uint64x2) concatSelectedConstant(hilo uint8, y Uint64x2) Uint64x2
   125  
   126  /* concatSelectedConstantGrouped */
   127  
   128  // concatSelectedConstantGrouped concatenates elements selected from each 128-bit subvector of x and y
   129  // into the lower and upper halves, respectively, of the corresponding subvector of the output.
   130  // The selection is given by the constant h1h0l1l0 and is the same for every subvector;
   131  // each of h1, h0, l1 and l0 is a two-bit index: l0 and l1 select from x, h0 and h1 from y.
   132  // For example,
   133  // {0,1,2,3,8,9,10,11}.concatSelectedConstantGrouped(0b_11_01_00_10, {4,5,6,7,12,13,14,15})
   134  // returns {2,0,5,7,10,8,13,15}
   135  // (the binary constant is written most significant bits first, so l0 is 0b10).
   136  //
   137  // h1h0l1l0 should be a constant for best performance; a non-constant value is translated into a jump table.
   138  //
   139  // Asm: VSHUFPS, CPU Feature: AVX
   140  func (x Float32x8) concatSelectedConstantGrouped(h1h0l1l0 uint8, y Float32x8) Float32x8
   141  
   142  // concatSelectedConstantGrouped concatenates elements selected from each 128-bit subvector of x and y
   143  // into the lower and upper halves, respectively, of the corresponding subvector of the output.
   144  // The selection is given by the constant h1h0l1l0 and is the same for every subvector;
   145  // each of h1, h0, l1 and l0 is a two-bit index: l0 and l1 select from x, h0 and h1 from y.
   146  // For example,
   147  //
   148  //	{0,1,2,3,8,9,10,11, 20,21,22,23,28,29,210,211}.concatSelectedConstantGrouped(
   149  //	 0b_11_01_00_10, {4,5,6,7,12,13,14,15, 24,25,26,27,212,213,214,215})
   150  //
   151  // returns {2,0,5,7,10,8,13,15, 22,20,25,27,210,28,213,215}
   152  //
   153  // (the binary constant is written most significant bits first, so l0 is 0b10).
   154  //
   155  // h1h0l1l0 should be a constant for best performance; a non-constant value is translated into a jump table.
   156  //
   157  // Asm: VSHUFPS, CPU Feature: AVX512
   158  func (x Float32x16) concatSelectedConstantGrouped(h1h0l1l0 uint8, y Float32x16) Float32x16
   159  
   160  // concatSelectedConstantGrouped concatenates elements selected from each 128-bit subvector of x and y
   161  // into the lower and upper halves, respectively, of the corresponding subvector of the output.
   162  // The selections are specified by the constant parameter hilos, where each
   163  // hi and lo pair of bits selects 64-bit elements from the corresponding 128-bit
   164  // subvectors of y and x, respectively.
   165  //
   166  // For example, {4,5,8,9}.concatSelectedConstantGrouped(0b_11_10, {6,7,10,11})
   167  // returns {4,7,9,11}: bit 0 is 0, selecting element 0 from x's lower
   168  // 128 bits (4); bit 1 is 1, selecting element 1 from y's lower 128 bits (7);
   169  // bit 2 is 1, selecting element 1 from x's upper 128 bits (9); and bit 3 is 1,
   170  // selecting element 1 from y's upper 128 bits (11).
   171  // This differs from the same method applied to a 32x8 vector, where
   172  // the 8-bit constant performs the same selection on both subvectors.
   173  //
   174  // hilos should be a constant for best performance; a non-constant value is translated into a jump table.
   175  //
   176  // Asm: VSHUFPD, CPU Feature: AVX
   177  func (x Float64x4) concatSelectedConstantGrouped(hilos uint8, y Float64x4) Float64x4
   178  
   179  // concatSelectedConstantGrouped concatenates elements selected from each 128-bit subvector of x and y
   180  // into the lower and upper halves, respectively, of the corresponding subvector of the output.
   181  // The selections are specified by the constant parameter hilos, where each
   182  // hi and lo pair of bits selects 64-bit elements from the corresponding 128-bit
   183  // subvectors of y and x, respectively.
   184  //
   185  // For example, {4,5,8,9,12,13,16,17}.concatSelectedConstantGrouped(0b11_00_11_10, {6,7,10,11,14,15,18,19})
   186  // returns {4,7,9,11,12,14,17,19}: bit 0 is 0, selecting element 0 from x's
   187  // lowest 128 bits (4); bit 1 is 1, selecting element 1 from y's lowest 128 bits (7);
   188  // bit 2 is 1, selecting element 1 from x's next 128 bits (9); and bit 3 is 1,
   189  // selecting element 1 from y's next 128 bits (11).  The next two 0 bits select
   190  // the lower elements from x's and y's third 128-bit groups (12, 14), and the last two
   191  // 1 bits select the upper elements from x's and y's last 128 bits (17, 19).
   192  // This differs from the same method applied to a 32x8 or 32x16 vector, where
   193  // the 8-bit constant performs the same selection on all the subvectors.
   194  //
   195  // hilos should be a constant for best performance; a non-constant value is translated into a jump table.
   196  //
   197  // Asm: VSHUFPD, CPU Feature: AVX512
   198  func (x Float64x8) concatSelectedConstantGrouped(hilos uint8, y Float64x8) Float64x8
   199  
   200  // concatSelectedConstantGrouped concatenates elements selected from each 128-bit subvector of x and y
   201  // into the lower and upper halves, respectively, of the corresponding subvector of the output.
   202  // The selection is given by the constant h1h0l1l0 and is the same for every subvector;
   203  // each of h1, h0, l1 and l0 is a two-bit index: l0 and l1 select from x, h0 and h1 from y.
   204  // For example,
   205  // {0,1,2,3,8,9,10,11}.concatSelectedConstantGrouped(0b_11_01_00_10, {4,5,6,7,12,13,14,15})
   206  // returns {2,0,5,7,10,8,13,15}
   207  // (the binary constant is written most significant bits first, so l0 is 0b10).
   208  //
   209  // h1h0l1l0 should be a constant for best performance; a non-constant value is translated into a jump table.
   210  //
   211  // Asm: VSHUFPS, CPU Feature: AVX
   212  func (x Int32x8) concatSelectedConstantGrouped(h1h0l1l0 uint8, y Int32x8) Int32x8
   213  
   214  // concatSelectedConstantGrouped concatenates elements selected from each 128-bit subvector of x and y
   215  // into the lower and upper halves, respectively, of the corresponding subvector of the output.
   216  // The selection is given by the constant h1h0l1l0 and is the same for every subvector;
   217  // each of h1, h0, l1 and l0 is a two-bit index: l0 and l1 select from x, h0 and h1 from y.
   218  // For example,
   219  //
   220  //	{0,1,2,3,8,9,10,11, 20,21,22,23,28,29,210,211}.concatSelectedConstantGrouped(
   221  //	 0b_11_01_00_10, {4,5,6,7,12,13,14,15, 24,25,26,27,212,213,214,215})
   222  //
   223  // returns {2,0,5,7,10,8,13,15, 22,20,25,27,210,28,213,215}
   224  //
   225  // (the binary constant is written most significant bits first, so l0 is 0b10).
   226  //
   227  // h1h0l1l0 should be a constant for best performance; a non-constant value is translated into a jump table.
   228  //
   229  // Asm: VSHUFPS, CPU Feature: AVX512
   230  func (x Int32x16) concatSelectedConstantGrouped(h1h0l1l0 uint8, y Int32x16) Int32x16
   231  
   232  // concatSelectedConstantGrouped concatenates elements selected from each 128-bit subvector of x and y
   233  // into the lower and upper halves, respectively, of the corresponding subvector of the output.
   234  // The selections are specified by the constant parameter hilos, where each
   235  // hi and lo pair of bits selects 64-bit elements from the corresponding 128-bit
   236  // subvectors of y and x, respectively.
   237  //
   238  // For example, {4,5,8,9}.concatSelectedConstantGrouped(0b_11_10, {6,7,10,11})
   239  // returns {4,7,9,11}: bit 0 is 0, selecting element 0 from x's lower
   240  // 128 bits (4); bit 1 is 1, selecting element 1 from y's lower 128 bits (7);
   241  // bit 2 is 1, selecting element 1 from x's upper 128 bits (9); and bit 3 is 1,
   242  // selecting element 1 from y's upper 128 bits (11).
   243  // This differs from the same method applied to a 32x8 vector, where
   244  // the 8-bit constant performs the same selection on both subvectors.
   245  //
   246  // hilos should be a constant for best performance; a non-constant value is translated into a jump table.
   247  //
   248  // Asm: VSHUFPD, CPU Feature: AVX
   249  func (x Int64x4) concatSelectedConstantGrouped(hilos uint8, y Int64x4) Int64x4
   250  
   251  // concatSelectedConstantGrouped concatenates elements selected from each 128-bit subvector of x and y
   252  // into the lower and upper halves, respectively, of the corresponding subvector of the output.
   253  // The selections are specified by the constant parameter hilos, where each
   254  // hi and lo pair of bits selects 64-bit elements from the corresponding 128-bit
   255  // subvectors of y and x, respectively.
   256  //
   257  // For example, {4,5,8,9,12,13,16,17}.concatSelectedConstantGrouped(0b11_00_11_10, {6,7,10,11,14,15,18,19})
   258  // returns {4,7,9,11,12,14,17,19}: bit 0 is 0, selecting element 0 from x's
   259  // lowest 128 bits (4); bit 1 is 1, selecting element 1 from y's lowest 128 bits (7);
   260  // bit 2 is 1, selecting element 1 from x's next 128 bits (9); and bit 3 is 1,
   261  // selecting element 1 from y's next 128 bits (11).  The next two 0 bits select
   262  // the lower elements from x's and y's third 128-bit groups (12, 14), and the last two
   263  // 1 bits select the upper elements from x's and y's last 128 bits (17, 19).
   264  // This differs from the same method applied to a 32x8 or 32x16 vector, where
   265  // the 8-bit constant performs the same selection on all the subvectors.
   266  //
   267  // hilos should be a constant for best performance; a non-constant value is translated into a jump table.
   268  //
   269  // Asm: VSHUFPD, CPU Feature: AVX512
   270  func (x Int64x8) concatSelectedConstantGrouped(hilos uint8, y Int64x8) Int64x8
   271  
   272  // concatSelectedConstantGrouped concatenates elements selected from each 128-bit subvector of x and y
   273  // into the lower and upper halves, respectively, of the corresponding subvector of the output.
   274  // The selection is given by the constant h1h0l1l0 and is the same for every subvector;
   275  // each of h1, h0, l1 and l0 is a two-bit index: l0 and l1 select from x, h0 and h1 from y.
   276  // For example,
   277  // {0,1,2,3,8,9,10,11}.concatSelectedConstantGrouped(0b_11_01_00_10, {4,5,6,7,12,13,14,15})
   278  // returns {2,0,5,7,10,8,13,15}
   279  // (the binary constant is written most significant bits first, so l0 is 0b10).
   280  //
   281  // h1h0l1l0 should be a constant for best performance; a non-constant value is translated into a jump table.
   282  //
   283  // Asm: VSHUFPS, CPU Feature: AVX
   284  func (x Uint32x8) concatSelectedConstantGrouped(h1h0l1l0 uint8, y Uint32x8) Uint32x8
   285  
   286  // concatSelectedConstantGrouped concatenates elements selected from each 128-bit subvector of x and y
   287  // into the lower and upper halves, respectively, of the corresponding subvector of the output.
   288  // The selection is given by the constant h1h0l1l0 and is the same for every subvector;
   289  // each of h1, h0, l1 and l0 is a two-bit index: l0 and l1 select from x, h0 and h1 from y.
   290  // For example,
   291  //
   292  //	{0,1,2,3,8,9,10,11, 20,21,22,23,28,29,210,211}.concatSelectedConstantGrouped(
   293  //	 0b_11_01_00_10, {4,5,6,7,12,13,14,15, 24,25,26,27,212,213,214,215})
   294  //
   295  // returns {2,0,5,7,10,8,13,15, 22,20,25,27,210,28,213,215}
   296  //
   297  // (the binary constant is written most significant bits first, so l0 is 0b10).
   298  //
   299  // h1h0l1l0 should be a constant for best performance; a non-constant value is translated into a jump table.
   300  //
   301  // Asm: VSHUFPS, CPU Feature: AVX512
   302  func (x Uint32x16) concatSelectedConstantGrouped(h1h0l1l0 uint8, y Uint32x16) Uint32x16
   303  
   304  // concatSelectedConstantGrouped concatenates elements selected from each 128-bit subvector of x and y
   305  // into the lower and upper halves, respectively, of the corresponding subvector of the output.
   306  // The selections are specified by the constant parameter hilos, where each
   307  // hi and lo pair of bits selects 64-bit elements from the corresponding 128-bit
   308  // subvectors of y and x, respectively.
   309  //
   310  // For example, {4,5,8,9}.concatSelectedConstantGrouped(0b_11_10, {6,7,10,11})
   311  // returns {4,7,9,11}: bit 0 is 0, selecting element 0 from x's lower
   312  // 128 bits (4); bit 1 is 1, selecting element 1 from y's lower 128 bits (7);
   313  // bit 2 is 1, selecting element 1 from x's upper 128 bits (9); and bit 3 is 1,
   314  // selecting element 1 from y's upper 128 bits (11).
   315  // This differs from the same method applied to a 32x8 vector, where
   316  // the 8-bit constant performs the same selection on both subvectors.
   317  //
   318  // hilos should be a constant for best performance; a non-constant value is translated into a jump table.
   319  //
   320  // Asm: VSHUFPD, CPU Feature: AVX
   321  func (x Uint64x4) concatSelectedConstantGrouped(hilos uint8, y Uint64x4) Uint64x4
   322  
   323  // concatSelectedConstantGrouped concatenates elements selected from each 128-bit subvector of x and y
   324  // into the lower and upper halves, respectively, of the corresponding subvector of the output.
   325  // The selections are specified by the constant parameter hilos, where each
   326  // hi and lo pair of bits selects 64-bit elements from the corresponding 128-bit
   327  // subvectors of y and x, respectively.
   328  //
   329  // For example, {4,5,8,9,12,13,16,17}.concatSelectedConstantGrouped(0b11_00_11_10, {6,7,10,11,14,15,18,19})
   330  // returns {4,7,9,11,12,14,17,19}: bit 0 is 0, selecting element 0 from x's
   331  // lowest 128 bits (4); bit 1 is 1, selecting element 1 from y's lowest 128 bits (7);
   332  // bit 2 is 1, selecting element 1 from x's next 128 bits (9); and bit 3 is 1,
   333  // selecting element 1 from y's next 128 bits (11).  The next two 0 bits select
   334  // the lower elements from x's and y's third 128-bit groups (12, 14), and the last two
   335  // 1 bits select the upper elements from x's and y's last 128 bits (17, 19).
   336  // This differs from the same method applied to a 32x8 or 32x16 vector, where
   337  // the 8-bit constant performs the same selection on all the subvectors.
   338  //
   339  // hilos should be a constant for best performance; a non-constant value is translated into a jump table.
   340  //
   341  // Asm: VSHUFPD, CPU Feature: AVX512
   342  func (x Uint64x8) concatSelectedConstantGrouped(hilos uint8, y Uint64x8) Uint64x8
   343  
   344  /* permuteScalars */
   345  
   346  // permuteScalars permutes the elements of vector x using constant indices:
   347  // result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
   348  // indices holds four 2-bit values packed into a byte; indices[0:2] denotes the first of them.
   349  //
   350  // indices should be a constant for best performance; a non-constant value is translated into a jump table.
   351  //
   352  // Asm: VPSHUFD, CPU Feature: AVX
   353  func (x Int32x4) permuteScalars(indices uint8) Int32x4
   354  
   355  // permuteScalars permutes the elements of vector x using constant indices:
   356  // result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
   357  // indices holds four 2-bit values packed into a byte; indices[0:2] denotes the first of them.
   358  //
   359  // indices should be a constant for best performance; a non-constant value is translated into a jump table.
   360  //
   361  // Asm: VPSHUFD, CPU Feature: AVX
   362  func (x Uint32x4) permuteScalars(indices uint8) Uint32x4
   363  
   364  /* permuteScalarsGrouped */
   365  
   366  // permuteScalarsGrouped permutes the elements within each group of vector x using constant indices:
   367  // result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
   368  // indices holds four 2-bit values packed into a byte; indices[0:2] denotes the first of them.
   369  // Each group is 128 bits wide, and the same indices are applied to every group.
   370  //
   371  // indices should be a constant for best performance; a non-constant value is translated into a jump table.
   372  //
   373  // Asm: VPSHUFD, CPU Feature: AVX2
   374  func (x Int32x8) permuteScalarsGrouped(indices uint8) Int32x8
   375  
   376  // permuteScalarsGrouped permutes the elements within each group of vector x using constant indices:
   377  // result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
   378  // indices holds four 2-bit values packed into a byte; indices[0:2] denotes the first of them.
   379  // Each group is 128 bits wide, and the same indices are applied to every group.
   380  //
   381  // indices should be a constant for best performance; a non-constant value is translated into a jump table.
   382  //
   383  // Asm: VPSHUFD, CPU Feature: AVX512
   384  func (x Int32x16) permuteScalarsGrouped(indices uint8) Int32x16
   385  
   386  // permuteScalarsGrouped permutes the elements within each group of vector x using constant indices:
   387  // result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
   388  // indices holds four 2-bit values packed into a byte; indices[0:2] denotes the first of them.
   389  // Each group is 128 bits wide, and the same indices are applied to every group.
   390  //
   391  // indices should be a constant for best performance; a non-constant value is translated into a jump table.
   392  //
   393  // Asm: VPSHUFD, CPU Feature: AVX2
   394  func (x Uint32x8) permuteScalarsGrouped(indices uint8) Uint32x8
   395  
   396  // permuteScalarsGrouped permutes the elements within each group of vector x using constant indices:
   397  // result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
   398  // indices holds four 2-bit values packed into a byte; indices[0:2] denotes the first of them.
   399  // Each group is 128 bits wide, and the same indices are applied to every group.
   400  //
   401  // indices should be a constant for best performance; a non-constant value is translated into a jump table.
   402  //
   403  // Asm: VPSHUFD, CPU Feature: AVX512
   404  func (x Uint32x16) permuteScalarsGrouped(indices uint8) Uint32x16
   405  
   406  /* permuteScalarsHi */
   407  
   408  // permuteScalarsHi permutes the upper four elements of vector x using constant indices (the lower four are kept):
   409  // result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]}
   410  // indices holds four 2-bit values packed into a byte; indices[0:2] denotes the first of them.
   411  //
   412  // indices should be a constant for best performance; a non-constant value is translated into a jump table.
   413  //
   414  // Asm: VPSHUFHW, CPU Feature: AVX512
   415  func (x Int16x8) permuteScalarsHi(indices uint8) Int16x8
   416  
   417  // permuteScalarsHi permutes the upper four elements of vector x using constant indices (the lower four are kept):
   418  // result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]}
   419  // indices holds four 2-bit values packed into a byte; indices[0:2] denotes the first of them.
   420  //
   421  // indices should be a constant for best performance; a non-constant value is translated into a jump table.
   422  //
   423  // Asm: VPSHUFHW, CPU Feature: AVX512
   424  func (x Uint16x8) permuteScalarsHi(indices uint8) Uint16x8
   425  
   426  /* permuteScalarsHiGrouped */
   427  
   428  // permuteScalarsHiGrouped permutes the upper four elements of each group of vector x using constant indices:
   429  // result =
   430  //
   431  //	{x_group0[0], x_group0[1], x_group0[2], x_group0[3], x_group0[indices[0:2]+4], x_group0[indices[2:4]+4], x_group0[indices[4:6]+4], x_group0[indices[6:8]+4],
   432  //	 x_group1[0], x_group1[1], x_group1[2], x_group1[3], x_group1[indices[0:2]+4], ...}
   433  //
   434  // indices holds four 2-bit values packed into a byte; indices[0:2] denotes the first of them.
   435  // Each group is 128 bits wide, and the same indices are applied to every group.
   436  //
   437  // indices should be a constant for best performance; a non-constant value is translated into a jump table.
   438  //
   439  // Asm: VPSHUFHW, CPU Feature: AVX2
   440  func (x Int16x16) permuteScalarsHiGrouped(indices uint8) Int16x16
   441  
   442  // permuteScalarsHiGrouped permutes the upper four elements of each group of vector x using constant indices:
   443  // result =
   444  //
   445  //	{x_group0[0], x_group0[1], x_group0[2], x_group0[3], x_group0[indices[0:2]+4], x_group0[indices[2:4]+4], x_group0[indices[4:6]+4], x_group0[indices[6:8]+4],
   446  //	 x_group1[0], x_group1[1], x_group1[2], x_group1[3], x_group1[indices[0:2]+4], ...}
   447  //
   448  // indices holds four 2-bit values packed into a byte; indices[0:2] denotes the first of them.
   449  // Each group is 128 bits wide, and the same indices are applied to every group.
   450  //
   451  // indices should be a constant for best performance; a non-constant value is translated into a jump table.
   452  //
   453  // Asm: VPSHUFHW, CPU Feature: AVX512
   454  func (x Int16x32) permuteScalarsHiGrouped(indices uint8) Int16x32
   455  
   456  // permuteScalarsHiGrouped permutes the upper four elements of each group of vector x using constant indices:
   457  // result =
   458  //
   459  //	{x_group0[0], x_group0[1], x_group0[2], x_group0[3], x_group0[indices[0:2]+4], x_group0[indices[2:4]+4], x_group0[indices[4:6]+4], x_group0[indices[6:8]+4],
   460  //	 x_group1[0], x_group1[1], x_group1[2], x_group1[3], x_group1[indices[0:2]+4], ...}
   461  //
   462  // indices holds four 2-bit values packed into a byte; indices[0:2] denotes the first of them.
   463  // Each group is 128 bits wide, and the same indices are applied to every group.
   464  //
   465  // indices should be a constant for best performance; a non-constant value is translated into a jump table.
   466  //
   467  // Asm: VPSHUFHW, CPU Feature: AVX2
   468  func (x Uint16x16) permuteScalarsHiGrouped(indices uint8) Uint16x16
   469  
   470  // permuteScalarsHiGrouped permutes the upper four elements of each group of vector x using constant indices:
   471  // result =
   472  //
   473  //	{x_group0[0], x_group0[1], x_group0[2], x_group0[3], x_group0[indices[0:2]+4], x_group0[indices[2:4]+4], x_group0[indices[4:6]+4], x_group0[indices[6:8]+4],
   474  //	 x_group1[0], x_group1[1], x_group1[2], x_group1[3], x_group1[indices[0:2]+4], ...}
   475  //
   476  // indices holds four 2-bit values packed into a byte; indices[0:2] denotes the first of them.
   477  // Each group is 128 bits wide, and the same indices are applied to every group.
   478  //
   479  // indices should be a constant for best performance; a non-constant value is translated into a jump table.
   480  //
   481  // Asm: VPSHUFHW, CPU Feature: AVX512
   482  func (x Uint16x32) permuteScalarsHiGrouped(indices uint8) Uint16x32
   483  
   484  /* permuteScalarsLo */
   485  
   486  // permuteScalarsLo permutes the lower four elements of vector x using constant indices (the upper four are kept):
   487  // result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]}
   488  // indices holds four 2-bit values packed into a byte; indices[0:2] denotes the first of them.
   489  //
   490  // indices should be a constant for best performance; a non-constant value is translated into a jump table.
   491  //
   492  // Asm: VPSHUFLW, CPU Feature: AVX512
   493  func (x Int16x8) permuteScalarsLo(indices uint8) Int16x8
   494  
   495  // permuteScalarsLo permutes the lower four elements of vector x using constant indices (the upper four are kept):
   496  // result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]}
   497  // indices holds four 2-bit values packed into a byte; indices[0:2] denotes the first of them.
   498  //
   499  // indices should be a constant for best performance; a non-constant value is translated into a jump table.
   500  //
   501  // Asm: VPSHUFLW, CPU Feature: AVX512
   502  func (x Uint16x8) permuteScalarsLo(indices uint8) Uint16x8
   503  
   504  /* permuteScalarsLoGrouped */
   505  
   506  // permuteScalarsLoGrouped permutes the lower four elements of each group of vector x using constant indices:
   507  //
   508  //	result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group0[4], x_group0[5], x_group0[6], x_group0[7],
   509  //	 x_group1[indices[0:2]], ...}
   510  //
   511  // indices holds four 2-bit values packed into a byte; indices[0:2] denotes the first of them.
   512  // Each group is 128 bits wide, and the same indices are applied to every group.
   513  //
   514  // indices should be a constant for best performance; a non-constant value is translated into a jump table.
   515  //
   516  // Asm: VPSHUFLW, CPU Feature: AVX2
   517  func (x Int16x16) permuteScalarsLoGrouped(indices uint8) Int16x16
   518  
   519  // permuteScalarsLoGrouped permutes the lower four elements of each group of vector x using constant indices:
   520  //
   521  //	result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group0[4], x_group0[5], x_group0[6], x_group0[7],
   522  //	 x_group1[indices[0:2]], ...}
   523  //
   524  // indices holds four 2-bit values packed into a byte; indices[0:2] denotes the first of them.
   525  // Each group is 128 bits wide, and the same indices are applied to every group.
   526  //
   527  // indices should be a constant for best performance; a non-constant value is translated into a jump table.
   528  //
   529  // Asm: VPSHUFLW, CPU Feature: AVX512
   530  func (x Int16x32) permuteScalarsLoGrouped(indices uint8) Int16x32
   531  
   532  // permuteScalarsLoGrouped permutes the low four elements of each 128-bit group of vector x using the packed indices:
   533  //
   534  //	result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group0[4], x_group0[5], x_group0[6], x_group0[7],
   535  //	 x_group1[indices[0:2]], ...}
   536  //
   537  // indices packs four 2-bit values into one byte, so indices[0:2] is the first index; the same indices are applied to every group.
   538  // Each group is 128 bits wide; elements 4 through 7 of each group are passed through unchanged.
   539  //
   540  // For best performance indices should be a constant; a non-constant value will be translated into a jump table.
   541  //
   542  // Asm: VPSHUFLW, CPU Feature: AVX2
   543  func (x Uint16x16) permuteScalarsLoGrouped(indices uint8) Uint16x16
   544  
   545  // permuteScalarsLoGrouped permutes the low four elements of each 128-bit group of vector x using the packed indices:
   546  //
   547  //	result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group0[4], x_group0[5], x_group0[6], x_group0[7],
   548  //	 x_group1[indices[0:2]], ...}
   549  //
   550  // indices packs four 2-bit values into one byte, so indices[0:2] is the first index; the same indices are applied to every group.
   551  // Each group is 128 bits wide; elements 4 through 7 of each group are passed through unchanged.
   552  //
   553  // For best performance indices should be a constant; a non-constant value will be translated into a jump table.
   554  //
   555  // Asm: VPSHUFLW, CPU Feature: AVX512
   556  func (x Uint16x32) permuteScalarsLoGrouped(indices uint8) Uint16x32
   557  
   558  /* tern */
   559  
   560  // tern computes a bitwise three-input logical function of x, y and z, selected by the 8-bit truth table.
   561  // At each bit position, taking x, y and z as the corresponding input bits, the result bit is 1 & (table >> (x<<2 + y<<1 + z)); for example, table 0x96 yields x ^ y ^ z.
   562  //
   563  // For best performance table should be a constant; a non-constant value will be translated into a jump table.
   564  //
   565  // Asm: VPTERNLOGD, CPU Feature: AVX512
   566  func (x Int32x4) tern(table uint8, y Int32x4, z Int32x4) Int32x4
   567  
   568  // tern computes a bitwise three-input logical function of x, y and z, selected by the 8-bit truth table.
   569  // At each bit position, taking x, y and z as the corresponding input bits, the result bit is 1 & (table >> (x<<2 + y<<1 + z)); for example, table 0x96 yields x ^ y ^ z.
   570  //
   571  // For best performance table should be a constant; a non-constant value will be translated into a jump table.
   572  //
   573  // Asm: VPTERNLOGD, CPU Feature: AVX512
   574  func (x Int32x8) tern(table uint8, y Int32x8, z Int32x8) Int32x8
   575  
   576  // tern computes a bitwise three-input logical function of x, y and z, selected by the 8-bit truth table.
   577  // At each bit position, taking x, y and z as the corresponding input bits, the result bit is 1 & (table >> (x<<2 + y<<1 + z)); for example, table 0x96 yields x ^ y ^ z.
   578  //
   579  // For best performance table should be a constant; a non-constant value will be translated into a jump table.
   580  //
   581  // Asm: VPTERNLOGD, CPU Feature: AVX512
   582  func (x Int32x16) tern(table uint8, y Int32x16, z Int32x16) Int32x16
   583  
   584  // tern computes a bitwise three-input logical function of x, y and z, selected by the 8-bit truth table.
   585  // At each bit position, taking x, y and z as the corresponding input bits, the result bit is 1 & (table >> (x<<2 + y<<1 + z)); for example, table 0x96 yields x ^ y ^ z.
   586  //
   587  // For best performance table should be a constant; a non-constant value will be translated into a jump table.
   588  //
   589  // Asm: VPTERNLOGQ, CPU Feature: AVX512
   590  func (x Int64x2) tern(table uint8, y Int64x2, z Int64x2) Int64x2
   591  
   592  // tern computes a bitwise three-input logical function of x, y and z, selected by the 8-bit truth table.
   593  // At each bit position, taking x, y and z as the corresponding input bits, the result bit is 1 & (table >> (x<<2 + y<<1 + z)); for example, table 0x96 yields x ^ y ^ z.
   594  //
   595  // For best performance table should be a constant; a non-constant value will be translated into a jump table.
   596  //
   597  // Asm: VPTERNLOGQ, CPU Feature: AVX512
   598  func (x Int64x4) tern(table uint8, y Int64x4, z Int64x4) Int64x4
   599  
   600  // tern computes a bitwise three-input logical function of x, y and z, selected by the 8-bit truth table.
   601  // At each bit position, taking x, y and z as the corresponding input bits, the result bit is 1 & (table >> (x<<2 + y<<1 + z)); for example, table 0x96 yields x ^ y ^ z.
   602  //
   603  // For best performance table should be a constant; a non-constant value will be translated into a jump table.
   604  //
   605  // Asm: VPTERNLOGQ, CPU Feature: AVX512
   606  func (x Int64x8) tern(table uint8, y Int64x8, z Int64x8) Int64x8
   607  
   608  // tern computes a bitwise three-input logical function of x, y and z, selected by the 8-bit truth table.
   609  // At each bit position, taking x, y and z as the corresponding input bits, the result bit is 1 & (table >> (x<<2 + y<<1 + z)); for example, table 0x96 yields x ^ y ^ z.
   610  //
   611  // For best performance table should be a constant; a non-constant value will be translated into a jump table.
   612  //
   613  // Asm: VPTERNLOGD, CPU Feature: AVX512
   614  func (x Uint32x4) tern(table uint8, y Uint32x4, z Uint32x4) Uint32x4
   615  
   616  // tern computes a bitwise three-input logical function of x, y and z, selected by the 8-bit truth table.
   617  // At each bit position, taking x, y and z as the corresponding input bits, the result bit is 1 & (table >> (x<<2 + y<<1 + z)); for example, table 0x96 yields x ^ y ^ z.
   618  //
   619  // For best performance table should be a constant; a non-constant value will be translated into a jump table.
   620  //
   621  // Asm: VPTERNLOGD, CPU Feature: AVX512
   622  func (x Uint32x8) tern(table uint8, y Uint32x8, z Uint32x8) Uint32x8
   623  
   624  // tern computes a bitwise three-input logical function of x, y and z, selected by the 8-bit truth table.
   625  // At each bit position, taking x, y and z as the corresponding input bits, the result bit is 1 & (table >> (x<<2 + y<<1 + z)); for example, table 0x96 yields x ^ y ^ z.
   626  //
   627  // For best performance table should be a constant; a non-constant value will be translated into a jump table.
   628  //
   629  // Asm: VPTERNLOGD, CPU Feature: AVX512
   630  func (x Uint32x16) tern(table uint8, y Uint32x16, z Uint32x16) Uint32x16
   631  
   632  // tern computes a bitwise three-input logical function of x, y and z, selected by the 8-bit truth table.
   633  // At each bit position, taking x, y and z as the corresponding input bits, the result bit is 1 & (table >> (x<<2 + y<<1 + z)); for example, table 0x96 yields x ^ y ^ z.
   634  //
   635  // For best performance table should be a constant; a non-constant value will be translated into a jump table.
   636  //
   637  // Asm: VPTERNLOGQ, CPU Feature: AVX512
   638  func (x Uint64x2) tern(table uint8, y Uint64x2, z Uint64x2) Uint64x2
   639  
   640  // tern computes a bitwise three-input logical function of x, y and z, selected by the 8-bit truth table.
   641  // At each bit position, taking x, y and z as the corresponding input bits, the result bit is 1 & (table >> (x<<2 + y<<1 + z)); for example, table 0x96 yields x ^ y ^ z.
   642  //
   643  // For best performance table should be a constant; a non-constant value will be translated into a jump table.
   644  //
   645  // Asm: VPTERNLOGQ, CPU Feature: AVX512
   646  func (x Uint64x4) tern(table uint8, y Uint64x4, z Uint64x4) Uint64x4
   647  
   648  // tern computes a bitwise three-input logical function of x, y and z, selected by the 8-bit truth table.
   649  // At each bit position, taking x, y and z as the corresponding input bits, the result bit is 1 & (table >> (x<<2 + y<<1 + z)); for example, table 0x96 yields x ^ y ^ z.
   650  //
   651  // For best performance table should be a constant; a non-constant value will be translated into a jump table.
   652  //
   653  // Asm: VPTERNLOGQ, CPU Feature: AVX512
   654  func (x Uint64x8) tern(table uint8, y Uint64x8, z Uint64x8) Uint64x8
   655  

View as plain text