ops_amd64.go

     1  // Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
     2  
     3  //go:build goexperiment.simd
     4  
     5  package simd
     6  
     7  /* AESDecryptLastRound */
     8  
     9  // AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
    10  // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
    11  // y is the chunk of dw array in use.
    12  // result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
    13  //
    14  // Asm: VAESDECLAST, CPU Feature: AVX, AES
    15  func (x Uint8x16) AESDecryptLastRound(y Uint32x4) Uint8x16
    16  
    17  // AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
    18  // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
    19  // y is the chunk of dw array in use.
    20  // result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
    21  //
    22  // Asm: VAESDECLAST, CPU Feature: AVX512VAES
    23  func (x Uint8x32) AESDecryptLastRound(y Uint32x8) Uint8x32
    24  
    25  // AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
    26  // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
    27  // y is the chunk of dw array in use.
    28  // result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
    29  //
    30  // Asm: VAESDECLAST, CPU Feature: AVX512VAES
    31  func (x Uint8x64) AESDecryptLastRound(y Uint32x16) Uint8x64
    32  
    33  /* AESDecryptOneRound */
    34  
    35  // AESDecryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
    36  // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
    37  // y is the chunk of dw array in use.
    38  // result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
    39  //
    40  // Asm: VAESDEC, CPU Feature: AVX, AES
    41  func (x Uint8x16) AESDecryptOneRound(y Uint32x4) Uint8x16
    42  
    43  // AESDecryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
    44  // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
    45  // y is the chunk of dw array in use.
    46  // result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
    47  //
    48  // Asm: VAESDEC, CPU Feature: AVX512VAES
    49  func (x Uint8x32) AESDecryptOneRound(y Uint32x8) Uint8x32
    50  
    51  // AESDecryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
    52  // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
    53  // y is the chunk of dw array in use.
    54  // result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
    55  //
    56  // Asm: VAESDEC, CPU Feature: AVX512VAES
    57  func (x Uint8x64) AESDecryptOneRound(y Uint32x16) Uint8x64
    58  
    59  /* AESEncryptLastRound */
    60  
    61  // AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
    62  // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
    63  // y is the chunk of w array in use.
    64  // result = AddRoundKey(ShiftRows(SubBytes(x)), y)
    65  //
    66  // Asm: VAESENCLAST, CPU Feature: AVX, AES
    67  func (x Uint8x16) AESEncryptLastRound(y Uint32x4) Uint8x16
    68  
    69  // AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
    70  // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
    71  // y is the chunk of w array in use.
    72  // result = AddRoundKey(ShiftRows(SubBytes(x)), y)
    73  //
    74  // Asm: VAESENCLAST, CPU Feature: AVX512VAES
    75  func (x Uint8x32) AESEncryptLastRound(y Uint32x8) Uint8x32
    76  
    77  // AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
    78  // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
    79  // y is the chunk of w array in use.
    80  // result = AddRoundKey(ShiftRows(SubBytes(x)), y)
    81  //
    82  // Asm: VAESENCLAST, CPU Feature: AVX512VAES
    83  func (x Uint8x64) AESEncryptLastRound(y Uint32x16) Uint8x64
    84  
    85  /* AESEncryptOneRound */
    86  
    87  // AESEncryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
    88  // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
    89  // y is the chunk of w array in use.
    90  // result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
    91  //
    92  // Asm: VAESENC, CPU Feature: AVX, AES
    93  func (x Uint8x16) AESEncryptOneRound(y Uint32x4) Uint8x16
    94  
    95  // AESEncryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
    96  // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
    97  // y is the chunk of w array in use.
    98  // result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
    99  //
   100  // Asm: VAESENC, CPU Feature: AVX512VAES
   101  func (x Uint8x32) AESEncryptOneRound(y Uint32x8) Uint8x32
   102  
   103  // AESEncryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
   104  // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
   105  // y is the chunk of w array in use.
   106  // result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
   107  //
   108  // Asm: VAESENC, CPU Feature: AVX512VAES
   109  func (x Uint8x64) AESEncryptOneRound(y Uint32x16) Uint8x64
   110  
   111  /* AESInvMixColumns */
   112  
   113  // AESInvMixColumns performs the InvMixColumns operation in AES cipher algorithm defined in FIPS 197.
   114  // x is the chunk of w array in use.
   115  // result = InvMixColumns(x)
   116  //
   117  // Asm: VAESIMC, CPU Feature: AVX, AES
   118  func (x Uint32x4) AESInvMixColumns() Uint32x4
   119  
   120  /* AESRoundKeyGenAssist */
   121  
   122  // AESRoundKeyGenAssist performs some components of KeyExpansion in AES cipher algorithm defined in FIPS 197.
   123  // x is an array of AES words, but only x[1] and x[3] are used.
   124  // rconVal is a value from the Rcon constant array.
   125  // result[0] = SubWord(x[1])
   126  // result[1] = XOR(RotWord(SubWord(x[1])), rconVal)
   127  // result[2] = SubWord(x[3])
   128  // result[3] = XOR(RotWord(SubWord(x[3])), rconVal)
   129  //
   130  // rconVal results in better performance when it's a constant; a non-constant value will be translated into a jump table.
   131  //
   132  // Asm: VAESKEYGENASSIST, CPU Feature: AVX, AES
   133  func (x Uint32x4) AESRoundKeyGenAssist(rconVal uint8) Uint32x4
   134  
   135  /* Abs */
   136  
   137  // Abs computes the absolute value of each element.
   138  //
   139  // Asm: VPABSB, CPU Feature: AVX
   140  func (x Int8x16) Abs() Int8x16
   141  
   142  // Abs computes the absolute value of each element.
   143  //
   144  // Asm: VPABSB, CPU Feature: AVX2
   145  func (x Int8x32) Abs() Int8x32
   146  
   147  // Abs computes the absolute value of each element.
   148  //
   149  // Asm: VPABSB, CPU Feature: AVX512
   150  func (x Int8x64) Abs() Int8x64
   151  
   152  // Abs computes the absolute value of each element.
   153  //
   154  // Asm: VPABSW, CPU Feature: AVX
   155  func (x Int16x8) Abs() Int16x8
   156  
   157  // Abs computes the absolute value of each element.
   158  //
   159  // Asm: VPABSW, CPU Feature: AVX2
   160  func (x Int16x16) Abs() Int16x16
   161  
   162  // Abs computes the absolute value of each element.
   163  //
   164  // Asm: VPABSW, CPU Feature: AVX512
   165  func (x Int16x32) Abs() Int16x32
   166  
   167  // Abs computes the absolute value of each element.
   168  //
   169  // Asm: VPABSD, CPU Feature: AVX
   170  func (x Int32x4) Abs() Int32x4
   171  
   172  // Abs computes the absolute value of each element.
   173  //
   174  // Asm: VPABSD, CPU Feature: AVX2
   175  func (x Int32x8) Abs() Int32x8
   176  
   177  // Abs computes the absolute value of each element.
   178  //
   179  // Asm: VPABSD, CPU Feature: AVX512
   180  func (x Int32x16) Abs() Int32x16
   181  
   182  // Abs computes the absolute value of each element.
   183  //
   184  // Asm: VPABSQ, CPU Feature: AVX512
   185  func (x Int64x2) Abs() Int64x2
   186  
   187  // Abs computes the absolute value of each element.
   188  //
   189  // Asm: VPABSQ, CPU Feature: AVX512
   190  func (x Int64x4) Abs() Int64x4
   191  
   192  // Abs computes the absolute value of each element.
   193  //
   194  // Asm: VPABSQ, CPU Feature: AVX512
   195  func (x Int64x8) Abs() Int64x8
   196  
   197  /* Add */
   198  
   199  // Add adds corresponding elements of two vectors.
   200  //
   201  // Asm: VADDPS, CPU Feature: AVX
   202  func (x Float32x4) Add(y Float32x4) Float32x4
   203  
   204  // Add adds corresponding elements of two vectors.
   205  //
   206  // Asm: VADDPS, CPU Feature: AVX
   207  func (x Float32x8) Add(y Float32x8) Float32x8
   208  
   209  // Add adds corresponding elements of two vectors.
   210  //
   211  // Asm: VADDPS, CPU Feature: AVX512
   212  func (x Float32x16) Add(y Float32x16) Float32x16
   213  
   214  // Add adds corresponding elements of two vectors.
   215  //
   216  // Asm: VADDPD, CPU Feature: AVX
   217  func (x Float64x2) Add(y Float64x2) Float64x2
   218  
   219  // Add adds corresponding elements of two vectors.
   220  //
   221  // Asm: VADDPD, CPU Feature: AVX
   222  func (x Float64x4) Add(y Float64x4) Float64x4
   223  
   224  // Add adds corresponding elements of two vectors.
   225  //
   226  // Asm: VADDPD, CPU Feature: AVX512
   227  func (x Float64x8) Add(y Float64x8) Float64x8
   228  
   229  // Add adds corresponding elements of two vectors.
   230  //
   231  // Asm: VPADDB, CPU Feature: AVX
   232  func (x Int8x16) Add(y Int8x16) Int8x16
   233  
   234  // Add adds corresponding elements of two vectors.
   235  //
   236  // Asm: VPADDB, CPU Feature: AVX2
   237  func (x Int8x32) Add(y Int8x32) Int8x32
   238  
   239  // Add adds corresponding elements of two vectors.
   240  //
   241  // Asm: VPADDB, CPU Feature: AVX512
   242  func (x Int8x64) Add(y Int8x64) Int8x64
   243  
   244  // Add adds corresponding elements of two vectors.
   245  //
   246  // Asm: VPADDW, CPU Feature: AVX
   247  func (x Int16x8) Add(y Int16x8) Int16x8
   248  
   249  // Add adds corresponding elements of two vectors.
   250  //
   251  // Asm: VPADDW, CPU Feature: AVX2
   252  func (x Int16x16) Add(y Int16x16) Int16x16
   253  
   254  // Add adds corresponding elements of two vectors.
   255  //
   256  // Asm: VPADDW, CPU Feature: AVX512
   257  func (x Int16x32) Add(y Int16x32) Int16x32
   258  
   259  // Add adds corresponding elements of two vectors.
   260  //
   261  // Asm: VPADDD, CPU Feature: AVX
   262  func (x Int32x4) Add(y Int32x4) Int32x4
   263  
   264  // Add adds corresponding elements of two vectors.
   265  //
   266  // Asm: VPADDD, CPU Feature: AVX2
   267  func (x Int32x8) Add(y Int32x8) Int32x8
   268  
   269  // Add adds corresponding elements of two vectors.
   270  //
   271  // Asm: VPADDD, CPU Feature: AVX512
   272  func (x Int32x16) Add(y Int32x16) Int32x16
   273  
   274  // Add adds corresponding elements of two vectors.
   275  //
   276  // Asm: VPADDQ, CPU Feature: AVX
   277  func (x Int64x2) Add(y Int64x2) Int64x2
   278  
   279  // Add adds corresponding elements of two vectors.
   280  //
   281  // Asm: VPADDQ, CPU Feature: AVX2
   282  func (x Int64x4) Add(y Int64x4) Int64x4
   283  
   284  // Add adds corresponding elements of two vectors.
   285  //
   286  // Asm: VPADDQ, CPU Feature: AVX512
   287  func (x Int64x8) Add(y Int64x8) Int64x8
   288  
   289  // Add adds corresponding elements of two vectors.
   290  //
   291  // Asm: VPADDB, CPU Feature: AVX
   292  func (x Uint8x16) Add(y Uint8x16) Uint8x16
   293  
   294  // Add adds corresponding elements of two vectors.
   295  //
   296  // Asm: VPADDB, CPU Feature: AVX2
   297  func (x Uint8x32) Add(y Uint8x32) Uint8x32
   298  
   299  // Add adds corresponding elements of two vectors.
   300  //
   301  // Asm: VPADDB, CPU Feature: AVX512
   302  func (x Uint8x64) Add(y Uint8x64) Uint8x64
   303  
   304  // Add adds corresponding elements of two vectors.
   305  //
   306  // Asm: VPADDW, CPU Feature: AVX
   307  func (x Uint16x8) Add(y Uint16x8) Uint16x8
   308  
   309  // Add adds corresponding elements of two vectors.
   310  //
   311  // Asm: VPADDW, CPU Feature: AVX2
   312  func (x Uint16x16) Add(y Uint16x16) Uint16x16
   313  
   314  // Add adds corresponding elements of two vectors.
   315  //
   316  // Asm: VPADDW, CPU Feature: AVX512
   317  func (x Uint16x32) Add(y Uint16x32) Uint16x32
   318  
   319  // Add adds corresponding elements of two vectors.
   320  //
   321  // Asm: VPADDD, CPU Feature: AVX
   322  func (x Uint32x4) Add(y Uint32x4) Uint32x4
   323  
   324  // Add adds corresponding elements of two vectors.
   325  //
   326  // Asm: VPADDD, CPU Feature: AVX2
   327  func (x Uint32x8) Add(y Uint32x8) Uint32x8
   328  
   329  // Add adds corresponding elements of two vectors.
   330  //
   331  // Asm: VPADDD, CPU Feature: AVX512
   332  func (x Uint32x16) Add(y Uint32x16) Uint32x16
   333  
   334  // Add adds corresponding elements of two vectors.
   335  //
   336  // Asm: VPADDQ, CPU Feature: AVX
   337  func (x Uint64x2) Add(y Uint64x2) Uint64x2
   338  
   339  // Add adds corresponding elements of two vectors.
   340  //
   341  // Asm: VPADDQ, CPU Feature: AVX2
   342  func (x Uint64x4) Add(y Uint64x4) Uint64x4
   343  
   344  // Add adds corresponding elements of two vectors.
   345  //
   346  // Asm: VPADDQ, CPU Feature: AVX512
   347  func (x Uint64x8) Add(y Uint64x8) Uint64x8
   348  
   349  /* AddPairs */
   350  
   351  // AddPairs horizontally adds adjacent pairs of elements.
   352  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
   353  //
   354  // Asm: VHADDPS, CPU Feature: AVX
   355  func (x Float32x4) AddPairs(y Float32x4) Float32x4
   356  
   357  // AddPairs horizontally adds adjacent pairs of elements.
   358  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
   359  //
   360  // Asm: VHADDPS, CPU Feature: AVX
   361  func (x Float32x8) AddPairs(y Float32x8) Float32x8
   362  
   363  // AddPairs horizontally adds adjacent pairs of elements.
   364  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
   365  //
   366  // Asm: VHADDPD, CPU Feature: AVX
   367  func (x Float64x2) AddPairs(y Float64x2) Float64x2
   368  
   369  // AddPairs horizontally adds adjacent pairs of elements.
   370  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
   371  //
   372  // Asm: VHADDPD, CPU Feature: AVX
   373  func (x Float64x4) AddPairs(y Float64x4) Float64x4
   374  
   375  // AddPairs horizontally adds adjacent pairs of elements.
   376  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
   377  //
   378  // Asm: VPHADDW, CPU Feature: AVX
   379  func (x Int16x8) AddPairs(y Int16x8) Int16x8
   380  
   381  // AddPairs horizontally adds adjacent pairs of elements.
   382  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
   383  //
   384  // Asm: VPHADDW, CPU Feature: AVX2
   385  func (x Int16x16) AddPairs(y Int16x16) Int16x16
   386  
   387  // AddPairs horizontally adds adjacent pairs of elements.
   388  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
   389  //
   390  // Asm: VPHADDD, CPU Feature: AVX
   391  func (x Int32x4) AddPairs(y Int32x4) Int32x4
   392  
   393  // AddPairs horizontally adds adjacent pairs of elements.
   394  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
   395  //
   396  // Asm: VPHADDD, CPU Feature: AVX2
   397  func (x Int32x8) AddPairs(y Int32x8) Int32x8
   398  
   399  // AddPairs horizontally adds adjacent pairs of elements.
   400  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
   401  //
   402  // Asm: VPHADDW, CPU Feature: AVX
   403  func (x Uint16x8) AddPairs(y Uint16x8) Uint16x8
   404  
   405  // AddPairs horizontally adds adjacent pairs of elements.
   406  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
   407  //
   408  // Asm: VPHADDW, CPU Feature: AVX2
   409  func (x Uint16x16) AddPairs(y Uint16x16) Uint16x16
   410  
   411  // AddPairs horizontally adds adjacent pairs of elements.
   412  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
   413  //
   414  // Asm: VPHADDD, CPU Feature: AVX
   415  func (x Uint32x4) AddPairs(y Uint32x4) Uint32x4
   416  
   417  // AddPairs horizontally adds adjacent pairs of elements.
   418  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
   419  //
   420  // Asm: VPHADDD, CPU Feature: AVX2
   421  func (x Uint32x8) AddPairs(y Uint32x8) Uint32x8
   422  
   423  /* AddPairsSaturated */
   424  
   425  // AddPairsSaturated horizontally adds adjacent pairs of elements with saturation.
   426  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
   427  //
   428  // Asm: VPHADDSW, CPU Feature: AVX
   429  func (x Int16x8) AddPairsSaturated(y Int16x8) Int16x8
   430  
   431  // AddPairsSaturated horizontally adds adjacent pairs of elements with saturation.
   432  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
   433  //
   434  // Asm: VPHADDSW, CPU Feature: AVX2
   435  func (x Int16x16) AddPairsSaturated(y Int16x16) Int16x16
   436  
   437  /* AddSaturated */
   438  
   439  // AddSaturated adds corresponding elements of two vectors with saturation.
   440  //
   441  // Asm: VPADDSB, CPU Feature: AVX
   442  func (x Int8x16) AddSaturated(y Int8x16) Int8x16
   443  
   444  // AddSaturated adds corresponding elements of two vectors with saturation.
   445  //
   446  // Asm: VPADDSB, CPU Feature: AVX2
   447  func (x Int8x32) AddSaturated(y Int8x32) Int8x32
   448  
   449  // AddSaturated adds corresponding elements of two vectors with saturation.
   450  //
   451  // Asm: VPADDSB, CPU Feature: AVX512
   452  func (x Int8x64) AddSaturated(y Int8x64) Int8x64
   453  
   454  // AddSaturated adds corresponding elements of two vectors with saturation.
   455  //
   456  // Asm: VPADDSW, CPU Feature: AVX
   457  func (x Int16x8) AddSaturated(y Int16x8) Int16x8
   458  
   459  // AddSaturated adds corresponding elements of two vectors with saturation.
   460  //
   461  // Asm: VPADDSW, CPU Feature: AVX2
   462  func (x Int16x16) AddSaturated(y Int16x16) Int16x16
   463  
   464  // AddSaturated adds corresponding elements of two vectors with saturation.
   465  //
   466  // Asm: VPADDSW, CPU Feature: AVX512
   467  func (x Int16x32) AddSaturated(y Int16x32) Int16x32
   468  
   469  // AddSaturated adds corresponding elements of two vectors with saturation.
   470  //
   471  // Asm: VPADDUSB, CPU Feature: AVX
   472  func (x Uint8x16) AddSaturated(y Uint8x16) Uint8x16
   473  
   474  // AddSaturated adds corresponding elements of two vectors with saturation.
   475  //
   476  // Asm: VPADDUSB, CPU Feature: AVX2
   477  func (x Uint8x32) AddSaturated(y Uint8x32) Uint8x32
   478  
   479  // AddSaturated adds corresponding elements of two vectors with saturation.
   480  //
   481  // Asm: VPADDUSB, CPU Feature: AVX512
   482  func (x Uint8x64) AddSaturated(y Uint8x64) Uint8x64
   483  
   484  // AddSaturated adds corresponding elements of two vectors with saturation.
   485  //
   486  // Asm: VPADDUSW, CPU Feature: AVX
   487  func (x Uint16x8) AddSaturated(y Uint16x8) Uint16x8
   488  
   489  // AddSaturated adds corresponding elements of two vectors with saturation.
   490  //
   491  // Asm: VPADDUSW, CPU Feature: AVX2
   492  func (x Uint16x16) AddSaturated(y Uint16x16) Uint16x16
   493  
   494  // AddSaturated adds corresponding elements of two vectors with saturation.
   495  //
   496  // Asm: VPADDUSW, CPU Feature: AVX512
   497  func (x Uint16x32) AddSaturated(y Uint16x32) Uint16x32
   498  
   499  /* AddSub */
   500  
   501  // AddSub subtracts even elements and adds odd elements of two vectors.
   502  //
   503  // Asm: VADDSUBPS, CPU Feature: AVX
   504  func (x Float32x4) AddSub(y Float32x4) Float32x4
   505  
   506  // AddSub subtracts even elements and adds odd elements of two vectors.
   507  //
   508  // Asm: VADDSUBPS, CPU Feature: AVX
   509  func (x Float32x8) AddSub(y Float32x8) Float32x8
   510  
   511  // AddSub subtracts even elements and adds odd elements of two vectors.
   512  //
   513  // Asm: VADDSUBPD, CPU Feature: AVX
   514  func (x Float64x2) AddSub(y Float64x2) Float64x2
   515  
   516  // AddSub subtracts even elements and adds odd elements of two vectors.
   517  //
   518  // Asm: VADDSUBPD, CPU Feature: AVX
   519  func (x Float64x4) AddSub(y Float64x4) Float64x4
   520  
   521  /* And */
   522  
   523  // And performs a bitwise AND operation between two vectors.
   524  //
   525  // Asm: VPAND, CPU Feature: AVX
   526  func (x Int8x16) And(y Int8x16) Int8x16
   527  
   528  // And performs a bitwise AND operation between two vectors.
   529  //
   530  // Asm: VPAND, CPU Feature: AVX2
   531  func (x Int8x32) And(y Int8x32) Int8x32
   532  
   533  // And performs a bitwise AND operation between two vectors.
   534  //
   535  // Asm: VPANDD, CPU Feature: AVX512
   536  func (x Int8x64) And(y Int8x64) Int8x64
   537  
   538  // And performs a bitwise AND operation between two vectors.
   539  //
   540  // Asm: VPAND, CPU Feature: AVX
   541  func (x Int16x8) And(y Int16x8) Int16x8
   542  
   543  // And performs a bitwise AND operation between two vectors.
   544  //
   545  // Asm: VPAND, CPU Feature: AVX2
   546  func (x Int16x16) And(y Int16x16) Int16x16
   547  
   548  // And performs a bitwise AND operation between two vectors.
   549  //
   550  // Asm: VPANDD, CPU Feature: AVX512
   551  func (x Int16x32) And(y Int16x32) Int16x32
   552  
   553  // And performs a bitwise AND operation between two vectors.
   554  //
   555  // Asm: VPAND, CPU Feature: AVX
   556  func (x Int32x4) And(y Int32x4) Int32x4
   557  
   558  // And performs a bitwise AND operation between two vectors.
   559  //
   560  // Asm: VPAND, CPU Feature: AVX2
   561  func (x Int32x8) And(y Int32x8) Int32x8
   562  
   563  // And performs a bitwise AND operation between two vectors.
   564  //
   565  // Asm: VPANDD, CPU Feature: AVX512
   566  func (x Int32x16) And(y Int32x16) Int32x16
   567  
   568  // And performs a bitwise AND operation between two vectors.
   569  //
   570  // Asm: VPAND, CPU Feature: AVX
   571  func (x Int64x2) And(y Int64x2) Int64x2
   572  
   573  // And performs a bitwise AND operation between two vectors.
   574  //
   575  // Asm: VPAND, CPU Feature: AVX2
   576  func (x Int64x4) And(y Int64x4) Int64x4
   577  
   578  // And performs a bitwise AND operation between two vectors.
   579  //
   580  // Asm: VPANDQ, CPU Feature: AVX512
   581  func (x Int64x8) And(y Int64x8) Int64x8
   582  
   583  // And performs a bitwise AND operation between two vectors.
   584  //
   585  // Asm: VPAND, CPU Feature: AVX
   586  func (x Uint8x16) And(y Uint8x16) Uint8x16
   587  
   588  // And performs a bitwise AND operation between two vectors.
   589  //
   590  // Asm: VPAND, CPU Feature: AVX2
   591  func (x Uint8x32) And(y Uint8x32) Uint8x32
   592  
   593  // And performs a bitwise AND operation between two vectors.
   594  //
   595  // Asm: VPANDD, CPU Feature: AVX512
   596  func (x Uint8x64) And(y Uint8x64) Uint8x64
   597  
   598  // And performs a bitwise AND operation between two vectors.
   599  //
   600  // Asm: VPAND, CPU Feature: AVX
   601  func (x Uint16x8) And(y Uint16x8) Uint16x8
   602  
   603  // And performs a bitwise AND operation between two vectors.
   604  //
   605  // Asm: VPAND, CPU Feature: AVX2
   606  func (x Uint16x16) And(y Uint16x16) Uint16x16
   607  
   608  // And performs a bitwise AND operation between two vectors.
   609  //
   610  // Asm: VPANDD, CPU Feature: AVX512
   611  func (x Uint16x32) And(y Uint16x32) Uint16x32
   612  
   613  // And performs a bitwise AND operation between two vectors.
   614  //
   615  // Asm: VPAND, CPU Feature: AVX
   616  func (x Uint32x4) And(y Uint32x4) Uint32x4
   617  
   618  // And performs a bitwise AND operation between two vectors.
   619  //
   620  // Asm: VPAND, CPU Feature: AVX2
   621  func (x Uint32x8) And(y Uint32x8) Uint32x8
   622  
   623  // And performs a bitwise AND operation between two vectors.
   624  //
   625  // Asm: VPANDD, CPU Feature: AVX512
   626  func (x Uint32x16) And(y Uint32x16) Uint32x16
   627  
   628  // And performs a bitwise AND operation between two vectors.
   629  //
   630  // Asm: VPAND, CPU Feature: AVX
   631  func (x Uint64x2) And(y Uint64x2) Uint64x2
   632  
   633  // And performs a bitwise AND operation between two vectors.
   634  //
   635  // Asm: VPAND, CPU Feature: AVX2
   636  func (x Uint64x4) And(y Uint64x4) Uint64x4
   637  
   638  // And performs a bitwise AND operation between two vectors.
   639  //
   640  // Asm: VPANDQ, CPU Feature: AVX512
   641  func (x Uint64x8) And(y Uint64x8) Uint64x8
   642  
   643  /* AndNot */
   644  
   645  // AndNot performs a bitwise x &^ y.
   646  //
   647  // Asm: VPANDN, CPU Feature: AVX
   648  func (x Int8x16) AndNot(y Int8x16) Int8x16
   649  
   650  // AndNot performs a bitwise x &^ y.
   651  //
   652  // Asm: VPANDN, CPU Feature: AVX2
   653  func (x Int8x32) AndNot(y Int8x32) Int8x32
   654  
   655  // AndNot performs a bitwise x &^ y.
   656  //
   657  // Asm: VPANDND, CPU Feature: AVX512
   658  func (x Int8x64) AndNot(y Int8x64) Int8x64
   659  
   660  // AndNot performs a bitwise x &^ y.
   661  //
   662  // Asm: VPANDN, CPU Feature: AVX
   663  func (x Int16x8) AndNot(y Int16x8) Int16x8
   664  
   665  // AndNot performs a bitwise x &^ y.
   666  //
   667  // Asm: VPANDN, CPU Feature: AVX2
   668  func (x Int16x16) AndNot(y Int16x16) Int16x16
   669  
   670  // AndNot performs a bitwise x &^ y.
   671  //
   672  // Asm: VPANDND, CPU Feature: AVX512
   673  func (x Int16x32) AndNot(y Int16x32) Int16x32
   674  
   675  // AndNot performs a bitwise x &^ y.
   676  //
   677  // Asm: VPANDN, CPU Feature: AVX
   678  func (x Int32x4) AndNot(y Int32x4) Int32x4
   679  
   680  // AndNot performs a bitwise x &^ y.
   681  //
   682  // Asm: VPANDN, CPU Feature: AVX2
   683  func (x Int32x8) AndNot(y Int32x8) Int32x8
   684  
   685  // AndNot performs a bitwise x &^ y.
   686  //
   687  // Asm: VPANDND, CPU Feature: AVX512
   688  func (x Int32x16) AndNot(y Int32x16) Int32x16
   689  
   690  // AndNot performs a bitwise x &^ y.
   691  //
   692  // Asm: VPANDN, CPU Feature: AVX
   693  func (x Int64x2) AndNot(y Int64x2) Int64x2
   694  
   695  // AndNot performs a bitwise x &^ y.
   696  //
   697  // Asm: VPANDN, CPU Feature: AVX2
   698  func (x Int64x4) AndNot(y Int64x4) Int64x4
   699  
   700  // AndNot performs a bitwise x &^ y.
   701  //
   702  // Asm: VPANDNQ, CPU Feature: AVX512
   703  func (x Int64x8) AndNot(y Int64x8) Int64x8
   704  
   705  // AndNot performs a bitwise x &^ y.
   706  //
   707  // Asm: VPANDN, CPU Feature: AVX
   708  func (x Uint8x16) AndNot(y Uint8x16) Uint8x16
   709  
   710  // AndNot performs a bitwise x &^ y.
   711  //
   712  // Asm: VPANDN, CPU Feature: AVX2
   713  func (x Uint8x32) AndNot(y Uint8x32) Uint8x32
   714  
   715  // AndNot performs a bitwise x &^ y.
   716  //
   717  // Asm: VPANDND, CPU Feature: AVX512
   718  func (x Uint8x64) AndNot(y Uint8x64) Uint8x64
   719  
   720  // AndNot performs a bitwise x &^ y.
   721  //
   722  // Asm: VPANDN, CPU Feature: AVX
   723  func (x Uint16x8) AndNot(y Uint16x8) Uint16x8
   724  
   725  // AndNot performs a bitwise x &^ y.
   726  //
   727  // Asm: VPANDN, CPU Feature: AVX2
   728  func (x Uint16x16) AndNot(y Uint16x16) Uint16x16
   729  
   730  // AndNot performs a bitwise x &^ y.
   731  //
   732  // Asm: VPANDND, CPU Feature: AVX512
   733  func (x Uint16x32) AndNot(y Uint16x32) Uint16x32
   734  
   735  // AndNot performs a bitwise x &^ y.
   736  //
   737  // Asm: VPANDN, CPU Feature: AVX
   738  func (x Uint32x4) AndNot(y Uint32x4) Uint32x4
   739  
   740  // AndNot performs a bitwise x &^ y.
   741  //
   742  // Asm: VPANDN, CPU Feature: AVX2
   743  func (x Uint32x8) AndNot(y Uint32x8) Uint32x8
   744  
   745  // AndNot performs a bitwise x &^ y.
   746  //
   747  // Asm: VPANDND, CPU Feature: AVX512
   748  func (x Uint32x16) AndNot(y Uint32x16) Uint32x16
   749  
   750  // AndNot performs a bitwise x &^ y.
   751  //
   752  // Asm: VPANDN, CPU Feature: AVX
   753  func (x Uint64x2) AndNot(y Uint64x2) Uint64x2
   754  
   755  // AndNot performs a bitwise x &^ y.
   756  //
   757  // Asm: VPANDN, CPU Feature: AVX2
   758  func (x Uint64x4) AndNot(y Uint64x4) Uint64x4
   759  
   760  // AndNot performs a bitwise x &^ y.
   761  //
   762  // Asm: VPANDNQ, CPU Feature: AVX512
   763  func (x Uint64x8) AndNot(y Uint64x8) Uint64x8
   764  
   765  /* Average */
   766  
   767  // Average computes the rounded average of corresponding elements.
   768  //
   769  // Asm: VPAVGB, CPU Feature: AVX
   770  func (x Uint8x16) Average(y Uint8x16) Uint8x16
   771  
   772  // Average computes the rounded average of corresponding elements.
   773  //
   774  // Asm: VPAVGB, CPU Feature: AVX2
   775  func (x Uint8x32) Average(y Uint8x32) Uint8x32
   776  
   777  // Average computes the rounded average of corresponding elements.
   778  //
   779  // Asm: VPAVGB, CPU Feature: AVX512
   780  func (x Uint8x64) Average(y Uint8x64) Uint8x64
   781  
   782  // Average computes the rounded average of corresponding elements.
   783  //
   784  // Asm: VPAVGW, CPU Feature: AVX
   785  func (x Uint16x8) Average(y Uint16x8) Uint16x8
   786  
   787  // Average computes the rounded average of corresponding elements.
   788  //
   789  // Asm: VPAVGW, CPU Feature: AVX2
   790  func (x Uint16x16) Average(y Uint16x16) Uint16x16
   791  
   792  // Average computes the rounded average of corresponding elements.
   793  //
   794  // Asm: VPAVGW, CPU Feature: AVX512
   795  func (x Uint16x32) Average(y Uint16x32) Uint16x32
   796  
   797  /* Broadcast128 */
   798  
   799  // Broadcast128 copies element zero of its (128-bit) input to all elements of
   800  // the 128-bit output vector.
   801  //
   802  // Asm: VBROADCASTSS, CPU Feature: AVX2
   803  func (x Float32x4) Broadcast128() Float32x4
   804  
   805  // Broadcast128 copies element zero of its 128-bit input x to every element of
   806  // the 128-bit output vector.
   807  //
   808  // Asm: VPBROADCASTQ, CPU Feature: AVX2
   809  func (x Float64x2) Broadcast128() Float64x2
   810  
   811  // Broadcast128 copies element zero of its 128-bit input x to every element of
   812  // the 128-bit output vector.
   813  //
   814  // Asm: VPBROADCASTB, CPU Feature: AVX2
   815  func (x Int8x16) Broadcast128() Int8x16
   816  
   817  // Broadcast128 copies element zero of its 128-bit input x to every element of
   818  // the 128-bit output vector.
   819  //
   820  // Asm: VPBROADCASTW, CPU Feature: AVX2
   821  func (x Int16x8) Broadcast128() Int16x8
   822  
   823  // Broadcast128 copies element zero of its 128-bit input x to every element of
   824  // the 128-bit output vector.
   825  //
   826  // Asm: VPBROADCASTD, CPU Feature: AVX2
   827  func (x Int32x4) Broadcast128() Int32x4
   828  
   829  // Broadcast128 copies element zero of its 128-bit input x to every element of
   830  // the 128-bit output vector.
   831  //
   832  // Asm: VPBROADCASTQ, CPU Feature: AVX2
   833  func (x Int64x2) Broadcast128() Int64x2
   834  
   835  // Broadcast128 copies element zero of its 128-bit input x to every element of
   836  // the 128-bit output vector.
   837  //
   838  // Asm: VPBROADCASTB, CPU Feature: AVX2
   839  func (x Uint8x16) Broadcast128() Uint8x16
   840  
   841  // Broadcast128 copies element zero of its 128-bit input x to every element of
   842  // the 128-bit output vector.
   843  //
   844  // Asm: VPBROADCASTW, CPU Feature: AVX2
   845  func (x Uint16x8) Broadcast128() Uint16x8
   846  
   847  // Broadcast128 copies element zero of its 128-bit input x to every element of
   848  // the 128-bit output vector.
   849  //
   850  // Asm: VPBROADCASTD, CPU Feature: AVX2
   851  func (x Uint32x4) Broadcast128() Uint32x4
   852  
   853  // Broadcast128 copies element zero of its 128-bit input x to every element of
   854  // the 128-bit output vector.
   855  //
   856  // Asm: VPBROADCASTQ, CPU Feature: AVX2
   857  func (x Uint64x2) Broadcast128() Uint64x2
   858  
   859  /* Broadcast256 */
   860  
   861  // Broadcast256 copies element zero of its 128-bit input x to every element of
   862  // the 256-bit output vector.
   863  //
   864  // Asm: VBROADCASTSS, CPU Feature: AVX2
   865  func (x Float32x4) Broadcast256() Float32x8
   866  
   867  // Broadcast256 copies element zero of its 128-bit input x to every element of
   868  // the 256-bit output vector.
   869  //
   870  // Asm: VBROADCASTSD, CPU Feature: AVX2
   871  func (x Float64x2) Broadcast256() Float64x4
   872  
   873  // Broadcast256 copies element zero of its 128-bit input x to every element of
   874  // the 256-bit output vector.
   875  //
   876  // Asm: VPBROADCASTB, CPU Feature: AVX2
   877  func (x Int8x16) Broadcast256() Int8x32
   878  
   879  // Broadcast256 copies element zero of its 128-bit input x to every element of
   880  // the 256-bit output vector.
   881  //
   882  // Asm: VPBROADCASTW, CPU Feature: AVX2
   883  func (x Int16x8) Broadcast256() Int16x16
   884  
   885  // Broadcast256 copies element zero of its 128-bit input x to every element of
   886  // the 256-bit output vector.
   887  //
   888  // Asm: VPBROADCASTD, CPU Feature: AVX2
   889  func (x Int32x4) Broadcast256() Int32x8
   890  
   891  // Broadcast256 copies element zero of its 128-bit input x to every element of
   892  // the 256-bit output vector.
   893  //
   894  // Asm: VPBROADCASTQ, CPU Feature: AVX2
   895  func (x Int64x2) Broadcast256() Int64x4
   896  
   897  // Broadcast256 copies element zero of its 128-bit input x to every element of
   898  // the 256-bit output vector.
   899  //
   900  // Asm: VPBROADCASTB, CPU Feature: AVX2
   901  func (x Uint8x16) Broadcast256() Uint8x32
   902  
   903  // Broadcast256 copies element zero of its 128-bit input x to every element of
   904  // the 256-bit output vector.
   905  //
   906  // Asm: VPBROADCASTW, CPU Feature: AVX2
   907  func (x Uint16x8) Broadcast256() Uint16x16
   908  
   909  // Broadcast256 copies element zero of its 128-bit input x to every element of
   910  // the 256-bit output vector.
   911  //
   912  // Asm: VPBROADCASTD, CPU Feature: AVX2
   913  func (x Uint32x4) Broadcast256() Uint32x8
   914  
   915  // Broadcast256 copies element zero of its 128-bit input x to every element of
   916  // the 256-bit output vector.
   917  //
   918  // Asm: VPBROADCASTQ, CPU Feature: AVX2
   919  func (x Uint64x2) Broadcast256() Uint64x4
   920  
   921  /* Broadcast512 */
   922  
   923  // Broadcast512 copies element zero of its 128-bit input x to every element of
   924  // the 512-bit output vector.
   925  //
   926  // Asm: VBROADCASTSS, CPU Feature: AVX512
   927  func (x Float32x4) Broadcast512() Float32x16
   928  
   929  // Broadcast512 copies element zero of its 128-bit input x to every element of
   930  // the 512-bit output vector.
   931  //
   932  // Asm: VBROADCASTSD, CPU Feature: AVX512
   933  func (x Float64x2) Broadcast512() Float64x8
   934  
   935  // Broadcast512 copies element zero of its 128-bit input x to every element of
   936  // the 512-bit output vector.
   937  //
   938  // Asm: VPBROADCASTB, CPU Feature: AVX512
   939  func (x Int8x16) Broadcast512() Int8x64
   940  
   941  // Broadcast512 copies element zero of its 128-bit input x to every element of
   942  // the 512-bit output vector.
   943  //
   944  // Asm: VPBROADCASTW, CPU Feature: AVX512
   945  func (x Int16x8) Broadcast512() Int16x32
   946  
   947  // Broadcast512 copies element zero of its 128-bit input x to every element of
   948  // the 512-bit output vector.
   949  //
   950  // Asm: VPBROADCASTD, CPU Feature: AVX512
   951  func (x Int32x4) Broadcast512() Int32x16
   952  
   953  // Broadcast512 copies element zero of its 128-bit input x to every element of
   954  // the 512-bit output vector.
   955  //
   956  // Asm: VPBROADCASTQ, CPU Feature: AVX512
   957  func (x Int64x2) Broadcast512() Int64x8
   958  
   959  // Broadcast512 copies element zero of its 128-bit input x to every element of
   960  // the 512-bit output vector.
   961  //
   962  // Asm: VPBROADCASTB, CPU Feature: AVX512
   963  func (x Uint8x16) Broadcast512() Uint8x64
   964  
   965  // Broadcast512 copies element zero of its 128-bit input x to every element of
   966  // the 512-bit output vector.
   967  //
   968  // Asm: VPBROADCASTW, CPU Feature: AVX512
   969  func (x Uint16x8) Broadcast512() Uint16x32
   970  
   971  // Broadcast512 copies element zero of its 128-bit input x to every element of
   972  // the 512-bit output vector.
   973  //
   974  // Asm: VPBROADCASTD, CPU Feature: AVX512
   975  func (x Uint32x4) Broadcast512() Uint32x16
   976  
   977  // Broadcast512 copies element zero of its 128-bit input x to every element of
   978  // the 512-bit output vector.
   979  //
   980  // Asm: VPBROADCASTQ, CPU Feature: AVX512
   981  func (x Uint64x2) Broadcast512() Uint64x8
   982  
   983  /* Ceil */
   984  
   985  // Ceil rounds each element of x up to the nearest integer.
   986  //
   987  // Asm: VROUNDPS, CPU Feature: AVX
   988  func (x Float32x4) Ceil() Float32x4
   989  
   990  // Ceil rounds each element of x up to the nearest integer.
   991  //
   992  // Asm: VROUNDPS, CPU Feature: AVX
   993  func (x Float32x8) Ceil() Float32x8
   994  
   995  // Ceil rounds each element of x up to the nearest integer.
   996  //
   997  // Asm: VROUNDPD, CPU Feature: AVX
   998  func (x Float64x2) Ceil() Float64x2
   999  
  1000  // Ceil rounds each element of x up to the nearest integer.
  1001  //
  1002  // Asm: VROUNDPD, CPU Feature: AVX
  1003  func (x Float64x4) Ceil() Float64x4
  1004  
  1005  /* CeilScaled */
  1006  
  1007  // CeilScaled rounds the elements of x up with the precision specified by prec.
  1008  //
  1009  // Performance is better when prec is a constant; a non-constant value will be translated into a jump table.
  1010  //
  1011  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  1012  func (x Float32x4) CeilScaled(prec uint8) Float32x4
  1013  
  1014  // CeilScaled rounds the elements of x up with the precision specified by prec.
  1015  //
  1016  // Performance is better when prec is a constant; a non-constant value will be translated into a jump table.
  1017  //
  1018  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  1019  func (x Float32x8) CeilScaled(prec uint8) Float32x8
  1020  
  1021  // CeilScaled rounds the elements of x up with the precision specified by prec.
  1022  //
  1023  // Performance is better when prec is a constant; a non-constant value will be translated into a jump table.
  1024  //
  1025  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  1026  func (x Float32x16) CeilScaled(prec uint8) Float32x16
  1027  
  1028  // CeilScaled rounds the elements of x up with the precision specified by prec.
  1029  //
  1030  // Performance is better when prec is a constant; a non-constant value will be translated into a jump table.
  1031  //
  1032  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  1033  func (x Float64x2) CeilScaled(prec uint8) Float64x2
  1034  
  1035  // CeilScaled rounds the elements of x up with the precision specified by prec.
  1036  //
  1037  // Performance is better when prec is a constant; a non-constant value will be translated into a jump table.
  1038  //
  1039  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  1040  func (x Float64x4) CeilScaled(prec uint8) Float64x4
  1041  
  1042  // CeilScaled rounds the elements of x up with the precision specified by prec.
  1043  //
  1044  // Performance is better when prec is a constant; a non-constant value will be translated into a jump table.
  1045  //
  1046  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  1047  func (x Float64x8) CeilScaled(prec uint8) Float64x8
  1048  
  1049  /* CeilScaledResidue */
  1050  
  1051  // CeilScaledResidue computes the difference after ceiling with the precision specified by prec.
  1052  //
  1053  // Performance is better when prec is a constant; a non-constant value will be translated into a jump table.
  1054  //
  1055  // Asm: VREDUCEPS, CPU Feature: AVX512
  1056  func (x Float32x4) CeilScaledResidue(prec uint8) Float32x4
  1057  
  1058  // CeilScaledResidue computes the difference after ceiling with the precision specified by prec.
  1059  //
  1060  // Performance is better when prec is a constant; a non-constant value will be translated into a jump table.
  1061  //
  1062  // Asm: VREDUCEPS, CPU Feature: AVX512
  1063  func (x Float32x8) CeilScaledResidue(prec uint8) Float32x8
  1064  
  1065  // CeilScaledResidue computes the difference after ceiling with the precision specified by prec.
  1066  //
  1067  // Performance is better when prec is a constant; a non-constant value will be translated into a jump table.
  1068  //
  1069  // Asm: VREDUCEPS, CPU Feature: AVX512
  1070  func (x Float32x16) CeilScaledResidue(prec uint8) Float32x16
  1071  
  1072  // CeilScaledResidue computes the difference after ceiling with the precision specified by prec.
  1073  //
  1074  // Performance is better when prec is a constant; a non-constant value will be translated into a jump table.
  1075  //
  1076  // Asm: VREDUCEPD, CPU Feature: AVX512
  1077  func (x Float64x2) CeilScaledResidue(prec uint8) Float64x2
  1078  
  1079  // CeilScaledResidue computes the difference after ceiling with the precision specified by prec.
  1080  //
  1081  // Performance is better when prec is a constant; a non-constant value will be translated into a jump table.
  1082  //
  1083  // Asm: VREDUCEPD, CPU Feature: AVX512
  1084  func (x Float64x4) CeilScaledResidue(prec uint8) Float64x4
  1085  
  1086  // CeilScaledResidue computes the difference after ceiling with the precision specified by prec.
  1087  //
  1088  // Performance is better when prec is a constant; a non-constant value will be translated into a jump table.
  1089  //
  1090  // Asm: VREDUCEPD, CPU Feature: AVX512
  1091  func (x Float64x8) CeilScaledResidue(prec uint8) Float64x8
  1092  
  1093  /* Compress */
  1094  
  1095  // Compress compresses vector x using mask: it selects the elements
  1096  // indicated by mask and packs them into the lower-indexed elements.
  1097  //
  1098  // Asm: VCOMPRESSPS, CPU Feature: AVX512
  1099  func (x Float32x4) Compress(mask Mask32x4) Float32x4
  1100  
  1101  // Compress compresses vector x using mask: it selects the elements
  1102  // indicated by mask and packs them into the lower-indexed elements.
  1103  //
  1104  // Asm: VCOMPRESSPS, CPU Feature: AVX512
  1105  func (x Float32x8) Compress(mask Mask32x8) Float32x8
  1106  
  1107  // Compress compresses vector x using mask: it selects the elements
  1108  // indicated by mask and packs them into the lower-indexed elements.
  1109  //
  1110  // Asm: VCOMPRESSPS, CPU Feature: AVX512
  1111  func (x Float32x16) Compress(mask Mask32x16) Float32x16
  1112  
  1113  // Compress compresses vector x using mask: it selects the elements
  1114  // indicated by mask and packs them into the lower-indexed elements.
  1115  //
  1116  // Asm: VCOMPRESSPD, CPU Feature: AVX512
  1117  func (x Float64x2) Compress(mask Mask64x2) Float64x2
  1118  
  1119  // Compress compresses vector x using mask: it selects the elements
  1120  // indicated by mask and packs them into the lower-indexed elements.
  1121  //
  1122  // Asm: VCOMPRESSPD, CPU Feature: AVX512
  1123  func (x Float64x4) Compress(mask Mask64x4) Float64x4
  1124  
  1125  // Compress compresses vector x using mask: it selects the elements
  1126  // indicated by mask and packs them into the lower-indexed elements.
  1127  //
  1128  // Asm: VCOMPRESSPD, CPU Feature: AVX512
  1129  func (x Float64x8) Compress(mask Mask64x8) Float64x8
  1130  
  1131  // Compress compresses vector x using mask: it selects the elements
  1132  // indicated by mask and packs them into the lower-indexed elements.
  1133  //
  1134  // Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
  1135  func (x Int8x16) Compress(mask Mask8x16) Int8x16
  1136  
  1137  // Compress compresses vector x using mask: it selects the elements
  1138  // indicated by mask and packs them into the lower-indexed elements.
  1139  //
  1140  // Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
  1141  func (x Int8x32) Compress(mask Mask8x32) Int8x32
  1142  
  1143  // Compress compresses vector x using mask: it selects the elements
  1144  // indicated by mask and packs them into the lower-indexed elements.
  1145  //
  1146  // Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
  1147  func (x Int8x64) Compress(mask Mask8x64) Int8x64
  1148  
  1149  // Compress compresses vector x using mask: it selects the elements
  1150  // indicated by mask and packs them into the lower-indexed elements.
  1151  //
  1152  // Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
  1153  func (x Int16x8) Compress(mask Mask16x8) Int16x8
  1154  
  1155  // Compress compresses vector x using mask: it selects the elements
  1156  // indicated by mask and packs them into the lower-indexed elements.
  1157  //
  1158  // Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
  1159  func (x Int16x16) Compress(mask Mask16x16) Int16x16
  1160  
  1161  // Compress compresses vector x using mask: it selects the elements
  1162  // indicated by mask and packs them into the lower-indexed elements.
  1163  //
  1164  // Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
  1165  func (x Int16x32) Compress(mask Mask16x32) Int16x32
  1166  
  1167  // Compress compresses vector x using mask: it selects the elements
  1168  // indicated by mask and packs them into the lower-indexed elements.
  1169  //
  1170  // Asm: VPCOMPRESSD, CPU Feature: AVX512
  1171  func (x Int32x4) Compress(mask Mask32x4) Int32x4
  1172  
  1173  // Compress compresses vector x using mask: it selects the elements
  1174  // indicated by mask and packs them into the lower-indexed elements.
  1175  //
  1176  // Asm: VPCOMPRESSD, CPU Feature: AVX512
  1177  func (x Int32x8) Compress(mask Mask32x8) Int32x8
  1178  
  1179  // Compress compresses vector x using mask: it selects the elements
  1180  // indicated by mask and packs them into the lower-indexed elements.
  1181  //
  1182  // Asm: VPCOMPRESSD, CPU Feature: AVX512
  1183  func (x Int32x16) Compress(mask Mask32x16) Int32x16
  1184  
  1185  // Compress compresses vector x using mask: it selects the elements
  1186  // indicated by mask and packs them into the lower-indexed elements.
  1187  //
  1188  // Asm: VPCOMPRESSQ, CPU Feature: AVX512
  1189  func (x Int64x2) Compress(mask Mask64x2) Int64x2
  1190  
  1191  // Compress compresses vector x using mask: it selects the elements
  1192  // indicated by mask and packs them into the lower-indexed elements.
  1193  //
  1194  // Asm: VPCOMPRESSQ, CPU Feature: AVX512
  1195  func (x Int64x4) Compress(mask Mask64x4) Int64x4
  1196  
  1197  // Compress compresses vector x using mask: it selects the elements
  1198  // indicated by mask and packs them into the lower-indexed elements.
  1199  //
  1200  // Asm: VPCOMPRESSQ, CPU Feature: AVX512
  1201  func (x Int64x8) Compress(mask Mask64x8) Int64x8
  1202  
  1203  // Compress compresses vector x using mask: it selects the elements
  1204  // indicated by mask and packs them into the lower-indexed elements.
  1205  //
  1206  // Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
  1207  func (x Uint8x16) Compress(mask Mask8x16) Uint8x16
  1208  
  1209  // Compress compresses vector x using mask: it selects the elements
  1210  // indicated by mask and packs them into the lower-indexed elements.
  1211  //
  1212  // Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
  1213  func (x Uint8x32) Compress(mask Mask8x32) Uint8x32
  1214  
  1215  // Compress compresses vector x using mask: it selects the elements
  1216  // indicated by mask and packs them into the lower-indexed elements.
  1217  //
  1218  // Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2
  1219  func (x Uint8x64) Compress(mask Mask8x64) Uint8x64
  1220  
  1221  // Compress compresses vector x using mask: it selects the elements
  1222  // indicated by mask and packs them into the lower-indexed elements.
  1223  //
  1224  // Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
  1225  func (x Uint16x8) Compress(mask Mask16x8) Uint16x8
  1226  
  1227  // Compress compresses vector x using mask: it selects the elements
  1228  // indicated by mask and packs them into the lower-indexed elements.
  1229  //
  1230  // Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
  1231  func (x Uint16x16) Compress(mask Mask16x16) Uint16x16
  1232  
  1233  // Compress compresses vector x using mask: it selects the elements
  1234  // indicated by mask and packs them into the lower-indexed elements.
  1235  //
  1236  // Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2
  1237  func (x Uint16x32) Compress(mask Mask16x32) Uint16x32
  1238  
  1239  // Compress compresses vector x using mask: it selects the elements
  1240  // indicated by mask and packs them into the lower-indexed elements.
  1241  //
  1242  // Asm: VPCOMPRESSD, CPU Feature: AVX512
  1243  func (x Uint32x4) Compress(mask Mask32x4) Uint32x4
  1244  
  1245  // Compress compresses vector x using mask: it selects the elements
  1246  // indicated by mask and packs them into the lower-indexed elements.
  1247  //
  1248  // Asm: VPCOMPRESSD, CPU Feature: AVX512
  1249  func (x Uint32x8) Compress(mask Mask32x8) Uint32x8
  1250  
  1251  // Compress compresses vector x using mask: it selects the elements
  1252  // indicated by mask and packs them into the lower-indexed elements.
  1253  //
  1254  // Asm: VPCOMPRESSD, CPU Feature: AVX512
  1255  func (x Uint32x16) Compress(mask Mask32x16) Uint32x16
  1256  
  1257  // Compress compresses vector x using mask: it selects the elements
  1258  // indicated by mask and packs them into the lower-indexed elements.
  1259  //
  1260  // Asm: VPCOMPRESSQ, CPU Feature: AVX512
  1261  func (x Uint64x2) Compress(mask Mask64x2) Uint64x2
  1262  
  1263  // Compress compresses vector x using mask: it selects the elements
  1264  // indicated by mask and packs them into the lower-indexed elements.
  1265  //
  1266  // Asm: VPCOMPRESSQ, CPU Feature: AVX512
  1267  func (x Uint64x4) Compress(mask Mask64x4) Uint64x4
  1268  
  1269  // Compress compresses vector x using mask: it selects the elements
  1270  // indicated by mask and packs them into the lower-indexed elements.
  1271  //
  1272  // Asm: VPCOMPRESSQ, CPU Feature: AVX512
  1273  func (x Uint64x8) Compress(mask Mask64x8) Uint64x8
  1274  
  1275  /* ConcatPermute */
  1276  
  1277  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1278  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1279  // where xy is the concatenation of x (lower half) and y (upper half).
  1280  // Only the bits needed to represent an index into xy are used from each element of indices.
  1281  //
  1282  // Asm: VPERMI2B, CPU Feature: AVX512VBMI
  1283  func (x Int8x16) ConcatPermute(y Int8x16, indices Uint8x16) Int8x16
  1284  
  1285  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1286  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1287  // where xy is the concatenation of x (lower half) and y (upper half).
  1288  // Only the bits needed to represent an index into xy are used from each element of indices.
  1289  //
  1290  // Asm: VPERMI2B, CPU Feature: AVX512VBMI
  1291  func (x Uint8x16) ConcatPermute(y Uint8x16, indices Uint8x16) Uint8x16
  1292  
  1293  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1294  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1295  // where xy is the concatenation of x (lower half) and y (upper half).
  1296  // Only the bits needed to represent an index into xy are used from each element of indices.
  1297  //
  1298  // Asm: VPERMI2B, CPU Feature: AVX512VBMI
  1299  func (x Int8x32) ConcatPermute(y Int8x32, indices Uint8x32) Int8x32
  1300  
  1301  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1302  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1303  // where xy is the concatenation of x (lower half) and y (upper half).
  1304  // Only the bits needed to represent an index into xy are used from each element of indices.
  1305  //
  1306  // Asm: VPERMI2B, CPU Feature: AVX512VBMI
  1307  func (x Uint8x32) ConcatPermute(y Uint8x32, indices Uint8x32) Uint8x32
  1308  
  1309  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1310  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1311  // where xy is the concatenation of x (lower half) and y (upper half).
  1312  // Only the bits needed to represent an index into xy are used from each element of indices.
  1313  //
  1314  // Asm: VPERMI2B, CPU Feature: AVX512VBMI
  1315  func (x Int8x64) ConcatPermute(y Int8x64, indices Uint8x64) Int8x64
  1316  
  1317  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1318  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1319  // where xy is the concatenation of x (lower half) and y (upper half).
  1320  // Only the bits needed to represent an index into xy are used from each element of indices.
  1321  //
  1322  // Asm: VPERMI2B, CPU Feature: AVX512VBMI
  1323  func (x Uint8x64) ConcatPermute(y Uint8x64, indices Uint8x64) Uint8x64
  1324  
  1325  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1326  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1327  // where xy is the concatenation of x (lower half) and y (upper half).
  1328  // Only the bits needed to represent an index into xy are used from each element of indices.
  1329  //
  1330  // Asm: VPERMI2W, CPU Feature: AVX512
  1331  func (x Int16x8) ConcatPermute(y Int16x8, indices Uint16x8) Int16x8
  1332  
  1333  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1334  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1335  // where xy is the concatenation of x (lower half) and y (upper half).
  1336  // Only the bits needed to represent an index into xy are used from each element of indices.
  1337  //
  1338  // Asm: VPERMI2W, CPU Feature: AVX512
  1339  func (x Uint16x8) ConcatPermute(y Uint16x8, indices Uint16x8) Uint16x8
  1340  
  1341  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1342  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1343  // where xy is the concatenation of x (lower half) and y (upper half).
  1344  // Only the bits needed to represent an index into xy are used from each element of indices.
  1345  //
  1346  // Asm: VPERMI2W, CPU Feature: AVX512
  1347  func (x Int16x16) ConcatPermute(y Int16x16, indices Uint16x16) Int16x16
  1348  
  1349  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1350  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1351  // where xy is the concatenation of x (lower half) and y (upper half).
  1352  // Only the bits needed to represent an index into xy are used from each element of indices.
  1353  //
  1354  // Asm: VPERMI2W, CPU Feature: AVX512
  1355  func (x Uint16x16) ConcatPermute(y Uint16x16, indices Uint16x16) Uint16x16
  1356  
  1357  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1358  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1359  // where xy is the concatenation of x (lower half) and y (upper half).
  1360  // Only the bits needed to represent an index into xy are used from each element of indices.
  1361  //
  1362  // Asm: VPERMI2W, CPU Feature: AVX512
  1363  func (x Int16x32) ConcatPermute(y Int16x32, indices Uint16x32) Int16x32
  1364  
  1365  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1366  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1367  // where xy is the concatenation of x (lower half) and y (upper half).
  1368  // Only the bits needed to represent an index into xy are used from each element of indices.
  1369  //
  1370  // Asm: VPERMI2W, CPU Feature: AVX512
  1371  func (x Uint16x32) ConcatPermute(y Uint16x32, indices Uint16x32) Uint16x32
  1372  
  1373  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1374  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1375  // where xy is the concatenation of x (lower half) and y (upper half).
  1376  // Only the bits needed to represent an index into xy are used from each element of indices.
  1377  //
  1378  // Asm: VPERMI2PS, CPU Feature: AVX512
  1379  func (x Float32x4) ConcatPermute(y Float32x4, indices Uint32x4) Float32x4
  1380  
  1381  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1382  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1383  // where xy is the concatenation of x (lower half) and y (upper half).
  1384  // Only the bits needed to represent an index into xy are used from each element of indices.
  1385  //
  1386  // Asm: VPERMI2D, CPU Feature: AVX512
  1387  func (x Int32x4) ConcatPermute(y Int32x4, indices Uint32x4) Int32x4
  1388  
  1389  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1390  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1391  // where xy is the concatenation of x (lower half) and y (upper half).
  1392  // Only the bits needed to represent an index into xy are used from each element of indices.
  1393  //
  1394  // Asm: VPERMI2D, CPU Feature: AVX512
  1395  func (x Uint32x4) ConcatPermute(y Uint32x4, indices Uint32x4) Uint32x4
  1396  
  1397  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1398  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1399  // where xy is the concatenation of x (lower half) and y (upper half).
  1400  // Only the bits needed to represent an index into xy are used from each element of indices.
  1401  //
  1402  // Asm: VPERMI2PS, CPU Feature: AVX512
  1403  func (x Float32x8) ConcatPermute(y Float32x8, indices Uint32x8) Float32x8
  1404  
  1405  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1406  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1407  // where xy is the concatenation of x (lower half) and y (upper half).
  1408  // Only the bits needed to represent an index into xy are used from each element of indices.
  1409  //
  1410  // Asm: VPERMI2D, CPU Feature: AVX512
  1411  func (x Int32x8) ConcatPermute(y Int32x8, indices Uint32x8) Int32x8
  1412  
  1413  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1414  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1415  // where xy is the concatenation of x (lower half) and y (upper half).
  1416  // Only the bits needed to represent an index into xy are used from each element of indices.
  1417  //
  1418  // Asm: VPERMI2D, CPU Feature: AVX512
  1419  func (x Uint32x8) ConcatPermute(y Uint32x8, indices Uint32x8) Uint32x8
  1420  
  1421  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1422  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1423  // where xy is the concatenation of x (lower half) and y (upper half).
  1424  // Only the bits needed to represent an index into xy are used from each element of indices.
  1425  //
  1426  // Asm: VPERMI2PS, CPU Feature: AVX512
  1427  func (x Float32x16) ConcatPermute(y Float32x16, indices Uint32x16) Float32x16
  1428  
  1429  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1430  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1431  // where xy is the concatenation of x (lower half) and y (upper half).
  1432  // Only the bits needed to represent an index into xy are used from each element of indices.
  1433  //
  1434  // Asm: VPERMI2D, CPU Feature: AVX512
  1435  func (x Int32x16) ConcatPermute(y Int32x16, indices Uint32x16) Int32x16
  1436  
  1437  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1438  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1439  // where xy is the concatenation of x (lower half) and y (upper half).
  1440  // Only the bits needed to represent an index into xy are used from each element of indices.
  1441  //
  1442  // Asm: VPERMI2D, CPU Feature: AVX512
  1443  func (x Uint32x16) ConcatPermute(y Uint32x16, indices Uint32x16) Uint32x16
  1444  
  1445  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1446  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1447  // where xy is the concatenation of x (lower half) and y (upper half).
  1448  // Only the bits needed to represent an index into xy are used from each element of indices.
  1449  //
  1450  // Asm: VPERMI2PD, CPU Feature: AVX512
  1451  func (x Float64x2) ConcatPermute(y Float64x2, indices Uint64x2) Float64x2
  1452  
  1453  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1454  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1455  // where xy is the concatenation of x (lower half) and y (upper half).
  1456  // Only the bits needed to represent an index into xy are used from each element of indices.
  1457  //
  1458  // Asm: VPERMI2Q, CPU Feature: AVX512
  1459  func (x Int64x2) ConcatPermute(y Int64x2, indices Uint64x2) Int64x2
  1460  
  1461  // ConcatPermute performs a full permutation of the vectors x and y using indices:
  1462  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1463  // where xy is the concatenation of x (lower half) and y (upper half).
  1464  // Only the bits needed to represent an index into xy are used from each element of indices.
  1465  //
  1466  // Asm: VPERMI2Q, CPU Feature: AVX512
  1467  func (x Uint64x2) ConcatPermute(y Uint64x2, indices Uint64x2) Uint64x2
  1468  
  1469  // ConcatPermute permutes the elements of the concatenation of x and y using indices:
  1470  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1471  // where xy is the concatenation of x (lower half) and y (upper half) and n is the last element index.
  1472  // Each element of indices contributes only the bits needed to represent an index into xy.
  1473  //
  1474  // Asm: VPERMI2PD, CPU Feature: AVX512
  1475  func (x Float64x4) ConcatPermute(y Float64x4, indices Uint64x4) Float64x4
  1476  
  1477  // ConcatPermute permutes the elements of the concatenation of x and y using indices:
  1478  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1479  // where xy is the concatenation of x (lower half) and y (upper half) and n is the last element index.
  1480  // Each element of indices contributes only the bits needed to represent an index into xy.
  1481  //
  1482  // Asm: VPERMI2Q, CPU Feature: AVX512
  1483  func (x Int64x4) ConcatPermute(y Int64x4, indices Uint64x4) Int64x4
  1484  
  1485  // ConcatPermute permutes the elements of the concatenation of x and y using indices:
  1486  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1487  // where xy is the concatenation of x (lower half) and y (upper half) and n is the last element index.
  1488  // Each element of indices contributes only the bits needed to represent an index into xy.
  1489  //
  1490  // Asm: VPERMI2Q, CPU Feature: AVX512
  1491  func (x Uint64x4) ConcatPermute(y Uint64x4, indices Uint64x4) Uint64x4
  1492  
  1493  // ConcatPermute permutes the elements of the concatenation of x and y using indices:
  1494  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1495  // where xy is the concatenation of x (lower half) and y (upper half) and n is the last element index.
  1496  // Each element of indices contributes only the bits needed to represent an index into xy.
  1497  //
  1498  // Asm: VPERMI2PD, CPU Feature: AVX512
  1499  func (x Float64x8) ConcatPermute(y Float64x8, indices Uint64x8) Float64x8
  1500  
  1501  // ConcatPermute permutes the elements of the concatenation of x and y using indices:
  1502  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1503  // where xy is the concatenation of x (lower half) and y (upper half) and n is the last element index.
  1504  // Each element of indices contributes only the bits needed to represent an index into xy.
  1505  //
  1506  // Asm: VPERMI2Q, CPU Feature: AVX512
  1507  func (x Int64x8) ConcatPermute(y Int64x8, indices Uint64x8) Int64x8
  1508  
  1509  // ConcatPermute permutes the elements of the concatenation of x and y using indices:
  1510  // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
  1511  // where xy is the concatenation of x (lower half) and y (upper half) and n is the last element index.
  1512  // Each element of indices contributes only the bits needed to represent an index into xy.
  1513  //
  1514  // Asm: VPERMI2Q, CPU Feature: AVX512
  1515  func (x Uint64x8) ConcatPermute(y Uint64x8, indices Uint64x8) Uint64x8
  1516  
  1517  /* ConcatShiftBytesRight */
  1518  
  1519  // ConcatShiftBytesRight concatenates x and y and shifts the result right by constant bytes.
  1520  // The result vector is the lower half of the shifted concatenation.
  1521  //
  1522  // Performance is better when constant is a compile-time constant; a non-constant value is translated into a jump table.
  1523  //
  1524  // Asm: VPALIGNR, CPU Feature: AVX
  1525  func (x Uint8x16) ConcatShiftBytesRight(constant uint8, y Uint8x16) Uint8x16
  1526  
  1527  /* ConcatShiftBytesRightGrouped */
  1528  
  1529  // ConcatShiftBytesRightGrouped concatenates x and y and shifts the result right by constant bytes.
  1530  // The result vector is the lower half of the shifted concatenation.
  1531  // This operation is performed separately within each 16-byte group.
  1532  //
  1533  // Performance is better when constant is a compile-time constant; a non-constant value is translated into a jump table.
  1534  //
  1535  // Asm: VPALIGNR, CPU Feature: AVX2
  1536  func (x Uint8x32) ConcatShiftBytesRightGrouped(constant uint8, y Uint8x32) Uint8x32
  1537  
  1538  // ConcatShiftBytesRightGrouped concatenates x and y and shifts the result right by constant bytes.
  1539  // The result vector is the lower half of the shifted concatenation.
  1540  // This operation is performed separately within each 16-byte group.
  1541  //
  1542  // Performance is better when constant is a compile-time constant; a non-constant value is translated into a jump table.
  1543  //
  1544  // Asm: VPALIGNR, CPU Feature: AVX512
  1545  func (x Uint8x64) ConcatShiftBytesRightGrouped(constant uint8, y Uint8x64) Uint8x64
  1546  
  1547  /* ConvertToInt32 */
  1548  
  1549  // ConvertToInt32 converts each element of x to int32.
  1550  //
  1551  // Asm: VCVTTPS2DQ, CPU Feature: AVX
  1552  func (x Float32x4) ConvertToInt32() Int32x4
  1553  
  1554  // ConvertToInt32 converts each element of x to int32.
  1555  //
  1556  // Asm: VCVTTPS2DQ, CPU Feature: AVX
  1557  func (x Float32x8) ConvertToInt32() Int32x8
  1558  
  1559  // ConvertToInt32 converts each element of x to int32.
  1560  //
  1561  // Asm: VCVTTPS2DQ, CPU Feature: AVX512
  1562  func (x Float32x16) ConvertToInt32() Int32x16
  1563  
  1564  /* ConvertToUint32 */
  1565  
  1566  // ConvertToUint32 converts each element of x to uint32.
  1567  //
  1568  // Asm: VCVTPS2UDQ, CPU Feature: AVX512
  1569  func (x Float32x4) ConvertToUint32() Uint32x4
  1570  
  1571  // ConvertToUint32 converts each element of x to uint32.
  1572  //
  1573  // Asm: VCVTPS2UDQ, CPU Feature: AVX512
  1574  func (x Float32x8) ConvertToUint32() Uint32x8
  1575  
  1576  // ConvertToUint32 converts each element of x to uint32.
  1577  //
  1578  // Asm: VCVTPS2UDQ, CPU Feature: AVX512
  1579  func (x Float32x16) ConvertToUint32() Uint32x16
  1580  
  1581  /* CopySign */
  1582  
  1583  // CopySign returns each element of x multiplied by -1, 0, or 1,
  1584  // whichever of those constants is nearest to the corresponding element of y.
  1585  //
  1586  // Asm: VPSIGNB, CPU Feature: AVX
  1587  func (x Int8x16) CopySign(y Int8x16) Int8x16
  1588  
  1589  // CopySign returns each element of x multiplied by -1, 0, or 1,
  1590  // whichever of those constants is nearest to the corresponding element of y.
  1591  //
  1592  // Asm: VPSIGNB, CPU Feature: AVX2
  1593  func (x Int8x32) CopySign(y Int8x32) Int8x32
  1594  
  1595  // CopySign returns each element of x multiplied by -1, 0, or 1,
  1596  // whichever of those constants is nearest to the corresponding element of y.
  1597  //
  1598  // Asm: VPSIGNW, CPU Feature: AVX
  1599  func (x Int16x8) CopySign(y Int16x8) Int16x8
  1600  
  1601  // CopySign returns each element of x multiplied by -1, 0, or 1,
  1602  // whichever of those constants is nearest to the corresponding element of y.
  1603  //
  1604  // Asm: VPSIGNW, CPU Feature: AVX2
  1605  func (x Int16x16) CopySign(y Int16x16) Int16x16
  1606  
  1607  // CopySign returns each element of x multiplied by -1, 0, or 1,
  1608  // whichever of those constants is nearest to the corresponding element of y.
  1609  //
  1610  // Asm: VPSIGND, CPU Feature: AVX
  1611  func (x Int32x4) CopySign(y Int32x4) Int32x4
  1612  
  1613  // CopySign returns each element of x multiplied by -1, 0, or 1,
  1614  // whichever of those constants is nearest to the corresponding element of y.
  1615  //
  1616  // Asm: VPSIGND, CPU Feature: AVX2
  1617  func (x Int32x8) CopySign(y Int32x8) Int32x8
  1618  
  1619  /* Div */
  1620  
  1621  // Div divides each element of x by the corresponding element of y.
  1622  //
  1623  // Asm: VDIVPS, CPU Feature: AVX
  1624  func (x Float32x4) Div(y Float32x4) Float32x4
  1625  
  1626  // Div divides each element of x by the corresponding element of y.
  1627  //
  1628  // Asm: VDIVPS, CPU Feature: AVX
  1629  func (x Float32x8) Div(y Float32x8) Float32x8
  1630  
  1631  // Div divides each element of x by the corresponding element of y.
  1632  //
  1633  // Asm: VDIVPS, CPU Feature: AVX512
  1634  func (x Float32x16) Div(y Float32x16) Float32x16
  1635  
  1636  // Div divides each element of x by the corresponding element of y.
  1637  //
  1638  // Asm: VDIVPD, CPU Feature: AVX
  1639  func (x Float64x2) Div(y Float64x2) Float64x2
  1640  
  1641  // Div divides each element of x by the corresponding element of y.
  1642  //
  1643  // Asm: VDIVPD, CPU Feature: AVX
  1644  func (x Float64x4) Div(y Float64x4) Float64x4
  1645  
  1646  // Div divides each element of x by the corresponding element of y.
  1647  //
  1648  // Asm: VDIVPD, CPU Feature: AVX512
  1649  func (x Float64x8) Div(y Float64x8) Float64x8
  1650  
  1651  /* DotProductPairs */
  1652  
  1653  // DotProductPairs multiplies the elements of x and y and adds the pairs of products together,
  1654  // yielding a vector with half as many elements, each twice the input element size.
  1655  //
  1656  // Asm: VPMADDWD, CPU Feature: AVX
  1657  func (x Int16x8) DotProductPairs(y Int16x8) Int32x4
  1658  
  1659  // DotProductPairs multiplies the elements of x and y and adds the pairs of products together,
  1660  // yielding a vector with half as many elements, each twice the input element size.
  1661  //
  1662  // Asm: VPMADDWD, CPU Feature: AVX2
  1663  func (x Int16x16) DotProductPairs(y Int16x16) Int32x8
  1664  
  1665  // DotProductPairs multiplies the elements of x and y and adds the pairs of products together,
  1666  // yielding a vector with half as many elements, each twice the input element size.
  1667  //
  1668  // Asm: VPMADDWD, CPU Feature: AVX512
  1669  func (x Int16x32) DotProductPairs(y Int16x32) Int32x16
  1670  
  1671  /* DotProductPairsSaturated */
  1672  
  1673  // DotProductPairsSaturated multiplies the elements of x and y and adds the pairs of products together with saturation,
  1674  // yielding a vector with half as many elements, each twice the input element size.
  1675  //
  1676  // Asm: VPMADDUBSW, CPU Feature: AVX
  1677  func (x Uint8x16) DotProductPairsSaturated(y Int8x16) Int16x8
  1678  
  1679  // DotProductPairsSaturated multiplies the elements of x and y and adds the pairs of products together with saturation,
  1680  // yielding a vector with half as many elements, each twice the input element size.
  1681  //
  1682  // Asm: VPMADDUBSW, CPU Feature: AVX2
  1683  func (x Uint8x32) DotProductPairsSaturated(y Int8x32) Int16x16
  1684  
  1685  // DotProductPairsSaturated multiplies the elements of x and y and adds the pairs of products together with saturation,
  1686  // yielding a vector with half as many elements, each twice the input element size.
  1687  //
  1688  // Asm: VPMADDUBSW, CPU Feature: AVX512
  1689  func (x Uint8x64) DotProductPairsSaturated(y Int8x64) Int16x32
  1690  
  1691  /* DotProductQuadruple */
  1692  
  1693  // DotProductQuadruple performs dot products on groups of 4 elements of x and y.
  1694  // x.DotProductQuadruple(y).Add(z) will be optimized to the full form of the underlying instruction.
  1695  //
  1696  // Asm: VPDPBUSD, CPU Feature: AVXVNNI
  1697  func (x Int8x16) DotProductQuadruple(y Uint8x16) Int32x4
  1698  
  1699  // DotProductQuadruple performs dot products on groups of 4 elements of x and y.
  1700  // x.DotProductQuadruple(y).Add(z) will be optimized to the full form of the underlying instruction.
  1701  //
  1702  // Asm: VPDPBUSD, CPU Feature: AVXVNNI
  1703  func (x Int8x32) DotProductQuadruple(y Uint8x32) Int32x8
  1704  
  1705  // DotProductQuadruple performs dot products on groups of 4 elements of x and y.
  1706  // x.DotProductQuadruple(y).Add(z) will be optimized to the full form of the underlying instruction.
  1707  //
  1708  // Asm: VPDPBUSD, CPU Feature: AVX512VNNI
  1709  func (x Int8x64) DotProductQuadruple(y Uint8x64) Int32x16
  1710  
  1711  /* DotProductQuadrupleSaturated */
  1712  
  1713  // DotProductQuadrupleSaturated performs dot products on groups of 4 elements of x and y.
  1714  // x.DotProductQuadrupleSaturated(y).Add(z) will be optimized to the full form of the underlying instruction.
  1715  //
  1716  // Asm: VPDPBUSDS, CPU Feature: AVXVNNI
  1717  func (x Int8x16) DotProductQuadrupleSaturated(y Uint8x16) Int32x4
  1718  
  1719  // DotProductQuadrupleSaturated performs dot products on groups of 4 elements of x and y.
  1720  // x.DotProductQuadrupleSaturated(y).Add(z) will be optimized to the full form of the underlying instruction.
  1721  //
  1722  // Asm: VPDPBUSDS, CPU Feature: AVXVNNI
  1723  func (x Int8x32) DotProductQuadrupleSaturated(y Uint8x32) Int32x8
  1724  
  1725  // DotProductQuadrupleSaturated performs dot products on groups of 4 elements of x and y.
  1726  // x.DotProductQuadrupleSaturated(y).Add(z) will be optimized to the full form of the underlying instruction.
  1727  //
  1728  // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
  1729  func (x Int8x64) DotProductQuadrupleSaturated(y Uint8x64) Int32x16
  1730  
  1731  /* Equal */
  1732  
  1733  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1734  //
  1735  // Asm: VPCMPEQB, CPU Feature: AVX
  1736  func (x Int8x16) Equal(y Int8x16) Mask8x16
  1737  
  1738  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1739  //
  1740  // Asm: VPCMPEQB, CPU Feature: AVX2
  1741  func (x Int8x32) Equal(y Int8x32) Mask8x32
  1742  
  1743  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1744  //
  1745  // Asm: VPCMPEQB, CPU Feature: AVX512
  1746  func (x Int8x64) Equal(y Int8x64) Mask8x64
  1747  
  1748  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1749  //
  1750  // Asm: VPCMPEQW, CPU Feature: AVX
  1751  func (x Int16x8) Equal(y Int16x8) Mask16x8
  1752  
  1753  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1754  //
  1755  // Asm: VPCMPEQW, CPU Feature: AVX2
  1756  func (x Int16x16) Equal(y Int16x16) Mask16x16
  1757  
  1758  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1759  //
  1760  // Asm: VPCMPEQW, CPU Feature: AVX512
  1761  func (x Int16x32) Equal(y Int16x32) Mask16x32
  1762  
  1763  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1764  //
  1765  // Asm: VPCMPEQD, CPU Feature: AVX
  1766  func (x Int32x4) Equal(y Int32x4) Mask32x4
  1767  
  1768  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1769  //
  1770  // Asm: VPCMPEQD, CPU Feature: AVX2
  1771  func (x Int32x8) Equal(y Int32x8) Mask32x8
  1772  
  1773  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1774  //
  1775  // Asm: VPCMPEQD, CPU Feature: AVX512
  1776  func (x Int32x16) Equal(y Int32x16) Mask32x16
  1777  
  1778  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1779  //
  1780  // Asm: VPCMPEQQ, CPU Feature: AVX
  1781  func (x Int64x2) Equal(y Int64x2) Mask64x2
  1782  
  1783  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1784  //
  1785  // Asm: VPCMPEQQ, CPU Feature: AVX2
  1786  func (x Int64x4) Equal(y Int64x4) Mask64x4
  1787  
  1788  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1789  //
  1790  // Asm: VPCMPEQQ, CPU Feature: AVX512
  1791  func (x Int64x8) Equal(y Int64x8) Mask64x8
  1792  
  1793  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1794  //
  1795  // Asm: VPCMPEQB, CPU Feature: AVX
  1796  func (x Uint8x16) Equal(y Uint8x16) Mask8x16
  1797  
  1798  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1799  //
  1800  // Asm: VPCMPEQB, CPU Feature: AVX2
  1801  func (x Uint8x32) Equal(y Uint8x32) Mask8x32
  1802  
  1803  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1804  //
  1805  // Asm: VPCMPEQB, CPU Feature: AVX512
  1806  func (x Uint8x64) Equal(y Uint8x64) Mask8x64
  1807  
  1808  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1809  //
  1810  // Asm: VPCMPEQW, CPU Feature: AVX
  1811  func (x Uint16x8) Equal(y Uint16x8) Mask16x8
  1812  
  1813  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1814  //
  1815  // Asm: VPCMPEQW, CPU Feature: AVX2
  1816  func (x Uint16x16) Equal(y Uint16x16) Mask16x16
  1817  
  1818  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1819  //
  1820  // Asm: VPCMPEQW, CPU Feature: AVX512
  1821  func (x Uint16x32) Equal(y Uint16x32) Mask16x32
  1822  
  1823  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1824  //
  1825  // Asm: VPCMPEQD, CPU Feature: AVX
  1826  func (x Uint32x4) Equal(y Uint32x4) Mask32x4
  1827  
  1828  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1829  //
  1830  // Asm: VPCMPEQD, CPU Feature: AVX2
  1831  func (x Uint32x8) Equal(y Uint32x8) Mask32x8
  1832  
  1833  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1834  //
  1835  // Asm: VPCMPEQD, CPU Feature: AVX512
  1836  func (x Uint32x16) Equal(y Uint32x16) Mask32x16
  1837  
  1838  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1839  //
  1840  // Asm: VPCMPEQQ, CPU Feature: AVX
  1841  func (x Uint64x2) Equal(y Uint64x2) Mask64x2
  1842  
  1843  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1844  //
  1845  // Asm: VPCMPEQQ, CPU Feature: AVX2
  1846  func (x Uint64x4) Equal(y Uint64x4) Mask64x4
  1847  
  1848  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1849  //
  1850  // Asm: VPCMPEQQ, CPU Feature: AVX512
  1851  func (x Uint64x8) Equal(y Uint64x8) Mask64x8
  1852  
  1853  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1854  //
  1855  // Asm: VCMPPS, CPU Feature: AVX
  1856  func (x Float32x4) Equal(y Float32x4) Mask32x4
  1857  
  1858  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1859  //
  1860  // Asm: VCMPPS, CPU Feature: AVX
  1861  func (x Float32x8) Equal(y Float32x8) Mask32x8
  1862  
  1863  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1864  //
  1865  // Asm: VCMPPS, CPU Feature: AVX512
  1866  func (x Float32x16) Equal(y Float32x16) Mask32x16
  1867  
  1868  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1869  //
  1870  // Asm: VCMPPD, CPU Feature: AVX
  1871  func (x Float64x2) Equal(y Float64x2) Mask64x2
  1872  
  1873  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1874  //
  1875  // Asm: VCMPPD, CPU Feature: AVX
  1876  func (x Float64x4) Equal(y Float64x4) Mask64x4
  1877  
  1878  // Equal compares the elements of x and y for equality and returns the result as a mask.
  1879  //
  1880  // Asm: VCMPPD, CPU Feature: AVX512
  1881  func (x Float64x8) Equal(y Float64x8) Mask64x8
  1882  
  1883  /* Expand */
  1884  
  1885  // Expand expands vector x, whose elements are packed into its lower part.
  1886  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  1887  //
  1888  // Asm: VEXPANDPS, CPU Feature: AVX512
  1889  func (x Float32x4) Expand(mask Mask32x4) Float32x4
  1890  
  1891  // Expand expands vector x, whose elements are packed into its lower part.
  1892  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  1893  //
  1894  // Asm: VEXPANDPS, CPU Feature: AVX512
  1895  func (x Float32x8) Expand(mask Mask32x8) Float32x8
  1896  
  1897  // Expand expands vector x, whose elements are packed into its lower part.
  1898  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  1899  //
  1900  // Asm: VEXPANDPS, CPU Feature: AVX512
  1901  func (x Float32x16) Expand(mask Mask32x16) Float32x16
  1902  
  1903  // Expand expands vector x, whose elements are packed into its lower part.
  1904  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  1905  //
  1906  // Asm: VEXPANDPD, CPU Feature: AVX512
  1907  func (x Float64x2) Expand(mask Mask64x2) Float64x2
  1908  
  1909  // Expand expands vector x, whose elements are packed into its lower part.
  1910  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  1911  //
  1912  // Asm: VEXPANDPD, CPU Feature: AVX512
  1913  func (x Float64x4) Expand(mask Mask64x4) Float64x4
  1914  
  1915  // Expand expands vector x, whose elements are packed into its lower part.
  1916  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  1917  //
  1918  // Asm: VEXPANDPD, CPU Feature: AVX512
  1919  func (x Float64x8) Expand(mask Mask64x8) Float64x8
  1920  
  1921  // Expand expands vector x, whose elements are packed into its lower part.
  1922  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  1923  //
  1924  // Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
  1925  func (x Int8x16) Expand(mask Mask8x16) Int8x16
  1926  
  1927  // Expand expands vector x, whose elements are packed into its lower part.
  1928  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  1929  //
  1930  // Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
  1931  func (x Int8x32) Expand(mask Mask8x32) Int8x32
  1932  
  1933  // Expand expands vector x, whose elements are packed into its lower part.
  1934  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  1935  //
  1936  // Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
  1937  func (x Int8x64) Expand(mask Mask8x64) Int8x64
  1938  
  1939  // Expand expands vector x, whose elements are packed into its lower part.
  1940  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  1941  //
  1942  // Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
  1943  func (x Int16x8) Expand(mask Mask16x8) Int16x8
  1944  
  1945  // Expand expands vector x, whose elements are packed into its lower part.
  1946  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  1947  //
  1948  // Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
  1949  func (x Int16x16) Expand(mask Mask16x16) Int16x16
  1950  
  1951  // Expand expands vector x, whose elements are packed into its lower part.
  1952  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  1953  //
  1954  // Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
  1955  func (x Int16x32) Expand(mask Mask16x32) Int16x32
  1956  
  1957  // Expand expands vector x, whose elements are packed into its lower part.
  1958  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  1959  //
  1960  // Asm: VPEXPANDD, CPU Feature: AVX512
  1961  func (x Int32x4) Expand(mask Mask32x4) Int32x4
  1962  
  1963  // Expand expands vector x, whose elements are packed into its lower part.
  1964  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  1965  //
  1966  // Asm: VPEXPANDD, CPU Feature: AVX512
  1967  func (x Int32x8) Expand(mask Mask32x8) Int32x8
  1968  
  1969  // Expand expands vector x, whose elements are packed into its lower part.
  1970  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  1971  //
  1972  // Asm: VPEXPANDD, CPU Feature: AVX512
  1973  func (x Int32x16) Expand(mask Mask32x16) Int32x16
  1974  
  1975  // Expand expands vector x, whose elements are packed into its lower part.
  1976  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  1977  //
  1978  // Asm: VPEXPANDQ, CPU Feature: AVX512
  1979  func (x Int64x2) Expand(mask Mask64x2) Int64x2
  1980  
  1981  // Expand expands vector x, whose elements are packed into its lower part.
  1982  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  1983  //
  1984  // Asm: VPEXPANDQ, CPU Feature: AVX512
  1985  func (x Int64x4) Expand(mask Mask64x4) Int64x4
  1986  
  1987  // Expand expands vector x, whose elements are packed into its lower part.
  1988  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  1989  //
  1990  // Asm: VPEXPANDQ, CPU Feature: AVX512
  1991  func (x Int64x8) Expand(mask Mask64x8) Int64x8
  1992  
  1993  // Expand expands vector x, whose elements are packed into its lower part.
  1994  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  1995  //
  1996  // Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
  1997  func (x Uint8x16) Expand(mask Mask8x16) Uint8x16
  1998  
  1999  // Expand expands vector x, whose elements are packed into its lower part.
  2000  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  2001  //
  2002  // Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
  2003  func (x Uint8x32) Expand(mask Mask8x32) Uint8x32
  2004  
  2005  // Expand expands vector x, whose elements are packed into its lower part.
  2006  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  2007  //
  2008  // Asm: VPEXPANDB, CPU Feature: AVX512VBMI2
  2009  func (x Uint8x64) Expand(mask Mask8x64) Uint8x64
  2010  
  2011  // Expand expands vector x, whose elements are packed into its lower part.
  2012  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  2013  //
  2014  // Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
  2015  func (x Uint16x8) Expand(mask Mask16x8) Uint16x8
  2016  
  2017  // Expand expands vector x, whose elements are packed into its lower part.
  2018  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  2019  //
  2020  // Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
  2021  func (x Uint16x16) Expand(mask Mask16x16) Uint16x16
  2022  
  2023  // Expand expands vector x, whose elements are packed into its lower part.
  2024  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  2025  //
  2026  // Asm: VPEXPANDW, CPU Feature: AVX512VBMI2
  2027  func (x Uint16x32) Expand(mask Mask16x32) Uint16x32
  2028  
  2029  // Expand expands vector x, whose elements are packed into its lower part.
  2030  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  2031  //
  2032  // Asm: VPEXPANDD, CPU Feature: AVX512
  2033  func (x Uint32x4) Expand(mask Mask32x4) Uint32x4
  2034  
  2035  // Expand expands vector x, whose elements are packed into its lower part.
  2036  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  2037  //
  2038  // Asm: VPEXPANDD, CPU Feature: AVX512
  2039  func (x Uint32x8) Expand(mask Mask32x8) Uint32x8
  2040  
  2041  // Expand expands vector x, whose elements are packed into its lower part.
  2042  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  2043  //
  2044  // Asm: VPEXPANDD, CPU Feature: AVX512
  2045  func (x Uint32x16) Expand(mask Mask32x16) Uint32x16
  2046  
  2047  // Expand expands vector x, whose elements are packed into its lower part.
  2048  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  2049  //
  2050  // Asm: VPEXPANDQ, CPU Feature: AVX512
  2051  func (x Uint64x2) Expand(mask Mask64x2) Uint64x2
  2052  
  2053  // Expand expands vector x, whose elements are packed into its lower part.
  2054  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  2055  //
  2056  // Asm: VPEXPANDQ, CPU Feature: AVX512
  2057  func (x Uint64x4) Expand(mask Mask64x4) Uint64x4
  2058  
  2059  // Expand expands vector x, whose elements are packed into its lower part.
  2060  // The elements are distributed to the positions selected by mask, in order from the lowest mask element to the highest.
  2061  //
  2062  // Asm: VPEXPANDQ, CPU Feature: AVX512
  2063  func (x Uint64x8) Expand(mask Mask64x8) Uint64x8
  2064  
  2065  /* ExtendLo2ToInt64x2 */
  2066  
  2067  // ExtendLo2ToInt64x2 converts the 2 lowest elements of x to int64.
  2068  // The elements of the result vector are sign-extended.
  2069  //
  2070  // Asm: VPMOVSXBQ, CPU Feature: AVX
  2071  func (x Int8x16) ExtendLo2ToInt64x2() Int64x2
  2072  
  2073  // ExtendLo2ToInt64x2 converts the 2 lowest elements of x to int64.
  2074  // The elements of the result vector are sign-extended.
  2075  //
  2076  // Asm: VPMOVSXWQ, CPU Feature: AVX
  2077  func (x Int16x8) ExtendLo2ToInt64x2() Int64x2
  2078  
  2079  // ExtendLo2ToInt64x2 converts the 2 lowest elements of x to int64.
  2080  // The elements of the result vector are sign-extended.
  2081  //
  2082  // Asm: VPMOVSXDQ, CPU Feature: AVX
  2083  func (x Int32x4) ExtendLo2ToInt64x2() Int64x2
  2084  
  2085  /* ExtendLo2ToUint64x2 */
  2086  
  2087  // ExtendLo2ToUint64x2 converts the 2 lowest elements of x to uint64.
  2088  // The elements of the result vector are zero-extended.
  2089  //
  2090  // Asm: VPMOVZXBQ, CPU Feature: AVX
  2091  func (x Uint8x16) ExtendLo2ToUint64x2() Uint64x2
  2092  
  2093  // ExtendLo2ToUint64x2 converts the 2 lowest elements of x to uint64.
  2094  // The elements of the result vector are zero-extended.
  2095  //
  2096  // Asm: VPMOVZXWQ, CPU Feature: AVX
  2097  func (x Uint16x8) ExtendLo2ToUint64x2() Uint64x2
  2098  
  2099  // ExtendLo2ToUint64x2 converts the 2 lowest elements of x to uint64.
  2100  // The elements of the result vector are zero-extended.
  2101  //
  2102  // Asm: VPMOVZXDQ, CPU Feature: AVX
  2103  func (x Uint32x4) ExtendLo2ToUint64x2() Uint64x2
  2104  
  2105  /* ExtendLo4ToInt32x4 */
  2106  
  2107  // ExtendLo4ToInt32x4 converts the 4 lowest elements of x to int32.
  2108  // The elements of the result vector are sign-extended.
  2109  //
  2110  // Asm: VPMOVSXBD, CPU Feature: AVX
  2111  func (x Int8x16) ExtendLo4ToInt32x4() Int32x4
  2112  
  2113  // ExtendLo4ToInt32x4 converts the 4 lowest elements of x to int32.
  2114  // The elements of the result vector are sign-extended.
  2115  //
  2116  // Asm: VPMOVSXWD, CPU Feature: AVX
  2117  func (x Int16x8) ExtendLo4ToInt32x4() Int32x4
  2118  
  2119  /* ExtendLo4ToInt64x4 */
  2120  
  2121  // ExtendLo4ToInt64x4 converts the 4 lowest elements of x to int64.
  2122  // The elements of the result vector are sign-extended.
  2123  //
  2124  // Asm: VPMOVSXBQ, CPU Feature: AVX2
  2125  func (x Int8x16) ExtendLo4ToInt64x4() Int64x4
  2126  
  2127  // ExtendLo4ToInt64x4 converts the 4 lowest elements of x to int64.
  2128  // The elements of the result vector are sign-extended.
  2129  //
  2130  // Asm: VPMOVSXWQ, CPU Feature: AVX2
  2131  func (x Int16x8) ExtendLo4ToInt64x4() Int64x4
  2132  
  2133  /* ExtendLo4ToUint32x4 */
  2134  
  2135  // ExtendLo4ToUint32x4 converts the 4 lowest elements of x to uint32.
  2136  // The elements of the result vector are zero-extended.
  2137  //
  2138  // Asm: VPMOVZXBD, CPU Feature: AVX
  2139  func (x Uint8x16) ExtendLo4ToUint32x4() Uint32x4
  2140  
  2141  // ExtendLo4ToUint32x4 converts the 4 lowest elements of x to uint32.
  2142  // The elements of the result vector are zero-extended.
  2143  //
  2144  // Asm: VPMOVZXWD, CPU Feature: AVX
  2145  func (x Uint16x8) ExtendLo4ToUint32x4() Uint32x4
  2146  
  2147  /* ExtendLo4ToUint64x4 */
  2148  
  2149  // ExtendLo4ToUint64x4 converts the 4 lowest elements of x to uint64.
  2150  // The elements of the result vector are zero-extended.
  2151  //
  2152  // Asm: VPMOVZXBQ, CPU Feature: AVX2
  2153  func (x Uint8x16) ExtendLo4ToUint64x4() Uint64x4
  2154  
  2155  // ExtendLo4ToUint64x4 converts the 4 lowest elements of x to uint64.
  2156  // The elements of the result vector are zero-extended.
  2157  //
  2158  // Asm: VPMOVZXWQ, CPU Feature: AVX2
  2159  func (x Uint16x8) ExtendLo4ToUint64x4() Uint64x4
  2160  
  2161  /* ExtendLo8ToInt16x8 */
  2162  
  2163  // ExtendLo8ToInt16x8 converts the 8 lowest elements of x to int16.
  2164  // The elements of the result vector are sign-extended.
  2165  //
  2166  // Asm: VPMOVSXBW, CPU Feature: AVX
  2167  func (x Int8x16) ExtendLo8ToInt16x8() Int16x8
  2168  
  2169  /* ExtendLo8ToInt32x8 */
  2170  
  2171  // ExtendLo8ToInt32x8 converts the 8 lowest elements of x to int32.
  2172  // The elements of the result vector are sign-extended.
  2173  //
  2174  // Asm: VPMOVSXBD, CPU Feature: AVX2
  2175  func (x Int8x16) ExtendLo8ToInt32x8() Int32x8
  2176  
  2177  /* ExtendLo8ToInt64x8 */
  2178  
  2179  // ExtendLo8ToInt64x8 converts the 8 lowest elements of x to int64.
  2180  // The elements of the result vector are sign-extended.
  2181  //
  2182  // Asm: VPMOVSXBQ, CPU Feature: AVX512
  2183  func (x Int8x16) ExtendLo8ToInt64x8() Int64x8
  2184  
  2185  /* ExtendLo8ToUint16x8 */
  2186  
  2187  // ExtendLo8ToUint16x8 converts the 8 lowest elements of x to uint16.
  2188  // The result vector's elements are zero-extended.
  2189  //
  2190  // Asm: VPMOVZXBW, CPU Feature: AVX
  2191  func (x Uint8x16) ExtendLo8ToUint16x8() Uint16x8
  2192  
  2193  /* ExtendLo8ToUint32x8 */
  2194  
  2195  // ExtendLo8ToUint32x8 converts the 8 lowest elements of x to uint32.
  2196  // The result vector's elements are zero-extended.
  2197  //
  2198  // Asm: VPMOVZXBD, CPU Feature: AVX2
  2199  func (x Uint8x16) ExtendLo8ToUint32x8() Uint32x8
  2200  
  2201  /* ExtendLo8ToUint64x8 */
  2202  
  2203  // ExtendLo8ToUint64x8 converts the 8 lowest elements of x to uint64.
  2204  // The result vector's elements are zero-extended.
  2205  //
  2206  // Asm: VPMOVZXBQ, CPU Feature: AVX512
  2207  func (x Uint8x16) ExtendLo8ToUint64x8() Uint64x8
  2208  
  2209  /* ExtendToInt16 */
  2210  
  2211  // ExtendToInt16 converts each element of x to int16.
  2212  // The result vector's elements are sign-extended.
  2213  //
  2214  // Asm: VPMOVSXBW, CPU Feature: AVX2
  2215  func (x Int8x16) ExtendToInt16() Int16x16
  2216  
  2217  // ExtendToInt16 converts each element of x to int16.
  2218  // The result vector's elements are sign-extended.
  2219  //
  2220  // Asm: VPMOVSXBW, CPU Feature: AVX512
  2221  func (x Int8x32) ExtendToInt16() Int16x32
  2222  
  2223  /* ExtendToInt32 */
  2224  
  2225  // ExtendToInt32 converts each element of x to int32.
  2226  // The result vector's elements are sign-extended.
  2227  //
  2228  // Asm: VPMOVSXBD, CPU Feature: AVX512
  2229  func (x Int8x16) ExtendToInt32() Int32x16
  2230  
  2231  // ExtendToInt32 converts each element of x to int32.
  2232  // The result vector's elements are sign-extended.
  2233  //
  2234  // Asm: VPMOVSXWD, CPU Feature: AVX2
  2235  func (x Int16x8) ExtendToInt32() Int32x8
  2236  
  2237  // ExtendToInt32 converts each element of x to int32.
  2238  // The result vector's elements are sign-extended.
  2239  //
  2240  // Asm: VPMOVSXWD, CPU Feature: AVX512
  2241  func (x Int16x16) ExtendToInt32() Int32x16
  2242  
  2243  /* ExtendToInt64 */
  2244  
  2245  // ExtendToInt64 converts each element of x to int64.
  2246  // The result vector's elements are sign-extended.
  2247  //
  2248  // Asm: VPMOVSXWQ, CPU Feature: AVX512
  2249  func (x Int16x8) ExtendToInt64() Int64x8
  2250  
  2251  // ExtendToInt64 converts each element of x to int64.
  2252  // The result vector's elements are sign-extended.
  2253  //
  2254  // Asm: VPMOVSXDQ, CPU Feature: AVX2
  2255  func (x Int32x4) ExtendToInt64() Int64x4
  2256  
  2257  // ExtendToInt64 converts each element of x to int64.
  2258  // The result vector's elements are sign-extended.
  2259  //
  2260  // Asm: VPMOVSXDQ, CPU Feature: AVX512
  2261  func (x Int32x8) ExtendToInt64() Int64x8
  2262  
  2263  /* ExtendToUint16 */
  2264  
  2265  // ExtendToUint16 converts each element of x to uint16.
  2266  // The result vector's elements are zero-extended.
  2267  //
  2268  // Asm: VPMOVZXBW, CPU Feature: AVX2
  2269  func (x Uint8x16) ExtendToUint16() Uint16x16
  2270  
  2271  // ExtendToUint16 converts each element of x to uint16.
  2272  // The result vector's elements are zero-extended.
  2273  //
  2274  // Asm: VPMOVZXBW, CPU Feature: AVX512
  2275  func (x Uint8x32) ExtendToUint16() Uint16x32
  2276  
  2277  /* ExtendToUint32 */
  2278  
  2279  // ExtendToUint32 converts each element of x to uint32.
  2280  // The result vector's elements are zero-extended.
  2281  //
  2282  // Asm: VPMOVZXBD, CPU Feature: AVX512
  2283  func (x Uint8x16) ExtendToUint32() Uint32x16
  2284  
  2285  // ExtendToUint32 converts each element of x to uint32.
  2286  // The result vector's elements are zero-extended.
  2287  //
  2288  // Asm: VPMOVZXWD, CPU Feature: AVX2
  2289  func (x Uint16x8) ExtendToUint32() Uint32x8
  2290  
  2291  // ExtendToUint32 converts each element of x to uint32.
  2292  // The result vector's elements are zero-extended.
  2293  //
  2294  // Asm: VPMOVZXWD, CPU Feature: AVX512
  2295  func (x Uint16x16) ExtendToUint32() Uint32x16
  2296  
  2297  /* ExtendToUint64 */
  2298  
  2299  // ExtendToUint64 converts each element of x to uint64.
  2300  // The result vector's elements are zero-extended.
  2301  //
  2302  // Asm: VPMOVZXWQ, CPU Feature: AVX512
  2303  func (x Uint16x8) ExtendToUint64() Uint64x8
  2304  
  2305  // ExtendToUint64 converts each element of x to uint64.
  2306  // The result vector's elements are zero-extended.
  2307  //
  2308  // Asm: VPMOVZXDQ, CPU Feature: AVX2
  2309  func (x Uint32x4) ExtendToUint64() Uint64x4
  2310  
  2311  // ExtendToUint64 converts each element of x to uint64.
  2312  // The result vector's elements are zero-extended.
  2313  //
  2314  // Asm: VPMOVZXDQ, CPU Feature: AVX512
  2315  func (x Uint32x8) ExtendToUint64() Uint64x8
  2316  
  2317  /* Floor */
  2318  
  2319  // Floor rounds each element of x down to the nearest integer.
  2320  //
  2321  // Asm: VROUNDPS, CPU Feature: AVX
  2322  func (x Float32x4) Floor() Float32x4
  2323  
  2324  // Floor rounds each element of x down to the nearest integer.
  2325  //
  2326  // Asm: VROUNDPS, CPU Feature: AVX
  2327  func (x Float32x8) Floor() Float32x8
  2328  
  2329  // Floor rounds each element of x down to the nearest integer.
  2330  //
  2331  // Asm: VROUNDPD, CPU Feature: AVX
  2332  func (x Float64x2) Floor() Float64x2
  2333  
  2334  // Floor rounds each element of x down to the nearest integer.
  2335  //
  2336  // Asm: VROUNDPD, CPU Feature: AVX
  2337  func (x Float64x4) Floor() Float64x4
  2338  
  2339  /* FloorScaled */
  2340  
  2341  // FloorScaled rounds the elements of x down with the specified precision.
  2342  //
  2343  // prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2344  //
  2345  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  2346  func (x Float32x4) FloorScaled(prec uint8) Float32x4
  2347  
  2348  // FloorScaled rounds the elements of x down with the specified precision.
  2349  //
  2350  // prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2351  //
  2352  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  2353  func (x Float32x8) FloorScaled(prec uint8) Float32x8
  2354  
  2355  // FloorScaled rounds the elements of x down with the specified precision.
  2356  //
  2357  // prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2358  //
  2359  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  2360  func (x Float32x16) FloorScaled(prec uint8) Float32x16
  2361  
  2362  // FloorScaled rounds the elements of x down with the specified precision.
  2363  //
  2364  // prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2365  //
  2366  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  2367  func (x Float64x2) FloorScaled(prec uint8) Float64x2
  2368  
  2369  // FloorScaled rounds the elements of x down with the specified precision.
  2370  //
  2371  // prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2372  //
  2373  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  2374  func (x Float64x4) FloorScaled(prec uint8) Float64x4
  2375  
  2376  // FloorScaled rounds the elements of x down with the specified precision.
  2377  //
  2378  // prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2379  //
  2380  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  2381  func (x Float64x8) FloorScaled(prec uint8) Float64x8
  2382  
  2383  /* FloorScaledResidue */
  2384  
  2385  // FloorScaledResidue computes the difference after flooring with the specified precision.
  2386  //
  2387  // prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2388  //
  2389  // Asm: VREDUCEPS, CPU Feature: AVX512
  2390  func (x Float32x4) FloorScaledResidue(prec uint8) Float32x4
  2391  
  2392  // FloorScaledResidue computes the difference after flooring with the specified precision.
  2393  //
  2394  // prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2395  //
  2396  // Asm: VREDUCEPS, CPU Feature: AVX512
  2397  func (x Float32x8) FloorScaledResidue(prec uint8) Float32x8
  2398  
  2399  // FloorScaledResidue computes the difference after flooring with the specified precision.
  2400  //
  2401  // prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2402  //
  2403  // Asm: VREDUCEPS, CPU Feature: AVX512
  2404  func (x Float32x16) FloorScaledResidue(prec uint8) Float32x16
  2405  
  2406  // FloorScaledResidue computes the difference after flooring with the specified precision.
  2407  //
  2408  // prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2409  //
  2410  // Asm: VREDUCEPD, CPU Feature: AVX512
  2411  func (x Float64x2) FloorScaledResidue(prec uint8) Float64x2
  2412  
  2413  // FloorScaledResidue computes the difference after flooring with the specified precision.
  2414  //
  2415  // prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2416  //
  2417  // Asm: VREDUCEPD, CPU Feature: AVX512
  2418  func (x Float64x4) FloorScaledResidue(prec uint8) Float64x4
  2419  
  2420  // FloorScaledResidue computes the difference after flooring with the specified precision.
  2421  //
  2422  // prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2423  //
  2424  // Asm: VREDUCEPD, CPU Feature: AVX512
  2425  func (x Float64x8) FloorScaledResidue(prec uint8) Float64x8
  2426  
  2427  /* GaloisFieldAffineTransform */
  2428  
  2429  // GaloisFieldAffineTransform computes an affine transformation in GF(2^8):
  2430  // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices;
  2431  // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
  2432  // corresponding to a group of 8 elements in x.
  2433  //
  2434  // b results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2435  //
  2436  // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
  2437  func (x Uint8x16) GaloisFieldAffineTransform(y Uint64x2, b uint8) Uint8x16
  2438  
  2439  // GaloisFieldAffineTransform computes an affine transformation in GF(2^8):
  2440  // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices;
  2441  // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
  2442  // corresponding to a group of 8 elements in x.
  2443  //
  2444  // b results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2445  //
  2446  // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
  2447  func (x Uint8x32) GaloisFieldAffineTransform(y Uint64x4, b uint8) Uint8x32
  2448  
  2449  // GaloisFieldAffineTransform computes an affine transformation in GF(2^8):
  2450  // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices;
  2451  // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
  2452  // corresponding to a group of 8 elements in x.
  2453  //
  2454  // b results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2455  //
  2456  // Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
  2457  func (x Uint8x64) GaloisFieldAffineTransform(y Uint64x8, b uint8) Uint8x64
  2458  
  2459  /* GaloisFieldAffineTransformInverse */
  2460  
  2461  // GaloisFieldAffineTransformInverse computes an affine transformation in GF(2^8),
  2462  // with x inverted with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1:
  2463  // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices;
  2464  // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
  2465  // corresponding to a group of 8 elements in x.
  2466  //
  2467  // b results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2468  //
  2469  // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
  2470  func (x Uint8x16) GaloisFieldAffineTransformInverse(y Uint64x2, b uint8) Uint8x16
  2471  
  2472  // GaloisFieldAffineTransformInverse computes an affine transformation in GF(2^8),
  2473  // with x inverted with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1:
  2474  // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices;
  2475  // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
  2476  // corresponding to a group of 8 elements in x.
  2477  //
  2478  // b results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2479  //
  2480  // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
  2481  func (x Uint8x32) GaloisFieldAffineTransformInverse(y Uint64x4, b uint8) Uint8x32
  2482  
  2483  // GaloisFieldAffineTransformInverse computes an affine transformation in GF(2^8),
  2484  // with x inverted with respect to the reduction polynomial x^8 + x^4 + x^3 + x + 1:
  2485  // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrices;
  2486  // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
  2487  // corresponding to a group of 8 elements in x.
  2488  //
  2489  // b results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2490  //
  2491  // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
  2492  func (x Uint8x64) GaloisFieldAffineTransformInverse(y Uint64x8, b uint8) Uint8x64
  2493  
  2494  /* GaloisFieldMul */
  2495  
  2496  // GaloisFieldMul computes the element-wise GF(2^8) product of x and y, using the
  2497  // reduction polynomial x^8 + x^4 + x^3 + x + 1.
  2498  //
  2499  // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
  2500  func (x Uint8x16) GaloisFieldMul(y Uint8x16) Uint8x16
  2501  
  2502  // GaloisFieldMul computes the element-wise GF(2^8) product of x and y, using the
  2503  // reduction polynomial x^8 + x^4 + x^3 + x + 1.
  2504  //
  2505  // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
  2506  func (x Uint8x32) GaloisFieldMul(y Uint8x32) Uint8x32
  2507  
  2508  // GaloisFieldMul computes the element-wise GF(2^8) product of x and y, using the
  2509  // reduction polynomial x^8 + x^4 + x^3 + x + 1.
  2510  //
  2511  // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
  2512  func (x Uint8x64) GaloisFieldMul(y Uint8x64) Uint8x64
  2513  
  2514  /* GetElem */
  2515  
  2516  // GetElem retrieves the value of a single element of x, selected by index.
  2517  //
  2518  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2519  //
  2520  // Asm: VPEXTRD, CPU Feature: AVX
  2521  func (x Float32x4) GetElem(index uint8) float32
  2522  
  2523  // GetElem retrieves the value of a single element of x, selected by index.
  2524  //
  2525  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2526  //
  2527  // Asm: VPEXTRQ, CPU Feature: AVX
  2528  func (x Float64x2) GetElem(index uint8) float64
  2529  
  2530  // GetElem retrieves the value of a single element of x, selected by index.
  2531  //
  2532  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2533  //
  2534  // Asm: VPEXTRB, CPU Feature: AVX512
  2535  func (x Int8x16) GetElem(index uint8) int8
  2536  
  2537  // GetElem retrieves the value of a single element of x, selected by index.
  2538  //
  2539  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2540  //
  2541  // Asm: VPEXTRW, CPU Feature: AVX512
  2542  func (x Int16x8) GetElem(index uint8) int16
  2543  
  2544  // GetElem retrieves the value of a single element of x, selected by index.
  2545  //
  2546  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2547  //
  2548  // Asm: VPEXTRD, CPU Feature: AVX
  2549  func (x Int32x4) GetElem(index uint8) int32
  2550  
  2551  // GetElem retrieves the value of a single element of x, selected by index.
  2552  //
  2553  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2554  //
  2555  // Asm: VPEXTRQ, CPU Feature: AVX
  2556  func (x Int64x2) GetElem(index uint8) int64
  2557  
  2558  // GetElem retrieves the value of a single element of x, selected by index.
  2559  //
  2560  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2561  //
  2562  // Asm: VPEXTRB, CPU Feature: AVX512
  2563  func (x Uint8x16) GetElem(index uint8) uint8
  2564  
  2565  // GetElem retrieves the value of a single element of x, selected by index.
  2566  //
  2567  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2568  //
  2569  // Asm: VPEXTRW, CPU Feature: AVX512
  2570  func (x Uint16x8) GetElem(index uint8) uint16
  2571  
  2572  // GetElem retrieves the value of a single element of x, selected by index.
  2573  //
  2574  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2575  //
  2576  // Asm: VPEXTRD, CPU Feature: AVX
  2577  func (x Uint32x4) GetElem(index uint8) uint32
  2578  
  2579  // GetElem retrieves the value of a single element of x, selected by index.
  2580  //
  2581  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  2582  //
  2583  // Asm: VPEXTRQ, CPU Feature: AVX
  2584  func (x Uint64x2) GetElem(index uint8) uint64
  2585  
  2586  /* GetHi */
  2587  
  2588  // GetHi returns the upper half of x.
  2589  //
  2590  // Asm: VEXTRACTF128, CPU Feature: AVX
  2591  func (x Float32x8) GetHi() Float32x4
  2592  
  2593  // GetHi returns the upper half of x.
  2594  //
  2595  // Asm: VEXTRACTF64X4, CPU Feature: AVX512
  2596  func (x Float32x16) GetHi() Float32x8
  2597  
  2598  // GetHi returns the upper half of x.
  2599  //
  2600  // Asm: VEXTRACTF128, CPU Feature: AVX
  2601  func (x Float64x4) GetHi() Float64x2
  2602  
  2603  // GetHi returns the upper half of x.
  2604  //
  2605  // Asm: VEXTRACTF64X4, CPU Feature: AVX512
  2606  func (x Float64x8) GetHi() Float64x4
  2607  
  2608  // GetHi returns the upper half of x.
  2609  //
  2610  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2611  func (x Int8x32) GetHi() Int8x16
  2612  
  2613  // GetHi returns the upper half of x.
  2614  //
  2615  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2616  func (x Int8x64) GetHi() Int8x32
  2617  
  2618  // GetHi returns the upper half of x.
  2619  //
  2620  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2621  func (x Int16x16) GetHi() Int16x8
  2622  
  2623  // GetHi returns the upper half of x.
  2624  //
  2625  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2626  func (x Int16x32) GetHi() Int16x16
  2627  
  2628  // GetHi returns the upper half of x.
  2629  //
  2630  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2631  func (x Int32x8) GetHi() Int32x4
  2632  
  2633  // GetHi returns the upper half of x.
  2634  //
  2635  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2636  func (x Int32x16) GetHi() Int32x8
  2637  
  2638  // GetHi returns the upper half of x.
  2639  //
  2640  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2641  func (x Int64x4) GetHi() Int64x2
  2642  
  2643  // GetHi returns the upper half of x.
  2644  //
  2645  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2646  func (x Int64x8) GetHi() Int64x4
  2647  
  2648  // GetHi returns the upper half of x.
  2649  //
  2650  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2651  func (x Uint8x32) GetHi() Uint8x16
  2652  
  2653  // GetHi returns the upper half of x.
  2654  //
  2655  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2656  func (x Uint8x64) GetHi() Uint8x32
  2657  
  2658  // GetHi returns the upper half of x.
  2659  //
  2660  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2661  func (x Uint16x16) GetHi() Uint16x8
  2662  
  2663  // GetHi returns the upper half of x.
  2664  //
  2665  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2666  func (x Uint16x32) GetHi() Uint16x16
  2667  
  2668  // GetHi returns the upper half of x.
  2669  //
  2670  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2671  func (x Uint32x8) GetHi() Uint32x4
  2672  
  2673  // GetHi returns the upper half of x.
  2674  //
  2675  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2676  func (x Uint32x16) GetHi() Uint32x8
  2677  
  2678  // GetHi returns the upper half of x.
  2679  //
  2680  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2681  func (x Uint64x4) GetHi() Uint64x2
  2682  
  2683  // GetHi returns the upper half of x.
  2684  //
  2685  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2686  func (x Uint64x8) GetHi() Uint64x4
  2687  
  2688  /* GetLo */
  2689  
  2690  // GetLo returns the lower half of x.
  2691  //
  2692  // Asm: VEXTRACTF128, CPU Feature: AVX
  2693  func (x Float32x8) GetLo() Float32x4
  2694  
  2695  // GetLo returns the lower half of x.
  2696  //
  2697  // Asm: VEXTRACTF64X4, CPU Feature: AVX512
  2698  func (x Float32x16) GetLo() Float32x8
  2699  
  2700  // GetLo returns the lower half of x.
  2701  //
  2702  // Asm: VEXTRACTF128, CPU Feature: AVX
  2703  func (x Float64x4) GetLo() Float64x2
  2704  
  2705  // GetLo returns the lower half of x.
  2706  //
  2707  // Asm: VEXTRACTF64X4, CPU Feature: AVX512
  2708  func (x Float64x8) GetLo() Float64x4
  2709  
  2710  // GetLo returns the lower half of x.
  2711  //
  2712  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2713  func (x Int8x32) GetLo() Int8x16
  2714  
  2715  // GetLo returns the lower half of x.
  2716  //
  2717  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2718  func (x Int8x64) GetLo() Int8x32
  2719  
  2720  // GetLo returns the lower half of x.
  2721  //
  2722  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2723  func (x Int16x16) GetLo() Int16x8
  2724  
  2725  // GetLo returns the lower half of x.
  2726  //
  2727  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2728  func (x Int16x32) GetLo() Int16x16
  2729  
  2730  // GetLo returns the lower half of x.
  2731  //
  2732  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2733  func (x Int32x8) GetLo() Int32x4
  2734  
  2735  // GetLo returns the lower half of x.
  2736  //
  2737  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2738  func (x Int32x16) GetLo() Int32x8
  2739  
  2740  // GetLo returns the lower half of x.
  2741  //
  2742  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2743  func (x Int64x4) GetLo() Int64x2
  2744  
  2745  // GetLo returns the lower half of x.
  2746  //
  2747  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2748  func (x Int64x8) GetLo() Int64x4
  2749  
  2750  // GetLo returns the lower half of x.
  2751  //
  2752  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2753  func (x Uint8x32) GetLo() Uint8x16
  2754  
  2755  // GetLo returns the lower half of x.
  2756  //
  2757  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2758  func (x Uint8x64) GetLo() Uint8x32
  2759  
  2760  // GetLo returns the lower half of x.
  2761  //
  2762  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2763  func (x Uint16x16) GetLo() Uint16x8
  2764  
  2765  // GetLo returns the lower half of x.
  2766  //
  2767  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2768  func (x Uint16x32) GetLo() Uint16x16
  2769  
  2770  // GetLo returns the lower half of x.
  2771  //
  2772  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2773  func (x Uint32x8) GetLo() Uint32x4
  2774  
  2775  // GetLo returns the lower half of x.
  2776  //
  2777  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2778  func (x Uint32x16) GetLo() Uint32x8
  2779  
  2780  // GetLo returns the lower half of x.
  2781  //
  2782  // Asm: VEXTRACTI128, CPU Feature: AVX2
  2783  func (x Uint64x4) GetLo() Uint64x2
  2784  
  2785  // GetLo returns the lower half of x.
  2786  //
  2787  // Asm: VEXTRACTI64X4, CPU Feature: AVX512
  2788  func (x Uint64x8) GetLo() Uint64x4
  2789  
  2790  /* Greater */
  2791  
  2792  // Greater compares x with y for greater than and returns the result as a mask.
  2793  //
  2794  // Asm: VPCMPGTB, CPU Feature: AVX
  2795  func (x Int8x16) Greater(y Int8x16) Mask8x16
  2796  
  2797  // Greater compares x with y for greater than and returns the result as a mask.
  2798  //
  2799  // Asm: VPCMPGTB, CPU Feature: AVX2
  2800  func (x Int8x32) Greater(y Int8x32) Mask8x32
  2801  
  2802  // Greater compares x with y for greater than and returns the result as a mask.
  2803  //
  2804  // Asm: VPCMPGTB, CPU Feature: AVX512
  2805  func (x Int8x64) Greater(y Int8x64) Mask8x64
  2806  
  2807  // Greater compares x with y for greater than and returns the result as a mask.
  2808  //
  2809  // Asm: VPCMPGTW, CPU Feature: AVX
  2810  func (x Int16x8) Greater(y Int16x8) Mask16x8
  2811  
  2812  // Greater compares x with y for greater than and returns the result as a mask.
  2813  //
  2814  // Asm: VPCMPGTW, CPU Feature: AVX2
  2815  func (x Int16x16) Greater(y Int16x16) Mask16x16
  2816  
  2817  // Greater compares x with y for greater than and returns the result as a mask.
  2818  //
  2819  // Asm: VPCMPGTW, CPU Feature: AVX512
  2820  func (x Int16x32) Greater(y Int16x32) Mask16x32
  2821  
  2822  // Greater compares x with y for greater than and returns the result as a mask.
  2823  //
  2824  // Asm: VPCMPGTD, CPU Feature: AVX
  2825  func (x Int32x4) Greater(y Int32x4) Mask32x4
  2826  
  2827  // Greater compares x with y for greater than and returns the result as a mask.
  2828  //
  2829  // Asm: VPCMPGTD, CPU Feature: AVX2
  2830  func (x Int32x8) Greater(y Int32x8) Mask32x8
  2831  
  2832  // Greater compares x with y for greater than and returns the result as a mask.
  2833  //
  2834  // Asm: VPCMPGTD, CPU Feature: AVX512
  2835  func (x Int32x16) Greater(y Int32x16) Mask32x16
  2836  
  2837  // Greater compares x with y for greater than and returns the result as a mask.
  2838  //
  2839  // Asm: VPCMPGTQ, CPU Feature: AVX
  2840  func (x Int64x2) Greater(y Int64x2) Mask64x2
  2841  
  2842  // Greater compares x with y for greater than and returns the result as a mask.
  2843  //
  2844  // Asm: VPCMPGTQ, CPU Feature: AVX2
  2845  func (x Int64x4) Greater(y Int64x4) Mask64x4
  2846  
  2847  // Greater compares x with y for greater than and returns the result as a mask.
  2848  //
  2849  // Asm: VPCMPGTQ, CPU Feature: AVX512
  2850  func (x Int64x8) Greater(y Int64x8) Mask64x8
  2851  
  2852  // Greater compares x with y for greater than and returns the result as a mask.
  2853  //
  2854  // Asm: VCMPPS, CPU Feature: AVX
  2855  func (x Float32x4) Greater(y Float32x4) Mask32x4
  2856  
  2857  // Greater compares x with y for greater than and returns the result as a mask.
  2858  //
  2859  // Asm: VCMPPS, CPU Feature: AVX
  2860  func (x Float32x8) Greater(y Float32x8) Mask32x8
  2861  
  2862  // Greater compares x with y for greater than and returns the result as a mask.
  2863  //
  2864  // Asm: VCMPPS, CPU Feature: AVX512
  2865  func (x Float32x16) Greater(y Float32x16) Mask32x16
  2866  
  2867  // Greater compares x with y for greater than and returns the result as a mask.
  2868  //
  2869  // Asm: VCMPPD, CPU Feature: AVX
  2870  func (x Float64x2) Greater(y Float64x2) Mask64x2
  2871  
  2872  // Greater compares x with y for greater than and returns the result as a mask.
  2873  //
  2874  // Asm: VCMPPD, CPU Feature: AVX
  2875  func (x Float64x4) Greater(y Float64x4) Mask64x4
  2876  
  2877  // Greater compares x with y for greater than and returns the result as a mask.
  2878  //
  2879  // Asm: VCMPPD, CPU Feature: AVX512
  2880  func (x Float64x8) Greater(y Float64x8) Mask64x8
  2881  
  2882  // Greater compares x with y for greater than and returns the result as a mask.
  2883  //
  2884  // Asm: VPCMPUB, CPU Feature: AVX512
  2885  func (x Uint8x64) Greater(y Uint8x64) Mask8x64
  2886  
  2887  // Greater compares x with y for greater than and returns the result as a mask.
  2888  //
  2889  // Asm: VPCMPUW, CPU Feature: AVX512
  2890  func (x Uint16x32) Greater(y Uint16x32) Mask16x32
  2891  
  2892  // Greater compares x with y for greater than and returns the result as a mask.
  2893  //
  2894  // Asm: VPCMPUD, CPU Feature: AVX512
  2895  func (x Uint32x16) Greater(y Uint32x16) Mask32x16
  2896  
  2897  // Greater compares x with y for greater than and returns the result as a mask.
  2898  //
  2899  // Asm: VPCMPUQ, CPU Feature: AVX512
  2900  func (x Uint64x8) Greater(y Uint64x8) Mask64x8
  2901  
  2902  /* GreaterEqual */
  2903  
  2904  // GreaterEqual compares x with y for greater than or equal and returns the result as a mask.
  2905  //
  2906  // Asm: VCMPPS, CPU Feature: AVX
  2907  func (x Float32x4) GreaterEqual(y Float32x4) Mask32x4
  2908  
  2909  // GreaterEqual compares x with y for greater than or equal and returns the result as a mask.
  2910  //
  2911  // Asm: VCMPPS, CPU Feature: AVX
  2912  func (x Float32x8) GreaterEqual(y Float32x8) Mask32x8
  2913  
  2914  // GreaterEqual compares x with y for greater than or equal and returns the result as a mask.
  2915  //
  2916  // Asm: VCMPPS, CPU Feature: AVX512
  2917  func (x Float32x16) GreaterEqual(y Float32x16) Mask32x16
  2918  
  2919  // GreaterEqual compares x with y for greater than or equal and returns the result as a mask.
  2920  //
  2921  // Asm: VCMPPD, CPU Feature: AVX
  2922  func (x Float64x2) GreaterEqual(y Float64x2) Mask64x2
  2923  
  2924  // GreaterEqual compares x with y for greater than or equal and returns the result as a mask.
  2925  //
  2926  // Asm: VCMPPD, CPU Feature: AVX
  2927  func (x Float64x4) GreaterEqual(y Float64x4) Mask64x4
  2928  
  2929  // GreaterEqual compares x with y for greater than or equal and returns the result as a mask.
  2930  //
  2931  // Asm: VCMPPD, CPU Feature: AVX512
  2932  func (x Float64x8) GreaterEqual(y Float64x8) Mask64x8
  2933  
  2934  // GreaterEqual compares x with y for greater than or equal and returns the result as a mask.
  2935  //
  2936  // Asm: VPCMPB, CPU Feature: AVX512
  2937  func (x Int8x64) GreaterEqual(y Int8x64) Mask8x64
  2938  
  2939  // GreaterEqual compares x with y for greater than or equal and returns the result as a mask.
  2940  //
  2941  // Asm: VPCMPW, CPU Feature: AVX512
  2942  func (x Int16x32) GreaterEqual(y Int16x32) Mask16x32
  2943  
  2944  // GreaterEqual compares x with y for greater than or equal and returns the result as a mask.
  2945  //
  2946  // Asm: VPCMPD, CPU Feature: AVX512
  2947  func (x Int32x16) GreaterEqual(y Int32x16) Mask32x16
  2948  
  2949  // GreaterEqual compares x with y for greater than or equal and returns the result as a mask.
  2950  //
  2951  // Asm: VPCMPQ, CPU Feature: AVX512
  2952  func (x Int64x8) GreaterEqual(y Int64x8) Mask64x8
  2953  
  2954  // GreaterEqual compares x with y for greater than or equal and returns the result as a mask.
  2955  //
  2956  // Asm: VPCMPUB, CPU Feature: AVX512
  2957  func (x Uint8x64) GreaterEqual(y Uint8x64) Mask8x64
  2958  
  2959  // GreaterEqual compares x with y for greater than or equal and returns the result as a mask.
  2960  //
  2961  // Asm: VPCMPUW, CPU Feature: AVX512
  2962  func (x Uint16x32) GreaterEqual(y Uint16x32) Mask16x32
  2963  
  2964  // GreaterEqual compares x with y for greater than or equal and returns the result as a mask.
  2965  //
  2966  // Asm: VPCMPUD, CPU Feature: AVX512
  2967  func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16
  2968  
  2969  // GreaterEqual compares x with y for greater than or equal and returns the result as a mask.
  2970  //
  2971  // Asm: VPCMPUQ, CPU Feature: AVX512
  2972  func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8
  2973  
  2974  /* InterleaveHi */
  2975  
  2976  // InterleaveHi interleaves the elements of the high halves of x and y.
  2977  //
  2978  // Asm: VPUNPCKHWD, CPU Feature: AVX
  2979  func (x Int16x8) InterleaveHi(y Int16x8) Int16x8
  2980  
  2981  // InterleaveHi interleaves the elements of the high halves of x and y.
  2982  //
  2983  // Asm: VPUNPCKHDQ, CPU Feature: AVX
  2984  func (x Int32x4) InterleaveHi(y Int32x4) Int32x4
  2985  
  2986  // InterleaveHi interleaves the elements of the high halves of x and y.
  2987  //
  2988  // Asm: VPUNPCKHQDQ, CPU Feature: AVX
  2989  func (x Int64x2) InterleaveHi(y Int64x2) Int64x2
  2990  
  2991  // InterleaveHi interleaves the elements of the high halves of x and y.
  2992  //
  2993  // Asm: VPUNPCKHWD, CPU Feature: AVX
  2994  func (x Uint16x8) InterleaveHi(y Uint16x8) Uint16x8
  2995  
  2996  // InterleaveHi returns the vector formed by interleaving the elements of the high halves of x and y.
  2997  //
  2998  // Asm: VPUNPCKHDQ, CPU Feature: AVX
  2999  func (x Uint32x4) InterleaveHi(y Uint32x4) Uint32x4
  3000  
  3001  // InterleaveHi returns the vector formed by interleaving the elements of the high halves of x and y.
  3002  //
  3003  // Asm: VPUNPCKHQDQ, CPU Feature: AVX
  3004  func (x Uint64x2) InterleaveHi(y Uint64x2) Uint64x2
  3005  
  3006  /* InterleaveHiGrouped */
  3007  
  3008  // InterleaveHiGrouped returns the vector formed by interleaving the elements of the high half of each 128-bit subvector of x and y.
  3009  //
  3010  // Asm: VPUNPCKHWD, CPU Feature: AVX2
  3011  func (x Int16x16) InterleaveHiGrouped(y Int16x16) Int16x16
  3012  
  3013  // InterleaveHiGrouped returns the vector formed by interleaving the elements of the high half of each 128-bit subvector of x and y.
  3014  //
  3015  // Asm: VPUNPCKHWD, CPU Feature: AVX512
  3016  func (x Int16x32) InterleaveHiGrouped(y Int16x32) Int16x32
  3017  
  3018  // InterleaveHiGrouped returns the vector formed by interleaving the elements of the high half of each 128-bit subvector of x and y.
  3019  //
  3020  // Asm: VPUNPCKHDQ, CPU Feature: AVX2
  3021  func (x Int32x8) InterleaveHiGrouped(y Int32x8) Int32x8
  3022  
  3023  // InterleaveHiGrouped returns the vector formed by interleaving the elements of the high half of each 128-bit subvector of x and y.
  3024  //
  3025  // Asm: VPUNPCKHDQ, CPU Feature: AVX512
  3026  func (x Int32x16) InterleaveHiGrouped(y Int32x16) Int32x16
  3027  
  3028  // InterleaveHiGrouped returns the vector formed by interleaving the elements of the high half of each 128-bit subvector of x and y.
  3029  //
  3030  // Asm: VPUNPCKHQDQ, CPU Feature: AVX2
  3031  func (x Int64x4) InterleaveHiGrouped(y Int64x4) Int64x4
  3032  
  3033  // InterleaveHiGrouped returns the vector formed by interleaving the elements of the high half of each 128-bit subvector of x and y.
  3034  //
  3035  // Asm: VPUNPCKHQDQ, CPU Feature: AVX512
  3036  func (x Int64x8) InterleaveHiGrouped(y Int64x8) Int64x8
  3037  
  3038  // InterleaveHiGrouped returns the vector formed by interleaving the elements of the high half of each 128-bit subvector of x and y.
  3039  //
  3040  // Asm: VPUNPCKHWD, CPU Feature: AVX2
  3041  func (x Uint16x16) InterleaveHiGrouped(y Uint16x16) Uint16x16
  3042  
  3043  // InterleaveHiGrouped returns the vector formed by interleaving the elements of the high half of each 128-bit subvector of x and y.
  3044  //
  3045  // Asm: VPUNPCKHWD, CPU Feature: AVX512
  3046  func (x Uint16x32) InterleaveHiGrouped(y Uint16x32) Uint16x32
  3047  
  3048  // InterleaveHiGrouped returns the vector formed by interleaving the elements of the high half of each 128-bit subvector of x and y.
  3049  //
  3050  // Asm: VPUNPCKHDQ, CPU Feature: AVX2
  3051  func (x Uint32x8) InterleaveHiGrouped(y Uint32x8) Uint32x8
  3052  
  3053  // InterleaveHiGrouped returns the vector formed by interleaving the elements of the high half of each 128-bit subvector of x and y.
  3054  //
  3055  // Asm: VPUNPCKHDQ, CPU Feature: AVX512
  3056  func (x Uint32x16) InterleaveHiGrouped(y Uint32x16) Uint32x16
  3057  
  3058  // InterleaveHiGrouped returns the vector formed by interleaving the elements of the high half of each 128-bit subvector of x and y.
  3059  //
  3060  // Asm: VPUNPCKHQDQ, CPU Feature: AVX2
  3061  func (x Uint64x4) InterleaveHiGrouped(y Uint64x4) Uint64x4
  3062  
  3063  // InterleaveHiGrouped returns the vector formed by interleaving the elements of the high half of each 128-bit subvector of x and y.
  3064  //
  3065  // Asm: VPUNPCKHQDQ, CPU Feature: AVX512
  3066  func (x Uint64x8) InterleaveHiGrouped(y Uint64x8) Uint64x8
  3067  
  3068  /* InterleaveLo */
  3069  
  3070  // InterleaveLo returns the vector formed by interleaving the elements of the low halves of x and y.
  3071  //
  3072  // Asm: VPUNPCKLWD, CPU Feature: AVX
  3073  func (x Int16x8) InterleaveLo(y Int16x8) Int16x8
  3074  
  3075  // InterleaveLo returns the vector formed by interleaving the elements of the low halves of x and y.
  3076  //
  3077  // Asm: VPUNPCKLDQ, CPU Feature: AVX
  3078  func (x Int32x4) InterleaveLo(y Int32x4) Int32x4
  3079  
  3080  // InterleaveLo returns the vector formed by interleaving the elements of the low halves of x and y.
  3081  //
  3082  // Asm: VPUNPCKLQDQ, CPU Feature: AVX
  3083  func (x Int64x2) InterleaveLo(y Int64x2) Int64x2
  3084  
  3085  // InterleaveLo returns the vector formed by interleaving the elements of the low halves of x and y.
  3086  //
  3087  // Asm: VPUNPCKLWD, CPU Feature: AVX
  3088  func (x Uint16x8) InterleaveLo(y Uint16x8) Uint16x8
  3089  
  3090  // InterleaveLo returns the vector formed by interleaving the elements of the low halves of x and y.
  3091  //
  3092  // Asm: VPUNPCKLDQ, CPU Feature: AVX
  3093  func (x Uint32x4) InterleaveLo(y Uint32x4) Uint32x4
  3094  
  3095  // InterleaveLo returns the vector formed by interleaving the elements of the low halves of x and y.
  3096  //
  3097  // Asm: VPUNPCKLQDQ, CPU Feature: AVX
  3098  func (x Uint64x2) InterleaveLo(y Uint64x2) Uint64x2
  3099  
  3100  /* InterleaveLoGrouped */
  3101  
  3102  // InterleaveLoGrouped returns the vector formed by interleaving the elements of the low half of each 128-bit subvector of x and y.
  3103  //
  3104  // Asm: VPUNPCKLWD, CPU Feature: AVX2
  3105  func (x Int16x16) InterleaveLoGrouped(y Int16x16) Int16x16
  3106  
  3107  // InterleaveLoGrouped returns the vector formed by interleaving the elements of the low half of each 128-bit subvector of x and y.
  3108  //
  3109  // Asm: VPUNPCKLWD, CPU Feature: AVX512
  3110  func (x Int16x32) InterleaveLoGrouped(y Int16x32) Int16x32
  3111  
  3112  // InterleaveLoGrouped returns the vector formed by interleaving the elements of the low half of each 128-bit subvector of x and y.
  3113  //
  3114  // Asm: VPUNPCKLDQ, CPU Feature: AVX2
  3115  func (x Int32x8) InterleaveLoGrouped(y Int32x8) Int32x8
  3116  
  3117  // InterleaveLoGrouped returns the vector formed by interleaving the elements of the low half of each 128-bit subvector of x and y.
  3118  //
  3119  // Asm: VPUNPCKLDQ, CPU Feature: AVX512
  3120  func (x Int32x16) InterleaveLoGrouped(y Int32x16) Int32x16
  3121  
  3122  // InterleaveLoGrouped returns the vector formed by interleaving the elements of the low half of each 128-bit subvector of x and y.
  3123  //
  3124  // Asm: VPUNPCKLQDQ, CPU Feature: AVX2
  3125  func (x Int64x4) InterleaveLoGrouped(y Int64x4) Int64x4
  3126  
  3127  // InterleaveLoGrouped returns the vector formed by interleaving the elements of the low half of each 128-bit subvector of x and y.
  3128  //
  3129  // Asm: VPUNPCKLQDQ, CPU Feature: AVX512
  3130  func (x Int64x8) InterleaveLoGrouped(y Int64x8) Int64x8
  3131  
  3132  // InterleaveLoGrouped returns the vector formed by interleaving the elements of the low half of each 128-bit subvector of x and y.
  3133  //
  3134  // Asm: VPUNPCKLWD, CPU Feature: AVX2
  3135  func (x Uint16x16) InterleaveLoGrouped(y Uint16x16) Uint16x16
  3136  
  3137  // InterleaveLoGrouped returns the vector formed by interleaving the elements of the low half of each 128-bit subvector of x and y.
  3138  //
  3139  // Asm: VPUNPCKLWD, CPU Feature: AVX512
  3140  func (x Uint16x32) InterleaveLoGrouped(y Uint16x32) Uint16x32
  3141  
  3142  // InterleaveLoGrouped returns the vector formed by interleaving the elements of the low half of each 128-bit subvector of x and y.
  3143  //
  3144  // Asm: VPUNPCKLDQ, CPU Feature: AVX2
  3145  func (x Uint32x8) InterleaveLoGrouped(y Uint32x8) Uint32x8
  3146  
  3147  // InterleaveLoGrouped returns the vector formed by interleaving the elements of the low half of each 128-bit subvector of x and y.
  3148  //
  3149  // Asm: VPUNPCKLDQ, CPU Feature: AVX512
  3150  func (x Uint32x16) InterleaveLoGrouped(y Uint32x16) Uint32x16
  3151  
  3152  // InterleaveLoGrouped returns the vector formed by interleaving the elements of the low half of each 128-bit subvector of x and y.
  3153  //
  3154  // Asm: VPUNPCKLQDQ, CPU Feature: AVX2
  3155  func (x Uint64x4) InterleaveLoGrouped(y Uint64x4) Uint64x4
  3156  
  3157  // InterleaveLoGrouped returns the vector formed by interleaving the elements of the low half of each 128-bit subvector of x and y.
  3158  //
  3159  // Asm: VPUNPCKLQDQ, CPU Feature: AVX512
  3160  func (x Uint64x8) InterleaveLoGrouped(y Uint64x8) Uint64x8
  3161  
  3162  /* IsNan */
  3163  
  3164  // IsNan checks which elements are NaN and returns the result as a mask. It is intended to be called with the receiver as its own argument, as x.IsNan(x).
  3165  //
  3166  // Asm: VCMPPS, CPU Feature: AVX
  3167  func (x Float32x4) IsNan(y Float32x4) Mask32x4
  3168  
  3169  // IsNan checks which elements are NaN and returns the result as a mask. It is intended to be called with the receiver as its own argument, as x.IsNan(x).
  3170  //
  3171  // Asm: VCMPPS, CPU Feature: AVX
  3172  func (x Float32x8) IsNan(y Float32x8) Mask32x8
  3173  
  3174  // IsNan checks which elements are NaN and returns the result as a mask. It is intended to be called with the receiver as its own argument, as x.IsNan(x).
  3175  //
  3176  // Asm: VCMPPS, CPU Feature: AVX512
  3177  func (x Float32x16) IsNan(y Float32x16) Mask32x16
  3178  
  3179  // IsNan checks which elements are NaN and returns the result as a mask. It is intended to be called with the receiver as its own argument, as x.IsNan(x).
  3180  //
  3181  // Asm: VCMPPD, CPU Feature: AVX
  3182  func (x Float64x2) IsNan(y Float64x2) Mask64x2
  3183  
  3184  // IsNan checks which elements are NaN and returns the result as a mask. It is intended to be called with the receiver as its own argument, as x.IsNan(x).
  3185  //
  3186  // Asm: VCMPPD, CPU Feature: AVX
  3187  func (x Float64x4) IsNan(y Float64x4) Mask64x4
  3188  
  3189  // IsNan checks which elements are NaN and returns the result as a mask. It is intended to be called with the receiver as its own argument, as x.IsNan(x).
  3190  //
  3191  // Asm: VCMPPD, CPU Feature: AVX512
  3192  func (x Float64x8) IsNan(y Float64x8) Mask64x8
  3193  
  3194  /* LeadingZeros */
  3195  
  3196  // LeadingZeros returns a vector holding, for each element of x, the count of its leading zeros.
  3197  //
  3198  // Asm: VPLZCNTD, CPU Feature: AVX512
  3199  func (x Int32x4) LeadingZeros() Int32x4
  3200  
  3201  // LeadingZeros returns a vector holding, for each element of x, the count of its leading zeros.
  3202  //
  3203  // Asm: VPLZCNTD, CPU Feature: AVX512
  3204  func (x Int32x8) LeadingZeros() Int32x8
  3205  
  3206  // LeadingZeros returns a vector holding, for each element of x, the count of its leading zeros.
  3207  //
  3208  // Asm: VPLZCNTD, CPU Feature: AVX512
  3209  func (x Int32x16) LeadingZeros() Int32x16
  3210  
  3211  // LeadingZeros returns a vector holding, for each element of x, the count of its leading zeros.
  3212  //
  3213  // Asm: VPLZCNTQ, CPU Feature: AVX512
  3214  func (x Int64x2) LeadingZeros() Int64x2
  3215  
  3216  // LeadingZeros returns a vector holding, for each element of x, the count of its leading zeros.
  3217  //
  3218  // Asm: VPLZCNTQ, CPU Feature: AVX512
  3219  func (x Int64x4) LeadingZeros() Int64x4
  3220  
  3221  // LeadingZeros returns a vector holding, for each element of x, the count of its leading zeros.
  3222  //
  3223  // Asm: VPLZCNTQ, CPU Feature: AVX512
  3224  func (x Int64x8) LeadingZeros() Int64x8
  3225  
  3226  // LeadingZeros returns a vector holding, for each element of x, the count of its leading zeros.
  3227  //
  3228  // Asm: VPLZCNTD, CPU Feature: AVX512
  3229  func (x Uint32x4) LeadingZeros() Uint32x4
  3230  
  3231  // LeadingZeros returns a vector holding, for each element of x, the count of its leading zeros.
  3232  //
  3233  // Asm: VPLZCNTD, CPU Feature: AVX512
  3234  func (x Uint32x8) LeadingZeros() Uint32x8
  3235  
  3236  // LeadingZeros returns a vector holding, for each element of x, the count of its leading zeros.
  3237  //
  3238  // Asm: VPLZCNTD, CPU Feature: AVX512
  3239  func (x Uint32x16) LeadingZeros() Uint32x16
  3240  
  3241  // LeadingZeros returns a vector holding, for each element of x, the count of its leading zeros.
  3242  //
  3243  // Asm: VPLZCNTQ, CPU Feature: AVX512
  3244  func (x Uint64x2) LeadingZeros() Uint64x2
  3245  
  3246  // LeadingZeros returns a vector holding, for each element of x, the count of its leading zeros.
  3247  //
  3248  // Asm: VPLZCNTQ, CPU Feature: AVX512
  3249  func (x Uint64x4) LeadingZeros() Uint64x4
  3250  
  3251  // LeadingZeros returns a vector holding, for each element of x, the count of its leading zeros.
  3252  //
  3253  // Asm: VPLZCNTQ, CPU Feature: AVX512
  3254  func (x Uint64x8) LeadingZeros() Uint64x8
  3255  
  3256  /* Less */
  3257  
  3258  // Less compares corresponding elements of x and y for less than and returns the per-element results as a mask.
  3259  //
  3260  // Asm: VCMPPS, CPU Feature: AVX
  3261  func (x Float32x4) Less(y Float32x4) Mask32x4
  3262  
  3263  // Less compares corresponding elements of x and y for less than and returns the per-element results as a mask.
  3264  //
  3265  // Asm: VCMPPS, CPU Feature: AVX
  3266  func (x Float32x8) Less(y Float32x8) Mask32x8
  3267  
  3268  // Less compares corresponding elements of x and y for less than and returns the per-element results as a mask.
  3269  //
  3270  // Asm: VCMPPS, CPU Feature: AVX512
  3271  func (x Float32x16) Less(y Float32x16) Mask32x16
  3272  
  3273  // Less compares corresponding elements of x and y for less than and returns the per-element results as a mask.
  3274  //
  3275  // Asm: VCMPPD, CPU Feature: AVX
  3276  func (x Float64x2) Less(y Float64x2) Mask64x2
  3277  
  3278  // Less compares corresponding elements of x and y for less than and returns the per-element results as a mask.
  3279  //
  3280  // Asm: VCMPPD, CPU Feature: AVX
  3281  func (x Float64x4) Less(y Float64x4) Mask64x4
  3282  
  3283  // Less compares corresponding elements of x and y for less than and returns the per-element results as a mask.
  3284  //
  3285  // Asm: VCMPPD, CPU Feature: AVX512
  3286  func (x Float64x8) Less(y Float64x8) Mask64x8
  3287  
  3288  // Less compares corresponding elements of x and y for less than and returns the per-element results as a mask.
  3289  //
  3290  // Asm: VPCMPB, CPU Feature: AVX512
  3291  func (x Int8x64) Less(y Int8x64) Mask8x64
  3292  
  3293  // Less compares corresponding elements of x and y for less than and returns the per-element results as a mask.
  3294  //
  3295  // Asm: VPCMPW, CPU Feature: AVX512
  3296  func (x Int16x32) Less(y Int16x32) Mask16x32
  3297  
  3298  // Less compares corresponding elements of x and y for less than and returns the per-element results as a mask.
  3299  //
  3300  // Asm: VPCMPD, CPU Feature: AVX512
  3301  func (x Int32x16) Less(y Int32x16) Mask32x16
  3302  
  3303  // Less compares corresponding elements of x and y for less than and returns the per-element results as a mask.
  3304  //
  3305  // Asm: VPCMPQ, CPU Feature: AVX512
  3306  func (x Int64x8) Less(y Int64x8) Mask64x8
  3307  
  3308  // Less compares corresponding elements of x and y for less than and returns the per-element results as a mask.
  3309  //
  3310  // Asm: VPCMPUB, CPU Feature: AVX512
  3311  func (x Uint8x64) Less(y Uint8x64) Mask8x64
  3312  
  3313  // Less compares corresponding elements of x and y for less than and returns the per-element results as a mask.
  3314  //
  3315  // Asm: VPCMPUW, CPU Feature: AVX512
  3316  func (x Uint16x32) Less(y Uint16x32) Mask16x32
  3317  
  3318  // Less compares corresponding elements of x and y for less than and returns the per-element results as a mask.
  3319  //
  3320  // Asm: VPCMPUD, CPU Feature: AVX512
  3321  func (x Uint32x16) Less(y Uint32x16) Mask32x16
  3322  
  3323  // Less compares corresponding elements of x and y for less than and returns the per-element results as a mask.
  3324  //
  3325  // Asm: VPCMPUQ, CPU Feature: AVX512
  3326  func (x Uint64x8) Less(y Uint64x8) Mask64x8
  3327  
  3328  /* LessEqual */
  3329  
  3330  // LessEqual compares corresponding elements of x and y for less than or equal and returns the per-element results as a mask.
  3331  //
  3332  // Asm: VCMPPS, CPU Feature: AVX
  3333  func (x Float32x4) LessEqual(y Float32x4) Mask32x4
  3334  
  3335  // LessEqual compares corresponding elements of x and y for less than or equal and returns the per-element results as a mask.
  3336  //
  3337  // Asm: VCMPPS, CPU Feature: AVX
  3338  func (x Float32x8) LessEqual(y Float32x8) Mask32x8
  3339  
  3340  // LessEqual compares corresponding elements of x and y for less than or equal and returns the per-element results as a mask.
  3341  //
  3342  // Asm: VCMPPS, CPU Feature: AVX512
  3343  func (x Float32x16) LessEqual(y Float32x16) Mask32x16
  3344  
  3345  // LessEqual compares corresponding elements of x and y for less than or equal and returns the per-element results as a mask.
  3346  //
  3347  // Asm: VCMPPD, CPU Feature: AVX
  3348  func (x Float64x2) LessEqual(y Float64x2) Mask64x2
  3349  
  3350  // LessEqual compares corresponding elements of x and y for less than or equal and returns the per-element results as a mask.
  3351  //
  3352  // Asm: VCMPPD, CPU Feature: AVX
  3353  func (x Float64x4) LessEqual(y Float64x4) Mask64x4
  3354  
  3355  // LessEqual compares corresponding elements of x and y for less than or equal and returns the per-element results as a mask.
  3356  //
  3357  // Asm: VCMPPD, CPU Feature: AVX512
  3358  func (x Float64x8) LessEqual(y Float64x8) Mask64x8
  3359  
  3360  // LessEqual compares corresponding elements of x and y for less than or equal and returns the per-element results as a mask.
  3361  //
  3362  // Asm: VPCMPB, CPU Feature: AVX512
  3363  func (x Int8x64) LessEqual(y Int8x64) Mask8x64
  3364  
  3365  // LessEqual compares corresponding elements of x and y for less than or equal and returns the per-element results as a mask.
  3366  //
  3367  // Asm: VPCMPW, CPU Feature: AVX512
  3368  func (x Int16x32) LessEqual(y Int16x32) Mask16x32
  3369  
  3370  // LessEqual compares corresponding elements of x and y for less than or equal and returns the per-element results as a mask.
  3371  //
  3372  // Asm: VPCMPD, CPU Feature: AVX512
  3373  func (x Int32x16) LessEqual(y Int32x16) Mask32x16
  3374  
  3375  // LessEqual compares corresponding elements of x and y for less than or equal and returns the per-element results as a mask.
  3376  //
  3377  // Asm: VPCMPQ, CPU Feature: AVX512
  3378  func (x Int64x8) LessEqual(y Int64x8) Mask64x8
  3379  
  3380  // LessEqual compares corresponding elements of x and y for less than or equal and returns the per-element results as a mask.
  3381  //
  3382  // Asm: VPCMPUB, CPU Feature: AVX512
  3383  func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64
  3384  
  3385  // LessEqual compares corresponding elements of x and y for less than or equal and returns the per-element results as a mask.
  3386  //
  3387  // Asm: VPCMPUW, CPU Feature: AVX512
  3388  func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32
  3389  
  3390  // LessEqual compares corresponding elements of x and y for less than or equal and returns the per-element results as a mask.
  3391  //
  3392  // Asm: VPCMPUD, CPU Feature: AVX512
  3393  func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16
  3394  
  3395  // LessEqual compares corresponding elements of x and y for less than or equal and returns the per-element results as a mask.
  3396  //
  3397  // Asm: VPCMPUQ, CPU Feature: AVX512
  3398  func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8
  3399  
  3400  /* Max */
  3401  
  3402  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3403  //
  3404  // Asm: VMAXPS, CPU Feature: AVX
  3405  func (x Float32x4) Max(y Float32x4) Float32x4
  3406  
  3407  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3408  //
  3409  // Asm: VMAXPS, CPU Feature: AVX
  3410  func (x Float32x8) Max(y Float32x8) Float32x8
  3411  
  3412  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3413  //
  3414  // Asm: VMAXPS, CPU Feature: AVX512
  3415  func (x Float32x16) Max(y Float32x16) Float32x16
  3416  
  3417  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3418  //
  3419  // Asm: VMAXPD, CPU Feature: AVX
  3420  func (x Float64x2) Max(y Float64x2) Float64x2
  3421  
  3422  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3423  //
  3424  // Asm: VMAXPD, CPU Feature: AVX
  3425  func (x Float64x4) Max(y Float64x4) Float64x4
  3426  
  3427  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3428  //
  3429  // Asm: VMAXPD, CPU Feature: AVX512
  3430  func (x Float64x8) Max(y Float64x8) Float64x8
  3431  
  3432  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3433  //
  3434  // Asm: VPMAXSB, CPU Feature: AVX
  3435  func (x Int8x16) Max(y Int8x16) Int8x16
  3436  
  3437  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3438  //
  3439  // Asm: VPMAXSB, CPU Feature: AVX2
  3440  func (x Int8x32) Max(y Int8x32) Int8x32
  3441  
  3442  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3443  //
  3444  // Asm: VPMAXSB, CPU Feature: AVX512
  3445  func (x Int8x64) Max(y Int8x64) Int8x64
  3446  
  3447  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3448  //
  3449  // Asm: VPMAXSW, CPU Feature: AVX
  3450  func (x Int16x8) Max(y Int16x8) Int16x8
  3451  
  3452  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3453  //
  3454  // Asm: VPMAXSW, CPU Feature: AVX2
  3455  func (x Int16x16) Max(y Int16x16) Int16x16
  3456  
  3457  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3458  //
  3459  // Asm: VPMAXSW, CPU Feature: AVX512
  3460  func (x Int16x32) Max(y Int16x32) Int16x32
  3461  
  3462  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3463  //
  3464  // Asm: VPMAXSD, CPU Feature: AVX
  3465  func (x Int32x4) Max(y Int32x4) Int32x4
  3466  
  3467  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3468  //
  3469  // Asm: VPMAXSD, CPU Feature: AVX2
  3470  func (x Int32x8) Max(y Int32x8) Int32x8
  3471  
  3472  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3473  //
  3474  // Asm: VPMAXSD, CPU Feature: AVX512
  3475  func (x Int32x16) Max(y Int32x16) Int32x16
  3476  
  3477  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3478  //
  3479  // Asm: VPMAXSQ, CPU Feature: AVX512
  3480  func (x Int64x2) Max(y Int64x2) Int64x2
  3481  
  3482  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3483  //
  3484  // Asm: VPMAXSQ, CPU Feature: AVX512
  3485  func (x Int64x4) Max(y Int64x4) Int64x4
  3486  
  3487  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3488  //
  3489  // Asm: VPMAXSQ, CPU Feature: AVX512
  3490  func (x Int64x8) Max(y Int64x8) Int64x8
  3491  
  3492  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3493  //
  3494  // Asm: VPMAXUB, CPU Feature: AVX
  3495  func (x Uint8x16) Max(y Uint8x16) Uint8x16
  3496  
  3497  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3498  //
  3499  // Asm: VPMAXUB, CPU Feature: AVX2
  3500  func (x Uint8x32) Max(y Uint8x32) Uint8x32
  3501  
  3502  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3503  //
  3504  // Asm: VPMAXUB, CPU Feature: AVX512
  3505  func (x Uint8x64) Max(y Uint8x64) Uint8x64
  3506  
  3507  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3508  //
  3509  // Asm: VPMAXUW, CPU Feature: AVX
  3510  func (x Uint16x8) Max(y Uint16x8) Uint16x8
  3511  
  3512  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3513  //
  3514  // Asm: VPMAXUW, CPU Feature: AVX2
  3515  func (x Uint16x16) Max(y Uint16x16) Uint16x16
  3516  
  3517  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3518  //
  3519  // Asm: VPMAXUW, CPU Feature: AVX512
  3520  func (x Uint16x32) Max(y Uint16x32) Uint16x32
  3521  
  3522  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3523  //
  3524  // Asm: VPMAXUD, CPU Feature: AVX
  3525  func (x Uint32x4) Max(y Uint32x4) Uint32x4
  3526  
  3527  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3528  //
  3529  // Asm: VPMAXUD, CPU Feature: AVX2
  3530  func (x Uint32x8) Max(y Uint32x8) Uint32x8
  3531  
  3532  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3533  //
  3534  // Asm: VPMAXUD, CPU Feature: AVX512
  3535  func (x Uint32x16) Max(y Uint32x16) Uint32x16
  3536  
  3537  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3538  //
  3539  // Asm: VPMAXUQ, CPU Feature: AVX512
  3540  func (x Uint64x2) Max(y Uint64x2) Uint64x2
  3541  
  3542  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3543  //
  3544  // Asm: VPMAXUQ, CPU Feature: AVX512
  3545  func (x Uint64x4) Max(y Uint64x4) Uint64x4
  3546  
  3547  // Max returns a vector whose elements are the maximums of the corresponding elements of x and y.
  3548  //
  3549  // Asm: VPMAXUQ, CPU Feature: AVX512
  3550  func (x Uint64x8) Max(y Uint64x8) Uint64x8
  3551  
  3552  /* Min */
  3553  
  3554  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3555  //
  3556  // Asm: VMINPS, CPU Feature: AVX
  3557  func (x Float32x4) Min(y Float32x4) Float32x4
  3558  
  3559  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3560  //
  3561  // Asm: VMINPS, CPU Feature: AVX
  3562  func (x Float32x8) Min(y Float32x8) Float32x8
  3563  
  3564  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3565  //
  3566  // Asm: VMINPS, CPU Feature: AVX512
  3567  func (x Float32x16) Min(y Float32x16) Float32x16
  3568  
  3569  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3570  //
  3571  // Asm: VMINPD, CPU Feature: AVX
  3572  func (x Float64x2) Min(y Float64x2) Float64x2
  3573  
  3574  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3575  //
  3576  // Asm: VMINPD, CPU Feature: AVX
  3577  func (x Float64x4) Min(y Float64x4) Float64x4
  3578  
  3579  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3580  //
  3581  // Asm: VMINPD, CPU Feature: AVX512
  3582  func (x Float64x8) Min(y Float64x8) Float64x8
  3583  
  3584  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3585  //
  3586  // Asm: VPMINSB, CPU Feature: AVX
  3587  func (x Int8x16) Min(y Int8x16) Int8x16
  3588  
  3589  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3590  //
  3591  // Asm: VPMINSB, CPU Feature: AVX2
  3592  func (x Int8x32) Min(y Int8x32) Int8x32
  3593  
  3594  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3595  //
  3596  // Asm: VPMINSB, CPU Feature: AVX512
  3597  func (x Int8x64) Min(y Int8x64) Int8x64
  3598  
  3599  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3600  //
  3601  // Asm: VPMINSW, CPU Feature: AVX
  3602  func (x Int16x8) Min(y Int16x8) Int16x8
  3603  
  3604  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3605  //
  3606  // Asm: VPMINSW, CPU Feature: AVX2
  3607  func (x Int16x16) Min(y Int16x16) Int16x16
  3608  
  3609  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3610  //
  3611  // Asm: VPMINSW, CPU Feature: AVX512
  3612  func (x Int16x32) Min(y Int16x32) Int16x32
  3613  
  3614  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3615  //
  3616  // Asm: VPMINSD, CPU Feature: AVX
  3617  func (x Int32x4) Min(y Int32x4) Int32x4
  3618  
  3619  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3620  //
  3621  // Asm: VPMINSD, CPU Feature: AVX2
  3622  func (x Int32x8) Min(y Int32x8) Int32x8
  3623  
  3624  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3625  //
  3626  // Asm: VPMINSD, CPU Feature: AVX512
  3627  func (x Int32x16) Min(y Int32x16) Int32x16
  3628  
  3629  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3630  //
  3631  // Asm: VPMINSQ, CPU Feature: AVX512
  3632  func (x Int64x2) Min(y Int64x2) Int64x2
  3633  
  3634  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3635  //
  3636  // Asm: VPMINSQ, CPU Feature: AVX512
  3637  func (x Int64x4) Min(y Int64x4) Int64x4
  3638  
  3639  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3640  //
  3641  // Asm: VPMINSQ, CPU Feature: AVX512
  3642  func (x Int64x8) Min(y Int64x8) Int64x8
  3643  
  3644  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3645  //
  3646  // Asm: VPMINUB, CPU Feature: AVX
  3647  func (x Uint8x16) Min(y Uint8x16) Uint8x16
  3648  
  3649  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3650  //
  3651  // Asm: VPMINUB, CPU Feature: AVX2
  3652  func (x Uint8x32) Min(y Uint8x32) Uint8x32
  3653  
  3654  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3655  //
  3656  // Asm: VPMINUB, CPU Feature: AVX512
  3657  func (x Uint8x64) Min(y Uint8x64) Uint8x64
  3658  
  3659  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3660  //
  3661  // Asm: VPMINUW, CPU Feature: AVX
  3662  func (x Uint16x8) Min(y Uint16x8) Uint16x8
  3663  
  3664  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3665  //
  3666  // Asm: VPMINUW, CPU Feature: AVX2
  3667  func (x Uint16x16) Min(y Uint16x16) Uint16x16
  3668  
  3669  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3670  //
  3671  // Asm: VPMINUW, CPU Feature: AVX512
  3672  func (x Uint16x32) Min(y Uint16x32) Uint16x32
  3673  
  3674  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3675  //
  3676  // Asm: VPMINUD, CPU Feature: AVX
  3677  func (x Uint32x4) Min(y Uint32x4) Uint32x4
  3678  
  3679  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3680  //
  3681  // Asm: VPMINUD, CPU Feature: AVX2
  3682  func (x Uint32x8) Min(y Uint32x8) Uint32x8
  3683  
  3684  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3685  //
  3686  // Asm: VPMINUD, CPU Feature: AVX512
  3687  func (x Uint32x16) Min(y Uint32x16) Uint32x16
  3688  
  3689  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3690  //
  3691  // Asm: VPMINUQ, CPU Feature: AVX512
  3692  func (x Uint64x2) Min(y Uint64x2) Uint64x2
  3693  
  3694  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3695  //
  3696  // Asm: VPMINUQ, CPU Feature: AVX512
  3697  func (x Uint64x4) Min(y Uint64x4) Uint64x4
  3698  
  3699  // Min returns a vector whose elements are the minimums of the corresponding elements of x and y.
  3700  //
  3701  // Asm: VPMINUQ, CPU Feature: AVX512
  3702  func (x Uint64x8) Min(y Uint64x8) Uint64x8
  3703  
  3704  /* Mul */
  3705  
  3706  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3707  //
  3708  // Asm: VMULPS, CPU Feature: AVX
  3709  func (x Float32x4) Mul(y Float32x4) Float32x4
  3710  
  3711  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3712  //
  3713  // Asm: VMULPS, CPU Feature: AVX
  3714  func (x Float32x8) Mul(y Float32x8) Float32x8
  3715  
  3716  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3717  //
  3718  // Asm: VMULPS, CPU Feature: AVX512
  3719  func (x Float32x16) Mul(y Float32x16) Float32x16
  3720  
  3721  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3722  //
  3723  // Asm: VMULPD, CPU Feature: AVX
  3724  func (x Float64x2) Mul(y Float64x2) Float64x2
  3725  
  3726  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3727  //
  3728  // Asm: VMULPD, CPU Feature: AVX
  3729  func (x Float64x4) Mul(y Float64x4) Float64x4
  3730  
  3731  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3732  //
  3733  // Asm: VMULPD, CPU Feature: AVX512
  3734  func (x Float64x8) Mul(y Float64x8) Float64x8
  3735  
  3736  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3737  //
  3738  // Asm: VPMULLW, CPU Feature: AVX
  3739  func (x Int16x8) Mul(y Int16x8) Int16x8
  3740  
  3741  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3742  //
  3743  // Asm: VPMULLW, CPU Feature: AVX2
  3744  func (x Int16x16) Mul(y Int16x16) Int16x16
  3745  
  3746  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3747  //
  3748  // Asm: VPMULLW, CPU Feature: AVX512
  3749  func (x Int16x32) Mul(y Int16x32) Int16x32
  3750  
  3751  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3752  //
  3753  // Asm: VPMULLD, CPU Feature: AVX
  3754  func (x Int32x4) Mul(y Int32x4) Int32x4
  3755  
  3756  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3757  //
  3758  // Asm: VPMULLD, CPU Feature: AVX2
  3759  func (x Int32x8) Mul(y Int32x8) Int32x8
  3760  
  3761  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3762  //
  3763  // Asm: VPMULLD, CPU Feature: AVX512
  3764  func (x Int32x16) Mul(y Int32x16) Int32x16
  3765  
  3766  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3767  //
  3768  // Asm: VPMULLQ, CPU Feature: AVX512
  3769  func (x Int64x2) Mul(y Int64x2) Int64x2
  3770  
  3771  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3772  //
  3773  // Asm: VPMULLQ, CPU Feature: AVX512
  3774  func (x Int64x4) Mul(y Int64x4) Int64x4
  3775  
  3776  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3777  //
  3778  // Asm: VPMULLQ, CPU Feature: AVX512
  3779  func (x Int64x8) Mul(y Int64x8) Int64x8
  3780  
  3781  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3782  //
  3783  // Asm: VPMULLW, CPU Feature: AVX
  3784  func (x Uint16x8) Mul(y Uint16x8) Uint16x8
  3785  
  3786  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3787  //
  3788  // Asm: VPMULLW, CPU Feature: AVX2
  3789  func (x Uint16x16) Mul(y Uint16x16) Uint16x16
  3790  
  3791  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3792  //
  3793  // Asm: VPMULLW, CPU Feature: AVX512
  3794  func (x Uint16x32) Mul(y Uint16x32) Uint16x32
  3795  
  3796  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3797  //
  3798  // Asm: VPMULLD, CPU Feature: AVX
  3799  func (x Uint32x4) Mul(y Uint32x4) Uint32x4
  3800  
  3801  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3802  //
  3803  // Asm: VPMULLD, CPU Feature: AVX2
  3804  func (x Uint32x8) Mul(y Uint32x8) Uint32x8
  3805  
  3806  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3807  //
  3808  // Asm: VPMULLD, CPU Feature: AVX512
  3809  func (x Uint32x16) Mul(y Uint32x16) Uint32x16
  3810  
  3811  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3812  //
  3813  // Asm: VPMULLQ, CPU Feature: AVX512
  3814  func (x Uint64x2) Mul(y Uint64x2) Uint64x2
  3815  
  3816  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3817  //
  3818  // Asm: VPMULLQ, CPU Feature: AVX512
  3819  func (x Uint64x4) Mul(y Uint64x4) Uint64x4
  3820  
  3821  // Mul multiplies corresponding elements of x and y and returns the results as a vector.
  3822  //
  3823  // Asm: VPMULLQ, CPU Feature: AVX512
  3824  func (x Uint64x8) Mul(y Uint64x8) Uint64x8
  3825  
  3826  /* MulAdd */
  3827  
  3828  // MulAdd performs a fused multiply-add of the vectors x, y, and z, returning (x * y) + z.
  3829  //
  3830  // Asm: VFMADD213PS, CPU Feature: AVX512
  3831  func (x Float32x4) MulAdd(y Float32x4, z Float32x4) Float32x4
  3832  
  3833  // MulAdd performs a fused multiply-add of the vectors x, y, and z, returning (x * y) + z.
  3834  //
  3835  // Asm: VFMADD213PS, CPU Feature: AVX512
  3836  func (x Float32x8) MulAdd(y Float32x8, z Float32x8) Float32x8
  3837  
  3838  // MulAdd performs a fused multiply-add of the vectors x, y, and z, returning (x * y) + z.
  3839  //
  3840  // Asm: VFMADD213PS, CPU Feature: AVX512
  3841  func (x Float32x16) MulAdd(y Float32x16, z Float32x16) Float32x16
  3842  
  3843  // MulAdd performs a fused multiply-add of the vectors x, y, and z, returning (x * y) + z.
  3844  //
  3845  // Asm: VFMADD213PD, CPU Feature: AVX512
  3846  func (x Float64x2) MulAdd(y Float64x2, z Float64x2) Float64x2
  3847  
  3848  // MulAdd performs a fused multiply-add of the vectors x, y, and z, returning (x * y) + z.
  3849  //
  3850  // Asm: VFMADD213PD, CPU Feature: AVX512
  3851  func (x Float64x4) MulAdd(y Float64x4, z Float64x4) Float64x4
  3852  
  3853  // MulAdd performs a fused multiply-add of the vectors x, y, and z, returning (x * y) + z.
  3854  //
  3855  // Asm: VFMADD213PD, CPU Feature: AVX512
  3856  func (x Float64x8) MulAdd(y Float64x8, z Float64x8) Float64x8
  3857  
  3858  /* MulAddSub */
  3859  
  3860  // MulAddSub performs a fused multiply with alternating subtract and add: (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. NOTE(review): Intel describes VFMADDSUB as subtracting in even (0-based) elements and adding in odd ones; confirm the index parity stated here.
  3861  //
  3862  // Asm: VFMADDSUB213PS, CPU Feature: AVX512
  3863  func (x Float32x4) MulAddSub(y Float32x4, z Float32x4) Float32x4
  3864  
  3865  // MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
  3866  //
  3867  // Asm: VFMADDSUB213PS, CPU Feature: AVX512
  3868  func (x Float32x8) MulAddSub(y Float32x8, z Float32x8) Float32x8
  3869  
  3870  // MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
  3871  //
  3872  // Asm: VFMADDSUB213PS, CPU Feature: AVX512
  3873  func (x Float32x16) MulAddSub(y Float32x16, z Float32x16) Float32x16
  3874  
  3875  // MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
  3876  //
  3877  // Asm: VFMADDSUB213PD, CPU Feature: AVX512
  3878  func (x Float64x2) MulAddSub(y Float64x2, z Float64x2) Float64x2
  3879  
  3880  // MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
  3881  //
  3882  // Asm: VFMADDSUB213PD, CPU Feature: AVX512
  3883  func (x Float64x4) MulAddSub(y Float64x4, z Float64x4) Float64x4
  3884  
  3885  // MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
  3886  //
  3887  // Asm: VFMADDSUB213PD, CPU Feature: AVX512
  3888  func (x Float64x8) MulAddSub(y Float64x8, z Float64x8) Float64x8
  3889  
/* MulEvenWiden */

// MulEvenWiden multiplies the even-indexed elements of x and y, widening each
// product to an element twice as wide as the inputs.
// result[i] = x[2*i] * y[2*i]
//
// Asm: VPMULDQ, CPU Feature: AVX
func (x Int32x4) MulEvenWiden(y Int32x4) Int64x2

// MulEvenWiden multiplies the even-indexed elements of x and y, widening each
// product to an element twice as wide as the inputs.
// result[i] = x[2*i] * y[2*i]
//
// Asm: VPMULDQ, CPU Feature: AVX2
func (x Int32x8) MulEvenWiden(y Int32x8) Int64x4

// MulEvenWiden multiplies the even-indexed elements of x and y, widening each
// product to an element twice as wide as the inputs.
// result[i] = x[2*i] * y[2*i]
//
// Asm: VPMULUDQ, CPU Feature: AVX
func (x Uint32x4) MulEvenWiden(y Uint32x4) Uint64x2

// MulEvenWiden multiplies the even-indexed elements of x and y, widening each
// product to an element twice as wide as the inputs.
// result[i] = x[2*i] * y[2*i]
//
// Asm: VPMULUDQ, CPU Feature: AVX2
func (x Uint32x8) MulEvenWiden(y Uint32x8) Uint64x4
  3915  
/* MulHigh */

// MulHigh multiplies corresponding elements of x and y and keeps only the
// high part of each product.
//
// Asm: VPMULHW, CPU Feature: AVX
func (x Int16x8) MulHigh(y Int16x8) Int16x8

// MulHigh multiplies corresponding elements of x and y and keeps only the
// high part of each product.
//
// Asm: VPMULHW, CPU Feature: AVX2
func (x Int16x16) MulHigh(y Int16x16) Int16x16

// MulHigh multiplies corresponding elements of x and y and keeps only the
// high part of each product.
//
// Asm: VPMULHW, CPU Feature: AVX512
func (x Int16x32) MulHigh(y Int16x32) Int16x32

// MulHigh multiplies corresponding elements of x and y and keeps only the
// high part of each product.
//
// Asm: VPMULHUW, CPU Feature: AVX
func (x Uint16x8) MulHigh(y Uint16x8) Uint16x8

// MulHigh multiplies corresponding elements of x and y and keeps only the
// high part of each product.
//
// Asm: VPMULHUW, CPU Feature: AVX2
func (x Uint16x16) MulHigh(y Uint16x16) Uint16x16

// MulHigh multiplies corresponding elements of x and y and keeps only the
// high part of each product.
//
// Asm: VPMULHUW, CPU Feature: AVX512
func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32
  3947  
/* MulSubAdd */

// NOTE(review): Intel's instruction reference describes VFMSUBADD as adding
// the addend in even-numbered (0-based) lanes and subtracting it in
// odd-numbered lanes, which looks like the reverse of the wording used below
// if indices are 0-based. Confirm which indexing convention is intended.

// MulSubAdd performs a fused multiply with alternating add and subtract:
// (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
//
// Asm: VFMSUBADD213PS, CPU Feature: AVX512
func (x Float32x4) MulSubAdd(y Float32x4, z Float32x4) Float32x4

// MulSubAdd performs a fused multiply with alternating add and subtract:
// (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
//
// Asm: VFMSUBADD213PS, CPU Feature: AVX512
func (x Float32x8) MulSubAdd(y Float32x8, z Float32x8) Float32x8

// MulSubAdd performs a fused multiply with alternating add and subtract:
// (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
//
// Asm: VFMSUBADD213PS, CPU Feature: AVX512
func (x Float32x16) MulSubAdd(y Float32x16, z Float32x16) Float32x16

// MulSubAdd performs a fused multiply with alternating add and subtract:
// (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
//
// Asm: VFMSUBADD213PD, CPU Feature: AVX512
func (x Float64x2) MulSubAdd(y Float64x2, z Float64x2) Float64x2

// MulSubAdd performs a fused multiply with alternating add and subtract:
// (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
//
// Asm: VFMSUBADD213PD, CPU Feature: AVX512
func (x Float64x4) MulSubAdd(y Float64x4, z Float64x4) Float64x4

// MulSubAdd performs a fused multiply with alternating add and subtract:
// (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
//
// Asm: VFMSUBADD213PD, CPU Feature: AVX512
func (x Float64x8) MulSubAdd(y Float64x8, z Float64x8) Float64x8
  3979  
/* NotEqual */

// NotEqual compares corresponding elements of x and y for inequality
// and returns the per-element results as a mask.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) NotEqual(y Float32x4) Mask32x4

// NotEqual compares corresponding elements of x and y for inequality
// and returns the per-element results as a mask.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) NotEqual(y Float32x8) Mask32x8

// NotEqual compares corresponding elements of x and y for inequality
// and returns the per-element results as a mask.
//
// Asm: VCMPPS, CPU Feature: AVX512
func (x Float32x16) NotEqual(y Float32x16) Mask32x16

// NotEqual compares corresponding elements of x and y for inequality
// and returns the per-element results as a mask.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) NotEqual(y Float64x2) Mask64x2

// NotEqual compares corresponding elements of x and y for inequality
// and returns the per-element results as a mask.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) NotEqual(y Float64x4) Mask64x4

// NotEqual compares corresponding elements of x and y for inequality
// and returns the per-element results as a mask.
//
// Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) NotEqual(y Float64x8) Mask64x8

// NotEqual compares corresponding elements of x and y for inequality
// and returns the per-element results as a mask.
//
// Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x64) NotEqual(y Int8x64) Mask8x64

// NotEqual compares corresponding elements of x and y for inequality
// and returns the per-element results as a mask.
//
// Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x32) NotEqual(y Int16x32) Mask16x32

// NotEqual compares corresponding elements of x and y for inequality
// and returns the per-element results as a mask.
//
// Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x16) NotEqual(y Int32x16) Mask32x16

// NotEqual compares corresponding elements of x and y for inequality
// and returns the per-element results as a mask.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x8) NotEqual(y Int64x8) Mask64x8

// NotEqual compares corresponding elements of x and y for inequality
// and returns the per-element results as a mask.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64

// NotEqual compares corresponding elements of x and y for inequality
// and returns the per-element results as a mask.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32

// NotEqual compares corresponding elements of x and y for inequality
// and returns the per-element results as a mask.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16

// NotEqual compares corresponding elements of x and y for inequality
// and returns the per-element results as a mask.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8
  4051  
/* OnesCount */

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Int8x16) OnesCount() Int8x16

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Int8x32) OnesCount() Int8x32

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Int8x64) OnesCount() Int8x64

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Int16x8) OnesCount() Int16x8

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Int16x16) OnesCount() Int16x16

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Int16x32) OnesCount() Int16x32

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Int32x4) OnesCount() Int32x4

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Int32x8) OnesCount() Int32x8

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Int32x16) OnesCount() Int32x16

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Int64x2) OnesCount() Int64x2

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Int64x4) OnesCount() Int64x4

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Int64x8) OnesCount() Int64x8

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Uint8x16) OnesCount() Uint8x16

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Uint8x32) OnesCount() Uint8x32

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
func (x Uint8x64) OnesCount() Uint8x64

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Uint16x8) OnesCount() Uint16x8

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Uint16x16) OnesCount() Uint16x16

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
func (x Uint16x32) OnesCount() Uint16x32

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Uint32x4) OnesCount() Uint32x4

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Uint32x8) OnesCount() Uint32x8

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
func (x Uint32x16) OnesCount() Uint32x16

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Uint64x2) OnesCount() Uint64x2

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Uint64x4) OnesCount() Uint64x4

// OnesCount returns, for each element of x, the number of bits that are set.
//
// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
func (x Uint64x8) OnesCount() Uint64x8
  4173  
/* Or */

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX
func (x Int8x16) Or(y Int8x16) Int8x16

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX2
func (x Int8x32) Or(y Int8x32) Int8x32

// Or returns the bitwise OR of x and y.
//
// Asm: VPORD, CPU Feature: AVX512
func (x Int8x64) Or(y Int8x64) Int8x64

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX
func (x Int16x8) Or(y Int16x8) Int16x8

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX2
func (x Int16x16) Or(y Int16x16) Int16x16

// Or returns the bitwise OR of x and y.
//
// Asm: VPORD, CPU Feature: AVX512
func (x Int16x32) Or(y Int16x32) Int16x32

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX
func (x Int32x4) Or(y Int32x4) Int32x4

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX2
func (x Int32x8) Or(y Int32x8) Int32x8

// Or returns the bitwise OR of x and y.
//
// Asm: VPORD, CPU Feature: AVX512
func (x Int32x16) Or(y Int32x16) Int32x16

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX
func (x Int64x2) Or(y Int64x2) Int64x2

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX2
func (x Int64x4) Or(y Int64x4) Int64x4

// Or returns the bitwise OR of x and y.
//
// Asm: VPORQ, CPU Feature: AVX512
func (x Int64x8) Or(y Int64x8) Int64x8

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX
func (x Uint8x16) Or(y Uint8x16) Uint8x16

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX2
func (x Uint8x32) Or(y Uint8x32) Uint8x32

// Or returns the bitwise OR of x and y.
//
// Asm: VPORD, CPU Feature: AVX512
func (x Uint8x64) Or(y Uint8x64) Uint8x64

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX
func (x Uint16x8) Or(y Uint16x8) Uint16x8

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX2
func (x Uint16x16) Or(y Uint16x16) Uint16x16

// Or returns the bitwise OR of x and y.
//
// Asm: VPORD, CPU Feature: AVX512
func (x Uint16x32) Or(y Uint16x32) Uint16x32

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX
func (x Uint32x4) Or(y Uint32x4) Uint32x4

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX2
func (x Uint32x8) Or(y Uint32x8) Uint32x8

// Or returns the bitwise OR of x and y.
//
// Asm: VPORD, CPU Feature: AVX512
func (x Uint32x16) Or(y Uint32x16) Uint32x16

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX
func (x Uint64x2) Or(y Uint64x2) Uint64x2

// Or returns the bitwise OR of x and y.
//
// Asm: VPOR, CPU Feature: AVX2
func (x Uint64x4) Or(y Uint64x4) Uint64x4

// Or returns the bitwise OR of x and y.
//
// Asm: VPORQ, CPU Feature: AVX512
func (x Uint64x8) Or(y Uint64x8) Uint64x8
  4295  
/* Permute */

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 4 bits (values 0-15) of each element of indices are used.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Int8x16) Permute(indices Uint8x16) Int8x16

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 4 bits (values 0-15) of each element of indices are used.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Uint8x16) Permute(indices Uint8x16) Uint8x16

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 5 bits (values 0-31) of each element of indices are used.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Int8x32) Permute(indices Uint8x32) Int8x32

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 5 bits (values 0-31) of each element of indices are used.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Uint8x32) Permute(indices Uint8x32) Uint8x32

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 6 bits (values 0-63) of each element of indices are used.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Int8x64) Permute(indices Uint8x64) Int8x64

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 6 bits (values 0-63) of each element of indices are used.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Uint8x64) Permute(indices Uint8x64) Uint8x64

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 3 bits (values 0-7) of each element of indices are used.
//
// Asm: VPERMW, CPU Feature: AVX512
func (x Int16x8) Permute(indices Uint16x8) Int16x8

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 3 bits (values 0-7) of each element of indices are used.
//
// Asm: VPERMW, CPU Feature: AVX512
func (x Uint16x8) Permute(indices Uint16x8) Uint16x8

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 4 bits (values 0-15) of each element of indices are used.
//
// Asm: VPERMW, CPU Feature: AVX512
func (x Int16x16) Permute(indices Uint16x16) Int16x16

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 4 bits (values 0-15) of each element of indices are used.
//
// Asm: VPERMW, CPU Feature: AVX512
func (x Uint16x16) Permute(indices Uint16x16) Uint16x16

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 5 bits (values 0-31) of each element of indices are used.
//
// Asm: VPERMW, CPU Feature: AVX512
func (x Int16x32) Permute(indices Uint16x32) Int16x32

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 5 bits (values 0-31) of each element of indices are used.
//
// Asm: VPERMW, CPU Feature: AVX512
func (x Uint16x32) Permute(indices Uint16x32) Uint16x32

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 3 bits (values 0-7) of each element of indices are used.
//
// Asm: VPERMPS, CPU Feature: AVX2
func (x Float32x8) Permute(indices Uint32x8) Float32x8

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 3 bits (values 0-7) of each element of indices are used.
//
// Asm: VPERMD, CPU Feature: AVX2
func (x Int32x8) Permute(indices Uint32x8) Int32x8

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 3 bits (values 0-7) of each element of indices are used.
//
// Asm: VPERMD, CPU Feature: AVX2
func (x Uint32x8) Permute(indices Uint32x8) Uint32x8

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 4 bits (values 0-15) of each element of indices are used.
//
// Asm: VPERMPS, CPU Feature: AVX512
func (x Float32x16) Permute(indices Uint32x16) Float32x16

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 4 bits (values 0-15) of each element of indices are used.
//
// Asm: VPERMD, CPU Feature: AVX512
func (x Int32x16) Permute(indices Uint32x16) Int32x16

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 4 bits (values 0-15) of each element of indices are used.
//
// Asm: VPERMD, CPU Feature: AVX512
func (x Uint32x16) Permute(indices Uint32x16) Uint32x16

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 2 bits (values 0-3) of each element of indices are used.
//
// Asm: VPERMPD, CPU Feature: AVX512
func (x Float64x4) Permute(indices Uint64x4) Float64x4

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 2 bits (values 0-3) of each element of indices are used.
//
// Asm: VPERMQ, CPU Feature: AVX512
func (x Int64x4) Permute(indices Uint64x4) Int64x4

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 2 bits (values 0-3) of each element of indices are used.
//
// Asm: VPERMQ, CPU Feature: AVX512
func (x Uint64x4) Permute(indices Uint64x4) Uint64x4

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 3 bits (values 0-7) of each element of indices are used.
//
// Asm: VPERMPD, CPU Feature: AVX512
func (x Float64x8) Permute(indices Uint64x8) Float64x8

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 3 bits (values 0-7) of each element of indices are used.
//
// Asm: VPERMQ, CPU Feature: AVX512
func (x Int64x8) Permute(indices Uint64x8) Int64x8

// Permute performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// Only the low 3 bits (values 0-7) of each element of indices are used.
//
// Asm: VPERMQ, CPU Feature: AVX512
func (x Uint64x8) Permute(indices Uint64x8) Uint64x8
  4465  
/* PermuteOrZero */

// PermuteOrZero performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// The lower four bits of each byte-sized index in indices select an element from x,
// unless the index's sign bit is set, in which case zero is used instead.
//
// Asm: VPSHUFB, CPU Feature: AVX
func (x Int8x16) PermuteOrZero(indices Int8x16) Int8x16

// PermuteOrZero performs a full permutation of vector x using indices:
// result := {x[indices[0]], x[indices[1]], ..., x[indices[n-1]]}
// where n is the number of elements in x.
// The lower four bits of each byte-sized index in indices select an element from x,
// unless the index's sign bit is set, in which case zero is used instead.
//
// Asm: VPSHUFB, CPU Feature: AVX
func (x Uint8x16) PermuteOrZero(indices Int8x16) Uint8x16
  4483  
/* PermuteOrZeroGrouped */

// PermuteOrZeroGrouped performs a grouped permutation of vector x using indices:
// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
// unless the index's sign bit is set, in which case zero is used instead.
// Each group is 128 bits wide.
//
// Asm: VPSHUFB, CPU Feature: AVX2
func (x Int8x32) PermuteOrZeroGrouped(indices Int8x32) Int8x32

// PermuteOrZeroGrouped performs a grouped permutation of vector x using indices:
// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
// unless the index's sign bit is set, in which case zero is used instead.
// Each group is 128 bits wide.
//
// Asm: VPSHUFB, CPU Feature: AVX512
func (x Int8x64) PermuteOrZeroGrouped(indices Int8x64) Int8x64

// PermuteOrZeroGrouped performs a grouped permutation of vector x using indices:
// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
// unless the index's sign bit is set, in which case zero is used instead.
// Each group is 128 bits wide.
//
// Asm: VPSHUFB, CPU Feature: AVX2
func (x Uint8x32) PermuteOrZeroGrouped(indices Int8x32) Uint8x32

// PermuteOrZeroGrouped performs a grouped permutation of vector x using indices:
// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
// unless the index's sign bit is set, in which case zero is used instead.
// Each group is 128 bits wide.
//
// Asm: VPSHUFB, CPU Feature: AVX512
func (x Uint8x64) PermuteOrZeroGrouped(indices Int8x64) Uint8x64
  4521  
/* Reciprocal */

// Reciprocal returns an approximation of the reciprocal of each element of x.
//
// Asm: VRCPPS, CPU Feature: AVX
func (x Float32x4) Reciprocal() Float32x4

// Reciprocal returns an approximation of the reciprocal of each element of x.
//
// Asm: VRCPPS, CPU Feature: AVX
func (x Float32x8) Reciprocal() Float32x8

// Reciprocal returns an approximation of the reciprocal of each element of x.
//
// Asm: VRCP14PS, CPU Feature: AVX512
func (x Float32x16) Reciprocal() Float32x16

// Reciprocal returns an approximation of the reciprocal of each element of x.
//
// Asm: VRCP14PD, CPU Feature: AVX512
func (x Float64x2) Reciprocal() Float64x2

// Reciprocal returns an approximation of the reciprocal of each element of x.
//
// Asm: VRCP14PD, CPU Feature: AVX512
func (x Float64x4) Reciprocal() Float64x4

// Reciprocal returns an approximation of the reciprocal of each element of x.
//
// Asm: VRCP14PD, CPU Feature: AVX512
func (x Float64x8) Reciprocal() Float64x8
  4553  
/* ReciprocalSqrt */

// ReciprocalSqrt returns an approximation of the reciprocal of the square root
// of each element of x.
//
// Asm: VRSQRTPS, CPU Feature: AVX
func (x Float32x4) ReciprocalSqrt() Float32x4

// ReciprocalSqrt returns an approximation of the reciprocal of the square root
// of each element of x.
//
// Asm: VRSQRTPS, CPU Feature: AVX
func (x Float32x8) ReciprocalSqrt() Float32x8

// ReciprocalSqrt returns an approximation of the reciprocal of the square root
// of each element of x.
//
// Asm: VRSQRT14PS, CPU Feature: AVX512
func (x Float32x16) ReciprocalSqrt() Float32x16

// ReciprocalSqrt returns an approximation of the reciprocal of the square root
// of each element of x.
//
// Asm: VRSQRT14PD, CPU Feature: AVX512
func (x Float64x2) ReciprocalSqrt() Float64x2

// ReciprocalSqrt returns an approximation of the reciprocal of the square root
// of each element of x.
//
// Asm: VRSQRT14PD, CPU Feature: AVX512
func (x Float64x4) ReciprocalSqrt() Float64x4

// ReciprocalSqrt returns an approximation of the reciprocal of the square root
// of each element of x.
//
// Asm: VRSQRT14PD, CPU Feature: AVX512
func (x Float64x8) ReciprocalSqrt() Float64x8
  4585  
  4586  /* RotateAllLeft */
  4587  
  4588  // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
  4589  //
  4590  // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
  4591  //
  4592  // Asm: VPROLD, CPU Feature: AVX512
  4593  func (x Int32x4) RotateAllLeft(shift uint8) Int32x4
  4594  
  4595  // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
  4596  //
  4597  // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
  4598  //
  4599  // Asm: VPROLD, CPU Feature: AVX512
  4600  func (x Int32x8) RotateAllLeft(shift uint8) Int32x8
  4601  
  4602  // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
  4603  //
  4604  // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
  4605  //
  4606  // Asm: VPROLD, CPU Feature: AVX512
  4607  func (x Int32x16) RotateAllLeft(shift uint8) Int32x16
  4608  
  4609  // RotateAllLeft rotates every element of x to the left by the same number of bits, given by the immediate shift.
  4610  //
  4611  // shift performs best when it is a constant; a non-constant value is translated into a jump table.
  4612  //
  4613  // Asm: VPROLQ, CPU Feature: AVX512
  4614  func (x Int64x2) RotateAllLeft(shift uint8) Int64x2
  4615  
  4616  // RotateAllLeft rotates every element of x to the left by the same number of bits, given by the immediate shift.
  4617  //
  4618  // shift performs best when it is a constant; a non-constant value is translated into a jump table.
  4619  //
  4620  // Asm: VPROLQ, CPU Feature: AVX512
  4621  func (x Int64x4) RotateAllLeft(shift uint8) Int64x4
  4622  
  4623  // RotateAllLeft rotates every element of x to the left by the same number of bits, given by the immediate shift.
  4624  //
  4625  // shift performs best when it is a constant; a non-constant value is translated into a jump table.
  4626  //
  4627  // Asm: VPROLQ, CPU Feature: AVX512
  4628  func (x Int64x8) RotateAllLeft(shift uint8) Int64x8
  4629  
  4630  // RotateAllLeft rotates every element of x to the left by the same number of bits, given by the immediate shift.
  4631  //
  4632  // shift performs best when it is a constant; a non-constant value is translated into a jump table.
  4633  //
  4634  // Asm: VPROLD, CPU Feature: AVX512
  4635  func (x Uint32x4) RotateAllLeft(shift uint8) Uint32x4
  4636  
  4637  // RotateAllLeft rotates every element of x to the left by the same number of bits, given by the immediate shift.
  4638  //
  4639  // shift performs best when it is a constant; a non-constant value is translated into a jump table.
  4640  //
  4641  // Asm: VPROLD, CPU Feature: AVX512
  4642  func (x Uint32x8) RotateAllLeft(shift uint8) Uint32x8
  4643  
  4644  // RotateAllLeft rotates every element of x to the left by the same number of bits, given by the immediate shift.
  4645  //
  4646  // shift performs best when it is a constant; a non-constant value is translated into a jump table.
  4647  //
  4648  // Asm: VPROLD, CPU Feature: AVX512
  4649  func (x Uint32x16) RotateAllLeft(shift uint8) Uint32x16
  4650  
  4651  // RotateAllLeft rotates every element of x to the left by the same number of bits, given by the immediate shift.
  4652  //
  4653  // shift performs best when it is a constant; a non-constant value is translated into a jump table.
  4654  //
  4655  // Asm: VPROLQ, CPU Feature: AVX512
  4656  func (x Uint64x2) RotateAllLeft(shift uint8) Uint64x2
  4657  
  4658  // RotateAllLeft rotates every element of x to the left by the same number of bits, given by the immediate shift.
  4659  //
  4660  // shift performs best when it is a constant; a non-constant value is translated into a jump table.
  4661  //
  4662  // Asm: VPROLQ, CPU Feature: AVX512
  4663  func (x Uint64x4) RotateAllLeft(shift uint8) Uint64x4
  4664  
  4665  // RotateAllLeft rotates every element of x to the left by the same number of bits, given by the immediate shift.
  4666  //
  4667  // shift performs best when it is a constant; a non-constant value is translated into a jump table.
  4668  //
  4669  // Asm: VPROLQ, CPU Feature: AVX512
  4670  func (x Uint64x8) RotateAllLeft(shift uint8) Uint64x8
  4671  
  4672  /* RotateAllRight */
  4673  
  4674  // RotateAllRight rotates every element of x to the right by the same number of bits, given by the immediate shift.
  4675  //
  4676  // shift performs best when it is a constant; a non-constant value is translated into a jump table.
  4677  //
  4678  // Asm: VPRORD, CPU Feature: AVX512
  4679  func (x Int32x4) RotateAllRight(shift uint8) Int32x4
  4680  
  4681  // RotateAllRight rotates every element of x to the right by the same number of bits, given by the immediate shift.
  4682  //
  4683  // shift performs best when it is a constant; a non-constant value is translated into a jump table.
  4684  //
  4685  // Asm: VPRORD, CPU Feature: AVX512
  4686  func (x Int32x8) RotateAllRight(shift uint8) Int32x8
  4687  
  4688  // RotateAllRight rotates every element of x to the right by the same number of bits, given by the immediate shift.
  4689  //
  4690  // shift performs best when it is a constant; a non-constant value is translated into a jump table.
  4691  //
  4692  // Asm: VPRORD, CPU Feature: AVX512
  4693  func (x Int32x16) RotateAllRight(shift uint8) Int32x16
  4694  
  4695  // RotateAllRight rotates every element of x to the right by the same number of bits, given by the immediate shift.
  4696  //
  4697  // shift performs best when it is a constant; a non-constant value is translated into a jump table.
  4698  //
  4699  // Asm: VPRORQ, CPU Feature: AVX512
  4700  func (x Int64x2) RotateAllRight(shift uint8) Int64x2
  4701  
  4702  // RotateAllRight rotates every element of x to the right by the same number of bits, given by the immediate shift.
  4703  //
  4704  // shift performs best when it is a constant; a non-constant value is translated into a jump table.
  4705  //
  4706  // Asm: VPRORQ, CPU Feature: AVX512
  4707  func (x Int64x4) RotateAllRight(shift uint8) Int64x4
  4708  
  4709  // RotateAllRight rotates every element of x to the right by the same number of bits, given by the immediate shift.
  4710  //
  4711  // shift performs best when it is a constant; a non-constant value is translated into a jump table.
  4712  //
  4713  // Asm: VPRORQ, CPU Feature: AVX512
  4714  func (x Int64x8) RotateAllRight(shift uint8) Int64x8
  4715  
  4716  // RotateAllRight rotates every element of x to the right by the same number of bits, given by the immediate shift.
  4717  //
  4718  // shift performs best when it is a constant; a non-constant value is translated into a jump table.
  4719  //
  4720  // Asm: VPRORD, CPU Feature: AVX512
  4721  func (x Uint32x4) RotateAllRight(shift uint8) Uint32x4
  4722  
  4723  // RotateAllRight rotates every element of x to the right by the same number of bits, given by the immediate shift.
  4724  //
  4725  // shift performs best when it is a constant; a non-constant value is translated into a jump table.
  4726  //
  4727  // Asm: VPRORD, CPU Feature: AVX512
  4728  func (x Uint32x8) RotateAllRight(shift uint8) Uint32x8
  4729  
  4730  // RotateAllRight rotates every element of x to the right by the same number of bits, given by the immediate shift.
  4731  //
  4732  // shift performs best when it is a constant; a non-constant value is translated into a jump table.
  4733  //
  4734  // Asm: VPRORD, CPU Feature: AVX512
  4735  func (x Uint32x16) RotateAllRight(shift uint8) Uint32x16
  4736  
  4737  // RotateAllRight rotates every element of x to the right by the same number of bits, given by the immediate shift.
  4738  //
  4739  // shift performs best when it is a constant; a non-constant value is translated into a jump table.
  4740  //
  4741  // Asm: VPRORQ, CPU Feature: AVX512
  4742  func (x Uint64x2) RotateAllRight(shift uint8) Uint64x2
  4743  
  4744  // RotateAllRight rotates every element of x to the right by the same number of bits, given by the immediate shift.
  4745  //
  4746  // shift performs best when it is a constant; a non-constant value is translated into a jump table.
  4747  //
  4748  // Asm: VPRORQ, CPU Feature: AVX512
  4749  func (x Uint64x4) RotateAllRight(shift uint8) Uint64x4
  4750  
  4751  // RotateAllRight rotates every element of x to the right by the same number of bits, given by the immediate shift.
  4752  //
  4753  // shift performs best when it is a constant; a non-constant value is translated into a jump table.
  4754  //
  4755  // Asm: VPRORQ, CPU Feature: AVX512
  4756  func (x Uint64x8) RotateAllRight(shift uint8) Uint64x8
  4757  
  4758  /* RotateLeft */
  4759  
  4760  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  4761  //
  4762  // Asm: VPROLVD, CPU Feature: AVX512
  4763  func (x Int32x4) RotateLeft(y Int32x4) Int32x4
  4764  
  4765  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  4766  //
  4767  // Asm: VPROLVD, CPU Feature: AVX512
  4768  func (x Int32x8) RotateLeft(y Int32x8) Int32x8
  4769  
  4770  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  4771  //
  4772  // Asm: VPROLVD, CPU Feature: AVX512
  4773  func (x Int32x16) RotateLeft(y Int32x16) Int32x16
  4774  
  4775  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  4776  //
  4777  // Asm: VPROLVQ, CPU Feature: AVX512
  4778  func (x Int64x2) RotateLeft(y Int64x2) Int64x2
  4779  
  4780  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  4781  //
  4782  // Asm: VPROLVQ, CPU Feature: AVX512
  4783  func (x Int64x4) RotateLeft(y Int64x4) Int64x4
  4784  
  4785  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  4786  //
  4787  // Asm: VPROLVQ, CPU Feature: AVX512
  4788  func (x Int64x8) RotateLeft(y Int64x8) Int64x8
  4789  
  4790  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  4791  //
  4792  // Asm: VPROLVD, CPU Feature: AVX512
  4793  func (x Uint32x4) RotateLeft(y Uint32x4) Uint32x4
  4794  
  4795  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  4796  //
  4797  // Asm: VPROLVD, CPU Feature: AVX512
  4798  func (x Uint32x8) RotateLeft(y Uint32x8) Uint32x8
  4799  
  4800  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  4801  //
  4802  // Asm: VPROLVD, CPU Feature: AVX512
  4803  func (x Uint32x16) RotateLeft(y Uint32x16) Uint32x16
  4804  
  4805  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  4806  //
  4807  // Asm: VPROLVQ, CPU Feature: AVX512
  4808  func (x Uint64x2) RotateLeft(y Uint64x2) Uint64x2
  4809  
  4810  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  4811  //
  4812  // Asm: VPROLVQ, CPU Feature: AVX512
  4813  func (x Uint64x4) RotateLeft(y Uint64x4) Uint64x4
  4814  
  4815  // RotateLeft rotates each element of x to the left by the number of bits given in the corresponding element of y.
  4816  //
  4817  // Asm: VPROLVQ, CPU Feature: AVX512
  4818  func (x Uint64x8) RotateLeft(y Uint64x8) Uint64x8
  4819  
  4820  /* RotateRight */
  4821  
  4822  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  4823  //
  4824  // Asm: VPRORVD, CPU Feature: AVX512
  4825  func (x Int32x4) RotateRight(y Int32x4) Int32x4
  4826  
  4827  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  4828  //
  4829  // Asm: VPRORVD, CPU Feature: AVX512
  4830  func (x Int32x8) RotateRight(y Int32x8) Int32x8
  4831  
  4832  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  4833  //
  4834  // Asm: VPRORVD, CPU Feature: AVX512
  4835  func (x Int32x16) RotateRight(y Int32x16) Int32x16
  4836  
  4837  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  4838  //
  4839  // Asm: VPRORVQ, CPU Feature: AVX512
  4840  func (x Int64x2) RotateRight(y Int64x2) Int64x2
  4841  
  4842  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  4843  //
  4844  // Asm: VPRORVQ, CPU Feature: AVX512
  4845  func (x Int64x4) RotateRight(y Int64x4) Int64x4
  4846  
  4847  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  4848  //
  4849  // Asm: VPRORVQ, CPU Feature: AVX512
  4850  func (x Int64x8) RotateRight(y Int64x8) Int64x8
  4851  
  4852  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  4853  //
  4854  // Asm: VPRORVD, CPU Feature: AVX512
  4855  func (x Uint32x4) RotateRight(y Uint32x4) Uint32x4
  4856  
  4857  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  4858  //
  4859  // Asm: VPRORVD, CPU Feature: AVX512
  4860  func (x Uint32x8) RotateRight(y Uint32x8) Uint32x8
  4861  
  4862  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  4863  //
  4864  // Asm: VPRORVD, CPU Feature: AVX512
  4865  func (x Uint32x16) RotateRight(y Uint32x16) Uint32x16
  4866  
  4867  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  4868  //
  4869  // Asm: VPRORVQ, CPU Feature: AVX512
  4870  func (x Uint64x2) RotateRight(y Uint64x2) Uint64x2
  4871  
  4872  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  4873  //
  4874  // Asm: VPRORVQ, CPU Feature: AVX512
  4875  func (x Uint64x4) RotateRight(y Uint64x4) Uint64x4
  4876  
  4877  // RotateRight rotates each element of x to the right by the number of bits given in the corresponding element of y.
  4878  //
  4879  // Asm: VPRORVQ, CPU Feature: AVX512
  4880  func (x Uint64x8) RotateRight(y Uint64x8) Uint64x8
  4881  
  4882  /* RoundToEven */
  4883  
  4884  // RoundToEven rounds each element of x to the nearest integer (NOTE(review): the name suggests ties round to even — confirm).
  4885  //
  4886  // Asm: VROUNDPS, CPU Feature: AVX
  4887  func (x Float32x4) RoundToEven() Float32x4
  4888  
  4889  // RoundToEven rounds each element of x to the nearest integer (NOTE(review): the name suggests ties round to even — confirm).
  4890  //
  4891  // Asm: VROUNDPS, CPU Feature: AVX
  4892  func (x Float32x8) RoundToEven() Float32x8
  4893  
  4894  // RoundToEven rounds each element of x to the nearest integer (NOTE(review): the name suggests ties round to even — confirm).
  4895  //
  4896  // Asm: VROUNDPD, CPU Feature: AVX
  4897  func (x Float64x2) RoundToEven() Float64x2
  4898  
  4899  // RoundToEven rounds each element of x to the nearest integer (NOTE(review): the name suggests ties round to even — confirm).
  4900  //
  4901  // Asm: VROUNDPD, CPU Feature: AVX
  4902  func (x Float64x4) RoundToEven() Float64x4
  4903  
  4904  /* RoundToEvenScaled */
  4905  
  4906  // RoundToEvenScaled rounds each element of x to the precision specified by prec.
  4907  //
  4908  // prec performs best when it is a constant; a non-constant value is translated into a jump table.
  4909  //
  4910  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  4911  func (x Float32x4) RoundToEvenScaled(prec uint8) Float32x4
  4912  
  4913  // RoundToEvenScaled rounds each element of x to the precision specified by prec.
  4914  //
  4915  // prec performs best when it is a constant; a non-constant value is translated into a jump table.
  4916  //
  4917  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  4918  func (x Float32x8) RoundToEvenScaled(prec uint8) Float32x8
  4919  
  4920  // RoundToEvenScaled rounds each element of x to the precision specified by prec.
  4921  //
  4922  // prec performs best when it is a constant; a non-constant value is translated into a jump table.
  4923  //
  4924  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  4925  func (x Float32x16) RoundToEvenScaled(prec uint8) Float32x16
  4926  
  4927  // RoundToEvenScaled rounds each element of x to the precision specified by prec.
  4928  //
  4929  // prec performs best when it is a constant; a non-constant value is translated into a jump table.
  4930  //
  4931  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  4932  func (x Float64x2) RoundToEvenScaled(prec uint8) Float64x2
  4933  
  4934  // RoundToEvenScaled rounds each element of x to the precision specified by prec.
  4935  //
  4936  // prec performs best when it is a constant; a non-constant value is translated into a jump table.
  4937  //
  4938  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  4939  func (x Float64x4) RoundToEvenScaled(prec uint8) Float64x4
  4940  
  4941  // RoundToEvenScaled rounds each element of x to the precision specified by prec.
  4942  //
  4943  // prec performs best when it is a constant; a non-constant value is translated into a jump table.
  4944  //
  4945  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  4946  func (x Float64x8) RoundToEvenScaled(prec uint8) Float64x8
  4947  
  4948  /* RoundToEvenScaledResidue */
  4949  
  4950  // RoundToEvenScaledResidue computes, for each element of x, the difference left after rounding it to the precision specified by prec.
  4951  //
  4952  // prec performs best when it is a constant; a non-constant value is translated into a jump table.
  4953  //
  4954  // Asm: VREDUCEPS, CPU Feature: AVX512
  4955  func (x Float32x4) RoundToEvenScaledResidue(prec uint8) Float32x4
  4956  
  4957  // RoundToEvenScaledResidue computes, for each element of x, the difference left after rounding it to the precision specified by prec.
  4958  //
  4959  // prec performs best when it is a constant; a non-constant value is translated into a jump table.
  4960  //
  4961  // Asm: VREDUCEPS, CPU Feature: AVX512
  4962  func (x Float32x8) RoundToEvenScaledResidue(prec uint8) Float32x8
  4963  
  4964  // RoundToEvenScaledResidue computes, for each element of x, the difference left after rounding it to the precision specified by prec.
  4965  //
  4966  // prec performs best when it is a constant; a non-constant value is translated into a jump table.
  4967  //
  4968  // Asm: VREDUCEPS, CPU Feature: AVX512
  4969  func (x Float32x16) RoundToEvenScaledResidue(prec uint8) Float32x16
  4970  
  4971  // RoundToEvenScaledResidue computes, for each element of x, the difference left after rounding it to the precision specified by prec.
  4972  //
  4973  // prec performs best when it is a constant; a non-constant value is translated into a jump table.
  4974  //
  4975  // Asm: VREDUCEPD, CPU Feature: AVX512
  4976  func (x Float64x2) RoundToEvenScaledResidue(prec uint8) Float64x2
  4977  
  4978  // RoundToEvenScaledResidue computes, for each element of x, the difference left after rounding it to the precision specified by prec.
  4979  //
  4980  // prec performs best when it is a constant; a non-constant value is translated into a jump table.
  4981  //
  4982  // Asm: VREDUCEPD, CPU Feature: AVX512
  4983  func (x Float64x4) RoundToEvenScaledResidue(prec uint8) Float64x4
  4984  
  4985  // RoundToEvenScaledResidue computes, for each element of x, the difference left after rounding it to the precision specified by prec.
  4986  //
  4987  // prec performs best when it is a constant; a non-constant value is translated into a jump table.
  4988  //
  4989  // Asm: VREDUCEPD, CPU Feature: AVX512
  4990  func (x Float64x8) RoundToEvenScaledResidue(prec uint8) Float64x8
  4991  
  4992  /* SHA1FourRounds */
  4993  
  4994  // SHA1FourRounds performs 4 rounds of the B loop in the SHA1 algorithm defined in FIPS 180-4.
  4995  // x contains the state variables a, b, c and d from upper to lower order.
  4996  // y contains the W array elements (with the state variable e added to the upper element) from upper to lower order.
  4997  // result = the state variables a', b', c', d' updated after 4 rounds.
  4998  // constant = 0 for the first 20 rounds of the loop, 1 for the next 20 rounds, 2 for the 20 rounds after that, and 3 for the last 20 rounds of the loop.
  4999  //
  5000  // The constant argument performs best when it is a compile-time constant; a non-constant value is translated into a jump table.
  5001  //
  5002  // Asm: SHA1RNDS4, CPU Feature: SHA
  5003  func (x Uint32x4) SHA1FourRounds(constant uint8, y Uint32x4) Uint32x4
  5004  
  5005  /* SHA1Message1 */
  5006  
  5007  // SHA1Message1 does the XORing of 1 in the SHA1 algorithm defined in FIPS 180-4.
  5008  // x = {W3, W2, W1, W0}
  5009  // y = {0, 0, W5, W4}
  5010  // result = {W3^W5, W2^W4, W1^W3, W0^W2}
  5011  //
  5012  // Asm: SHA1MSG1, CPU Feature: SHA
  5013  func (x Uint32x4) SHA1Message1(y Uint32x4) Uint32x4
  5014  
  5015  /* SHA1Message2 */
  5016  
  5017  // SHA1Message2 does the calculation of 3 and 4 in the SHA1 algorithm defined in FIPS 180-4.
  5018  // x = result of 2
  5019  // y = {W15, W14, W13} (NOTE(review): only three elements are listed for a four-element vector — confirm the fourth)
  5020  // result = {W19, W18, W17, W16}
  5021  //
  5022  // Asm: SHA1MSG2, CPU Feature: SHA
  5023  func (x Uint32x4) SHA1Message2(y Uint32x4) Uint32x4
  5024  
  5025  /* SHA1NextE */
  5026  
  5027  // SHA1NextE calculates the state variable e' updated after 4 rounds in the SHA1 algorithm defined in FIPS 180-4.
  5028  // x contains the state variable a (before the 4 rounds), placed in the upper element.
  5029  // y holds the elements of the W array for the next 4 rounds, from upper to lower order.
  5030  // result = the elements of the W array for the next 4 rounds, with the updated state variable e' added to the upper element,
  5031  // from upper to lower order.
  5032  // For the last round of the loop, you can specify zero for y to obtain the e' value itself, or better, specify H4:0:0:0
  5033  // for y to get e' added to H4. (Note that the value of e' is computed only from x; the values of y don't affect the
  5034  // computation of the value of e'.)
  5035  //
  5036  // Asm: SHA1NEXTE, CPU Feature: SHA
  5037  func (x Uint32x4) SHA1NextE(y Uint32x4) Uint32x4
  5038  
  5039  /* SHA256Message1 */
  5040  
  5041  // SHA256Message1 does the sigma and addition of 1 in the SHA256 algorithm defined in FIPS 180-4.
  5042  // x = {W0, W1, W2, W3}
  5043  // y = {W4, 0, 0, 0}
  5044  // result = {W0+σ(W1), W1+σ(W2), W2+σ(W3), W3+σ(W4)}
  5045  //
  5046  // Asm: SHA256MSG1, CPU Feature: SHA
  5047  func (x Uint32x4) SHA256Message1(y Uint32x4) Uint32x4
  5048  
  5049  /* SHA256Message2 */
  5050  
  5051  // SHA256Message2 does the sigma and addition of 3 in the SHA256 algorithm defined in FIPS 180-4.
  5052  // x = result of 2
  5053  // y = {0, 0, W14, W15}
  5054  // result = {W16, W17, W18, W19}
  5055  //
  5056  // Asm: SHA256MSG2, CPU Feature: SHA
  5057  func (x Uint32x4) SHA256Message2(y Uint32x4) Uint32x4
  5058  
  5059  /* SHA256TwoRounds */
  5060  
  5061  // SHA256TwoRounds does 2 rounds of the B loop to calculate updated state variables in the SHA256 algorithm defined in FIPS 180-4.
  5062  // x = {h, g, d, c}
  5063  // y = {f, e, b, a}
  5064  // z = {W0+K0, W1+K1}
  5065  // result = {f', e', b', a'}
  5066  // The K array is a 64-DWORD constant array defined on page 11 of FIPS 180-4. Each element of the K array is to be added to
  5067  // the corresponding element of the W array to make the input data z.
  5068  // The updated state variables c', d', g', h' are not returned by this instruction, because they are equal to the input data
  5069  // y (the state variables a, b, e, f before the 2 rounds).
  5070  //
  5071  // Asm: SHA256RNDS2, CPU Feature: SHA
  5072  func (x Uint32x4) SHA256TwoRounds(y Uint32x4, z Uint32x4) Uint32x4
  5073  
  5074  /* SaturateToInt8 */
  5075  
  5076  // SaturateToInt8 converts each element of x to int8.
  5077  // The conversion is performed with saturation on each vector element.
  5078  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5079  //
  5080  // Asm: VPMOVSWB, CPU Feature: AVX512
  5081  func (x Int16x8) SaturateToInt8() Int8x16
  5082  
  5083  // SaturateToInt8 converts each element of x to int8.
  5084  // The conversion is performed with saturation on each vector element.
  5085  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared. NOTE(review): x and the result both have 16 elements, so there appear to be no upper elements left to clear — confirm.
  5086  //
  5087  // Asm: VPMOVSWB, CPU Feature: AVX512
  5088  func (x Int16x16) SaturateToInt8() Int8x16
  5089  
  5090  // SaturateToInt8 converts each element of x to int8.
  5091  // The conversion is performed with saturation on each vector element.
  5092  //
  5093  // Asm: VPMOVSWB, CPU Feature: AVX512
  5094  func (x Int16x32) SaturateToInt8() Int8x32
  5095  
  5096  // SaturateToInt8 converts each element of x to int8.
  5097  // The conversion is performed with saturation on each vector element.
  5098  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5099  //
  5100  // Asm: VPMOVSDB, CPU Feature: AVX512
  5101  func (x Int32x4) SaturateToInt8() Int8x16
  5102  
  5103  // SaturateToInt8 converts each element of x to int8.
  5104  // The conversion is performed with saturation on each vector element.
  5105  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5106  //
  5107  // Asm: VPMOVSDB, CPU Feature: AVX512
  5108  func (x Int32x8) SaturateToInt8() Int8x16
  5109  
  5110  // SaturateToInt8 converts each element of x to int8.
  5111  // The conversion is performed with saturation on each vector element.
  5112  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared. NOTE(review): x and the result both have 16 elements, so there appear to be no upper elements left to clear — confirm.
  5113  //
  5114  // Asm: VPMOVSDB, CPU Feature: AVX512
  5115  func (x Int32x16) SaturateToInt8() Int8x16
  5116  
  5117  // SaturateToInt8 converts each element of x to int8.
  5118  // The conversion is performed with saturation on each vector element.
  5119  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5120  //
  5121  // Asm: VPMOVSQB, CPU Feature: AVX512
  5122  func (x Int64x2) SaturateToInt8() Int8x16
  5123  
  5124  // SaturateToInt8 converts each element of x to int8.
  5125  // The conversion is performed with saturation on each vector element.
  5126  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5127  //
  5128  // Asm: VPMOVSQB, CPU Feature: AVX512
  5129  func (x Int64x4) SaturateToInt8() Int8x16
  5130  
  5131  // SaturateToInt8 converts each element of x to int8.
  5132  // The conversion is performed with saturation on each vector element.
  5133  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5134  //
  5135  // Asm: VPMOVSQB, CPU Feature: AVX512
  5136  func (x Int64x8) SaturateToInt8() Int8x16
  5137  
  5138  /* SaturateToInt16 */
  5139  
  5140  // SaturateToInt16 converts each element of x to int16.
  5141  // The conversion is performed with saturation on each vector element. NOTE(review): the result has more elements than x; presumably the results are packed low and the remaining elements zeroed, as documented for SaturateToInt8 — confirm.
  5142  //
  5143  // Asm: VPMOVSDW, CPU Feature: AVX512
  5144  func (x Int32x4) SaturateToInt16() Int16x8
  5145  
  5146  // SaturateToInt16 converts each element of x to int16.
  5147  // The conversion is performed with saturation on each vector element.
  5148  //
  5149  // Asm: VPMOVSDW, CPU Feature: AVX512
  5150  func (x Int32x8) SaturateToInt16() Int16x8
  5151  
  5152  // SaturateToInt16 converts each element of x to int16.
  5153  // The conversion is performed with saturation on each vector element.
  5154  //
  5155  // Asm: VPMOVSDW, CPU Feature: AVX512
  5156  func (x Int32x16) SaturateToInt16() Int16x16
  5157  
  5158  // SaturateToInt16 converts each element of x to int16.
  5159  // The conversion is performed with saturation on each vector element. NOTE(review): the result has more elements than x; presumably the results are packed low and the remaining elements zeroed, as documented for SaturateToInt8 — confirm.
  5160  //
  5161  // Asm: VPMOVSQW, CPU Feature: AVX512
  5162  func (x Int64x2) SaturateToInt16() Int16x8
  5163  
  5164  // SaturateToInt16 converts each element of x to int16.
  5165  // The conversion is performed with saturation on each vector element. NOTE(review): the result has more elements than x; presumably the results are packed low and the remaining elements zeroed, as documented for SaturateToInt8 — confirm.
  5166  //
  5167  // Asm: VPMOVSQW, CPU Feature: AVX512
  5168  func (x Int64x4) SaturateToInt16() Int16x8
  5169  
  5170  // SaturateToInt16 converts each element of x to int16.
  5171  // The conversion is performed with saturation on each vector element.
  5172  //
  5173  // Asm: VPMOVSQW, CPU Feature: AVX512
  5174  func (x Int64x8) SaturateToInt16() Int16x8
  5175  
  5176  /* SaturateToInt16Concat */
  5177  
  5178  // SaturateToInt16Concat converts the element values of both input vectors to int16.
  5179  // Treating each 128 bits as a group:
  5180  // the converted group from the first input vector is packed into the lower part of the result vector, and
  5181  // the converted group from the second input vector is packed into the upper part of the result vector.
  5182  // The conversion is performed with saturation on each vector element.
  5183  //
  5184  // Asm: VPACKSSDW, CPU Feature: AVX
  5185  func (x Int32x4) SaturateToInt16Concat(y Int32x4) Int16x8
  5186  
  5187  // SaturateToInt16Concat converts the element values of both input vectors to int16.
  5188  // Treating each 128 bits as a group:
  5189  // the converted group from the first input vector is packed into the lower part of the result vector, and
  5190  // the converted group from the second input vector is packed into the upper part of the result vector.
  5191  // The conversion is performed with saturation on each vector element.
  5192  //
  5193  // Asm: VPACKSSDW, CPU Feature: AVX2
  5194  func (x Int32x8) SaturateToInt16Concat(y Int32x8) Int16x16
  5195  
  5196  // SaturateToInt16Concat converts the element values of both input vectors to int16.
  5197  // Treating each 128 bits as a group:
  5198  // the converted group from the first input vector is packed into the lower part of the result vector, and
  5199  // the converted group from the second input vector is packed into the upper part of the result vector.
  5200  // The conversion is performed with saturation on each vector element.
  5201  //
  5202  // Asm: VPACKSSDW, CPU Feature: AVX512
  5203  func (x Int32x16) SaturateToInt16Concat(y Int32x16) Int16x32
  5204  
  5205  /* SaturateToInt32 */
  5206  
  5207  // SaturateToInt32 converts each element of x to int32.
  5208  // The conversion is performed with saturation on each vector element. NOTE(review): the result has more elements than x; presumably the results are packed low and the remaining elements zeroed, as documented for SaturateToInt8 — confirm.
  5209  //
  5210  // Asm: VPMOVSQD, CPU Feature: AVX512
  5211  func (x Int64x2) SaturateToInt32() Int32x4
  5212  
  5213  // SaturateToInt32 converts each element of x to int32.
  5214  // The conversion is performed with saturation on each vector element.
  5215  //
  5216  // Asm: VPMOVSQD, CPU Feature: AVX512
  5217  func (x Int64x4) SaturateToInt32() Int32x4
  5218  
  5219  // SaturateToInt32 converts each element of x to int32.
  5220  // The conversion is performed with saturation on each vector element.
  5221  //
  5222  // Asm: VPMOVSQD, CPU Feature: AVX512
  5223  func (x Int64x8) SaturateToInt32() Int32x8
  5224  
  5225  /* SaturateToUint8 */
  5226  
  5227  // SaturateToUint8 converts element values to uint8.
  5228  // The conversion is performed with saturation on each vector element.
  5229  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared. NOTE(review): despite the name, the receiver and result are signed vectors and the instruction is VPMOVSWB, whereas the Uint16x32 variant uses VPMOVUSWB with unsigned types — confirm this variant is intended.
  5230  //
  5231  // Asm: VPMOVSWB, CPU Feature: AVX512
  5232  func (x Int16x8) SaturateToUint8() Int8x16
  5233  
  5234  // SaturateToUint8 converts element values to uint8.
  5235  // The conversion is performed with saturation on each vector element.
  5236  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared. NOTE(review): despite the name, the receiver and result are signed vectors and the instruction is VPMOVSWB, whereas the Uint16x32 variant uses VPMOVUSWB with unsigned types — confirm this variant is intended.
  5237  //
  5238  // Asm: VPMOVSWB, CPU Feature: AVX512
  5239  func (x Int16x16) SaturateToUint8() Int8x16
  5240  
  5241  // SaturateToUint8 converts element values to uint8.
  5242  // The conversion is performed with saturation on each vector element.
  5243  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared. NOTE(review): despite the name, the receiver and result are signed vectors and the instruction is VPMOVSDB, whereas the Uint16x32 variant uses VPMOVUSWB with unsigned types — confirm this variant is intended.
  5244  //
  5245  // Asm: VPMOVSDB, CPU Feature: AVX512
  5246  func (x Int32x4) SaturateToUint8() Int8x16
  5247  
  5248  // SaturateToUint8 converts element values to uint8.
  5249  // The conversion is performed with saturation on each vector element.
  5250  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared. NOTE(review): despite the name, the receiver and result are signed vectors and the instruction is VPMOVSDB, whereas the Uint16x32 variant uses VPMOVUSWB with unsigned types — confirm this variant is intended.
  5251  //
  5252  // Asm: VPMOVSDB, CPU Feature: AVX512
  5253  func (x Int32x8) SaturateToUint8() Int8x16
  5254  
  5255  // SaturateToUint8 converts element values to uint8.
  5256  // The conversion is performed with saturation on each vector element.
  5257  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared. NOTE(review): despite the name, the receiver and result are signed vectors and the instruction is VPMOVSDB, whereas the Uint16x32 variant uses VPMOVUSWB with unsigned types — confirm this variant is intended.
  5258  //
  5259  // Asm: VPMOVSDB, CPU Feature: AVX512
  5260  func (x Int32x16) SaturateToUint8() Int8x16
  5261  
  5262  // SaturateToUint8 converts element values to uint8.
  5263  // The conversion is performed with saturation on each vector element.
  5264  // Results are packed into the low elements of the returned vector; its upper elements are zero-cleared. NOTE(review): despite the name, the receiver and result are signed vectors and the instruction is VPMOVSQB, whereas the Uint16x32 variant uses VPMOVUSWB with unsigned types — confirm this variant is intended.
  5265  //
  5266  // Asm: VPMOVSQB, CPU Feature: AVX512
  5267  func (x Int64x2) SaturateToUint8() Int8x16
  5268  
  5269  // SaturateToUint8 narrows each int64 element of x to 8 bits, using saturation.
  5270  // NOTE(review): despite the name, the result type is Int8x16 and the listed instruction VPMOVSQB appears to be the signed-saturating form; confirm whether uint8 results are intended.
  5271  // The results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5272  //
  5273  // Asm: VPMOVSQB, CPU Feature: AVX512
  5274  func (x Int64x4) SaturateToUint8() Int8x16
  5275  
  5276  // SaturateToUint8 narrows each int64 element of x to 8 bits, using saturation.
  5277  // NOTE(review): despite the name, the result type is Int8x16 and the listed instruction VPMOVSQB appears to be the signed-saturating form; confirm whether uint8 results are intended.
  5278  // The results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  5279  //
  5280  // Asm: VPMOVSQB, CPU Feature: AVX512
  5281  func (x Int64x8) SaturateToUint8() Int8x16
  5282  
  5283  // SaturateToUint8 converts each uint16 element of x to uint8.
  5284  // The conversion is done with saturation on each element.
  5285  //
  5286  // Asm: VPMOVUSWB, CPU Feature: AVX512
  5287  func (x Uint16x32) SaturateToUint8() Uint8x32
  5288  
  5289  /* SaturateToUint16 */
  5290  
  5291  // SaturateToUint16 converts each uint32 element of x to uint16, using saturation.
  5292  // NOTE(review): x has 4 elements but the result has 8; the results are presumably packed into the low elements with the upper elements zero-cleared — confirm.
  5293  //
  5294  // Asm: VPMOVUSDW, CPU Feature: AVX512
  5295  func (x Uint32x4) SaturateToUint16() Uint16x8
  5296  
  5297  // SaturateToUint16 converts each uint32 element of x to uint16.
  5298  // The conversion is done with saturation on each element.
  5299  //
  5300  // Asm: VPMOVUSDW, CPU Feature: AVX512
  5301  func (x Uint32x8) SaturateToUint16() Uint16x8
  5302  
  5303  // SaturateToUint16 converts each uint32 element of x to uint16.
  5304  // The conversion is done with saturation on each element.
  5305  //
  5306  // Asm: VPMOVUSDW, CPU Feature: AVX512
  5307  func (x Uint32x16) SaturateToUint16() Uint16x16
  5308  
  5309  // SaturateToUint16 converts each uint64 element of x to uint16, using saturation.
  5310  // NOTE(review): x has 2 elements but the result has 8; the results are presumably packed into the low elements with the upper elements zero-cleared — confirm.
  5311  //
  5312  // Asm: VPMOVUSQW, CPU Feature: AVX512
  5313  func (x Uint64x2) SaturateToUint16() Uint16x8
  5314  
  5315  // SaturateToUint16 converts each uint64 element of x to uint16, using saturation.
  5316  // NOTE(review): x has 4 elements but the result has 8; the results are presumably packed into the low elements with the upper elements zero-cleared — confirm.
  5317  //
  5318  // Asm: VPMOVUSQW, CPU Feature: AVX512
  5319  func (x Uint64x4) SaturateToUint16() Uint16x8
  5320  
  5321  // SaturateToUint16 converts each uint64 element of x to uint16.
  5322  // The conversion is done with saturation on each element.
  5323  //
  5324  // Asm: VPMOVUSQW, CPU Feature: AVX512
  5325  func (x Uint64x8) SaturateToUint16() Uint16x8
  5326  
  5327  /* SaturateToUint16Concat */
  5328  
  5329  // SaturateToUint16Concat converts the elements of x and y to uint16, using saturation.
  5330  // Treating each 128 bits as a group:
  5331  // the converted group from x is packed into the lower part of the result vector,
  5332  // and the converted group from y is packed into the upper part of the result vector.
  5333  // NOTE(review): the receiver is unsigned, but VPACKUSDW appears to read its sources as signed 32-bit integers — confirm the behavior for elements above math.MaxInt32.
  5334  //
  5335  // Asm: VPACKUSDW, CPU Feature: AVX
  5336  func (x Uint32x4) SaturateToUint16Concat(y Uint32x4) Uint16x8
  5337  
  5338  // SaturateToUint16Concat converts the elements of x and y to uint16, using saturation.
  5339  // Treating each 128 bits as a group:
  5340  // the converted group from x is packed into the lower part of the result vector,
  5341  // and the converted group from y is packed into the upper part of the result vector.
  5342  // NOTE(review): the receiver is unsigned, but VPACKUSDW appears to read its sources as signed 32-bit integers — confirm the behavior for elements above math.MaxInt32.
  5343  //
  5344  // Asm: VPACKUSDW, CPU Feature: AVX2
  5345  func (x Uint32x8) SaturateToUint16Concat(y Uint32x8) Uint16x16
  5346  
  5347  // SaturateToUint16Concat converts the elements of x and y to uint16, using saturation.
  5348  // Treating each 128 bits as a group:
  5349  // the converted group from x is packed into the lower part of the result vector,
  5350  // and the converted group from y is packed into the upper part of the result vector.
  5351  // NOTE(review): the receiver is unsigned, but VPACKUSDW appears to read its sources as signed 32-bit integers — confirm the behavior for elements above math.MaxInt32.
  5352  //
  5353  // Asm: VPACKUSDW, CPU Feature: AVX512
  5354  func (x Uint32x16) SaturateToUint16Concat(y Uint32x16) Uint16x32
  5355  
  5356  /* SaturateToUint32 */
  5357  
  5358  // SaturateToUint32 converts each uint64 element of x to uint32, using saturation.
  5359  // NOTE(review): x has 2 elements but the result has 4; the results are presumably packed into the low elements with the upper elements zero-cleared — confirm.
  5360  //
  5361  // Asm: VPMOVUSQD, CPU Feature: AVX512
  5362  func (x Uint64x2) SaturateToUint32() Uint32x4
  5363  
  5364  // SaturateToUint32 converts each uint64 element of x to uint32.
  5365  // The conversion is done with saturation on each element.
  5366  //
  5367  // Asm: VPMOVUSQD, CPU Feature: AVX512
  5368  func (x Uint64x4) SaturateToUint32() Uint32x4
  5369  
  5370  // SaturateToUint32 converts each uint64 element of x to uint32.
  5371  // The conversion is done with saturation on each element.
  5372  //
  5373  // Asm: VPMOVUSQD, CPU Feature: AVX512
  5374  func (x Uint64x8) SaturateToUint32() Uint32x8
  5375  
  5376  /* Scale */
  5377  
  5378  // Scale multiplies each element of x by a power of 2. NOTE(review): the power is presumably taken from the corresponding element of y — confirm.
  5379  //
  5380  // Asm: VSCALEFPS, CPU Feature: AVX512
  5381  func (x Float32x4) Scale(y Float32x4) Float32x4
  5382  
  5383  // Scale multiplies each element of x by a power of 2. NOTE(review): the power is presumably taken from the corresponding element of y — confirm.
  5384  //
  5385  // Asm: VSCALEFPS, CPU Feature: AVX512
  5386  func (x Float32x8) Scale(y Float32x8) Float32x8
  5387  
  5388  // Scale multiplies each element of x by a power of 2. NOTE(review): the power is presumably taken from the corresponding element of y — confirm.
  5389  //
  5390  // Asm: VSCALEFPS, CPU Feature: AVX512
  5391  func (x Float32x16) Scale(y Float32x16) Float32x16
  5392  
  5393  // Scale multiplies each element of x by a power of 2. NOTE(review): the power is presumably taken from the corresponding element of y — confirm.
  5394  //
  5395  // Asm: VSCALEFPD, CPU Feature: AVX512
  5396  func (x Float64x2) Scale(y Float64x2) Float64x2
  5397  
  5398  // Scale multiplies each element of x by a power of 2. NOTE(review): the power is presumably taken from the corresponding element of y — confirm.
  5399  //
  5400  // Asm: VSCALEFPD, CPU Feature: AVX512
  5401  func (x Float64x4) Scale(y Float64x4) Float64x4
  5402  
  5403  // Scale multiplies each element of x by a power of 2. NOTE(review): the power is presumably taken from the corresponding element of y — confirm.
  5404  //
  5405  // Asm: VSCALEFPD, CPU Feature: AVX512
  5406  func (x Float64x8) Scale(y Float64x8) Float64x8
  5407  
  5408  /* Select128FromPair */
  5409  
  5410  // Select128FromPair treats the 256-bit vectors x and y as a single vector of four
  5411  // 128-bit elements (x supplies elements 0 and 1, y supplies elements 2 and 3), and returns a 256-bit result
  5412  // whose lower half is the element selected by lo and whose upper half is the element selected by hi.
  5413  // For example,
  5414  //
  5415  //	{40, 41, 42, 43, 50, 51, 52, 53}.Select128FromPair(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
  5416  //
  5417  // returns {70, 71, 72, 73, 40, 41, 42, 43}.
  5418  //
  5419  // lo and hi result in better performance when they are constants; non-constant values will be translated into a jump table.
  5420  // lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
  5421  //
  5422  // Asm: VPERM2F128, CPU Feature: AVX
  5423  func (x Float32x8) Select128FromPair(lo, hi uint8, y Float32x8) Float32x8
  5424  
  5425  // Select128FromPair treats the 256-bit vectors x and y as a single vector of four
  5426  // 128-bit elements (x supplies elements 0 and 1, y supplies elements 2 and 3), and returns a 256-bit result
  5427  // whose lower half is the element selected by lo and whose upper half is the element selected by hi.
  5428  // For example,
  5429  //
  5430  //	{40, 41, 50, 51}.Select128FromPair(3, 0, {60, 61, 70, 71})
  5431  //
  5432  // returns {70, 71, 40, 41}.
  5433  //
  5434  // lo and hi result in better performance when they are constants; non-constant values will be translated into a jump table.
  5435  // lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
  5436  //
  5437  // Asm: VPERM2F128, CPU Feature: AVX
  5438  func (x Float64x4) Select128FromPair(lo, hi uint8, y Float64x4) Float64x4
  5439  
  5440  // Select128FromPair treats the 256-bit vectors x and y as a single vector of four
  5441  // 128-bit elements (x supplies elements 0 and 1, y supplies elements 2 and 3), and returns a 256-bit result
  5442  // whose lower half is the element selected by lo and whose upper half is the element selected by hi.
  5443  // For example,
  5444  //
  5445  //	{0x40, 0x41, ..., 0x4f, 0x50, 0x51, ..., 0x5f}.Select128FromPair(3, 0,
  5446  //	     {0x60, 0x61, ..., 0x6f, 0x70, 0x71, ..., 0x7f})
  5447  //
  5448  // returns {0x70, 0x71, ..., 0x7f, 0x40, 0x41, ..., 0x4f}.
  5449  //
  5450  // lo and hi result in better performance when they are constants; non-constant values will be translated into a jump table.
  5451  // lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
  5452  //
  5453  // Asm: VPERM2I128, CPU Feature: AVX2
  5454  func (x Int8x32) Select128FromPair(lo, hi uint8, y Int8x32) Int8x32
  5455  
  5456  // Select128FromPair treats the 256-bit vectors x and y as a single vector of four
  5457  // 128-bit elements (x supplies elements 0 and 1, y supplies elements 2 and 3), and returns a 256-bit result
  5458  // whose lower half is the element selected by lo and whose upper half is the element selected by hi.
  5459  // For example,
  5460  //
  5461  //	{40, 41, 42, 43, 44, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57}.Select128FromPair(3, 0,
  5462  //	 {60, 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 73, 74, 75, 76, 77})
  5463  //
  5464  // returns {70, 71, 72, 73, 74, 75, 76, 77, 40, 41, 42, 43, 44, 45, 46, 47}.
  5465  //
  5466  // lo and hi result in better performance when they are constants; non-constant values will be translated into a jump table.
  5467  // lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
  5468  //
  5469  // Asm: VPERM2I128, CPU Feature: AVX2
  5470  func (x Int16x16) Select128FromPair(lo, hi uint8, y Int16x16) Int16x16
  5471  
  5472  // Select128FromPair treats the 256-bit vectors x and y as a single vector of four
  5473  // 128-bit elements (x supplies elements 0 and 1, y supplies elements 2 and 3), and returns a 256-bit result
  5474  // whose lower half is the element selected by lo and whose upper half is the element selected by hi.
  5475  // For example,
  5476  //
  5477  //	{40, 41, 42, 43, 50, 51, 52, 53}.Select128FromPair(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
  5478  //
  5479  // returns {70, 71, 72, 73, 40, 41, 42, 43}.
  5480  //
  5481  // lo and hi result in better performance when they are constants; non-constant values will be translated into a jump table.
  5482  // lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
  5483  //
  5484  // Asm: VPERM2I128, CPU Feature: AVX2
  5485  func (x Int32x8) Select128FromPair(lo, hi uint8, y Int32x8) Int32x8
  5486  
  5487  // Select128FromPair treats the 256-bit vectors x and y as a single vector of four
  5488  // 128-bit elements (x supplies elements 0 and 1, y supplies elements 2 and 3), and returns a 256-bit result
  5489  // whose lower half is the element selected by lo and whose upper half is the element selected by hi.
  5490  // For example,
  5491  //
  5492  //	{40, 41, 50, 51}.Select128FromPair(3, 0, {60, 61, 70, 71})
  5493  //
  5494  // returns {70, 71, 40, 41}.
  5495  //
  5496  // lo and hi result in better performance when they are constants; non-constant values will be translated into a jump table.
  5497  // lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
  5498  //
  5499  // Asm: VPERM2I128, CPU Feature: AVX2
  5500  func (x Int64x4) Select128FromPair(lo, hi uint8, y Int64x4) Int64x4
  5501  
  5502  // Select128FromPair treats the 256-bit vectors x and y as a single vector of four
  5503  // 128-bit elements (x supplies elements 0 and 1, y supplies elements 2 and 3), and returns a 256-bit result
  5504  // whose lower half is the element selected by lo and whose upper half is the element selected by hi.
  5505  // For example,
  5506  //
  5507  //	{0x40, 0x41, ..., 0x4f, 0x50, 0x51, ..., 0x5f}.Select128FromPair(3, 0,
  5508  //	     {0x60, 0x61, ..., 0x6f, 0x70, 0x71, ..., 0x7f})
  5509  //
  5510  // returns {0x70, 0x71, ..., 0x7f, 0x40, 0x41, ..., 0x4f}.
  5511  //
  5512  // lo and hi result in better performance when they are constants; non-constant values will be translated into a jump table.
  5513  // lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
  5514  //
  5515  // Asm: VPERM2I128, CPU Feature: AVX2
  5516  func (x Uint8x32) Select128FromPair(lo, hi uint8, y Uint8x32) Uint8x32
  5517  
  5518  // Select128FromPair treats the 256-bit vectors x and y as a single vector of four
  5519  // 128-bit elements (x supplies elements 0 and 1, y supplies elements 2 and 3), and returns a 256-bit result
  5520  // whose lower half is the element selected by lo and whose upper half is the element selected by hi.
  5521  // For example,
  5522  //
  5523  //	{40, 41, 42, 43, 44, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57}.Select128FromPair(3, 0,
  5524  //	 {60, 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 73, 74, 75, 76, 77})
  5525  //
  5526  // returns {70, 71, 72, 73, 74, 75, 76, 77, 40, 41, 42, 43, 44, 45, 46, 47}.
  5527  //
  5528  // lo and hi result in better performance when they are constants; non-constant values will be translated into a jump table.
  5529  // lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
  5530  //
  5531  // Asm: VPERM2I128, CPU Feature: AVX2
  5532  func (x Uint16x16) Select128FromPair(lo, hi uint8, y Uint16x16) Uint16x16
  5533  
  5534  // Select128FromPair treats the 256-bit vectors x and y as a single vector of four
  5535  // 128-bit elements (x supplies elements 0 and 1, y supplies elements 2 and 3), and returns a 256-bit result
  5536  // whose lower half is the element selected by lo and whose upper half is the element selected by hi.
  5537  // For example,
  5538  //
  5539  //	{40, 41, 42, 43, 50, 51, 52, 53}.Select128FromPair(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
  5540  //
  5541  // returns {70, 71, 72, 73, 40, 41, 42, 43}.
  5542  //
  5543  // lo and hi result in better performance when they are constants; non-constant values will be translated into a jump table.
  5544  // lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
  5545  //
  5546  // Asm: VPERM2I128, CPU Feature: AVX2
  5547  func (x Uint32x8) Select128FromPair(lo, hi uint8, y Uint32x8) Uint32x8
  5548  
  5549  // Select128FromPair treats the 256-bit vectors x and y as a single vector of four
  5550  // 128-bit elements (x supplies elements 0 and 1, y supplies elements 2 and 3), and returns a 256-bit result
  5551  // whose lower half is the element selected by lo and whose upper half is the element selected by hi.
  5552  // For example,
  5553  //
  5554  //	{40, 41, 50, 51}.Select128FromPair(3, 0, {60, 61, 70, 71})
  5555  //
  5556  // returns {70, 71, 40, 41}.
  5557  //
  5558  // lo and hi result in better performance when they are constants; non-constant values will be translated into a jump table.
  5559  // lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
  5560  //
  5561  // Asm: VPERM2I128, CPU Feature: AVX2
  5562  func (x Uint64x4) Select128FromPair(lo, hi uint8, y Uint64x4) Uint64x4
  5563  
  5564  /* SetElem */
  5565  
  5566  // SetElem returns a copy of x in which the element selected by index is set to y.
  5567  //
  5568  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  5569  //
  5570  // Asm: VPINSRD, CPU Feature: AVX
  5571  func (x Float32x4) SetElem(index uint8, y float32) Float32x4
  5572  
  5573  // SetElem returns a copy of x in which the element selected by index is set to y.
  5574  //
  5575  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  5576  //
  5577  // Asm: VPINSRQ, CPU Feature: AVX
  5578  func (x Float64x2) SetElem(index uint8, y float64) Float64x2
  5579  
  5580  // SetElem returns a copy of x in which the element selected by index is set to y.
  5581  //
  5582  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  5583  //
  5584  // Asm: VPINSRB, CPU Feature: AVX
  5585  func (x Int8x16) SetElem(index uint8, y int8) Int8x16
  5586  
  5587  // SetElem returns a copy of x in which the element selected by index is set to y.
  5588  //
  5589  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  5590  //
  5591  // Asm: VPINSRW, CPU Feature: AVX
  5592  func (x Int16x8) SetElem(index uint8, y int16) Int16x8
  5593  
  5594  // SetElem returns a copy of x in which the element selected by index is set to y.
  5595  //
  5596  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  5597  //
  5598  // Asm: VPINSRD, CPU Feature: AVX
  5599  func (x Int32x4) SetElem(index uint8, y int32) Int32x4
  5600  
  5601  // SetElem returns a copy of x in which the element selected by index is set to y.
  5602  //
  5603  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  5604  //
  5605  // Asm: VPINSRQ, CPU Feature: AVX
  5606  func (x Int64x2) SetElem(index uint8, y int64) Int64x2
  5607  
  5608  // SetElem returns a copy of x in which the element selected by index is set to y.
  5609  //
  5610  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  5611  //
  5612  // Asm: VPINSRB, CPU Feature: AVX
  5613  func (x Uint8x16) SetElem(index uint8, y uint8) Uint8x16
  5614  
  5615  // SetElem returns a copy of x in which the element selected by index is set to y.
  5616  //
  5617  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  5618  //
  5619  // Asm: VPINSRW, CPU Feature: AVX
  5620  func (x Uint16x8) SetElem(index uint8, y uint16) Uint16x8
  5621  
  5622  // SetElem returns a copy of x in which the element selected by index is set to y.
  5623  //
  5624  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  5625  //
  5626  // Asm: VPINSRD, CPU Feature: AVX
  5627  func (x Uint32x4) SetElem(index uint8, y uint32) Uint32x4
  5628  
  5629  // SetElem returns a copy of x in which the element selected by index is set to y.
  5630  //
  5631  // index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  5632  //
  5633  // Asm: VPINSRQ, CPU Feature: AVX
  5634  func (x Uint64x2) SetElem(index uint8, y uint64) Uint64x2
  5635  
  5636  /* SetHi */
  5637  
  5638  // SetHi returns a copy of x whose upper half has been replaced by y.
  5639  //
  5640  // Asm: VINSERTF128, CPU Feature: AVX
  5641  func (x Float32x8) SetHi(y Float32x4) Float32x8
  5642  
  5643  // SetHi returns a copy of x whose upper half has been replaced by y.
  5644  //
  5645  // Asm: VINSERTF64X4, CPU Feature: AVX512
  5646  func (x Float32x16) SetHi(y Float32x8) Float32x16
  5647  
  5648  // SetHi returns a copy of x whose upper half has been replaced by y.
  5649  //
  5650  // Asm: VINSERTF128, CPU Feature: AVX
  5651  func (x Float64x4) SetHi(y Float64x2) Float64x4
  5652  
  5653  // SetHi returns a copy of x whose upper half has been replaced by y.
  5654  //
  5655  // Asm: VINSERTF64X4, CPU Feature: AVX512
  5656  func (x Float64x8) SetHi(y Float64x4) Float64x8
  5657  
  5658  // SetHi returns a copy of x whose upper half has been replaced by y.
  5659  //
  5660  // Asm: VINSERTI128, CPU Feature: AVX2
  5661  func (x Int8x32) SetHi(y Int8x16) Int8x32
  5662  
  5663  // SetHi returns a copy of x whose upper half has been replaced by y.
  5664  //
  5665  // Asm: VINSERTI64X4, CPU Feature: AVX512
  5666  func (x Int8x64) SetHi(y Int8x32) Int8x64
  5667  
  5668  // SetHi returns a copy of x whose upper half has been replaced by y.
  5669  //
  5670  // Asm: VINSERTI128, CPU Feature: AVX2
  5671  func (x Int16x16) SetHi(y Int16x8) Int16x16
  5672  
  5673  // SetHi returns a copy of x whose upper half has been replaced by y.
  5674  //
  5675  // Asm: VINSERTI64X4, CPU Feature: AVX512
  5676  func (x Int16x32) SetHi(y Int16x16) Int16x32
  5677  
  5678  // SetHi returns a copy of x whose upper half has been replaced by y.
  5679  //
  5680  // Asm: VINSERTI128, CPU Feature: AVX2
  5681  func (x Int32x8) SetHi(y Int32x4) Int32x8
  5682  
  5683  // SetHi returns a copy of x whose upper half has been replaced by y.
  5684  //
  5685  // Asm: VINSERTI64X4, CPU Feature: AVX512
  5686  func (x Int32x16) SetHi(y Int32x8) Int32x16
  5687  
  5688  // SetHi returns a copy of x whose upper half has been replaced by y.
  5689  //
  5690  // Asm: VINSERTI128, CPU Feature: AVX2
  5691  func (x Int64x4) SetHi(y Int64x2) Int64x4
  5692  
  5693  // SetHi returns a copy of x whose upper half has been replaced by y.
  5694  //
  5695  // Asm: VINSERTI64X4, CPU Feature: AVX512
  5696  func (x Int64x8) SetHi(y Int64x4) Int64x8
  5697  
  5698  // SetHi returns a copy of x whose upper half has been replaced by y.
  5699  //
  5700  // Asm: VINSERTI128, CPU Feature: AVX2
  5701  func (x Uint8x32) SetHi(y Uint8x16) Uint8x32
  5702  
  5703  // SetHi returns a copy of x whose upper half has been replaced by y.
  5704  //
  5705  // Asm: VINSERTI64X4, CPU Feature: AVX512
  5706  func (x Uint8x64) SetHi(y Uint8x32) Uint8x64
  5707  
  5708  // SetHi returns a copy of x whose upper half has been replaced by y.
  5709  //
  5710  // Asm: VINSERTI128, CPU Feature: AVX2
  5711  func (x Uint16x16) SetHi(y Uint16x8) Uint16x16
  5712  
  5713  // SetHi returns a copy of x whose upper half has been replaced by y.
  5714  //
  5715  // Asm: VINSERTI64X4, CPU Feature: AVX512
  5716  func (x Uint16x32) SetHi(y Uint16x16) Uint16x32
  5717  
  5718  // SetHi returns a copy of x whose upper half has been replaced by y.
  5719  //
  5720  // Asm: VINSERTI128, CPU Feature: AVX2
  5721  func (x Uint32x8) SetHi(y Uint32x4) Uint32x8
  5722  
  5723  // SetHi returns a copy of x whose upper half has been replaced by y.
  5724  //
  5725  // Asm: VINSERTI64X4, CPU Feature: AVX512
  5726  func (x Uint32x16) SetHi(y Uint32x8) Uint32x16
  5727  
  5728  // SetHi returns a copy of x whose upper half has been replaced by y.
  5729  //
  5730  // Asm: VINSERTI128, CPU Feature: AVX2
  5731  func (x Uint64x4) SetHi(y Uint64x2) Uint64x4
  5732  
  5733  // SetHi returns a copy of x whose upper half has been replaced by y.
  5734  //
  5735  // Asm: VINSERTI64X4, CPU Feature: AVX512
  5736  func (x Uint64x8) SetHi(y Uint64x4) Uint64x8
  5737  
  5738  /* SetLo */
  5739  
  5740  // SetLo returns a copy of x whose lower half has been replaced by y.
  5741  //
  5742  // Asm: VINSERTF128, CPU Feature: AVX
  5743  func (x Float32x8) SetLo(y Float32x4) Float32x8
  5744  
  5745  // SetLo returns a copy of x whose lower half has been replaced by y.
  5746  //
  5747  // Asm: VINSERTF64X4, CPU Feature: AVX512
  5748  func (x Float32x16) SetLo(y Float32x8) Float32x16
  5749  
  5750  // SetLo returns a copy of x whose lower half has been replaced by y.
  5751  //
  5752  // Asm: VINSERTF128, CPU Feature: AVX
  5753  func (x Float64x4) SetLo(y Float64x2) Float64x4
  5754  
  5755  // SetLo returns a copy of x whose lower half has been replaced by y.
  5756  //
  5757  // Asm: VINSERTF64X4, CPU Feature: AVX512
  5758  func (x Float64x8) SetLo(y Float64x4) Float64x8
  5759  
  5760  // SetLo returns a copy of x whose lower half has been replaced by y.
  5761  //
  5762  // Asm: VINSERTI128, CPU Feature: AVX2
  5763  func (x Int8x32) SetLo(y Int8x16) Int8x32
  5764  
  5765  // SetLo returns a copy of x whose lower half has been replaced by y.
  5766  //
  5767  // Asm: VINSERTI64X4, CPU Feature: AVX512
  5768  func (x Int8x64) SetLo(y Int8x32) Int8x64
  5769  
  5770  // SetLo returns a copy of x whose lower half has been replaced by y.
  5771  //
  5772  // Asm: VINSERTI128, CPU Feature: AVX2
  5773  func (x Int16x16) SetLo(y Int16x8) Int16x16
  5774  
  5775  // SetLo returns a copy of x whose lower half has been replaced by y.
  5776  //
  5777  // Asm: VINSERTI64X4, CPU Feature: AVX512
  5778  func (x Int16x32) SetLo(y Int16x16) Int16x32
  5779  
  5780  // SetLo returns a copy of x whose lower half has been replaced by y.
  5781  //
  5782  // Asm: VINSERTI128, CPU Feature: AVX2
  5783  func (x Int32x8) SetLo(y Int32x4) Int32x8
  5784  
  5785  // SetLo returns a copy of x whose lower half has been replaced by y.
  5786  //
  5787  // Asm: VINSERTI64X4, CPU Feature: AVX512
  5788  func (x Int32x16) SetLo(y Int32x8) Int32x16
  5789  
  5790  // SetLo returns a copy of x whose lower half has been replaced by y.
  5791  //
  5792  // Asm: VINSERTI128, CPU Feature: AVX2
  5793  func (x Int64x4) SetLo(y Int64x2) Int64x4
  5794  
  5795  // SetLo returns a copy of x whose lower half has been replaced by y.
  5796  //
  5797  // Asm: VINSERTI64X4, CPU Feature: AVX512
  5798  func (x Int64x8) SetLo(y Int64x4) Int64x8
  5799  
  5800  // SetLo returns a copy of x whose lower half has been replaced by y.
  5801  //
  5802  // Asm: VINSERTI128, CPU Feature: AVX2
  5803  func (x Uint8x32) SetLo(y Uint8x16) Uint8x32
  5804  
  5805  // SetLo returns a copy of x whose lower half has been replaced by y.
  5806  //
  5807  // Asm: VINSERTI64X4, CPU Feature: AVX512
  5808  func (x Uint8x64) SetLo(y Uint8x32) Uint8x64
  5809  
  5810  // SetLo returns a copy of x whose lower half has been replaced by y.
  5811  //
  5812  // Asm: VINSERTI128, CPU Feature: AVX2
  5813  func (x Uint16x16) SetLo(y Uint16x8) Uint16x16
  5814  
  5815  // SetLo returns a copy of x whose lower half has been replaced by y.
  5816  //
  5817  // Asm: VINSERTI64X4, CPU Feature: AVX512
  5818  func (x Uint16x32) SetLo(y Uint16x16) Uint16x32
  5819  
  5820  // SetLo returns a copy of x whose lower half has been replaced by y.
  5821  //
  5822  // Asm: VINSERTI128, CPU Feature: AVX2
  5823  func (x Uint32x8) SetLo(y Uint32x4) Uint32x8
  5824  
  5825  // SetLo returns a copy of x whose lower half has been replaced by y.
  5826  //
  5827  // Asm: VINSERTI64X4, CPU Feature: AVX512
  5828  func (x Uint32x16) SetLo(y Uint32x8) Uint32x16
  5829  
  5830  // SetLo returns a copy of x whose lower half has been replaced by y.
  5831  //
  5832  // Asm: VINSERTI128, CPU Feature: AVX2
  5833  func (x Uint64x4) SetLo(y Uint64x2) Uint64x4
  5834  
  5835  // SetLo returns a copy of x whose lower half has been replaced by y.
  5836  //
  5837  // Asm: VINSERTI64X4, CPU Feature: AVX512
  5838  func (x Uint64x8) SetLo(y Uint64x4) Uint64x8
  5839  
  5840  /* ShiftAllLeft */
  5841  
  5842  // ShiftAllLeft shifts each element of x to the left by y bits. Emptied lower bits are zeroed.
  5843  //
  5844  // Asm: VPSLLW, CPU Feature: AVX
  5845  func (x Int16x8) ShiftAllLeft(y uint64) Int16x8
  5846  
  5847  // ShiftAllLeft shifts each element of x to the left by y bits. Emptied lower bits are zeroed.
  5848  //
  5849  // Asm: VPSLLW, CPU Feature: AVX2
  5850  func (x Int16x16) ShiftAllLeft(y uint64) Int16x16
  5851  
  5852  // ShiftAllLeft shifts each element of x to the left by y bits. Emptied lower bits are zeroed.
  5853  //
  5854  // Asm: VPSLLW, CPU Feature: AVX512
  5855  func (x Int16x32) ShiftAllLeft(y uint64) Int16x32
  5856  
  5857  // ShiftAllLeft shifts each element of x to the left by y bits. Emptied lower bits are zeroed.
  5858  //
  5859  // Asm: VPSLLD, CPU Feature: AVX
  5860  func (x Int32x4) ShiftAllLeft(y uint64) Int32x4
  5861  
  5862  // ShiftAllLeft shifts each element of x to the left by y bits. Emptied lower bits are zeroed.
  5863  //
  5864  // Asm: VPSLLD, CPU Feature: AVX2
  5865  func (x Int32x8) ShiftAllLeft(y uint64) Int32x8
  5866  
  5867  // ShiftAllLeft shifts each element of x to the left by y bits. Emptied lower bits are zeroed.
  5868  //
  5869  // Asm: VPSLLD, CPU Feature: AVX512
  5870  func (x Int32x16) ShiftAllLeft(y uint64) Int32x16
  5871  
  5872  // ShiftAllLeft shifts each element of x to the left by y bits. Emptied lower bits are zeroed.
  5873  //
  5874  // Asm: VPSLLQ, CPU Feature: AVX
  5875  func (x Int64x2) ShiftAllLeft(y uint64) Int64x2
  5876  
  5877  // ShiftAllLeft shifts each element of x to the left by y bits. Emptied lower bits are zeroed.
  5878  //
  5879  // Asm: VPSLLQ, CPU Feature: AVX2
  5880  func (x Int64x4) ShiftAllLeft(y uint64) Int64x4
  5881  
  5882  // ShiftAllLeft shifts each element of x to the left by y bits. Emptied lower bits are zeroed.
  5883  //
  5884  // Asm: VPSLLQ, CPU Feature: AVX512
  5885  func (x Int64x8) ShiftAllLeft(y uint64) Int64x8
  5886  
  5887  // ShiftAllLeft shifts each element of x to the left by y bits. Emptied lower bits are zeroed.
  5888  //
  5889  // Asm: VPSLLW, CPU Feature: AVX
  5890  func (x Uint16x8) ShiftAllLeft(y uint64) Uint16x8
  5891  
  5892  // ShiftAllLeft shifts each element of x to the left by y bits. Emptied lower bits are zeroed.
  5893  //
  5894  // Asm: VPSLLW, CPU Feature: AVX2
  5895  func (x Uint16x16) ShiftAllLeft(y uint64) Uint16x16
  5896  
  5897  // ShiftAllLeft shifts each element of x to the left by y bits. Emptied lower bits are zeroed.
  5898  //
  5899  // Asm: VPSLLW, CPU Feature: AVX512
  5900  func (x Uint16x32) ShiftAllLeft(y uint64) Uint16x32
  5901  
  5902  // ShiftAllLeft shifts each element of x to the left by y bits. Emptied lower bits are zeroed.
  5903  //
  5904  // Asm: VPSLLD, CPU Feature: AVX
  5905  func (x Uint32x4) ShiftAllLeft(y uint64) Uint32x4
  5906  
  5907  // ShiftAllLeft shifts each element of x to the left by y bits. Emptied lower bits are zeroed.
  5908  //
  5909  // Asm: VPSLLD, CPU Feature: AVX2
  5910  func (x Uint32x8) ShiftAllLeft(y uint64) Uint32x8
  5911  
  5912  // ShiftAllLeft shifts each element of x to the left by y bits. Emptied lower bits are zeroed.
  5913  //
  5914  // Asm: VPSLLD, CPU Feature: AVX512
  5915  func (x Uint32x16) ShiftAllLeft(y uint64) Uint32x16
  5916  
  5917  // ShiftAllLeft shifts each element of x to the left by y bits. Emptied lower bits are zeroed.
  5918  //
  5919  // Asm: VPSLLQ, CPU Feature: AVX
  5920  func (x Uint64x2) ShiftAllLeft(y uint64) Uint64x2
  5921  
  5922  // ShiftAllLeft shifts each element of x to the left by y bits. Emptied lower bits are zeroed.
  5923  //
  5924  // Asm: VPSLLQ, CPU Feature: AVX2
  5925  func (x Uint64x4) ShiftAllLeft(y uint64) Uint64x4
  5926  
  5927  // ShiftAllLeft shifts each element of x to the left by y bits. Emptied lower bits are zeroed.
  5928  //
  5929  // Asm: VPSLLQ, CPU Feature: AVX512
  5930  func (x Uint64x8) ShiftAllLeft(y uint64) Uint64x8
  5931  
  5932  /* ShiftAllLeftConcat */
  5933  
  5934  // ShiftAllLeftConcat shifts each element of x to the left by shift bits (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  5935  // NOTE(review): 5 bits covers shift counts up to 31, more than the 16-bit element width — confirm how many bits of shift VPSHLDW actually uses.
  5936  //
  5937  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  5938  //
  5939  // Asm: VPSHLDW, CPU Feature: AVX512VBMI2
  5940  func (x Int16x8) ShiftAllLeftConcat(shift uint8, y Int16x8) Int16x8
  5941  
  5942  // ShiftAllLeftConcat shifts each element of x to the left by shift bits (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  5943  // NOTE(review): 5 bits covers shift counts up to 31, more than the 16-bit element width — confirm how many bits of shift VPSHLDW actually uses.
  5944  //
  5945  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  5946  //
  5947  // Asm: VPSHLDW, CPU Feature: AVX512VBMI2
  5948  func (x Int16x16) ShiftAllLeftConcat(shift uint8, y Int16x16) Int16x16
  5949  
  5950  // ShiftAllLeftConcat shifts each element of x to the left by shift bits (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  5951  // NOTE(review): 5 bits covers shift counts up to 31, more than the 16-bit element width — confirm how many bits of shift VPSHLDW actually uses.
  5952  //
  5953  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  5954  //
  5955  // Asm: VPSHLDW, CPU Feature: AVX512VBMI2
  5956  func (x Int16x32) ShiftAllLeftConcat(shift uint8, y Int16x32) Int16x32
  5957  
  5958  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
  5959  // shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  5960  //
  5961  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  5962  //
  5963  // Asm: VPSHLDD, CPU Feature: AVX512VBMI2
  5964  func (x Int32x4) ShiftAllLeftConcat(shift uint8, y Int32x4) Int32x4
  5965  
  5966  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
  5967  // shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
  5968  //
  5969  // shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  5970  //
  5971  // Asm: VPSHLDD, CPU Feature: AVX512VBMI2
  5972  func (x Int32x8) ShiftAllLeftConcat(shift uint8, y Int32x8) Int32x8
  5973  
  5974  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits given by the
  5975  // immediate shift (only the lower 5 bits are used), and then copies the upper bits of y into the emptied lower bits of the shifted x.
  5976  //
  5977  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  5978  //
  5979  // Asm: VPSHLDD, CPU Feature: AVX512VBMI2
  5980  func (x Int32x16) ShiftAllLeftConcat(shift uint8, y Int32x16) Int32x16
  5981  
  5982  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits given by the
  5983  // immediate shift (only the lower 5 bits are used), and then copies the upper bits of y into the emptied lower bits of the shifted x. NOTE(review): for 64-bit elements VPSHLDQ presumably uses the lower 6 bits of shift; confirm against the instruction reference.
  5984  //
  5985  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  5986  //
  5987  // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
  5988  func (x Int64x2) ShiftAllLeftConcat(shift uint8, y Int64x2) Int64x2
  5989  
  5990  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits given by the
  5991  // immediate shift (only the lower 5 bits are used), and then copies the upper bits of y into the emptied lower bits of the shifted x. NOTE(review): for 64-bit elements VPSHLDQ presumably uses the lower 6 bits of shift; confirm against the instruction reference.
  5992  //
  5993  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  5994  //
  5995  // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
  5996  func (x Int64x4) ShiftAllLeftConcat(shift uint8, y Int64x4) Int64x4
  5997  
  5998  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits given by the
  5999  // immediate shift (only the lower 5 bits are used), and then copies the upper bits of y into the emptied lower bits of the shifted x. NOTE(review): for 64-bit elements VPSHLDQ presumably uses the lower 6 bits of shift; confirm against the instruction reference.
  6000  //
  6001  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6002  //
  6003  // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
  6004  func (x Int64x8) ShiftAllLeftConcat(shift uint8, y Int64x8) Int64x8
  6005  
  6006  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits given by the
  6007  // immediate shift (only the lower 5 bits are used), and then copies the upper bits of y into the emptied lower bits of the shifted x. NOTE(review): for 16-bit elements VPSHLDW presumably uses only the lower 4 bits of shift; confirm against the instruction reference.
  6008  //
  6009  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6010  //
  6011  // Asm: VPSHLDW, CPU Feature: AVX512VBMI2
  6012  func (x Uint16x8) ShiftAllLeftConcat(shift uint8, y Uint16x8) Uint16x8
  6013  
  6014  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits given by the
  6015  // immediate shift (only the lower 5 bits are used), and then copies the upper bits of y into the emptied lower bits of the shifted x. NOTE(review): for 16-bit elements VPSHLDW presumably uses only the lower 4 bits of shift; confirm against the instruction reference.
  6016  //
  6017  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6018  //
  6019  // Asm: VPSHLDW, CPU Feature: AVX512VBMI2
  6020  func (x Uint16x16) ShiftAllLeftConcat(shift uint8, y Uint16x16) Uint16x16
  6021  
  6022  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits given by the
  6023  // immediate shift (only the lower 5 bits are used), and then copies the upper bits of y into the emptied lower bits of the shifted x. NOTE(review): for 16-bit elements VPSHLDW presumably uses only the lower 4 bits of shift; confirm against the instruction reference.
  6024  //
  6025  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6026  //
  6027  // Asm: VPSHLDW, CPU Feature: AVX512VBMI2
  6028  func (x Uint16x32) ShiftAllLeftConcat(shift uint8, y Uint16x32) Uint16x32
  6029  
  6030  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits given by the
  6031  // immediate shift (only the lower 5 bits are used), and then copies the upper bits of y into the emptied lower bits of the shifted x.
  6032  //
  6033  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6034  //
  6035  // Asm: VPSHLDD, CPU Feature: AVX512VBMI2
  6036  func (x Uint32x4) ShiftAllLeftConcat(shift uint8, y Uint32x4) Uint32x4
  6037  
  6038  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits given by the
  6039  // immediate shift (only the lower 5 bits are used), and then copies the upper bits of y into the emptied lower bits of the shifted x.
  6040  //
  6041  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6042  //
  6043  // Asm: VPSHLDD, CPU Feature: AVX512VBMI2
  6044  func (x Uint32x8) ShiftAllLeftConcat(shift uint8, y Uint32x8) Uint32x8
  6045  
  6046  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits given by the
  6047  // immediate shift (only the lower 5 bits are used), and then copies the upper bits of y into the emptied lower bits of the shifted x.
  6048  //
  6049  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6050  //
  6051  // Asm: VPSHLDD, CPU Feature: AVX512VBMI2
  6052  func (x Uint32x16) ShiftAllLeftConcat(shift uint8, y Uint32x16) Uint32x16
  6053  
  6054  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits given by the
  6055  // immediate shift (only the lower 5 bits are used), and then copies the upper bits of y into the emptied lower bits of the shifted x. NOTE(review): for 64-bit elements VPSHLDQ presumably uses the lower 6 bits of shift; confirm against the instruction reference.
  6056  //
  6057  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6058  //
  6059  // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
  6060  func (x Uint64x2) ShiftAllLeftConcat(shift uint8, y Uint64x2) Uint64x2
  6061  
  6062  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits given by the
  6063  // immediate shift (only the lower 5 bits are used), and then copies the upper bits of y into the emptied lower bits of the shifted x. NOTE(review): for 64-bit elements VPSHLDQ presumably uses the lower 6 bits of shift; confirm against the instruction reference.
  6064  //
  6065  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6066  //
  6067  // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
  6068  func (x Uint64x4) ShiftAllLeftConcat(shift uint8, y Uint64x4) Uint64x4
  6069  
  6070  // ShiftAllLeftConcat shifts each element of x to the left by the number of bits given by the
  6071  // immediate shift (only the lower 5 bits are used), and then copies the upper bits of y into the emptied lower bits of the shifted x. NOTE(review): for 64-bit elements VPSHLDQ presumably uses the lower 6 bits of shift; confirm against the instruction reference.
  6072  //
  6073  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6074  //
  6075  // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
  6076  func (x Uint64x8) ShiftAllLeftConcat(shift uint8, y Uint64x8) Uint64x8
  6077  
  6078  /* ShiftAllRight */
  6079  
  6080  // ShiftAllRight shifts each element of x to the right by the number of bits given by y. Emptied upper bits are filled with the sign bit.
  6081  //
  6082  // Asm: VPSRAW, CPU Feature: AVX
  6083  func (x Int16x8) ShiftAllRight(y uint64) Int16x8
  6084  
  6085  // ShiftAllRight shifts each element of x to the right by the number of bits given by y. Emptied upper bits are filled with the sign bit.
  6086  //
  6087  // Asm: VPSRAW, CPU Feature: AVX2
  6088  func (x Int16x16) ShiftAllRight(y uint64) Int16x16
  6089  
  6090  // ShiftAllRight shifts each element of x to the right by the number of bits given by y. Emptied upper bits are filled with the sign bit.
  6091  //
  6092  // Asm: VPSRAW, CPU Feature: AVX512
  6093  func (x Int16x32) ShiftAllRight(y uint64) Int16x32
  6094  
  6095  // ShiftAllRight shifts each element of x to the right by the number of bits given by y. Emptied upper bits are filled with the sign bit.
  6096  //
  6097  // Asm: VPSRAD, CPU Feature: AVX
  6098  func (x Int32x4) ShiftAllRight(y uint64) Int32x4
  6099  
  6100  // ShiftAllRight shifts each element of x to the right by the number of bits given by y. Emptied upper bits are filled with the sign bit.
  6101  //
  6102  // Asm: VPSRAD, CPU Feature: AVX2
  6103  func (x Int32x8) ShiftAllRight(y uint64) Int32x8
  6104  
  6105  // ShiftAllRight shifts each element of x to the right by the number of bits given by y. Emptied upper bits are filled with the sign bit.
  6106  //
  6107  // Asm: VPSRAD, CPU Feature: AVX512
  6108  func (x Int32x16) ShiftAllRight(y uint64) Int32x16
  6109  
  6110  // ShiftAllRight shifts each element of x to the right by the number of bits given by y. Emptied upper bits are filled with the sign bit.
  6111  //
  6112  // Asm: VPSRAQ, CPU Feature: AVX512
  6113  func (x Int64x2) ShiftAllRight(y uint64) Int64x2
  6114  
  6115  // ShiftAllRight shifts each element of x to the right by the number of bits given by y. Emptied upper bits are filled with the sign bit.
  6116  //
  6117  // Asm: VPSRAQ, CPU Feature: AVX512
  6118  func (x Int64x4) ShiftAllRight(y uint64) Int64x4
  6119  
  6120  // ShiftAllRight shifts each element of x to the right by the number of bits given by y. Emptied upper bits are filled with the sign bit.
  6121  //
  6122  // Asm: VPSRAQ, CPU Feature: AVX512
  6123  func (x Int64x8) ShiftAllRight(y uint64) Int64x8
  6124  
  6125  // ShiftAllRight shifts each element of x to the right by the number of bits given by y. Emptied upper bits are zeroed.
  6126  //
  6127  // Asm: VPSRLW, CPU Feature: AVX
  6128  func (x Uint16x8) ShiftAllRight(y uint64) Uint16x8
  6129  
  6130  // ShiftAllRight shifts each element of x to the right by the number of bits given by y. Emptied upper bits are zeroed.
  6131  //
  6132  // Asm: VPSRLW, CPU Feature: AVX2
  6133  func (x Uint16x16) ShiftAllRight(y uint64) Uint16x16
  6134  
  6135  // ShiftAllRight shifts each element of x to the right by the number of bits given by y. Emptied upper bits are zeroed.
  6136  //
  6137  // Asm: VPSRLW, CPU Feature: AVX512
  6138  func (x Uint16x32) ShiftAllRight(y uint64) Uint16x32
  6139  
  6140  // ShiftAllRight shifts each element of x to the right by the number of bits given by y. Emptied upper bits are zeroed.
  6141  //
  6142  // Asm: VPSRLD, CPU Feature: AVX
  6143  func (x Uint32x4) ShiftAllRight(y uint64) Uint32x4
  6144  
  6145  // ShiftAllRight shifts each element of x to the right by the number of bits given by y. Emptied upper bits are zeroed.
  6146  //
  6147  // Asm: VPSRLD, CPU Feature: AVX2
  6148  func (x Uint32x8) ShiftAllRight(y uint64) Uint32x8
  6149  
  6150  // ShiftAllRight shifts each element of x to the right by the number of bits given by y. Emptied upper bits are zeroed.
  6151  //
  6152  // Asm: VPSRLD, CPU Feature: AVX512
  6153  func (x Uint32x16) ShiftAllRight(y uint64) Uint32x16
  6154  
  6155  // ShiftAllRight shifts each element of x to the right by the number of bits given by y. Emptied upper bits are zeroed.
  6156  //
  6157  // Asm: VPSRLQ, CPU Feature: AVX
  6158  func (x Uint64x2) ShiftAllRight(y uint64) Uint64x2
  6159  
  6160  // ShiftAllRight shifts each element of x to the right by the number of bits given by y. Emptied upper bits are zeroed.
  6161  //
  6162  // Asm: VPSRLQ, CPU Feature: AVX2
  6163  func (x Uint64x4) ShiftAllRight(y uint64) Uint64x4
  6164  
  6165  // ShiftAllRight shifts each element of x to the right by the number of bits given by y. Emptied upper bits are zeroed.
  6166  //
  6167  // Asm: VPSRLQ, CPU Feature: AVX512
  6168  func (x Uint64x8) ShiftAllRight(y uint64) Uint64x8
  6169  
  6170  /* ShiftAllRightConcat */
  6171  
  6172  // ShiftAllRightConcat shifts each element of x to the right by the number of bits given by the
  6173  // immediate shift (only the lower 5 bits are used), and then copies the lower bits of y into the emptied upper bits of the shifted x. NOTE(review): for 16-bit elements VPSHRDW presumably uses only the lower 4 bits of shift; confirm against the instruction reference.
  6174  //
  6175  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6176  //
  6177  // Asm: VPSHRDW, CPU Feature: AVX512VBMI2
  6178  func (x Int16x8) ShiftAllRightConcat(shift uint8, y Int16x8) Int16x8
  6179  
  6180  // ShiftAllRightConcat shifts each element of x to the right by the number of bits given by the
  6181  // immediate shift (only the lower 5 bits are used), and then copies the lower bits of y into the emptied upper bits of the shifted x. NOTE(review): for 16-bit elements VPSHRDW presumably uses only the lower 4 bits of shift; confirm against the instruction reference.
  6182  //
  6183  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6184  //
  6185  // Asm: VPSHRDW, CPU Feature: AVX512VBMI2
  6186  func (x Int16x16) ShiftAllRightConcat(shift uint8, y Int16x16) Int16x16
  6187  
  6188  // ShiftAllRightConcat shifts each element of x to the right by the number of bits given by the
  6189  // immediate shift (only the lower 5 bits are used), and then copies the lower bits of y into the emptied upper bits of the shifted x. NOTE(review): for 16-bit elements VPSHRDW presumably uses only the lower 4 bits of shift; confirm against the instruction reference.
  6190  //
  6191  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6192  //
  6193  // Asm: VPSHRDW, CPU Feature: AVX512VBMI2
  6194  func (x Int16x32) ShiftAllRightConcat(shift uint8, y Int16x32) Int16x32
  6195  
  6196  // ShiftAllRightConcat shifts each element of x to the right by the number of bits given by the
  6197  // immediate shift (only the lower 5 bits are used), and then copies the lower bits of y into the emptied upper bits of the shifted x.
  6198  //
  6199  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6200  //
  6201  // Asm: VPSHRDD, CPU Feature: AVX512VBMI2
  6202  func (x Int32x4) ShiftAllRightConcat(shift uint8, y Int32x4) Int32x4
  6203  
  6204  // ShiftAllRightConcat shifts each element of x to the right by the number of bits given by the
  6205  // immediate shift (only the lower 5 bits are used), and then copies the lower bits of y into the emptied upper bits of the shifted x.
  6206  //
  6207  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6208  //
  6209  // Asm: VPSHRDD, CPU Feature: AVX512VBMI2
  6210  func (x Int32x8) ShiftAllRightConcat(shift uint8, y Int32x8) Int32x8
  6211  
  6212  // ShiftAllRightConcat shifts each element of x to the right by the number of bits given by the
  6213  // immediate shift (only the lower 5 bits are used), and then copies the lower bits of y into the emptied upper bits of the shifted x.
  6214  //
  6215  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6216  //
  6217  // Asm: VPSHRDD, CPU Feature: AVX512VBMI2
  6218  func (x Int32x16) ShiftAllRightConcat(shift uint8, y Int32x16) Int32x16
  6219  
  6220  // ShiftAllRightConcat shifts each element of x to the right by the number of bits given by the
  6221  // immediate shift (only the lower 5 bits are used), and then copies the lower bits of y into the emptied upper bits of the shifted x. NOTE(review): for 64-bit elements VPSHRDQ presumably uses the lower 6 bits of shift; confirm against the instruction reference.
  6222  //
  6223  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6224  //
  6225  // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
  6226  func (x Int64x2) ShiftAllRightConcat(shift uint8, y Int64x2) Int64x2
  6227  
  6228  // ShiftAllRightConcat shifts each element of x to the right by the number of bits given by the
  6229  // immediate shift (only the lower 5 bits are used), and then copies the lower bits of y into the emptied upper bits of the shifted x. NOTE(review): for 64-bit elements VPSHRDQ presumably uses the lower 6 bits of shift; confirm against the instruction reference.
  6230  //
  6231  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6232  //
  6233  // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
  6234  func (x Int64x4) ShiftAllRightConcat(shift uint8, y Int64x4) Int64x4
  6235  
  6236  // ShiftAllRightConcat shifts each element of x to the right by the number of bits given by the
  6237  // immediate shift (only the lower 5 bits are used), and then copies the lower bits of y into the emptied upper bits of the shifted x. NOTE(review): for 64-bit elements VPSHRDQ presumably uses the lower 6 bits of shift; confirm against the instruction reference.
  6238  //
  6239  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6240  //
  6241  // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
  6242  func (x Int64x8) ShiftAllRightConcat(shift uint8, y Int64x8) Int64x8
  6243  
  6244  // ShiftAllRightConcat shifts each element of x to the right by the number of bits given by the
  6245  // immediate shift (only the lower 5 bits are used), and then copies the lower bits of y into the emptied upper bits of the shifted x. NOTE(review): for 16-bit elements VPSHRDW presumably uses only the lower 4 bits of shift; confirm against the instruction reference.
  6246  //
  6247  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6248  //
  6249  // Asm: VPSHRDW, CPU Feature: AVX512VBMI2
  6250  func (x Uint16x8) ShiftAllRightConcat(shift uint8, y Uint16x8) Uint16x8
  6251  
  6252  // ShiftAllRightConcat shifts each element of x to the right by the number of bits given by the
  6253  // immediate shift (only the lower 5 bits are used), and then copies the lower bits of y into the emptied upper bits of the shifted x. NOTE(review): for 16-bit elements VPSHRDW presumably uses only the lower 4 bits of shift; confirm against the instruction reference.
  6254  //
  6255  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6256  //
  6257  // Asm: VPSHRDW, CPU Feature: AVX512VBMI2
  6258  func (x Uint16x16) ShiftAllRightConcat(shift uint8, y Uint16x16) Uint16x16
  6259  
  6260  // ShiftAllRightConcat shifts each element of x to the right by the number of bits given by the
  6261  // immediate shift (only the lower 5 bits are used), and then copies the lower bits of y into the emptied upper bits of the shifted x. NOTE(review): for 16-bit elements VPSHRDW presumably uses only the lower 4 bits of shift; confirm against the instruction reference.
  6262  //
  6263  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6264  //
  6265  // Asm: VPSHRDW, CPU Feature: AVX512VBMI2
  6266  func (x Uint16x32) ShiftAllRightConcat(shift uint8, y Uint16x32) Uint16x32
  6267  
  6268  // ShiftAllRightConcat shifts each element of x to the right by the number of bits given by the
  6269  // immediate shift (only the lower 5 bits are used), and then copies the lower bits of y into the emptied upper bits of the shifted x.
  6270  //
  6271  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6272  //
  6273  // Asm: VPSHRDD, CPU Feature: AVX512VBMI2
  6274  func (x Uint32x4) ShiftAllRightConcat(shift uint8, y Uint32x4) Uint32x4
  6275  
  6276  // ShiftAllRightConcat shifts each element of x to the right by the number of bits given by the
  6277  // immediate shift (only the lower 5 bits are used), and then copies the lower bits of y into the emptied upper bits of the shifted x.
  6278  //
  6279  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6280  //
  6281  // Asm: VPSHRDD, CPU Feature: AVX512VBMI2
  6282  func (x Uint32x8) ShiftAllRightConcat(shift uint8, y Uint32x8) Uint32x8
  6283  
  6284  // ShiftAllRightConcat shifts each element of x to the right by the number of bits given by the
  6285  // immediate shift (only the lower 5 bits are used), and then copies the lower bits of y into the emptied upper bits of the shifted x.
  6286  //
  6287  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6288  //
  6289  // Asm: VPSHRDD, CPU Feature: AVX512VBMI2
  6290  func (x Uint32x16) ShiftAllRightConcat(shift uint8, y Uint32x16) Uint32x16
  6291  
  6292  // ShiftAllRightConcat shifts each element of x to the right by the number of bits given by the
  6293  // immediate shift (only the lower 5 bits are used), and then copies the lower bits of y into the emptied upper bits of the shifted x. NOTE(review): for 64-bit elements VPSHRDQ presumably uses the lower 6 bits of shift; confirm against the instruction reference.
  6294  //
  6295  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6296  //
  6297  // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
  6298  func (x Uint64x2) ShiftAllRightConcat(shift uint8, y Uint64x2) Uint64x2
  6299  
  6300  // ShiftAllRightConcat shifts each element of x to the right by the number of bits given by the
  6301  // immediate shift (only the lower 5 bits are used), and then copies the lower bits of y into the emptied upper bits of the shifted x. NOTE(review): for 64-bit elements VPSHRDQ presumably uses the lower 6 bits of shift; confirm against the instruction reference.
  6302  //
  6303  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6304  //
  6305  // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
  6306  func (x Uint64x4) ShiftAllRightConcat(shift uint8, y Uint64x4) Uint64x4
  6307  
  6308  // ShiftAllRightConcat shifts each element of x to the right by the number of bits given by the
  6309  // immediate shift (only the lower 5 bits are used), and then copies the lower bits of y into the emptied upper bits of the shifted x. NOTE(review): for 64-bit elements VPSHRDQ presumably uses the lower 6 bits of shift; confirm against the instruction reference.
  6310  //
  6311  // Passing a constant shift gives better performance; a non-constant value will be translated into a jump table.
  6312  //
  6313  // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
  6314  func (x Uint64x8) ShiftAllRightConcat(shift uint8, y Uint64x8) Uint64x8
  6315  
  6316  /* ShiftLeft */
  6317  
  6318  // ShiftLeft shifts each element of x to the left by the number of bits given by the corresponding element of y. Emptied lower bits are zeroed.
  6319  //
  6320  // Asm: VPSLLVW, CPU Feature: AVX512
  6321  func (x Int16x8) ShiftLeft(y Int16x8) Int16x8
  6322  
  6323  // ShiftLeft shifts each element of x to the left by the number of bits given by the corresponding element of y. Emptied lower bits are zeroed.
  6324  //
  6325  // Asm: VPSLLVW, CPU Feature: AVX512
  6326  func (x Int16x16) ShiftLeft(y Int16x16) Int16x16
  6327  
  6328  // ShiftLeft shifts each element of x to the left by the number of bits given by the corresponding element of y. Emptied lower bits are zeroed.
  6329  //
  6330  // Asm: VPSLLVW, CPU Feature: AVX512
  6331  func (x Int16x32) ShiftLeft(y Int16x32) Int16x32
  6332  
  6333  // ShiftLeft shifts each element of x to the left by the number of bits given by the corresponding element of y. Emptied lower bits are zeroed.
  6334  //
  6335  // Asm: VPSLLVD, CPU Feature: AVX2
  6336  func (x Int32x4) ShiftLeft(y Int32x4) Int32x4
  6337  
  6338  // ShiftLeft shifts each element of x to the left by the number of bits given by the corresponding element of y. Emptied lower bits are zeroed.
  6339  //
  6340  // Asm: VPSLLVD, CPU Feature: AVX2
  6341  func (x Int32x8) ShiftLeft(y Int32x8) Int32x8
  6342  
  6343  // ShiftLeft shifts each element of x to the left by the number of bits given by the corresponding element of y. Emptied lower bits are zeroed.
  6344  //
  6345  // Asm: VPSLLVD, CPU Feature: AVX512
  6346  func (x Int32x16) ShiftLeft(y Int32x16) Int32x16
  6347  
  6348  // ShiftLeft shifts each element of x to the left by the number of bits given by the corresponding element of y. Emptied lower bits are zeroed.
  6349  //
  6350  // Asm: VPSLLVQ, CPU Feature: AVX2
  6351  func (x Int64x2) ShiftLeft(y Int64x2) Int64x2
  6352  
  6353  // ShiftLeft shifts each element of x to the left by the number of bits given by the corresponding element of y. Emptied lower bits are zeroed.
  6354  //
  6355  // Asm: VPSLLVQ, CPU Feature: AVX2
  6356  func (x Int64x4) ShiftLeft(y Int64x4) Int64x4
  6357  
  6358  // ShiftLeft shifts each element of x to the left by the number of bits given by the corresponding element of y. Emptied lower bits are zeroed.
  6359  //
  6360  // Asm: VPSLLVQ, CPU Feature: AVX512
  6361  func (x Int64x8) ShiftLeft(y Int64x8) Int64x8
  6362  
  6363  // ShiftLeft shifts each element of x to the left by the number of bits given by the corresponding element of y. Emptied lower bits are zeroed.
  6364  //
  6365  // Asm: VPSLLVW, CPU Feature: AVX512
  6366  func (x Uint16x8) ShiftLeft(y Uint16x8) Uint16x8
  6367  
  6368  // ShiftLeft shifts each element of x to the left by the number of bits given by the corresponding element of y. Emptied lower bits are zeroed.
  6369  //
  6370  // Asm: VPSLLVW, CPU Feature: AVX512
  6371  func (x Uint16x16) ShiftLeft(y Uint16x16) Uint16x16
  6372  
  6373  // ShiftLeft shifts each element of x to the left by the number of bits given by the corresponding element of y. Emptied lower bits are zeroed.
  6374  //
  6375  // Asm: VPSLLVW, CPU Feature: AVX512
  6376  func (x Uint16x32) ShiftLeft(y Uint16x32) Uint16x32
  6377  
  6378  // ShiftLeft shifts each element of x to the left by the number of bits given by the corresponding element of y. Emptied lower bits are zeroed.
  6379  //
  6380  // Asm: VPSLLVD, CPU Feature: AVX2
  6381  func (x Uint32x4) ShiftLeft(y Uint32x4) Uint32x4
  6382  
  6383  // ShiftLeft shifts each element of x to the left by the number of bits given by the corresponding element of y. Emptied lower bits are zeroed.
  6384  //
  6385  // Asm: VPSLLVD, CPU Feature: AVX2
  6386  func (x Uint32x8) ShiftLeft(y Uint32x8) Uint32x8
  6387  
  6388  // ShiftLeft shifts each element of x to the left by the number of bits given by the corresponding element of y. Emptied lower bits are zeroed.
  6389  //
  6390  // Asm: VPSLLVD, CPU Feature: AVX512
  6391  func (x Uint32x16) ShiftLeft(y Uint32x16) Uint32x16
  6392  
  6393  // ShiftLeft shifts each element of x to the left by the number of bits given by the corresponding element of y. Emptied lower bits are zeroed.
  6394  //
  6395  // Asm: VPSLLVQ, CPU Feature: AVX2
  6396  func (x Uint64x2) ShiftLeft(y Uint64x2) Uint64x2
  6397  
  6398  // ShiftLeft shifts each element of x to the left by the number of bits given by the corresponding element of y. Emptied lower bits are zeroed.
  6399  //
  6400  // Asm: VPSLLVQ, CPU Feature: AVX2
  6401  func (x Uint64x4) ShiftLeft(y Uint64x4) Uint64x4
  6402  
  6403  // ShiftLeft shifts each element of x to the left by the number of bits given by the corresponding element of y. Emptied lower bits are zeroed.
  6404  //
  6405  // Asm: VPSLLVQ, CPU Feature: AVX512
  6406  func (x Uint64x8) ShiftLeft(y Uint64x8) Uint64x8
  6407  
  6408  /* ShiftLeftConcat */
  6409  
  6410  // ShiftLeftConcat shifts each element of x to the left by the number of bits given by the
  6411  // corresponding element of y (only the lower 5 bits are used), and then copies the upper bits of z into the emptied lower bits of the shifted x. NOTE(review): for 16-bit elements VPSHLDVW presumably uses only the lower 4 bits of each count; confirm against the instruction reference.
  6412  //
  6413  // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
  6414  func (x Int16x8) ShiftLeftConcat(y Int16x8, z Int16x8) Int16x8
  6415  
  6416  // ShiftLeftConcat shifts each element of x to the left by the number of bits given by the
  6417  // corresponding element of y (only the lower 5 bits are used), and then copies the upper bits of z into the emptied lower bits of the shifted x. NOTE(review): for 16-bit elements VPSHLDVW presumably uses only the lower 4 bits of each count; confirm against the instruction reference.
  6418  //
  6419  // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
  6420  func (x Int16x16) ShiftLeftConcat(y Int16x16, z Int16x16) Int16x16
  6421  
  6422  // ShiftLeftConcat shifts each element of x to the left by the number of bits given by the
  6423  // corresponding element of y (only the lower 5 bits are used), and then copies the upper bits of z into the emptied lower bits of the shifted x. NOTE(review): for 16-bit elements VPSHLDVW presumably uses only the lower 4 bits of each count; confirm against the instruction reference.
  6424  //
  6425  // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
  6426  func (x Int16x32) ShiftLeftConcat(y Int16x32, z Int16x32) Int16x32
  6427  
  6428  // ShiftLeftConcat shifts each element of x to the left by the number of bits given by the
  6429  // corresponding element of y (only the lower 5 bits are used), and then copies the upper bits of z into the emptied lower bits of the shifted x.
  6430  //
  6431  // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
  6432  func (x Int32x4) ShiftLeftConcat(y Int32x4, z Int32x4) Int32x4
  6433  
  6434  // ShiftLeftConcat shifts each element of x to the left by the number of bits given by the
  6435  // corresponding element of y (only the lower 5 bits are used), and then copies the upper bits of z into the emptied lower bits of the shifted x.
  6436  //
  6437  // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
  6438  func (x Int32x8) ShiftLeftConcat(y Int32x8, z Int32x8) Int32x8
  6439  
  6440  // ShiftLeftConcat shifts each element of x to the left by the number of bits given by the
  6441  // corresponding element of y (only the lower 5 bits are used), and then copies the upper bits of z into the emptied lower bits of the shifted x.
  6442  //
  6443  // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
  6444  func (x Int32x16) ShiftLeftConcat(y Int32x16, z Int32x16) Int32x16
  6445  
  6446  // ShiftLeftConcat shifts each element of x to the left by the number of bits given by the
  6447  // corresponding element of y (only the lower 5 bits are used), and then copies the upper bits of z into the emptied lower bits of the shifted x. NOTE(review): for 64-bit elements VPSHLDVQ presumably uses the lower 6 bits of each count; confirm against the instruction reference.
  6448  //
  6449  // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
  6450  func (x Int64x2) ShiftLeftConcat(y Int64x2, z Int64x2) Int64x2
  6451  
  6452  // ShiftLeftConcat shifts each element of x to the left by the number of bits given by the
  6453  // corresponding element of y (only the lower 5 bits are used), and then copies the upper bits of z into the emptied lower bits of the shifted x. NOTE(review): for 64-bit elements VPSHLDVQ presumably uses the lower 6 bits of each count; confirm against the instruction reference.
  6454  //
  6455  // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
  6456  func (x Int64x4) ShiftLeftConcat(y Int64x4, z Int64x4) Int64x4
  6457  
  6458  // ShiftLeftConcat shifts each element of x to the left by the number of bits given by the
  6459  // corresponding element of y (only the lower 5 bits are used), and then copies the upper bits of z into the emptied lower bits of the shifted x. NOTE(review): for 64-bit elements VPSHLDVQ presumably uses the lower 6 bits of each count; confirm against the instruction reference.
  6460  //
  6461  // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
  6462  func (x Int64x8) ShiftLeftConcat(y Int64x8, z Int64x8) Int64x8
  6463  
  6464  // ShiftLeftConcat shifts each element of x to the left by the number of bits given by the
  6465  // corresponding element of y (only the lower 5 bits are used), and then copies the upper bits of z into the emptied lower bits of the shifted x. NOTE(review): for 16-bit elements VPSHLDVW presumably uses only the lower 4 bits of each count; confirm against the instruction reference.
  6466  //
  6467  // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
  6468  func (x Uint16x8) ShiftLeftConcat(y Uint16x8, z Uint16x8) Uint16x8
  6469  
  6470  // ShiftLeftConcat shifts each element of x to the left by the number of bits given by the
  6471  // corresponding element of y (only the lower 5 bits are used), and then copies the upper bits of z into the emptied lower bits of the shifted x. NOTE(review): for 16-bit elements VPSHLDVW presumably uses only the lower 4 bits of each count; confirm against the instruction reference.
  6472  //
  6473  // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
  6474  func (x Uint16x16) ShiftLeftConcat(y Uint16x16, z Uint16x16) Uint16x16
  6475  
  6476  // ShiftLeftConcat shifts each element of x to the left by the number of bits given by the
  6477  // corresponding element of y (only the lower 5 bits are used), and then copies the upper bits of z into the emptied lower bits of the shifted x. NOTE(review): for 16-bit elements VPSHLDVW presumably uses only the lower 4 bits of each count; confirm against the instruction reference.
  6478  //
  6479  // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
  6480  func (x Uint16x32) ShiftLeftConcat(y Uint16x32, z Uint16x32) Uint16x32
  6481  
  6482  // ShiftLeftConcat shifts each element of x to the left by the number of bits given by the
  6483  // corresponding element of y (only the lower 5 bits are used), and then copies the upper bits of z into the emptied lower bits of the shifted x.
  6484  //
  6485  // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
  6486  func (x Uint32x4) ShiftLeftConcat(y Uint32x4, z Uint32x4) Uint32x4
  6487  
  6488  // ShiftLeftConcat shifts each element of x to the left by the number of bits given by the
  6489  // corresponding element of y (only the lower 5 bits are used), and then copies the upper bits of z into the emptied lower bits of the shifted x.
  6490  //
  6491  // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
  6492  func (x Uint32x8) ShiftLeftConcat(y Uint32x8, z Uint32x8) Uint32x8
  6493  
  6494  // ShiftLeftConcat shifts each element of x to the left by the number of bits given by the
  6495  // corresponding element of y (only the lower 5 bits are used), and then copies the upper bits of z into the emptied lower bits of the shifted x.
  6496  //
  6497  // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
  6498  func (x Uint32x16) ShiftLeftConcat(y Uint32x16, z Uint32x16) Uint32x16
  6499  
  6500  // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
  6501  // corresponding element of y (only the lower 6 bits are used, as elements are 64 bits wide), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  6502  //
  6503  // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
  6504  func (x Uint64x2) ShiftLeftConcat(y Uint64x2, z Uint64x2) Uint64x2
  6505  
  6506  // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
  6507  // corresponding element of y (only the lower 6 bits are used, as elements are 64 bits wide), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  6508  //
  6509  // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
  6510  func (x Uint64x4) ShiftLeftConcat(y Uint64x4, z Uint64x4) Uint64x4
  6511  
  6512  // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
  6513  // corresponding element of y (only the lower 6 bits are used, as elements are 64 bits wide), and then copies the upper bits of z to the emptied lower bits of the shifted x.
  6514  //
  6515  // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
  6516  func (x Uint64x8) ShiftLeftConcat(y Uint64x8, z Uint64x8) Uint64x8
  6517  
  6518  /* ShiftRight */
  6519  
  6520  // ShiftRight shifts each element of x to the right by the number of bits specified by the corresponding element of y. Emptied upper bits are filled with the sign bit (an arithmetic shift).
  6521  //
  6522  // Asm: VPSRAVW, CPU Feature: AVX512
  6523  func (x Int16x8) ShiftRight(y Int16x8) Int16x8
  6524  
  6525  // ShiftRight shifts each element of x to the right by the number of bits specified by the corresponding element of y. Emptied upper bits are filled with the sign bit (an arithmetic shift).
  6526  //
  6527  // Asm: VPSRAVW, CPU Feature: AVX512
  6528  func (x Int16x16) ShiftRight(y Int16x16) Int16x16
  6529  
  6530  // ShiftRight shifts each element of x to the right by the number of bits specified by the corresponding element of y. Emptied upper bits are filled with the sign bit (an arithmetic shift).
  6531  //
  6532  // Asm: VPSRAVW, CPU Feature: AVX512
  6533  func (x Int16x32) ShiftRight(y Int16x32) Int16x32
  6534  
  6535  // ShiftRight shifts each element of x to the right by the number of bits specified by the corresponding element of y. Emptied upper bits are filled with the sign bit (an arithmetic shift).
  6536  //
  6537  // Asm: VPSRAVD, CPU Feature: AVX2
  6538  func (x Int32x4) ShiftRight(y Int32x4) Int32x4
  6539  
  6540  // ShiftRight shifts each element of x to the right by the number of bits specified by the corresponding element of y. Emptied upper bits are filled with the sign bit (an arithmetic shift).
  6541  //
  6542  // Asm: VPSRAVD, CPU Feature: AVX2
  6543  func (x Int32x8) ShiftRight(y Int32x8) Int32x8
  6544  
  6545  // ShiftRight shifts each element of x to the right by the number of bits specified by the corresponding element of y. Emptied upper bits are filled with the sign bit (an arithmetic shift).
  6546  //
  6547  // Asm: VPSRAVD, CPU Feature: AVX512
  6548  func (x Int32x16) ShiftRight(y Int32x16) Int32x16
  6549  
  6550  // ShiftRight shifts each element of x to the right by the number of bits specified by the corresponding element of y. Emptied upper bits are filled with the sign bit (an arithmetic shift).
  6551  //
  6552  // Asm: VPSRAVQ, CPU Feature: AVX512
  6553  func (x Int64x2) ShiftRight(y Int64x2) Int64x2
  6554  
  6555  // ShiftRight shifts each element of x to the right by the number of bits specified by the corresponding element of y. Emptied upper bits are filled with the sign bit (an arithmetic shift).
  6556  //
  6557  // Asm: VPSRAVQ, CPU Feature: AVX512
  6558  func (x Int64x4) ShiftRight(y Int64x4) Int64x4
  6559  
  6560  // ShiftRight shifts each element of x to the right by the number of bits specified by the corresponding element of y. Emptied upper bits are filled with the sign bit (an arithmetic shift).
  6561  //
  6562  // Asm: VPSRAVQ, CPU Feature: AVX512
  6563  func (x Int64x8) ShiftRight(y Int64x8) Int64x8
  6564  
  6565  // ShiftRight shifts each element of x to the right by the number of bits specified by the corresponding element of y. Emptied upper bits are zeroed (a logical shift).
  6566  //
  6567  // Asm: VPSRLVW, CPU Feature: AVX512
  6568  func (x Uint16x8) ShiftRight(y Uint16x8) Uint16x8
  6569  
  6570  // ShiftRight shifts each element of x to the right by the number of bits specified by the corresponding element of y. Emptied upper bits are zeroed (a logical shift).
  6571  //
  6572  // Asm: VPSRLVW, CPU Feature: AVX512
  6573  func (x Uint16x16) ShiftRight(y Uint16x16) Uint16x16
  6574  
  6575  // ShiftRight shifts each element of x to the right by the number of bits specified by the corresponding element of y. Emptied upper bits are zeroed (a logical shift).
  6576  //
  6577  // Asm: VPSRLVW, CPU Feature: AVX512
  6578  func (x Uint16x32) ShiftRight(y Uint16x32) Uint16x32
  6579  
  6580  // ShiftRight shifts each element of x to the right by the number of bits specified by the corresponding element of y. Emptied upper bits are zeroed (a logical shift).
  6581  //
  6582  // Asm: VPSRLVD, CPU Feature: AVX2
  6583  func (x Uint32x4) ShiftRight(y Uint32x4) Uint32x4
  6584  
  6585  // ShiftRight shifts each element of x to the right by the number of bits specified by the corresponding element of y. Emptied upper bits are zeroed (a logical shift).
  6586  //
  6587  // Asm: VPSRLVD, CPU Feature: AVX2
  6588  func (x Uint32x8) ShiftRight(y Uint32x8) Uint32x8
  6589  
  6590  // ShiftRight shifts each element of x to the right by the number of bits specified by the corresponding element of y. Emptied upper bits are zeroed (a logical shift).
  6591  //
  6592  // Asm: VPSRLVD, CPU Feature: AVX512
  6593  func (x Uint32x16) ShiftRight(y Uint32x16) Uint32x16
  6594  
  6595  // ShiftRight shifts each element of x to the right by the number of bits specified by the corresponding element of y. Emptied upper bits are zeroed (a logical shift).
  6596  //
  6597  // Asm: VPSRLVQ, CPU Feature: AVX2
  6598  func (x Uint64x2) ShiftRight(y Uint64x2) Uint64x2
  6599  
  6600  // ShiftRight shifts each element of x to the right by the number of bits specified by the corresponding element of y. Emptied upper bits are zeroed (a logical shift).
  6601  //
  6602  // Asm: VPSRLVQ, CPU Feature: AVX2
  6603  func (x Uint64x4) ShiftRight(y Uint64x4) Uint64x4
  6604  
  6605  // ShiftRight shifts each element of x to the right by the number of bits specified by the corresponding element of y. Emptied upper bits are zeroed (a logical shift).
  6606  //
  6607  // Asm: VPSRLVQ, CPU Feature: AVX512
  6608  func (x Uint64x8) ShiftRight(y Uint64x8) Uint64x8
  6609  
  6610  /* ShiftRightConcat */
  6611  
  6612  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6613  // corresponding element of y (only the lower 4 bits are used, as elements are 16 bits wide), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6614  //
  6615  // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
  6616  func (x Int16x8) ShiftRightConcat(y Int16x8, z Int16x8) Int16x8
  6617  
  6618  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6619  // corresponding element of y (only the lower 4 bits are used, as elements are 16 bits wide), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6620  //
  6621  // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
  6622  func (x Int16x16) ShiftRightConcat(y Int16x16, z Int16x16) Int16x16
  6623  
  6624  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6625  // corresponding element of y (only the lower 4 bits are used, as elements are 16 bits wide), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6626  //
  6627  // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
  6628  func (x Int16x32) ShiftRightConcat(y Int16x32, z Int16x32) Int16x32
  6629  
  6630  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6631  // corresponding element of y (only the lower 5 bits are used, as elements are 32 bits wide), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6632  //
  6633  // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
  6634  func (x Int32x4) ShiftRightConcat(y Int32x4, z Int32x4) Int32x4
  6635  
  6636  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6637  // corresponding element of y (only the lower 5 bits are used, as elements are 32 bits wide), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6638  //
  6639  // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
  6640  func (x Int32x8) ShiftRightConcat(y Int32x8, z Int32x8) Int32x8
  6641  
  6642  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6643  // corresponding element of y (only the lower 5 bits are used, as elements are 32 bits wide), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6644  //
  6645  // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
  6646  func (x Int32x16) ShiftRightConcat(y Int32x16, z Int32x16) Int32x16
  6647  
  6648  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6649  // corresponding element of y (only the lower 6 bits are used, as elements are 64 bits wide), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6650  //
  6651  // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
  6652  func (x Int64x2) ShiftRightConcat(y Int64x2, z Int64x2) Int64x2
  6653  
  6654  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6655  // corresponding element of y (only the lower 6 bits are used, as elements are 64 bits wide), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6656  //
  6657  // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
  6658  func (x Int64x4) ShiftRightConcat(y Int64x4, z Int64x4) Int64x4
  6659  
  6660  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6661  // corresponding element of y (only the lower 6 bits are used, as elements are 64 bits wide), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6662  //
  6663  // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
  6664  func (x Int64x8) ShiftRightConcat(y Int64x8, z Int64x8) Int64x8
  6665  
  6666  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6667  // corresponding element of y (only the lower 4 bits are used, as elements are 16 bits wide), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6668  //
  6669  // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
  6670  func (x Uint16x8) ShiftRightConcat(y Uint16x8, z Uint16x8) Uint16x8
  6671  
  6672  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6673  // corresponding element of y (only the lower 4 bits are used, as elements are 16 bits wide), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6674  //
  6675  // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
  6676  func (x Uint16x16) ShiftRightConcat(y Uint16x16, z Uint16x16) Uint16x16
  6677  
  6678  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6679  // corresponding element of y (only the lower 4 bits are used, as elements are 16 bits wide), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6680  //
  6681  // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
  6682  func (x Uint16x32) ShiftRightConcat(y Uint16x32, z Uint16x32) Uint16x32
  6683  
  6684  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6685  // corresponding element of y (only the lower 5 bits are used, as elements are 32 bits wide), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6686  //
  6687  // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
  6688  func (x Uint32x4) ShiftRightConcat(y Uint32x4, z Uint32x4) Uint32x4
  6689  
  6690  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6691  // corresponding element of y (only the lower 5 bits are used, as elements are 32 bits wide), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6692  //
  6693  // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
  6694  func (x Uint32x8) ShiftRightConcat(y Uint32x8, z Uint32x8) Uint32x8
  6695  
  6696  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6697  // corresponding element of y (only the lower 5 bits are used, as elements are 32 bits wide), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6698  //
  6699  // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
  6700  func (x Uint32x16) ShiftRightConcat(y Uint32x16, z Uint32x16) Uint32x16
  6701  
  6702  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6703  // corresponding element of y (only the lower 6 bits are used, as elements are 64 bits wide), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6704  //
  6705  // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
  6706  func (x Uint64x2) ShiftRightConcat(y Uint64x2, z Uint64x2) Uint64x2
  6707  
  6708  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6709  // corresponding element of y (only the lower 6 bits are used, as elements are 64 bits wide), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6710  //
  6711  // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
  6712  func (x Uint64x4) ShiftRightConcat(y Uint64x4, z Uint64x4) Uint64x4
  6713  
  6714  // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
  6715  // corresponding element of y (only the lower 6 bits are used, as elements are 64 bits wide), and then copies the lower bits of z to the emptied upper bits of the shifted x.
  6716  //
  6717  // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
  6718  func (x Uint64x8) ShiftRightConcat(y Uint64x8, z Uint64x8) Uint64x8
  6719  
  6720  /* Sqrt */
  6721  
  6722  // Sqrt computes the square root of each element of x.
  6723  //
  6724  // Asm: VSQRTPS, CPU Feature: AVX
  6725  func (x Float32x4) Sqrt() Float32x4
  6726  
  6727  // Sqrt computes the square root of each element of x.
  6728  //
  6729  // Asm: VSQRTPS, CPU Feature: AVX
  6730  func (x Float32x8) Sqrt() Float32x8
  6731  
  6732  // Sqrt computes the square root of each element of x.
  6733  //
  6734  // Asm: VSQRTPS, CPU Feature: AVX512
  6735  func (x Float32x16) Sqrt() Float32x16
  6736  
  6737  // Sqrt computes the square root of each element of x.
  6738  //
  6739  // Asm: VSQRTPD, CPU Feature: AVX
  6740  func (x Float64x2) Sqrt() Float64x2
  6741  
  6742  // Sqrt computes the square root of each element of x.
  6743  //
  6744  // Asm: VSQRTPD, CPU Feature: AVX
  6745  func (x Float64x4) Sqrt() Float64x4
  6746  
  6747  // Sqrt computes the square root of each element of x.
  6748  //
  6749  // Asm: VSQRTPD, CPU Feature: AVX512
  6750  func (x Float64x8) Sqrt() Float64x8
  6751  
  6752  /* Sub */
  6753  
  6754  // Sub subtracts corresponding elements of the two vectors x and y.
  6755  //
  6756  // Asm: VSUBPS, CPU Feature: AVX
  6757  func (x Float32x4) Sub(y Float32x4) Float32x4
  6758  
  6759  // Sub subtracts corresponding elements of the two vectors x and y.
  6760  //
  6761  // Asm: VSUBPS, CPU Feature: AVX
  6762  func (x Float32x8) Sub(y Float32x8) Float32x8
  6763  
  6764  // Sub subtracts corresponding elements of the two vectors x and y.
  6765  //
  6766  // Asm: VSUBPS, CPU Feature: AVX512
  6767  func (x Float32x16) Sub(y Float32x16) Float32x16
  6768  
  6769  // Sub subtracts corresponding elements of the two vectors x and y.
  6770  //
  6771  // Asm: VSUBPD, CPU Feature: AVX
  6772  func (x Float64x2) Sub(y Float64x2) Float64x2
  6773  
  6774  // Sub subtracts corresponding elements of the two vectors x and y.
  6775  //
  6776  // Asm: VSUBPD, CPU Feature: AVX
  6777  func (x Float64x4) Sub(y Float64x4) Float64x4
  6778  
  6779  // Sub subtracts corresponding elements of the two vectors x and y.
  6780  //
  6781  // Asm: VSUBPD, CPU Feature: AVX512
  6782  func (x Float64x8) Sub(y Float64x8) Float64x8
  6783  
  6784  // Sub subtracts corresponding elements of the two vectors x and y.
  6785  //
  6786  // Asm: VPSUBB, CPU Feature: AVX
  6787  func (x Int8x16) Sub(y Int8x16) Int8x16
  6788  
  6789  // Sub subtracts corresponding elements of the two vectors x and y.
  6790  //
  6791  // Asm: VPSUBB, CPU Feature: AVX2
  6792  func (x Int8x32) Sub(y Int8x32) Int8x32
  6793  
  6794  // Sub subtracts corresponding elements of the two vectors x and y.
  6795  //
  6796  // Asm: VPSUBB, CPU Feature: AVX512
  6797  func (x Int8x64) Sub(y Int8x64) Int8x64
  6798  
  6799  // Sub subtracts corresponding elements of the two vectors x and y.
  6800  //
  6801  // Asm: VPSUBW, CPU Feature: AVX
  6802  func (x Int16x8) Sub(y Int16x8) Int16x8
  6803  
  6804  // Sub subtracts corresponding elements of the two vectors x and y.
  6805  //
  6806  // Asm: VPSUBW, CPU Feature: AVX2
  6807  func (x Int16x16) Sub(y Int16x16) Int16x16
  6808  
  6809  // Sub subtracts corresponding elements of the two vectors x and y.
  6810  //
  6811  // Asm: VPSUBW, CPU Feature: AVX512
  6812  func (x Int16x32) Sub(y Int16x32) Int16x32
  6813  
  6814  // Sub subtracts corresponding elements of the two vectors x and y.
  6815  //
  6816  // Asm: VPSUBD, CPU Feature: AVX
  6817  func (x Int32x4) Sub(y Int32x4) Int32x4
  6818  
  6819  // Sub subtracts corresponding elements of the two vectors x and y.
  6820  //
  6821  // Asm: VPSUBD, CPU Feature: AVX2
  6822  func (x Int32x8) Sub(y Int32x8) Int32x8
  6823  
  6824  // Sub subtracts corresponding elements of the two vectors x and y.
  6825  //
  6826  // Asm: VPSUBD, CPU Feature: AVX512
  6827  func (x Int32x16) Sub(y Int32x16) Int32x16
  6828  
  6829  // Sub subtracts corresponding elements of the two vectors x and y.
  6830  //
  6831  // Asm: VPSUBQ, CPU Feature: AVX
  6832  func (x Int64x2) Sub(y Int64x2) Int64x2
  6833  
  6834  // Sub subtracts corresponding elements of the two vectors x and y.
  6835  //
  6836  // Asm: VPSUBQ, CPU Feature: AVX2
  6837  func (x Int64x4) Sub(y Int64x4) Int64x4
  6838  
  6839  // Sub subtracts corresponding elements of the two vectors x and y.
  6840  //
  6841  // Asm: VPSUBQ, CPU Feature: AVX512
  6842  func (x Int64x8) Sub(y Int64x8) Int64x8
  6843  
  6844  // Sub subtracts corresponding elements of the two vectors x and y.
  6845  //
  6846  // Asm: VPSUBB, CPU Feature: AVX
  6847  func (x Uint8x16) Sub(y Uint8x16) Uint8x16
  6848  
  6849  // Sub subtracts corresponding elements of the two vectors x and y.
  6850  //
  6851  // Asm: VPSUBB, CPU Feature: AVX2
  6852  func (x Uint8x32) Sub(y Uint8x32) Uint8x32
  6853  
  6854  // Sub subtracts corresponding elements of the two vectors x and y.
  6855  //
  6856  // Asm: VPSUBB, CPU Feature: AVX512
  6857  func (x Uint8x64) Sub(y Uint8x64) Uint8x64
  6858  
  6859  // Sub subtracts corresponding elements of the two vectors x and y.
  6860  //
  6861  // Asm: VPSUBW, CPU Feature: AVX
  6862  func (x Uint16x8) Sub(y Uint16x8) Uint16x8
  6863  
  6864  // Sub subtracts corresponding elements of the two vectors x and y.
  6865  //
  6866  // Asm: VPSUBW, CPU Feature: AVX2
  6867  func (x Uint16x16) Sub(y Uint16x16) Uint16x16
  6868  
  6869  // Sub subtracts corresponding elements of the two vectors x and y.
  6870  //
  6871  // Asm: VPSUBW, CPU Feature: AVX512
  6872  func (x Uint16x32) Sub(y Uint16x32) Uint16x32
  6873  
  6874  // Sub subtracts corresponding elements of the two vectors x and y.
  6875  //
  6876  // Asm: VPSUBD, CPU Feature: AVX
  6877  func (x Uint32x4) Sub(y Uint32x4) Uint32x4
  6878  
  6879  // Sub subtracts corresponding elements of the two vectors x and y.
  6880  //
  6881  // Asm: VPSUBD, CPU Feature: AVX2
  6882  func (x Uint32x8) Sub(y Uint32x8) Uint32x8
  6883  
  6884  // Sub subtracts corresponding elements of the two vectors x and y.
  6885  //
  6886  // Asm: VPSUBD, CPU Feature: AVX512
  6887  func (x Uint32x16) Sub(y Uint32x16) Uint32x16
  6888  
  6889  // Sub subtracts corresponding elements of the two vectors x and y.
  6890  //
  6891  // Asm: VPSUBQ, CPU Feature: AVX
  6892  func (x Uint64x2) Sub(y Uint64x2) Uint64x2
  6893  
  6894  // Sub subtracts corresponding elements of the two vectors x and y.
  6895  //
  6896  // Asm: VPSUBQ, CPU Feature: AVX2
  6897  func (x Uint64x4) Sub(y Uint64x4) Uint64x4
  6898  
  6899  // Sub subtracts corresponding elements of the two vectors x and y.
  6900  //
  6901  // Asm: VPSUBQ, CPU Feature: AVX512
  6902  func (x Uint64x8) Sub(y Uint64x8) Uint64x8
  6903  
  6904  /* SubPairs */
  6905  
  6906  // SubPairs horizontally subtracts adjacent pairs of elements within x and within y.
  6907  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
  6908  //
  6909  // Asm: VHSUBPS, CPU Feature: AVX
  6910  func (x Float32x4) SubPairs(y Float32x4) Float32x4
  6911  
  6912  // SubPairs horizontally subtracts adjacent pairs of elements within x and within y.
  6913  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
  6914  //
  6915  // Asm: VHSUBPS, CPU Feature: AVX
  6916  func (x Float32x8) SubPairs(y Float32x8) Float32x8
  6917  
  6918  // SubPairs horizontally subtracts adjacent pairs of elements within x and within y.
  6919  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
  6920  //
  6921  // Asm: VHSUBPD, CPU Feature: AVX
  6922  func (x Float64x2) SubPairs(y Float64x2) Float64x2
  6923  
  6924  // SubPairs horizontally subtracts adjacent pairs of elements within x and within y.
  6925  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
  6926  //
  6927  // Asm: VHSUBPD, CPU Feature: AVX
  6928  func (x Float64x4) SubPairs(y Float64x4) Float64x4
  6929  
  6930  // SubPairs horizontally subtracts adjacent pairs of elements within x and within y.
  6931  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
  6932  //
  6933  // Asm: VPHSUBW, CPU Feature: AVX
  6934  func (x Int16x8) SubPairs(y Int16x8) Int16x8
  6935  
  6936  // SubPairs horizontally subtracts adjacent pairs of elements within x and within y.
  6937  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
  6938  //
  6939  // Asm: VPHSUBW, CPU Feature: AVX2
  6940  func (x Int16x16) SubPairs(y Int16x16) Int16x16
  6941  
  6942  // SubPairs horizontally subtracts adjacent pairs of elements within x and within y.
  6943  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
  6944  //
  6945  // Asm: VPHSUBD, CPU Feature: AVX
  6946  func (x Int32x4) SubPairs(y Int32x4) Int32x4
  6947  
  6948  // SubPairs horizontally subtracts adjacent pairs of elements within x and within y.
  6949  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
  6950  //
  6951  // Asm: VPHSUBD, CPU Feature: AVX2
  6952  func (x Int32x8) SubPairs(y Int32x8) Int32x8
  6953  
  6954  // SubPairs horizontally subtracts adjacent pairs of elements within x and within y.
  6955  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
  6956  //
  6957  // Asm: VPHSUBW, CPU Feature: AVX
  6958  func (x Uint16x8) SubPairs(y Uint16x8) Uint16x8
  6959  
  6960  // SubPairs horizontally subtracts adjacent pairs of elements within x and within y.
  6961  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
  6962  //
  6963  // Asm: VPHSUBW, CPU Feature: AVX2
  6964  func (x Uint16x16) SubPairs(y Uint16x16) Uint16x16
  6965  
  6966  // SubPairs horizontally subtracts adjacent pairs of elements within x and within y.
  6967  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
  6968  //
  6969  // Asm: VPHSUBD, CPU Feature: AVX
  6970  func (x Uint32x4) SubPairs(y Uint32x4) Uint32x4
  6971  
  6972  // SubPairs horizontally subtracts adjacent pairs of elements within x and within y.
  6973  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
  6974  //
  6975  // Asm: VPHSUBD, CPU Feature: AVX2
  6976  func (x Uint32x8) SubPairs(y Uint32x8) Uint32x8
  6977  
  6978  /* SubPairsSaturated */
  6979  
  6980  // SubPairsSaturated horizontally subtracts adjacent pairs of elements within x and within y, with saturation.
  6981  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
  6982  //
  6983  // Asm: VPHSUBSW, CPU Feature: AVX
  6984  func (x Int16x8) SubPairsSaturated(y Int16x8) Int16x8
  6985  
  6986  // SubPairsSaturated horizontally subtracts adjacent pairs of elements within x and within y, with saturation.
  6987  // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
  6988  //
  6989  // Asm: VPHSUBSW, CPU Feature: AVX2
  6990  func (x Int16x16) SubPairsSaturated(y Int16x16) Int16x16
  6991  
  6992  /* SubSaturated */
  6993  
  6994  // SubSaturated subtracts corresponding elements of the two vectors x and y with saturation.
  6995  //
  6996  // Asm: VPSUBSB, CPU Feature: AVX
  6997  func (x Int8x16) SubSaturated(y Int8x16) Int8x16
  6998  
  6999  // SubSaturated subtracts corresponding elements of the two vectors x and y with saturation.
  7000  //
  7001  // Asm: VPSUBSB, CPU Feature: AVX2
  7002  func (x Int8x32) SubSaturated(y Int8x32) Int8x32
  7003  
  7004  // SubSaturated subtracts corresponding elements of the two vectors x and y with saturation.
  7005  //
  7006  // Asm: VPSUBSB, CPU Feature: AVX512
  7007  func (x Int8x64) SubSaturated(y Int8x64) Int8x64
  7008  
  7009  // SubSaturated subtracts corresponding elements of the two vectors x and y with saturation.
  7010  //
  7011  // Asm: VPSUBSW, CPU Feature: AVX
  7012  func (x Int16x8) SubSaturated(y Int16x8) Int16x8
  7013  
  7014  // SubSaturated subtracts corresponding elements of the two vectors x and y with saturation.
  7015  //
  7016  // Asm: VPSUBSW, CPU Feature: AVX2
  7017  func (x Int16x16) SubSaturated(y Int16x16) Int16x16
  7018  
  7019  // SubSaturated subtracts corresponding elements of the two vectors x and y with saturation.
  7020  //
  7021  // Asm: VPSUBSW, CPU Feature: AVX512
  7022  func (x Int16x32) SubSaturated(y Int16x32) Int16x32
  7023  
  7024  // SubSaturated subtracts corresponding elements of the two vectors x and y with saturation.
  7025  //
  7026  // Asm: VPSUBUSB, CPU Feature: AVX
  7027  func (x Uint8x16) SubSaturated(y Uint8x16) Uint8x16
  7028  
  7029  // SubSaturated subtracts corresponding elements of the two vectors x and y with saturation.
  7030  //
  7031  // Asm: VPSUBUSB, CPU Feature: AVX2
  7032  func (x Uint8x32) SubSaturated(y Uint8x32) Uint8x32
  7033  
  7034  // SubSaturated subtracts corresponding elements of the two vectors x and y with saturation.
  7035  //
  7036  // Asm: VPSUBUSB, CPU Feature: AVX512
  7037  func (x Uint8x64) SubSaturated(y Uint8x64) Uint8x64
  7038  
  7039  // SubSaturated subtracts corresponding elements of the two vectors x and y with saturation.
  7040  //
  7041  // Asm: VPSUBUSW, CPU Feature: AVX
  7042  func (x Uint16x8) SubSaturated(y Uint16x8) Uint16x8
  7043  
  7044  // SubSaturated subtracts corresponding elements of the two vectors x and y with saturation.
  7045  //
  7046  // Asm: VPSUBUSW, CPU Feature: AVX2
  7047  func (x Uint16x16) SubSaturated(y Uint16x16) Uint16x16
  7048  
  7049  // SubSaturated subtracts corresponding elements of the two vectors x and y with saturation.
  7050  //
  7051  // Asm: VPSUBUSW, CPU Feature: AVX512
  7052  func (x Uint16x32) SubSaturated(y Uint16x32) Uint16x32
  7053  
  7054  /* SumAbsDiff */
  7055  
  7056  // SumAbsDiff sums the absolute differences of the two input vectors, treating each adjacent 8 bytes as a group. The output sum will
  7057  // be a vector of word-sized elements in which every 4*n-th element contains the sum of the n-th input group. The other elements in the result vector are zeroed.
  7058  // This method can be seen as computing the L1 distance between each adjacent 8-byte vector group of the two input vectors.
  7059  //
  7060  // Asm: VPSADBW, CPU Feature: AVX
  7061  func (x Uint8x16) SumAbsDiff(y Uint8x16) Uint16x8
  7062  
  7063  // SumAbsDiff sums the absolute differences of the two input vectors, treating each adjacent 8 bytes as a group. The output sum will
  7064  // be a vector of word-sized elements in which every 4*n-th element contains the sum of the n-th input group. The other elements in the result vector are zeroed.
  7065  // This method can be seen as computing the L1 distance between each adjacent 8-byte vector group of the two input vectors.
  7066  //
  7067  // Asm: VPSADBW, CPU Feature: AVX2
  7068  func (x Uint8x32) SumAbsDiff(y Uint8x32) Uint16x16
  7069  
  7070  // SumAbsDiff sums the absolute differences of the two input vectors, treating each adjacent 8 bytes as a group. The output sum will
  7071  // be a vector of word-sized elements in which every 4*n-th element contains the sum of the n-th input group. The other elements in the result vector are zeroed.
  7072  // This method can be seen as computing the L1 distance between each adjacent 8-byte vector group of the two input vectors.
  7073  //
  7074  // Asm: VPSADBW, CPU Feature: AVX512
  7075  func (x Uint8x64) SumAbsDiff(y Uint8x64) Uint16x32
  7076  
  7077  /* Trunc */
  7078  
  7079  // Trunc truncates each element of x towards zero.
  7080  //
  7081  // Asm: VROUNDPS, CPU Feature: AVX
  7082  func (x Float32x4) Trunc() Float32x4
  7083  
  7084  // Trunc truncates each element of x towards zero.
  7085  //
  7086  // Asm: VROUNDPS, CPU Feature: AVX
  7087  func (x Float32x8) Trunc() Float32x8
  7088  
  7089  // Trunc truncates each element of x towards zero.
  7090  //
  7091  // Asm: VROUNDPD, CPU Feature: AVX
  7092  func (x Float64x2) Trunc() Float64x2
  7093  
  7094  // Trunc truncates each element of x towards zero.
  7095  //
  7096  // Asm: VROUNDPD, CPU Feature: AVX
  7097  func (x Float64x4) Trunc() Float64x4
  7098  
  7099  /* TruncScaled */
  7100  
  7101  // TruncScaled truncates each element of x with the specified precision.
  7102  //
  7103  // prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  7104  //
  7105  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  7106  func (x Float32x4) TruncScaled(prec uint8) Float32x4
  7107  
  7108  // TruncScaled truncates each element of x with the specified precision.
  7109  //
  7110  // prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  7111  //
  7112  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  7113  func (x Float32x8) TruncScaled(prec uint8) Float32x8
  7114  
  7115  // TruncScaled truncates each element of x with the specified precision.
  7116  //
  7117  // prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  7118  //
  7119  // Asm: VRNDSCALEPS, CPU Feature: AVX512
  7120  func (x Float32x16) TruncScaled(prec uint8) Float32x16
  7121  
  7122  // TruncScaled truncates each element of x with the specified precision.
  7123  //
  7124  // prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  7125  //
  7126  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  7127  func (x Float64x2) TruncScaled(prec uint8) Float64x2
  7128  
  7129  // TruncScaled truncates each element of x with the specified precision.
  7130  //
  7131  // prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  7132  //
  7133  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  7134  func (x Float64x4) TruncScaled(prec uint8) Float64x4
  7135  
  7136  // TruncScaled truncates each element of x with the specified precision.
  7137  //
  7138  // prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  7139  //
  7140  // Asm: VRNDSCALEPD, CPU Feature: AVX512
  7141  func (x Float64x8) TruncScaled(prec uint8) Float64x8
  7142  
  7143  /* TruncScaledResidue */
  7144  
  7145  // TruncScaledResidue computes, for each element of x, the difference after truncating with the specified precision.
  7146  //
  7147  // prec results in better performance when it's a constant; a non-constant value will be translated into a jump table.
  7148  //
  7149  // Asm: VREDUCEPS, CPU Feature: AVX512
  7150  func (x Float32x4) TruncScaledResidue(prec uint8) Float32x4
  7151  
  7152  // TruncScaledResidue computes the difference after truncating the elements of x with the precision specified by prec.
  7153  //
  7154  // prec should be a constant for best performance; a non-constant value is translated into a jump table.
  7155  //
  7156  // Asm: VREDUCEPS, CPU Feature: AVX512
  7157  func (x Float32x8) TruncScaledResidue(prec uint8) Float32x8
  7158  
  7159  // TruncScaledResidue computes the difference after truncating the elements of x with the precision specified by prec.
  7160  //
  7161  // prec should be a constant for best performance; a non-constant value is translated into a jump table.
  7162  //
  7163  // Asm: VREDUCEPS, CPU Feature: AVX512
  7164  func (x Float32x16) TruncScaledResidue(prec uint8) Float32x16
  7165  
  7166  // TruncScaledResidue computes the difference after truncating the elements of x with the precision specified by prec.
  7167  //
  7168  // prec should be a constant for best performance; a non-constant value is translated into a jump table.
  7169  //
  7170  // Asm: VREDUCEPD, CPU Feature: AVX512
  7171  func (x Float64x2) TruncScaledResidue(prec uint8) Float64x2
  7172  
  7173  // TruncScaledResidue computes the difference after truncating the elements of x with the precision specified by prec.
  7174  //
  7175  // prec should be a constant for best performance; a non-constant value is translated into a jump table.
  7176  //
  7177  // Asm: VREDUCEPD, CPU Feature: AVX512
  7178  func (x Float64x4) TruncScaledResidue(prec uint8) Float64x4
  7179  
  7180  // TruncScaledResidue computes the difference after truncating the elements of x with the precision specified by prec.
  7181  //
  7182  // prec should be a constant for best performance; a non-constant value is translated into a jump table.
  7183  //
  7184  // Asm: VREDUCEPD, CPU Feature: AVX512
  7185  func (x Float64x8) TruncScaledResidue(prec uint8) Float64x8
  7186  
  7187  /* TruncateToInt8 */
  7188  
  7189  // TruncateToInt8 converts each int16 element of x to int8.
  7190  // The conversion truncates the vector elements.
  7191  // The results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  7192  //
  7193  // Asm: VPMOVWB, CPU Feature: AVX512
  7194  func (x Int16x8) TruncateToInt8() Int8x16
  7195  
  7196  // TruncateToInt8 converts each int16 element of x to int8.
  7197  // The conversion truncates the vector elements.
  7198  // The results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  7199  //
  7200  // Asm: VPMOVWB, CPU Feature: AVX512
  7201  func (x Int16x16) TruncateToInt8() Int8x16
  7202  
  7203  // TruncateToInt8 converts each int16 element of x to int8.
  7204  // The conversion truncates the vector elements.
  7205  //
  7206  // Asm: VPMOVWB, CPU Feature: AVX512
  7207  func (x Int16x32) TruncateToInt8() Int8x32
  7208  
  7209  // TruncateToInt8 converts each int32 element of x to int8.
  7210  // The conversion truncates the vector elements.
  7211  // The results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  7212  //
  7213  // Asm: VPMOVDB, CPU Feature: AVX512
  7214  func (x Int32x4) TruncateToInt8() Int8x16
  7215  
  7216  // TruncateToInt8 converts each int32 element of x to int8.
  7217  // The conversion truncates the vector elements.
  7218  // The results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  7219  //
  7220  // Asm: VPMOVDB, CPU Feature: AVX512
  7221  func (x Int32x8) TruncateToInt8() Int8x16
  7222  
  7223  // TruncateToInt8 converts each int32 element of x to int8.
  7224  // The conversion truncates the vector elements.
  7225  // The results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  7226  //
  7227  // Asm: VPMOVDB, CPU Feature: AVX512
  7228  func (x Int32x16) TruncateToInt8() Int8x16
  7229  
  7230  // TruncateToInt8 converts each int64 element of x to int8.
  7231  // The conversion truncates the vector elements.
  7232  // The results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  7233  //
  7234  // Asm: VPMOVQB, CPU Feature: AVX512
  7235  func (x Int64x2) TruncateToInt8() Int8x16
  7236  
  7237  // TruncateToInt8 converts each int64 element of x to int8.
  7238  // The conversion truncates the vector elements.
  7239  // The results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  7240  //
  7241  // Asm: VPMOVQB, CPU Feature: AVX512
  7242  func (x Int64x4) TruncateToInt8() Int8x16
  7243  
  7244  // TruncateToInt8 converts each int64 element of x to int8.
  7245  // The conversion truncates the vector elements.
  7246  // The results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  7247  //
  7248  // Asm: VPMOVQB, CPU Feature: AVX512
  7249  func (x Int64x8) TruncateToInt8() Int8x16
  7250  
  7251  /* TruncateToInt16 */
  7252  
  7253  // TruncateToInt16 converts each int32 element of x to int16 by truncation.
  7254  // NOTE(review): x has 4 elements but the result has 8; presumably the results fill the low elements and the rest are zero-cleared - confirm.
  7255  //
  7256  // Asm: VPMOVDW, CPU Feature: AVX512
  7257  func (x Int32x4) TruncateToInt16() Int16x8
  7258  
  7259  // TruncateToInt16 converts each int32 element of x to int16.
  7260  // The conversion truncates the vector elements.
  7261  //
  7262  // Asm: VPMOVDW, CPU Feature: AVX512
  7263  func (x Int32x8) TruncateToInt16() Int16x8
  7264  
  7265  // TruncateToInt16 converts each int32 element of x to int16.
  7266  // The conversion truncates the vector elements.
  7267  //
  7268  // Asm: VPMOVDW, CPU Feature: AVX512
  7269  func (x Int32x16) TruncateToInt16() Int16x16
  7270  
  7271  // TruncateToInt16 converts each int64 element of x to int16 by truncation.
  7272  // NOTE(review): x has 2 elements but the result has 8; presumably the results fill the low elements and the rest are zero-cleared - confirm.
  7273  //
  7274  // Asm: VPMOVQW, CPU Feature: AVX512
  7275  func (x Int64x2) TruncateToInt16() Int16x8
  7276  
  7277  // TruncateToInt16 converts each int64 element of x to int16 by truncation.
  7278  // NOTE(review): x has 4 elements but the result has 8; presumably the results fill the low elements and the rest are zero-cleared - confirm.
  7279  //
  7280  // Asm: VPMOVQW, CPU Feature: AVX512
  7281  func (x Int64x4) TruncateToInt16() Int16x8
  7282  
  7283  // TruncateToInt16 converts each int64 element of x to int16.
  7284  // The conversion truncates the vector elements.
  7285  //
  7286  // Asm: VPMOVQW, CPU Feature: AVX512
  7287  func (x Int64x8) TruncateToInt16() Int16x8
  7288  
  7289  /* TruncateToInt32 */
  7290  
  7291  // TruncateToInt32 converts each int64 element of x to int32 by truncation.
  7292  // NOTE(review): x has 2 elements but the result has 4; presumably the results fill the low elements and the rest are zero-cleared - confirm.
  7293  //
  7294  // Asm: VPMOVQD, CPU Feature: AVX512
  7295  func (x Int64x2) TruncateToInt32() Int32x4
  7296  
  7297  // TruncateToInt32 converts each int64 element of x to int32.
  7298  // The conversion truncates the vector elements.
  7299  //
  7300  // Asm: VPMOVQD, CPU Feature: AVX512
  7301  func (x Int64x4) TruncateToInt32() Int32x4
  7302  
  7303  // TruncateToInt32 converts each int64 element of x to int32.
  7304  // The conversion truncates the vector elements.
  7305  //
  7306  // Asm: VPMOVQD, CPU Feature: AVX512
  7307  func (x Int64x8) TruncateToInt32() Int32x8
  7308  
  7309  /* TruncateToUint8 */
  7310  
  7311  // TruncateToUint8 converts each uint16 element of x to uint8.
  7312  // The conversion truncates the vector elements.
  7313  // The results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  7314  //
  7315  // Asm: VPMOVWB, CPU Feature: AVX512
  7316  func (x Uint16x8) TruncateToUint8() Uint8x16
  7317  
  7318  // TruncateToUint8 converts each uint16 element of x to uint8.
  7319  // The conversion truncates the vector elements.
  7320  // The results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  7321  //
  7322  // Asm: VPMOVWB, CPU Feature: AVX512
  7323  func (x Uint16x16) TruncateToUint8() Uint8x16
  7324  
  7325  // TruncateToUint8 converts each uint16 element of x to uint8.
  7326  // The conversion truncates the vector elements.
  7327  //
  7328  // Asm: VPMOVWB, CPU Feature: AVX512
  7329  func (x Uint16x32) TruncateToUint8() Uint8x32
  7330  
  7331  // TruncateToUint8 converts each uint32 element of x to uint8.
  7332  // The conversion truncates the vector elements.
  7333  // The results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  7334  //
  7335  // Asm: VPMOVDB, CPU Feature: AVX512
  7336  func (x Uint32x4) TruncateToUint8() Uint8x16
  7337  
  7338  // TruncateToUint8 converts each uint32 element of x to uint8.
  7339  // The conversion truncates the vector elements.
  7340  // The results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  7341  //
  7342  // Asm: VPMOVDB, CPU Feature: AVX512
  7343  func (x Uint32x8) TruncateToUint8() Uint8x16
  7344  
  7345  // TruncateToUint8 converts each uint32 element of x to uint8.
  7346  // The conversion truncates the vector elements.
  7347  // The results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  7348  //
  7349  // Asm: VPMOVDB, CPU Feature: AVX512
  7350  func (x Uint32x16) TruncateToUint8() Uint8x16
  7351  
  7352  // TruncateToUint8 converts each uint64 element of x to uint8.
  7353  // The conversion truncates the vector elements.
  7354  // The results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  7355  //
  7356  // Asm: VPMOVQB, CPU Feature: AVX512
  7357  func (x Uint64x2) TruncateToUint8() Uint8x16
  7358  
  7359  // TruncateToUint8 converts each uint64 element of x to uint8.
  7360  // The conversion truncates the vector elements.
  7361  // The results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  7362  //
  7363  // Asm: VPMOVQB, CPU Feature: AVX512
  7364  func (x Uint64x4) TruncateToUint8() Uint8x16
  7365  
  7366  // TruncateToUint8 converts each uint64 element of x to uint8.
  7367  // The conversion truncates the vector elements.
  7368  // The results are packed into the low elements of the returned vector; its upper elements are zero-cleared.
  7369  //
  7370  // Asm: VPMOVQB, CPU Feature: AVX512
  7371  func (x Uint64x8) TruncateToUint8() Uint8x16
  7372  
  7373  /* TruncateToUint16 */
  7374  
  7375  // TruncateToUint16 converts each uint32 element of x to uint16 by truncation.
  7376  // NOTE(review): x has 4 elements but the result has 8; presumably the results fill the low elements and the rest are zero-cleared - confirm.
  7377  //
  7378  // Asm: VPMOVDW, CPU Feature: AVX512
  7379  func (x Uint32x4) TruncateToUint16() Uint16x8
  7380  
  7381  // TruncateToUint16 converts each uint32 element of x to uint16.
  7382  // The conversion truncates the vector elements.
  7383  //
  7384  // Asm: VPMOVDW, CPU Feature: AVX512
  7385  func (x Uint32x8) TruncateToUint16() Uint16x8
  7386  
  7387  // TruncateToUint16 converts each uint32 element of x to uint16.
  7388  // The conversion truncates the vector elements.
  7389  //
  7390  // Asm: VPMOVDW, CPU Feature: AVX512
  7391  func (x Uint32x16) TruncateToUint16() Uint16x16
  7392  
  7393  // TruncateToUint16 converts each uint64 element of x to uint16 by truncation.
  7394  // NOTE(review): x has 2 elements but the result has 8; presumably the results fill the low elements and the rest are zero-cleared - confirm.
  7395  //
  7396  // Asm: VPMOVQW, CPU Feature: AVX512
  7397  func (x Uint64x2) TruncateToUint16() Uint16x8
  7398  
  7399  // TruncateToUint16 converts each uint64 element of x to uint16 by truncation.
  7400  // NOTE(review): x has 4 elements but the result has 8; presumably the results fill the low elements and the rest are zero-cleared - confirm.
  7401  //
  7402  // Asm: VPMOVQW, CPU Feature: AVX512
  7403  func (x Uint64x4) TruncateToUint16() Uint16x8
  7404  
  7405  // TruncateToUint16 converts each uint64 element of x to uint16.
  7406  // The conversion truncates the vector elements.
  7407  //
  7408  // Asm: VPMOVQW, CPU Feature: AVX512
  7409  func (x Uint64x8) TruncateToUint16() Uint16x8
  7410  
  7411  /* TruncateToUint32 */
  7412  
  7413  // TruncateToUint32 converts each uint64 element of x to uint32 by truncation.
  7414  // NOTE(review): x has 2 elements but the result has 4; presumably the results fill the low elements and the rest are zero-cleared - confirm.
  7415  //
  7416  // Asm: VPMOVQD, CPU Feature: AVX512
  7417  func (x Uint64x2) TruncateToUint32() Uint32x4
  7418  
  7419  // TruncateToUint32 converts each uint64 element of x to uint32.
  7420  // The conversion truncates the vector elements.
  7421  //
  7422  // Asm: VPMOVQD, CPU Feature: AVX512
  7423  func (x Uint64x4) TruncateToUint32() Uint32x4
  7424  
  7425  // TruncateToUint32 converts each uint64 element of x to uint32.
  7426  // The conversion truncates the vector elements.
  7427  //
  7428  // Asm: VPMOVQD, CPU Feature: AVX512
  7429  func (x Uint64x8) TruncateToUint32() Uint32x8
  7430  
  7431  /* Xor */
  7432  
  7433  // Xor returns the bitwise XOR of the vectors x and y.
  7434  //
  7435  // Asm: VPXOR, CPU Feature: AVX
  7436  func (x Int8x16) Xor(y Int8x16) Int8x16
  7437  
  7438  // Xor returns the bitwise XOR of the vectors x and y.
  7439  //
  7440  // Asm: VPXOR, CPU Feature: AVX2
  7441  func (x Int8x32) Xor(y Int8x32) Int8x32
  7442  
  7443  // Xor returns the bitwise XOR of the vectors x and y.
  7444  //
  7445  // Asm: VPXORD, CPU Feature: AVX512
  7446  func (x Int8x64) Xor(y Int8x64) Int8x64
  7447  
  7448  // Xor returns the bitwise XOR of the vectors x and y.
  7449  //
  7450  // Asm: VPXOR, CPU Feature: AVX
  7451  func (x Int16x8) Xor(y Int16x8) Int16x8
  7452  
  7453  // Xor returns the bitwise XOR of the vectors x and y.
  7454  //
  7455  // Asm: VPXOR, CPU Feature: AVX2
  7456  func (x Int16x16) Xor(y Int16x16) Int16x16
  7457  
  7458  // Xor returns the bitwise XOR of the vectors x and y.
  7459  //
  7460  // Asm: VPXORD, CPU Feature: AVX512
  7461  func (x Int16x32) Xor(y Int16x32) Int16x32
  7462  
  7463  // Xor returns the bitwise XOR of the vectors x and y.
  7464  //
  7465  // Asm: VPXOR, CPU Feature: AVX
  7466  func (x Int32x4) Xor(y Int32x4) Int32x4
  7467  
  7468  // Xor returns the bitwise XOR of the vectors x and y.
  7469  //
  7470  // Asm: VPXOR, CPU Feature: AVX2
  7471  func (x Int32x8) Xor(y Int32x8) Int32x8
  7472  
  7473  // Xor returns the bitwise XOR of the vectors x and y.
  7474  //
  7475  // Asm: VPXORD, CPU Feature: AVX512
  7476  func (x Int32x16) Xor(y Int32x16) Int32x16
  7477  
  7478  // Xor returns the bitwise XOR of the vectors x and y.
  7479  //
  7480  // Asm: VPXOR, CPU Feature: AVX
  7481  func (x Int64x2) Xor(y Int64x2) Int64x2
  7482  
  7483  // Xor returns the bitwise XOR of the vectors x and y.
  7484  //
  7485  // Asm: VPXOR, CPU Feature: AVX2
  7486  func (x Int64x4) Xor(y Int64x4) Int64x4
  7487  
  7488  // Xor returns the bitwise XOR of the vectors x and y.
  7489  //
  7490  // Asm: VPXORQ, CPU Feature: AVX512
  7491  func (x Int64x8) Xor(y Int64x8) Int64x8
  7492  
  7493  // Xor returns the bitwise XOR of the vectors x and y.
  7494  //
  7495  // Asm: VPXOR, CPU Feature: AVX
  7496  func (x Uint8x16) Xor(y Uint8x16) Uint8x16
  7497  
  7498  // Xor returns the bitwise XOR of the vectors x and y.
  7499  //
  7500  // Asm: VPXOR, CPU Feature: AVX2
  7501  func (x Uint8x32) Xor(y Uint8x32) Uint8x32
  7502  
  7503  // Xor returns the bitwise XOR of the vectors x and y.
  7504  //
  7505  // Asm: VPXORD, CPU Feature: AVX512
  7506  func (x Uint8x64) Xor(y Uint8x64) Uint8x64
  7507  
  7508  // Xor returns the bitwise XOR of the vectors x and y.
  7509  //
  7510  // Asm: VPXOR, CPU Feature: AVX
  7511  func (x Uint16x8) Xor(y Uint16x8) Uint16x8
  7512  
  7513  // Xor returns the bitwise XOR of the vectors x and y.
  7514  //
  7515  // Asm: VPXOR, CPU Feature: AVX2
  7516  func (x Uint16x16) Xor(y Uint16x16) Uint16x16
  7517  
  7518  // Xor returns the bitwise XOR of the vectors x and y.
  7519  //
  7520  // Asm: VPXORD, CPU Feature: AVX512
  7521  func (x Uint16x32) Xor(y Uint16x32) Uint16x32
  7522  
  7523  // Xor returns the bitwise XOR of the vectors x and y.
  7524  //
  7525  // Asm: VPXOR, CPU Feature: AVX
  7526  func (x Uint32x4) Xor(y Uint32x4) Uint32x4
  7527  
  7528  // Xor returns the bitwise XOR of the vectors x and y.
  7529  //
  7530  // Asm: VPXOR, CPU Feature: AVX2
  7531  func (x Uint32x8) Xor(y Uint32x8) Uint32x8
  7532  
  7533  // Xor returns the bitwise XOR of the vectors x and y.
  7534  //
  7535  // Asm: VPXORD, CPU Feature: AVX512
  7536  func (x Uint32x16) Xor(y Uint32x16) Uint32x16
  7537  
  7538  // Xor returns the bitwise XOR of the vectors x and y.
  7539  //
  7540  // Asm: VPXOR, CPU Feature: AVX
  7541  func (x Uint64x2) Xor(y Uint64x2) Uint64x2
  7542  
  7543  // Xor returns the bitwise XOR of the vectors x and y.
  7544  //
  7545  // Asm: VPXOR, CPU Feature: AVX2
  7546  func (x Uint64x4) Xor(y Uint64x4) Uint64x4
  7547  
  7548  // Xor returns the bitwise XOR of the vectors x and y.
  7549  //
  7550  // Asm: VPXORQ, CPU Feature: AVX512
  7551  func (x Uint64x8) Xor(y Uint64x8) Uint64x8
  7552  
  7553  // AsFloat64x2 converts from Float32x4 to Float64x2.
  7554  func (from Float32x4) AsFloat64x2() (to Float64x2)
  7555  
  7556  // AsInt8x16 converts from Float32x4 to Int8x16.
  7557  func (from Float32x4) AsInt8x16() (to Int8x16)
  7558  
  7559  // AsInt16x8 converts from Float32x4 to Int16x8.
  7560  func (from Float32x4) AsInt16x8() (to Int16x8)
  7561  
  7562  // AsInt32x4 converts from Float32x4 to Int32x4.
  7563  func (from Float32x4) AsInt32x4() (to Int32x4)
  7564  
  7565  // AsInt64x2 converts from Float32x4 to Int64x2.
  7566  func (from Float32x4) AsInt64x2() (to Int64x2)
  7567  
  7568  // AsUint8x16 converts from Float32x4 to Uint8x16.
  7569  func (from Float32x4) AsUint8x16() (to Uint8x16)
  7570  
  7571  // AsUint16x8 converts from Float32x4 to Uint16x8.
  7572  func (from Float32x4) AsUint16x8() (to Uint16x8)
  7573  
  7574  // AsUint32x4 converts from Float32x4 to Uint32x4.
  7575  func (from Float32x4) AsUint32x4() (to Uint32x4)
  7576  
  7577  // AsUint64x2 converts from Float32x4 to Uint64x2.
  7578  func (from Float32x4) AsUint64x2() (to Uint64x2)
  7579  
  7580  // AsFloat64x4 converts from Float32x8 to Float64x4.
  7581  func (from Float32x8) AsFloat64x4() (to Float64x4)
  7582  
  7583  // AsInt8x32 converts from Float32x8 to Int8x32.
  7584  func (from Float32x8) AsInt8x32() (to Int8x32)
  7585  
  7586  // AsInt16x16 converts from Float32x8 to Int16x16.
  7587  func (from Float32x8) AsInt16x16() (to Int16x16)
  7588  
  7589  // AsInt32x8 converts from Float32x8 to Int32x8.
  7590  func (from Float32x8) AsInt32x8() (to Int32x8)
  7591  
  7592  // AsInt64x4 converts from Float32x8 to Int64x4.
  7593  func (from Float32x8) AsInt64x4() (to Int64x4)
  7594  
  7595  // AsUint8x32 converts from Float32x8 to Uint8x32.
  7596  func (from Float32x8) AsUint8x32() (to Uint8x32)
  7597  
  7598  // AsUint16x16 converts from Float32x8 to Uint16x16.
  7599  func (from Float32x8) AsUint16x16() (to Uint16x16)
  7600  
  7601  // AsUint32x8 converts from Float32x8 to Uint32x8.
  7602  func (from Float32x8) AsUint32x8() (to Uint32x8)
  7603  
  7604  // AsUint64x4 converts from Float32x8 to Uint64x4.
  7605  func (from Float32x8) AsUint64x4() (to Uint64x4)
  7606  
  7607  // AsFloat64x8 converts from Float32x16 to Float64x8.
  7608  func (from Float32x16) AsFloat64x8() (to Float64x8)
  7609  
  7610  // AsInt8x64 converts from Float32x16 to Int8x64.
  7611  func (from Float32x16) AsInt8x64() (to Int8x64)
  7612  
  7613  // AsInt16x32 converts from Float32x16 to Int16x32.
  7614  func (from Float32x16) AsInt16x32() (to Int16x32)
  7615  
  7616  // AsInt32x16 converts from Float32x16 to Int32x16.
  7617  func (from Float32x16) AsInt32x16() (to Int32x16)
  7618  
  7619  // AsInt64x8 converts from Float32x16 to Int64x8.
  7620  func (from Float32x16) AsInt64x8() (to Int64x8)
  7621  
  7622  // AsUint8x64 converts from Float32x16 to Uint8x64.
  7623  func (from Float32x16) AsUint8x64() (to Uint8x64)
  7624  
  7625  // AsUint16x32 converts from Float32x16 to Uint16x32.
  7626  func (from Float32x16) AsUint16x32() (to Uint16x32)
  7627  
  7628  // AsUint32x16 converts from Float32x16 to Uint32x16.
  7629  func (from Float32x16) AsUint32x16() (to Uint32x16)
  7630  
  7631  // AsUint64x8 converts from Float32x16 to Uint64x8.
  7632  func (from Float32x16) AsUint64x8() (to Uint64x8)
  7633  
  7634  // AsFloat32x4 converts from Float64x2 to Float32x4.
  7635  func (from Float64x2) AsFloat32x4() (to Float32x4)
  7636  
  7637  // AsInt8x16 converts from Float64x2 to Int8x16.
  7638  func (from Float64x2) AsInt8x16() (to Int8x16)
  7639  
  7640  // AsInt16x8 converts from Float64x2 to Int16x8.
  7641  func (from Float64x2) AsInt16x8() (to Int16x8)
  7642  
  7643  // AsInt32x4 converts from Float64x2 to Int32x4.
  7644  func (from Float64x2) AsInt32x4() (to Int32x4)
  7645  
  7646  // AsInt64x2 converts from Float64x2 to Int64x2.
  7647  func (from Float64x2) AsInt64x2() (to Int64x2)
  7648  
  7649  // AsUint8x16 converts from Float64x2 to Uint8x16.
  7650  func (from Float64x2) AsUint8x16() (to Uint8x16)
  7651  
  7652  // AsUint16x8 converts from Float64x2 to Uint16x8.
  7653  func (from Float64x2) AsUint16x8() (to Uint16x8)
  7654  
  7655  // AsUint32x4 converts from Float64x2 to Uint32x4.
  7656  func (from Float64x2) AsUint32x4() (to Uint32x4)
  7657  
  7658  // AsUint64x2 converts from Float64x2 to Uint64x2.
  7659  func (from Float64x2) AsUint64x2() (to Uint64x2)
  7660  
  7661  // AsFloat32x8 converts from Float64x4 to Float32x8.
  7662  func (from Float64x4) AsFloat32x8() (to Float32x8)
  7663  
  7664  // AsInt8x32 converts from Float64x4 to Int8x32.
  7665  func (from Float64x4) AsInt8x32() (to Int8x32)
  7666  
  7667  // AsInt16x16 converts from Float64x4 to Int16x16.
  7668  func (from Float64x4) AsInt16x16() (to Int16x16)
  7669  
  7670  // AsInt32x8 converts from Float64x4 to Int32x8.
  7671  func (from Float64x4) AsInt32x8() (to Int32x8)
  7672  
  7673  // AsInt64x4 converts from Float64x4 to Int64x4.
  7674  func (from Float64x4) AsInt64x4() (to Int64x4)
  7675  
  7676  // AsUint8x32 converts from Float64x4 to Uint8x32.
  7677  func (from Float64x4) AsUint8x32() (to Uint8x32)
  7678  
  7679  // AsUint16x16 converts from Float64x4 to Uint16x16.
  7680  func (from Float64x4) AsUint16x16() (to Uint16x16)
  7681  
  7682  // AsUint32x8 converts from Float64x4 to Uint32x8.
  7683  func (from Float64x4) AsUint32x8() (to Uint32x8)
  7684  
  7685  // AsUint64x4 converts from Float64x4 to Uint64x4.
  7686  func (from Float64x4) AsUint64x4() (to Uint64x4)
  7687  
  7688  // AsFloat32x16 converts from Float64x8 to Float32x16.
  7689  func (from Float64x8) AsFloat32x16() (to Float32x16)
  7690  
  7691  // AsInt8x64 converts from Float64x8 to Int8x64.
  7692  func (from Float64x8) AsInt8x64() (to Int8x64)
  7693  
  7694  // AsInt16x32 converts from Float64x8 to Int16x32.
  7695  func (from Float64x8) AsInt16x32() (to Int16x32)
  7696  
  7697  // AsInt32x16 converts from Float64x8 to Int32x16.
  7698  func (from Float64x8) AsInt32x16() (to Int32x16)
  7699  
  7700  // AsInt64x8 converts from Float64x8 to Int64x8.
  7701  func (from Float64x8) AsInt64x8() (to Int64x8)
  7702  
  7703  // AsUint8x64 converts from Float64x8 to Uint8x64.
  7704  func (from Float64x8) AsUint8x64() (to Uint8x64)
  7705  
  7706  // AsUint16x32 converts from Float64x8 to Uint16x32.
  7707  func (from Float64x8) AsUint16x32() (to Uint16x32)
  7708  
  7709  // AsUint32x16 converts from Float64x8 to Uint32x16.
  7710  func (from Float64x8) AsUint32x16() (to Uint32x16)
  7711  
  7712  // AsUint64x8 converts from Float64x8 to Uint64x8.
  7713  func (from Float64x8) AsUint64x8() (to Uint64x8)
  7714  
  7715  // AsFloat32x4 converts from Int8x16 to Float32x4.
  7716  func (from Int8x16) AsFloat32x4() (to Float32x4)
  7717  
  7718  // AsFloat64x2 converts from Int8x16 to Float64x2.
  7719  func (from Int8x16) AsFloat64x2() (to Float64x2)
  7720  
  7721  // AsInt16x8 converts from Int8x16 to Int16x8.
  7722  func (from Int8x16) AsInt16x8() (to Int16x8)
  7723  
  7724  // AsInt32x4 converts from Int8x16 to Int32x4.
  7725  func (from Int8x16) AsInt32x4() (to Int32x4)
  7726  
  7727  // AsInt64x2 converts from Int8x16 to Int64x2.
  7728  func (from Int8x16) AsInt64x2() (to Int64x2)
  7729  
  7730  // AsUint8x16 converts from Int8x16 to Uint8x16.
  7731  func (from Int8x16) AsUint8x16() (to Uint8x16)
  7732  
  7733  // AsUint16x8 converts from Int8x16 to Uint16x8.
  7734  func (from Int8x16) AsUint16x8() (to Uint16x8)
  7735  
  7736  // AsUint32x4 converts from Int8x16 to Uint32x4.
  7737  func (from Int8x16) AsUint32x4() (to Uint32x4)
  7738  
  7739  // AsUint64x2 converts from Int8x16 to Uint64x2.
  7740  func (from Int8x16) AsUint64x2() (to Uint64x2)
  7741  
  7742  // AsFloat32x8 converts from Int8x32 to Float32x8.
  7743  func (from Int8x32) AsFloat32x8() (to Float32x8)
  7744  
  7745  // AsFloat64x4 converts from Int8x32 to Float64x4.
  7746  func (from Int8x32) AsFloat64x4() (to Float64x4)
  7747  
  7748  // AsInt16x16 converts from Int8x32 to Int16x16.
  7749  func (from Int8x32) AsInt16x16() (to Int16x16)
  7750  
  7751  // AsInt32x8 converts from Int8x32 to Int32x8.
  7752  func (from Int8x32) AsInt32x8() (to Int32x8)
  7753  
  7754  // AsInt64x4 converts from Int8x32 to Int64x4.
  7755  func (from Int8x32) AsInt64x4() (to Int64x4)
  7756  
  7757  // AsUint8x32 converts from Int8x32 to Uint8x32.
  7758  func (from Int8x32) AsUint8x32() (to Uint8x32)
  7759  
  7760  // AsUint16x16 converts from Int8x32 to Uint16x16.
  7761  func (from Int8x32) AsUint16x16() (to Uint16x16)
  7762  
  7763  // AsUint32x8 converts from Int8x32 to Uint32x8.
  7764  func (from Int8x32) AsUint32x8() (to Uint32x8)
  7765  
  7766  // AsUint64x4 converts from Int8x32 to Uint64x4.
  7767  func (from Int8x32) AsUint64x4() (to Uint64x4)
  7768  
  7769  // AsFloat32x16 converts from Int8x64 to Float32x16.
  7770  func (from Int8x64) AsFloat32x16() (to Float32x16)
  7771  
  7772  // AsFloat64x8 converts from Int8x64 to Float64x8.
  7773  func (from Int8x64) AsFloat64x8() (to Float64x8)
  7774  
  7775  // AsInt16x32 converts from Int8x64 to Int16x32.
  7776  func (from Int8x64) AsInt16x32() (to Int16x32)
  7777  
  7778  // AsInt32x16 converts from Int8x64 to Int32x16.
  7779  func (from Int8x64) AsInt32x16() (to Int32x16)
  7780  
  7781  // AsInt64x8 converts from Int8x64 to Int64x8.
  7782  func (from Int8x64) AsInt64x8() (to Int64x8)
  7783  
  7784  // AsUint8x64 converts from Int8x64 to Uint8x64.
  7785  func (from Int8x64) AsUint8x64() (to Uint8x64)
  7786  
  7787  // AsUint16x32 converts from Int8x64 to Uint16x32.
  7788  func (from Int8x64) AsUint16x32() (to Uint16x32)
  7789  
  7790  // AsUint32x16 converts from Int8x64 to Uint32x16.
  7791  func (from Int8x64) AsUint32x16() (to Uint32x16)
  7792  
  7793  // AsUint64x8 converts from Int8x64 to Uint64x8.
  7794  func (from Int8x64) AsUint64x8() (to Uint64x8)
  7795  
  7796  // AsFloat32x4 converts from Int16x8 to Float32x4.
  7797  func (from Int16x8) AsFloat32x4() (to Float32x4)
  7798  
  7799  // AsFloat64x2 converts from Int16x8 to Float64x2.
  7800  func (from Int16x8) AsFloat64x2() (to Float64x2)
  7801  
  7802  // AsInt8x16 converts from Int16x8 to Int8x16.
  7803  func (from Int16x8) AsInt8x16() (to Int8x16)
  7804  
  7805  // AsInt32x4 converts from Int16x8 to Int32x4.
  7806  func (from Int16x8) AsInt32x4() (to Int32x4)
  7807  
  7808  // AsInt64x2 converts from Int16x8 to Int64x2.
  7809  func (from Int16x8) AsInt64x2() (to Int64x2)
  7810  
  7811  // AsUint8x16 converts from Int16x8 to Uint8x16.
  7812  func (from Int16x8) AsUint8x16() (to Uint8x16)
  7813  
  7814  // AsUint16x8 converts from Int16x8 to Uint16x8.
  7815  func (from Int16x8) AsUint16x8() (to Uint16x8)
  7816  
  7817  // AsUint32x4 converts from Int16x8 to Uint32x4.
  7818  func (from Int16x8) AsUint32x4() (to Uint32x4)
  7819  
  7820  // AsUint64x2 converts from Int16x8 to Uint64x2.
  7821  func (from Int16x8) AsUint64x2() (to Uint64x2)
  7822  
  7823  // AsFloat32x8 converts from Int16x16 to Float32x8.
  7824  func (from Int16x16) AsFloat32x8() (to Float32x8)
  7825  
  7826  // AsFloat64x4 converts from Int16x16 to Float64x4.
  7827  func (from Int16x16) AsFloat64x4() (to Float64x4)
  7828  
  7829  // AsInt8x32 converts from Int16x16 to Int8x32.
  7830  func (from Int16x16) AsInt8x32() (to Int8x32)
  7831  
  7832  // AsInt32x8 converts from Int16x16 to Int32x8.
  7833  func (from Int16x16) AsInt32x8() (to Int32x8)
  7834  
  7835  // AsInt64x4 converts from Int16x16 to Int64x4.
  7836  func (from Int16x16) AsInt64x4() (to Int64x4)
  7837  
  7838  // AsUint8x32 converts from Int16x16 to Uint8x32.
  7839  func (from Int16x16) AsUint8x32() (to Uint8x32)
  7840  
  7841  // AsUint16x16 converts from Int16x16 to Uint16x16.
  7842  func (from Int16x16) AsUint16x16() (to Uint16x16)
  7843  
  7844  // AsUint32x8 converts from Int16x16 to Uint32x8.
  7845  func (from Int16x16) AsUint32x8() (to Uint32x8)
  7846  
  7847  // AsUint64x4 converts from Int16x16 to Uint64x4.
  7848  func (from Int16x16) AsUint64x4() (to Uint64x4)
  7849  
  7850  // AsFloat32x16 converts from Int16x32 to Float32x16.
  7851  func (from Int16x32) AsFloat32x16() (to Float32x16)
  7852  
  7853  // AsFloat64x8 converts from Int16x32 to Float64x8.
  7854  func (from Int16x32) AsFloat64x8() (to Float64x8)
  7855  
  7856  // AsInt8x64 converts from Int16x32 to Int8x64.
  7857  func (from Int16x32) AsInt8x64() (to Int8x64)
  7858  
  7859  // AsInt32x16 converts from Int16x32 to Int32x16.
  7860  func (from Int16x32) AsInt32x16() (to Int32x16)
  7861  
  7862  // AsInt64x8 converts from Int16x32 to Int64x8.
  7863  func (from Int16x32) AsInt64x8() (to Int64x8)
  7864  
  7865  // AsUint8x64 converts from Int16x32 to Uint8x64.
  7866  func (from Int16x32) AsUint8x64() (to Uint8x64)
  7867  
  7868  // AsUint16x32 converts from Int16x32 to Uint16x32.
  7869  func (from Int16x32) AsUint16x32() (to Uint16x32)
  7870  
  7871  // AsUint32x16 converts from Int16x32 to Uint32x16.
  7872  func (from Int16x32) AsUint32x16() (to Uint32x16)
  7873  
  7874  // AsUint64x8 converts from Int16x32 to Uint64x8.
  7875  func (from Int16x32) AsUint64x8() (to Uint64x8)
  7876  
  7877  // AsFloat32x4 converts from Int32x4 to Float32x4.
  7878  func (from Int32x4) AsFloat32x4() (to Float32x4)
  7879  
  7880  // AsFloat64x2 converts from Int32x4 to Float64x2.
  7881  func (from Int32x4) AsFloat64x2() (to Float64x2)
  7882  
  7883  // AsInt8x16 converts from Int32x4 to Int8x16.
  7884  func (from Int32x4) AsInt8x16() (to Int8x16)
  7885  
  7886  // AsInt16x8 converts from Int32x4 to Int16x8.
  7887  func (from Int32x4) AsInt16x8() (to Int16x8)
  7888  
  7889  // AsInt64x2 converts from Int32x4 to Int64x2.
  7890  func (from Int32x4) AsInt64x2() (to Int64x2)
  7891  
  7892  // AsUint8x16 converts from Int32x4 to Uint8x16.
  7893  func (from Int32x4) AsUint8x16() (to Uint8x16)
  7894  
  7895  // AsUint16x8 converts from Int32x4 to Uint16x8.
  7896  func (from Int32x4) AsUint16x8() (to Uint16x8)
  7897  
  7898  // AsUint32x4 converts from Int32x4 to Uint32x4.
  7899  func (from Int32x4) AsUint32x4() (to Uint32x4)
  7900  
  7901  // AsUint64x2 converts from Int32x4 to Uint64x2.
  7902  func (from Int32x4) AsUint64x2() (to Uint64x2)
  7903  
  7904  // AsFloat32x8 converts from Int32x8 to Float32x8.
  7905  func (from Int32x8) AsFloat32x8() (to Float32x8)
  7906  
  7907  // AsFloat64x4 converts from Int32x8 to Float64x4.
  7908  func (from Int32x8) AsFloat64x4() (to Float64x4)
  7909  
  7910  // AsInt8x32 converts from Int32x8 to Int8x32.
  7911  func (from Int32x8) AsInt8x32() (to Int8x32)
  7912  
  7913  // AsInt16x16 converts from Int32x8 to Int16x16.
  7914  func (from Int32x8) AsInt16x16() (to Int16x16)
  7915  
  7916  // AsInt64x4 converts from Int32x8 to Int64x4.
  7917  func (from Int32x8) AsInt64x4() (to Int64x4)
  7918  
  7919  // AsUint8x32 converts from Int32x8 to Uint8x32.
  7920  func (from Int32x8) AsUint8x32() (to Uint8x32)
  7921  
  7922  // Uint16x16 converts from Int32x8 to Uint16x16
  7923  func (from Int32x8) AsUint16x16() (to Uint16x16)
  7924  
  7925  // Uint32x8 converts from Int32x8 to Uint32x8
  7926  func (from Int32x8) AsUint32x8() (to Uint32x8)
  7927  
  7928  // Uint64x4 converts from Int32x8 to Uint64x4
  7929  func (from Int32x8) AsUint64x4() (to Uint64x4)
  7930  
  7931  // AsFloat32x16 converts from Int32x16 to Float32x16.
  7932  func (from Int32x16) AsFloat32x16() (to Float32x16)
  7933  
  7934  // AsFloat64x8 converts from Int32x16 to Float64x8.
  7935  func (from Int32x16) AsFloat64x8() (to Float64x8)
  7936  
  7937  // AsInt8x64 converts from Int32x16 to Int8x64.
  7938  func (from Int32x16) AsInt8x64() (to Int8x64)
  7939  
  7940  // AsInt16x32 converts from Int32x16 to Int16x32.
  7941  func (from Int32x16) AsInt16x32() (to Int16x32)
  7942  
  7943  // AsInt64x8 converts from Int32x16 to Int64x8.
  7944  func (from Int32x16) AsInt64x8() (to Int64x8)
  7945  
  7946  // AsUint8x64 converts from Int32x16 to Uint8x64.
  7947  func (from Int32x16) AsUint8x64() (to Uint8x64)
  7948  
  7949  // AsUint16x32 converts from Int32x16 to Uint16x32.
  7950  func (from Int32x16) AsUint16x32() (to Uint16x32)
  7951  
  7952  // AsUint32x16 converts from Int32x16 to Uint32x16.
  7953  func (from Int32x16) AsUint32x16() (to Uint32x16)
  7954  
  7955  // AsUint64x8 converts from Int32x16 to Uint64x8.
  7956  func (from Int32x16) AsUint64x8() (to Uint64x8)
  7957  
  7958  // AsFloat32x4 converts from Int64x2 to Float32x4.
  7959  func (from Int64x2) AsFloat32x4() (to Float32x4)
  7960  
  7961  // AsFloat64x2 converts from Int64x2 to Float64x2.
  7962  func (from Int64x2) AsFloat64x2() (to Float64x2)
  7963  
  7964  // AsInt8x16 converts from Int64x2 to Int8x16.
  7965  func (from Int64x2) AsInt8x16() (to Int8x16)
  7966  
  7967  // AsInt16x8 converts from Int64x2 to Int16x8.
  7968  func (from Int64x2) AsInt16x8() (to Int16x8)
  7969  
  7970  // AsInt32x4 converts from Int64x2 to Int32x4.
  7971  func (from Int64x2) AsInt32x4() (to Int32x4)
  7972  
  7973  // AsUint8x16 converts from Int64x2 to Uint8x16.
  7974  func (from Int64x2) AsUint8x16() (to Uint8x16)
  7975  
  7976  // AsUint16x8 converts from Int64x2 to Uint16x8.
  7977  func (from Int64x2) AsUint16x8() (to Uint16x8)
  7978  
  7979  // AsUint32x4 converts from Int64x2 to Uint32x4.
  7980  func (from Int64x2) AsUint32x4() (to Uint32x4)
  7981  
  7982  // AsUint64x2 converts from Int64x2 to Uint64x2.
  7983  func (from Int64x2) AsUint64x2() (to Uint64x2)
  7984  
  7985  // AsFloat32x8 converts from Int64x4 to Float32x8.
  7986  func (from Int64x4) AsFloat32x8() (to Float32x8)
  7987  
  7988  // AsFloat64x4 converts from Int64x4 to Float64x4.
  7989  func (from Int64x4) AsFloat64x4() (to Float64x4)
  7990  
  7991  // AsInt8x32 converts from Int64x4 to Int8x32.
  7992  func (from Int64x4) AsInt8x32() (to Int8x32)
  7993  
  7994  // AsInt16x16 converts from Int64x4 to Int16x16.
  7995  func (from Int64x4) AsInt16x16() (to Int16x16)
  7996  
  7997  // AsInt32x8 converts from Int64x4 to Int32x8.
  7998  func (from Int64x4) AsInt32x8() (to Int32x8)
  7999  
  8000  // AsUint8x32 converts from Int64x4 to Uint8x32.
  8001  func (from Int64x4) AsUint8x32() (to Uint8x32)
  8002  
  8003  // AsUint16x16 converts from Int64x4 to Uint16x16.
  8004  func (from Int64x4) AsUint16x16() (to Uint16x16)
  8005  
  8006  // AsUint32x8 converts from Int64x4 to Uint32x8.
  8007  func (from Int64x4) AsUint32x8() (to Uint32x8)
  8008  
  8009  // AsUint64x4 converts from Int64x4 to Uint64x4.
  8010  func (from Int64x4) AsUint64x4() (to Uint64x4)
  8011  
  8012  // AsFloat32x16 converts from Int64x8 to Float32x16.
  8013  func (from Int64x8) AsFloat32x16() (to Float32x16)
  8014  
  8015  // AsFloat64x8 converts from Int64x8 to Float64x8.
  8016  func (from Int64x8) AsFloat64x8() (to Float64x8)
  8017  
  8018  // AsInt8x64 converts from Int64x8 to Int8x64.
  8019  func (from Int64x8) AsInt8x64() (to Int8x64)
  8020  
  8021  // AsInt16x32 converts from Int64x8 to Int16x32.
  8022  func (from Int64x8) AsInt16x32() (to Int16x32)
  8023  
  8024  // AsInt32x16 converts from Int64x8 to Int32x16.
  8025  func (from Int64x8) AsInt32x16() (to Int32x16)
  8026  
  8027  // AsUint8x64 converts from Int64x8 to Uint8x64.
  8028  func (from Int64x8) AsUint8x64() (to Uint8x64)
  8029  
  8030  // AsUint16x32 converts from Int64x8 to Uint16x32.
  8031  func (from Int64x8) AsUint16x32() (to Uint16x32)
  8032  
  8033  // AsUint32x16 converts from Int64x8 to Uint32x16.
  8034  func (from Int64x8) AsUint32x16() (to Uint32x16)
  8035  
  8036  // AsUint64x8 converts from Int64x8 to Uint64x8.
  8037  func (from Int64x8) AsUint64x8() (to Uint64x8)
  8038  
  8039  // AsFloat32x4 converts from Uint8x16 to Float32x4.
  8040  func (from Uint8x16) AsFloat32x4() (to Float32x4)
  8041  
  8042  // AsFloat64x2 converts from Uint8x16 to Float64x2.
  8043  func (from Uint8x16) AsFloat64x2() (to Float64x2)
  8044  
  8045  // AsInt8x16 converts from Uint8x16 to Int8x16.
  8046  func (from Uint8x16) AsInt8x16() (to Int8x16)
  8047  
  8048  // AsInt16x8 converts from Uint8x16 to Int16x8.
  8049  func (from Uint8x16) AsInt16x8() (to Int16x8)
  8050  
  8051  // AsInt32x4 converts from Uint8x16 to Int32x4.
  8052  func (from Uint8x16) AsInt32x4() (to Int32x4)
  8053  
  8054  // AsInt64x2 converts from Uint8x16 to Int64x2.
  8055  func (from Uint8x16) AsInt64x2() (to Int64x2)
  8056  
  8057  // AsUint16x8 converts from Uint8x16 to Uint16x8.
  8058  func (from Uint8x16) AsUint16x8() (to Uint16x8)
  8059  
  8060  // AsUint32x4 converts from Uint8x16 to Uint32x4.
  8061  func (from Uint8x16) AsUint32x4() (to Uint32x4)
  8062  
  8063  // AsUint64x2 converts from Uint8x16 to Uint64x2.
  8064  func (from Uint8x16) AsUint64x2() (to Uint64x2)
  8065  
  8066  // AsFloat32x8 converts from Uint8x32 to Float32x8.
  8067  func (from Uint8x32) AsFloat32x8() (to Float32x8)
  8068  
  8069  // AsFloat64x4 converts from Uint8x32 to Float64x4.
  8070  func (from Uint8x32) AsFloat64x4() (to Float64x4)
  8071  
  8072  // AsInt8x32 converts from Uint8x32 to Int8x32.
  8073  func (from Uint8x32) AsInt8x32() (to Int8x32)
  8074  
  8075  // AsInt16x16 converts from Uint8x32 to Int16x16.
  8076  func (from Uint8x32) AsInt16x16() (to Int16x16)
  8077  
  8078  // AsInt32x8 converts from Uint8x32 to Int32x8.
  8079  func (from Uint8x32) AsInt32x8() (to Int32x8)
  8080  
  8081  // AsInt64x4 converts from Uint8x32 to Int64x4.
  8082  func (from Uint8x32) AsInt64x4() (to Int64x4)
  8083  
  8084  // AsUint16x16 converts from Uint8x32 to Uint16x16.
  8085  func (from Uint8x32) AsUint16x16() (to Uint16x16)
  8086  
  8087  // AsUint32x8 converts from Uint8x32 to Uint32x8.
  8088  func (from Uint8x32) AsUint32x8() (to Uint32x8)
  8089  
  8090  // AsUint64x4 converts from Uint8x32 to Uint64x4.
  8091  func (from Uint8x32) AsUint64x4() (to Uint64x4)
  8092  
  8093  // AsFloat32x16 converts from Uint8x64 to Float32x16.
  8094  func (from Uint8x64) AsFloat32x16() (to Float32x16)
  8095  
  8096  // AsFloat64x8 converts from Uint8x64 to Float64x8.
  8097  func (from Uint8x64) AsFloat64x8() (to Float64x8)
  8098  
  8099  // AsInt8x64 converts from Uint8x64 to Int8x64.
  8100  func (from Uint8x64) AsInt8x64() (to Int8x64)
  8101  
  8102  // AsInt16x32 converts from Uint8x64 to Int16x32.
  8103  func (from Uint8x64) AsInt16x32() (to Int16x32)
  8104  
  8105  // AsInt32x16 converts from Uint8x64 to Int32x16.
  8106  func (from Uint8x64) AsInt32x16() (to Int32x16)
  8107  
  8108  // AsInt64x8 converts from Uint8x64 to Int64x8.
  8109  func (from Uint8x64) AsInt64x8() (to Int64x8)
  8110  
  8111  // AsUint16x32 converts from Uint8x64 to Uint16x32.
  8112  func (from Uint8x64) AsUint16x32() (to Uint16x32)
  8113  
  8114  // AsUint32x16 converts from Uint8x64 to Uint32x16.
  8115  func (from Uint8x64) AsUint32x16() (to Uint32x16)
  8116  
  8117  // AsUint64x8 converts from Uint8x64 to Uint64x8.
  8118  func (from Uint8x64) AsUint64x8() (to Uint64x8)
  8119  
  8120  // AsFloat32x4 converts from Uint16x8 to Float32x4.
  8121  func (from Uint16x8) AsFloat32x4() (to Float32x4)
  8122  
  8123  // AsFloat64x2 converts from Uint16x8 to Float64x2.
  8124  func (from Uint16x8) AsFloat64x2() (to Float64x2)
  8125  
  8126  // AsInt8x16 converts from Uint16x8 to Int8x16.
  8127  func (from Uint16x8) AsInt8x16() (to Int8x16)
  8128  
  8129  // AsInt16x8 converts from Uint16x8 to Int16x8.
  8130  func (from Uint16x8) AsInt16x8() (to Int16x8)
  8131  
  8132  // AsInt32x4 converts from Uint16x8 to Int32x4.
  8133  func (from Uint16x8) AsInt32x4() (to Int32x4)
  8134  
  8135  // AsInt64x2 converts from Uint16x8 to Int64x2.
  8136  func (from Uint16x8) AsInt64x2() (to Int64x2)
  8137  
  8138  // AsUint8x16 converts from Uint16x8 to Uint8x16.
  8139  func (from Uint16x8) AsUint8x16() (to Uint8x16)
  8140  
  8141  // AsUint32x4 converts from Uint16x8 to Uint32x4.
  8142  func (from Uint16x8) AsUint32x4() (to Uint32x4)
  8143  
  8144  // AsUint64x2 converts from Uint16x8 to Uint64x2.
  8145  func (from Uint16x8) AsUint64x2() (to Uint64x2)
  8146  
  8147  // AsFloat32x8 converts from Uint16x16 to Float32x8.
  8148  func (from Uint16x16) AsFloat32x8() (to Float32x8)
  8149  
  8150  // AsFloat64x4 converts from Uint16x16 to Float64x4.
  8151  func (from Uint16x16) AsFloat64x4() (to Float64x4)
  8152  
  8153  // AsInt8x32 converts from Uint16x16 to Int8x32.
  8154  func (from Uint16x16) AsInt8x32() (to Int8x32)
  8155  
  8156  // AsInt16x16 converts from Uint16x16 to Int16x16.
  8157  func (from Uint16x16) AsInt16x16() (to Int16x16)
  8158  
  8159  // AsInt32x8 converts from Uint16x16 to Int32x8.
  8160  func (from Uint16x16) AsInt32x8() (to Int32x8)
  8161  
  8162  // AsInt64x4 converts from Uint16x16 to Int64x4.
  8163  func (from Uint16x16) AsInt64x4() (to Int64x4)
  8164  
  8165  // AsUint8x32 converts from Uint16x16 to Uint8x32.
  8166  func (from Uint16x16) AsUint8x32() (to Uint8x32)
  8167  
  8168  // AsUint32x8 converts from Uint16x16 to Uint32x8.
  8169  func (from Uint16x16) AsUint32x8() (to Uint32x8)
  8170  
  8171  // AsUint64x4 converts from Uint16x16 to Uint64x4.
  8172  func (from Uint16x16) AsUint64x4() (to Uint64x4)
  8173  
  8174  // AsFloat32x16 converts from Uint16x32 to Float32x16.
  8175  func (from Uint16x32) AsFloat32x16() (to Float32x16)
  8176  
  8177  // AsFloat64x8 converts from Uint16x32 to Float64x8.
  8178  func (from Uint16x32) AsFloat64x8() (to Float64x8)
  8179  
  8180  // AsInt8x64 converts from Uint16x32 to Int8x64.
  8181  func (from Uint16x32) AsInt8x64() (to Int8x64)
  8182  
  8183  // AsInt16x32 converts from Uint16x32 to Int16x32.
  8184  func (from Uint16x32) AsInt16x32() (to Int16x32)
  8185  
  8186  // AsInt32x16 converts from Uint16x32 to Int32x16.
  8187  func (from Uint16x32) AsInt32x16() (to Int32x16)
  8188  
  8189  // AsInt64x8 converts from Uint16x32 to Int64x8.
  8190  func (from Uint16x32) AsInt64x8() (to Int64x8)
  8191  
  8192  // AsUint8x64 converts from Uint16x32 to Uint8x64.
  8193  func (from Uint16x32) AsUint8x64() (to Uint8x64)
  8194  
  8195  // AsUint32x16 converts from Uint16x32 to Uint32x16.
  8196  func (from Uint16x32) AsUint32x16() (to Uint32x16)
  8197  
  8198  // AsUint64x8 converts from Uint16x32 to Uint64x8.
  8199  func (from Uint16x32) AsUint64x8() (to Uint64x8)
  8200  
  8201  // AsFloat32x4 converts from Uint32x4 to Float32x4.
  8202  func (from Uint32x4) AsFloat32x4() (to Float32x4)
  8203  
  8204  // AsFloat64x2 converts from Uint32x4 to Float64x2.
  8205  func (from Uint32x4) AsFloat64x2() (to Float64x2)
  8206  
  8207  // AsInt8x16 converts from Uint32x4 to Int8x16.
  8208  func (from Uint32x4) AsInt8x16() (to Int8x16)
  8209  
  8210  // AsInt16x8 converts from Uint32x4 to Int16x8.
  8211  func (from Uint32x4) AsInt16x8() (to Int16x8)
  8212  
  8213  // AsInt32x4 converts from Uint32x4 to Int32x4.
  8214  func (from Uint32x4) AsInt32x4() (to Int32x4)
  8215  
  8216  // AsInt64x2 converts from Uint32x4 to Int64x2.
  8217  func (from Uint32x4) AsInt64x2() (to Int64x2)
  8218  
  8219  // AsUint8x16 converts from Uint32x4 to Uint8x16.
  8220  func (from Uint32x4) AsUint8x16() (to Uint8x16)
  8221  
  8222  // AsUint16x8 converts from Uint32x4 to Uint16x8.
  8223  func (from Uint32x4) AsUint16x8() (to Uint16x8)
  8224  
  8225  // AsUint64x2 converts from Uint32x4 to Uint64x2.
  8226  func (from Uint32x4) AsUint64x2() (to Uint64x2)
  8227  
  8228  // AsFloat32x8 converts from Uint32x8 to Float32x8.
  8229  func (from Uint32x8) AsFloat32x8() (to Float32x8)
  8230  
  8231  // AsFloat64x4 converts from Uint32x8 to Float64x4.
  8232  func (from Uint32x8) AsFloat64x4() (to Float64x4)
  8233  
  8234  // AsInt8x32 converts from Uint32x8 to Int8x32.
  8235  func (from Uint32x8) AsInt8x32() (to Int8x32)
  8236  
  8237  // AsInt16x16 converts from Uint32x8 to Int16x16.
  8238  func (from Uint32x8) AsInt16x16() (to Int16x16)
  8239  
  8240  // AsInt32x8 converts from Uint32x8 to Int32x8.
  8241  func (from Uint32x8) AsInt32x8() (to Int32x8)
  8242  
  8243  // AsInt64x4 converts from Uint32x8 to Int64x4.
  8244  func (from Uint32x8) AsInt64x4() (to Int64x4)
  8245  
  8246  // AsUint8x32 converts from Uint32x8 to Uint8x32.
  8247  func (from Uint32x8) AsUint8x32() (to Uint8x32)
  8248  
  8249  // AsUint16x16 converts from Uint32x8 to Uint16x16.
  8250  func (from Uint32x8) AsUint16x16() (to Uint16x16)
  8251  
  8252  // AsUint64x4 converts from Uint32x8 to Uint64x4.
  8253  func (from Uint32x8) AsUint64x4() (to Uint64x4)
  8254  
  8255  // AsFloat32x16 converts from Uint32x16 to Float32x16.
  8256  func (from Uint32x16) AsFloat32x16() (to Float32x16)
  8257  
  8258  // AsFloat64x8 converts from Uint32x16 to Float64x8.
  8259  func (from Uint32x16) AsFloat64x8() (to Float64x8)
  8260  
  8261  // AsInt8x64 converts from Uint32x16 to Int8x64.
  8262  func (from Uint32x16) AsInt8x64() (to Int8x64)
  8263  
  8264  // AsInt16x32 converts from Uint32x16 to Int16x32.
  8265  func (from Uint32x16) AsInt16x32() (to Int16x32)
  8266  
  8267  // AsInt32x16 converts from Uint32x16 to Int32x16.
  8268  func (from Uint32x16) AsInt32x16() (to Int32x16)
  8269  
  8270  // AsInt64x8 converts from Uint32x16 to Int64x8.
  8271  func (from Uint32x16) AsInt64x8() (to Int64x8)
  8272  
  8273  // AsUint8x64 converts from Uint32x16 to Uint8x64.
  8274  func (from Uint32x16) AsUint8x64() (to Uint8x64)
  8275  
  8276  // AsUint16x32 converts from Uint32x16 to Uint16x32.
  8277  func (from Uint32x16) AsUint16x32() (to Uint16x32)
  8278  
  8279  // AsUint64x8 converts from Uint32x16 to Uint64x8.
  8280  func (from Uint32x16) AsUint64x8() (to Uint64x8)
  8281  
  8282  // AsFloat32x4 converts from Uint64x2 to Float32x4.
  8283  func (from Uint64x2) AsFloat32x4() (to Float32x4)
  8284  
  8285  // AsFloat64x2 converts from Uint64x2 to Float64x2.
  8286  func (from Uint64x2) AsFloat64x2() (to Float64x2)
  8287  
  8288  // AsInt8x16 converts from Uint64x2 to Int8x16.
  8289  func (from Uint64x2) AsInt8x16() (to Int8x16)
  8290  
  8291  // AsInt16x8 converts from Uint64x2 to Int16x8.
  8292  func (from Uint64x2) AsInt16x8() (to Int16x8)
  8293  
  8294  // AsInt32x4 converts from Uint64x2 to Int32x4.
  8295  func (from Uint64x2) AsInt32x4() (to Int32x4)
  8296  
  8297  // AsInt64x2 converts from Uint64x2 to Int64x2.
  8298  func (from Uint64x2) AsInt64x2() (to Int64x2)
  8299  
  8300  // AsUint8x16 converts from Uint64x2 to Uint8x16.
  8301  func (from Uint64x2) AsUint8x16() (to Uint8x16)
  8302  
  8303  // AsUint16x8 converts from Uint64x2 to Uint16x8.
  8304  func (from Uint64x2) AsUint16x8() (to Uint16x8)
  8305  
  8306  // AsUint32x4 converts from Uint64x2 to Uint32x4.
  8307  func (from Uint64x2) AsUint32x4() (to Uint32x4)
  8308  
  8309  // AsFloat32x8 converts from Uint64x4 to Float32x8.
  8310  func (from Uint64x4) AsFloat32x8() (to Float32x8)
  8311  
  8312  // AsFloat64x4 converts from Uint64x4 to Float64x4.
  8313  func (from Uint64x4) AsFloat64x4() (to Float64x4)
  8314  
  8315  // AsInt8x32 converts from Uint64x4 to Int8x32.
  8316  func (from Uint64x4) AsInt8x32() (to Int8x32)
  8317  
  8318  // AsInt16x16 converts from Uint64x4 to Int16x16.
  8319  func (from Uint64x4) AsInt16x16() (to Int16x16)
  8320  
  8321  // AsInt32x8 converts from Uint64x4 to Int32x8.
  8322  func (from Uint64x4) AsInt32x8() (to Int32x8)
  8323  
  8324  // AsInt64x4 converts from Uint64x4 to Int64x4.
  8325  func (from Uint64x4) AsInt64x4() (to Int64x4)
  8326  
  8327  // AsUint8x32 converts from Uint64x4 to Uint8x32.
  8328  func (from Uint64x4) AsUint8x32() (to Uint8x32)
  8329  
  8330  // AsUint16x16 converts from Uint64x4 to Uint16x16.
  8331  func (from Uint64x4) AsUint16x16() (to Uint16x16)
  8332  
  8333  // AsUint32x8 converts from Uint64x4 to Uint32x8.
  8334  func (from Uint64x4) AsUint32x8() (to Uint32x8)
  8335  
  8336  // AsFloat32x16 converts from Uint64x8 to Float32x16.
  8337  func (from Uint64x8) AsFloat32x16() (to Float32x16)
  8338  
  8339  // AsFloat64x8 converts from Uint64x8 to Float64x8.
  8340  func (from Uint64x8) AsFloat64x8() (to Float64x8)
  8341  
  8342  // AsInt8x64 converts from Uint64x8 to Int8x64.
  8343  func (from Uint64x8) AsInt8x64() (to Int8x64)
  8344  
  8345  // AsInt16x32 converts from Uint64x8 to Int16x32.
  8346  func (from Uint64x8) AsInt16x32() (to Int16x32)
  8347  
  8348  // AsInt32x16 converts from Uint64x8 to Int32x16.
  8349  func (from Uint64x8) AsInt32x16() (to Int32x16)
  8350  
  8351  // AsInt64x8 converts from Uint64x8 to Int64x8.
  8352  func (from Uint64x8) AsInt64x8() (to Int64x8)
  8353  
  8354  // AsUint8x64 converts from Uint64x8 to Uint8x64.
  8355  func (from Uint64x8) AsUint8x64() (to Uint8x64)
  8356  
  8357  // AsUint16x32 converts from Uint64x8 to Uint16x32.
  8358  func (from Uint64x8) AsUint16x32() (to Uint16x32)
  8359  
  8360  // AsUint32x16 converts from Uint64x8 to Uint32x16.
  8361  func (from Uint64x8) AsUint32x16() (to Uint32x16)
  8362  
  8363  // AsInt8x16 converts from Mask8x16 to Int8x16.
  8364  func (from Mask8x16) AsInt8x16() (to Int8x16)
  8365  
  8366  // asMask converts from Int8x16 to Mask8x16.
  8367  func (from Int8x16) asMask() (to Mask8x16)
  8368  
        // And returns a Mask8x16 computed from x and y; presumably the element-wise logical AND of the two masks (TODO confirm).
  8369  func (x Mask8x16) And(y Mask8x16) Mask8x16
  8370  
        // Or returns a Mask8x16 computed from x and y; presumably the element-wise logical OR of the two masks (TODO confirm).
  8371  func (x Mask8x16) Or(y Mask8x16) Mask8x16
  8372  
  8373  // AsInt8x32 converts from Mask8x32 to Int8x32.
  8374  func (from Mask8x32) AsInt8x32() (to Int8x32)
  8375  
  8376  // asMask converts from Int8x32 to Mask8x32.
  8377  func (from Int8x32) asMask() (to Mask8x32)
  8378  
        // And returns a Mask8x32 computed from x and y; presumably the element-wise logical AND of the two masks (TODO confirm).
  8379  func (x Mask8x32) And(y Mask8x32) Mask8x32
  8380  
        // Or returns a Mask8x32 computed from x and y; presumably the element-wise logical OR of the two masks (TODO confirm).
  8381  func (x Mask8x32) Or(y Mask8x32) Mask8x32
  8382  
  8383  // AsInt8x64 converts from Mask8x64 to Int8x64.
  8384  func (from Mask8x64) AsInt8x64() (to Int8x64)
  8385  
  8386  // asMask converts from Int8x64 to Mask8x64.
  8387  func (from Int8x64) asMask() (to Mask8x64)
  8388  
        // And returns a Mask8x64 computed from x and y; presumably the element-wise logical AND of the two masks (TODO confirm).
  8389  func (x Mask8x64) And(y Mask8x64) Mask8x64
  8390  
        // Or returns a Mask8x64 computed from x and y; presumably the element-wise logical OR of the two masks (TODO confirm).
  8391  func (x Mask8x64) Or(y Mask8x64) Mask8x64
  8392  
  8393  // AsInt16x8 converts from Mask16x8 to Int16x8.
  8394  func (from Mask16x8) AsInt16x8() (to Int16x8)
  8395  
  8396  // asMask converts from Int16x8 to Mask16x8.
  8397  func (from Int16x8) asMask() (to Mask16x8)
  8398  
        // And returns a Mask16x8 computed from x and y; presumably the element-wise logical AND of the two masks (TODO confirm).
  8399  func (x Mask16x8) And(y Mask16x8) Mask16x8
  8400  
        // Or returns a Mask16x8 computed from x and y; presumably the element-wise logical OR of the two masks (TODO confirm).
  8401  func (x Mask16x8) Or(y Mask16x8) Mask16x8
  8402  
  8403  // AsInt16x16 converts from Mask16x16 to Int16x16.
  8404  func (from Mask16x16) AsInt16x16() (to Int16x16)
  8405  
  8406  // asMask converts from Int16x16 to Mask16x16.
  8407  func (from Int16x16) asMask() (to Mask16x16)
  8408  
        // And returns a Mask16x16 computed from x and y; presumably the element-wise logical AND of the two masks (TODO confirm).
  8409  func (x Mask16x16) And(y Mask16x16) Mask16x16
  8410  
        // Or returns a Mask16x16 computed from x and y; presumably the element-wise logical OR of the two masks (TODO confirm).
  8411  func (x Mask16x16) Or(y Mask16x16) Mask16x16
  8412  
  8413  // AsInt16x32 converts from Mask16x32 to Int16x32.
  8414  func (from Mask16x32) AsInt16x32() (to Int16x32)
  8415  
  8416  // asMask converts from Int16x32 to Mask16x32.
  8417  func (from Int16x32) asMask() (to Mask16x32)
  8418  
        // And returns a Mask16x32 computed from x and y; presumably the element-wise logical AND of the two masks (TODO confirm).
  8419  func (x Mask16x32) And(y Mask16x32) Mask16x32
  8420  
        // Or returns a Mask16x32 computed from x and y; presumably the element-wise logical OR of the two masks (TODO confirm).
  8421  func (x Mask16x32) Or(y Mask16x32) Mask16x32
  8422  
  8423  // AsInt32x4 converts from Mask32x4 to Int32x4.
  8424  func (from Mask32x4) AsInt32x4() (to Int32x4)
  8425  
  8426  // asMask converts from Int32x4 to Mask32x4.
  8427  func (from Int32x4) asMask() (to Mask32x4)
  8428  
        // And returns a Mask32x4 computed from x and y; presumably the element-wise logical AND of the two masks (TODO confirm).
  8429  func (x Mask32x4) And(y Mask32x4) Mask32x4
  8430  
        // Or returns a Mask32x4 computed from x and y; presumably the element-wise logical OR of the two masks (TODO confirm).
  8431  func (x Mask32x4) Or(y Mask32x4) Mask32x4
  8432  
  8433  // AsInt32x8 converts from Mask32x8 to Int32x8.
  8434  func (from Mask32x8) AsInt32x8() (to Int32x8)
  8435  
  8436  // asMask converts from Int32x8 to Mask32x8.
  8437  func (from Int32x8) asMask() (to Mask32x8)
  8438  
        // And returns a Mask32x8 computed from x and y; presumably the element-wise logical AND of the two masks (TODO confirm).
  8439  func (x Mask32x8) And(y Mask32x8) Mask32x8
  8440  
        // Or returns a Mask32x8 computed from x and y; presumably the element-wise logical OR of the two masks (TODO confirm).
  8441  func (x Mask32x8) Or(y Mask32x8) Mask32x8
  8442  
  8443  // AsInt32x16 converts from Mask32x16 to Int32x16.
  8444  func (from Mask32x16) AsInt32x16() (to Int32x16)
  8445  
  8446  // asMask converts from Int32x16 to Mask32x16.
  8447  func (from Int32x16) asMask() (to Mask32x16)
  8448  
        // And returns a Mask32x16 computed from x and y; presumably the element-wise logical AND of the two masks (TODO confirm).
  8449  func (x Mask32x16) And(y Mask32x16) Mask32x16
  8450  
        // Or returns a Mask32x16 computed from x and y; presumably the element-wise logical OR of the two masks (TODO confirm).
  8451  func (x Mask32x16) Or(y Mask32x16) Mask32x16
  8452  
  8453  // AsInt64x2 converts from Mask64x2 to Int64x2.
  8454  func (from Mask64x2) AsInt64x2() (to Int64x2)
  8455  
  8456  // asMask converts from Int64x2 to Mask64x2.
  8457  func (from Int64x2) asMask() (to Mask64x2)
  8458  
        // And returns a Mask64x2 computed from x and y; presumably the element-wise logical AND of the two masks (TODO confirm).
  8459  func (x Mask64x2) And(y Mask64x2) Mask64x2
  8460  
        // Or returns a Mask64x2 computed from x and y; presumably the element-wise logical OR of the two masks (TODO confirm).
  8461  func (x Mask64x2) Or(y Mask64x2) Mask64x2
  8462  
  8463  // AsInt64x4 converts from Mask64x4 to Int64x4.
  8464  func (from Mask64x4) AsInt64x4() (to Int64x4)
  8465  
  8466  // asMask converts from Int64x4 to Mask64x4.
  8467  func (from Int64x4) asMask() (to Mask64x4)
  8468  
        // And returns a Mask64x4 computed from x and y; presumably the element-wise logical AND of the two masks (TODO confirm).
  8469  func (x Mask64x4) And(y Mask64x4) Mask64x4
  8470  
        // Or returns a Mask64x4 computed from x and y; presumably the element-wise logical OR of the two masks (TODO confirm).
  8471  func (x Mask64x4) Or(y Mask64x4) Mask64x4
  8472  
  8473  // AsInt64x8 converts from Mask64x8 to Int64x8.
  8474  func (from Mask64x8) AsInt64x8() (to Int64x8)
  8475  
  8476  // asMask converts from Int64x8 to Mask64x8.
  8477  func (from Int64x8) asMask() (to Mask64x8)
  8478  
        // And returns a Mask64x8 computed from x and y; presumably the element-wise logical AND of the two masks (TODO confirm).
  8479  func (x Mask64x8) And(y Mask64x8) Mask64x8
  8480  
        // Or returns a Mask64x8 computed from x and y; presumably the element-wise logical OR of the two masks (TODO confirm).
  8481  func (x Mask64x8) Or(y Mask64x8) Mask64x8
  8482
View as plain text