Text file src/simd/archsimd/_gen/simdgen/ops/Moves/go_amd64.yaml

     1  !sum
     2  - go: SetElem
     3    asm: "VPINSR[BWDQ]"
     4    in:
     5    - &t
     6      class: vreg
     7      base: $b
     8    - class: greg
     9      base: $b
    10      lanes: 1 # Scalar, darn it!
    11    - &imm
    12      class: immediate
    13      immOffset: 0
    14      name: index
    15    out:
    16    - *t
    17  
    18  - go: SetElem
    19    asm: "VPINSR[DQ]"
    20    in:
    21    - &t
    22      class: vreg
    23      base: int
    24      OverwriteBase: float
    25    - class: greg
    26      base: int
    27      OverwriteBase: float
    28      lanes: 1 # Scalar, darn it!
    29    - &imm
    30      class: immediate
    31      immOffset: 0
    32      name: index
    33    out:
    34    - *t
    35  
    36  - go: GetElem # VPEXTRB 128 bit
    37    asm: VPEXTRB
    38    in:
    39      - bits: 128
    40        base: $b
    41        class: vreg
    42        elemBits: 8
    43      - *imm
    44    out:
    45      - bits: 32
    46        base: $b
    47        class: greg
    48        overwriteBits: 8 # The XED data specification is wrong here; override it with the correct width.
    49  
    50  - go: GetElem # VPEXTRW 128 bit
    51    asm: VPEXTRW
    52    in:
    53      - bits: 128
    54        base: $b
    55        class: vreg
    56        elemBits: 16
    57      - *imm
    58    out:
    59      - bits: 32
    60        base: $b
    61        class: greg
    62        overwriteBits: 16 # The XED data specification is wrong here; override it with the correct width.
    63  
    64  - go: GetElem
    65    asm: "VPEXTR[DQ]"
    66    in:
    67    - class: vreg
    68      base: $b
    69      elemBits: $e
    70    - *imm
    71    out:
    72    - class: greg
    73      base: $b
    74      bits: $e
    75  
    76  - go: GetElem
    77    asm: "VPEXTR[DQ]"
    78    in:
    79    - class: vreg
    80      base: int
    81      elemBits: $e
    82      OverwriteBase: float
    83    - *imm
    84    out:
    85    - class: greg
    86      base: int
    87      bits: $e
    88      OverwriteBase: float
    89  
    90  - go: "SetHi|SetLo"
    91    regexpTag: "move"
    92    asm: "VINSERTI128|VINSERTI64X4"
    93    inVariant: []
    94    in:
    95    - &i8x2N
    96      class: vreg
    97      base: $t
    98      OverwriteElementBits: 8
    99    - &i8xN
   100      class: vreg
   101      base: $t
   102      OverwriteElementBits: 8
   103    - &imm01 # This immediate should only ever be 0 or 1.
   104      class: immediate
   105      const: 0 # placeholder
   106      name: index
   107    out:
   108    - *i8x2N
   109  
   110  - go: "GetHi|GetLo"
   111    asm: "VEXTRACTI128|VEXTRACTI64X4"
   112    regexpTag: "move"
   113    inVariant: []
   114    in:
   115    - *i8x2N
   116    - *imm01
   117    out:
   118    - *i8xN
   119  
   120  - go: "SetHi|SetLo"
   121    asm: "VINSERTI128|VINSERTI64X4"
   122    regexpTag: "move"
   123    inVariant: []
   124    in:
   125    - &i16x2N
   126      class: vreg
   127      base: $t
   128      OverwriteElementBits: 16
   129    - &i16xN
   130      class: vreg
   131      base: $t
   132      OverwriteElementBits: 16
   133    - *imm01
   134    out:
   135    - *i16x2N
   136  
   137  - go: "GetHi|GetLo"
   138    regexpTag: "move"
   139    asm: "VEXTRACTI128|VEXTRACTI64X4"
   140    inVariant: []
   141    in:
   142    - *i16x2N
   143    - *imm01
   144    out:
   145    - *i16xN
   146  
   147  - go: "SetHi|SetLo"
   148    regexpTag: "move"
   149    asm: "VINSERTI128|VINSERTI64X4"
   150    inVariant: []
   151    in:
   152    - &i32x2N
   153      class: vreg
   154      base: $t
   155      OverwriteElementBits: 32
   156    - &i32xN
   157      class: vreg
   158      base: $t
   159      OverwriteElementBits: 32
   160    - *imm01
   161    out:
   162    - *i32x2N
   163  
   164  - go: "GetHi|GetLo"
   165    regexpTag: "move"
   166    asm: "VEXTRACTI128|VEXTRACTI64X4"
   167    inVariant: []
   168    in:
   169    - *i32x2N
   170    - *imm01
   171    out:
   172    - *i32xN
   173  
   174  - go: "SetHi|SetLo"
   175    regexpTag: "move"
   176    asm: "VINSERTI128|VINSERTI64X4"
   177    inVariant: []
   178    in:
   179    - &i64x2N
   180      class: vreg
   181      base: $t
   182      OverwriteElementBits: 64
   183    - &i64xN
   184      class: vreg
   185      base: $t
   186      OverwriteElementBits: 64
   187    - *imm01
   188    out:
   189    - *i64x2N
   190  
   191  - go: "GetHi|GetLo"
   192    regexpTag: "move"
   193    asm: "VEXTRACTI128|VEXTRACTI64X4"
   194    inVariant: []
   195    in:
   196    - *i64x2N
   197    - *imm01
   198    out:
   199    - *i64xN
   200  
   201  - go: "SetHi|SetLo"
   202    regexpTag: "move"
   203    asm: "VINSERTF128|VINSERTF64X4"
   204    inVariant: []
   205    in:
   206    - &f32x2N
   207      class: vreg
   208      base: $t
   209      OverwriteElementBits: 32
   210    - &f32xN
   211      class: vreg
   212      base: $t
   213      OverwriteElementBits: 32
   214    - *imm01
   215    out:
   216    - *f32x2N
   217  
   218  - go: "GetHi|GetLo"
   219    regexpTag: "move"
   220    asm: "VEXTRACTF128|VEXTRACTF64X4"
   221    inVariant: []
   222    in:
   223    - *f32x2N
   224    - *imm01
   225    out:
   226    - *f32xN
   227  
   228  - go: "SetHi|SetLo"
   229    regexpTag: "move"
   230    asm: "VINSERTF128|VINSERTF64X4"
   231    inVariant: []
   232    in:
   233    - &f64x2N
   234      class: vreg
   235      base: $t
   236      OverwriteElementBits: 64
   237    - &f64xN
   238      class: vreg
   239      base: $t
   240      OverwriteElementBits: 64
   241    - *imm01
   242    out:
   243    - *f64x2N
   244  
   245  - go: "GetHi|GetLo"
   246    regexpTag: "move"
   247    asm: "VEXTRACTF128|VEXTRACTF64X4"
   248    inVariant: []
   249    in:
   250    - *f64x2N
   251    - *imm01
   252    out:
   253    - *f64xN
   254  
   255  - go: Permute
   256    asm: "VPERM[BWDQ]|VPERMP[SD]"
   257    operandOrder: "21Type1"
   258    in:
   259    - &anyindices
   260      class: vreg
   261      name: indices
   262      overwriteBase: uint
   263    - go: $t
   264    out:
   265    - go: $t
   266  
   267  - go: ConcatPermute
   268    asm: "VPERMI2[BWDQ]|VPERMI2P[SD]"
   269    # Because we are overwriting the receiver's type, we
   270    # have to turn the receiver into a parameter so that
   271    # there is no duplication.
   272    operandOrder: "231Type1"
   273    in:
   274    - *anyindices # result in arg 0
   275    - go: $t
   276    - go: $t
   277    out:
   278    - go: $t
   279  
   280  - go: Compress
   281    asm: "VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]"
   282    in:
   283      # The mask in Compress is a control mask rather than a write mask, so it's not optional.
   284    - class: mask
   285    - go: $t
   286    out:
   287    - go: $t
   288  
   289  # This is a non-public method for now because
   290  # (1) [OverwriteClass] must be set together with [OverwriteBase], and
   291  # (2) "simdgen does not support [OverwriteClass] in inputs".
   292  # As a result, the signature is wrong.
   293  - go: blend
   294    asm: VPBLENDVB
   295    zeroing: false
   296    in:
   297    - &v
   298      go: $t
   299      class: vreg
   300      base: int
   301    - *v
   302    -
   303      class: vreg
   304      base: int
   305      name: mask
   306    out:
   307    - *v
   308  
   309  # For AVX512
   310  - go: blend
   311    asm: VPBLENDM[BWDQ]
   312    zeroing: false
   313    in:
   314    - &v
   315      go: $t
   316      bits: 512
   317      class: vreg
   318      base: int
   319    - *v
   320    inVariant:
   321    -
   322      class: mask
   323    out:
   324    - *v
   325  
   326    # For AVX512
   327  - go: move
   328    asm: VMOVDQU(8|16|32|64)
   329    zeroing: true
   330    in:
   331    - &v
   332      go: $t
   333      class: vreg
   334      base: int|uint
   335    inVariant:
   336    -
   337      class: mask
   338    out:
   339    - *v
   340  
   341  - go: Expand
   342    asm: "VPEXPAND[BWDQ]|VEXPANDP[SD]"
   343    in:
   344      # The mask in Expand is a control mask rather than a write mask, so it's not optional.
   345    - class: mask
   346    - go: $t
   347    out:
   348    - go: $t
   349  
   350  - go: broadcast1To2
   351    asm: VPBROADCASTQ
   352    in:
   353    - class: vreg
   354      bits: 128
   355      elemBits: 64
   356      base: $b
   357    out:
   358    - class: vreg
   359      bits: 128
   360      elemBits: 64
   361      base: $b
   362  
   363  # Weirdly, this one case is memory-operand-only on AVX2.
   364  - go: broadcast1To2
   365    asm: VPBROADCASTQ
   366    in:
   367    - class: vreg
   368      bits: 128
   369      elemBits: 64
   370      base: int
   371      OverwriteBase: float
   372    out:
   373    - class: vreg
   374      bits: 128
   375      elemBits: 64
   376      base: int
   377      OverwriteBase: float
   378  
   379  - go: broadcast1To4
   380    asm: VPBROADCAST[BWDQ]
   381    in:
   382    - class: vreg
   383      bits: 128
   384      base: $b
   385    out:
   386    - class: vreg
   387      lanes: 4
   388      base: $b
   389  
   390  - go: broadcast1To8
   391    asm: VPBROADCAST[BWDQ]
   392    in:
   393    - class: vreg
   394      bits: 128
   395      base: $b
   396    out:
   397    - class: vreg
   398      lanes: 8
   399      base: $b
   400  
   401  - go: broadcast1To16
   402    asm: VPBROADCAST[BWDQ]
   403    in:
   404    - class: vreg
   405      bits: 128
   406      base: $b
   407    out:
   408    - class: vreg
   409      lanes: 16
   410      base: $b
   411  
   412  - go: broadcast1To32
   413    asm: VPBROADCAST[BWDQ]
   414    in:
   415    - class: vreg
   416      bits: 128
   417      base: $b
   418    out:
   419    - class: vreg
   420      lanes: 32
   421      base: $b
   422  
   423  - go: broadcast1To64
   424    asm: VPBROADCASTB
   425    in:
   426    - class: vreg
   427      bits: 128
   428      base: $b
   429    out:
   430    - class: vreg
   431      lanes: 64
   432      base: $b
   433  
   434  - go: broadcast1To4
   435    asm: VBROADCASTS[SD]
   436    in:
   437    - class: vreg
   438      bits: 128
   439      base: float
   440    out:
   441    - class: vreg
   442      lanes: 4
   443      base: float
   444  
   445  - go: broadcast1To8
   446    asm: VBROADCASTS[SD]
   447    in:
   448    - class: vreg
   449      bits: 128
   450      base: float
   451    out:
   452    - class: vreg
   453      lanes: 8
   454      base: float
   455  
   456  - go: broadcast1To16
   457    asm: VBROADCASTS[SD]
   458    in:
   459    - class: vreg
   460      bits: 128
   461      base: float
   462    out:
   463    - class: vreg
   464      lanes: 16
   465      base: float
   466  
   467  # VPSHUFB for 128-bit byte shuffles will be picked with higher priority than VPERMB, given its lower CPU feature requirement (it only needs AVX).
   468  - go: PermuteOrZero
   469    asm: VPSHUFB
   470    in:
   471    - &128any
   472      bits: 128
   473      go: $t
   474    - bits: 128
   475      name: indices
   476      base: int # always signed
   477    out:
   478    - *128any
   479  
   480  - go: PermuteOrZeroGrouped
   481    asm: VPSHUFB
   482    in:
   483    - &256Or512any
   484      bits: "256|512"
   485      go: $t
   486    - bits: "256|512"
   487      base: int
   488      name: indices
   489    out:
   490    - *256Or512any
   491  
   492  - go: permuteScalars
   493    asm: VPSHUFD
   494    addDoc: !string |-
   495      //
   496      //   result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
   497      //
   498      // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
   499    in:
   500    - *128any
   501    - class: immediate
   502      immOffset: 0
   503      name: indices
   504    hideMaskMethods: true
   505    out:
   506    - *128any
   507  
   508  - go: permuteScalarsGrouped
   509    asm: VPSHUFD
   510    addDoc: !string |-
   511      //
   512      //   result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
   513      //
   514      // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
   515      // Each group is of size 128-bit.
   516    in:
   517    - *256Or512any
   518    - class: immediate
   519      immOffset: 0
   520      name: indices
   521    hideMaskMethods: true
   522    out:
   523    - *256Or512any
   524  
   525  - go: permuteScalarsLo # For AVX
   526    asm: VPSHUFLW
   527    addDoc: !string |-
   528      //
   529      //   result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]}
   530      //
   531      // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
   532    in:
   533    - &32x4To16x8
   534      bits: 128
   535      go: $t
   536      elemBits: 32
   537      OverwriteElementBits: 16 # XED data incorrectly specifies this as 32-bit; override it with the correct 16 bits.
   538    - class: immediate
   539      immOffset: 0
   540      name: indices
   541    hideMaskMethods: true
   542    out:
   543    - *32x4To16x8
   544  
   545  - go: permuteScalarsLo
   546    asm: VPSHUFLW
   547    addDoc: !string |-
   548      //
   549      //   result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]}
   550      //
   551      // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
   552    in:
   553      - &128lanes8
   554        bits: 128
   555        go: $t
   556        elemBits: 16
   557      - class: immediate
   558        immOffset: 0
   559        name: indices
   560    hideMaskMethods: true
   561    out:
   562      - *128lanes8
   563  
   564  - go: permuteScalarsLoGrouped # Per 128-bit group: permute the low four 16-bit elements, keep the high four.
   565    asm: VPSHUFLW
   566    addDoc: !string |-
   567      //
   568      //   result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group0[4], x_group0[5], x_group0[6], x_group0[7],
   569      //    x_group1[indices[0:2]], ...}
   570      //
   571      // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
   572      // Each group is of size 128-bit.
   573    in:
   574    - &256Or512lanes8
   575      bits: "256|512"
   576      go: $t
   577      elemBits: 16
   578    - class: immediate
   579      immOffset: 0
   580      name: indices
   581    hideMaskMethods: true
   582    out:
   583    - *256Or512lanes8
   584  
   585  - go: permuteScalarsHi
   586    asm: VPSHUFHW
   587    addDoc: !string |-
   588      //
   589      //   result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]}
   590      //
   591      // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
   592    in:
   593    - *128lanes8
   594    - class: immediate
   595      immOffset: 0
   596      name: indices
   597    hideMaskMethods: true
   598    out:
   599    - *128lanes8
   600  
   601  - go: permuteScalarsHi # For AVX
   602    asm: VPSHUFHW
   603    addDoc: !string |-
   604      //
   605      //   result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]}
   606      //
   607      // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
   608    in:
   609    - &32x4To16x8
   610      bits: 128
   611      go: $t
   612      elemBits: 32
   613      OverwriteElementBits: 16 # XED data incorrectly specifies this as 32-bit; override it with the correct 16 bits.
   614    - class: immediate
   615      immOffset: 0
   616      name: indices
   617    hideMaskMethods: true
   618    out:
   619    - *32x4To16x8
   620  
   621  - go: permuteScalarsHiGrouped
   622    asm: VPSHUFHW
   623    addDoc: !string |-
   624      // result =
   625      //
   626      //   {x_group0[0], x_group0[1], x_group0[2], x_group0[3], x_group0[indices[0:2]+4], x_group0[indices[2:4]+4], x_group0[indices[4:6]+4], x_group0[indices[6:8]+4],
   627      //    x_group1[0], x_group1[1], x_group1[2], x_group1[3], x_group1[indices[0:2]+4], ...}
   628      //
   629      // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
   630      // Each group is of size 128-bit.
   631    in:
   632    - *256Or512lanes8
   633    - class: immediate
   634      immOffset: 0
   635      name: indices
   636    hideMaskMethods: true
   637    out:
   638    - *256Or512lanes8
   639  
   640  - go: InterleaveHi
   641    asm: VPUNPCKH(QDQ|DQ|WD|WB)
   642    in:
   643    - *128any
   644    - *128any
   645    inVariant: []
   646    out:
   647    - *128any
   648  
   649  - go: InterleaveLo
   650    asm: VPUNPCKL(QDQ|DQ|WD|WB)
   651    in:
   652    - *128any
   653    - *128any
   654    inVariant: []
   655    out:
   656    - *128any
   657  
   658  - go: InterleaveHiGrouped
   659    asm: VPUNPCKH(QDQ|DQ|WD|WB)
   660    in:
   661    - *256Or512any
   662    - *256Or512any
   663    inVariant: []
   664    out:
   665    - *256Or512any
   666  
   667  - go: InterleaveLoGrouped
   668    asm: VPUNPCKL(QDQ|DQ|WD|WB)
   669    in:
   670    - *256Or512any
   671    - *256Or512any
   672    inVariant: []
   673    out:
   674    - *256Or512any
   675  
   676  # These are all described separately to carry the name of the constant parameter
   677  
   678  - go: concatSelectedConstant
   679    asm: VSHUFPS
   680    width: 32
   681    in:
   682    - &v
   683      go: $t
   684      class: vreg
   685      base: float
   686      bits: 128
   687    - *v
   688    - class: immediate
   689      immOffset: 0
   690      name: h1h0l1l0
   691    inVariant: []
   692    out:
   693    - *v
   694  
   695  - go: concatSelectedConstant
   696    asm: VSHUFPS
   697    in:
   698    - &v
   699      go: $t
   700      class: vreg
   701      base: float
   702      bits: 128
   703      OverwriteBase: int
   704    - *v
   705    - class: immediate
   706      immOffset: 0
   707      name: h1h0l1l0
   708    inVariant: []
   709    out:
   710    - *v
   711  
   712  - go: concatSelectedConstant
   713    asm: VSHUFPS
   714    in:
   715    - &v
   716      go: $t
   717      class: vreg
   718      base: float
   719      bits: 128
   720      OverwriteBase: uint
   721    - *v
   722    - class: immediate
   723      immOffset: 0
   724      name: h1h0l1l0
   725    inVariant: []
   726    out:
   727    - *v
   728  
   729  
   730  - go: concatSelectedConstantGrouped
   731    asm: VSHUFPS
   732    in:
   733    - &v
   734      go: $t
   735      class: vreg
   736      base: float
   737      bits: "256|512"
   738    - *v
   739    - class: immediate
   740      immOffset: 0
   741      name: h1h0l1l0
   742    inVariant: []
   743    out:
   744    - *v
   745  
   746  - go: concatSelectedConstantGrouped
   747    asm: VSHUFPS
   748    in:
   749    - &v
   750      go: $t
   751      class: vreg
   752      base: float
   753      bits: "256|512"
   754      OverwriteBase: int
   755    - *v
   756    - class: immediate
   757      immOffset: 0
   758      name: h1h0l1l0
   759    inVariant: []
   760    out:
   761    - *v
   762  
   763  - go: concatSelectedConstantGrouped
   764    asm: VSHUFPS
   765    in:
   766    - &v
   767      go: $t
   768      class: vreg
   769      base: float
   770      bits: "256|512"
   771      OverwriteBase: uint
   772    - *v
   773    - class: immediate
   774      immOffset: 0
   775      name: h1h0l1l0
   776    inVariant: []
   777    out:
   778    - *v
   779  
   780  
   781    # 64-bit versions
   782  
   783  - go: concatSelectedConstant
   784    asm: VSHUFPD
   785    in:
   786    - &v
   787      go: $t
   788      class: vreg
   789      base: float
   790      bits: 128
   791    - *v
   792    - class: immediate
   793      immOffset: 0
   794      name: hilo
   795    inVariant: []
   796    out:
   797    - *v
   798  
   799  - go: concatSelectedConstant
   800    asm: VSHUFPD
   801    in:
   802    - &v
   803      go: $t
   804      class: vreg
   805      base: float
   806      bits: 128
   807      OverwriteBase: int
   808    - *v
   809    - class: immediate
   810      immOffset: 0
   811      name: hilo
   812    inVariant: []
   813    out:
   814    - *v
   815  
   816  - go: concatSelectedConstant
   817    asm: VSHUFPD
   818    in:
   819    - &v
   820      go: $t
   821      class: vreg
   822      base: float
   823      bits: 128
   824      OverwriteBase: uint
   825    - *v
   826    - class: immediate
   827      immOffset: 0
   828      name: hilo
   829    inVariant: []
   830    out:
   831    - *v
   832  
   833  - go: concatSelectedConstantGrouped
   834    asm: VSHUFPD
   835    in:
   836    - &v
   837      go: $t
   838      class: vreg
   839      base: float
   840      bits: "256|512"
   841    - *v
   842    - class: immediate
   843      immOffset: 0
   844      name: hilos
   845    inVariant: []
   846    out:
   847    - *v
   848  
   849  - go: concatSelectedConstantGrouped
   850    asm: VSHUFPD
   851    in:
   852    - &v
   853      go: $t
   854      class: vreg
   855      base: float
   856      bits: "256|512"
   857      OverwriteBase: int
   858    - *v
   859    - class: immediate
   860      immOffset: 0
   861      name: hilos
   862    inVariant: []
   863    out:
   864    - *v
   865  
   866  - go: concatSelectedConstantGrouped
   867    asm: VSHUFPD
   868    in:
   869    - &v
   870      go: $t
   871      class: vreg
   872      base: float
   873      bits: "256|512"
   874      OverwriteBase: uint
   875    - *v
   876    - class: immediate
   877      immOffset: 0
   878      name: hilos
   879    inVariant: []
   880    out:
   881    - *v
   882  
   883  - go: ConcatPermute128Scalars
   884    asm: VPERM2F128
   885    operandOrder: II
   886    addDoc: !string |-
   887      // For example,
   888      //
   889      //   {40, 41, 50, 51}.NAME(3, 0, {60, 61, 70, 71})
   890      //
   891      // returns {70, 71, 40, 41}.
   892    in:
   893    - &v
   894      go: $t
   895      class: vreg
   896      base: float
   897      bits: 256
   898    - *v
   899    - class: immediate
   900      immOffset: 0
   901      name: "lo, hi"
   902    inVariant: []
   903    out:
   904    - *v
   905  
   906  - go: ConcatPermute128Scalars
   907    asm: VPERM2F128
   908    operandOrder: II
   909    addDoc: !string |-
   910      // For example,
   911      //
   912      //   {40, 41, 42, 43, 50, 51, 52, 53}.NAME(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
   913      //
   914      // returns {70, 71, 72, 73, 40, 41, 42, 43}.
   915    in:
   916    - &v
   917      go: $t
   918      class: vreg
   919      base: float
   920      bits: 256
   921      OverwriteElementBits: 32
   922    - *v
   923    - class: immediate
   924      immOffset: 0
   925      name: "lo, hi"
   926    inVariant: []
   927    out:
   928    - *v
   929  
   930  - go: ConcatPermute128Scalars
   931    asm: VPERM2I128
   932    operandOrder: II
   933    addDoc: !string |-
   934      // For example,
   935      //
   936      //   {40, 41, 50, 51}.NAME(3, 0, {60, 61, 70, 71})
   937      //
   938      // returns {70, 71, 40, 41}.
   939    in:
   940    - &v
   941      go: $t
   942      class: vreg
   943      base: int|uint
   944      bits: 256
   945      OverwriteElementBits: 64
   946    - *v
   947    - class: immediate
   948      immOffset: 0
   949      name: "lo, hi"
   950    inVariant: []
   951    out:
   952    - *v
   953  
   954  - go: ConcatPermute128Scalars
   955    asm: VPERM2I128
   956    operandOrder: II
   957    addDoc: !string |-
   958      // For example,
   959      //
   960      //   {40, 41, 42, 43, 50, 51, 52, 53}.NAME(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
   961      //
   962      // returns {70, 71, 72, 73, 40, 41, 42, 43}.
   963    in:
   964    - &v
   965      go: $t
   966      class: vreg
   967      base: int|uint
   968      bits: 256
   969      OverwriteElementBits: 32
   970    - *v
   971    - class: immediate
   972      immOffset: 0
   973      name: "lo, hi"
   974    inVariant: []
   975    out:
   976    - *v
   977  
   978  - go: ConcatPermute128Scalars
   979    asm: VPERM2I128
   980    operandOrder: II
   981    addDoc: !string |-
   982      // For example,
   983      //
   984      //   {40, 41, 42, 43, 44, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57}.NAME(3, 0,
   985      //    {60, 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 73, 74, 75, 76, 77})
   986      //
   987      // returns {70, 71, 72, 73, 74, 75, 76, 77, 40, 41, 42, 43, 44, 45, 46, 47}.
   988    in:
   989    - &v
   990      go: $t
   991      class: vreg
   992      base: int|uint
   993      bits: 256
   994      OverwriteElementBits: 16
   995    - *v
   996    - class: immediate
   997      immOffset: 0
   998      name: "lo, hi"
   999    inVariant: []
  1000    out:
  1001    - *v
  1002  
  1003  - go: ConcatPermute128Scalars
  1004    asm: VPERM2I128
  1005    operandOrder: II
  1006    addDoc: !string |-
  1007      // For example,
  1008      //
  1009      //   {0x40, 0x41, ..., 0x4f, 0x50, 0x51, ..., 0x5f}.NAME(3, 0,
  1010      //        {0x60, 0x61, ..., 0x6f, 0x70, 0x71, ..., 0x7f})
  1011      //
  1012      // returns {0x70, 0x71, ..., 0x7f, 0x40, 0x41, ..., 0x4f}.
  1013    in:
  1014    - &v
  1015      go: $t
  1016      class: vreg
  1017      base: int|uint
  1018      bits: 256
  1019      OverwriteElementBits: 8
  1020    - *v
  1021    - class: immediate
  1022      immOffset: 0
  1023      name: "lo, hi"
  1024    inVariant: []
  1025    out:
  1026    - *v
  1027  
  1028  - go: ConcatShiftBytesRight
  1029    asm: VPALIGNR
  1030    operandOrder: 2I
  1031    in:
  1032    - &uint128
  1033      go: $t
  1034      base: uint
  1035      bits: 128
  1036    - *uint128
  1037    - class: immediate
  1038      immOffset: 0
  1039      name: shift
  1040    out:
  1041    - *uint128
  1042  
  1043  - go: ConcatShiftBytesRightGrouped
  1044    asm: VPALIGNR
  1045    operandOrder: 2I
  1046    in:
  1047    - &uint256512
  1048      go: $t
  1049      base: uint
  1050      bits: 256|512
  1051    - *uint256512
  1052    - class: immediate
  1053      immOffset: 0
  1054      name: shift
  1055    out:
  1056    - *uint256512
  1057  

View as plain text