Text file src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml

     1  !sum
     2  - go: SetElem # SetElem via VPINSR[BWDQ]: (vector, scalar, immediate index) -> vector
     3    asm: "VPINSR[BWDQ]"
     4    in:
     5    - &t # anchor t: the vector operand; aliased as the output below
     6      class: vreg
     7      base: $b
     8    - class: greg
     9      base: $b
    10      lanes: 1 # scalar operand (general register), so it is declared with a single lane
    11    - &imm # anchor imm: immediate named "index" (the next SetElem entry rebinds this anchor)
    12      class: immediate
    13      immOffset: 0
    14      name: index
    15    out:
    16    - *t
    17  
    18  - go: SetElem # SetElem via VPINSR[DQ] with the int base overwritten to float on both data operands
    19    asm: "VPINSR[DQ]"
    20    in:
    21    - &t # rebinds anchor t: the vector operand; aliased as the output below
    22      class: vreg
    23      base: int
    24      OverwriteBase: float
    25    - class: greg
    26      base: int
    27      OverwriteBase: float
    28      lanes: 1 # scalar operand (general register), so it is declared with a single lane
    29    - &imm # rebinds anchor imm: immediate named "index"; this binding is the one the GetElem entries alias
    30      class: immediate
    31      immOffset: 0
    32      name: index
    33    out:
    34    - *t
    35  
    36  - go: GetElem # GetElem via VPEXTR[BWDQ]: (vector, immediate index) -> scalar in a general register
    37    asm: "VPEXTR[BWDQ]"
    38    in:
    39    - class: vreg
    40      base: $b
    41      elemBits: $e # $e is repeated as the output's total bits below
    42    - *imm # the "index" immediate anchored in the SetElem entry above
    43    out:
    44    - class: greg
    45      base: $b
    46      bits: $e
    47  
    48  - go: GetElem # GetElem via VPEXTR[DQ] with the int base overwritten to float on input and output
    49    asm: "VPEXTR[DQ]"
    50    in:
    51    - class: vreg
    52      base: int
    53      elemBits: $e # $e is repeated as the output's total bits below
    54      OverwriteBase: float
    55    - *imm # the "index" immediate anchored in the SetElem entry above
    56    out:
    57    - class: greg
    58      base: int
    59      bits: $e
    60      OverwriteBase: float
    61  
    62  - go: "SetHi|SetLo" # SetHi/SetLo on VINSERTI128|VINSERTI64X4 with element bits overwritten to 8
    63    regexpTag: "move"
    64    asm: "VINSERTI128|VINSERTI64X4"
    65    inVariant: [] # explicitly empty variant-input list
    66    in:
    67    - &i8x2N # anchor i8x2N: first input and the output; carries the same keys as i8xN, only the anchor differs
    68      class: vreg
    69      base: $t
    70      OverwriteElementBits: 8
    71    - &i8xN # anchor i8xN: second input; aliased as the output of the following GetHi|GetLo entry
    72      class: vreg
    73      base: $t
    74      OverwriteElementBits: 8
    75    - &imm01 # This immediate should be only 0 or 1; aliased by every later SetHi/SetLo/GetHi/GetLo entry
    76      class: immediate
    77      const: 0 # placeholder value
    78      name: index
    79    out:
    80    - *i8x2N
    81  
    82  - go: "GetHi|GetLo" # GetHi/GetLo on VEXTRACTI128|VEXTRACTI64X4 for the 8-bit-element operands
    83    asm: "VEXTRACTI128|VEXTRACTI64X4"
    84    regexpTag: "move"
    85    inVariant: [] # explicitly empty variant-input list
    86    in:
    87    - *i8x2N # input: the i8x2N operand anchored in the preceding SetHi|SetLo entry
    88    - *imm01
    89    out:
    90    - *i8xN # output: the i8xN operand anchored in the preceding SetHi|SetLo entry
    91  
    92  - go: "SetHi|SetLo" # SetHi/SetLo on VINSERTI128|VINSERTI64X4 with element bits overwritten to 16
    93    asm: "VINSERTI128|VINSERTI64X4"
    94    regexpTag: "move"
    95    inVariant: [] # explicitly empty variant-input list
    96    in:
    97    - &i16x2N # anchor i16x2N: first input and the output
    98      class: vreg
    99      base: $t
   100      OverwriteElementBits: 16
   101    - &i16xN # anchor i16xN: second input; aliased as the output of the following GetHi|GetLo entry
   102      class: vreg
   103      base: $t
   104      OverwriteElementBits: 16
   105    - *imm01 # the 0/1 "index" immediate anchored in the first SetHi|SetLo entry
   106    out:
   107    - *i16x2N
   108  
   109  - go: "GetHi|GetLo" # GetHi/GetLo on VEXTRACTI128|VEXTRACTI64X4 for the 16-bit-element operands
   110    regexpTag: "move"
   111    asm: "VEXTRACTI128|VEXTRACTI64X4"
   112    inVariant: [] # explicitly empty variant-input list
   113    in:
   114    - *i16x2N # input: anchored in the preceding SetHi|SetLo entry
   115    - *imm01
   116    out:
   117    - *i16xN # output: anchored in the preceding SetHi|SetLo entry
   118  
   119  - go: "SetHi|SetLo" # SetHi/SetLo on VINSERTI128|VINSERTI64X4 with element bits overwritten to 32
   120    regexpTag: "move"
   121    asm: "VINSERTI128|VINSERTI64X4"
   122    inVariant: [] # explicitly empty variant-input list
   123    in:
   124    - &i32x2N # anchor i32x2N: first input and the output
   125      class: vreg
   126      base: $t
   127      OverwriteElementBits: 32
   128    - &i32xN # anchor i32xN: second input; aliased as the output of the following GetHi|GetLo entry
   129      class: vreg
   130      base: $t
   131      OverwriteElementBits: 32
   132    - *imm01 # the 0/1 "index" immediate anchored in the first SetHi|SetLo entry
   133    out:
   134    - *i32x2N
   135  
   136  - go: "GetHi|GetLo" # GetHi/GetLo on VEXTRACTI128|VEXTRACTI64X4 for the 32-bit-element operands
   137    regexpTag: "move"
   138    asm: "VEXTRACTI128|VEXTRACTI64X4"
   139    inVariant: [] # explicitly empty variant-input list
   140    in:
   141    - *i32x2N # input: anchored in the preceding SetHi|SetLo entry
   142    - *imm01
   143    out:
   144    - *i32xN # output: anchored in the preceding SetHi|SetLo entry
   145  
   146  - go: "SetHi|SetLo" # SetHi/SetLo on VINSERTI128|VINSERTI64X4 with element bits overwritten to 64
   147    regexpTag: "move"
   148    asm: "VINSERTI128|VINSERTI64X4"
   149    inVariant: [] # explicitly empty variant-input list
   150    in:
   151    - &i64x2N # anchor i64x2N: first input and the output
   152      class: vreg
   153      base: $t
   154      OverwriteElementBits: 64
   155    - &i64xN # anchor i64xN: second input; aliased as the output of the following GetHi|GetLo entry
   156      class: vreg
   157      base: $t
   158      OverwriteElementBits: 64
   159    - *imm01 # the 0/1 "index" immediate anchored in the first SetHi|SetLo entry
   160    out:
   161    - *i64x2N
   162  
   163  - go: "GetHi|GetLo" # GetHi/GetLo on VEXTRACTI128|VEXTRACTI64X4 for the 64-bit-element operands
   164    regexpTag: "move"
   165    asm: "VEXTRACTI128|VEXTRACTI64X4"
   166    inVariant: [] # explicitly empty variant-input list
   167    in:
   168    - *i64x2N # input: anchored in the preceding SetHi|SetLo entry
   169    - *imm01
   170    out:
   171    - *i64xN # output: anchored in the preceding SetHi|SetLo entry
   172  
   173  - go: "SetHi|SetLo" # SetHi/SetLo on VINSERTF128|VINSERTF64X4 with element bits overwritten to 32
   174    regexpTag: "move"
   175    asm: "VINSERTF128|VINSERTF64X4"
   176    inVariant: [] # explicitly empty variant-input list
   177    in:
   178    - &f32x2N # anchor f32x2N: first input and the output
   179      class: vreg
   180      base: $t
   181      OverwriteElementBits: 32
   182    - &f32xN # anchor f32xN: second input; aliased as the output of the following GetHi|GetLo entry
   183      class: vreg
   184      base: $t
   185      OverwriteElementBits: 32
   186    - *imm01 # the 0/1 "index" immediate anchored in the first SetHi|SetLo entry
   187    out:
   188    - *f32x2N
   189  
   190  - go: "GetHi|GetLo" # GetHi/GetLo on VEXTRACTF128|VEXTRACTF64X4 for the f32 operands
   191    regexpTag: "move"
   192    asm: "VEXTRACTF128|VEXTRACTF64X4"
   193    inVariant: [] # explicitly empty variant-input list
   194    in:
   195    - *f32x2N # input: anchored in the preceding SetHi|SetLo entry
   196    - *imm01
   197    out:
   198    - *f32xN # output: anchored in the preceding SetHi|SetLo entry
   199  
   200  - go: "SetHi|SetLo" # SetHi/SetLo on VINSERTF128|VINSERTF64X4 with element bits overwritten to 64
   201    regexpTag: "move"
   202    asm: "VINSERTF128|VINSERTF64X4"
   203    inVariant: [] # explicitly empty variant-input list
   204    in:
   205    - &f64x2N # anchor f64x2N: first input and the output
   206      class: vreg
   207      base: $t
   208      OverwriteElementBits: 64
   209    - &f64xN # anchor f64xN: second input; aliased as the output of the following GetHi|GetLo entry
   210      class: vreg
   211      base: $t
   212      OverwriteElementBits: 64
   213    - *imm01 # the 0/1 "index" immediate anchored in the first SetHi|SetLo entry
   214    out:
   215    - *f64x2N
   216  
   217  - go: "GetHi|GetLo" # GetHi/GetLo on VEXTRACTF128|VEXTRACTF64X4 for the f64 operands
   218    regexpTag: "move"
   219    asm: "VEXTRACTF128|VEXTRACTF64X4"
   220    inVariant: [] # explicitly empty variant-input list
   221    in:
   222    - *f64x2N # input: anchored in the preceding SetHi|SetLo entry
   223    - *imm01
   224    out:
   225    - *f64xN # output: anchored in the preceding SetHi|SetLo entry
   226  
   227  - go: Permute # Permute for 4-lane operands (VPERMQ|VPERMPD): (indices, data) -> data-typed result
   228    asm: "VPERMQ|VPERMPD"
   229    addDoc: !string |-
   230      // The low 2 bits (values 0-3) of each element of indices is used.
   231    operandOrder: "21Type1"
   232    in:
   233    - &anyindices # anchor anyindices: the "indices" operand; aliased by every later Permute and by ConcatPermute
   234      go: $t
   235      name: indices
   236      overwriteBase: uint # NOTE(review): spelled `overwriteBase` here but `OverwriteBase` elsewhere in this file - confirm both spellings are accepted
   237    - &any4 # anchor any4: data operand constrained to 4 lanes
   238      go: $t
   239      lanes: 4
   240    out:
   241    - &any # anchor any: unconstrained operand; aliased by later Permute, ConcatPermute, Compress and Expand entries
   242      go: $t
   243  
   244  - go: Permute # Permute for 8-lane operands (VPERM[WDQ]|VPERMP[SD])
   245    asm: "VPERM[WDQ]|VPERMP[SD]"
   246    addDoc: !string |-
   247      // The low 3 bits (values 0-7) of each element of indices is used.
   248    operandOrder: "21Type1"
   249    in:
   250    - *anyindices # the "indices" operand anchored in the first Permute entry
   251    - &any8 # anchor any8: data operand constrained to 8 lanes
   252      go: $t
   253      lanes: 8
   254    out:
   255    - *any
   256  
   257  - go: Permute # Permute for 16-lane operands (VPERM[BWD]|VPERMPS)
   258    asm: "VPERM[BWD]|VPERMPS"
   259    addDoc: !string |-
   260      // The low 4 bits (values 0-15) of each element of indices is used.
   261    operandOrder: "21Type1"
   262    in:
   263    - *anyindices # the "indices" operand anchored in the first Permute entry
   264    - &any16 # anchor any16: data operand constrained to 16 lanes
   265      go: $t
   266      lanes: 16
   267    out:
   268    - *any
   269  
   270  - go: Permute # Permute for 32-lane operands (VPERM[BW])
   271    asm: "VPERM[BW]"
   272    addDoc: !string |-
   273      // The low 5 bits (values 0-31) of each element of indices is used.
   274    operandOrder: "21Type1"
   275    in:
   276    - *anyindices # the "indices" operand anchored in the first Permute entry
   277    - &any32 # anchor any32: data operand constrained to 32 lanes
   278      go: $t
   279      lanes: 32
   280    out:
   281    - *any
   282  
   283  - go: Permute # Permute for 64-lane operands (VPERMB)
   284    asm: "VPERMB"
   285    addDoc: !string |-
   286      // The low 6 bits (values 0-63) of each element of indices is used.
   287    operandOrder: "21Type1"
   288    in:
   289    - *anyindices # the "indices" operand anchored in the first Permute entry
   290    - &any64 # anchor any64: data operand constrained to 64 lanes
   291      go: $t
   292      lanes: 64
   293    out:
   294    - *any
   295  
   296  - go: ConcatPermute # ConcatPermute via VPERMI2*: (indices, data, data) -> data
   297    asm: "VPERMI2[BWDQ]|VPERMI2P[SD]"
   298    # Because the receiver's type is overwritten (see the indices operand),
   299    # the receiver has to be moved into parameter position so that
   300    # there is no duplication.
   301    operandOrder: "231Type1"
   302    in:
   303    - *anyindices # result in arg 0
   304    - *any
   305    - *any
   306    out:
   307    - *any
   308  
   309  - go: Compress # Compress via VPCOMPRESS*/VCOMPRESSP*: (mask, data) -> data
   310    asm: "VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]"
   311    in:
   312      # The mask in Compress is a control mask rather than a write mask, so it's not optional.
   313    - class: mask
   314    - *any # the unconstrained operand anchored in the first Permute entry
   315    out:
   316    - *any
   317  
   318  # For now a non-public method because
   319  # (1) [OverwriteClass] must be set together with [OverwriteBase]
   320  # (2) "simdgen does not support [OverwriteClass] in inputs".
   321  # That means the signature is wrong.
   322  - go: blend # lower-case (non-public) blend via VPBLENDVB: (v, v, mask-as-vreg) -> v
   323    asm: VPBLENDVB
   324    zeroing: false
   325    in:
   326    - &v # anchor v: int vreg; aliased as the second input and as the output
   327      go: $t
   328      class: vreg
   329      base: int
   330    - *v
   331    -
   332      class: vreg # the operand named "mask" is declared as a vreg here, not as class mask
   333      base: int
   334      name: mask
   335    out:
   336    - *v
   337  
   338  # For AVX512
   339  - go: blend # lower-case blend via VPBLENDM[BWDQ], restricted to 512-bit int vregs, with a mask variant input
   340    asm: VPBLENDM[BWDQ]
   341    zeroing: false
   342    in:
   343    - &v # rebinds anchor v: 512-bit int vreg; aliased as the second input and as the output
   344      go: $t
   345      bits: 512
   346      class: vreg
   347      base: int
   348    - *v
   349    inVariant: # here the mask is listed under inVariant rather than under in
   350    -
   351      class: mask
   352    out:
   353    - *v
   354  
   355    # For AVX512
   356  - go: move # lower-case move via VMOVDQU(8|16|32|64) with a mask variant input and zeroing enabled
   357    asm: VMOVDQU(8|16|32|64)
   358    zeroing: true
   359    in:
   360    - &v # rebinds anchor v: int or uint vreg; aliased as the output
   361      go: $t
   362      class: vreg
   363      base: int|uint
   364    inVariant: # the mask is listed under inVariant rather than under in
   365    -
   366      class: mask
   367    out:
   368    - *v
   369  
   370  - go: Expand # Expand via VPEXPAND*/VEXPANDP*: (mask, data) -> data
   371    asm: "VPEXPAND[BWDQ]|VEXPANDP[SD]"
   372    in:
   373      # The mask in Expand is a control mask rather than a write mask, so it's not optional.
   374    - class: mask
   375    - *any # the unconstrained operand anchored in the first Permute entry
   376    out:
   377    - *any
   378  
   379  - go: Broadcast1To2 # Broadcast1To2 via VPBROADCASTQ: 128-bit, 64-bit-element vreg in and out, same base $b
   380    asm: VPBROADCASTQ
   381    in:
   382    - class: vreg
   383      bits: 128
   384      elemBits: 64
   385      base: $b
   386    out:
   387    - class: vreg
   388      bits: 128
   389      elemBits: 64
   390      base: $b
   391  
   392  # Weirdly, this one case on AVX2 is memory-operand-only.
   393  - go: Broadcast1To2 # Broadcast1To2 via VPBROADCASTQ with the int base overwritten to float on input and output
   394    asm: VPBROADCASTQ
   395    in:
   396    - class: vreg
   397      bits: 128
   398      elemBits: 64
   399      base: int
   400      OverwriteBase: float
   401    out:
   402    - class: vreg
   403      bits: 128
   404      elemBits: 64
   405      base: int
   406      OverwriteBase: float
   407  
   408  - go: Broadcast1To4 # Broadcast1To4 via VPBROADCAST[BWDQ]: 128-bit vreg in, 4-lane vreg out, same base $b
   409    asm: VPBROADCAST[BWDQ]
   410    in:
   411    - class: vreg
   412      bits: 128
   413      base: $b
   414    out:
   415    - class: vreg
   416      lanes: 4 # the output is constrained by lane count, not by total bits
   417      base: $b
   418  
   419  - go: Broadcast1To8 # Broadcast1To8 via VPBROADCAST[BWDQ]: 128-bit vreg in, 8-lane vreg out, same base $b
   420    asm: VPBROADCAST[BWDQ]
   421    in:
   422    - class: vreg
   423      bits: 128
   424      base: $b
   425    out:
   426    - class: vreg
   427      lanes: 8 # the output is constrained by lane count, not by total bits
   428      base: $b
   429  
   430  - go: Broadcast1To16 # Broadcast1To16 via VPBROADCAST[BWDQ]: 128-bit vreg in, 16-lane vreg out, same base $b
   431    asm: VPBROADCAST[BWDQ]
   432    in:
   433    - class: vreg
   434      bits: 128
   435      base: $b
   436    out:
   437    - class: vreg
   438      lanes: 16 # the output is constrained by lane count, not by total bits
   439      base: $b
   440  
   441  - go: Broadcast1To32 # Broadcast1To32 via VPBROADCAST[BWDQ]: 128-bit vreg in, 32-lane vreg out, same base $b
   442    asm: VPBROADCAST[BWDQ]
   443    in:
   444    - class: vreg
   445      bits: 128
   446      base: $b
   447    out:
   448    - class: vreg
   449      lanes: 32 # the output is constrained by lane count, not by total bits
   450      base: $b
   451  
   452  - go: Broadcast1To64 # Broadcast1To64 via VPBROADCASTB only: 128-bit vreg in, 64-lane vreg out, same base $b
   453    asm: VPBROADCASTB
   454    in:
   455    - class: vreg
   456      bits: 128
   457      base: $b
   458    out:
   459    - class: vreg
   460      lanes: 64 # the output is constrained by lane count, not by total bits
   461      base: $b
   462  
   463  - go: Broadcast1To4 # float Broadcast1To4 via VBROADCASTS[SD]: 128-bit float vreg in, 4-lane float vreg out
   464    asm: VBROADCASTS[SD]
   465    in:
   466    - class: vreg
   467      bits: 128
   468      base: float
   469    out:
   470    - class: vreg
   471      lanes: 4 # the output is constrained by lane count, not by total bits
   472      base: float
   473  
   474  - go: Broadcast1To8 # float Broadcast1To8 via VBROADCASTS[SD]: 128-bit float vreg in, 8-lane float vreg out
   475    asm: VBROADCASTS[SD]
   476    in:
   477    - class: vreg
   478      bits: 128
   479      base: float
   480    out:
   481    - class: vreg
   482      lanes: 8 # the output is constrained by lane count, not by total bits
   483      base: float
   484  
   485  - go: Broadcast1To16 # float Broadcast1To16 via VBROADCASTS[SD]: 128-bit float vreg in, 16-lane float vreg out
   486    asm: VBROADCASTS[SD]
   487    in:
   488    - class: vreg
   489      bits: 128
   490      base: float
   491    out:
   492    - class: vreg
   493      lanes: 16 # the output is constrained by lane count, not by total bits
   494      base: float
   495  
   496  # VPSHUFB for 128-bit byte shuffles will be picked with higher priority than VPERMB, given its lower CPU feature requirement. (It's AVX)
   497  - go: PermuteOrZero # PermuteOrZero via VPSHUFB on 128-bit operands: (x, indices) -> x-typed result
   498    asm: VPSHUFB
   499    addDoc: !string |-
   500      // The lower four bits of each byte-sized index in indices select an element from x,
   501      // unless the index's sign bit is set in which case zero is used instead.
   502    in:
   503    - &128any # anchor 128any: any 128-bit operand; aliased by permuteScalars and the Interleave entries
   504      bits: 128
   505      go: $t
   506    - bits: 128
   507      name: indices
   508      base: int # always signed
   509    out:
   510    - *128any
   511  
   512  - go: PermuteOrZeroGrouped # PermuteOrZeroGrouped via VPSHUFB on 256- or 512-bit operands: (x, indices) -> x-typed result
   513    asm: VPSHUFB
   514    addDoc: !string |-
   515      //
   516      //   result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
   517      //
   518      // The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
   519      // unless the index's sign bit is set in which case zero is used instead.
   520      // Each group is of size 128-bit.
   521    in:
   522    - &256Or512any # anchor 256Or512any: any 256- or 512-bit operand; aliased by the later *Grouped entries
   523      bits: "256|512"
   524      go: $t
   525    - bits: "256|512"
   526      base: int
   527      name: indices
   528    out:
   529    - *256Or512any
   530  
   531  - go: permuteScalars # lower-case permuteScalars via VPSHUFD on 128-bit operands: (x, immediate indices) -> x-typed result
   532    asm: VPSHUFD
   533    addDoc: !string |-
   534      //
   535      //   result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
   536      //
   537      // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
   538    in:
   539    - *128any # the 128-bit operand anchored in the PermuteOrZero entry
   540    - class: immediate
   541      immOffset: 0
   542      name: indices
   543    hideMaskMethods: true
   544    out:
   545    - *128any
   546  
   547  - go: permuteScalarsGrouped # lower-case permuteScalarsGrouped via VPSHUFD on 256- or 512-bit operands
   548    asm: VPSHUFD
   549    addDoc: !string |-
   550      //
   551      //   result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
   552      //
   553      // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
   554      // Each group is of size 128-bit.
   555    in:
   556    - *256Or512any # the 256|512-bit operand anchored in the PermuteOrZeroGrouped entry
   557    - class: immediate
   558      immOffset: 0
   559      name: indices
   560    hideMaskMethods: true
   561    out:
   562    - *256Or512any
   563  
   564  - go: permuteScalarsLo # lower-case permuteScalarsLo via VPSHUFLW on 128-bit, 16-bit-element operands
   565    asm: VPSHUFLW
   566    addDoc: !string |-
   567      //
   568      //   result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]}
   569      //
   570      // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
   571    in:
   572      - &128lanes8 # anchor 128lanes8: 128 bits of 16-bit elements; aliased as the output and by permuteScalarsHi
   573        bits: 128
   574        go: $t
   575        elemBits: 16
   576      - class: immediate
   577        immOffset: 0
   578        name: indices
   579    hideMaskMethods: true
   580    out:
   581      - *128lanes8
   582  
   583  - go: permuteScalarsLoGrouped # lower-case permuteScalarsLoGrouped via VPSHUFLW on 256- or 512-bit, 16-bit-element operands
   584    asm: VPSHUFLW
   585    addDoc: !string |-
   586      //
   587      //   result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group0[4], x_group0[5], x_group0[6], x_group0[7],
   588      //    x_group1[indices[0:2]], ...}
   589      //
   590      // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
   591      // Each group is of size 128-bit.
   592    in:
   593    - &256Or512lanes8 # anchor 256Or512lanes8: 256|512 bits of 16-bit elements; aliased as the output and by permuteScalarsHiGrouped
   594      bits: "256|512"
   595      go: $t
   596      elemBits: 16
   597    - class: immediate
   598      immOffset: 0
   599      name: indices
   600    hideMaskMethods: true
   601    out:
   602    - *256Or512lanes8
   603  
   604  - go: permuteScalarsHi # lower-case permuteScalarsHi via VPSHUFHW on 128-bit, 16-bit-element operands
   605    asm: VPSHUFHW
   606    addDoc: !string |-
   607      //
   608      //   result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]}
   609      //
   610      // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
   611    in:
   612    - *128lanes8 # the operand anchored in the permuteScalarsLo entry
   613    - class: immediate
   614      immOffset: 0
   615      name: indices
   616    hideMaskMethods: true
   617    out:
   618    - *128lanes8
   619  
   620  - go: permuteScalarsHiGrouped # lower-case permuteScalarsHiGrouped via VPSHUFHW on 256- or 512-bit, 16-bit-element operands
   621    asm: VPSHUFHW
   622    addDoc: !string |-
   623      // result =
   624      //
   625      //   {x_group0[0], x_group0[1], x_group0[2], x_group0[3], x_group0[indices[0:2]+4], x_group0[indices[2:4]+4], x_group0[indices[4:6]+4], x_group0[indices[6:8]+4],
   626      //    x_group1[0], x_group1[1], x_group1[2], x_group1[3], x_group1[indices[0:2]+4], ...}
   627      //
   628      // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
   629      // Each group is of size 128-bit.
   630    in:
   631    - *256Or512lanes8 # the operand anchored in the permuteScalarsLoGrouped entry
   632    - class: immediate
   633      immOffset: 0
   634      name: indices
   635    hideMaskMethods: true
   636    out:
   637    - *256Or512lanes8
   638  
   639  - go: InterleaveHi # InterleaveHi via VPUNPCKH*: two 128-bit operands in, one out, all the same 128any operand
   640    asm: VPUNPCKH(QDQ|DQ|WD|WB)
   641    in:
   642    - *128any # the 128-bit operand anchored in the PermuteOrZero entry
   643    - *128any
   644    inVariant: [] # explicitly empty variant-input list
   645    out:
   646    - *128any
   647  
   648  - go: InterleaveLo # InterleaveLo via VPUNPCKL*: two 128-bit operands in, one out, all the same 128any operand
   649    asm: VPUNPCKL(QDQ|DQ|WD|WB)
   650    in:
   651    - *128any # the 128-bit operand anchored in the PermuteOrZero entry
   652    - *128any
   653    inVariant: [] # explicitly empty variant-input list
   654    out:
   655    - *128any
   656  
   657  - go: InterleaveHiGrouped # InterleaveHiGrouped via VPUNPCKH*: same shape as InterleaveHi but on 256|512-bit operands
   658    asm: VPUNPCKH(QDQ|DQ|WD|WB)
   659    in:
   660    - *256Or512any # the 256|512-bit operand anchored in the PermuteOrZeroGrouped entry
   661    - *256Or512any
   662    inVariant: [] # explicitly empty variant-input list
   663    out:
   664    - *256Or512any
   665  
   666  - go: InterleaveLoGrouped # InterleaveLoGrouped via VPUNPCKL*: same shape as InterleaveLo but on 256|512-bit operands
   667    asm: VPUNPCKL(QDQ|DQ|WD|WB)
   668    in:
   669    - *256Or512any # the 256|512-bit operand anchored in the PermuteOrZeroGrouped entry
   670    - *256Or512any
   671    inVariant: [] # explicitly empty variant-input list
   672    out:
   673    - *256Or512any
   674  
   675  # These are all described separately to carry the name of the constant parameter
   676  
   677  - go: concatSelectedConstant # lower-case concatSelectedConstant via VSHUFPS, 128-bit float: (v, v, immediate h1h0l1l0) -> v
   678    asm: VSHUFPS
   679    width: 32 # NOTE(review): only this VSHUFPS entry sets `width`; the sibling entries do not - confirm this is intentional
   680    in:
   681    - &v # rebinds anchor v: 128-bit float vreg; aliased as the second input and as the output
   682      go: $t
   683      class: vreg
   684      base: float
   685      bits: 128
   686    - *v
   687    - class: immediate
   688      immOffset: 0
   689      name: h1h0l1l0
   690    inVariant: [] # explicitly empty variant-input list
   691    out:
   692    - *v
   693  
   694  - go: concatSelectedConstant # VSHUFPS, 128-bit, with the float base overwritten to int
   695    asm: VSHUFPS
   696    in:
   697    - &v # rebinds anchor v: 128-bit float vreg with OverwriteBase int; aliased as the second input and as the output
   698      go: $t
   699      class: vreg
   700      base: float
   701      bits: 128
   702      OverwriteBase: int
   703    - *v
   704    - class: immediate
   705      immOffset: 0
   706      name: h1h0l1l0
   707    inVariant: [] # explicitly empty variant-input list
   708    out:
   709    - *v
   710  
   711  - go: concatSelectedConstant # VSHUFPS, 128-bit, with the float base overwritten to uint
   712    asm: VSHUFPS
   713    in:
   714    - &v # rebinds anchor v: 128-bit float vreg with OverwriteBase uint; aliased as the second input and as the output
   715      go: $t
   716      class: vreg
   717      base: float
   718      bits: 128
   719      OverwriteBase: uint
   720    - *v
   721    - class: immediate
   722      immOffset: 0
   723      name: h1h0l1l0
   724    inVariant: [] # explicitly empty variant-input list
   725    out:
   726    - *v
   727  
   728  
   729  - go: concatSelectedConstantGrouped # VSHUFPS on 256|512-bit float vregs: (v, v, immediate h1h0l1l0) -> v
   730    asm: VSHUFPS
   731    in:
   732    - &v # rebinds anchor v: 256|512-bit float vreg; aliased as the second input and as the output
   733      go: $t
   734      class: vreg
   735      base: float
   736      bits: "256|512"
   737    - *v
   738    - class: immediate
   739      immOffset: 0
   740      name: h1h0l1l0
   741    inVariant: [] # explicitly empty variant-input list
   742    out:
   743    - *v
   744  
   745  - go: concatSelectedConstantGrouped # VSHUFPS on 256|512-bit vregs with the float base overwritten to int
   746    asm: VSHUFPS
   747    in:
   748    - &v # rebinds anchor v: 256|512-bit float vreg with OverwriteBase int; aliased as the second input and as the output
   749      go: $t
   750      class: vreg
   751      base: float
   752      bits: "256|512"
   753      OverwriteBase: int
   754    - *v
   755    - class: immediate
   756      immOffset: 0
   757      name: h1h0l1l0
   758    inVariant: [] # explicitly empty variant-input list
   759    out:
   760    - *v
   761  
   762  - go: concatSelectedConstantGrouped # VSHUFPS on 256|512-bit vregs with the float base overwritten to uint
   763    asm: VSHUFPS
   764    in:
   765    - &v # rebinds anchor v: 256|512-bit float vreg with OverwriteBase uint; aliased as the second input and as the output
   766      go: $t
   767      class: vreg
   768      base: float
   769      bits: "256|512"
   770      OverwriteBase: uint
   771    - *v
   772    - class: immediate
   773      immOffset: 0
   774      name: h1h0l1l0
   775    inVariant: [] # explicitly empty variant-input list
   776    out:
   777    - *v
   778  
   779  
   780    # 64-bit versions
   781  
   782  - go: concatSelectedConstant # VSHUFPD, 128-bit float: (v, v, immediate hilo) -> v
   783    asm: VSHUFPD
   784    in:
   785    - &v # rebinds anchor v: 128-bit float vreg; aliased as the second input and as the output
   786      go: $t
   787      class: vreg
   788      base: float
   789      bits: 128
   790    - *v
   791    - class: immediate
   792      immOffset: 0
   793      name: hilo
   794    inVariant: [] # explicitly empty variant-input list
   795    out:
   796    - *v
   797  
   798  - go: concatSelectedConstant # VSHUFPD, 128-bit, with the float base overwritten to int
   799    asm: VSHUFPD
   800    in:
   801    - &v # rebinds anchor v: 128-bit float vreg with OverwriteBase int; aliased as the second input and as the output
   802      go: $t
   803      class: vreg
   804      base: float
   805      bits: 128
   806      OverwriteBase: int
   807    - *v
   808    - class: immediate
   809      immOffset: 0
   810      name: hilo
   811    inVariant: [] # explicitly empty variant-input list
   812    out:
   813    - *v
   814  
   815  - go: concatSelectedConstant # VSHUFPD, 128-bit, with the float base overwritten to uint
   816    asm: VSHUFPD
   817    in:
   818    - &v # rebinds anchor v: 128-bit float vreg with OverwriteBase uint; aliased as the second input and as the output
   819      go: $t
   820      class: vreg
   821      base: float
   822      bits: 128
   823      OverwriteBase: uint
   824    - *v
   825    - class: immediate
   826      immOffset: 0
   827      name: hilo
   828    inVariant: [] # explicitly empty variant-input list
   829    out:
   830    - *v
   831  
   832  - go: concatSelectedConstantGrouped # VSHUFPD on 256|512-bit float vregs: (v, v, immediate hilos) -> v
   833    asm: VSHUFPD
   834    in:
   835    - &v # rebinds anchor v: 256|512-bit float vreg; aliased as the second input and as the output
   836      go: $t
   837      class: vreg
   838      base: float
   839      bits: "256|512"
   840    - *v
   841    - class: immediate
   842      immOffset: 0
   843      name: hilos # the grouped entries name this immediate "hilos"; the 128-bit entries use "hilo"
   844    inVariant: [] # explicitly empty variant-input list
   845    out:
   846    - *v
   847  
   848  - go: concatSelectedConstantGrouped # VSHUFPD on 256|512-bit vregs with the float base overwritten to int
   849    asm: VSHUFPD
   850    in:
   851    - &v # rebinds anchor v: 256|512-bit float vreg with OverwriteBase int; aliased as the second input and as the output
   852      go: $t
   853      class: vreg
   854      base: float
   855      bits: "256|512"
   856      OverwriteBase: int
   857    - *v
   858    - class: immediate
   859      immOffset: 0
   860      name: hilos
   861    inVariant: [] # explicitly empty variant-input list
   862    out:
   863    - *v
   864  
   865  - go: concatSelectedConstantGrouped # VSHUFPD on 256|512-bit vregs with the float base overwritten to uint
   866    asm: VSHUFPD
   867    in:
   868    - &v # rebinds anchor v: 256|512-bit float vreg with OverwriteBase uint; aliased as the second input and as the output
   869      go: $t
   870      class: vreg
   871      base: float
   872      bits: "256|512"
   873      OverwriteBase: uint
   874    - *v
   875    - class: immediate
   876      immOffset: 0
   877      name: hilos
   878    inVariant: [] # explicitly empty variant-input list
   879    out:
   880    - *v
   881  
   882  - go: Select128FromPair # Select128FromPair via VPERM2F128 on 256-bit float vregs, element bits not overwritten
   883    asm: VPERM2F128
   884    operandOrder: II
   885    addDoc: !string |-
   886      // For example,
   887      //
   888      //   {40, 41, 50, 51}.NAME(3, 0, {60, 61, 70, 71})
   889      //
   890      // returns {70, 71, 40, 41}.
   891    in:
   892    - &v # rebinds anchor v: 256-bit float vreg; aliased as the second input and as the output
   893      go: $t
   894      class: vreg
   895      base: float
   896      bits: 256
   897    - *v
   898    - class: immediate
   899      immOffset: 0
   900      name: "lo, hi" # a single immediate operand whose name lists two identifiers
   901    inVariant: [] # explicitly empty variant-input list
   902    out:
   903    - *v
   904  
   905  - go: Select128FromPair # Select128FromPair via VPERM2F128 on 256-bit float vregs with element bits overwritten to 32
   906    asm: VPERM2F128
   907    operandOrder: II
   908    addDoc: !string |-
   909      // For example,
   910      //
   911      //   {40, 41, 42, 43, 50, 51, 52, 53}.NAME(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
   912      //
   913      // returns {70, 71, 72, 73, 40, 41, 42, 43}.
   914    in:
   915    - &v # rebinds anchor v: 256-bit float vreg, OverwriteElementBits 32; aliased as the second input and as the output
   916      go: $t
   917      class: vreg
   918      base: float
   919      bits: 256
   920      OverwriteElementBits: 32
   921    - *v
   922    - class: immediate
   923      immOffset: 0
   924      name: "lo, hi" # a single immediate operand whose name lists two identifiers
   925    inVariant: [] # explicitly empty variant-input list
   926    out:
   927    - *v
   928  
   929  - go: Select128FromPair # Select128FromPair via VPERM2I128 on 256-bit int|uint vregs with element bits overwritten to 64
   930    asm: VPERM2I128
   931    operandOrder: II
   932    addDoc: !string |-
   933      // For example,
   934      //
   935      //   {40, 41, 50, 51}.NAME(3, 0, {60, 61, 70, 71})
   936      //
   937      // returns {70, 71, 40, 41}.
   938    in:
   939    - &v # rebinds anchor v: 256-bit int|uint vreg, OverwriteElementBits 64; aliased as the second input and as the output
   940      go: $t
   941      class: vreg
   942      base: int|uint
   943      bits: 256
   944      OverwriteElementBits: 64
   945    - *v
   946    - class: immediate
   947      immOffset: 0
   948      name: "lo, hi" # a single immediate operand whose name lists two identifiers
   949    inVariant: [] # explicitly empty variant-input list
   950    out:
   951    - *v
   952  
   953  - go: Select128FromPair # Select128FromPair via VPERM2I128 on 256-bit int|uint vregs with element bits overwritten to 32
   954    asm: VPERM2I128
   955    operandOrder: II
   956    addDoc: !string |-
   957      // For example,
   958      //
   959      //   {40, 41, 42, 43, 50, 51, 52, 53}.NAME(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
   960      //
   961      // returns {70, 71, 72, 73, 40, 41, 42, 43}.
   962    in:
   963    - &v # rebinds anchor v: 256-bit int|uint vreg, OverwriteElementBits 32; aliased as the second input and as the output
   964      go: $t
   965      class: vreg
   966      base: int|uint
   967      bits: 256
   968      OverwriteElementBits: 32
   969    - *v
   970    - class: immediate
   971      immOffset: 0
   972      name: "lo, hi" # a single immediate operand whose name lists two identifiers
   973    inVariant: [] # explicitly empty variant-input list
   974    out:
   975    - *v
   976  
   977  - go: Select128FromPair # Select128FromPair via VPERM2I128 on 256-bit int|uint vregs with element bits overwritten to 16
   978    asm: VPERM2I128
   979    operandOrder: II
   980    addDoc: !string |-
   981      // For example,
   982      //
   983      //   {40, 41, 42, 43, 44, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57}.NAME(3, 0,
   984      //    {60, 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 73, 74, 75, 76, 77})
   985      //
   986      // returns {70, 71, 72, 73, 74, 75, 76, 77, 40, 41, 42, 43, 44, 45, 46, 47}.
   987    in:
   988    - &v # rebinds anchor v: 256-bit int|uint vreg, OverwriteElementBits 16; aliased as the second input and as the output
   989      go: $t
   990      class: vreg
   991      base: int|uint
   992      bits: 256
   993      OverwriteElementBits: 16
   994    - *v
   995    - class: immediate
   996      immOffset: 0
   997      name: "lo, hi" # a single immediate operand whose name lists two identifiers
   998    inVariant: [] # explicitly empty variant-input list
   999    out:
  1000    - *v
  1001  
  1002  - go: Select128FromPair # Select128FromPair via VPERM2I128 on 256-bit int|uint vregs with element bits overwritten to 8
  1003    asm: VPERM2I128
  1004    operandOrder: II
  1005    addDoc: !string |-
  1006      // For example,
  1007      //
  1008      //   {0x40, 0x41, ..., 0x4f, 0x50, 0x51, ..., 0x5f}.NAME(3, 0,
  1009      //        {0x60, 0x61, ..., 0x6f, 0x70, 0x71, ..., 0x7f})
  1010      //
  1011      // returns {0x70, 0x71, ..., 0x7f, 0x40, 0x41, ..., 0x4f}.
  1012    in:
  1013    - &v # rebinds anchor v: 256-bit int|uint vreg, OverwriteElementBits 8; aliased as the second input and as the output
  1014      go: $t
  1015      class: vreg
  1016      base: int|uint
  1017      bits: 256
  1018      OverwriteElementBits: 8
  1019    - *v
  1020    - class: immediate
  1021      immOffset: 0
  1022      name: "lo, hi" # a single immediate operand whose name lists two identifiers
  1023    inVariant: [] # explicitly empty variant-input list
  1024    out:
  1025    - *v
  1026  
  1027  - go: ConcatShiftBytesRight # ConcatShiftBytesRight via VPALIGNR on 128-bit uint operands: (v, v, immediate shift) -> v
  1028    asm: VPALIGNR
  1029    in:
  1030    - &uint128 # anchor uint128: 128-bit uint operand; aliased as the second input and as the output
  1031      go: $t
  1032      base: uint
  1033      bits: 128
  1034    - *uint128
  1035    - class: immediate
  1036      immOffset: 0
  1037      name: shift
  1038    out:
  1039    - *uint128
  1040  
  1041  - go: ConcatShiftBytesRightGrouped # ConcatShiftBytesRightGrouped via VPALIGNR on 256|512-bit uint operands
  1042    asm: VPALIGNR
  1043    in:
  1044    - &uint256512 # anchor uint256512: 256- or 512-bit uint operand; aliased as the second input and as the output
  1045      go: $t
  1046      base: uint
  1047      bits: 256|512 # unquoted here; other entries write this pattern as "256|512"
  1048    - *uint256512
  1049    - class: immediate
  1050      immOffset: 0
  1051      name: shift
  1052    out:
  1053    - *uint256512
  1054  

View as plain text