Text file src/simd/archsimd/_gen/simdgen/ops/Moves/go_arm64.yaml

     1  !sum
     2  
     3  # GetElem VMOV (UMOV, integer)
     4  - go: GetElem
     5    asm: "VMOV"
     6    SSAVariant: "extr"
     7    in:
     8    - &immIndex
     9      class: immediate
    10      go: Immediate
    11      name: index
    12    - &vector
    13      go: $t
    14      base: int|uint
    15    out:
    16    - &scalar
    17      class: greg
    18  
    19  # GetElem VDUP (duplicate element into scalar float in vreg)
    20  - go: GetElem
    21    asm: "VDUP"
    22    SSAVariant: "extr"
    23    in:
    24    - *immIndex
    25    - &fvector
    26      go: $t
    27      base: float
    28    out:
    29    - &fscalarOut
    30      class: vreg
    31      treatLikeAScalarOfSize: 0
    32      OverwriteClass: greg
    33      OverwriteBase: float
    34  
    35  # SetElem VMOV (INS, integer)
    36  - go: SetElem
    37    asm: "VMOV"
    38    SSAVariant: "ins"
    39    in:
    40    - *immIndex
    41    - *vector
    42    - *scalar
    43    out:
    44    - *vector
    45  
    46  # SetElem VMOV (float: INS from lane 0 of the source)
    47  - go: SetElem
    48    asm: "VMOV"
    49    SSAVariant: "ins0"
    50    specialLower: !string "match (%h [0] (VMOVI16B [0]) y:(VDUP%sextr [i] _)) => y"
    51    in:
    52    - *immIndex
    53    - &imm0
    54      class: immediate
    55      go: Immediate
    56      const: 0 # imm+vreg with treatLikeAScalarOfSize are treated as scalar in lane 0
    57    - *fvector
    58    - &fscalarIn
    59      class: vreg
    60      treatLikeAScalarOfSize: 0
    61    out:
    62    - *fvector
    63  
    64  # broadcast1To16 VDUP (duplicate element 0 to all 16 byte lanes)
    65  - go: broadcast1To16
    66    asm: "VDUP"
    67    SSAVariant: "bcast"
    68    constImm: 0
    69    specialLower: !string "match (%h [i] (VMOV%sins [j] _ (MOVDconst [c]))) && i == j && c>=-128 && c<=255 => (VMOVI%a [uint8(c)])"
    70    in:
    71    - &immLane0
    72      name: "@" # tag "fully constant" (no aux field needed)
    73      const: 0
    74      class: immediate
    75      go: Immediate
    76    - &bcast8
    77      go: $t
    78      bits: 128
    79      elemBits: 8
    80    out:
    81    - *bcast8
    82  
    83  # broadcast1To8 VDUP (duplicate element 0 to all 8 halfword lanes)
    84  - go: broadcast1To8
    85    asm: "VDUP"
    86    SSAVariant: "bcast"
    87    constImm: 0
    88    in:
    89    - *immLane0
    90    - &bcast16
    91      go: $t
    92      bits: 128
    93      elemBits: 16
    94    out:
    95    - *bcast16
    96  
    97  # broadcast1To4 VDUP (duplicate element 0 to all 4 word lanes)
    98  - go: broadcast1To4
    99    asm: "VDUP"
   100    SSAVariant: "bcast"
   101    constImm: 0
   102    in:
   103    - *immLane0
   104    - &bcast32
   105      go: $t
   106      bits: 128
   107      elemBits: 32
   108    out:
   109    - *bcast32
   110  
   111  # broadcast1To2 VDUP (duplicate element 0 to both doubleword lanes)
   112  - go: broadcast1To2
   113    asm: "VDUP"
   114    SSAVariant: "bcast"
   115    constImm: 0
   116    in:
   117    - *immLane0
   118    - &bcast64
   119      go: $t
   120      bits: 128
   121      elemBits: 64
   122    out:
   123    - *bcast64
   124  
   125  # bitSelect — VBIT (bit insert if true, internal)
   126  # Only Int8x16 is needed since VBIT operates on the full 128-bit register.
   127  # Other types are cast to Int8x16 before calling bitSelect.
   128  - go: bitSelect
   129    asm: "VBIT"
   130    specialLower: !string "match (%h x y (VNOT16B mask)) => (VBIF16B x y mask)"
   131    in:
   132    - &v8
   133      go: $t
   134      class: vreg
   135      base: int
   136      elemBits: 8
   137    - *v8
   138    - &maskReg
   139      class: vreg
   140      base: int
   141      name: mask
   142    out:
   143    - *v8
   144  
   145  # bitSelectNot — VBIF (bit insert if false, internal)
   146  - go: bitSelectNot
   147    asm: "VBIF"
   148    specialLower: !string "match (%h x y (VNOT16B mask)) => (VBIT16B x y mask)"
   149    in:
   150    - *v8
   151    - *v8
   152    - *maskReg
   153    out:
   154    - *v8
   155  
   156  # LookupOrZero — TBL (table lookup, zeros for out-of-range indices)
   157  - go: LookupOrZero
   158    regexpTag: "move"
   159    asm: "VTBL"
   160    in:
   161    - &bytesTable
   162      go: $t
   163      base: int|uint
   164      elemBits: 8
   165      name: table
   166    - &byteIndices
   167      go: $t
   168      base: int|uint
   169      elemBits: 8
   170      name: indices
   171    out:
   172    - &bytes
   173      go: $t
   174      base: int|uint
   175      elemBits: 8
   176  
   177  # lookupOrKeep — TBX (table lookup extended, keeps the original element for out-of-range indices)
   178  - go: lookupOrKeep
   179    noGenericOps: "true"
   180    regexpTag: "move"
   181    asm: "VTBX"
   182    in:
   183    - &bytesKeep
   184      go: $t
   185      base: int|uint
   186      elemBits: 8
   187      name: keep
   188    - *bytesTable
   189    - *byteIndices
   190    out:
   191    - *bytes
   192  
   193  # ConcatShiftBytesRight — EXT (extract bytes from concatenated pair)
   194  - go: ConcatShiftBytesRight
   195    asm: "VEXT"
   196    operandOrder: 2I
   197    in:
   198    - class: immediate
   199      immOffset: 0
   200      name: shift
   201    - &uint8x128
   202      go: $t
   203      base: uint
   204      bits: 128
   205      elemBits: 8
   206    - *uint8x128
   207    out:
   208    - *uint8x128
   209  
   210  # InterleaveLo — ZIP1 (interleave low halves)
   211  - go: InterleaveLo
   212    asm: "VZIP1"
   213    in:
   214    - &any128
   215      go: $t
   216      bits: 128
   217    - *any128
   218    out:
   219    - *any128
   220  
   221  # InterleaveHi — ZIP2 (interleave high halves)
   222  - go: InterleaveHi
   223    asm: "VZIP2"
   224    in:
   225    - *any128
   226    - *any128
   227    out:
   228    - *any128
   229  
   230  # ConcatEven — UZP1 (extract even-indexed elements)
   231  - go: ConcatEven
   232    asm: "VUZP1"
   233    in:
   234    - *any128
   235    - *any128
   236    out:
   237    - *any128
   238  
   239  # ConcatOdd — UZP2 (extract odd-indexed elements)
   240  - go: ConcatOdd
   241    asm: "VUZP2"
   242    in:
   243    - *any128
   244    - *any128
   245    out:
   246    - *any128
   247  
   248  # InterleaveEven — TRN1 (transpose even-position pairs)
   249  - go: InterleaveEven
   250    asm: "VTRN1"
   251    in:
   252    - *any128
   253    - *any128
   254    out:
   255    - *any128
   256  
   257  # InterleaveOdd — TRN2 (transpose odd-position pairs)
   258  - go: InterleaveOdd
   259    asm: "VTRN2"
   260    in:
   261    - *any128
   262    - *any128
   263    out:
   264    - *any128
   265  

View as plain text