Text file src/simd/archsimd/_gen/simdgen/ops/MLOps/categories.yaml

     1  !sum
     2  - go: DotProductPairs
     3    commutative: false
     4    documentation: !string |-
     5      // NAME multiplies the elements and adds the pairs together,
     6      // yielding a vector of half as many elements with twice the input element size.
     7  # TODO: maybe simplify this name within the receiver-type + method-naming scheme we use.
     8  - go: DotProductPairsSaturated
     9    commutative: false
    10    documentation: !string |-
    11      // NAME multiplies the elements and adds the pairs together with saturation,
    12      // yielding a vector of half as many elements with twice the input element size.
    13  # - go: DotProductBroadcast
    14  #   commutative: true
    15  #   documentation: !string |-
    16  #     // NAME multiplies all elements and broadcasts the sum.
    17  - go: AddDotProductPairs
    18    commutative: false
    19    noTypes: "true"
    20    noGenericOps: "true"
    21    documentation: !string |-
    22      // NAME performs dot products on pairs of elements of y and z and then adds x.
    23  - go: MulAdd
    24    commutative: false
    25    documentation: !string |-
    26      // NAME performs a fused (x * y) + z.
    27  - go: MulAddSub
    28    commutative: false
    29    documentation: !string |-
    30      // NAME performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
    31  - go: MulSubAdd
    32    commutative: false
    33    documentation: !string |-
    34      // NAME performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
    35  - go: SumAbsDiff
    36    commutative: false
    37    documentation: !string |-
    38      // NAME sums the absolute differences between the two input vectors, treating each run of 8 adjacent bytes as a group. The result
    39      // is a vector of word-sized elements in which every 4*n-th element holds the sum for the n-th input group; all other elements of the result vector are zeroed.
    40      // Equivalently, each sum is the L1 distance between the corresponding 8-byte groups of the two input vectors.
    41  

View as plain text