# Operation entries: each item gives the Go-facing operation name (go), a
# commutativity flag, and a documentation template. NAME inside a template is
# presumably substituted with the operation name by whatever consumes this
# file -- TODO confirm against the generator.
!sum
- go: DotProductPairs
  commutative: false
  documentation: !string |-
    // NAME multiplies the elements and adds the pairs together,
    // yielding a vector of half as many elements with twice the input element size.
# TODO: maybe simplify this name within the receiver-type + method-naming scheme we use.
- go: DotProductPairsSaturated
  commutative: false
  documentation: !string |-
    // NAME multiplies the elements and adds the pairs together with saturation,
    // yielding a vector of half as many elements with twice the input element size.
# QuadDotProduct, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now.
# - go: DotProductBroadcast
#   commutative: true
#   # documentation: !string |-
#   // NAME multiplies all elements and broadcasts the sum.
- go: DotProductQuadruple
  commutative: false
  documentation: !string |-
    // NAME performs dot products on groups of 4 elements of x and y.
    // NAME(x, y).Add(z) will be optimized to the full form of the underlying instruction.
# NOTE(review): the documentation below is word-for-word the same as
# DotProductQuadruple and never mentions saturation; confirm where saturation
# applies and document it.
- go: DotProductQuadrupleSaturated
  commutative: false
  documentation: !string |-
    // NAME performs dot products on groups of 4 elements of x and y.
    // NAME(x, y).Add(z) will be optimized to the full form of the underlying instruction.
- go: AddDotProductPairs
  commutative: false
  # These two values are quoted strings, not YAML booleans; presumably the
  # consumer expects strings -- verify before unquoting.
  noTypes: "true"
  noGenericOps: "true"
  documentation: !string |-
    // NAME performs dot products on pairs of elements of y and z and then adds x.
- go: MulAdd
  commutative: false
  documentation: !string |-
    // NAME performs a fused (x * y) + z.
# NOTE(review): for MulAddSub and MulSubAdd, confirm the odd/even wording
# against the mapped instructions. If these map to x86 VFMADDSUB*/VFMSUBADD*,
# note that VFMADDSUB subtracts in even (0-based) lanes and adds in odd lanes.
- go: MulAddSub
  commutative: false
  documentation: !string |-
    // NAME performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
- go: MulSubAdd
  commutative: false
  documentation: !string |-
    // NAME performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
# SumAbsDiff: per-group sum of absolute differences over adjacent 8-byte groups.
- go: SumAbsDiff
  commutative: false
  documentation: !string |-
    // NAME sums the absolute differences of the two input vectors, treating each adjacent 8 bytes as a group.
    // The output is a vector of word-sized elements in which each 4*n-th element contains the sum of the
    // n-th input group. The other elements in the result vector are zeroed.
    // This method can be seen as computing the L1 distance between each adjacent 8-byte group of the two input vectors.