Source file src/simd/archsimd/ops_emulated_wasm.go

     1  // Copyright 2026 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build goexperiment.simd && wasm
     6  
     7  package archsimd
     8  
     9  var nn = [2]int64{-1 << 63, -1 << 63}
    10  var f0s = [16]int8{-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0}
    11  var ff00s = [8]int16{-1, 0, -1, 0, -1, 0, -1, 0}
    12  var ffff0000s = [4]int32{-1, 0, -1, 0}
    13  
// For unsigned comparison, the trick for converting it into a
// signed comparison is to notice that the unsigned range is
// the same as the signed range plus 1 << (bitwidth-1).
// And adding or subtracting the sign bit is the same as XORing
// it.  Thus, XOR both sign bits and then use the signed
// comparison operations.
    20  
    21  // Less return a mask vector of x[i] < y[i]
    22  func (x Uint64x2) Less(y Uint64x2) Mask64x2 {
    23  	signs := LoadInt64x2Array(&nn)
    24  	ix := x.BitsToInt64().Xor(signs)
    25  	iy := y.BitsToInt64().Xor(signs)
    26  	return ix.Less(iy)
    27  }
    28  
    29  // LessEqual return a mask vector of x[i] <= y[i]
    30  func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 {
    31  	signs := LoadInt64x2Array(&nn)
    32  	ix := x.BitsToInt64().Xor(signs)
    33  	iy := y.BitsToInt64().Xor(signs)
    34  	return ix.LessEqual(iy)
    35  }
    36  
    37  // Greater return a mask vector of x[i] > y[i]
    38  func (x Uint64x2) Greater(y Uint64x2) Mask64x2 {
    39  	signs := LoadInt64x2Array(&nn)
    40  	ix := x.BitsToInt64().Xor(signs)
    41  	iy := y.BitsToInt64().Xor(signs)
    42  	return ix.Greater(iy)
    43  }
    44  
    45  // GreaterEqual return a mask vector of x[i] >= y[i]
    46  func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 {
    47  	signs := LoadInt64x2Array(&nn)
    48  	ix := x.BitsToInt64().Xor(signs)
    49  	iy := y.BitsToInt64().Xor(signs)
    50  	return ix.GreaterEqual(iy)
    51  }
    52  
    53  // Max returns the elementswise maximum of elements in x and y
    54  func (x Int64x2) Max(y Int64x2) Int64x2 {
    55  	mask := x.Greater(y).ToInt64x2()
    56  	return x.And(mask).Or(y.AndNot(mask))
    57  }
    58  
    59  // Min returns the elementswise minimum of elements in x and y
    60  func (x Int64x2) Min(y Int64x2) Int64x2 {
    61  	mask := x.Less(y).ToInt64x2()
    62  	return x.And(mask).Or(y.AndNot(mask))
    63  }
    64  
    65  // Max returns the elementswise maximum of elements in x and y
    66  func (x Uint64x2) Max(y Uint64x2) Uint64x2 {
    67  	mask := x.Greater(y).ToInt64x2().ToBits()
    68  	return x.And(mask).Or(y.AndNot(mask))
    69  }
    70  
    71  // Min returns the elementswise minimum of elements in x and y
    72  func (x Uint64x2) Min(y Uint64x2) Uint64x2 {
    73  	mask := x.Less(y).ToInt64x2().ToBits()
    74  	return x.And(mask).Or(y.AndNot(mask))
    75  }
    76  
    77  // Mul returns the elementswise product of elements in x and y
    78  func (x Int8x16) Mul(y Int8x16) Int8x16 {
    79  	// To obtain an 8-bit multiply, split the vectors into even and odd
    80  	// elements, shift odds into even position, widen elements in both
    81  	// vectors, multiply, discard high parts, realign the odd results
    82  	// and combine.
    83  	mask := LoadInt8x16Array(&f0s)
    84  	mask16 := mask.ToBits().ReshapeToUint16s()
    85  	xe := x.And(mask).ToBits().ReshapeToUint16s()
    86  	xo := x.AndNot(mask).ToBits().ReshapeToUint16s().ShiftAllRight(8)
    87  	ye := y.And(mask).ToBits().ReshapeToUint16s()
    88  	yo := y.AndNot(mask).ToBits().ReshapeToUint16s().ShiftAllRight(8)
    89  	pe := xe.Mul(ye).And(mask16)
    90  	po := xo.Mul(yo).And(mask16).ShiftAllLeft(8)
    91  	return pe.Or(po).ReshapeToUint8s().BitsToInt8()
    92  }
    93  
    94  // Mul returns the elementswise product of elements in x and y
    95  func (x Uint8x16) Mul(y Uint8x16) Uint8x16 {
    96  	mask := LoadInt8x16Array(&f0s).ToBits()
    97  	mask16 := mask.ReshapeToUint16s()
    98  	xe := x.And(mask).ReshapeToUint16s()
    99  	xo := x.AndNot(mask).ReshapeToUint16s().ShiftAllRight(8)
   100  	ye := y.And(mask).ReshapeToUint16s()
   101  	yo := y.AndNot(mask).ReshapeToUint16s().ShiftAllRight(8)
   102  	pe := xe.Mul(ye).And(mask16)
   103  	po := xo.Mul(yo).And(mask16).ShiftAllLeft(8)
   104  	return pe.Or(po).ReshapeToUint8s()
   105  }
   106  
   107  // OnesCount returns the number of set bits in each vector element
   108  func (x Int16x8) OnesCount() Int16x8 {
   109  	mask := LoadInt8x16Array(&f0s)
   110  	c := x.ToBits().ReshapeToUint8s().BitsToInt8().OnesCount()                      // per-byte counts
   111  	ce := c.And(mask).ToBits().ReshapeToUint16s().BitsToInt16()                     // even-element per-byte counts, as 16-bit elements
   112  	co := c.AndNot(mask).ToBits().ReshapeToUint16s().BitsToInt16().ShiftAllRight(8) // odd-element per-byte counts, as 16-bit elements, aligned
   113  	return ce.Add(co)                                                               // return their elementwise sum
   114  }
   115  
   116  // OnesCount returns the number of set bits in each vector element
   117  func (x Int32x4) OnesCount() Int32x4 {
   118  	mask := LoadInt8x16Array(&f0s)
   119  	c := x.ToBits().ReshapeToUint8s().BitsToInt8().OnesCount()                      // per-byte counts
   120  	ce := c.And(mask).ToBits().ReshapeToUint16s().BitsToInt16()                     // even-element per-byte counts, as 16-bit elements
   121  	co := c.AndNot(mask).ToBits().ReshapeToUint16s().BitsToInt16().ShiftAllRight(8) // odd-element per-byte counts, as 16-bit elements, aligned
   122  	mask16 := LoadInt16x8Array(&ff00s)
   123  	y := ce.Add(co) // per int16 counts, etc.
   124  	ye := y.And(mask16).ToBits().ReshapeToUint32s().BitsToInt32()
   125  	yo := y.AndNot(mask16).ToBits().ReshapeToUint32s().BitsToInt32().ShiftAllRight(16)
   126  	return ye.Add(yo)
   127  }
   128  
   129  // OnesCount returns the number of set bits in each vector element
   130  func (x Int64x2) OnesCount() Int64x2 {
   131  	mask := LoadInt8x16Array(&f0s)
   132  	c := x.ToBits().ReshapeToUint8s().BitsToInt8().OnesCount()
   133  	ce := c.And(mask).ToBits().ReshapeToUint16s().BitsToInt16()
   134  	co := c.AndNot(mask).ToBits().ReshapeToUint16s().BitsToInt16().ShiftAllRight(8)
   135  	mask16 := LoadInt16x8Array(&ff00s)
   136  	y := ce.Add(co)
   137  	ye := y.And(mask16).ToBits().ReshapeToUint32s().BitsToInt32()
   138  	yo := y.AndNot(mask16).ToBits().ReshapeToUint32s().BitsToInt32().ShiftAllRight(16)
   139  	mask32 := LoadInt32x4Array(&ffff0000s)
   140  	z := ye.Add(yo)
   141  	ze := z.And(mask32).ToBits().ReshapeToUint64s().BitsToInt64()
   142  	zo := z.AndNot(mask32).ToBits().ReshapeToUint64s().BitsToInt64().ShiftAllRight(32)
   143  	return ze.Add(zo)
   144  }
   145  
   146  // OnesCount returns the number of set bits in each vector element
   147  func (x Uint8x16) OnesCount() Uint8x16 {
   148  	return x.BitsToInt8().OnesCount().ToBits()
   149  }
   150  
   151  // OnesCount returns the number of set bits in each vector element
   152  func (x Uint16x8) OnesCount() Uint16x8 {
   153  	return x.BitsToInt16().OnesCount().ToBits()
   154  }
   155  
   156  // OnesCount returns the number of set bits in each vector element
   157  func (x Uint32x4) OnesCount() Uint32x4 {
   158  	return x.BitsToInt32().OnesCount().ToBits()
   159  }
   160  
   161  // OnesCount returns the number of set bits in each vector element
   162  func (x Uint64x2) OnesCount() Uint64x2 {
   163  	return x.BitsToInt64().OnesCount().ToBits()
   164  }
   165  
   166  // CarrylessMultiplyEven computes the carryless
   167  // multiplications of selected even halves of the elements of x and y.
   168  //
   169  // A carryless multiplication uses bitwise XOR instead of
   170  // add-with-carry, for example (in base two):
   171  //
   172  //	11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101
   173  //
   174  // This also models multiplication of polynomials with coefficients
   175  // from GF(2) -- 11 * 11 models (x+1)*(x+1) = x**2 + (1^1)x + 1 =
   176  // x**2 + 0x + 1 = x**2 + 1 modeled by 101.  (Note that "+" adds
   177  // polynomial terms, but coefficients "add" with XOR.)
   178  //
   179  // Emulated
   180  func (x Uint64x2) CarrylessMultiplyEven(y Uint64x2) Uint64x2 {
   181  	return x.carrylessMultiply(y)
   182  }
   183  
   184  // CarrylessMultiplyOdd computes the carryless
   185  // multiplications of selected odd halves of the elements of x and y.
   186  //
   187  // A carryless multiplication uses bitwise XOR instead of
   188  // add-with-carry, for example (in base two):
   189  //
   190  //	11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101
   191  //
   192  // This also models multiplication of polynomials with coefficients
   193  // from GF(2) -- 11 * 11 models (x+1)*(x+1) = x**2 + (1^1)x + 1 =
   194  // x**2 + 0x + 1 = x**2 + 1 modeled by 101.  (Note that "+" adds
   195  // polynomial terms, but coefficients "add" with XOR.)
   196  //
   197  // Emulated
   198  func (x Uint64x2) CarrylessMultiplyOdd(y Uint64x2) Uint64x2 {
   199  	x = x.SetElem(0, x.GetElem(1))
   200  	y = y.SetElem(0, x.GetElem(1))
   201  	return x.carrylessMultiply(y)
   202  }
   203  

View as plain text