Source file src/crypto/internal/fips140/nistec/p256_asm.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file contains the Go wrapper for the constant-time, 64-bit assembly
     6  // implementation of P256. The optimizations performed here are described in
     7  // detail in:
     8  // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
     9  //                          256-bit primes"
    10  // https://link.springer.com/article/10.1007%2Fs13389-014-0090-x
    11  // https://eprint.iacr.org/2013/816.pdf
    12  
    13  //go:build (amd64 || arm64 || ppc64le || s390x) && !purego
    14  
    15  package nistec
    16  
    17  import (
    18  	"crypto/internal/fips140deps/byteorder"
    19  	"errors"
    20  	"math/bits"
    21  	"runtime"
    22  	"unsafe"
    23  )
    24  
    25  // p256Element is a P-256 base field element in [0, P-1] in the Montgomery
    26  // domain (with R 2²⁵⁶) as four limbs in little-endian order value.
    27  type p256Element [4]uint64
    28  
    29  // p256One is one in the Montgomery domain.
    30  var p256One = p256Element{0x0000000000000001, 0xffffffff00000000,
    31  	0xffffffffffffffff, 0x00000000fffffffe}
    32  
    33  var p256Zero = p256Element{}
    34  
    35  // p256P is 2²⁵⁶ - 2²²⁴ + 2¹⁹² + 2⁹⁶ - 1 in the Montgomery domain.
    36  var p256P = p256Element{0xffffffffffffffff, 0x00000000ffffffff,
    37  	0x0000000000000000, 0xffffffff00000001}
    38  
    39  // P256Point is a P-256 point. The zero value should not be assumed to be valid
    40  // (although it is in this implementation).
    41  type P256Point struct {
    42  	// (X:Y:Z) are Jacobian coordinates where x = X/Z² and y = Y/Z³. The point
    43  	// at infinity can be represented by any set of coordinates with Z = 0.
    44  	x, y, z p256Element
    45  }
    46  
    47  // NewP256Point returns a new P256Point representing the point at infinity.
    48  func NewP256Point() *P256Point {
    49  	return &P256Point{
    50  		x: p256One, y: p256One, z: p256Zero,
    51  	}
    52  }
    53  
    54  // SetGenerator sets p to the canonical generator and returns p.
    55  func (p *P256Point) SetGenerator() *P256Point {
    56  	p.x = p256Element{0x79e730d418a9143c, 0x75ba95fc5fedb601,
    57  		0x79fb732b77622510, 0x18905f76a53755c6}
    58  	p.y = p256Element{0xddf25357ce95560a, 0x8b4ab8e4ba19e45c,
    59  		0xd2e88688dd21f325, 0x8571ff1825885d85}
    60  	p.z = p256One
    61  	return p
    62  }
    63  
    64  // Set sets p = q and returns p.
    65  func (p *P256Point) Set(q *P256Point) *P256Point {
    66  	p.x, p.y, p.z = q.x, q.y, q.z
    67  	return p
    68  }
    69  
    70  const p256ElementLength = 32
    71  const p256UncompressedLength = 1 + 2*p256ElementLength
    72  const p256CompressedLength = 1 + p256ElementLength
    73  
    74  // SetBytes sets p to the compressed, uncompressed, or infinity value encoded in
    75  // b, as specified in SEC 1, Version 2.0, Section 2.3.4. If the point is not on
    76  // the curve, it returns nil and an error, and the receiver is unchanged.
    77  // Otherwise, it returns p.
    78  func (p *P256Point) SetBytes(b []byte) (*P256Point, error) {
    79  	// This implementation operates in the Montgomery domain with R = 2²⁵⁶ mod
    80  	// p. Elements in the Montgomery domain take the form a×R and p256Mul
    81  	// calculates (a × b × R⁻¹) mod p. rr is R in the domain, or R×R mod p, thus
    82  	// p256Mul(e, RR) gives e×R, i.e. converts e into the Montgomery domain.
    83  	rr := p256Element{0x0000000000000003, 0xfffffffbffffffff,
    84  		0xfffffffffffffffe, 0x00000004fffffffd}
    85  
    86  	switch {
    87  	// Point at infinity.
    88  	case len(b) == 1 && b[0] == 0:
    89  		return p.Set(NewP256Point()), nil
    90  
    91  	// Uncompressed form.
    92  	case len(b) == p256UncompressedLength && b[0] == 4:
    93  		var r P256Point
    94  		p256BigToLittle(&r.x, (*[32]byte)(b[1:33]))
    95  		p256BigToLittle(&r.y, (*[32]byte)(b[33:65]))
    96  		if p256LessThanP(&r.x) == 0 || p256LessThanP(&r.y) == 0 {
    97  			return nil, errors.New("invalid P256 element encoding")
    98  		}
    99  		p256Mul(&r.x, &r.x, &rr)
   100  		p256Mul(&r.y, &r.y, &rr)
   101  		if err := p256CheckOnCurve(&r.x, &r.y); err != nil {
   102  			return nil, err
   103  		}
   104  		r.z = p256One
   105  		return p.Set(&r), nil
   106  
   107  	// Compressed form.
   108  	case len(b) == p256CompressedLength && (b[0] == 2 || b[0] == 3):
   109  		var r P256Point
   110  		p256BigToLittle(&r.x, (*[32]byte)(b[1:33]))
   111  		if p256LessThanP(&r.x) == 0 {
   112  			return nil, errors.New("invalid P256 element encoding")
   113  		}
   114  		p256Mul(&r.x, &r.x, &rr)
   115  
   116  		// y² = x³ - 3x + b
   117  		p256Polynomial(&r.y, &r.x)
   118  		if !p256Sqrt(&r.y, &r.y) {
   119  			return nil, errors.New("invalid P256 compressed point encoding")
   120  		}
   121  
   122  		// Select the positive or negative root, as indicated by the least
   123  		// significant bit, based on the encoding type byte.
   124  		yy := new(p256Element)
   125  		p256FromMont(yy, &r.y)
   126  		cond := int(yy[0]&1) ^ int(b[0]&1)
   127  		p256NegCond(&r.y, cond)
   128  
   129  		r.z = p256One
   130  		return p.Set(&r), nil
   131  
   132  	default:
   133  		return nil, errors.New("invalid P256 point encoding")
   134  	}
   135  }
   136  
   137  // p256Polynomial sets y2 to x³ - 3x + b, and returns y2.
   138  func p256Polynomial(y2, x *p256Element) *p256Element {
   139  	x3 := new(p256Element)
   140  	p256Sqr(x3, x, 1)
   141  	p256Mul(x3, x3, x)
   142  
   143  	threeX := new(p256Element)
   144  	p256Add(threeX, x, x)
   145  	p256Add(threeX, threeX, x)
   146  	p256NegCond(threeX, 1)
   147  
   148  	p256B := &p256Element{0xd89cdf6229c4bddf, 0xacf005cd78843090,
   149  		0xe5a220abf7212ed6, 0xdc30061d04874834}
   150  
   151  	p256Add(x3, x3, threeX)
   152  	p256Add(x3, x3, p256B)
   153  
   154  	*y2 = *x3
   155  	return y2
   156  }
   157  
   158  func p256CheckOnCurve(x, y *p256Element) error {
   159  	// y² = x³ - 3x + b
   160  	rhs := p256Polynomial(new(p256Element), x)
   161  	lhs := new(p256Element)
   162  	p256Sqr(lhs, y, 1)
   163  	if p256Equal(lhs, rhs) != 1 {
   164  		return errors.New("P256 point not on curve")
   165  	}
   166  	return nil
   167  }
   168  
   169  // p256LessThanP returns 1 if x < p, and 0 otherwise. Note that a p256Element is
   170  // not allowed to be equal to or greater than p, so if this function returns 0
   171  // then x is invalid.
   172  func p256LessThanP(x *p256Element) int {
   173  	var b uint64
   174  	_, b = bits.Sub64(x[0], p256P[0], b)
   175  	_, b = bits.Sub64(x[1], p256P[1], b)
   176  	_, b = bits.Sub64(x[2], p256P[2], b)
   177  	_, b = bits.Sub64(x[3], p256P[3], b)
   178  	return int(b)
   179  }
   180  
   181  func p256BigToLittle(l *p256Element, b *[32]byte) {
   182  	bytesToLimbs((*[4]uint64)(l), b)
   183  }
   184  
   185  func bytesToLimbs(l *[4]uint64, b *[32]byte) {
   186  	l[0] = byteorder.BEUint64(b[24:])
   187  	l[1] = byteorder.BEUint64(b[16:])
   188  	l[2] = byteorder.BEUint64(b[8:])
   189  	l[3] = byteorder.BEUint64(b[:])
   190  }
   191  
   192  func p256LittleToBig(b *[32]byte, l *p256Element) {
   193  	limbsToBytes(b, (*[4]uint64)(l))
   194  }
   195  
   196  func limbsToBytes(b *[32]byte, l *[4]uint64) {
   197  	byteorder.BEPutUint64(b[24:], l[0])
   198  	byteorder.BEPutUint64(b[16:], l[1])
   199  	byteorder.BEPutUint64(b[8:], l[2])
   200  	byteorder.BEPutUint64(b[:], l[3])
   201  }
   202  
   203  // p256Add sets res = x + y.
   204  func p256Add(res, x, y *p256Element) {
   205  	var c, b uint64
   206  	t1 := make([]uint64, 4)
   207  	t1[0], c = bits.Add64(x[0], y[0], 0)
   208  	t1[1], c = bits.Add64(x[1], y[1], c)
   209  	t1[2], c = bits.Add64(x[2], y[2], c)
   210  	t1[3], c = bits.Add64(x[3], y[3], c)
   211  	t2 := make([]uint64, 4)
   212  	t2[0], b = bits.Sub64(t1[0], p256P[0], 0)
   213  	t2[1], b = bits.Sub64(t1[1], p256P[1], b)
   214  	t2[2], b = bits.Sub64(t1[2], p256P[2], b)
   215  	t2[3], b = bits.Sub64(t1[3], p256P[3], b)
   216  	// Three options:
   217  	//   - a+b < p
   218  	//     then c is 0, b is 1, and t1 is correct
   219  	//   - p <= a+b < 2^256
   220  	//     then c is 0, b is 0, and t2 is correct
   221  	//   - 2^256 <= a+b
   222  	//     then c is 1, b is 1, and t2 is correct
   223  	t2Mask := (c ^ b) - 1
   224  	res[0] = (t1[0] & ^t2Mask) | (t2[0] & t2Mask)
   225  	res[1] = (t1[1] & ^t2Mask) | (t2[1] & t2Mask)
   226  	res[2] = (t1[2] & ^t2Mask) | (t2[2] & t2Mask)
   227  	res[3] = (t1[3] & ^t2Mask) | (t2[3] & t2Mask)
   228  }
   229  
   230  // p256Sqrt sets e to a square root of x. If x is not a square, p256Sqrt returns
   231  // false and e is unchanged. e and x can overlap.
   232  func p256Sqrt(e, x *p256Element) (isSquare bool) {
   233  	t0, t1 := new(p256Element), new(p256Element)
   234  
   235  	// Since p = 3 mod 4, exponentiation by (p + 1) / 4 yields a square root candidate.
   236  	//
   237  	// The sequence of 7 multiplications and 253 squarings is derived from the
   238  	// following addition chain generated with github.com/mmcloughlin/addchain v0.4.0.
   239  	//
   240  	//	_10       = 2*1
   241  	//	_11       = 1 + _10
   242  	//	_1100     = _11 << 2
   243  	//	_1111     = _11 + _1100
   244  	//	_11110000 = _1111 << 4
   245  	//	_11111111 = _1111 + _11110000
   246  	//	x16       = _11111111 << 8 + _11111111
   247  	//	x32       = x16 << 16 + x16
   248  	//	return      ((x32 << 32 + 1) << 96 + 1) << 94
   249  	//
   250  	p256Sqr(t0, x, 1)
   251  	p256Mul(t0, x, t0)
   252  	p256Sqr(t1, t0, 2)
   253  	p256Mul(t0, t0, t1)
   254  	p256Sqr(t1, t0, 4)
   255  	p256Mul(t0, t0, t1)
   256  	p256Sqr(t1, t0, 8)
   257  	p256Mul(t0, t0, t1)
   258  	p256Sqr(t1, t0, 16)
   259  	p256Mul(t0, t0, t1)
   260  	p256Sqr(t0, t0, 32)
   261  	p256Mul(t0, x, t0)
   262  	p256Sqr(t0, t0, 96)
   263  	p256Mul(t0, x, t0)
   264  	p256Sqr(t0, t0, 94)
   265  
   266  	p256Sqr(t1, t0, 1)
   267  	if p256Equal(t1, x) != 1 {
   268  		return false
   269  	}
   270  	*e = *t0
   271  	return true
   272  }
   273  
   274  // The following assembly functions are implemented in p256_asm_*.s
   275  
   276  // Montgomery multiplication. Sets res = in1 * in2 * R⁻¹ mod p.
   277  //
   278  //go:noescape
   279  func p256Mul(res, in1, in2 *p256Element)
   280  
   281  // Montgomery square, repeated n times (n >= 1).
   282  //
   283  //go:noescape
   284  func p256Sqr(res, in *p256Element, n int)
   285  
   286  // Montgomery multiplication by R⁻¹, or 1 outside the domain, as R⁻¹×R = 1.
   287  // Sets res = in * R⁻¹, bringing res out of the Montgomery domain.
   288  //
   289  //go:noescape
   290  func p256FromMont(res, in *p256Element)
   291  
   292  // If cond is not 0, sets val = -val mod p.
   293  //
   294  //go:noescape
   295  func p256NegCond(val *p256Element, cond int)
   296  
   297  // If cond is 0, sets res = b, otherwise sets res = a.
   298  //
   299  //go:noescape
   300  func p256MovCond(res, a, b *P256Point, cond int)
   301  
   302  // p256Table is a table of the first 16 multiples of a point. Points are stored
   303  // at an index offset of -1 so [8]P is at index 7, P is at 0, and [16]P is at 15.
   304  // [0]P is the point at infinity and it's not stored.
   305  type p256Table [16]P256Point
   306  
   307  // p256Select sets res to the point at index idx in the table.
   308  // idx must be in [0, 15]. It executes in constant time.
   309  //
   310  //go:noescape
   311  func p256Select(res *P256Point, table *p256Table, idx int)
   312  
   313  // p256AffinePoint is a point in affine coordinates (x, y). x and y are still
   314  // Montgomery domain elements. The point can't be the point at infinity.
   315  type p256AffinePoint struct {
   316  	x, y p256Element
   317  }
   318  
   319  // p256AffineTable is a table of the first 32 multiples of a point. Points are
   320  // stored at an index offset of -1 like in p256Table, and [0]P is not stored.
   321  type p256AffineTable [32]p256AffinePoint
   322  
   323  // p256Precomputed is a series of precomputed multiples of G, the canonical
   324  // generator. The first p256AffineTable contains multiples of G. The second one
   325  // multiples of [2⁶]G, the third one of [2¹²]G, and so on, where each successive
   326  // table is the previous table doubled six times. Six is the width of the
   327  // sliding window used in p256ScalarBaseMult, and having each table already
   328  // pre-doubled lets us avoid the doublings between windows entirely. This table
   329  // aliases into p256PrecomputedEmbed.
   330  var p256Precomputed *[43]p256AffineTable
   331  
   332  func init() {
   333  	p256PrecomputedPtr := unsafe.Pointer(&p256PrecomputedEmbed)
   334  	if runtime.GOARCH == "s390x" {
   335  		var newTable [43 * 32 * 2 * 4]uint64
   336  		for i, x := range (*[43 * 32 * 2 * 4][8]byte)(p256PrecomputedPtr) {
   337  			newTable[i] = byteorder.LEUint64(x[:])
   338  		}
   339  		p256PrecomputedPtr = unsafe.Pointer(&newTable)
   340  	}
   341  	p256Precomputed = (*[43]p256AffineTable)(p256PrecomputedPtr)
   342  }
   343  
   344  // p256SelectAffine sets res to the point at index idx in the table.
   345  // idx must be in [0, 31]. It executes in constant time.
   346  //
   347  //go:noescape
   348  func p256SelectAffine(res *p256AffinePoint, table *p256AffineTable, idx int)
   349  
   350  // Point addition with an affine point and constant time conditions.
   351  // If zero is 0, sets res = in2. If sel is 0, sets res = in1.
   352  // If sign is not 0, sets res = in1 + -in2. Otherwise, sets res = in1 + in2
   353  //
   354  //go:noescape
   355  func p256PointAddAffineAsm(res, in1 *P256Point, in2 *p256AffinePoint, sign, sel, zero int)
   356  
   357  // Point addition. Sets res = in1 + in2. Returns one if the two input points
   358  // were equal and zero otherwise. If in1 or in2 are the point at infinity, res
   359  // and the return value are undefined.
   360  //
   361  //go:noescape
   362  func p256PointAddAsm(res, in1, in2 *P256Point) int
   363  
   364  // Point doubling. Sets res = in + in. in can be the point at infinity.
   365  //
   366  //go:noescape
   367  func p256PointDoubleAsm(res, in *P256Point)
   368  
   369  // p256OrdElement is a P-256 scalar field element in [0, ord(G)-1]
   370  // as four uint64 limbs in little-endian order.
   371  type p256OrdElement [4]uint64
   372  
   373  // p256OrdReduce ensures s is in the range [0, ord(G)-1].
   374  func p256OrdReduce(s *p256OrdElement) {
   375  	// Since 2 * ord(G) > 2²⁵⁶, we can just conditionally subtract ord(G),
   376  	// keeping the result if it doesn't underflow.
   377  	t0, b := bits.Sub64(s[0], 0xf3b9cac2fc632551, 0)
   378  	t1, b := bits.Sub64(s[1], 0xbce6faada7179e84, b)
   379  	t2, b := bits.Sub64(s[2], 0xffffffffffffffff, b)
   380  	t3, b := bits.Sub64(s[3], 0xffffffff00000000, b)
   381  	tMask := b - 1 // zero if subtraction underflowed
   382  	s[0] ^= (t0 ^ s[0]) & tMask
   383  	s[1] ^= (t1 ^ s[1]) & tMask
   384  	s[2] ^= (t2 ^ s[2]) & tMask
   385  	s[3] ^= (t3 ^ s[3]) & tMask
   386  }
   387  
   388  func p256OrdLittleToBig(b *[32]byte, l *p256OrdElement) {
   389  	limbsToBytes(b, (*[4]uint64)(l))
   390  }
   391  
   392  func p256OrdBigToLittle(l *p256OrdElement, b *[32]byte) {
   393  	bytesToLimbs((*[4]uint64)(l), b)
   394  }
   395  
   396  // Add sets q = p1 + p2, and returns q. The points may overlap.
   397  func (q *P256Point) Add(r1, r2 *P256Point) *P256Point {
   398  	var sum, double P256Point
   399  	r1IsInfinity := r1.isInfinity()
   400  	r2IsInfinity := r2.isInfinity()
   401  	pointsEqual := p256PointAddAsm(&sum, r1, r2)
   402  	p256PointDoubleAsm(&double, r1)
   403  	p256MovCond(&sum, &double, &sum, pointsEqual)
   404  	p256MovCond(&sum, r1, &sum, r2IsInfinity)
   405  	p256MovCond(&sum, r2, &sum, r1IsInfinity)
   406  	return q.Set(&sum)
   407  }
   408  
   409  // Double sets q = p + p, and returns q. The points may overlap.
   410  func (q *P256Point) Double(p *P256Point) *P256Point {
   411  	var double P256Point
   412  	p256PointDoubleAsm(&double, p)
   413  	return q.Set(&double)
   414  }
   415  
   416  // ScalarBaseMult sets r = scalar * generator, where scalar is a 32-byte big
   417  // endian value, and returns r. If scalar is not 32 bytes long, ScalarBaseMult
   418  // returns an error and the receiver is unchanged.
   419  func (r *P256Point) ScalarBaseMult(scalar []byte) (*P256Point, error) {
   420  	if len(scalar) != 32 {
   421  		return nil, errors.New("invalid scalar length")
   422  	}
   423  	scalarReversed := new(p256OrdElement)
   424  	p256OrdBigToLittle(scalarReversed, (*[32]byte)(scalar))
   425  	p256OrdReduce(scalarReversed)
   426  
   427  	r.p256BaseMult(scalarReversed)
   428  	return r, nil
   429  }
   430  
   431  // ScalarMult sets r = scalar * q, where scalar is a 32-byte big endian value,
   432  // and returns r. If scalar is not 32 bytes long, ScalarBaseMult returns an
   433  // error and the receiver is unchanged.
   434  func (r *P256Point) ScalarMult(q *P256Point, scalar []byte) (*P256Point, error) {
   435  	if len(scalar) != 32 {
   436  		return nil, errors.New("invalid scalar length")
   437  	}
   438  	scalarReversed := new(p256OrdElement)
   439  	p256OrdBigToLittle(scalarReversed, (*[32]byte)(scalar))
   440  	p256OrdReduce(scalarReversed)
   441  
   442  	r.Set(q).p256ScalarMult(scalarReversed)
   443  	return r, nil
   444  }
   445  
   446  // uint64IsZero returns 1 if x is zero and zero otherwise.
   447  func uint64IsZero(x uint64) int {
   448  	x = ^x
   449  	x &= x >> 32
   450  	x &= x >> 16
   451  	x &= x >> 8
   452  	x &= x >> 4
   453  	x &= x >> 2
   454  	x &= x >> 1
   455  	return int(x & 1)
   456  }
   457  
   458  // p256Equal returns 1 if a and b are equal and 0 otherwise.
   459  func p256Equal(a, b *p256Element) int {
   460  	var acc uint64
   461  	for i := range a {
   462  		acc |= a[i] ^ b[i]
   463  	}
   464  	return uint64IsZero(acc)
   465  }
   466  
   467  // isInfinity returns 1 if p is the point at infinity and 0 otherwise.
   468  func (p *P256Point) isInfinity() int {
   469  	return p256Equal(&p.z, &p256Zero)
   470  }
   471  
   472  // Bytes returns the uncompressed or infinity encoding of p, as specified in
   473  // SEC 1, Version 2.0, Section 2.3.3. Note that the encoding of the point at
   474  // infinity is shorter than all other encodings.
   475  func (p *P256Point) Bytes() []byte {
   476  	// This function is outlined to make the allocations inline in the caller
   477  	// rather than happen on the heap.
   478  	var out [p256UncompressedLength]byte
   479  	return p.bytes(&out)
   480  }
   481  
   482  func (p *P256Point) bytes(out *[p256UncompressedLength]byte) []byte {
   483  	// The proper representation of the point at infinity is a single zero byte.
   484  	if p.isInfinity() == 1 {
   485  		return append(out[:0], 0)
   486  	}
   487  
   488  	x, y := new(p256Element), new(p256Element)
   489  	p.affineFromMont(x, y)
   490  
   491  	out[0] = 4 // Uncompressed form.
   492  	p256LittleToBig((*[32]byte)(out[1:33]), x)
   493  	p256LittleToBig((*[32]byte)(out[33:65]), y)
   494  
   495  	return out[:]
   496  }
   497  
   498  // affineFromMont sets (x, y) to the affine coordinates of p, converted out of the
   499  // Montgomery domain.
   500  func (p *P256Point) affineFromMont(x, y *p256Element) {
   501  	p256Inverse(y, &p.z)
   502  	p256Sqr(x, y, 1)
   503  	p256Mul(y, y, x)
   504  
   505  	p256Mul(x, &p.x, x)
   506  	p256Mul(y, &p.y, y)
   507  
   508  	p256FromMont(x, x)
   509  	p256FromMont(y, y)
   510  }
   511  
   512  // BytesX returns the encoding of the x-coordinate of p, as specified in SEC 1,
   513  // Version 2.0, Section 2.3.5, or an error if p is the point at infinity.
   514  func (p *P256Point) BytesX() ([]byte, error) {
   515  	// This function is outlined to make the allocations inline in the caller
   516  	// rather than happen on the heap.
   517  	var out [p256ElementLength]byte
   518  	return p.bytesX(&out)
   519  }
   520  
   521  func (p *P256Point) bytesX(out *[p256ElementLength]byte) ([]byte, error) {
   522  	if p.isInfinity() == 1 {
   523  		return nil, errors.New("P256 point is the point at infinity")
   524  	}
   525  
   526  	x := new(p256Element)
   527  	p256Inverse(x, &p.z)
   528  	p256Sqr(x, x, 1)
   529  	p256Mul(x, &p.x, x)
   530  	p256FromMont(x, x)
   531  	p256LittleToBig((*[32]byte)(out[:]), x)
   532  
   533  	return out[:], nil
   534  }
   535  
   536  // BytesCompressed returns the compressed or infinity encoding of p, as
   537  // specified in SEC 1, Version 2.0, Section 2.3.3. Note that the encoding of the
   538  // point at infinity is shorter than all other encodings.
   539  func (p *P256Point) BytesCompressed() []byte {
   540  	// This function is outlined to make the allocations inline in the caller
   541  	// rather than happen on the heap.
   542  	var out [p256CompressedLength]byte
   543  	return p.bytesCompressed(&out)
   544  }
   545  
   546  func (p *P256Point) bytesCompressed(out *[p256CompressedLength]byte) []byte {
   547  	if p.isInfinity() == 1 {
   548  		return append(out[:0], 0)
   549  	}
   550  
   551  	x, y := new(p256Element), new(p256Element)
   552  	p.affineFromMont(x, y)
   553  
   554  	out[0] = 2 | byte(y[0]&1)
   555  	p256LittleToBig((*[32]byte)(out[1:33]), x)
   556  
   557  	return out[:]
   558  }
   559  
   560  // Select sets q to p1 if cond == 1, and to p2 if cond == 0.
   561  func (q *P256Point) Select(p1, p2 *P256Point, cond int) *P256Point {
   562  	p256MovCond(q, p1, p2, cond)
   563  	return q
   564  }
   565  
   566  // p256Inverse sets out to in⁻¹ mod p. If in is zero, out will be zero.
   567  func p256Inverse(out, in *p256Element) {
   568  	// Inversion is calculated through exponentiation by p - 2, per Fermat's
   569  	// little theorem.
   570  	//
   571  	// The sequence of 12 multiplications and 255 squarings is derived from the
   572  	// following addition chain generated with github.com/mmcloughlin/addchain
   573  	// v0.4.0.
   574  	//
   575  	//  _10     = 2*1
   576  	//  _11     = 1 + _10
   577  	//  _110    = 2*_11
   578  	//  _111    = 1 + _110
   579  	//  _111000 = _111 << 3
   580  	//  _111111 = _111 + _111000
   581  	//  x12     = _111111 << 6 + _111111
   582  	//  x15     = x12 << 3 + _111
   583  	//  x16     = 2*x15 + 1
   584  	//  x32     = x16 << 16 + x16
   585  	//  i53     = x32 << 15
   586  	//  x47     = x15 + i53
   587  	//  i263    = ((i53 << 17 + 1) << 143 + x47) << 47
   588  	//  return    (x47 + i263) << 2 + 1
   589  	//
   590  	var z = new(p256Element)
   591  	var t0 = new(p256Element)
   592  	var t1 = new(p256Element)
   593  
   594  	p256Sqr(z, in, 1)
   595  	p256Mul(z, in, z)
   596  	p256Sqr(z, z, 1)
   597  	p256Mul(z, in, z)
   598  	p256Sqr(t0, z, 3)
   599  	p256Mul(t0, z, t0)
   600  	p256Sqr(t1, t0, 6)
   601  	p256Mul(t0, t0, t1)
   602  	p256Sqr(t0, t0, 3)
   603  	p256Mul(z, z, t0)
   604  	p256Sqr(t0, z, 1)
   605  	p256Mul(t0, in, t0)
   606  	p256Sqr(t1, t0, 16)
   607  	p256Mul(t0, t0, t1)
   608  	p256Sqr(t0, t0, 15)
   609  	p256Mul(z, z, t0)
   610  	p256Sqr(t0, t0, 17)
   611  	p256Mul(t0, in, t0)
   612  	p256Sqr(t0, t0, 143)
   613  	p256Mul(t0, z, t0)
   614  	p256Sqr(t0, t0, 47)
   615  	p256Mul(z, z, t0)
   616  	p256Sqr(z, z, 2)
   617  	p256Mul(out, in, z)
   618  }
   619  
   620  func boothW5(in uint) (int, int) {
   621  	var s uint = ^((in >> 5) - 1)
   622  	var d uint = (1 << 6) - in - 1
   623  	d = (d & s) | (in & (^s))
   624  	d = (d >> 1) + (d & 1)
   625  	return int(d), int(s & 1)
   626  }
   627  
   628  func boothW6(in uint) (int, int) {
   629  	var s uint = ^((in >> 6) - 1)
   630  	var d uint = (1 << 7) - in - 1
   631  	d = (d & s) | (in & (^s))
   632  	d = (d >> 1) + (d & 1)
   633  	return int(d), int(s & 1)
   634  }
   635  
   636  func (p *P256Point) p256BaseMult(scalar *p256OrdElement) {
   637  	var t0 p256AffinePoint
   638  
   639  	wvalue := (scalar[0] << 1) & 0x7f
   640  	sel, sign := boothW6(uint(wvalue))
   641  	p256SelectAffine(&t0, &p256Precomputed[0], sel)
   642  	p.x, p.y, p.z = t0.x, t0.y, p256One
   643  	p256NegCond(&p.y, sign)
   644  
   645  	index := uint(5)
   646  	zero := sel
   647  
   648  	for i := 1; i < 43; i++ {
   649  		if index < 192 {
   650  			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f
   651  		} else {
   652  			wvalue = (scalar[index/64] >> (index % 64)) & 0x7f
   653  		}
   654  		index += 6
   655  		sel, sign = boothW6(uint(wvalue))
   656  		p256SelectAffine(&t0, &p256Precomputed[i], sel)
   657  		p256PointAddAffineAsm(p, p, &t0, sign, sel, zero)
   658  		zero |= sel
   659  	}
   660  
   661  	// If the whole scalar was zero, set to the point at infinity.
   662  	p256MovCond(p, p, NewP256Point(), zero)
   663  }
   664  
   665  func (p *P256Point) p256ScalarMult(scalar *p256OrdElement) {
   666  	// precomp is a table of precomputed points that stores powers of p
   667  	// from p^1 to p^16.
   668  	var precomp p256Table
   669  	var t0, t1, t2, t3 P256Point
   670  
   671  	// Prepare the table
   672  	precomp[0] = *p // 1
   673  
   674  	p256PointDoubleAsm(&t0, p)
   675  	p256PointDoubleAsm(&t1, &t0)
   676  	p256PointDoubleAsm(&t2, &t1)
   677  	p256PointDoubleAsm(&t3, &t2)
   678  	precomp[1] = t0  // 2
   679  	precomp[3] = t1  // 4
   680  	precomp[7] = t2  // 8
   681  	precomp[15] = t3 // 16
   682  
   683  	p256PointAddAsm(&t0, &t0, p)
   684  	p256PointAddAsm(&t1, &t1, p)
   685  	p256PointAddAsm(&t2, &t2, p)
   686  	precomp[2] = t0 // 3
   687  	precomp[4] = t1 // 5
   688  	precomp[8] = t2 // 9
   689  
   690  	p256PointDoubleAsm(&t0, &t0)
   691  	p256PointDoubleAsm(&t1, &t1)
   692  	precomp[5] = t0 // 6
   693  	precomp[9] = t1 // 10
   694  
   695  	p256PointAddAsm(&t2, &t0, p)
   696  	p256PointAddAsm(&t1, &t1, p)
   697  	precomp[6] = t2  // 7
   698  	precomp[10] = t1 // 11
   699  
   700  	p256PointDoubleAsm(&t0, &t0)
   701  	p256PointDoubleAsm(&t2, &t2)
   702  	precomp[11] = t0 // 12
   703  	precomp[13] = t2 // 14
   704  
   705  	p256PointAddAsm(&t0, &t0, p)
   706  	p256PointAddAsm(&t2, &t2, p)
   707  	precomp[12] = t0 // 13
   708  	precomp[14] = t2 // 15
   709  
   710  	// Start scanning the window from top bit
   711  	index := uint(254)
   712  	var sel, sign int
   713  
   714  	wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
   715  	sel, _ = boothW5(uint(wvalue))
   716  
   717  	p256Select(p, &precomp, sel)
   718  	zero := sel
   719  
   720  	for index > 4 {
   721  		index -= 5
   722  		p256PointDoubleAsm(p, p)
   723  		p256PointDoubleAsm(p, p)
   724  		p256PointDoubleAsm(p, p)
   725  		p256PointDoubleAsm(p, p)
   726  		p256PointDoubleAsm(p, p)
   727  
   728  		if index < 192 {
   729  			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
   730  		} else {
   731  			wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
   732  		}
   733  
   734  		sel, sign = boothW5(uint(wvalue))
   735  
   736  		p256Select(&t0, &precomp, sel)
   737  		p256NegCond(&t0.y, sign)
   738  		p256PointAddAsm(&t1, p, &t0)
   739  		p256MovCond(&t1, &t1, p, sel)
   740  		p256MovCond(p, &t1, &t0, zero)
   741  		zero |= sel
   742  	}
   743  
   744  	p256PointDoubleAsm(p, p)
   745  	p256PointDoubleAsm(p, p)
   746  	p256PointDoubleAsm(p, p)
   747  	p256PointDoubleAsm(p, p)
   748  	p256PointDoubleAsm(p, p)
   749  
   750  	wvalue = (scalar[0] << 1) & 0x3f
   751  	sel, sign = boothW5(uint(wvalue))
   752  
   753  	p256Select(&t0, &precomp, sel)
   754  	p256NegCond(&t0.y, sign)
   755  	p256PointAddAsm(&t1, p, &t0)
   756  	p256MovCond(&t1, &t1, p, sel)
   757  	p256MovCond(p, &t1, &t0, zero)
   758  }
   759  

View as plain text