p256_asm.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file contains the Go wrapper for the constant-time, 64-bit assembly
     6  // implementation of P256. The optimizations performed here are described in
     7  // detail in:
     8  // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
     9  //                          256-bit primes"
    10  // https://link.springer.com/article/10.1007%2Fs13389-014-0090-x
    11  // https://eprint.iacr.org/2013/816.pdf
    12  
    13  //go:build (amd64 || arm64 || ppc64le || s390x) && !purego
    14  
    15  package nistec
    16  
    17  import (
    18  	"crypto/internal/fips140deps/byteorder"
    19  	"errors"
    20  	"math/bits"
    21  	"runtime"
    22  	"unsafe"
    23  )
    24  
    25  // p256Element is a P-256 base field element in [0, P-1] in the Montgomery
    26  // domain (with R 2²⁵⁶) as four limbs in little-endian order value.
    27  type p256Element [4]uint64
    28  
    29  // p256One is one in the Montgomery domain.
    30  var p256One = p256Element{0x0000000000000001, 0xffffffff00000000,
    31  	0xffffffffffffffff, 0x00000000fffffffe}
    32  
    33  var p256Zero = p256Element{}
    34  
    35  // p256P is 2²⁵⁶ - 2²²⁴ + 2¹⁹² + 2⁹⁶ - 1 in the Montgomery domain.
    36  var p256P = p256Element{0xffffffffffffffff, 0x00000000ffffffff,
    37  	0x0000000000000000, 0xffffffff00000001}
    38  
    39  // P256Point is a P-256 point. The zero value should not be assumed to be valid
    40  // (although it is in this implementation).
    41  type P256Point struct {
    42  	// (X:Y:Z) are Jacobian coordinates where x = X/Z² and y = Y/Z³. The point
    43  	// at infinity can be represented by any set of coordinates with Z = 0.
    44  	x, y, z p256Element
    45  }
    46  
    47  // NewP256Point returns a new P256Point representing the point at infinity.
    48  func NewP256Point() *P256Point {
    49  	return &P256Point{
    50  		x: p256One, y: p256One, z: p256Zero,
    51  	}
    52  }
    53  
    54  // SetGenerator sets p to the canonical generator and returns p.
    55  func (p *P256Point) SetGenerator() *P256Point {
    56  	p.x = p256Element{0x79e730d418a9143c, 0x75ba95fc5fedb601,
    57  		0x79fb732b77622510, 0x18905f76a53755c6}
    58  	p.y = p256Element{0xddf25357ce95560a, 0x8b4ab8e4ba19e45c,
    59  		0xd2e88688dd21f325, 0x8571ff1825885d85}
    60  	p.z = p256One
    61  	return p
    62  }
    63  
    64  // Set sets p = q and returns p.
    65  func (p *P256Point) Set(q *P256Point) *P256Point {
    66  	p.x, p.y, p.z = q.x, q.y, q.z
    67  	return p
    68  }
    69  
    70  const p256ElementLength = 32
    71  const p256UncompressedLength = 1 + 2*p256ElementLength
    72  const p256CompressedLength = 1 + p256ElementLength
    73  
    74  // SetBytes sets p to the compressed, uncompressed, or infinity value encoded in
    75  // b, as specified in SEC 1, Version 2.0, Section 2.3.4. If the point is not on
    76  // the curve, it returns nil and an error, and the receiver is unchanged.
    77  // Otherwise, it returns p.
    78  func (p *P256Point) SetBytes(b []byte) (*P256Point, error) {
    79  	// p256Mul operates in the Montgomery domain with R = 2²⁵⁶ mod p. Thus rr
    80  	// here is R in the Montgomery domain, or R×R mod p. See comment in
    81  	// P256OrdInverse about how this is used.
    82  	rr := p256Element{0x0000000000000003, 0xfffffffbffffffff,
    83  		0xfffffffffffffffe, 0x00000004fffffffd}
    84  
    85  	switch {
    86  	// Point at infinity.
    87  	case len(b) == 1 && b[0] == 0:
    88  		return p.Set(NewP256Point()), nil
    89  
    90  	// Uncompressed form.
    91  	case len(b) == p256UncompressedLength && b[0] == 4:
    92  		var r P256Point
    93  		p256BigToLittle(&r.x, (*[32]byte)(b[1:33]))
    94  		p256BigToLittle(&r.y, (*[32]byte)(b[33:65]))
    95  		if p256LessThanP(&r.x) == 0 || p256LessThanP(&r.y) == 0 {
    96  			return nil, errors.New("invalid P256 element encoding")
    97  		}
    98  		p256Mul(&r.x, &r.x, &rr)
    99  		p256Mul(&r.y, &r.y, &rr)
   100  		if err := p256CheckOnCurve(&r.x, &r.y); err != nil {
   101  			return nil, err
   102  		}
   103  		r.z = p256One
   104  		return p.Set(&r), nil
   105  
   106  	// Compressed form.
   107  	case len(b) == p256CompressedLength && (b[0] == 2 || b[0] == 3):
   108  		var r P256Point
   109  		p256BigToLittle(&r.x, (*[32]byte)(b[1:33]))
   110  		if p256LessThanP(&r.x) == 0 {
   111  			return nil, errors.New("invalid P256 element encoding")
   112  		}
   113  		p256Mul(&r.x, &r.x, &rr)
   114  
   115  		// y² = x³ - 3x + b
   116  		p256Polynomial(&r.y, &r.x)
   117  		if !p256Sqrt(&r.y, &r.y) {
   118  			return nil, errors.New("invalid P256 compressed point encoding")
   119  		}
   120  
   121  		// Select the positive or negative root, as indicated by the least
   122  		// significant bit, based on the encoding type byte.
   123  		yy := new(p256Element)
   124  		p256FromMont(yy, &r.y)
   125  		cond := int(yy[0]&1) ^ int(b[0]&1)
   126  		p256NegCond(&r.y, cond)
   127  
   128  		r.z = p256One
   129  		return p.Set(&r), nil
   130  
   131  	default:
   132  		return nil, errors.New("invalid P256 point encoding")
   133  	}
   134  }
   135  
   136  // p256Polynomial sets y2 to x³ - 3x + b, and returns y2.
   137  func p256Polynomial(y2, x *p256Element) *p256Element {
   138  	x3 := new(p256Element)
   139  	p256Sqr(x3, x, 1)
   140  	p256Mul(x3, x3, x)
   141  
   142  	threeX := new(p256Element)
   143  	p256Add(threeX, x, x)
   144  	p256Add(threeX, threeX, x)
   145  	p256NegCond(threeX, 1)
   146  
   147  	p256B := &p256Element{0xd89cdf6229c4bddf, 0xacf005cd78843090,
   148  		0xe5a220abf7212ed6, 0xdc30061d04874834}
   149  
   150  	p256Add(x3, x3, threeX)
   151  	p256Add(x3, x3, p256B)
   152  
   153  	*y2 = *x3
   154  	return y2
   155  }
   156  
   157  func p256CheckOnCurve(x, y *p256Element) error {
   158  	// y² = x³ - 3x + b
   159  	rhs := p256Polynomial(new(p256Element), x)
   160  	lhs := new(p256Element)
   161  	p256Sqr(lhs, y, 1)
   162  	if p256Equal(lhs, rhs) != 1 {
   163  		return errors.New("P256 point not on curve")
   164  	}
   165  	return nil
   166  }
   167  
   168  // p256LessThanP returns 1 if x < p, and 0 otherwise. Note that a p256Element is
   169  // not allowed to be equal to or greater than p, so if this function returns 0
   170  // then x is invalid.
   171  func p256LessThanP(x *p256Element) int {
   172  	var b uint64
   173  	_, b = bits.Sub64(x[0], p256P[0], b)
   174  	_, b = bits.Sub64(x[1], p256P[1], b)
   175  	_, b = bits.Sub64(x[2], p256P[2], b)
   176  	_, b = bits.Sub64(x[3], p256P[3], b)
   177  	return int(b)
   178  }
   179  
   180  func p256BigToLittle(l *p256Element, b *[32]byte) {
   181  	bytesToLimbs((*[4]uint64)(l), b)
   182  }
   183  
   184  func bytesToLimbs(l *[4]uint64, b *[32]byte) {
   185  	l[0] = byteorder.BEUint64(b[24:])
   186  	l[1] = byteorder.BEUint64(b[16:])
   187  	l[2] = byteorder.BEUint64(b[8:])
   188  	l[3] = byteorder.BEUint64(b[:])
   189  }
   190  
   191  func p256LittleToBig(b *[32]byte, l *p256Element) {
   192  	limbsToBytes(b, (*[4]uint64)(l))
   193  }
   194  
   195  func limbsToBytes(b *[32]byte, l *[4]uint64) {
   196  	byteorder.BEPutUint64(b[24:], l[0])
   197  	byteorder.BEPutUint64(b[16:], l[1])
   198  	byteorder.BEPutUint64(b[8:], l[2])
   199  	byteorder.BEPutUint64(b[:], l[3])
   200  }
   201  
   202  // p256Add sets res = x + y.
   203  func p256Add(res, x, y *p256Element) {
   204  	var c, b uint64
   205  	t1 := make([]uint64, 4)
   206  	t1[0], c = bits.Add64(x[0], y[0], 0)
   207  	t1[1], c = bits.Add64(x[1], y[1], c)
   208  	t1[2], c = bits.Add64(x[2], y[2], c)
   209  	t1[3], c = bits.Add64(x[3], y[3], c)
   210  	t2 := make([]uint64, 4)
   211  	t2[0], b = bits.Sub64(t1[0], p256P[0], 0)
   212  	t2[1], b = bits.Sub64(t1[1], p256P[1], b)
   213  	t2[2], b = bits.Sub64(t1[2], p256P[2], b)
   214  	t2[3], b = bits.Sub64(t1[3], p256P[3], b)
   215  	// Three options:
   216  	//   - a+b < p
   217  	//     then c is 0, b is 1, and t1 is correct
   218  	//   - p <= a+b < 2^256
   219  	//     then c is 0, b is 0, and t2 is correct
   220  	//   - 2^256 <= a+b
   221  	//     then c is 1, b is 1, and t2 is correct
   222  	t2Mask := (c ^ b) - 1
   223  	res[0] = (t1[0] & ^t2Mask) | (t2[0] & t2Mask)
   224  	res[1] = (t1[1] & ^t2Mask) | (t2[1] & t2Mask)
   225  	res[2] = (t1[2] & ^t2Mask) | (t2[2] & t2Mask)
   226  	res[3] = (t1[3] & ^t2Mask) | (t2[3] & t2Mask)
   227  }
   228  
   229  // p256Sqrt sets e to a square root of x. If x is not a square, p256Sqrt returns
   230  // false and e is unchanged. e and x can overlap.
   231  func p256Sqrt(e, x *p256Element) (isSquare bool) {
   232  	t0, t1 := new(p256Element), new(p256Element)
   233  
   234  	// Since p = 3 mod 4, exponentiation by (p + 1) / 4 yields a square root candidate.
   235  	//
   236  	// The sequence of 7 multiplications and 253 squarings is derived from the
   237  	// following addition chain generated with github.com/mmcloughlin/addchain v0.4.0.
   238  	//
   239  	//	_10       = 2*1
   240  	//	_11       = 1 + _10
   241  	//	_1100     = _11 << 2
   242  	//	_1111     = _11 + _1100
   243  	//	_11110000 = _1111 << 4
   244  	//	_11111111 = _1111 + _11110000
   245  	//	x16       = _11111111 << 8 + _11111111
   246  	//	x32       = x16 << 16 + x16
   247  	//	return      ((x32 << 32 + 1) << 96 + 1) << 94
   248  	//
   249  	p256Sqr(t0, x, 1)
   250  	p256Mul(t0, x, t0)
   251  	p256Sqr(t1, t0, 2)
   252  	p256Mul(t0, t0, t1)
   253  	p256Sqr(t1, t0, 4)
   254  	p256Mul(t0, t0, t1)
   255  	p256Sqr(t1, t0, 8)
   256  	p256Mul(t0, t0, t1)
   257  	p256Sqr(t1, t0, 16)
   258  	p256Mul(t0, t0, t1)
   259  	p256Sqr(t0, t0, 32)
   260  	p256Mul(t0, x, t0)
   261  	p256Sqr(t0, t0, 96)
   262  	p256Mul(t0, x, t0)
   263  	p256Sqr(t0, t0, 94)
   264  
   265  	p256Sqr(t1, t0, 1)
   266  	if p256Equal(t1, x) != 1 {
   267  		return false
   268  	}
   269  	*e = *t0
   270  	return true
   271  }
   272  
   273  // The following assembly functions are implemented in p256_asm_*.s
   274  
   275  // Montgomery multiplication. Sets res = in1 * in2 * R⁻¹ mod p.
   276  //
   277  //go:noescape
   278  func p256Mul(res, in1, in2 *p256Element)
   279  
   280  // Montgomery square, repeated n times (n >= 1).
   281  //
   282  //go:noescape
   283  func p256Sqr(res, in *p256Element, n int)
   284  
   285  // Montgomery multiplication by R⁻¹, or 1 outside the domain.
   286  // Sets res = in * R⁻¹, bringing res out of the Montgomery domain.
   287  //
   288  //go:noescape
   289  func p256FromMont(res, in *p256Element)
   290  
   291  // If cond is not 0, sets val = -val mod p.
   292  //
   293  //go:noescape
   294  func p256NegCond(val *p256Element, cond int)
   295  
   296  // If cond is 0, sets res = b, otherwise sets res = a.
   297  //
   298  //go:noescape
   299  func p256MovCond(res, a, b *P256Point, cond int)
   300  
   301  // p256Table is a table of the first 16 multiples of a point. Points are stored
   302  // at an index offset of -1 so [8]P is at index 7, P is at 0, and [16]P is at 15.
   303  // [0]P is the point at infinity and it's not stored.
   304  type p256Table [16]P256Point
   305  
   306  // p256Select sets res to the point at index idx in the table.
   307  // idx must be in [0, 15]. It executes in constant time.
   308  //
   309  //go:noescape
   310  func p256Select(res *P256Point, table *p256Table, idx int)
   311  
   312  // p256AffinePoint is a point in affine coordinates (x, y). x and y are still
   313  // Montgomery domain elements. The point can't be the point at infinity.
   314  type p256AffinePoint struct {
   315  	x, y p256Element
   316  }
   317  
   318  // p256AffineTable is a table of the first 32 multiples of a point. Points are
   319  // stored at an index offset of -1 like in p256Table, and [0]P is not stored.
   320  type p256AffineTable [32]p256AffinePoint
   321  
   322  // p256Precomputed is a series of precomputed multiples of G, the canonical
   323  // generator. The first p256AffineTable contains multiples of G. The second one
   324  // multiples of [2⁶]G, the third one of [2¹²]G, and so on, where each successive
   325  // table is the previous table doubled six times. Six is the width of the
   326  // sliding window used in p256ScalarBaseMult, and having each table already
   327  // pre-doubled lets us avoid the doublings between windows entirely. This table
   328  // aliases into p256PrecomputedEmbed.
   329  var p256Precomputed *[43]p256AffineTable
   330  
   331  func init() {
   332  	p256PrecomputedPtr := unsafe.Pointer(&p256PrecomputedEmbed)
   333  	if runtime.GOARCH == "s390x" {
   334  		var newTable [43 * 32 * 2 * 4]uint64
   335  		for i, x := range (*[43 * 32 * 2 * 4][8]byte)(p256PrecomputedPtr) {
   336  			newTable[i] = byteorder.LEUint64(x[:])
   337  		}
   338  		p256PrecomputedPtr = unsafe.Pointer(&newTable)
   339  	}
   340  	p256Precomputed = (*[43]p256AffineTable)(p256PrecomputedPtr)
   341  }
   342  
   343  // p256SelectAffine sets res to the point at index idx in the table.
   344  // idx must be in [0, 31]. It executes in constant time.
   345  //
   346  //go:noescape
   347  func p256SelectAffine(res *p256AffinePoint, table *p256AffineTable, idx int)
   348  
   349  // Point addition with an affine point and constant time conditions.
   350  // If zero is 0, sets res = in2. If sel is 0, sets res = in1.
   351  // If sign is not 0, sets res = in1 + -in2. Otherwise, sets res = in1 + in2
   352  //
   353  //go:noescape
   354  func p256PointAddAffineAsm(res, in1 *P256Point, in2 *p256AffinePoint, sign, sel, zero int)
   355  
   356  // Point addition. Sets res = in1 + in2. Returns one if the two input points
   357  // were equal and zero otherwise. If in1 or in2 are the point at infinity, res
   358  // and the return value are undefined.
   359  //
   360  //go:noescape
   361  func p256PointAddAsm(res, in1, in2 *P256Point) int
   362  
   363  // Point doubling. Sets res = in + in. in can be the point at infinity.
   364  //
   365  //go:noescape
   366  func p256PointDoubleAsm(res, in *P256Point)
   367  
   368  // p256OrdElement is a P-256 scalar field element in [0, ord(G)-1] in the
   369  // Montgomery domain (with R 2²⁵⁶) as four uint64 limbs in little-endian order.
   370  type p256OrdElement [4]uint64
   371  
   372  // p256OrdReduce ensures s is in the range [0, ord(G)-1].
   373  func p256OrdReduce(s *p256OrdElement) {
   374  	// Since 2 * ord(G) > 2²⁵⁶, we can just conditionally subtract ord(G),
   375  	// keeping the result if it doesn't underflow.
   376  	t0, b := bits.Sub64(s[0], 0xf3b9cac2fc632551, 0)
   377  	t1, b := bits.Sub64(s[1], 0xbce6faada7179e84, b)
   378  	t2, b := bits.Sub64(s[2], 0xffffffffffffffff, b)
   379  	t3, b := bits.Sub64(s[3], 0xffffffff00000000, b)
   380  	tMask := b - 1 // zero if subtraction underflowed
   381  	s[0] ^= (t0 ^ s[0]) & tMask
   382  	s[1] ^= (t1 ^ s[1]) & tMask
   383  	s[2] ^= (t2 ^ s[2]) & tMask
   384  	s[3] ^= (t3 ^ s[3]) & tMask
   385  }
   386  
   387  func p256OrdLittleToBig(b *[32]byte, l *p256OrdElement) {
   388  	limbsToBytes(b, (*[4]uint64)(l))
   389  }
   390  
   391  func p256OrdBigToLittle(l *p256OrdElement, b *[32]byte) {
   392  	bytesToLimbs((*[4]uint64)(l), b)
   393  }
   394  
   395  // Add sets q = p1 + p2, and returns q. The points may overlap.
   396  func (q *P256Point) Add(r1, r2 *P256Point) *P256Point {
   397  	var sum, double P256Point
   398  	r1IsInfinity := r1.isInfinity()
   399  	r2IsInfinity := r2.isInfinity()
   400  	pointsEqual := p256PointAddAsm(&sum, r1, r2)
   401  	p256PointDoubleAsm(&double, r1)
   402  	p256MovCond(&sum, &double, &sum, pointsEqual)
   403  	p256MovCond(&sum, r1, &sum, r2IsInfinity)
   404  	p256MovCond(&sum, r2, &sum, r1IsInfinity)
   405  	return q.Set(&sum)
   406  }
   407  
   408  // Double sets q = p + p, and returns q. The points may overlap.
   409  func (q *P256Point) Double(p *P256Point) *P256Point {
   410  	var double P256Point
   411  	p256PointDoubleAsm(&double, p)
   412  	return q.Set(&double)
   413  }
   414  
   415  // ScalarBaseMult sets r = scalar * generator, where scalar is a 32-byte big
   416  // endian value, and returns r. If scalar is not 32 bytes long, ScalarBaseMult
   417  // returns an error and the receiver is unchanged.
   418  func (r *P256Point) ScalarBaseMult(scalar []byte) (*P256Point, error) {
   419  	if len(scalar) != 32 {
   420  		return nil, errors.New("invalid scalar length")
   421  	}
   422  	scalarReversed := new(p256OrdElement)
   423  	p256OrdBigToLittle(scalarReversed, (*[32]byte)(scalar))
   424  	p256OrdReduce(scalarReversed)
   425  
   426  	r.p256BaseMult(scalarReversed)
   427  	return r, nil
   428  }
   429  
   430  // ScalarMult sets r = scalar * q, where scalar is a 32-byte big endian value,
   431  // and returns r. If scalar is not 32 bytes long, ScalarBaseMult returns an
   432  // error and the receiver is unchanged.
   433  func (r *P256Point) ScalarMult(q *P256Point, scalar []byte) (*P256Point, error) {
   434  	if len(scalar) != 32 {
   435  		return nil, errors.New("invalid scalar length")
   436  	}
   437  	scalarReversed := new(p256OrdElement)
   438  	p256OrdBigToLittle(scalarReversed, (*[32]byte)(scalar))
   439  	p256OrdReduce(scalarReversed)
   440  
   441  	r.Set(q).p256ScalarMult(scalarReversed)
   442  	return r, nil
   443  }
   444  
   445  // uint64IsZero returns 1 if x is zero and zero otherwise.
   446  func uint64IsZero(x uint64) int {
   447  	x = ^x
   448  	x &= x >> 32
   449  	x &= x >> 16
   450  	x &= x >> 8
   451  	x &= x >> 4
   452  	x &= x >> 2
   453  	x &= x >> 1
   454  	return int(x & 1)
   455  }
   456  
   457  // p256Equal returns 1 if a and b are equal and 0 otherwise.
   458  func p256Equal(a, b *p256Element) int {
   459  	var acc uint64
   460  	for i := range a {
   461  		acc |= a[i] ^ b[i]
   462  	}
   463  	return uint64IsZero(acc)
   464  }
   465  
   466  // isInfinity returns 1 if p is the point at infinity and 0 otherwise.
   467  func (p *P256Point) isInfinity() int {
   468  	return p256Equal(&p.z, &p256Zero)
   469  }
   470  
   471  // Bytes returns the uncompressed or infinity encoding of p, as specified in
   472  // SEC 1, Version 2.0, Section 2.3.3. Note that the encoding of the point at
   473  // infinity is shorter than all other encodings.
   474  func (p *P256Point) Bytes() []byte {
   475  	// This function is outlined to make the allocations inline in the caller
   476  	// rather than happen on the heap.
   477  	var out [p256UncompressedLength]byte
   478  	return p.bytes(&out)
   479  }
   480  
   481  func (p *P256Point) bytes(out *[p256UncompressedLength]byte) []byte {
   482  	// The proper representation of the point at infinity is a single zero byte.
   483  	if p.isInfinity() == 1 {
   484  		return append(out[:0], 0)
   485  	}
   486  
   487  	x, y := new(p256Element), new(p256Element)
   488  	p.affineFromMont(x, y)
   489  
   490  	out[0] = 4 // Uncompressed form.
   491  	p256LittleToBig((*[32]byte)(out[1:33]), x)
   492  	p256LittleToBig((*[32]byte)(out[33:65]), y)
   493  
   494  	return out[:]
   495  }
   496  
   497  // affineFromMont sets (x, y) to the affine coordinates of p, converted out of the
   498  // Montgomery domain.
   499  func (p *P256Point) affineFromMont(x, y *p256Element) {
   500  	p256Inverse(y, &p.z)
   501  	p256Sqr(x, y, 1)
   502  	p256Mul(y, y, x)
   503  
   504  	p256Mul(x, &p.x, x)
   505  	p256Mul(y, &p.y, y)
   506  
   507  	p256FromMont(x, x)
   508  	p256FromMont(y, y)
   509  }
   510  
   511  // BytesX returns the encoding of the x-coordinate of p, as specified in SEC 1,
   512  // Version 2.0, Section 2.3.5, or an error if p is the point at infinity.
   513  func (p *P256Point) BytesX() ([]byte, error) {
   514  	// This function is outlined to make the allocations inline in the caller
   515  	// rather than happen on the heap.
   516  	var out [p256ElementLength]byte
   517  	return p.bytesX(&out)
   518  }
   519  
   520  func (p *P256Point) bytesX(out *[p256ElementLength]byte) ([]byte, error) {
   521  	if p.isInfinity() == 1 {
   522  		return nil, errors.New("P256 point is the point at infinity")
   523  	}
   524  
   525  	x := new(p256Element)
   526  	p256Inverse(x, &p.z)
   527  	p256Sqr(x, x, 1)
   528  	p256Mul(x, &p.x, x)
   529  	p256FromMont(x, x)
   530  	p256LittleToBig((*[32]byte)(out[:]), x)
   531  
   532  	return out[:], nil
   533  }
   534  
   535  // BytesCompressed returns the compressed or infinity encoding of p, as
   536  // specified in SEC 1, Version 2.0, Section 2.3.3. Note that the encoding of the
   537  // point at infinity is shorter than all other encodings.
   538  func (p *P256Point) BytesCompressed() []byte {
   539  	// This function is outlined to make the allocations inline in the caller
   540  	// rather than happen on the heap.
   541  	var out [p256CompressedLength]byte
   542  	return p.bytesCompressed(&out)
   543  }
   544  
   545  func (p *P256Point) bytesCompressed(out *[p256CompressedLength]byte) []byte {
   546  	if p.isInfinity() == 1 {
   547  		return append(out[:0], 0)
   548  	}
   549  
   550  	x, y := new(p256Element), new(p256Element)
   551  	p.affineFromMont(x, y)
   552  
   553  	out[0] = 2 | byte(y[0]&1)
   554  	p256LittleToBig((*[32]byte)(out[1:33]), x)
   555  
   556  	return out[:]
   557  }
   558  
   559  // Select sets q to p1 if cond == 1, and to p2 if cond == 0.
   560  func (q *P256Point) Select(p1, p2 *P256Point, cond int) *P256Point {
   561  	p256MovCond(q, p1, p2, cond)
   562  	return q
   563  }
   564  
   565  // p256Inverse sets out to in⁻¹ mod p. If in is zero, out will be zero.
   566  func p256Inverse(out, in *p256Element) {
   567  	// Inversion is calculated through exponentiation by p - 2, per Fermat's
   568  	// little theorem.
   569  	//
   570  	// The sequence of 12 multiplications and 255 squarings is derived from the
   571  	// following addition chain generated with github.com/mmcloughlin/addchain
   572  	// v0.4.0.
   573  	//
   574  	//  _10     = 2*1
   575  	//  _11     = 1 + _10
   576  	//  _110    = 2*_11
   577  	//  _111    = 1 + _110
   578  	//  _111000 = _111 << 3
   579  	//  _111111 = _111 + _111000
   580  	//  x12     = _111111 << 6 + _111111
   581  	//  x15     = x12 << 3 + _111
   582  	//  x16     = 2*x15 + 1
   583  	//  x32     = x16 << 16 + x16
   584  	//  i53     = x32 << 15
   585  	//  x47     = x15 + i53
   586  	//  i263    = ((i53 << 17 + 1) << 143 + x47) << 47
   587  	//  return    (x47 + i263) << 2 + 1
   588  	//
   589  	var z = new(p256Element)
   590  	var t0 = new(p256Element)
   591  	var t1 = new(p256Element)
   592  
   593  	p256Sqr(z, in, 1)
   594  	p256Mul(z, in, z)
   595  	p256Sqr(z, z, 1)
   596  	p256Mul(z, in, z)
   597  	p256Sqr(t0, z, 3)
   598  	p256Mul(t0, z, t0)
   599  	p256Sqr(t1, t0, 6)
   600  	p256Mul(t0, t0, t1)
   601  	p256Sqr(t0, t0, 3)
   602  	p256Mul(z, z, t0)
   603  	p256Sqr(t0, z, 1)
   604  	p256Mul(t0, in, t0)
   605  	p256Sqr(t1, t0, 16)
   606  	p256Mul(t0, t0, t1)
   607  	p256Sqr(t0, t0, 15)
   608  	p256Mul(z, z, t0)
   609  	p256Sqr(t0, t0, 17)
   610  	p256Mul(t0, in, t0)
   611  	p256Sqr(t0, t0, 143)
   612  	p256Mul(t0, z, t0)
   613  	p256Sqr(t0, t0, 47)
   614  	p256Mul(z, z, t0)
   615  	p256Sqr(z, z, 2)
   616  	p256Mul(out, in, z)
   617  }
   618  
   619  func boothW5(in uint) (int, int) {
   620  	var s uint = ^((in >> 5) - 1)
   621  	var d uint = (1 << 6) - in - 1
   622  	d = (d & s) | (in & (^s))
   623  	d = (d >> 1) + (d & 1)
   624  	return int(d), int(s & 1)
   625  }
   626  
   627  func boothW6(in uint) (int, int) {
   628  	var s uint = ^((in >> 6) - 1)
   629  	var d uint = (1 << 7) - in - 1
   630  	d = (d & s) | (in & (^s))
   631  	d = (d >> 1) + (d & 1)
   632  	return int(d), int(s & 1)
   633  }
   634  
   635  func (p *P256Point) p256BaseMult(scalar *p256OrdElement) {
   636  	var t0 p256AffinePoint
   637  
   638  	wvalue := (scalar[0] << 1) & 0x7f
   639  	sel, sign := boothW6(uint(wvalue))
   640  	p256SelectAffine(&t0, &p256Precomputed[0], sel)
   641  	p.x, p.y, p.z = t0.x, t0.y, p256One
   642  	p256NegCond(&p.y, sign)
   643  
   644  	index := uint(5)
   645  	zero := sel
   646  
   647  	for i := 1; i < 43; i++ {
   648  		if index < 192 {
   649  			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f
   650  		} else {
   651  			wvalue = (scalar[index/64] >> (index % 64)) & 0x7f
   652  		}
   653  		index += 6
   654  		sel, sign = boothW6(uint(wvalue))
   655  		p256SelectAffine(&t0, &p256Precomputed[i], sel)
   656  		p256PointAddAffineAsm(p, p, &t0, sign, sel, zero)
   657  		zero |= sel
   658  	}
   659  
   660  	// If the whole scalar was zero, set to the point at infinity.
   661  	p256MovCond(p, p, NewP256Point(), zero)
   662  }
   663  
   664  func (p *P256Point) p256ScalarMult(scalar *p256OrdElement) {
   665  	// precomp is a table of precomputed points that stores powers of p
   666  	// from p^1 to p^16.
   667  	var precomp p256Table
   668  	var t0, t1, t2, t3 P256Point
   669  
   670  	// Prepare the table
   671  	precomp[0] = *p // 1
   672  
   673  	p256PointDoubleAsm(&t0, p)
   674  	p256PointDoubleAsm(&t1, &t0)
   675  	p256PointDoubleAsm(&t2, &t1)
   676  	p256PointDoubleAsm(&t3, &t2)
   677  	precomp[1] = t0  // 2
   678  	precomp[3] = t1  // 4
   679  	precomp[7] = t2  // 8
   680  	precomp[15] = t3 // 16
   681  
   682  	p256PointAddAsm(&t0, &t0, p)
   683  	p256PointAddAsm(&t1, &t1, p)
   684  	p256PointAddAsm(&t2, &t2, p)
   685  	precomp[2] = t0 // 3
   686  	precomp[4] = t1 // 5
   687  	precomp[8] = t2 // 9
   688  
   689  	p256PointDoubleAsm(&t0, &t0)
   690  	p256PointDoubleAsm(&t1, &t1)
   691  	precomp[5] = t0 // 6
   692  	precomp[9] = t1 // 10
   693  
   694  	p256PointAddAsm(&t2, &t0, p)
   695  	p256PointAddAsm(&t1, &t1, p)
   696  	precomp[6] = t2  // 7
   697  	precomp[10] = t1 // 11
   698  
   699  	p256PointDoubleAsm(&t0, &t0)
   700  	p256PointDoubleAsm(&t2, &t2)
   701  	precomp[11] = t0 // 12
   702  	precomp[13] = t2 // 14
   703  
   704  	p256PointAddAsm(&t0, &t0, p)
   705  	p256PointAddAsm(&t2, &t2, p)
   706  	precomp[12] = t0 // 13
   707  	precomp[14] = t2 // 15
   708  
   709  	// Start scanning the window from top bit
   710  	index := uint(254)
   711  	var sel, sign int
   712  
   713  	wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
   714  	sel, _ = boothW5(uint(wvalue))
   715  
   716  	p256Select(p, &precomp, sel)
   717  	zero := sel
   718  
   719  	for index > 4 {
   720  		index -= 5
   721  		p256PointDoubleAsm(p, p)
   722  		p256PointDoubleAsm(p, p)
   723  		p256PointDoubleAsm(p, p)
   724  		p256PointDoubleAsm(p, p)
   725  		p256PointDoubleAsm(p, p)
   726  
   727  		if index < 192 {
   728  			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
   729  		} else {
   730  			wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
   731  		}
   732  
   733  		sel, sign = boothW5(uint(wvalue))
   734  
   735  		p256Select(&t0, &precomp, sel)
   736  		p256NegCond(&t0.y, sign)
   737  		p256PointAddAsm(&t1, p, &t0)
   738  		p256MovCond(&t1, &t1, p, sel)
   739  		p256MovCond(p, &t1, &t0, zero)
   740  		zero |= sel
   741  	}
   742  
   743  	p256PointDoubleAsm(p, p)
   744  	p256PointDoubleAsm(p, p)
   745  	p256PointDoubleAsm(p, p)
   746  	p256PointDoubleAsm(p, p)
   747  	p256PointDoubleAsm(p, p)
   748  
   749  	wvalue = (scalar[0] << 1) & 0x3f
   750  	sel, sign = boothW5(uint(wvalue))
   751  
   752  	p256Select(&t0, &precomp, sel)
   753  	p256NegCond(&t0.y, sign)
   754  	p256PointAddAsm(&t1, p, &t0)
   755  	p256MovCond(&t1, &t1, p, sel)
   756  	p256MovCond(p, &t1, &t0, zero)
   757  }
   758
View as plain text