Source file src/crypto/internal/fips140/nistec/p256.go

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build (!amd64 && !arm64 && !ppc64le && !s390x) || purego
     6  
     7  package nistec
     8  
     9  import (
    10  	"crypto/internal/fips140/nistec/fiat"
    11  	"crypto/internal/fips140/subtle"
    12  	"crypto/internal/fips140deps/byteorder"
    13  	"crypto/internal/fips140deps/cpu"
    14  	"errors"
    15  	"math/bits"
    16  	"sync"
    17  	"unsafe"
    18  )
    19  
    20  // P256Point is a P-256 point. The zero value is NOT valid.
    21  type P256Point struct {
    22  	// The point is represented in projective coordinates (X:Y:Z), where x = X/Z
    23  	// and y = Y/Z. Infinity is (0:1:0).
    24  	//
    25  	// fiat.P256Element is a base field element in [0, P-1] in the Montgomery
    26  	// domain (with R 2²⁵⁶ and P 2²⁵⁶ - 2²²⁴ + 2¹⁹² + 2⁹⁶ - 1) as four limbs in
    27  	// little-endian order value.
    28  	x, y, z fiat.P256Element
    29  }
    30  
    31  // NewP256Point returns a new P256Point representing the point at infinity point.
    32  func NewP256Point() *P256Point {
    33  	p := &P256Point{}
    34  	p.y.One()
    35  	return p
    36  }
    37  
    38  // SetGenerator sets p to the canonical generator and returns p.
    39  func (p *P256Point) SetGenerator() *P256Point {
    40  	p.x.SetBytes([]byte{0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2, 0x77, 0x3, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0, 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96})
    41  	p.y.SetBytes([]byte{0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0xf, 0x9e, 0x16, 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce, 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5})
    42  	p.z.One()
    43  	return p
    44  }
    45  
    46  // Set sets p = q and returns p.
    47  func (p *P256Point) Set(q *P256Point) *P256Point {
    48  	p.x.Set(&q.x)
    49  	p.y.Set(&q.y)
    50  	p.z.Set(&q.z)
    51  	return p
    52  }
    53  
    54  const p256ElementLength = 32
    55  const p256UncompressedLength = 1 + 2*p256ElementLength
    56  const p256CompressedLength = 1 + p256ElementLength
    57  
    58  // SetBytes sets p to the compressed, uncompressed, or infinity value encoded in
    59  // b, as specified in SEC 1, Version 2.0, Section 2.3.4. If the point is not on
    60  // the curve, it returns nil and an error, and the receiver is unchanged.
    61  // Otherwise, it returns p.
    62  func (p *P256Point) SetBytes(b []byte) (*P256Point, error) {
    63  	switch {
    64  	// Point at infinity.
    65  	case len(b) == 1 && b[0] == 0:
    66  		return p.Set(NewP256Point()), nil
    67  
    68  	// Uncompressed form.
    69  	case len(b) == p256UncompressedLength && b[0] == 4:
    70  		x, err := new(fiat.P256Element).SetBytes(b[1 : 1+p256ElementLength])
    71  		if err != nil {
    72  			return nil, err
    73  		}
    74  		y, err := new(fiat.P256Element).SetBytes(b[1+p256ElementLength:])
    75  		if err != nil {
    76  			return nil, err
    77  		}
    78  		if err := p256CheckOnCurve(x, y); err != nil {
    79  			return nil, err
    80  		}
    81  		p.x.Set(x)
    82  		p.y.Set(y)
    83  		p.z.One()
    84  		return p, nil
    85  
    86  	// Compressed form.
    87  	case len(b) == p256CompressedLength && (b[0] == 2 || b[0] == 3):
    88  		x, err := new(fiat.P256Element).SetBytes(b[1:])
    89  		if err != nil {
    90  			return nil, err
    91  		}
    92  
    93  		// y² = x³ - 3x + b
    94  		y := p256Polynomial(new(fiat.P256Element), x)
    95  		if !p256Sqrt(y, y) {
    96  			return nil, errors.New("invalid P256 compressed point encoding")
    97  		}
    98  
    99  		// Select the positive or negative root, as indicated by the least
   100  		// significant bit, based on the encoding type byte.
   101  		otherRoot := new(fiat.P256Element)
   102  		otherRoot.Sub(otherRoot, y)
   103  		cond := y.Bytes()[p256ElementLength-1]&1 ^ b[0]&1
   104  		y.Select(otherRoot, y, int(cond))
   105  
   106  		p.x.Set(x)
   107  		p.y.Set(y)
   108  		p.z.One()
   109  		return p, nil
   110  
   111  	default:
   112  		return nil, errors.New("invalid P256 point encoding")
   113  	}
   114  }
   115  
   116  var _p256B *fiat.P256Element
   117  var _p256BOnce sync.Once
   118  
   119  func p256B() *fiat.P256Element {
   120  	_p256BOnce.Do(func() {
   121  		_p256B, _ = new(fiat.P256Element).SetBytes([]byte{0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7, 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc, 0x65, 0x1d, 0x6, 0xb0, 0xcc, 0x53, 0xb0, 0xf6, 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b})
   122  	})
   123  	return _p256B
   124  }
   125  
   126  // p256Polynomial sets y2 to x³ - 3x + b, and returns y2.
   127  func p256Polynomial(y2, x *fiat.P256Element) *fiat.P256Element {
   128  	y2.Square(x)
   129  	y2.Mul(y2, x)
   130  
   131  	threeX := new(fiat.P256Element).Add(x, x)
   132  	threeX.Add(threeX, x)
   133  	y2.Sub(y2, threeX)
   134  
   135  	return y2.Add(y2, p256B())
   136  }
   137  
   138  func p256CheckOnCurve(x, y *fiat.P256Element) error {
   139  	// y² = x³ - 3x + b
   140  	rhs := p256Polynomial(new(fiat.P256Element), x)
   141  	lhs := new(fiat.P256Element).Square(y)
   142  	if rhs.Equal(lhs) != 1 {
   143  		return errors.New("P256 point not on curve")
   144  	}
   145  	return nil
   146  }
   147  
   148  // Bytes returns the uncompressed or infinity encoding of p, as specified in
   149  // SEC 1, Version 2.0, Section 2.3.3. Note that the encoding of the point at
   150  // infinity is shorter than all other encodings.
   151  func (p *P256Point) Bytes() []byte {
   152  	// This function is outlined to make the allocations inline in the caller
   153  	// rather than happen on the heap.
   154  	var out [p256UncompressedLength]byte
   155  	return p.bytes(&out)
   156  }
   157  
   158  func (p *P256Point) bytes(out *[p256UncompressedLength]byte) []byte {
   159  	// The SEC 1 representation of the point at infinity is a single zero byte,
   160  	// and only infinity has z = 0.
   161  	if p.z.IsZero() == 1 {
   162  		return append(out[:0], 0)
   163  	}
   164  
   165  	zinv := new(fiat.P256Element).Invert(&p.z)
   166  	x := new(fiat.P256Element).Mul(&p.x, zinv)
   167  	y := new(fiat.P256Element).Mul(&p.y, zinv)
   168  
   169  	buf := append(out[:0], 4)
   170  	buf = append(buf, x.Bytes()...)
   171  	buf = append(buf, y.Bytes()...)
   172  	return buf
   173  }
   174  
   175  // BytesX returns the encoding of the x-coordinate of p, as specified in SEC 1,
   176  // Version 2.0, Section 2.3.5, or an error if p is the point at infinity.
   177  func (p *P256Point) BytesX() ([]byte, error) {
   178  	// This function is outlined to make the allocations inline in the caller
   179  	// rather than happen on the heap.
   180  	var out [p256ElementLength]byte
   181  	return p.bytesX(&out)
   182  }
   183  
   184  func (p *P256Point) bytesX(out *[p256ElementLength]byte) ([]byte, error) {
   185  	if p.z.IsZero() == 1 {
   186  		return nil, errors.New("P256 point is the point at infinity")
   187  	}
   188  
   189  	zinv := new(fiat.P256Element).Invert(&p.z)
   190  	x := new(fiat.P256Element).Mul(&p.x, zinv)
   191  
   192  	return append(out[:0], x.Bytes()...), nil
   193  }
   194  
   195  // BytesCompressed returns the compressed or infinity encoding of p, as
   196  // specified in SEC 1, Version 2.0, Section 2.3.3. Note that the encoding of the
   197  // point at infinity is shorter than all other encodings.
   198  func (p *P256Point) BytesCompressed() []byte {
   199  	// This function is outlined to make the allocations inline in the caller
   200  	// rather than happen on the heap.
   201  	var out [p256CompressedLength]byte
   202  	return p.bytesCompressed(&out)
   203  }
   204  
   205  func (p *P256Point) bytesCompressed(out *[p256CompressedLength]byte) []byte {
   206  	if p.z.IsZero() == 1 {
   207  		return append(out[:0], 0)
   208  	}
   209  
   210  	zinv := new(fiat.P256Element).Invert(&p.z)
   211  	x := new(fiat.P256Element).Mul(&p.x, zinv)
   212  	y := new(fiat.P256Element).Mul(&p.y, zinv)
   213  
   214  	// Encode the sign of the y coordinate (indicated by the least significant
   215  	// bit) as the encoding type (2 or 3).
   216  	buf := append(out[:0], 2)
   217  	buf[0] |= y.Bytes()[p256ElementLength-1] & 1
   218  	buf = append(buf, x.Bytes()...)
   219  	return buf
   220  }
   221  
   222  // Add sets q = p1 + p2, and returns q. The points may overlap.
   223  func (q *P256Point) Add(p1, p2 *P256Point) *P256Point {
   224  	// Complete addition formula for a = -3 from "Complete addition formulas for
   225  	// prime order elliptic curves" (https://eprint.iacr.org/2015/1060), §A.2.
   226  
   227  	t0 := new(fiat.P256Element).Mul(&p1.x, &p2.x) // t0 := X1 * X2
   228  	t1 := new(fiat.P256Element).Mul(&p1.y, &p2.y) // t1 := Y1 * Y2
   229  	t2 := new(fiat.P256Element).Mul(&p1.z, &p2.z) // t2 := Z1 * Z2
   230  	t3 := new(fiat.P256Element).Add(&p1.x, &p1.y) // t3 := X1 + Y1
   231  	t4 := new(fiat.P256Element).Add(&p2.x, &p2.y) // t4 := X2 + Y2
   232  	t3.Mul(t3, t4)                                // t3 := t3 * t4
   233  	t4.Add(t0, t1)                                // t4 := t0 + t1
   234  	t3.Sub(t3, t4)                                // t3 := t3 - t4
   235  	t4.Add(&p1.y, &p1.z)                          // t4 := Y1 + Z1
   236  	x3 := new(fiat.P256Element).Add(&p2.y, &p2.z) // X3 := Y2 + Z2
   237  	t4.Mul(t4, x3)                                // t4 := t4 * X3
   238  	x3.Add(t1, t2)                                // X3 := t1 + t2
   239  	t4.Sub(t4, x3)                                // t4 := t4 - X3
   240  	x3.Add(&p1.x, &p1.z)                          // X3 := X1 + Z1
   241  	y3 := new(fiat.P256Element).Add(&p2.x, &p2.z) // Y3 := X2 + Z2
   242  	x3.Mul(x3, y3)                                // X3 := X3 * Y3
   243  	y3.Add(t0, t2)                                // Y3 := t0 + t2
   244  	y3.Sub(x3, y3)                                // Y3 := X3 - Y3
   245  	z3 := new(fiat.P256Element).Mul(p256B(), t2)  // Z3 := b * t2
   246  	x3.Sub(y3, z3)                                // X3 := Y3 - Z3
   247  	z3.Add(x3, x3)                                // Z3 := X3 + X3
   248  	x3.Add(x3, z3)                                // X3 := X3 + Z3
   249  	z3.Sub(t1, x3)                                // Z3 := t1 - X3
   250  	x3.Add(t1, x3)                                // X3 := t1 + X3
   251  	y3.Mul(p256B(), y3)                           // Y3 := b * Y3
   252  	t1.Add(t2, t2)                                // t1 := t2 + t2
   253  	t2.Add(t1, t2)                                // t2 := t1 + t2
   254  	y3.Sub(y3, t2)                                // Y3 := Y3 - t2
   255  	y3.Sub(y3, t0)                                // Y3 := Y3 - t0
   256  	t1.Add(y3, y3)                                // t1 := Y3 + Y3
   257  	y3.Add(t1, y3)                                // Y3 := t1 + Y3
   258  	t1.Add(t0, t0)                                // t1 := t0 + t0
   259  	t0.Add(t1, t0)                                // t0 := t1 + t0
   260  	t0.Sub(t0, t2)                                // t0 := t0 - t2
   261  	t1.Mul(t4, y3)                                // t1 := t4 * Y3
   262  	t2.Mul(t0, y3)                                // t2 := t0 * Y3
   263  	y3.Mul(x3, z3)                                // Y3 := X3 * Z3
   264  	y3.Add(y3, t2)                                // Y3 := Y3 + t2
   265  	x3.Mul(t3, x3)                                // X3 := t3 * X3
   266  	x3.Sub(x3, t1)                                // X3 := X3 - t1
   267  	z3.Mul(t4, z3)                                // Z3 := t4 * Z3
   268  	t1.Mul(t3, t0)                                // t1 := t3 * t0
   269  	z3.Add(z3, t1)                                // Z3 := Z3 + t1
   270  
   271  	q.x.Set(x3)
   272  	q.y.Set(y3)
   273  	q.z.Set(z3)
   274  	return q
   275  }
   276  
   277  // Double sets q = p + p, and returns q. The points may overlap.
   278  func (q *P256Point) Double(p *P256Point) *P256Point {
   279  	// Complete addition formula for a = -3 from "Complete addition formulas for
   280  	// prime order elliptic curves" (https://eprint.iacr.org/2015/1060), §A.2.
   281  
   282  	t0 := new(fiat.P256Element).Square(&p.x)     // t0 := X ^ 2
   283  	t1 := new(fiat.P256Element).Square(&p.y)     // t1 := Y ^ 2
   284  	t2 := new(fiat.P256Element).Square(&p.z)     // t2 := Z ^ 2
   285  	t3 := new(fiat.P256Element).Mul(&p.x, &p.y)  // t3 := X * Y
   286  	t3.Add(t3, t3)                               // t3 := t3 + t3
   287  	z3 := new(fiat.P256Element).Mul(&p.x, &p.z)  // Z3 := X * Z
   288  	z3.Add(z3, z3)                               // Z3 := Z3 + Z3
   289  	y3 := new(fiat.P256Element).Mul(p256B(), t2) // Y3 := b * t2
   290  	y3.Sub(y3, z3)                               // Y3 := Y3 - Z3
   291  	x3 := new(fiat.P256Element).Add(y3, y3)      // X3 := Y3 + Y3
   292  	y3.Add(x3, y3)                               // Y3 := X3 + Y3
   293  	x3.Sub(t1, y3)                               // X3 := t1 - Y3
   294  	y3.Add(t1, y3)                               // Y3 := t1 + Y3
   295  	y3.Mul(x3, y3)                               // Y3 := X3 * Y3
   296  	x3.Mul(x3, t3)                               // X3 := X3 * t3
   297  	t3.Add(t2, t2)                               // t3 := t2 + t2
   298  	t2.Add(t2, t3)                               // t2 := t2 + t3
   299  	z3.Mul(p256B(), z3)                          // Z3 := b * Z3
   300  	z3.Sub(z3, t2)                               // Z3 := Z3 - t2
   301  	z3.Sub(z3, t0)                               // Z3 := Z3 - t0
   302  	t3.Add(z3, z3)                               // t3 := Z3 + Z3
   303  	z3.Add(z3, t3)                               // Z3 := Z3 + t3
   304  	t3.Add(t0, t0)                               // t3 := t0 + t0
   305  	t0.Add(t3, t0)                               // t0 := t3 + t0
   306  	t0.Sub(t0, t2)                               // t0 := t0 - t2
   307  	t0.Mul(t0, z3)                               // t0 := t0 * Z3
   308  	y3.Add(y3, t0)                               // Y3 := Y3 + t0
   309  	t0.Mul(&p.y, &p.z)                           // t0 := Y * Z
   310  	t0.Add(t0, t0)                               // t0 := t0 + t0
   311  	z3.Mul(t0, z3)                               // Z3 := t0 * Z3
   312  	x3.Sub(x3, z3)                               // X3 := X3 - Z3
   313  	z3.Mul(t0, t1)                               // Z3 := t0 * t1
   314  	z3.Add(z3, z3)                               // Z3 := Z3 + Z3
   315  	z3.Add(z3, z3)                               // Z3 := Z3 + Z3
   316  
   317  	q.x.Set(x3)
   318  	q.y.Set(y3)
   319  	q.z.Set(z3)
   320  	return q
   321  }
   322  
   323  // p256AffinePoint is a point in affine coordinates (x, y). x and y are still
   324  // Montgomery domain elements. The point can't be the point at infinity.
   325  type p256AffinePoint struct {
   326  	x, y fiat.P256Element
   327  }
   328  
   329  func (p *p256AffinePoint) Projective() *P256Point {
   330  	pp := &P256Point{x: p.x, y: p.y}
   331  	pp.z.One()
   332  	return pp
   333  }
   334  
   335  // AddAffine sets q = p1 + p2, if infinity == 0, and to p1 if infinity == 1.
   336  // p2 can't be the point at infinity as it can't be represented in affine
   337  // coordinates, instead callers can set p2 to an arbitrary point and set
   338  // infinity to 1.
   339  func (q *P256Point) AddAffine(p1 *P256Point, p2 *p256AffinePoint, infinity int) *P256Point {
   340  	// Complete mixed addition formula for a = -3 from "Complete addition
   341  	// formulas for prime order elliptic curves"
   342  	// (https://eprint.iacr.org/2015/1060), Algorithm 5.
   343  
   344  	t0 := new(fiat.P256Element).Mul(&p1.x, &p2.x)   // t0 ← X1 · X2
   345  	t1 := new(fiat.P256Element).Mul(&p1.y, &p2.y)   // t1 ← Y1 · Y2
   346  	t3 := new(fiat.P256Element).Add(&p2.x, &p2.y)   // t3 ← X2 + Y2
   347  	t4 := new(fiat.P256Element).Add(&p1.x, &p1.y)   // t4 ← X1 + Y1
   348  	t3.Mul(t3, t4)                                  // t3 ← t3 · t4
   349  	t4.Add(t0, t1)                                  // t4 ← t0 + t1
   350  	t3.Sub(t3, t4)                                  // t3 ← t3 − t4
   351  	t4.Mul(&p2.y, &p1.z)                            // t4 ← Y2 · Z1
   352  	t4.Add(t4, &p1.y)                               // t4 ← t4 + Y1
   353  	y3 := new(fiat.P256Element).Mul(&p2.x, &p1.z)   // Y3 ← X2 · Z1
   354  	y3.Add(y3, &p1.x)                               // Y3 ← Y3 + X1
   355  	z3 := new(fiat.P256Element).Mul(p256B(), &p1.z) // Z3 ← b  · Z1
   356  	x3 := new(fiat.P256Element).Sub(y3, z3)         // X3 ← Y3 − Z3
   357  	z3.Add(x3, x3)                                  // Z3 ← X3 + X3
   358  	x3.Add(x3, z3)                                  // X3 ← X3 + Z3
   359  	z3.Sub(t1, x3)                                  // Z3 ← t1 − X3
   360  	x3.Add(t1, x3)                                  // X3 ← t1 + X3
   361  	y3.Mul(p256B(), y3)                             // Y3 ← b  · Y3
   362  	t1.Add(&p1.z, &p1.z)                            // t1 ← Z1 + Z1
   363  	t2 := new(fiat.P256Element).Add(t1, &p1.z)      // t2 ← t1 + Z1
   364  	y3.Sub(y3, t2)                                  // Y3 ← Y3 − t2
   365  	y3.Sub(y3, t0)                                  // Y3 ← Y3 − t0
   366  	t1.Add(y3, y3)                                  // t1 ← Y3 + Y3
   367  	y3.Add(t1, y3)                                  // Y3 ← t1 + Y3
   368  	t1.Add(t0, t0)                                  // t1 ← t0 + t0
   369  	t0.Add(t1, t0)                                  // t0 ← t1 + t0
   370  	t0.Sub(t0, t2)                                  // t0 ← t0 − t2
   371  	t1.Mul(t4, y3)                                  // t1 ← t4 · Y3
   372  	t2.Mul(t0, y3)                                  // t2 ← t0 · Y3
   373  	y3.Mul(x3, z3)                                  // Y3 ← X3 · Z3
   374  	y3.Add(y3, t2)                                  // Y3 ← Y3 + t2
   375  	x3.Mul(t3, x3)                                  // X3 ← t3 · X3
   376  	x3.Sub(x3, t1)                                  // X3 ← X3 − t1
   377  	z3.Mul(t4, z3)                                  // Z3 ← t4 · Z3
   378  	t1.Mul(t3, t0)                                  // t1 ← t3 · t0
   379  	z3.Add(z3, t1)                                  // Z3 ← Z3 + t1
   380  
   381  	q.x.Select(&p1.x, x3, infinity)
   382  	q.y.Select(&p1.y, y3, infinity)
   383  	q.z.Select(&p1.z, z3, infinity)
   384  	return q
   385  }
   386  
   387  // Select sets q to p1 if cond == 1, and to p2 if cond == 0.
   388  func (q *P256Point) Select(p1, p2 *P256Point, cond int) *P256Point {
   389  	q.x.Select(&p1.x, &p2.x, cond)
   390  	q.y.Select(&p1.y, &p2.y, cond)
   391  	q.z.Select(&p1.z, &p2.z, cond)
   392  	return q
   393  }
   394  
   395  // p256OrdElement is a P-256 scalar field element in [0, ord(G)-1] in the
   396  // Montgomery domain (with R 2²⁵⁶) as four uint64 limbs in little-endian order.
   397  type p256OrdElement [4]uint64
   398  
   399  // SetBytes sets s to the big-endian value of x, reducing it as necessary.
   400  func (s *p256OrdElement) SetBytes(x []byte) (*p256OrdElement, error) {
   401  	if len(x) != 32 {
   402  		return nil, errors.New("invalid scalar length")
   403  	}
   404  
   405  	s[0] = byteorder.BEUint64(x[24:])
   406  	s[1] = byteorder.BEUint64(x[16:])
   407  	s[2] = byteorder.BEUint64(x[8:])
   408  	s[3] = byteorder.BEUint64(x[:])
   409  
   410  	// Ensure s is in the range [0, ord(G)-1]. Since 2 * ord(G) > 2²⁵⁶, we can
   411  	// just conditionally subtract ord(G), keeping the result if it doesn't
   412  	// underflow.
   413  	t0, b := bits.Sub64(s[0], 0xf3b9cac2fc632551, 0)
   414  	t1, b := bits.Sub64(s[1], 0xbce6faada7179e84, b)
   415  	t2, b := bits.Sub64(s[2], 0xffffffffffffffff, b)
   416  	t3, b := bits.Sub64(s[3], 0xffffffff00000000, b)
   417  	tMask := b - 1 // zero if subtraction underflowed
   418  	s[0] ^= (t0 ^ s[0]) & tMask
   419  	s[1] ^= (t1 ^ s[1]) & tMask
   420  	s[2] ^= (t2 ^ s[2]) & tMask
   421  	s[3] ^= (t3 ^ s[3]) & tMask
   422  
   423  	return s, nil
   424  }
   425  
   426  func (s *p256OrdElement) Bytes() []byte {
   427  	var out [32]byte
   428  	byteorder.BEPutUint64(out[24:], s[0])
   429  	byteorder.BEPutUint64(out[16:], s[1])
   430  	byteorder.BEPutUint64(out[8:], s[2])
   431  	byteorder.BEPutUint64(out[:], s[3])
   432  	return out[:]
   433  }
   434  
   435  // Rsh returns the 64 least significant bits of x >> n. n must be lower
   436  // than 256. The value of n leaks through timing side-channels.
   437  func (s *p256OrdElement) Rsh(n int) uint64 {
   438  	i := n / 64
   439  	n = n % 64
   440  	res := s[i] >> n
   441  	// Shift in the more significant limb, if present.
   442  	if i := i + 1; i < len(s) {
   443  		res |= s[i] << (64 - n)
   444  	}
   445  	return res
   446  }
   447  
   448  // p256Table is a table of the first 16 multiples of a point. Points are stored
   449  // at an index offset of -1 so [8]P is at index 7, P is at 0, and [16]P is at 15.
   450  // [0]P is the point at infinity and it's not stored.
   451  type p256Table [16]P256Point
   452  
   453  // Select selects the n-th multiple of the table base point into p. It works in
   454  // constant time. n must be in [0, 16]. If n is 0, p is set to the identity point.
   455  func (table *p256Table) Select(p *P256Point, n uint8) {
   456  	if n > 16 {
   457  		panic("nistec: internal error: p256Table called with out-of-bounds value")
   458  	}
   459  	p.Set(NewP256Point())
   460  	for i := uint8(1); i <= 16; i++ {
   461  		cond := subtle.ConstantTimeByteEq(i, n)
   462  		p.Select(&table[i-1], p, cond)
   463  	}
   464  }
   465  
   466  // Compute populates the table to the first 16 multiples of q.
   467  func (table *p256Table) Compute(q *P256Point) *p256Table {
   468  	table[0].Set(q)
   469  	for i := 1; i < 16; i += 2 {
   470  		table[i].Double(&table[i/2])
   471  		if i+1 < 16 {
   472  			table[i+1].Add(&table[i], q)
   473  		}
   474  	}
   475  	return table
   476  }
   477  
   478  func boothW5(in uint64) (uint8, int) {
   479  	s := ^((in >> 5) - 1)
   480  	d := (1 << 6) - in - 1
   481  	d = (d & s) | (in & (^s))
   482  	d = (d >> 1) + (d & 1)
   483  	return uint8(d), int(s & 1)
   484  }
   485  
   486  // ScalarMult sets r = scalar * q, where scalar is a 32-byte big endian value,
   487  // and returns r. If scalar is not 32 bytes long, ScalarMult returns an error
   488  // and the receiver is unchanged.
   489  func (p *P256Point) ScalarMult(q *P256Point, scalar []byte) (*P256Point, error) {
   490  	s, err := new(p256OrdElement).SetBytes(scalar)
   491  	if err != nil {
   492  		return nil, err
   493  	}
   494  
   495  	// Start scanning the window from the most significant bits. We move by
   496  	// 5 bits at a time and need to finish at -1, so -1 + 5 * 51 = 254.
   497  	index := 254
   498  
   499  	sel, sign := boothW5(s.Rsh(index))
   500  	// sign is always zero because the boothW5 input here is at
   501  	// most two bits long, so the top bit is never set.
   502  	_ = sign
   503  
   504  	// Neither Select nor Add have exceptions for the point at infinity /
   505  	// selector zero, so we don't need to check for it here or in the loop.
   506  	table := new(p256Table).Compute(q)
   507  	table.Select(p, sel)
   508  
   509  	t := NewP256Point()
   510  	for index >= 4 {
   511  		index -= 5
   512  
   513  		p.Double(p)
   514  		p.Double(p)
   515  		p.Double(p)
   516  		p.Double(p)
   517  		p.Double(p)
   518  
   519  		if index >= 0 {
   520  			sel, sign = boothW5(s.Rsh(index) & 0b111111)
   521  		} else {
   522  			// Booth encoding considers a virtual zero bit at index -1,
   523  			// so we shift left the least significant limb.
   524  			wvalue := (s[0] << 1) & 0b111111
   525  			sel, sign = boothW5(wvalue)
   526  		}
   527  
   528  		table.Select(t, sel)
   529  		t.Negate(sign)
   530  		p.Add(p, t)
   531  	}
   532  
   533  	return p, nil
   534  }
   535  
   536  // Negate sets p to -p, if cond == 1, and to p if cond == 0.
   537  func (p *P256Point) Negate(cond int) *P256Point {
   538  	negY := new(fiat.P256Element)
   539  	negY.Sub(negY, &p.y)
   540  	p.y.Select(negY, &p.y, cond)
   541  	return p
   542  }
   543  
   544  // p256AffineTable is a table of the first 32 multiples of a point. Points are
   545  // stored at an index offset of -1 like in p256Table, and [0]P is not stored.
   546  type p256AffineTable [32]p256AffinePoint
   547  
   548  // Select selects the n-th multiple of the table base point into p. It works in
   549  // constant time. n can be in [0, 32], but (unlike p256Table.Select) if n is 0,
   550  // p is set to an undefined value.
   551  func (table *p256AffineTable) Select(p *p256AffinePoint, n uint8) {
   552  	if n > 32 {
   553  		panic("nistec: internal error: p256AffineTable.Select called with out-of-bounds value")
   554  	}
   555  	for i := uint8(1); i <= 32; i++ {
   556  		cond := subtle.ConstantTimeByteEq(i, n)
   557  		p.x.Select(&table[i-1].x, &p.x, cond)
   558  		p.y.Select(&table[i-1].y, &p.y, cond)
   559  	}
   560  }
   561  
   562  // p256GeneratorTables is a series of precomputed multiples of G, the canonical
   563  // generator. The first p256AffineTable contains multiples of G. The second one
   564  // multiples of [2⁶]G, the third one of [2¹²]G, and so on, where each successive
   565  // table is the previous table doubled six times. Six is the width of the
   566  // sliding window used in ScalarBaseMult, and having each table already
   567  // pre-doubled lets us avoid the doublings between windows entirely. This table
   568  // aliases into p256PrecomputedEmbed.
   569  var p256GeneratorTables *[43]p256AffineTable
   570  
   571  func init() {
   572  	p256GeneratorTablesPtr := unsafe.Pointer(&p256PrecomputedEmbed)
   573  	if cpu.BigEndian {
   574  		var newTable [43 * 32 * 2 * 4]uint64
   575  		for i, x := range (*[43 * 32 * 2 * 4][8]byte)(p256GeneratorTablesPtr) {
   576  			newTable[i] = byteorder.LEUint64(x[:])
   577  		}
   578  		p256GeneratorTablesPtr = unsafe.Pointer(&newTable)
   579  	}
   580  	p256GeneratorTables = (*[43]p256AffineTable)(p256GeneratorTablesPtr)
   581  }
   582  
   583  func boothW6(in uint64) (uint8, int) {
   584  	s := ^((in >> 6) - 1)
   585  	d := (1 << 7) - in - 1
   586  	d = (d & s) | (in & (^s))
   587  	d = (d >> 1) + (d & 1)
   588  	return uint8(d), int(s & 1)
   589  }
   590  
   591  // ScalarBaseMult sets p = scalar * generator, where scalar is a 32-byte big
   592  // endian value, and returns r. If scalar is not 32 bytes long, ScalarBaseMult
   593  // returns an error and the receiver is unchanged.
   594  func (p *P256Point) ScalarBaseMult(scalar []byte) (*P256Point, error) {
   595  	// This function works like ScalarMult above, but the table is fixed and
   596  	// "pre-doubled" for each iteration, so instead of doubling we move to the
   597  	// next table at each iteration.
   598  
   599  	s, err := new(p256OrdElement).SetBytes(scalar)
   600  	if err != nil {
   601  		return nil, err
   602  	}
   603  
   604  	// Start scanning the window from the most significant bits. We move by
   605  	// 6 bits at a time and need to finish at -1, so -1 + 6 * 42 = 251.
   606  	index := 251
   607  
   608  	sel, sign := boothW6(s.Rsh(index))
   609  	// sign is always zero because the boothW6 input here is at
   610  	// most five bits long, so the top bit is never set.
   611  	_ = sign
   612  
   613  	t := &p256AffinePoint{}
   614  	table := &p256GeneratorTables[(index+1)/6]
   615  	table.Select(t, sel)
   616  
   617  	// Select's output is undefined if the selector is zero, when it should be
   618  	// the point at infinity (because infinity can't be represented in affine
   619  	// coordinates). Here we conditionally set p to the infinity if sel is zero.
   620  	// In the loop, that's handled by AddAffine.
   621  	selIsZero := subtle.ConstantTimeByteEq(sel, 0)
   622  	p.Select(NewP256Point(), t.Projective(), selIsZero)
   623  
   624  	for index >= 5 {
   625  		index -= 6
   626  
   627  		if index >= 0 {
   628  			sel, sign = boothW6(s.Rsh(index) & 0b1111111)
   629  		} else {
   630  			// Booth encoding considers a virtual zero bit at index -1,
   631  			// so we shift left the least significant limb.
   632  			wvalue := (s[0] << 1) & 0b1111111
   633  			sel, sign = boothW6(wvalue)
   634  		}
   635  
   636  		table := &p256GeneratorTables[(index+1)/6]
   637  		table.Select(t, sel)
   638  		t.Negate(sign)
   639  		selIsZero := subtle.ConstantTimeByteEq(sel, 0)
   640  		p.AddAffine(p, t, selIsZero)
   641  	}
   642  
   643  	return p, nil
   644  }
   645  
   646  // Negate sets p to -p, if cond == 1, and to p if cond == 0.
   647  func (p *p256AffinePoint) Negate(cond int) *p256AffinePoint {
   648  	negY := new(fiat.P256Element)
   649  	negY.Sub(negY, &p.y)
   650  	p.y.Select(negY, &p.y, cond)
   651  	return p
   652  }
   653  
   654  // p256Sqrt sets e to a square root of x. If x is not a square, p256Sqrt returns
   655  // false and e is unchanged. e and x can overlap.
   656  func p256Sqrt(e, x *fiat.P256Element) (isSquare bool) {
   657  	t0, t1 := new(fiat.P256Element), new(fiat.P256Element)
   658  
   659  	// Since p = 3 mod 4, exponentiation by (p + 1) / 4 yields a square root candidate.
   660  	//
   661  	// The sequence of 7 multiplications and 253 squarings is derived from the
   662  	// following addition chain generated with github.com/mmcloughlin/addchain v0.4.0.
   663  	//
   664  	//	_10       = 2*1
   665  	//	_11       = 1 + _10
   666  	//	_1100     = _11 << 2
   667  	//	_1111     = _11 + _1100
   668  	//	_11110000 = _1111 << 4
   669  	//	_11111111 = _1111 + _11110000
   670  	//	x16       = _11111111 << 8 + _11111111
   671  	//	x32       = x16 << 16 + x16
   672  	//	return      ((x32 << 32 + 1) << 96 + 1) << 94
   673  	//
   674  	p256Square(t0, x, 1)
   675  	t0.Mul(x, t0)
   676  	p256Square(t1, t0, 2)
   677  	t0.Mul(t0, t1)
   678  	p256Square(t1, t0, 4)
   679  	t0.Mul(t0, t1)
   680  	p256Square(t1, t0, 8)
   681  	t0.Mul(t0, t1)
   682  	p256Square(t1, t0, 16)
   683  	t0.Mul(t0, t1)
   684  	p256Square(t0, t0, 32)
   685  	t0.Mul(x, t0)
   686  	p256Square(t0, t0, 96)
   687  	t0.Mul(x, t0)
   688  	p256Square(t0, t0, 94)
   689  
   690  	// Check if the candidate t0 is indeed a square root of x.
   691  	t1.Square(t0)
   692  	if t1.Equal(x) != 1 {
   693  		return false
   694  	}
   695  	e.Set(t0)
   696  	return true
   697  }
   698  
   699  // p256Square sets e to the square of x, repeated n times > 1.
   700  func p256Square(e, x *fiat.P256Element, n int) {
   701  	e.Square(x)
   702  	for i := 1; i < n; i++ {
   703  		e.Square(e)
   704  	}
   705  }
   706  

View as plain text