Convert field code to strict C89 (+ long long, +__int128)

This makes the software more portable to embedded systems
 and static analysis tools.

Sadly, it can't result in identical binaries because C99 mixed
 declarations seem to make GCC emit superfluous stack-pointer
 updates. The compiler is also somewhat dependent on the
 declaration order.
This commit is contained in:
Gregory Maxwell 2015-01-23 05:48:27 +00:00
parent 3627437d80
commit 25b35c7ecb
4 changed files with 190 additions and 147 deletions

View file

@ -51,8 +51,8 @@ static void secp256k1_fe_normalize(secp256k1_fe_t *r) {
t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
/* Reduce t9 at the start so there will be at most a single carry from the first pass */ /* Reduce t9 at the start so there will be at most a single carry from the first pass */
uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
uint32_t m; uint32_t m;
uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
/* The first pass ensures the magnitude is 1, ... */ /* The first pass ensures the magnitude is 1, ... */
t0 += x * 0x3D1UL; t1 += (x << 6); t0 += x * 0x3D1UL; t1 += (x << 6);
@ -137,8 +137,8 @@ static void secp256k1_fe_normalize_var(secp256k1_fe_t *r) {
t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
/* Reduce t9 at the start so there will be at most a single carry from the first pass */ /* Reduce t9 at the start so there will be at most a single carry from the first pass */
uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
uint32_t m; uint32_t m;
uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
/* The first pass ensures the magnitude is 1, ... */ /* The first pass ensures the magnitude is 1, ... */
t0 += x * 0x3D1UL; t1 += (x << 6); t0 += x * 0x3D1UL; t1 += (x << 6);
@ -192,12 +192,12 @@ static int secp256k1_fe_normalizes_to_zero(secp256k1_fe_t *r) {
uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
/* Reduce t9 at the start so there will be at most a single carry from the first pass */
uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
/* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */ /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
uint32_t z0, z1; uint32_t z0, z1;
/* Reduce t9 at the start so there will be at most a single carry from the first pass */
uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
/* The first pass ensures the magnitude is 1, ... */ /* The first pass ensures the magnitude is 1, ... */
t0 += x * 0x3D1UL; t1 += (x << 6); t0 += x * 0x3D1UL; t1 += (x << 6);
t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; z0 = t0; z1 = t0 ^ 0x3D0UL; t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; z0 = t0; z1 = t0 ^ 0x3D0UL;
@ -218,23 +218,36 @@ static int secp256k1_fe_normalizes_to_zero(secp256k1_fe_t *r) {
} }
static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe_t *r) { static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe_t *r) {
uint32_t t0 = r->n[0], t9 = r->n[9]; uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
uint32_t z0, z1;
uint32_t x;
t0 = r->n[0];
t9 = r->n[9];
/* Reduce t9 at the start so there will be at most a single carry from the first pass */ /* Reduce t9 at the start so there will be at most a single carry from the first pass */
uint32_t x = t9 >> 22; x = t9 >> 22;
/* The first pass ensures the magnitude is 1, ... */ /* The first pass ensures the magnitude is 1, ... */
t0 += x * 0x3D1UL; t0 += x * 0x3D1UL;
/* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */ /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
uint32_t z0 = t0 & 0x3FFFFFFUL, z1 = z0 ^ 0x3D0UL; z0 = t0 & 0x3FFFFFFUL;
z1 = z0 ^ 0x3D0UL;
/* Fast return path should catch the majority of cases */ /* Fast return path should catch the majority of cases */
if ((z0 != 0UL) & (z1 != 0x3FFFFFFUL)) if ((z0 != 0UL) & (z1 != 0x3FFFFFFUL))
return 0; return 0;
uint32_t t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], t1 = r->n[1];
t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8]; t2 = r->n[2];
t3 = r->n[3];
t4 = r->n[4];
t5 = r->n[5];
t6 = r->n[6];
t7 = r->n[7];
t8 = r->n[8];
t9 &= 0x03FFFFFUL; t9 &= 0x03FFFFFUL;
t1 += (x << 6); t1 += (x << 6);
@ -266,11 +279,11 @@ SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a) {
} }
SECP256K1_INLINE static int secp256k1_fe_is_zero(const secp256k1_fe_t *a) { SECP256K1_INLINE static int secp256k1_fe_is_zero(const secp256k1_fe_t *a) {
const uint32_t *t = a->n;
#ifdef VERIFY #ifdef VERIFY
VERIFY_CHECK(a->normalized); VERIFY_CHECK(a->normalized);
secp256k1_fe_verify(a); secp256k1_fe_verify(a);
#endif #endif
const uint32_t *t = a->n;
return (t[0] | t[1] | t[2] | t[3] | t[4] | t[5] | t[6] | t[7] | t[8] | t[9]) == 0; return (t[0] | t[1] | t[2] | t[3] | t[4] | t[5] | t[6] | t[7] | t[8] | t[9]) == 0;
} }
@ -283,23 +296,25 @@ SECP256K1_INLINE static int secp256k1_fe_is_odd(const secp256k1_fe_t *a) {
} }
SECP256K1_INLINE static void secp256k1_fe_clear(secp256k1_fe_t *a) { SECP256K1_INLINE static void secp256k1_fe_clear(secp256k1_fe_t *a) {
int i;
#ifdef VERIFY #ifdef VERIFY
a->magnitude = 0; a->magnitude = 0;
a->normalized = 1; a->normalized = 1;
#endif #endif
for (int i=0; i<10; i++) { for (i=0; i<10; i++) {
a->n[i] = 0; a->n[i] = 0;
} }
} }
static int secp256k1_fe_cmp_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b) { static int secp256k1_fe_cmp_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
int i;
#ifdef VERIFY #ifdef VERIFY
VERIFY_CHECK(a->normalized); VERIFY_CHECK(a->normalized);
VERIFY_CHECK(b->normalized); VERIFY_CHECK(b->normalized);
secp256k1_fe_verify(a); secp256k1_fe_verify(a);
secp256k1_fe_verify(b); secp256k1_fe_verify(b);
#endif #endif
for (int i = 9; i >= 0; i--) { for (i = 9; i >= 0; i--) {
if (a->n[i] > b->n[i]) return 1; if (a->n[i] > b->n[i]) return 1;
if (a->n[i] < b->n[i]) return -1; if (a->n[i] < b->n[i]) return -1;
} }
@ -307,10 +322,12 @@ static int secp256k1_fe_cmp_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b
} }
static int secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) { static int secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) {
int i;
r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0; r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0; r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
for (int i=0; i<32; i++) { for (i=0; i<32; i++) {
for (int j=0; j<4; j++) { int j;
for (j=0; j<4; j++) {
int limb = (8*i+2*j)/26; int limb = (8*i+2*j)/26;
int shift = (8*i+2*j)%26; int shift = (8*i+2*j)%26;
r->n[limb] |= (uint32_t)((a[31-i] >> (2*j)) & 0x3) << shift; r->n[limb] |= (uint32_t)((a[31-i] >> (2*j)) & 0x3) << shift;
@ -329,13 +346,15 @@ static int secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) {
/** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */ /** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */
static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a) { static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a) {
int i;
#ifdef VERIFY #ifdef VERIFY
VERIFY_CHECK(a->normalized); VERIFY_CHECK(a->normalized);
secp256k1_fe_verify(a); secp256k1_fe_verify(a);
#endif #endif
for (int i=0; i<32; i++) { for (i=0; i<32; i++) {
int j;
int c = 0; int c = 0;
for (int j=0; j<4; j++) { for (j=0; j<4; j++) {
int limb = (8*i+2*j)/26; int limb = (8*i+2*j)/26;
int shift = (8*i+2*j)%26; int shift = (8*i+2*j)%26;
c |= ((a->n[limb] >> shift) & 0x3) << (2 * j); c |= ((a->n[limb] >> shift) & 0x3) << (2 * j);
@ -412,6 +431,11 @@ SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1
#endif #endif
SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) { SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) {
uint64_t c, d;
uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7;
const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
VERIFY_BITS(a[0], 30); VERIFY_BITS(a[0], 30);
VERIFY_BITS(a[1], 30); VERIFY_BITS(a[1], 30);
VERIFY_BITS(a[2], 30); VERIFY_BITS(a[2], 30);
@ -433,14 +457,11 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
VERIFY_BITS(b[8], 30); VERIFY_BITS(b[8], 30);
VERIFY_BITS(b[9], 26); VERIFY_BITS(b[9], 26);
const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
/** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n. /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n.
* px is a shorthand for sum(a[i]*b[x-i], i=0..x). * px is a shorthand for sum(a[i]*b[x-i], i=0..x).
* Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0]. * Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0].
*/ */
uint64_t c, d;
d = (uint64_t)a[0] * b[9] d = (uint64_t)a[0] * b[9]
+ (uint64_t)a[1] * b[8] + (uint64_t)a[1] * b[8]
+ (uint64_t)a[2] * b[7] + (uint64_t)a[2] * b[7]
@ -453,7 +474,7 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
+ (uint64_t)a[9] * b[0]; + (uint64_t)a[9] * b[0];
/* VERIFY_BITS(d, 64); */ /* VERIFY_BITS(d, 64); */
/* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
uint32_t t9 = d & M; d >>= 26; t9 = d & M; d >>= 26;
VERIFY_BITS(t9, 26); VERIFY_BITS(t9, 26);
VERIFY_BITS(d, 38); VERIFY_BITS(d, 38);
/* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
@ -472,12 +493,12 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
+ (uint64_t)a[9] * b[1]; + (uint64_t)a[9] * b[1];
VERIFY_BITS(d, 63); VERIFY_BITS(d, 63);
/* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
uint64_t u0 = d & M; d >>= 26; c += u0 * R0; u0 = d & M; d >>= 26; c += u0 * R0;
VERIFY_BITS(u0, 26); VERIFY_BITS(u0, 26);
VERIFY_BITS(d, 37); VERIFY_BITS(d, 37);
VERIFY_BITS(c, 61); VERIFY_BITS(c, 61);
/* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
uint32_t t0 = c & M; c >>= 26; c += u0 * R1; t0 = c & M; c >>= 26; c += u0 * R1;
VERIFY_BITS(t0, 26); VERIFY_BITS(t0, 26);
VERIFY_BITS(c, 37); VERIFY_BITS(c, 37);
/* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
@ -497,12 +518,12 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
+ (uint64_t)a[9] * b[2]; + (uint64_t)a[9] * b[2];
VERIFY_BITS(d, 63); VERIFY_BITS(d, 63);
/* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
uint64_t u1 = d & M; d >>= 26; c += u1 * R0; u1 = d & M; d >>= 26; c += u1 * R0;
VERIFY_BITS(u1, 26); VERIFY_BITS(u1, 26);
VERIFY_BITS(d, 37); VERIFY_BITS(d, 37);
VERIFY_BITS(c, 63); VERIFY_BITS(c, 63);
/* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
uint32_t t1 = c & M; c >>= 26; c += u1 * R1; t1 = c & M; c >>= 26; c += u1 * R1;
VERIFY_BITS(t1, 26); VERIFY_BITS(t1, 26);
VERIFY_BITS(c, 38); VERIFY_BITS(c, 38);
/* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
@ -522,12 +543,12 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
+ (uint64_t)a[9] * b[3]; + (uint64_t)a[9] * b[3];
VERIFY_BITS(d, 63); VERIFY_BITS(d, 63);
/* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
uint64_t u2 = d & M; d >>= 26; c += u2 * R0; u2 = d & M; d >>= 26; c += u2 * R0;
VERIFY_BITS(u2, 26); VERIFY_BITS(u2, 26);
VERIFY_BITS(d, 37); VERIFY_BITS(d, 37);
VERIFY_BITS(c, 63); VERIFY_BITS(c, 63);
/* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
uint32_t t2 = c & M; c >>= 26; c += u2 * R1; t2 = c & M; c >>= 26; c += u2 * R1;
VERIFY_BITS(t2, 26); VERIFY_BITS(t2, 26);
VERIFY_BITS(c, 38); VERIFY_BITS(c, 38);
/* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
@ -547,12 +568,12 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
+ (uint64_t)a[9] * b[4]; + (uint64_t)a[9] * b[4];
VERIFY_BITS(d, 63); VERIFY_BITS(d, 63);
/* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
uint64_t u3 = d & M; d >>= 26; c += u3 * R0; u3 = d & M; d >>= 26; c += u3 * R0;
VERIFY_BITS(u3, 26); VERIFY_BITS(u3, 26);
VERIFY_BITS(d, 37); VERIFY_BITS(d, 37);
/* VERIFY_BITS(c, 64); */ /* VERIFY_BITS(c, 64); */
/* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
uint32_t t3 = c & M; c >>= 26; c += u3 * R1; t3 = c & M; c >>= 26; c += u3 * R1;
VERIFY_BITS(t3, 26); VERIFY_BITS(t3, 26);
VERIFY_BITS(c, 39); VERIFY_BITS(c, 39);
/* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
@ -572,12 +593,12 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
+ (uint64_t)a[9] * b[5]; + (uint64_t)a[9] * b[5];
VERIFY_BITS(d, 62); VERIFY_BITS(d, 62);
/* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
uint64_t u4 = d & M; d >>= 26; c += u4 * R0; u4 = d & M; d >>= 26; c += u4 * R0;
VERIFY_BITS(u4, 26); VERIFY_BITS(u4, 26);
VERIFY_BITS(d, 36); VERIFY_BITS(d, 36);
/* VERIFY_BITS(c, 64); */ /* VERIFY_BITS(c, 64); */
/* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
uint32_t t4 = c & M; c >>= 26; c += u4 * R1; t4 = c & M; c >>= 26; c += u4 * R1;
VERIFY_BITS(t4, 26); VERIFY_BITS(t4, 26);
VERIFY_BITS(c, 39); VERIFY_BITS(c, 39);
/* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
@ -597,12 +618,12 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
+ (uint64_t)a[9] * b[6]; + (uint64_t)a[9] * b[6];
VERIFY_BITS(d, 62); VERIFY_BITS(d, 62);
/* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
uint64_t u5 = d & M; d >>= 26; c += u5 * R0; u5 = d & M; d >>= 26; c += u5 * R0;
VERIFY_BITS(u5, 26); VERIFY_BITS(u5, 26);
VERIFY_BITS(d, 36); VERIFY_BITS(d, 36);
/* VERIFY_BITS(c, 64); */ /* VERIFY_BITS(c, 64); */
/* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
uint32_t t5 = c & M; c >>= 26; c += u5 * R1; t5 = c & M; c >>= 26; c += u5 * R1;
VERIFY_BITS(t5, 26); VERIFY_BITS(t5, 26);
VERIFY_BITS(c, 39); VERIFY_BITS(c, 39);
/* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
@ -622,12 +643,12 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
+ (uint64_t)a[9] * b[7]; + (uint64_t)a[9] * b[7];
VERIFY_BITS(d, 61); VERIFY_BITS(d, 61);
/* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
uint64_t u6 = d & M; d >>= 26; c += u6 * R0; u6 = d & M; d >>= 26; c += u6 * R0;
VERIFY_BITS(u6, 26); VERIFY_BITS(u6, 26);
VERIFY_BITS(d, 35); VERIFY_BITS(d, 35);
/* VERIFY_BITS(c, 64); */ /* VERIFY_BITS(c, 64); */
/* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
uint32_t t6 = c & M; c >>= 26; c += u6 * R1; t6 = c & M; c >>= 26; c += u6 * R1;
VERIFY_BITS(t6, 26); VERIFY_BITS(t6, 26);
VERIFY_BITS(c, 39); VERIFY_BITS(c, 39);
/* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
@ -648,13 +669,13 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
+ (uint64_t)a[9] * b[8]; + (uint64_t)a[9] * b[8];
VERIFY_BITS(d, 58); VERIFY_BITS(d, 58);
/* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
uint64_t u7 = d & M; d >>= 26; c += u7 * R0; u7 = d & M; d >>= 26; c += u7 * R0;
VERIFY_BITS(u7, 26); VERIFY_BITS(u7, 26);
VERIFY_BITS(d, 32); VERIFY_BITS(d, 32);
/* VERIFY_BITS(c, 64); */ /* VERIFY_BITS(c, 64); */
VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL); VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
/* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
uint32_t t7 = c & M; c >>= 26; c += u7 * R1; t7 = c & M; c >>= 26; c += u7 * R1;
VERIFY_BITS(t7, 26); VERIFY_BITS(t7, 26);
VERIFY_BITS(c, 38); VERIFY_BITS(c, 38);
/* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
@ -675,7 +696,7 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
d += (uint64_t)a[9] * b[9]; d += (uint64_t)a[9] * b[9];
VERIFY_BITS(d, 57); VERIFY_BITS(d, 57);
/* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
uint64_t u8 = d & M; d >>= 26; c += u8 * R0; u8 = d & M; d >>= 26; c += u8 * R0;
VERIFY_BITS(u8, 26); VERIFY_BITS(u8, 26);
VERIFY_BITS(d, 31); VERIFY_BITS(d, 31);
/* VERIFY_BITS(c, 64); */ /* VERIFY_BITS(c, 64); */
@ -739,6 +760,11 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
} }
SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a) { SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a) {
uint64_t c, d;
uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
uint32_t t9, t0, t1, t2, t3, t4, t5, t6, t7;
const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
VERIFY_BITS(a[0], 30); VERIFY_BITS(a[0], 30);
VERIFY_BITS(a[1], 30); VERIFY_BITS(a[1], 30);
VERIFY_BITS(a[2], 30); VERIFY_BITS(a[2], 30);
@ -750,14 +776,11 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
VERIFY_BITS(a[8], 30); VERIFY_BITS(a[8], 30);
VERIFY_BITS(a[9], 26); VERIFY_BITS(a[9], 26);
const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
/** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n. /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n.
* px is a shorthand for sum(a[i]*a[x-i], i=0..x). * px is a shorthand for sum(a[i]*a[x-i], i=0..x).
* Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0]. * Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0].
*/ */
uint64_t c, d;
d = (uint64_t)(a[0]*2) * a[9] d = (uint64_t)(a[0]*2) * a[9]
+ (uint64_t)(a[1]*2) * a[8] + (uint64_t)(a[1]*2) * a[8]
+ (uint64_t)(a[2]*2) * a[7] + (uint64_t)(a[2]*2) * a[7]
@ -765,7 +788,7 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
+ (uint64_t)(a[4]*2) * a[5]; + (uint64_t)(a[4]*2) * a[5];
/* VERIFY_BITS(d, 64); */ /* VERIFY_BITS(d, 64); */
/* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
uint32_t t9 = d & M; d >>= 26; t9 = d & M; d >>= 26;
VERIFY_BITS(t9, 26); VERIFY_BITS(t9, 26);
VERIFY_BITS(d, 38); VERIFY_BITS(d, 38);
/* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
@ -780,12 +803,12 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
+ (uint64_t)a[5] * a[5]; + (uint64_t)a[5] * a[5];
VERIFY_BITS(d, 63); VERIFY_BITS(d, 63);
/* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
uint64_t u0 = d & M; d >>= 26; c += u0 * R0; u0 = d & M; d >>= 26; c += u0 * R0;
VERIFY_BITS(u0, 26); VERIFY_BITS(u0, 26);
VERIFY_BITS(d, 37); VERIFY_BITS(d, 37);
VERIFY_BITS(c, 61); VERIFY_BITS(c, 61);
/* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
uint32_t t0 = c & M; c >>= 26; c += u0 * R1; t0 = c & M; c >>= 26; c += u0 * R1;
VERIFY_BITS(t0, 26); VERIFY_BITS(t0, 26);
VERIFY_BITS(c, 37); VERIFY_BITS(c, 37);
/* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
@ -800,12 +823,12 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
+ (uint64_t)(a[5]*2) * a[6]; + (uint64_t)(a[5]*2) * a[6];
VERIFY_BITS(d, 63); VERIFY_BITS(d, 63);
/* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
uint64_t u1 = d & M; d >>= 26; c += u1 * R0; u1 = d & M; d >>= 26; c += u1 * R0;
VERIFY_BITS(u1, 26); VERIFY_BITS(u1, 26);
VERIFY_BITS(d, 37); VERIFY_BITS(d, 37);
VERIFY_BITS(c, 63); VERIFY_BITS(c, 63);
/* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
uint32_t t1 = c & M; c >>= 26; c += u1 * R1; t1 = c & M; c >>= 26; c += u1 * R1;
VERIFY_BITS(t1, 26); VERIFY_BITS(t1, 26);
VERIFY_BITS(c, 38); VERIFY_BITS(c, 38);
/* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
@ -821,12 +844,12 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
+ (uint64_t)a[6] * a[6]; + (uint64_t)a[6] * a[6];
VERIFY_BITS(d, 63); VERIFY_BITS(d, 63);
/* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
uint64_t u2 = d & M; d >>= 26; c += u2 * R0; u2 = d & M; d >>= 26; c += u2 * R0;
VERIFY_BITS(u2, 26); VERIFY_BITS(u2, 26);
VERIFY_BITS(d, 37); VERIFY_BITS(d, 37);
VERIFY_BITS(c, 63); VERIFY_BITS(c, 63);
/* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
uint32_t t2 = c & M; c >>= 26; c += u2 * R1; t2 = c & M; c >>= 26; c += u2 * R1;
VERIFY_BITS(t2, 26); VERIFY_BITS(t2, 26);
VERIFY_BITS(c, 38); VERIFY_BITS(c, 38);
/* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
@ -841,12 +864,12 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
+ (uint64_t)(a[6]*2) * a[7]; + (uint64_t)(a[6]*2) * a[7];
VERIFY_BITS(d, 63); VERIFY_BITS(d, 63);
/* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
uint64_t u3 = d & M; d >>= 26; c += u3 * R0; u3 = d & M; d >>= 26; c += u3 * R0;
VERIFY_BITS(u3, 26); VERIFY_BITS(u3, 26);
VERIFY_BITS(d, 37); VERIFY_BITS(d, 37);
/* VERIFY_BITS(c, 64); */ /* VERIFY_BITS(c, 64); */
/* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
uint32_t t3 = c & M; c >>= 26; c += u3 * R1; t3 = c & M; c >>= 26; c += u3 * R1;
VERIFY_BITS(t3, 26); VERIFY_BITS(t3, 26);
VERIFY_BITS(c, 39); VERIFY_BITS(c, 39);
/* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
@ -862,12 +885,12 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
+ (uint64_t)a[7] * a[7]; + (uint64_t)a[7] * a[7];
VERIFY_BITS(d, 62); VERIFY_BITS(d, 62);
/* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
uint64_t u4 = d & M; d >>= 26; c += u4 * R0; u4 = d & M; d >>= 26; c += u4 * R0;
VERIFY_BITS(u4, 26); VERIFY_BITS(u4, 26);
VERIFY_BITS(d, 36); VERIFY_BITS(d, 36);
/* VERIFY_BITS(c, 64); */ /* VERIFY_BITS(c, 64); */
/* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
uint32_t t4 = c & M; c >>= 26; c += u4 * R1; t4 = c & M; c >>= 26; c += u4 * R1;
VERIFY_BITS(t4, 26); VERIFY_BITS(t4, 26);
VERIFY_BITS(c, 39); VERIFY_BITS(c, 39);
/* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
@ -882,12 +905,12 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
+ (uint64_t)(a[7]*2) * a[8]; + (uint64_t)(a[7]*2) * a[8];
VERIFY_BITS(d, 62); VERIFY_BITS(d, 62);
/* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
uint64_t u5 = d & M; d >>= 26; c += u5 * R0; u5 = d & M; d >>= 26; c += u5 * R0;
VERIFY_BITS(u5, 26); VERIFY_BITS(u5, 26);
VERIFY_BITS(d, 36); VERIFY_BITS(d, 36);
/* VERIFY_BITS(c, 64); */ /* VERIFY_BITS(c, 64); */
/* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
uint32_t t5 = c & M; c >>= 26; c += u5 * R1; t5 = c & M; c >>= 26; c += u5 * R1;
VERIFY_BITS(t5, 26); VERIFY_BITS(t5, 26);
VERIFY_BITS(c, 39); VERIFY_BITS(c, 39);
/* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
@ -903,12 +926,12 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
+ (uint64_t)a[8] * a[8]; + (uint64_t)a[8] * a[8];
VERIFY_BITS(d, 61); VERIFY_BITS(d, 61);
/* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
uint64_t u6 = d & M; d >>= 26; c += u6 * R0; u6 = d & M; d >>= 26; c += u6 * R0;
VERIFY_BITS(u6, 26); VERIFY_BITS(u6, 26);
VERIFY_BITS(d, 35); VERIFY_BITS(d, 35);
/* VERIFY_BITS(c, 64); */ /* VERIFY_BITS(c, 64); */
/* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
uint32_t t6 = c & M; c >>= 26; c += u6 * R1; t6 = c & M; c >>= 26; c += u6 * R1;
VERIFY_BITS(t6, 26); VERIFY_BITS(t6, 26);
VERIFY_BITS(c, 39); VERIFY_BITS(c, 39);
/* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
@ -924,13 +947,13 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
d += (uint64_t)(a[8]*2) * a[9]; d += (uint64_t)(a[8]*2) * a[9];
VERIFY_BITS(d, 58); VERIFY_BITS(d, 58);
/* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
uint64_t u7 = d & M; d >>= 26; c += u7 * R0; u7 = d & M; d >>= 26; c += u7 * R0;
VERIFY_BITS(u7, 26); VERIFY_BITS(u7, 26);
VERIFY_BITS(d, 32); VERIFY_BITS(d, 32);
/* VERIFY_BITS(c, 64); */ /* VERIFY_BITS(c, 64); */
VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL); VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
/* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
uint32_t t7 = c & M; c >>= 26; c += u7 * R1; t7 = c & M; c >>= 26; c += u7 * R1;
VERIFY_BITS(t7, 26); VERIFY_BITS(t7, 26);
VERIFY_BITS(c, 38); VERIFY_BITS(c, 38);
/* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
@ -947,7 +970,7 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t
d += (uint64_t)a[9] * a[9]; d += (uint64_t)a[9] * a[9];
VERIFY_BITS(d, 57); VERIFY_BITS(d, 57);
/* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
uint64_t u8 = d & M; d >>= 26; c += u8 * R0; u8 = d & M; d >>= 26; c += u8 * R0;
VERIFY_BITS(u8, 26); VERIFY_BITS(u8, 26);
VERIFY_BITS(d, 31); VERIFY_BITS(d, 31);
/* VERIFY_BITS(c, 64); */ /* VERIFY_BITS(c, 64); */

View file

@ -59,8 +59,8 @@ static void secp256k1_fe_normalize(secp256k1_fe_t *r) {
uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4]; uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
/* Reduce t4 at the start so there will be at most a single carry from the first pass */ /* Reduce t4 at the start so there will be at most a single carry from the first pass */
uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
uint64_t m; uint64_t m;
uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
/* The first pass ensures the magnitude is 1, ... */ /* The first pass ensures the magnitude is 1, ... */
t0 += x * 0x1000003D1ULL; t0 += x * 0x1000003D1ULL;
@ -126,8 +126,8 @@ static void secp256k1_fe_normalize_var(secp256k1_fe_t *r) {
uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4]; uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
/* Reduce t4 at the start so there will be at most a single carry from the first pass */ /* Reduce t4 at the start so there will be at most a single carry from the first pass */
uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
uint64_t m; uint64_t m;
uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
/* The first pass ensures the magnitude is 1, ... */ /* The first pass ensures the magnitude is 1, ... */
t0 += x * 0x1000003D1ULL; t0 += x * 0x1000003D1ULL;
@ -169,12 +169,12 @@ static void secp256k1_fe_normalize_var(secp256k1_fe_t *r) {
static int secp256k1_fe_normalizes_to_zero(secp256k1_fe_t *r) { static int secp256k1_fe_normalizes_to_zero(secp256k1_fe_t *r) {
uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4]; uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
/* Reduce t4 at the start so there will be at most a single carry from the first pass */
uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
/* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */ /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
uint64_t z0, z1; uint64_t z0, z1;
/* Reduce t4 at the start so there will be at most a single carry from the first pass */
uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
/* The first pass ensures the magnitude is 1, ... */ /* The first pass ensures the magnitude is 1, ... */
t0 += x * 0x1000003D1ULL; t0 += x * 0x1000003D1ULL;
t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL; z0 = t0; z1 = t0 ^ 0x1000003D0ULL; t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL; z0 = t0; z1 = t0 ^ 0x1000003D0ULL;
@ -190,22 +190,31 @@ static int secp256k1_fe_normalizes_to_zero(secp256k1_fe_t *r) {
} }
static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe_t *r) { static int secp256k1_fe_normalizes_to_zero_var(secp256k1_fe_t *r) {
uint64_t t0 = r->n[0], t4 = r->n[4]; uint64_t t0, t1, t2, t3, t4;
uint64_t z0, z1;
uint64_t x;
t0 = r->n[0];
t4 = r->n[4];
/* Reduce t4 at the start so there will be at most a single carry from the first pass */ /* Reduce t4 at the start so there will be at most a single carry from the first pass */
uint64_t x = t4 >> 48; x = t4 >> 48;
/* The first pass ensures the magnitude is 1, ... */ /* The first pass ensures the magnitude is 1, ... */
t0 += x * 0x1000003D1ULL; t0 += x * 0x1000003D1ULL;
/* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */ /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
uint64_t z0 = t0 & 0xFFFFFFFFFFFFFULL, z1 = z0 ^ 0x1000003D0ULL; z0 = t0 & 0xFFFFFFFFFFFFFULL;
z1 = z0 ^ 0x1000003D0ULL;
/* Fast return path should catch the majority of cases */ /* Fast return path should catch the majority of cases */
if ((z0 != 0ULL) & (z1 != 0xFFFFFFFFFFFFFULL)) if ((z0 != 0ULL) & (z1 != 0xFFFFFFFFFFFFFULL))
return 0; return 0;
uint64_t t1 = r->n[1], t2 = r->n[2], t3 = r->n[3]; t1 = r->n[1];
t2 = r->n[2];
t3 = r->n[3];
t4 &= 0x0FFFFFFFFFFFFULL; t4 &= 0x0FFFFFFFFFFFFULL;
t1 += (t0 >> 52); t0 = z0; t1 += (t0 >> 52); t0 = z0;
@ -231,11 +240,11 @@ SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a) {
} }
SECP256K1_INLINE static int secp256k1_fe_is_zero(const secp256k1_fe_t *a) { SECP256K1_INLINE static int secp256k1_fe_is_zero(const secp256k1_fe_t *a) {
const uint64_t *t = a->n;
#ifdef VERIFY #ifdef VERIFY
VERIFY_CHECK(a->normalized); VERIFY_CHECK(a->normalized);
secp256k1_fe_verify(a); secp256k1_fe_verify(a);
#endif #endif
const uint64_t *t = a->n;
return (t[0] | t[1] | t[2] | t[3] | t[4]) == 0; return (t[0] | t[1] | t[2] | t[3] | t[4]) == 0;
} }
@ -248,23 +257,25 @@ SECP256K1_INLINE static int secp256k1_fe_is_odd(const secp256k1_fe_t *a) {
} }
SECP256K1_INLINE static void secp256k1_fe_clear(secp256k1_fe_t *a) { SECP256K1_INLINE static void secp256k1_fe_clear(secp256k1_fe_t *a) {
int i;
#ifdef VERIFY #ifdef VERIFY
a->magnitude = 0; a->magnitude = 0;
a->normalized = 1; a->normalized = 1;
#endif #endif
for (int i=0; i<5; i++) { for (i=0; i<5; i++) {
a->n[i] = 0; a->n[i] = 0;
} }
} }
static int secp256k1_fe_cmp_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b) { static int secp256k1_fe_cmp_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
int i;
#ifdef VERIFY #ifdef VERIFY
VERIFY_CHECK(a->normalized); VERIFY_CHECK(a->normalized);
VERIFY_CHECK(b->normalized); VERIFY_CHECK(b->normalized);
secp256k1_fe_verify(a); secp256k1_fe_verify(a);
secp256k1_fe_verify(b); secp256k1_fe_verify(b);
#endif #endif
for (int i = 4; i >= 0; i--) { for (i = 4; i >= 0; i--) {
if (a->n[i] > b->n[i]) return 1; if (a->n[i] > b->n[i]) return 1;
if (a->n[i] < b->n[i]) return -1; if (a->n[i] < b->n[i]) return -1;
} }
@ -272,9 +283,11 @@ static int secp256k1_fe_cmp_var(const secp256k1_fe_t *a, const secp256k1_fe_t *b
} }
static int secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) { static int secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) {
int i;
r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0; r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
for (int i=0; i<32; i++) { for (i=0; i<32; i++) {
for (int j=0; j<2; j++) { int j;
for (j=0; j<2; j++) {
int limb = (8*i+4*j)/52; int limb = (8*i+4*j)/52;
int shift = (8*i+4*j)%52; int shift = (8*i+4*j)%52;
r->n[limb] |= (uint64_t)((a[31-i] >> (4*j)) & 0xF) << shift; r->n[limb] |= (uint64_t)((a[31-i] >> (4*j)) & 0xF) << shift;
@ -293,13 +306,15 @@ static int secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) {
/** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */ /** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */
static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a) { static void secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a) {
int i;
#ifdef VERIFY #ifdef VERIFY
VERIFY_CHECK(a->normalized); VERIFY_CHECK(a->normalized);
secp256k1_fe_verify(a); secp256k1_fe_verify(a);
#endif #endif
for (int i=0; i<32; i++) { for (i=0; i<32; i++) {
int j;
int c = 0; int c = 0;
for (int j=0; j<2; j++) { for (j=0; j<2; j++) {
int limb = (8*i+4*j)/52; int limb = (8*i+4*j)/52;
int shift = (8*i+4*j)%52; int shift = (8*i+4*j)%52;
c |= ((a->n[limb] >> shift) & 0xF) << (4 * j); c |= ((a->n[limb] >> shift) & 0xF) << (4 * j);

View file

@ -16,6 +16,11 @@
#endif #endif
SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t * SECP256K1_RESTRICT b) { SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t * SECP256K1_RESTRICT b) {
__int128 c, d;
uint64_t t3, t4, tx, u0;
uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL;
VERIFY_BITS(a[0], 56); VERIFY_BITS(a[0], 56);
VERIFY_BITS(a[1], 56); VERIFY_BITS(a[1], 56);
VERIFY_BITS(a[2], 56); VERIFY_BITS(a[2], 56);
@ -28,16 +33,11 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t
VERIFY_BITS(b[4], 52); VERIFY_BITS(b[4], 52);
VERIFY_CHECK(r != b); VERIFY_CHECK(r != b);
const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL;
/* [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n. /* [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n.
* px is a shorthand for sum(a[i]*b[x-i], i=0..x). * px is a shorthand for sum(a[i]*b[x-i], i=0..x).
* Note that [x 0 0 0 0 0] = [x*R]. * Note that [x 0 0 0 0 0] = [x*R].
*/ */
uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
__int128 c, d;
d = (__int128)a0 * b[3] d = (__int128)a0 * b[3]
+ (__int128)a1 * b[2] + (__int128)a1 * b[2]
+ (__int128)a2 * b[1] + (__int128)a2 * b[1]
@ -51,7 +51,7 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t
VERIFY_BITS(d, 115); VERIFY_BITS(d, 115);
VERIFY_BITS(c, 60); VERIFY_BITS(c, 60);
/* [c 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ /* [c 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
uint64_t t3 = d & M; d >>= 52; t3 = d & M; d >>= 52;
VERIFY_BITS(t3, 52); VERIFY_BITS(t3, 52);
VERIFY_BITS(d, 63); VERIFY_BITS(d, 63);
/* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
@ -66,11 +66,11 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t
d += c * R; d += c * R;
VERIFY_BITS(d, 116); VERIFY_BITS(d, 116);
/* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ /* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
uint64_t t4 = d & M; d >>= 52; t4 = d & M; d >>= 52;
VERIFY_BITS(t4, 52); VERIFY_BITS(t4, 52);
VERIFY_BITS(d, 64); VERIFY_BITS(d, 64);
/* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ /* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
uint64_t tx = (t4 >> 48); t4 &= (M >> 4); tx = (t4 >> 48); t4 &= (M >> 4);
VERIFY_BITS(tx, 4); VERIFY_BITS(tx, 4);
VERIFY_BITS(t4, 48); VERIFY_BITS(t4, 48);
/* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ /* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
@ -84,7 +84,7 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t
+ (__int128)a4 * b[1]; + (__int128)a4 * b[1];
VERIFY_BITS(d, 115); VERIFY_BITS(d, 115);
/* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
uint64_t u0 = d & M; d >>= 52; u0 = d & M; d >>= 52;
VERIFY_BITS(u0, 52); VERIFY_BITS(u0, 52);
VERIFY_BITS(d, 63); VERIFY_BITS(d, 63);
/* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ /* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
@ -153,22 +153,22 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t
} }
SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a) { SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a) {
__int128 c, d;
uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
int64_t t3, t4, tx, u0;
const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL;
VERIFY_BITS(a[0], 56); VERIFY_BITS(a[0], 56);
VERIFY_BITS(a[1], 56); VERIFY_BITS(a[1], 56);
VERIFY_BITS(a[2], 56); VERIFY_BITS(a[2], 56);
VERIFY_BITS(a[3], 56); VERIFY_BITS(a[3], 56);
VERIFY_BITS(a[4], 52); VERIFY_BITS(a[4], 52);
const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL;
/** [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n. /** [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n.
* px is a shorthand for sum(a[i]*a[x-i], i=0..x). * px is a shorthand for sum(a[i]*a[x-i], i=0..x).
* Note that [x 0 0 0 0 0] = [x*R]. * Note that [x 0 0 0 0 0] = [x*R].
*/ */
__int128 c, d;
uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
d = (__int128)(a0*2) * a3 d = (__int128)(a0*2) * a3
+ (__int128)(a1*2) * a2; + (__int128)(a1*2) * a2;
VERIFY_BITS(d, 114); VERIFY_BITS(d, 114);
@ -180,7 +180,7 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t
VERIFY_BITS(d, 115); VERIFY_BITS(d, 115);
VERIFY_BITS(c, 60); VERIFY_BITS(c, 60);
/* [c 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ /* [c 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
uint64_t t3 = d & M; d >>= 52; t3 = d & M; d >>= 52;
VERIFY_BITS(t3, 52); VERIFY_BITS(t3, 52);
VERIFY_BITS(d, 63); VERIFY_BITS(d, 63);
/* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
@ -194,11 +194,11 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t
d += c * R; d += c * R;
VERIFY_BITS(d, 116); VERIFY_BITS(d, 116);
/* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ /* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
uint64_t t4 = d & M; d >>= 52; t4 = d & M; d >>= 52;
VERIFY_BITS(t4, 52); VERIFY_BITS(t4, 52);
VERIFY_BITS(d, 64); VERIFY_BITS(d, 64);
/* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ /* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
uint64_t tx = (t4 >> 48); t4 &= (M >> 4); tx = (t4 >> 48); t4 &= (M >> 4);
VERIFY_BITS(tx, 4); VERIFY_BITS(tx, 4);
VERIFY_BITS(t4, 48); VERIFY_BITS(t4, 48);
/* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ /* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
@ -210,7 +210,7 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t
+ (__int128)(a2*2) * a3; + (__int128)(a2*2) * a3;
VERIFY_BITS(d, 114); VERIFY_BITS(d, 114);
/* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
uint64_t u0 = d & M; d >>= 52; u0 = d & M; d >>= 52;
VERIFY_BITS(u0, 52); VERIFY_BITS(u0, 52);
VERIFY_BITS(d, 62); VERIFY_BITS(d, 62);
/* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ /* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */

View file

@ -22,16 +22,18 @@
#endif #endif
static void secp256k1_fe_get_hex(char *r, int *rlen, const secp256k1_fe_t *a) { static void secp256k1_fe_get_hex(char *r, int *rlen, const secp256k1_fe_t *a) {
secp256k1_fe_t b;
int i;
unsigned char tmp[32];
if (*rlen < 65) { if (*rlen < 65) {
*rlen = 65; *rlen = 65;
return; return;
} }
*rlen = 65; *rlen = 65;
unsigned char tmp[32]; b = *a;
secp256k1_fe_t b = *a;
secp256k1_fe_normalize(&b); secp256k1_fe_normalize(&b);
secp256k1_fe_get_b32(tmp, &b); secp256k1_fe_get_b32(tmp, &b);
for (int i=0; i<32; i++) { for (i=0; i<32; i++) {
static const char *c = "0123456789ABCDEF"; static const char *c = "0123456789ABCDEF";
r[2*i] = c[(tmp[i] >> 4) & 0xF]; r[2*i] = c[(tmp[i] >> 4) & 0xF];
r[2*i+1] = c[(tmp[i]) & 0xF]; r[2*i+1] = c[(tmp[i]) & 0xF];
@ -40,6 +42,7 @@ static void secp256k1_fe_get_hex(char *r, int *rlen, const secp256k1_fe_t *a) {
} }
static int secp256k1_fe_set_hex(secp256k1_fe_t *r, const char *a, int alen) { static int secp256k1_fe_set_hex(secp256k1_fe_t *r, const char *a, int alen) {
int i;
unsigned char tmp[32] = {0}; unsigned char tmp[32] = {0};
static const int cvt[256] = {0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0, static const int cvt[256] = {0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0, 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
@ -57,7 +60,7 @@ static int secp256k1_fe_set_hex(secp256k1_fe_t *r, const char *a, int alen) {
0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0, 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0, 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,
0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0}; 0, 0, 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0};
for (int i=0; i<32; i++) { for (i=0; i<32; i++) {
if (alen > i*2) if (alen > i*2)
tmp[32 - alen/2 + i] = (cvt[(unsigned char)a[2*i]] << 4) + cvt[(unsigned char)a[2*i+1]]; tmp[32 - alen/2 + i] = (cvt[(unsigned char)a[2*i]] << 4) + cvt[(unsigned char)a[2*i+1]];
} }
@ -72,62 +75,62 @@ SECP256K1_INLINE static int secp256k1_fe_equal_var(const secp256k1_fe_t *a, cons
} }
static int secp256k1_fe_sqrt_var(secp256k1_fe_t *r, const secp256k1_fe_t *a) { static int secp256k1_fe_sqrt_var(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
secp256k1_fe_t x2, x3, x6, x9, x11, x22, x44, x88, x176, x220, x223, t1;
int j;
/** The binary representation of (p + 1)/4 has 3 blocks of 1s, with lengths in /** The binary representation of (p + 1)/4 has 3 blocks of 1s, with lengths in
* { 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block: * { 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block:
* 1, [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223] * 1, [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223]
*/ */
secp256k1_fe_t x2;
secp256k1_fe_sqr(&x2, a); secp256k1_fe_sqr(&x2, a);
secp256k1_fe_mul(&x2, &x2, a); secp256k1_fe_mul(&x2, &x2, a);
secp256k1_fe_t x3;
secp256k1_fe_sqr(&x3, &x2); secp256k1_fe_sqr(&x3, &x2);
secp256k1_fe_mul(&x3, &x3, a); secp256k1_fe_mul(&x3, &x3, a);
secp256k1_fe_t x6 = x3; x6 = x3;
for (int j=0; j<3; j++) secp256k1_fe_sqr(&x6, &x6); for (j=0; j<3; j++) secp256k1_fe_sqr(&x6, &x6);
secp256k1_fe_mul(&x6, &x6, &x3); secp256k1_fe_mul(&x6, &x6, &x3);
secp256k1_fe_t x9 = x6; x9 = x6;
for (int j=0; j<3; j++) secp256k1_fe_sqr(&x9, &x9); for (j=0; j<3; j++) secp256k1_fe_sqr(&x9, &x9);
secp256k1_fe_mul(&x9, &x9, &x3); secp256k1_fe_mul(&x9, &x9, &x3);
secp256k1_fe_t x11 = x9; x11 = x9;
for (int j=0; j<2; j++) secp256k1_fe_sqr(&x11, &x11); for (j=0; j<2; j++) secp256k1_fe_sqr(&x11, &x11);
secp256k1_fe_mul(&x11, &x11, &x2); secp256k1_fe_mul(&x11, &x11, &x2);
secp256k1_fe_t x22 = x11; x22 = x11;
for (int j=0; j<11; j++) secp256k1_fe_sqr(&x22, &x22); for (j=0; j<11; j++) secp256k1_fe_sqr(&x22, &x22);
secp256k1_fe_mul(&x22, &x22, &x11); secp256k1_fe_mul(&x22, &x22, &x11);
secp256k1_fe_t x44 = x22; x44 = x22;
for (int j=0; j<22; j++) secp256k1_fe_sqr(&x44, &x44); for (j=0; j<22; j++) secp256k1_fe_sqr(&x44, &x44);
secp256k1_fe_mul(&x44, &x44, &x22); secp256k1_fe_mul(&x44, &x44, &x22);
secp256k1_fe_t x88 = x44; x88 = x44;
for (int j=0; j<44; j++) secp256k1_fe_sqr(&x88, &x88); for (j=0; j<44; j++) secp256k1_fe_sqr(&x88, &x88);
secp256k1_fe_mul(&x88, &x88, &x44); secp256k1_fe_mul(&x88, &x88, &x44);
secp256k1_fe_t x176 = x88; x176 = x88;
for (int j=0; j<88; j++) secp256k1_fe_sqr(&x176, &x176); for (j=0; j<88; j++) secp256k1_fe_sqr(&x176, &x176);
secp256k1_fe_mul(&x176, &x176, &x88); secp256k1_fe_mul(&x176, &x176, &x88);
secp256k1_fe_t x220 = x176; x220 = x176;
for (int j=0; j<44; j++) secp256k1_fe_sqr(&x220, &x220); for (j=0; j<44; j++) secp256k1_fe_sqr(&x220, &x220);
secp256k1_fe_mul(&x220, &x220, &x44); secp256k1_fe_mul(&x220, &x220, &x44);
secp256k1_fe_t x223 = x220; x223 = x220;
for (int j=0; j<3; j++) secp256k1_fe_sqr(&x223, &x223); for (j=0; j<3; j++) secp256k1_fe_sqr(&x223, &x223);
secp256k1_fe_mul(&x223, &x223, &x3); secp256k1_fe_mul(&x223, &x223, &x3);
/* The final result is then assembled using a sliding window over the blocks. */ /* The final result is then assembled using a sliding window over the blocks. */
secp256k1_fe_t t1 = x223; t1 = x223;
for (int j=0; j<23; j++) secp256k1_fe_sqr(&t1, &t1); for (j=0; j<23; j++) secp256k1_fe_sqr(&t1, &t1);
secp256k1_fe_mul(&t1, &t1, &x22); secp256k1_fe_mul(&t1, &t1, &x22);
for (int j=0; j<6; j++) secp256k1_fe_sqr(&t1, &t1); for (j=0; j<6; j++) secp256k1_fe_sqr(&t1, &t1);
secp256k1_fe_mul(&t1, &t1, &x2); secp256k1_fe_mul(&t1, &t1, &x2);
secp256k1_fe_sqr(&t1, &t1); secp256k1_fe_sqr(&t1, &t1);
secp256k1_fe_sqr(r, &t1); secp256k1_fe_sqr(r, &t1);
@ -139,66 +142,66 @@ static int secp256k1_fe_sqrt_var(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
} }
static void secp256k1_fe_inv(secp256k1_fe_t *r, const secp256k1_fe_t *a) { static void secp256k1_fe_inv(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
secp256k1_fe_t x2, x3, x6, x9, x11, x22, x44, x88, x176, x220, x223, t1;
int j;
/** The binary representation of (p - 2) has 5 blocks of 1s, with lengths in /** The binary representation of (p - 2) has 5 blocks of 1s, with lengths in
* { 1, 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block: * { 1, 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block:
* [1], [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223] * [1], [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223]
*/ */
secp256k1_fe_t x2;
secp256k1_fe_sqr(&x2, a); secp256k1_fe_sqr(&x2, a);
secp256k1_fe_mul(&x2, &x2, a); secp256k1_fe_mul(&x2, &x2, a);
secp256k1_fe_t x3;
secp256k1_fe_sqr(&x3, &x2); secp256k1_fe_sqr(&x3, &x2);
secp256k1_fe_mul(&x3, &x3, a); secp256k1_fe_mul(&x3, &x3, a);
secp256k1_fe_t x6 = x3; x6 = x3;
for (int j=0; j<3; j++) secp256k1_fe_sqr(&x6, &x6); for (j=0; j<3; j++) secp256k1_fe_sqr(&x6, &x6);
secp256k1_fe_mul(&x6, &x6, &x3); secp256k1_fe_mul(&x6, &x6, &x3);
secp256k1_fe_t x9 = x6; x9 = x6;
for (int j=0; j<3; j++) secp256k1_fe_sqr(&x9, &x9); for (j=0; j<3; j++) secp256k1_fe_sqr(&x9, &x9);
secp256k1_fe_mul(&x9, &x9, &x3); secp256k1_fe_mul(&x9, &x9, &x3);
secp256k1_fe_t x11 = x9; x11 = x9;
for (int j=0; j<2; j++) secp256k1_fe_sqr(&x11, &x11); for (j=0; j<2; j++) secp256k1_fe_sqr(&x11, &x11);
secp256k1_fe_mul(&x11, &x11, &x2); secp256k1_fe_mul(&x11, &x11, &x2);
secp256k1_fe_t x22 = x11; x22 = x11;
for (int j=0; j<11; j++) secp256k1_fe_sqr(&x22, &x22); for (j=0; j<11; j++) secp256k1_fe_sqr(&x22, &x22);
secp256k1_fe_mul(&x22, &x22, &x11); secp256k1_fe_mul(&x22, &x22, &x11);
secp256k1_fe_t x44 = x22; x44 = x22;
for (int j=0; j<22; j++) secp256k1_fe_sqr(&x44, &x44); for (j=0; j<22; j++) secp256k1_fe_sqr(&x44, &x44);
secp256k1_fe_mul(&x44, &x44, &x22); secp256k1_fe_mul(&x44, &x44, &x22);
secp256k1_fe_t x88 = x44; x88 = x44;
for (int j=0; j<44; j++) secp256k1_fe_sqr(&x88, &x88); for (j=0; j<44; j++) secp256k1_fe_sqr(&x88, &x88);
secp256k1_fe_mul(&x88, &x88, &x44); secp256k1_fe_mul(&x88, &x88, &x44);
secp256k1_fe_t x176 = x88; x176 = x88;
for (int j=0; j<88; j++) secp256k1_fe_sqr(&x176, &x176); for (j=0; j<88; j++) secp256k1_fe_sqr(&x176, &x176);
secp256k1_fe_mul(&x176, &x176, &x88); secp256k1_fe_mul(&x176, &x176, &x88);
secp256k1_fe_t x220 = x176; x220 = x176;
for (int j=0; j<44; j++) secp256k1_fe_sqr(&x220, &x220); for (j=0; j<44; j++) secp256k1_fe_sqr(&x220, &x220);
secp256k1_fe_mul(&x220, &x220, &x44); secp256k1_fe_mul(&x220, &x220, &x44);
secp256k1_fe_t x223 = x220; x223 = x220;
for (int j=0; j<3; j++) secp256k1_fe_sqr(&x223, &x223); for (j=0; j<3; j++) secp256k1_fe_sqr(&x223, &x223);
secp256k1_fe_mul(&x223, &x223, &x3); secp256k1_fe_mul(&x223, &x223, &x3);
/* The final result is then assembled using a sliding window over the blocks. */ /* The final result is then assembled using a sliding window over the blocks. */
secp256k1_fe_t t1 = x223; t1 = x223;
for (int j=0; j<23; j++) secp256k1_fe_sqr(&t1, &t1); for (j=0; j<23; j++) secp256k1_fe_sqr(&t1, &t1);
secp256k1_fe_mul(&t1, &t1, &x22); secp256k1_fe_mul(&t1, &t1, &x22);
for (int j=0; j<5; j++) secp256k1_fe_sqr(&t1, &t1); for (j=0; j<5; j++) secp256k1_fe_sqr(&t1, &t1);
secp256k1_fe_mul(&t1, &t1, a); secp256k1_fe_mul(&t1, &t1, a);
for (int j=0; j<3; j++) secp256k1_fe_sqr(&t1, &t1); for (j=0; j<3; j++) secp256k1_fe_sqr(&t1, &t1);
secp256k1_fe_mul(&t1, &t1, &x2); secp256k1_fe_mul(&t1, &t1, &x2);
for (int j=0; j<2; j++) secp256k1_fe_sqr(&t1, &t1); for (j=0; j<2; j++) secp256k1_fe_sqr(&t1, &t1);
secp256k1_fe_mul(r, a, &t1); secp256k1_fe_mul(r, a, &t1);
} }
@ -228,6 +231,8 @@ static void secp256k1_fe_inv_var(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
} }
static void secp256k1_fe_inv_all_var(size_t len, secp256k1_fe_t *r, const secp256k1_fe_t *a) { static void secp256k1_fe_inv_all_var(size_t len, secp256k1_fe_t *r, const secp256k1_fe_t *a) {
secp256k1_fe_t u;
size_t i;
if (len < 1) if (len < 1)
return; return;
@ -235,12 +240,12 @@ static void secp256k1_fe_inv_all_var(size_t len, secp256k1_fe_t *r, const secp25
r[0] = a[0]; r[0] = a[0];
size_t i = 0; i = 0;
while (++i < len) { while (++i < len) {
secp256k1_fe_mul(&r[i], &r[i - 1], &a[i]); secp256k1_fe_mul(&r[i], &r[i - 1], &a[i]);
} }
secp256k1_fe_t u; secp256k1_fe_inv_var(&u, &r[--i]); secp256k1_fe_inv_var(&u, &r[--i]);
while (i > 0) { while (i > 0) {
int j = i--; int j = i--;