Switch scalar code to C89.

Gregory Maxwell 2015-01-25 02:54:48 +00:00
parent 7137be8110
commit d9543c904f
4 changed files with 128 additions and 102 deletions
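
The change is mechanical: C89 requires every declaration in a block to come before the first statement, while C99 allows the two to be mixed. Each mid-block declaration below is therefore hoisted to the top of its scope, and its initializer becomes a plain assignment. A minimal sketch of the pattern (illustrative names, not code from this diff):

#include <stdint.h>

/* C99 style: declaration after a statement; a strict C89 compiler
 * (e.g. gcc -std=c89 -pedantic-errors) rejects this. */
static int f_c99(uint32_t *d, uint32_t overflow) {
    d[0] += overflow;          /* statement */
    uint32_t t = d[0] & 0xFF;  /* declaration after a statement: C99 only */
    return t != 0;
}

/* C89 style: the declaration is hoisted; the logic is unchanged. */
static int f_c89(uint32_t *d, uint32_t overflow) {
    uint32_t t;                /* all declarations precede all statements */
    d[0] += overflow;
    t = d[0] & 0xFF;
    return t != 0;
}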

src/field_impl.h

@@ -202,6 +202,7 @@ static void secp256k1_fe_inv_var(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
 #if defined(USE_FIELD_INV_BUILTIN)
     secp256k1_fe_inv(r, a);
 #elif defined(USE_FIELD_INV_NUM)
+    secp256k1_num_t n, m;
     static const unsigned char prime[32] = {
         0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
         0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
@@ -212,7 +213,6 @@ static void secp256k1_fe_inv_var(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
     secp256k1_fe_t c = *a;
     secp256k1_fe_normalize_var(&c);
     secp256k1_fe_get_b32(b, &c);
-    secp256k1_num_t n, m;
     secp256k1_num_set_bin(&n, b, 32);
     secp256k1_num_set_bin(&m, prime, 32);
     secp256k1_num_mod_inverse(&n, &n, &m);

src/scalar_4x64_impl.h

@@ -69,8 +69,9 @@ SECP256K1_INLINE static int secp256k1_scalar_check_overflow(const secp256k1_scalar_t *a) {
 }
 
 SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar_t *r, unsigned int overflow) {
+    uint128_t t;
     VERIFY_CHECK(overflow <= 1);
-    uint128_t t = (uint128_t)r->d[0] + overflow * SECP256K1_N_C_0;
+    t = (uint128_t)r->d[0] + overflow * SECP256K1_N_C_0;
     r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
     t += (uint128_t)r->d[1] + overflow * SECP256K1_N_C_1;
     r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
@@ -82,6 +83,7 @@ SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar_t *r, unsigned int overflow) {
 }
 
 static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
+    int overflow;
     uint128_t t = (uint128_t)a->d[0] + b->d[0];
     r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
     t += (uint128_t)a->d[1] + b->d[1];
@@ -90,15 +92,16 @@ static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
     r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
     t += (uint128_t)a->d[3] + b->d[3];
     r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
-    int overflow = t + secp256k1_scalar_check_overflow(r);
+    overflow = t + secp256k1_scalar_check_overflow(r);
     VERIFY_CHECK(overflow == 0 || overflow == 1);
     secp256k1_scalar_reduce(r, overflow);
     return overflow;
 }
 
 static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit) {
+    uint128_t t;
     VERIFY_CHECK(bit < 256);
-    uint128_t t = (uint128_t)r->d[0] + (((uint64_t)((bit >> 6) == 0)) << (bit & 0x3F));
+    t = (uint128_t)r->d[0] + (((uint64_t)((bit >> 6) == 0)) << (bit & 0x3F));
     r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
     t += (uint128_t)r->d[1] + (((uint64_t)((bit >> 6) == 1)) << (bit & 0x3F));
     r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
@@ -113,11 +116,12 @@ static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit) {
 }
 
 static void secp256k1_scalar_set_b32(secp256k1_scalar_t *r, const unsigned char *b32, int *overflow) {
+    int over;
     r->d[0] = (uint64_t)b32[31] | (uint64_t)b32[30] << 8 | (uint64_t)b32[29] << 16 | (uint64_t)b32[28] << 24 | (uint64_t)b32[27] << 32 | (uint64_t)b32[26] << 40 | (uint64_t)b32[25] << 48 | (uint64_t)b32[24] << 56;
     r->d[1] = (uint64_t)b32[23] | (uint64_t)b32[22] << 8 | (uint64_t)b32[21] << 16 | (uint64_t)b32[20] << 24 | (uint64_t)b32[19] << 32 | (uint64_t)b32[18] << 40 | (uint64_t)b32[17] << 48 | (uint64_t)b32[16] << 56;
     r->d[2] = (uint64_t)b32[15] | (uint64_t)b32[14] << 8 | (uint64_t)b32[13] << 16 | (uint64_t)b32[12] << 24 | (uint64_t)b32[11] << 32 | (uint64_t)b32[10] << 40 | (uint64_t)b32[9] << 48 | (uint64_t)b32[8] << 56;
     r->d[3] = (uint64_t)b32[7] | (uint64_t)b32[6] << 8 | (uint64_t)b32[5] << 16 | (uint64_t)b32[4] << 24 | (uint64_t)b32[3] << 32 | (uint64_t)b32[2] << 40 | (uint64_t)b32[1] << 48 | (uint64_t)b32[0] << 56;
-    int over = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r));
+    over = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r));
     if (overflow) {
         *overflow = over;
     }
@@ -195,16 +199,16 @@ static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) {
 
 /** Add 2*a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
 #define muladd2(a,b) { \
-    uint64_t tl, th; \
+    uint64_t tl, th, th2, tl2; \
     { \
         uint128_t t = (uint128_t)a * b; \
         th = t >> 64; /* at most 0xFFFFFFFFFFFFFFFE */ \
         tl = t; \
     } \
-    uint64_t th2 = th + th; /* at most 0xFFFFFFFFFFFFFFFE (in case th was 0x7FFFFFFFFFFFFFFF) */ \
+    th2 = th + th; /* at most 0xFFFFFFFFFFFFFFFE (in case th was 0x7FFFFFFFFFFFFFFF) */ \
     c2 += (th2 < th) ? 1 : 0; /* never overflows by contract (verified the next line) */ \
     VERIFY_CHECK((th2 >= th) || (c2 != 0)); \
-    uint64_t tl2 = tl + tl; /* at most 0xFFFFFFFFFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFFFFFFFFFF) */ \
+    tl2 = tl + tl; /* at most 0xFFFFFFFFFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFFFFFFFFFF) */ \
     th2 += (tl2 < tl) ? 1 : 0; /* at most 0xFFFFFFFFFFFFFFFF */ \
     c0 += tl2; /* overflow is handled on the next line */ \
     th2 += (c0 < tl2) ? 1 : 0; /* second overflow is handled on the next line */ \
@@ -217,8 +221,9 @@ static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) {
 
 /** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */
 #define sumadd(a) { \
+    unsigned int over; \
     c0 += (a); /* overflow is handled on the next line */ \
-    unsigned int over = (c0 < (a)) ? 1 : 0; \
+    over = (c0 < (a)) ? 1 : 0; \
     c1 += over; /* overflow is handled on the next line */ \
     c2 += (c1 < over) ? 1 : 0; /* never overflows by contract */ \
 }
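
Throughout these macros the carry out of an unsigned addition is recovered with a comparison rather than a wider type or an intrinsic: unsigned arithmetic wraps modulo 2^64, so after c0 += (a) the sum is smaller than (a) exactly when the addition carried. The same idiom as a standalone sketch (hypothetical helper, not part of this commit):

#include <stdint.h>

/* Portable add-with-carry-out: unsigned overflow wraps, so the wrapped
 * sum is less than either operand exactly when a carry occurred. */
static uint64_t add_carry(uint64_t a, uint64_t b, unsigned int *carry) {
    uint64_t s = a + b;       /* wraps modulo 2^64 on overflow (well defined) */
    *carry = (s < a) ? 1 : 0; /* carry out of the 64-bit addition */
    return s;
}
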
@@ -248,7 +253,12 @@ static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) {
 }
 
 static void secp256k1_scalar_reduce_512(secp256k1_scalar_t *r, const uint64_t *l) {
+    uint128_t c;
     uint64_t n0 = l[4], n1 = l[5], n2 = l[6], n3 = l[7];
+    uint64_t m0, m1, m2, m3, m4, m5;
+    uint32_t m6;
+    uint64_t p0, p1, p2, p3;
+    uint32_t p4;
 
     /* 160 bit accumulator. */
     uint64_t c0, c1;
@@ -258,53 +268,53 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar_t *r, const uint64_t *l) {
     /* m[0..6] = l[0..3] + n[0..3] * SECP256K1_N_C. */
     c0 = l[0]; c1 = 0; c2 = 0;
     muladd_fast(n0, SECP256K1_N_C_0);
-    uint64_t m0; extract_fast(m0);
+    extract_fast(m0);
     sumadd_fast(l[1]);
     muladd(n1, SECP256K1_N_C_0);
     muladd(n0, SECP256K1_N_C_1);
-    uint64_t m1; extract(m1);
+    extract(m1);
     sumadd(l[2]);
     muladd(n2, SECP256K1_N_C_0);
     muladd(n1, SECP256K1_N_C_1);
     sumadd(n0);
-    uint64_t m2; extract(m2);
+    extract(m2);
     sumadd(l[3]);
     muladd(n3, SECP256K1_N_C_0);
     muladd(n2, SECP256K1_N_C_1);
     sumadd(n1);
-    uint64_t m3; extract(m3);
+    extract(m3);
     muladd(n3, SECP256K1_N_C_1);
     sumadd(n2);
-    uint64_t m4; extract(m4);
+    extract(m4);
     sumadd_fast(n3);
-    uint64_t m5; extract_fast(m5);
+    extract_fast(m5);
     VERIFY_CHECK(c0 <= 1);
-    uint32_t m6 = c0;
+    m6 = c0;
 
     /* Reduce 385 bits into 258. */
     /* p[0..4] = m[0..3] + m[4..6] * SECP256K1_N_C. */
     c0 = m0; c1 = 0; c2 = 0;
     muladd_fast(m4, SECP256K1_N_C_0);
-    uint64_t p0; extract_fast(p0);
+    extract_fast(p0);
     sumadd_fast(m1);
     muladd(m5, SECP256K1_N_C_0);
     muladd(m4, SECP256K1_N_C_1);
-    uint64_t p1; extract(p1);
+    extract(p1);
     sumadd(m2);
     muladd(m6, SECP256K1_N_C_0);
     muladd(m5, SECP256K1_N_C_1);
     sumadd(m4);
-    uint64_t p2; extract(p2);
+    extract(p2);
     sumadd_fast(m3);
     muladd_fast(m6, SECP256K1_N_C_1);
     sumadd_fast(m5);
-    uint64_t p3; extract_fast(p3);
-    uint32_t p4 = c0 + m6;
+    extract_fast(p3);
+    p4 = c0 + m6;
     VERIFY_CHECK(p4 <= 2);
 
     /* Reduce 258 bits into 256. */
     /* r[0..3] = p[0..3] + p[4] * SECP256K1_N_C. */
-    uint128_t c = p0 + (uint128_t)SECP256K1_N_C_0 * p4;
+    c = p0 + (uint128_t)SECP256K1_N_C_0 * p4;
     r->d[0] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
     c += p1 + (uint128_t)SECP256K1_N_C_1 * p4;
     r->d[1] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
@@ -413,12 +423,15 @@ SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
 }
 
 SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b, unsigned int shift) {
-    VERIFY_CHECK(shift >= 256);
     uint64_t l[8];
+    unsigned int shiftlimbs;
+    unsigned int shiftlow;
+    unsigned int shifthigh;
+    VERIFY_CHECK(shift >= 256);
     secp256k1_scalar_mul_512(l, a, b);
-    unsigned int shiftlimbs = shift >> 6;
-    unsigned int shiftlow = shift & 0x3F;
-    unsigned int shifthigh = 64 - shiftlow;
+    shiftlimbs = shift >> 6;
+    shiftlow = shift & 0x3F;
+    shifthigh = 64 - shiftlow;
     r->d[0] = shift < 512 ? (l[0 + shiftlimbs] >> shiftlow | (shift < 448 && shiftlow ? (l[1 + shiftlimbs] << shifthigh) : 0)) : 0;
     r->d[1] = shift < 448 ? (l[1 + shiftlimbs] >> shiftlow | (shift < 384 && shiftlow ? (l[2 + shiftlimbs] << shifthigh) : 0)) : 0;
     r->d[2] = shift < 384 ? (l[2 + shiftlimbs] >> shiftlow | (shift < 320 && shiftlow ? (l[3 + shiftlimbs] << shifthigh) : 0)) : 0;
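
For reference, secp256k1_scalar_mul_shift_var above returns (a*b) >> shift: each destination limb combines the high bits of one source limb with the low bits of the next, and the `shiftlow ?` guard avoids an undefined shift by 64 when the offset is limb-aligned. The extraction as a standalone sketch (hypothetical helper name, assuming the same little-endian l[8] layout):

#include <stdint.h>

/* Read 64 bits of a 512-bit little-endian limb array starting at bit
 * offset shift + 64*limb; out-of-range limbs read as zero. */
static uint64_t window64(const uint64_t l[8], unsigned int shift, unsigned int limb) {
    unsigned int shiftlimbs = shift >> 6; /* whole limbs skipped */
    unsigned int shiftlow = shift & 0x3F; /* bit offset within a limb */
    unsigned int shifthigh = 64 - shiftlow;
    unsigned int idx = limb + shiftlimbs;
    uint64_t lo = (idx < 8) ? (l[idx] >> shiftlow) : 0;
    uint64_t hi = (idx + 1 < 8 && shiftlow) ? (l[idx + 1] << shifthigh) : 0;
    return lo | hi;
}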

src/scalar_8x32_impl.h

@@ -91,8 +91,9 @@ SECP256K1_INLINE static int secp256k1_scalar_check_overflow(const secp256k1_scalar_t *a) {
 }
 
 SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar_t *r, uint32_t overflow) {
+    uint64_t t;
     VERIFY_CHECK(overflow <= 1);
-    uint64_t t = (uint64_t)r->d[0] + overflow * SECP256K1_N_C_0;
+    t = (uint64_t)r->d[0] + overflow * SECP256K1_N_C_0;
     r->d[0] = t & 0xFFFFFFFFUL; t >>= 32;
     t += (uint64_t)r->d[1] + overflow * SECP256K1_N_C_1;
     r->d[1] = t & 0xFFFFFFFFUL; t >>= 32;
@@ -112,6 +113,7 @@ SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar_t *r, uint32_t overflow) {
 }
 
 static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
+    int overflow;
     uint64_t t = (uint64_t)a->d[0] + b->d[0];
     r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
     t += (uint64_t)a->d[1] + b->d[1];
@@ -128,15 +130,16 @@ static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
     r->d[6] = t & 0xFFFFFFFFULL; t >>= 32;
     t += (uint64_t)a->d[7] + b->d[7];
     r->d[7] = t & 0xFFFFFFFFULL; t >>= 32;
-    int overflow = t + secp256k1_scalar_check_overflow(r);
+    overflow = t + secp256k1_scalar_check_overflow(r);
     VERIFY_CHECK(overflow == 0 || overflow == 1);
     secp256k1_scalar_reduce(r, overflow);
     return overflow;
 }
 
 static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit) {
+    uint64_t t;
     VERIFY_CHECK(bit < 256);
-    uint64_t t = (uint64_t)r->d[0] + (((uint32_t)((bit >> 5) == 0)) << (bit & 0x1F));
+    t = (uint64_t)r->d[0] + (((uint32_t)((bit >> 5) == 0)) << (bit & 0x1F));
     r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
     t += (uint64_t)r->d[1] + (((uint32_t)((bit >> 5) == 1)) << (bit & 0x1F));
     r->d[1] = t & 0xFFFFFFFFULL; t >>= 32;
@@ -159,6 +162,7 @@ static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit) {
 }
 
 static void secp256k1_scalar_set_b32(secp256k1_scalar_t *r, const unsigned char *b32, int *overflow) {
+    int over;
     r->d[0] = (uint32_t)b32[31] | (uint32_t)b32[30] << 8 | (uint32_t)b32[29] << 16 | (uint32_t)b32[28] << 24;
     r->d[1] = (uint32_t)b32[27] | (uint32_t)b32[26] << 8 | (uint32_t)b32[25] << 16 | (uint32_t)b32[24] << 24;
     r->d[2] = (uint32_t)b32[23] | (uint32_t)b32[22] << 8 | (uint32_t)b32[21] << 16 | (uint32_t)b32[20] << 24;
@@ -167,7 +171,7 @@ static void secp256k1_scalar_set_b32(secp256k1_scalar_t *r, const unsigned char *b32, int *overflow) {
     r->d[5] = (uint32_t)b32[11] | (uint32_t)b32[10] << 8 | (uint32_t)b32[9] << 16 | (uint32_t)b32[8] << 24;
     r->d[6] = (uint32_t)b32[7] | (uint32_t)b32[6] << 8 | (uint32_t)b32[5] << 16 | (uint32_t)b32[4] << 24;
     r->d[7] = (uint32_t)b32[3] | (uint32_t)b32[2] << 8 | (uint32_t)b32[1] << 16 | (uint32_t)b32[0] << 24;
-    int over = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r));
+    over = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r));
     if (overflow) {
         *overflow = over;
     }
@@ -263,16 +267,16 @@ static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) {
 
 /** Add 2*a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
 #define muladd2(a,b) { \
-    uint32_t tl, th; \
+    uint32_t tl, th, th2, tl2; \
     { \
         uint64_t t = (uint64_t)a * b; \
         th = t >> 32; /* at most 0xFFFFFFFE */ \
         tl = t; \
     } \
-    uint32_t th2 = th + th; /* at most 0xFFFFFFFE (in case th was 0x7FFFFFFF) */ \
+    th2 = th + th; /* at most 0xFFFFFFFE (in case th was 0x7FFFFFFF) */ \
     c2 += (th2 < th) ? 1 : 0; /* never overflows by contract (verified the next line) */ \
     VERIFY_CHECK((th2 >= th) || (c2 != 0)); \
-    uint32_t tl2 = tl + tl; /* at most 0xFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFF) */ \
+    tl2 = tl + tl; /* at most 0xFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFF) */ \
     th2 += (tl2 < tl) ? 1 : 0; /* at most 0xFFFFFFFF */ \
     c0 += tl2; /* overflow is handled on the next line */ \
     th2 += (c0 < tl2) ? 1 : 0; /* second overflow is handled on the next line */ \
@@ -285,8 +289,9 @@ static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) {
 
 /** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */
 #define sumadd(a) { \
+    unsigned int over; \
     c0 += (a); /* overflow is handled on the next line */ \
-    unsigned int over = (c0 < (a)) ? 1 : 0; \
+    over = (c0 < (a)) ? 1 : 0; \
     c1 += over; /* overflow is handled on the next line */ \
     c2 += (c1 < over) ? 1 : 0; /* never overflows by contract */ \
 }
@@ -316,7 +321,10 @@ static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) {
 }
 
 static void secp256k1_scalar_reduce_512(secp256k1_scalar_t *r, const uint32_t *l) {
+    uint64_t c;
     uint32_t n0 = l[8], n1 = l[9], n2 = l[10], n3 = l[11], n4 = l[12], n5 = l[13], n6 = l[14], n7 = l[15];
+    uint32_t m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12;
+    uint32_t p0, p1, p2, p3, p4, p5, p6, p7, p8;
 
     /* 96 bit accumulator. */
     uint32_t c0, c1, c2;
@@ -325,115 +333,115 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar_t *r, const uint32_t *l) {
     /* m[0..12] = l[0..7] + n[0..7] * SECP256K1_N_C. */
     c0 = l[0]; c1 = 0; c2 = 0;
     muladd_fast(n0, SECP256K1_N_C_0);
-    uint32_t m0; extract_fast(m0);
+    extract_fast(m0);
     sumadd_fast(l[1]);
     muladd(n1, SECP256K1_N_C_0);
     muladd(n0, SECP256K1_N_C_1);
-    uint32_t m1; extract(m1);
+    extract(m1);
     sumadd(l[2]);
     muladd(n2, SECP256K1_N_C_0);
     muladd(n1, SECP256K1_N_C_1);
     muladd(n0, SECP256K1_N_C_2);
-    uint32_t m2; extract(m2);
+    extract(m2);
     sumadd(l[3]);
     muladd(n3, SECP256K1_N_C_0);
     muladd(n2, SECP256K1_N_C_1);
     muladd(n1, SECP256K1_N_C_2);
     muladd(n0, SECP256K1_N_C_3);
-    uint32_t m3; extract(m3);
+    extract(m3);
     sumadd(l[4]);
     muladd(n4, SECP256K1_N_C_0);
     muladd(n3, SECP256K1_N_C_1);
     muladd(n2, SECP256K1_N_C_2);
     muladd(n1, SECP256K1_N_C_3);
     sumadd(n0);
-    uint32_t m4; extract(m4);
+    extract(m4);
     sumadd(l[5]);
     muladd(n5, SECP256K1_N_C_0);
     muladd(n4, SECP256K1_N_C_1);
     muladd(n3, SECP256K1_N_C_2);
     muladd(n2, SECP256K1_N_C_3);
     sumadd(n1);
-    uint32_t m5; extract(m5);
+    extract(m5);
     sumadd(l[6]);
     muladd(n6, SECP256K1_N_C_0);
     muladd(n5, SECP256K1_N_C_1);
     muladd(n4, SECP256K1_N_C_2);
     muladd(n3, SECP256K1_N_C_3);
     sumadd(n2);
-    uint32_t m6; extract(m6);
+    extract(m6);
     sumadd(l[7]);
     muladd(n7, SECP256K1_N_C_0);
     muladd(n6, SECP256K1_N_C_1);
     muladd(n5, SECP256K1_N_C_2);
     muladd(n4, SECP256K1_N_C_3);
     sumadd(n3);
-    uint32_t m7; extract(m7);
+    extract(m7);
     muladd(n7, SECP256K1_N_C_1);
     muladd(n6, SECP256K1_N_C_2);
     muladd(n5, SECP256K1_N_C_3);
     sumadd(n4);
-    uint32_t m8; extract(m8);
+    extract(m8);
     muladd(n7, SECP256K1_N_C_2);
     muladd(n6, SECP256K1_N_C_3);
     sumadd(n5);
-    uint32_t m9; extract(m9);
+    extract(m9);
     muladd(n7, SECP256K1_N_C_3);
     sumadd(n6);
-    uint32_t m10; extract(m10);
+    extract(m10);
     sumadd_fast(n7);
-    uint32_t m11; extract_fast(m11);
+    extract_fast(m11);
     VERIFY_CHECK(c0 <= 1);
-    uint32_t m12 = c0;
+    m12 = c0;
 
     /* Reduce 385 bits into 258. */
     /* p[0..8] = m[0..7] + m[8..12] * SECP256K1_N_C. */
     c0 = m0; c1 = 0; c2 = 0;
     muladd_fast(m8, SECP256K1_N_C_0);
-    uint32_t p0; extract_fast(p0);
+    extract_fast(p0);
     sumadd_fast(m1);
     muladd(m9, SECP256K1_N_C_0);
     muladd(m8, SECP256K1_N_C_1);
-    uint32_t p1; extract(p1);
+    extract(p1);
     sumadd(m2);
     muladd(m10, SECP256K1_N_C_0);
     muladd(m9, SECP256K1_N_C_1);
     muladd(m8, SECP256K1_N_C_2);
-    uint32_t p2; extract(p2);
+    extract(p2);
     sumadd(m3);
     muladd(m11, SECP256K1_N_C_0);
     muladd(m10, SECP256K1_N_C_1);
     muladd(m9, SECP256K1_N_C_2);
     muladd(m8, SECP256K1_N_C_3);
-    uint32_t p3; extract(p3);
+    extract(p3);
     sumadd(m4);
     muladd(m12, SECP256K1_N_C_0);
     muladd(m11, SECP256K1_N_C_1);
     muladd(m10, SECP256K1_N_C_2);
     muladd(m9, SECP256K1_N_C_3);
     sumadd(m8);
-    uint32_t p4; extract(p4);
+    extract(p4);
     sumadd(m5);
     muladd(m12, SECP256K1_N_C_1);
     muladd(m11, SECP256K1_N_C_2);
     muladd(m10, SECP256K1_N_C_3);
     sumadd(m9);
-    uint32_t p5; extract(p5);
+    extract(p5);
     sumadd(m6);
     muladd(m12, SECP256K1_N_C_2);
     muladd(m11, SECP256K1_N_C_3);
     sumadd(m10);
-    uint32_t p6; extract(p6);
+    extract(p6);
     sumadd_fast(m7);
     muladd_fast(m12, SECP256K1_N_C_3);
     sumadd_fast(m11);
-    uint32_t p7; extract_fast(p7);
-    uint32_t p8 = c0 + m12;
+    extract_fast(p7);
+    p8 = c0 + m12;
     VERIFY_CHECK(p8 <= 2);
 
     /* Reduce 258 bits into 256. */
     /* r[0..7] = p[0..7] + p[8] * SECP256K1_N_C. */
-    uint64_t c = p0 + (uint64_t)SECP256K1_N_C_0 * p8;
+    c = p0 + (uint64_t)SECP256K1_N_C_0 * p8;
     r->d[0] = c & 0xFFFFFFFFUL; c >>= 32;
     c += p1 + (uint64_t)SECP256K1_N_C_1 * p8;
     r->d[1] = c & 0xFFFFFFFFUL; c >>= 32;
@@ -646,12 +654,15 @@ SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar_t *a, const secp256k1_scalar_t *b) {
 }
 
 SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b, unsigned int shift) {
-    VERIFY_CHECK(shift >= 256);
     uint32_t l[16];
+    unsigned int shiftlimbs;
+    unsigned int shiftlow;
+    unsigned int shifthigh;
+    VERIFY_CHECK(shift >= 256);
     secp256k1_scalar_mul_512(l, a, b);
-    unsigned int shiftlimbs = shift >> 5;
-    unsigned int shiftlow = shift & 0x1F;
-    unsigned int shifthigh = 32 - shiftlow;
+    shiftlimbs = shift >> 5;
+    shiftlow = shift & 0x1F;
+    shifthigh = 32 - shiftlow;
     r->d[0] = shift < 512 ? (l[0 + shiftlimbs] >> shiftlow | (shift < 480 && shiftlow ? (l[1 + shiftlimbs] << shifthigh) : 0)) : 0;
     r->d[1] = shift < 480 ? (l[1 + shiftlimbs] >> shiftlow | (shift < 448 && shiftlow ? (l[2 + shiftlimbs] << shifthigh) : 0)) : 0;
     r->d[2] = shift < 448 ? (l[2 + shiftlimbs] >> shiftlow | (shift < 416 && shiftlow ? (l[3 + shiftlimbs] << shifthigh) : 0)) : 0;
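
The (c0,c1,c2) triple used by these macros is a 96-bit carry-save accumulator: muladd folds the 64-bit product a*b into it with the same comparison-based carries, relying on th being at most 0xFFFFFFFE so that adding one carry cannot itself overflow. A function-form sketch of that logic (illustrative only, not the macro itself):

#include <stdint.h>

/* Add the 64-bit product a*b into a 96-bit accumulator held in three
 * 32-bit words; mirrors the muladd macro's carry handling. */
static void muladd32(uint32_t a, uint32_t b,
                     uint32_t *c0, uint32_t *c1, uint32_t *c2) {
    uint64_t t = (uint64_t)a * b;
    uint32_t tl = (uint32_t)t;         /* low half of the product */
    uint32_t th = (uint32_t)(t >> 32); /* high half, at most 0xFFFFFFFE */
    *c0 += tl;                         /* may wrap */
    th += (*c0 < tl) ? 1 : 0;          /* carry out of c0; th cannot wrap */
    *c1 += th;                         /* may wrap */
    *c2 += (*c1 < th) ? 1 : 0;         /* carry into the top word */
}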

src/scalar_impl.h

@@ -43,6 +43,8 @@ static void secp256k1_scalar_order_get_num(secp256k1_num_t *r) {
 #endif
 
 static void secp256k1_scalar_inverse(secp256k1_scalar_t *r, const secp256k1_scalar_t *x) {
+    secp256k1_scalar_t *t;
+    int i;
     /* First compute x ^ (2^N - 1) for some values of N. */
     secp256k1_scalar_t x2, x3, x4, x6, x7, x8, x15, x30, x60, x120, x127;
 
@@ -66,129 +68,129 @@ static void secp256k1_scalar_inverse(secp256k1_scalar_t *r, const secp256k1_scalar_t *x) {
     secp256k1_scalar_mul(&x8, &x8, x);
 
     secp256k1_scalar_sqr(&x15, &x8);
-    for (int i=0; i<6; i++)
+    for (i = 0; i < 6; i++)
         secp256k1_scalar_sqr(&x15, &x15);
     secp256k1_scalar_mul(&x15, &x15, &x7);
 
     secp256k1_scalar_sqr(&x30, &x15);
-    for (int i=0; i<14; i++)
+    for (i = 0; i < 14; i++)
         secp256k1_scalar_sqr(&x30, &x30);
     secp256k1_scalar_mul(&x30, &x30, &x15);
 
     secp256k1_scalar_sqr(&x60, &x30);
-    for (int i=0; i<29; i++)
+    for (i = 0; i < 29; i++)
         secp256k1_scalar_sqr(&x60, &x60);
     secp256k1_scalar_mul(&x60, &x60, &x30);
 
     secp256k1_scalar_sqr(&x120, &x60);
-    for (int i=0; i<59; i++)
+    for (i = 0; i < 59; i++)
         secp256k1_scalar_sqr(&x120, &x120);
     secp256k1_scalar_mul(&x120, &x120, &x60);
 
     secp256k1_scalar_sqr(&x127, &x120);
-    for (int i=0; i<6; i++)
+    for (i = 0; i < 6; i++)
         secp256k1_scalar_sqr(&x127, &x127);
     secp256k1_scalar_mul(&x127, &x127, &x7);
 
     /* Then accumulate the final result (t starts at x127). */
-    secp256k1_scalar_t *t = &x127;
-    for (int i=0; i<2; i++) /* 0 */
+    t = &x127;
+    for (i = 0; i < 2; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<4; i++) /* 0 */
+    for (i = 0; i < 4; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x3); /* 111 */
-    for (int i=0; i<2; i++) /* 0 */
+    for (i = 0; i < 2; i++) /* 0 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<2; i++) /* 0 */
+    for (i = 0; i < 2; i++) /* 0 */
         secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<2; i++) /* 0 */
+    for (i = 0; i < 2; i++) /* 0 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<4; i++) /* 0 */
+    for (i = 0; i < 4; i++) /* 0 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x3); /* 111 */
-    for (int i=0; i<3; i++) /* 0 */
+    for (i = 0; i < 3; i++) /* 0 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x2); /* 11 */
-    for (int i=0; i<4; i++) /* 0 */
+    for (i = 0; i < 4; i++) /* 0 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x3); /* 111 */
-    for (int i=0; i<5; i++) /* 00 */
+    for (i = 0; i < 5; i++) /* 00 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x3); /* 111 */
-    for (int i=0; i<4; i++) /* 00 */
+    for (i = 0; i < 4; i++) /* 00 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x2); /* 11 */
-    for (int i=0; i<2; i++) /* 0 */
+    for (i = 0; i < 2; i++) /* 0 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<2; i++) /* 0 */
+    for (i = 0; i < 2; i++) /* 0 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<5; i++) /* 0 */
+    for (i = 0; i < 5; i++) /* 0 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x4); /* 1111 */
-    for (int i=0; i<2; i++) /* 0 */
+    for (i = 0; i < 2; i++) /* 0 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<3; i++) /* 00 */
+    for (i = 0; i < 3; i++) /* 00 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<4; i++) /* 000 */
+    for (i = 0; i < 4; i++) /* 000 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<2; i++) /* 0 */
+    for (i = 0; i < 2; i++) /* 0 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<10; i++) /* 0000000 */
+    for (i = 0; i < 10; i++) /* 0000000 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x3); /* 111 */
-    for (int i=0; i<4; i++) /* 0 */
+    for (i = 0; i < 4; i++) /* 0 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x3); /* 111 */
-    for (int i=0; i<9; i++) /* 0 */
+    for (i = 0; i < 9; i++) /* 0 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x8); /* 11111111 */
-    for (int i=0; i<2; i++) /* 0 */
+    for (i = 0; i < 2; i++) /* 0 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<3; i++) /* 00 */
+    for (i = 0; i < 3; i++) /* 00 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<3; i++) /* 00 */
+    for (i = 0; i < 3; i++) /* 00 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<5; i++) /* 0 */
+    for (i = 0; i < 5; i++) /* 0 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x4); /* 1111 */
-    for (int i=0; i<2; i++) /* 0 */
+    for (i = 0; i < 2; i++) /* 0 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<5; i++) /* 000 */
+    for (i = 0; i < 5; i++) /* 000 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x2); /* 11 */
-    for (int i=0; i<4; i++) /* 00 */
+    for (i = 0; i < 4; i++) /* 00 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x2); /* 11 */
-    for (int i=0; i<2; i++) /* 0 */
+    for (i = 0; i < 2; i++) /* 0 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<8; i++) /* 000000 */
+    for (i = 0; i < 8; i++) /* 000000 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x2); /* 11 */
-    for (int i=0; i<3; i++) /* 0 */
+    for (i = 0; i < 3; i++) /* 0 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, &x2); /* 11 */
-    for (int i=0; i<3; i++) /* 00 */
+    for (i = 0; i < 3; i++) /* 00 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<6; i++) /* 00000 */
+    for (i = 0; i < 6; i++) /* 00000 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(t, t, x); /* 1 */
-    for (int i=0; i<8; i++) /* 00 */
+    for (i = 0; i < 8; i++) /* 00 */
        secp256k1_scalar_sqr(t, t);
     secp256k1_scalar_mul(r, t, &x6); /* 111111 */
 }
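
secp256k1_scalar_inverse computes x^(n-2) mod the group order n, which is x^-1 by Fermat's little theorem since n is prime. The fixed chain above is square-and-multiply with precomputed windows: each run of k squarings shifts the exponent left by k bits, and each multiply by x or an x^(2^j - 1) value writes the bit pattern noted in the comments. A generic square-and-multiply sketch over small moduli (illustrative; the real code runs a hand-tuned chain over 256-bit scalars):

#include <stdint.h>

/* Compute base^exp mod m by square-and-multiply; 64-bit intermediates
 * keep the products in range for any 32-bit modulus. */
static uint32_t powmod(uint32_t base, uint32_t exp, uint32_t m) {
    uint64_t r = 1;
    int i;
    for (i = 31; i >= 0; i--) {
        r = (r * r) % m;        /* squaring: exponent <<= 1 */
        if ((exp >> i) & 1) {
            r = (r * base) % m; /* multiply: sets the current exponent bit */
        }
    }
    return (uint32_t)r;
}

For prime m, powmod(x, m - 2, m) is the modular inverse of x, which is what the chain above specializes to for the secp256k1 order.
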
@@ -198,8 +200,8 @@ static void secp256k1_scalar_inverse_var(secp256k1_scalar_t *r, const secp256k1_scalar_t *x) {
     secp256k1_scalar_inverse(r, x);
 #elif defined(USE_SCALAR_INV_NUM)
     unsigned char b[32];
-    secp256k1_scalar_get_b32(b, x);
     secp256k1_num_t n, m;
+    secp256k1_scalar_get_b32(b, x);
     secp256k1_num_set_bin(&n, b, 32);
     secp256k1_scalar_order_get_num(&m);
     secp256k1_num_mod_inverse(&n, &n, &m);
@@ -250,6 +252,7 @@ static void secp256k1_scalar_inverse_var(secp256k1_scalar_t *r, const secp256k1_scalar_t *x) {
  */
 static void secp256k1_scalar_split_lambda_var(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a) {
+    secp256k1_scalar_t c1, c2;
     static const secp256k1_scalar_t minus_lambda = SECP256K1_SCALAR_CONST(
         0xAC9C52B3UL, 0x3FA3CF1FUL, 0x5AD9E3FDUL, 0x77ED9BA4UL,
         0xA880B9FCUL, 0x8EC739C2UL, 0xE0CFC810UL, 0xB51283CFUL
@@ -272,7 +275,6 @@ static void secp256k1_scalar_split_lambda_var(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a) {
     );
     VERIFY_CHECK(r1 != a);
     VERIFY_CHECK(r2 != a);
-    secp256k1_scalar_t c1, c2;
     secp256k1_scalar_mul_shift_var(&c1, a, &g1, 272);
     secp256k1_scalar_mul_shift_var(&c2, a, &g2, 272);
     secp256k1_scalar_mul(&c1, &c1, &minus_b1);