diff --git a/Makefile b/Makefile index bb931d3e61..eebc6e9c96 100644 --- a/Makefile +++ b/Makefile @@ -16,6 +16,11 @@ OBJS := default: all +ifeq ($(CONF), gmp32) +FLAGS_COMMON := $(FLAGS_COMMON) -DUSE_NUM_GMP -DUSE_FIELD_10X26 +LIBS := -lgmp +SECP256K1_FILES := $(SECP256K1_FILES) src/num_gmp.h src/num_gmp.c src/field_10x26.c src/field_10x26.h +else ifeq ($(CONF), openssl) FLAGS_COMMON := $(FLAGS_COMMON) -DUSE_NUM_OPENSSL -DUSE_FIELD_INV_BUILTIN LIBS := -lcrypto @@ -39,16 +44,19 @@ SECP256K1_FILES := $(SECP256K1_FILES) src/field_5x52_int128.c endif endif endif +endif all: src/*.c src/*.asm src/*.h include/*.h +make CONF=openssl all-openssl +make CONF=gmp all-gmp + +make CONF=gmp32 all-gmp32 +make CONF=gmpasm all-gmpasm clean: +make CONF=openssl clean-openssl +make CONF=gmp clean-gmp + +make CONF=gmp32 clean-gmp32 +make CONF=gmpasm clean-gmpasm bench-any: bench-$(CONF) @@ -62,10 +70,10 @@ clean-$(CONF): obj/secp256k1-$(CONF).o: $(SECP256K1_FILES) src/secp256k1.c include/secp256k1.h $(CC) $(FLAGS_COMMON) $(FLAGS_PROD) src/secp256k1.c -c -o obj/secp256k1-$(CONF).o -bench-$(CONF): $(OBJS) src/bench.c +bench-$(CONF): $(OBJS) $(SECP256K1_FILES) src/bench.c $(CC) $(FLAGS_COMMON) $(FLAGS_PROD) src/bench.c $(LIBS) $(OBJS) -o bench-$(CONF) -tests-$(CONF): $(OBJS) src/tests.c +tests-$(CONF): $(OBJS) $(SECP256K1_FILES) src/tests.c $(CC) $(FLAGS_COMMON) $(FLAGS_TEST) src/tests.c $(LIBS) $(OBJS) -o tests-$(CONF) libsecp256k1-$(CONF).a: $(OBJS) obj/secp256k1-$(CONF).o diff --git a/src/field.c b/src/field.c index d795a9f487..e7c636e05c 100644 --- a/src/field.c +++ b/src/field.c @@ -1,5 +1,8 @@ -// just one implementation for now +#ifdef USE_FIELD_10X26 +#include "field_10x26.c" +#else #include "field_5x52.c" +#endif void static secp256k1_fe_get_hex(char *r, int *rlen, const secp256k1_fe_t *a) { if (*rlen < 65) { diff --git a/src/field.h b/src/field.h index a266b878cb..a2896073c1 100644 --- a/src/field.h +++ b/src/field.h @@ -12,8 +12,11 @@ * normality. 
*/ -// just one implementation for now +#ifdef USE_FIELD_10X26 +#include "field_10x26.h" +#else #include "field_5x52.h" +#endif typedef struct { secp256k1_num_t p; diff --git a/src/field_10x26.c b/src/field_10x26.c new file mode 100644 index 0000000000..9f9edb9d69 --- /dev/null +++ b/src/field_10x26.c @@ -0,0 +1,475 @@ +#include +#include +#include +#include "num.h" +#include "field.h" + +void static secp256k1_fe_normalize(secp256k1_fe_t *r) { +// fog("normalize in: ", r); + uint32_t c; + c = r->n[0]; + uint32_t t0 = c & 0x3FFFFFFUL; + c = (c >> 26) + r->n[1]; + uint32_t t1 = c & 0x3FFFFFFUL; + c = (c >> 26) + r->n[2]; + uint32_t t2 = c & 0x3FFFFFFUL; + c = (c >> 26) + r->n[3]; + uint32_t t3 = c & 0x3FFFFFFUL; + c = (c >> 26) + r->n[4]; + uint32_t t4 = c & 0x3FFFFFFUL; + c = (c >> 26) + r->n[5]; + uint32_t t5 = c & 0x3FFFFFFUL; + c = (c >> 26) + r->n[6]; + uint32_t t6 = c & 0x3FFFFFFUL; + c = (c >> 26) + r->n[7]; + uint32_t t7 = c & 0x3FFFFFFUL; + c = (c >> 26) + r->n[8]; + uint32_t t8 = c & 0x3FFFFFFUL; + c = (c >> 26) + r->n[9]; + uint32_t t9 = c & 0x03FFFFFUL; + c >>= 22; +/* r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; + r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9; + fog(" tm1: ", r); + fprintf(stderr, "out c= %08lx\n", (unsigned long)c);*/ + + // The following code will not modify the t's if c is initially 0. 
+ uint32_t d = c * 0x3D1UL + t0; + t0 = d & 0x3FFFFFFULL; + d = (d >> 26) + t1 + c*0x40; + t1 = d & 0x3FFFFFFULL; + d = (d >> 26) + t2; + t2 = d & 0x3FFFFFFULL; + d = (d >> 26) + t3; + t3 = d & 0x3FFFFFFULL; + d = (d >> 26) + t4; + t4 = d & 0x3FFFFFFULL; + d = (d >> 26) + t5; + t5 = d & 0x3FFFFFFULL; + d = (d >> 26) + t6; + t6 = d & 0x3FFFFFFULL; + d = (d >> 26) + t7; + t7 = d & 0x3FFFFFFULL; + d = (d >> 26) + t8; + t8 = d & 0x3FFFFFFULL; + d = (d >> 26) + t9; + t9 = d & 0x03FFFFFULL; + assert((d >> 22) == 0); +/* r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; + r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9; + fog(" tm2: ", r); */ + + // Subtract p if result >= p + uint64_t low = ((uint64_t)t1 << 26) | t0; + uint64_t mask = -(int64_t)((t9 < 0x03FFFFFUL) | (t8 < 0x3FFFFFFUL) | (t7 < 0x3FFFFFFUL) | (t6 < 0x3FFFFFFUL) | (t5 < 0x3FFFFFFUL) | (t4 < 0x3FFFFFFUL) | (t3 < 0x3FFFFFFUL) | (t2 < 0x3FFFFFFUL) | (low < 0xFFFFEFFFFFC2FULL)); + t9 &= mask; + t8 &= mask; + t7 &= mask; + t6 &= mask; + t5 &= mask; + t4 &= mask; + t3 &= mask; + t2 &= mask; + low -= (~mask & 0xFFFFEFFFFFC2FULL); + + // push internal variables back + r->n[0] = low & 0x3FFFFFFUL; r->n[1] = (low >> 26) & 0x3FFFFFFUL; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; + r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9; +/* fog(" out: ", r);*/ + +#ifdef VERIFY + r->magnitude = 1; + r->normalized = 1; +#endif +} + +void static inline secp256k1_fe_set_int(secp256k1_fe_t *r, int a) { + r->n[0] = a; + r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0; +#ifdef VERIFY + r->magnitude = 1; + r->normalized = 1; +#endif +} + +// TODO: not constant time! 
+int static inline secp256k1_fe_is_zero(const secp256k1_fe_t *a) { +#ifdef VERIFY + assert(a->normalized); +#endif + return (a->n[0] == 0 && a->n[1] == 0 && a->n[2] == 0 && a->n[3] == 0 && a->n[4] == 0 && a->n[5] == 0 && a->n[6] == 0 && a->n[7] == 0 && a->n[8] == 0 && a->n[9] == 0); +} + +int static inline secp256k1_fe_is_odd(const secp256k1_fe_t *a) { +#ifdef VERIFY + assert(a->normalized); +#endif + return a->n[0] & 1; +} + +// TODO: not constant time! +int static inline secp256k1_fe_equal(const secp256k1_fe_t *a, const secp256k1_fe_t *b) { +#ifdef VERIFY + assert(a->normalized); + assert(b->normalized); +#endif + return (a->n[0] == b->n[0] && a->n[1] == b->n[1] && a->n[2] == b->n[2] && a->n[3] == b->n[3] && a->n[4] == b->n[4] && + a->n[5] == b->n[5] && a->n[6] == b->n[6] && a->n[7] == b->n[7] && a->n[8] == b->n[8] && a->n[9] == b->n[9]); +} + +void static secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) { + r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0; + r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0; + for (int i=0; i<32; i++) { + for (int j=0; j<4; j++) { + int limb = (8*i+2*j)/26; + int shift = (8*i+2*j)%26; + r->n[limb] |= (uint32_t)((a[31-i] >> (2*j)) & 0x3) << shift; + } + } +#ifdef VERIFY + r->magnitude = 1; + r->normalized = 1; +#endif +} + +/** Convert a field element to a 32-byte big endian value. 
Requires the input to be normalized */ +void static secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a) { +#ifdef VERIFY + assert(a->normalized); +#endif + for (int i=0; i<32; i++) { + int c = 0; + for (int j=0; j<4; j++) { + int limb = (8*i+2*j)/26; + int shift = (8*i+2*j)%26; + c |= ((a->n[limb] >> shift) & 0x3) << (2 * j); + } + r[31-i] = c; + } +} + +void static inline secp256k1_fe_negate(secp256k1_fe_t *r, const secp256k1_fe_t *a, int m) { +#ifdef VERIFY + assert(a->magnitude <= m); + r->magnitude = m + 1; + r->normalized = 0; +#endif + r->n[0] = 0x3FFFC2FUL * (m + 1) - a->n[0]; + r->n[1] = 0x3FFFFBFUL * (m + 1) - a->n[1]; + r->n[2] = 0x3FFFFFFUL * (m + 1) - a->n[2]; + r->n[3] = 0x3FFFFFFUL * (m + 1) - a->n[3]; + r->n[4] = 0x3FFFFFFUL * (m + 1) - a->n[4]; + r->n[5] = 0x3FFFFFFUL * (m + 1) - a->n[5]; + r->n[6] = 0x3FFFFFFUL * (m + 1) - a->n[6]; + r->n[7] = 0x3FFFFFFUL * (m + 1) - a->n[7]; + r->n[8] = 0x3FFFFFFUL * (m + 1) - a->n[8]; + r->n[9] = 0x03FFFFFUL * (m + 1) - a->n[9]; +} + +void static inline secp256k1_fe_mul_int(secp256k1_fe_t *r, int a) { +#ifdef VERIFY + r->magnitude *= a; + r->normalized = 0; +#endif + r->n[0] *= a; + r->n[1] *= a; + r->n[2] *= a; + r->n[3] *= a; + r->n[4] *= a; + r->n[5] *= a; + r->n[6] *= a; + r->n[7] *= a; + r->n[8] *= a; + r->n[9] *= a; +} + +void static inline secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1_fe_t *a) { +#ifdef VERIFY + r->magnitude += a->magnitude; + r->normalized = 0; +#endif + r->n[0] += a->n[0]; + r->n[1] += a->n[1]; + r->n[2] += a->n[2]; + r->n[3] += a->n[3]; + r->n[4] += a->n[4]; + r->n[5] += a->n[5]; + r->n[6] += a->n[6]; + r->n[7] += a->n[7]; + r->n[8] += a->n[8]; + r->n[9] += a->n[9]; +} + +void static inline secp256k1_fe_mul_inner(const uint32_t *a, const uint32_t *b, uint32_t *r) { + uint64_t c = (uint64_t)a[0] * b[0]; + uint32_t t0 = c & 0x3FFFFFFUL; c = c >> 26; + c = c + (uint64_t)a[0] * b[1] + + (uint64_t)a[1] * b[0]; + uint32_t t1 = c & 0x3FFFFFFUL; c = c >> 26; + c = c + 
/**
 * Multiply two field elements given as ten 26-bit limbs each and reduce the
 * product modulo p = 2^256 - 0x1000003D1.
 *
 * The 20-limb schoolbook product is computed first; its upper ten limbs are
 * then folded down using 2^260 = 0x3D10 + 0x400 * 2^26 (mod p), and whatever
 * ends up at or above bit 256 is folded once more using
 * 2^256 = 0x3D1 + 0x40 * 2^26 (mod p).
 *
 * All reads of a and b happen before the first write to r, so r may alias
 * either input.
 *
 * @param a  first operand, 10 limbs, least significant first.
 * @param b  second operand, 10 limbs, least significant first.
 * @param r  result, 10 limbs; limbs 0, 1, 3..8 are below 2^26, limb 9 below
 *           2^22, limb 2 may slightly exceed 26 bits (no final carry is
 *           propagated out of it).
 */
static inline void secp256k1_fe_mul_inner(const uint32_t *a, const uint32_t *b, uint32_t *r) {
    enum { LIMBS = 10 };
    const uint32_t M26 = 0x3FFFFFFUL;
    uint32_t t[2 * LIMBS];
    uint64_t c = 0;

    // Schoolbook product: column k accumulates every a[i] * b[k - i].
    // The trip counts are compile-time constants, so the compiler is free to unroll.
    for (int k = 0; k < 2 * LIMBS - 1; k++) {
        int lo = k < LIMBS ? 0 : k - (LIMBS - 1);
        int hi = k < LIMBS ? k : LIMBS - 1;
        for (int i = lo; i <= hi; i++) {
            c += (uint64_t)a[i] * b[k - i];
        }
        t[k] = (uint32_t)(c & M26);
        c >>= 26;
    }
    t[2 * LIMBS - 1] = (uint32_t)c;

    // Fold limbs 10..19 into limbs 0..9: limb i receives 0x3D10 * t[i + 10]
    // and 0x400 * t[i + 9].
    c = t[0] + (uint64_t)t[10] * 0x3D10UL;
    t[0] = (uint32_t)(c & M26);
    c >>= 26;
    for (int i = 1; i <= 8; i++) {
        c += t[i] + (uint64_t)t[i + 9] * 0x400UL + (uint64_t)t[i + 10] * 0x3D10UL;
        t[i] = (uint32_t)(c & M26);
        c >>= 26;
    }
    // t[19] would contribute 0x400 * t[19] to limb 10; that is added here
    // pre-shifted by 26 bits (0x400 << 26 = 0x1000000000) and leaves through
    // the carry below.
    c += t[9] + (uint64_t)t[18] * 0x400UL + (uint64_t)t[19] * 0x1000003D10ULL;
    t[9] = (uint32_t)(c & 0x03FFFFFUL);
    c >>= 22; // everything at or above bit 256

    // Fold the overflow above bit 256 into the three lowest limbs.
    uint64_t d = t[0] + c * 0x3D1UL;
    r[0] = (uint32_t)(d & M26);
    d >>= 26;
    d += t[1] + c * 0x40;
    r[1] = (uint32_t)(d & M26);
    d >>= 26;
    r[2] = (uint32_t)(t[2] + d);
    for (int i = 3; i < LIMBS; i++) {
        r[i] = t[i];
    }
}
/**
 * Square a field element given as ten 26-bit limbs and reduce the result
 * modulo p = 2^256 - 0x1000003D1.
 *
 * Each column of the product uses the symmetry a[i]*a[j] == a[j]*a[i]: cross
 * terms are counted once and doubled, the square term a[k/2]^2 is added for
 * even columns. The doubling is done after widening to 64 bits so that it
 * cannot wrap in 32-bit arithmetic. The reduction uses
 * 2^260 = 0x3D10 + 0x400 * 2^26 (mod p) and 2^256 = 0x3D1 + 0x40 * 2^26 (mod p).
 *
 * All reads of a happen before the first write to r, so r may alias a.
 *
 * @param a  operand, 10 limbs, least significant first.
 * @param r  result, 10 limbs; limbs 0, 1, 3..8 are below 2^26, limb 9 below
 *           2^22, limb 2 may slightly exceed 26 bits (no final carry is
 *           propagated out of it).
 */
static inline void secp256k1_fe_sqr_inner(const uint32_t *a, uint32_t *r) {
    enum { LIMBS = 10 };
    const uint32_t M26 = 0x3FFFFFFUL;
    uint32_t t[2 * LIMBS];
    uint64_t c = 0;

    // Column k: 2 * a[i] * a[k - i] for every i < k - i, plus a[k/2]^2 when k is even.
    // The trip counts are compile-time constants, so the compiler is free to unroll.
    for (int k = 0; k < 2 * LIMBS - 1; k++) {
        int lo = k < LIMBS ? 0 : k - (LIMBS - 1);
        for (int i = lo; 2 * i < k; i++) {
            c += (uint64_t)a[i] * 2 * a[k - i];
        }
        if (k % 2 == 0) {
            c += (uint64_t)a[k / 2] * a[k / 2];
        }
        t[k] = (uint32_t)(c & M26);
        c >>= 26;
    }
    t[2 * LIMBS - 1] = (uint32_t)c;

    // Fold limbs 10..19 into limbs 0..9: limb i receives 0x3D10 * t[i + 10]
    // and 0x400 * t[i + 9].
    c = t[0] + (uint64_t)t[10] * 0x3D10UL;
    t[0] = (uint32_t)(c & M26);
    c >>= 26;
    for (int i = 1; i <= 8; i++) {
        c += t[i] + (uint64_t)t[i + 9] * 0x400UL + (uint64_t)t[i + 10] * 0x3D10UL;
        t[i] = (uint32_t)(c & M26);
        c >>= 26;
    }
    // t[19] would contribute 0x400 * t[19] to limb 10; that is added here
    // pre-shifted by 26 bits (0x400 << 26 = 0x1000000000) and leaves through
    // the carry below.
    c += t[9] + (uint64_t)t[18] * 0x400UL + (uint64_t)t[19] * 0x1000003D10ULL;
    t[9] = (uint32_t)(c & 0x03FFFFFUL);
    c >>= 22; // everything at or above bit 256

    // Fold the overflow above bit 256 into the three lowest limbs.
    uint64_t d = t[0] + c * 0x3D1UL;
    r[0] = (uint32_t)(d & M26);
    d >>= 26;
    d += t[1] + c * 0x40;
    r[1] = (uint32_t)(d & M26);
    d >>= 26;
    r[2] = (uint32_t)(t[2] + d);
    for (int i = 3; i < LIMBS; i++) {
        r[i] = t[i];
    }
}
0x3D10UL; + r[6] = c & 0x3FFFFFFUL; c = c >> 26; + c = c + t7 + (uint64_t)t16*0x400UL + (uint64_t)t17 * 0x3D10UL; + r[7] = c & 0x3FFFFFFUL; c = c >> 26; + c = c + t8 + (uint64_t)t17*0x400UL + (uint64_t)t18 * 0x3D10UL; + r[8] = c & 0x3FFFFFFUL; c = c >> 26; + c = c + t9 + (uint64_t)t18*0x400UL + (uint64_t)t19 * 0x1000003D10ULL; + r[9] = c & 0x03FFFFFUL; c = c >> 22; + uint64_t d = t0 + c * 0x3D1UL; + r[0] = d & 0x3FFFFFFUL; d = d >> 26; + d = d + t1 + c*0x40; + r[1] = d & 0x3FFFFFFUL; d = d >> 26; + r[2] = t2 + d; +} + + +void static secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t *b) { +#ifdef VERIFY + assert(a->magnitude <= 8); + assert(b->magnitude <= 8); + r->magnitude = 1; + r->normalized = 0; +#endif + secp256k1_fe_mul_inner(a->n, b->n, r->n); +} + +void static secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) { +#ifdef VERIFY + assert(a->magnitude <= 8); + r->magnitude = 1; + r->normalized = 0; +#endif + secp256k1_fe_sqr_inner(a->n, r->n); +} diff --git a/src/field_10x26.h b/src/field_10x26.h new file mode 100644 index 0000000000..11f6cf9999 --- /dev/null +++ b/src/field_10x26.h @@ -0,0 +1,15 @@ +#ifndef _SECP256K1_FIELD_10x26_ +#define _SECP256K1_FIELD_10x26_ + +#include + +typedef struct { + // X = sum(i=0..9, elem[i]*2^26) mod n + uint32_t n[10]; +#ifdef VERIFY + int magnitude; + int normalized; +#endif +} secp256k1_fe_t; + +#endif diff --git a/src/field_5x52.c b/src/field_5x52.c index 8b99501406..0d53352fb5 100644 --- a/src/field_5x52.c +++ b/src/field_5x52.c @@ -42,6 +42,7 @@ void static secp256k1_fe_normalize(secp256k1_fe_t *r) { t3 = c & 0xFFFFFFFFFFFFFULL; c = (c >> 52) + t4; t4 = c & 0x0FFFFFFFFFFFFULL; + assert((c >> 48) == 0); // Subtract p if result >= p uint64_t mask = -(int64_t)((t4 < 0xFFFFFFFFFFFFULL) | (t3 < 0xFFFFFFFFFFFFFULL) | (t2 < 0xFFFFFFFFFFFFFULL) | (t1 < 0xFFFFFFFFFFFFFULL) | (t0 < 0xFFFFEFFFFFC2FULL)); diff --git a/src/field_5x52_int128.c b/src/field_5x52_int128.c index 
740e6f1d91..6ad3fa127c 100644 --- a/src/field_5x52_int128.c +++ b/src/field_5x52_int128.c @@ -1,7 +1,7 @@ #include "field.h" void static inline secp256k1_fe_mul_inner(const uint64_t *a, const uint64_t *b, uint64_t *r) { - unsigned __int128 c = (__int128)a[0] * b[0]; + __int128 c = (__int128)a[0] * b[0]; uint64_t t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0FFFFFFFFFFFFFE0 c = c + (__int128)a[0] * b[1] + (__int128)a[1] * b[0];