don't do indirection for batch stuff

This commit is contained in:
cathugger 2020-11-22 10:21:06 +00:00
parent 5b5f414b79
commit 51d87c3857
No known key found for this signature in database
GPG key ID: 9BADDA2DAF6F01A8
19 changed files with 86 additions and 71 deletions

View file

@ -22,9 +22,9 @@
#define fe25519_invert crypto_sign_ed25519_amd64_51_30k_batch_fe25519_invert
#define fe25519_pow2523 crypto_sign_ed25519_amd64_51_30k_batch_fe25519_pow2523
typedef struct
typedef struct
{
unsigned long long v[5];
unsigned long long v[5];
}
fe25519;
@ -62,7 +62,7 @@ void fe25519_nsquare(fe25519 *r, unsigned long long n);
void fe25519_invert(fe25519 *r, const fe25519 *x);
void fe25519_batchinvert(fe25519 *out[],fe25519 tmp[],fe25519 * const in[], size_t num);
void fe25519_batchinvert(fe25519 *out, const fe25519 *in, fe25519 *tmp, size_t num, size_t offset);
void fe25519_pow2523(fe25519 *r, const fe25519 *x);

View file

@ -1,26 +1,34 @@
#include "fe25519.h"
// tmp MUST != out
// tmp MUST != out or in
// in MAY == out
//
// Batch inversion: produces the inverse of each of the num input elements
// using a single fe25519_invert call plus multiplications (presumably
// fe25519_mul / fe25519_invert are field multiply / field inverse).
// In the five-argument form, in and out are walked with a byte stride:
// element k is located offset*k bytes past the in / out pointer, so the
// elements can be members of a larger struct. tmp is indexed as an ordinary
// array and needs num entries.
// NOTE(review): a zero input makes the running product zero, which would
// presumably spoil every output of the batch - confirm callers never pass 0.
// NOTE(review): this is a diff listing; the four-argument signature and the
// lines using in[i] / out[i] appear to be the removed pointer-array version
// shown next to its replacement.
void fe25519_batchinvert(fe25519 *out[],fe25519 tmp[],fe25519 * const in[], size_t num)
void fe25519_batchinvert(fe25519 *out, const fe25519 *in, fe25519 *tmp, size_t num, size_t offset)
{
fe25519 acc;
fe25519 tmpacc;
size_t i;
const fe25519 *inp;
fe25519 *outp;
// acc starts at 1 and accumulates the running product of the inputs
fe25519_setint(&acc,1);
inp = in;
for (i = 0;i < num;++i) {
// tmp[i] = product of all inputs that come before element i
tmp[i] = acc;
fe25519_mul(&acc,&acc,in[i]);
fe25519_mul(&acc,&acc,inp);
// advance to the next element by offset bytes, not by sizeof(fe25519)
inp = (const fe25519 *)((const char *)inp + offset);
}
// acc becomes the inverse of the product of all inputs
fe25519_invert(&acc,&acc);
i = num;
// start one element past the end; the loop steps back before each access
inp = (const fe25519 *)((const char *)in + offset * num);
outp = (fe25519 *)((char *)out + offset * num);
while (i--) {
fe25519_mul(&tmpacc,&acc,in[i]);
fe25519_mul(out[i],&acc,&tmp[i]);
inp = (const fe25519 *)((const char *)inp - offset);
outp = (fe25519 *)((char *)outp - offset);
// the input is read into tmpacc before the output is stored, which is
// what allows in == out
fe25519_mul(&tmpacc,&acc,inp);
// output i = (inverse of product up to i) * (product before i)
fe25519_mul(outp,&acc,&tmp[i]);
// acc now holds the inverse of the product of the inputs before i
acc = tmpacc;
}
}

View file

@ -92,7 +92,7 @@ extern int ge25519_unpackneg_vartime(ge25519 *r, const unsigned char p[32]);
extern void ge25519_pack(unsigned char r[32], const ge25519 *p);
extern void ge25519_batchpack_destructive_1(bytes32 out[], ge25519_p3 in[], fe25519 *inz[], fe25519 tmp[], size_t num);
extern void ge25519_batchpack_destructive_1(bytes32 *out, ge25519_p3 *in, fe25519 *tmp, size_t num);
extern void ge25519_batchpack_destructive_finish(bytes32 out, ge25519_p3 *unf);
extern int ge25519_isneutral_vartime(const ge25519 *p);

View file

@ -1,13 +1,12 @@
#include "fe25519.h"
#include "ge25519.h"
// assumes inz[] points to things in in[]
// NOTE: leaves in unfinished state
void ge25519_batchpack_destructive_1(bytes32 out[], ge25519_p3 in[], fe25519 *inz[], fe25519 tmp[], size_t num)
void ge25519_batchpack_destructive_1(bytes32 *out, ge25519_p3 *in, fe25519 *tmp, size_t num)
{
fe25519 ty;
fe25519_batchinvert(inz, tmp, inz, num);
fe25519_batchinvert(&in->z, &in->z, tmp, num, sizeof(ge25519_p3));
for (size_t i = 0; i < num; ++i) {
fe25519_mul(&ty, &in[i].y, &in[i].z);

View file

@ -22,9 +22,9 @@
#define fe25519_batchinvert crypto_sign_ed25519_amd64_64_fe25519_batchinvert
#define fe25519_pow2523 crypto_sign_ed25519_amd64_64_fe25519_pow2523
typedef struct
typedef struct
{
unsigned long long v[4];
unsigned long long v[4];
}
fe25519;
@ -62,7 +62,7 @@ void fe25519_pow(fe25519 *r, const fe25519 *x, const unsigned char *e);
void fe25519_invert(fe25519 *r, const fe25519 *x);
void fe25519_batchinvert(fe25519 *out[],fe25519 tmp[],fe25519 * const in[], size_t num);
void fe25519_batchinvert(fe25519 *out, const fe25519 *in, fe25519 *tmp, size_t num, size_t offset);
void fe25519_pow2523(fe25519 *r, const fe25519 *x);

View file

@ -1,26 +1,34 @@
#include "fe25519.h"
// tmp MUST != out
// tmp MUST != out or in
// in MAY == out
//
// Batch inversion: produces the inverse of each of the num input elements
// using a single fe25519_invert call plus multiplications (presumably
// fe25519_mul / fe25519_invert are field multiply / field inverse).
// In the five-argument form, in and out are walked with a byte stride:
// element k is located offset*k bytes past the in / out pointer, so the
// elements can be members of a larger struct. tmp is indexed as an ordinary
// array and needs num entries.
// NOTE(review): a zero input makes the running product zero, which would
// presumably spoil every output of the batch - confirm callers never pass 0.
// NOTE(review): this is a diff listing; the four-argument signature and the
// lines using in[i] / out[i] appear to be the removed pointer-array version
// shown next to its replacement.
void fe25519_batchinvert(fe25519 *out[],fe25519 tmp[],fe25519 * const in[], size_t num)
void fe25519_batchinvert(fe25519 *out, const fe25519 *in, fe25519 *tmp, size_t num, size_t offset)
{
fe25519 acc;
fe25519 tmpacc;
size_t i;
const fe25519 *inp;
fe25519 *outp;
// acc starts at 1 and accumulates the running product of the inputs
fe25519_setint(&acc,1);
inp = in;
for (i = 0;i < num;++i) {
// tmp[i] = product of all inputs that come before element i
tmp[i] = acc;
fe25519_mul(&acc,&acc,in[i]);
fe25519_mul(&acc,&acc,inp);
// advance to the next element by offset bytes, not by sizeof(fe25519)
inp = (const fe25519 *)((const char *)inp + offset);
}
// acc becomes the inverse of the product of all inputs
fe25519_invert(&acc,&acc);
i = num;
// start one element past the end; the loop steps back before each access
inp = (const fe25519 *)((const char *)in + offset * num);
outp = (fe25519 *)((char *)out + offset * num);
while (i--) {
fe25519_mul(&tmpacc,&acc,in[i]);
fe25519_mul(out[i],&acc,&tmp[i]);
inp = (const fe25519 *)((const char *)inp - offset);
outp = (fe25519 *)((char *)outp - offset);
// the input is read into tmpacc before the output is stored, which is
// what allows in == out
fe25519_mul(&tmpacc,&acc,inp);
// output i = (inverse of product up to i) * (product before i)
fe25519_mul(outp,&acc,&tmp[i]);
// acc now holds the inverse of the product of the inputs before i
acc = tmpacc;
}
}

View file

@ -81,7 +81,7 @@ extern int ge25519_unpackneg_vartime(ge25519 *r, const unsigned char p[32]);
extern void ge25519_pack(unsigned char r[32], const ge25519 *p);
extern void ge25519_batchpack_destructive_1(bytes32 out[], ge25519_p3 in[], fe25519 *inz[], fe25519 tmp[], size_t num);
extern void ge25519_batchpack_destructive_1(bytes32 *out, ge25519_p3 *in, fe25519 *tmp, size_t num);
extern void ge25519_batchpack_destructive_finish(bytes32 out, ge25519_p3 *unf);
extern int ge25519_isneutral_vartime(const ge25519 *p);

View file

@ -1,13 +1,12 @@
#include "fe25519.h"
#include "ge25519.h"
// assumes inz[] points to things in in[]
// NOTE: leaves in unfinished state
void ge25519_batchpack_destructive_1(bytes32 out[], ge25519_p3 in[], fe25519 *inz[], fe25519 tmp[], size_t num)
void ge25519_batchpack_destructive_1(bytes32 *out, ge25519_p3 *in, fe25519 *tmp, size_t num)
{
fe25519 ty;
fe25519_batchinvert(inz, tmp, inz, num);
fe25519_batchinvert(&in->z, &in->z, tmp, num, sizeof(ge25519_p3));
for (size_t i = 0; i < num; ++i) {
fe25519_mul(&ty, &in[i].y, &in[i].z);

View file

@ -67,23 +67,31 @@ curve25519_setone(bignum25519 out) {
* if that's the case then we're doing batch invert there
*/
// Batch reciprocal: produces the reciprocal of each of the num input
// elements using a single curve25519_recip call plus multiplications
// (presumably curve25519_mul / curve25519_recip are field multiply /
// field inverse).
// In the five-argument form, in and out are walked with a byte stride:
// element k is located offset*k bytes past the in / out pointer, so the
// elements can be members of a larger struct. tmp is indexed as an ordinary
// array and needs num entries.
// NOTE(review): this is a diff listing; the four-argument signature and the
// lines using *in[i] / *out[i] appear to be the removed pointer-array
// version shown next to its replacement.
static void
curve25519_batchrecip(bignum25519 *out[], bignum25519 tmp[], bignum25519 * const in[], size_t num) {
bignum25519 ALIGN(16) acc, tmpacc;
curve25519_batchrecip(bignum25519 *out, const bignum25519 *in, bignum25519 *tmp, size_t num, size_t offset) {
bignum25519 ALIGN(16) acc,tmpacc;
size_t i;
const bignum25519 *inp;
bignum25519 *outp;
// acc starts at one and accumulates the running product of the inputs
curve25519_setone(acc);
inp = in;
for (i = 0; i < num; ++i) {
// tmp[i] = product of all inputs that come before element i
curve25519_copy(tmp[i], acc);
curve25519_mul(acc, acc, *in[i]);
curve25519_mul(acc, acc, *inp);
// advance to the next element by offset bytes, not by sizeof(bignum25519)
inp = (const bignum25519 *)((const char *)inp + offset);
}
// acc becomes the reciprocal of the product of all inputs
curve25519_recip(acc, acc);
i = num;
// start one element past the end; the loop steps back before each access
inp = (const bignum25519 *)((const char *)in + offset * num);
outp = (bignum25519 *)((char *)out + offset * num);
while (i--) {
curve25519_mul(tmpacc, acc, *in[i]);
curve25519_mul(*out[i], acc, tmp[i]);
inp = (const bignum25519 *)((const char *)inp - offset);
outp = (bignum25519 *)((char *)outp - offset);
// the input is read into tmpacc before the output is stored, so the
// same element can serve as both input and output
curve25519_mul(tmpacc, acc, *inp);
// output i = (reciprocal of product up to i) * (product before i)
curve25519_mul(*outp, acc, tmp[i]);
// acc now holds the reciprocal of the product of the inputs before i
curve25519_copy(acc, tmpacc);
}
}

View file

@ -8,15 +8,15 @@ DONNA_INLINE static void
ge25519_p1p1_to_partial(ge25519 *r, const ge25519_p1p1 *p) {
curve25519_mul(r->x, p->x, p->t);
curve25519_mul(r->y, p->y, p->z);
curve25519_mul(r->z, p->z, p->t);
curve25519_mul(r->z, p->z, p->t);
}
DONNA_INLINE static void
ge25519_p1p1_to_full(ge25519 *r, const ge25519_p1p1 *p) {
curve25519_mul(r->x, p->x, p->t);
curve25519_mul(r->y, p->y, p->z);
curve25519_mul(r->z, p->z, p->t);
curve25519_mul(r->t, p->x, p->y);
curve25519_mul(r->z, p->z, p->t);
curve25519_mul(r->t, p->x, p->y);
}
static void
@ -190,13 +190,12 @@ ge25519_pack(unsigned char r[32], const ge25519 *p) {
r[31] ^= ((parity[0] & 1) << 7);
}
// assumes inz[] points to things in in[]
// NOTE: leaves in unfinished state
static void
ge25519_batchpack_destructive_1(bytes32 out[], ge25519 in[], bignum25519 *inz[], bignum25519 tmp[], size_t num) {
ge25519_batchpack_destructive_1(bytes32 *out, ge25519 *in, bignum25519 *tmp, size_t num) {
bignum25519 ty;
curve25519_batchrecip(inz, tmp, inz, num);
curve25519_batchrecip(&in->z, &in->z, tmp, num, sizeof(ge25519));
for (size_t i = 0; i < num; ++i) {
curve25519_mul(ty, in[i].y, in[i].z);
@ -276,7 +275,7 @@ ge25519_unpack_negative_vartime(ge25519 *r, const unsigned char p[32]) {
#define S2_TABLE_SIZE (1<<(S2_SWINDOWSIZE-2))
/* computes [s1]p1 + [s2]basepoint */
static void
static void
ge25519_double_scalarmult_vartime(ge25519 *r, const ge25519 *p1, const bignum256modm s1, const bignum256modm s2) {
signed char slide1[256], slide2[256];
ge25519_pniels pre1[S1_TABLE_SIZE];
@ -371,7 +370,7 @@ ge25519_scalarmult_base_niels(ge25519 *r, const uint8_t basepoint_table[256][96]
curve25519_add_reduce(r->y, t.xaddy, t.ysubx);
memset(r->z, 0, sizeof(bignum25519));
curve25519_copy(r->t, t.t2d);
r->z[0] = 2;
r->z[0] = 2;
for (i = 3; i < 64; i += 2) {
ge25519_scalarmult_base_choose_niels(&t, basepoint_table, i / 2, b[i]);
ge25519_nielsadd2(r, &t);

View file

@ -14,7 +14,7 @@ ge25519_p1p1_to_partial(ge25519 *r, const ge25519_p1p1 *p) {
curve25519_untangle64(r->x, r->z, xzout);
}
static void
static void
ge25519_p1p1_to_full(ge25519 *r, const ge25519_p1p1 *p) {
packed64bignum25519 ALIGN(16) zy, xt, xx, zz, ty;
curve25519_tangle64(ty, p->t, p->y);
@ -222,10 +222,10 @@ ge25519_pack(unsigned char r[32], const ge25519 *p) {
// overwrites in[i].z of every element (formerly addressed through inz[])
// NOTE: leaves in unfinished state
static void
ge25519_batchpack_destructive_1(bytes32 out[], ge25519 in[], bignum25519 *inz[], bignum25519 tmp[], size_t num) {
ge25519_batchpack_destructive_1(bytes32 *out, ge25519 *in, bignum25519 *tmp, size_t num) {
bignum25519 ALIGN(16) ty;
curve25519_batchrecip(inz, tmp, inz, num);
curve25519_batchrecip(&in->z, &in->z, tmp, num, sizeof(ge25519));
for (size_t i = 0; i < num; ++i) {
curve25519_mul(ty, in[i].y, in[i].z);
@ -395,7 +395,7 @@ ge25519_scalarmult_base_niels(ge25519 *r, const uint8_t table[256][96], const bi
ge25519_scalarmult_base_choose_niels(&t, table, 0, b[1]);
curve25519_sub_reduce(r->x, t.xaddy, t.ysubx);
curve25519_add_reduce(r->y, t.xaddy, t.ysubx);
memset(r->z, 0, sizeof(bignum25519));
memset(r->z, 0, sizeof(bignum25519));
r->z[0] = 2;
curve25519_copy(r->t, t.t2d);
for (i = 3; i < 64; i += 2) {

View file

@ -11,8 +11,6 @@
#define ed25519_keygen ed25519_ref10_keygen
#include "ref10/ge.h"
#define GEZ(x) ((x).Z)
/* The basepoint multiplied by 8. */
static const ge_cached ge_eightpoint = {
/* YplusX */
@ -80,8 +78,6 @@ inline static void ge_initeightpoint(void) {}
#define ge_p3_batchtobytes_destructive_1 ge25519_batchpack_destructive_1
#define ge_p3_batchtobytes_destructive_finish ge25519_batchpack_destructive_finish
#define GEZ(x) ((x).z)
#endif
@ -190,8 +186,6 @@ static int ed25519_keypair(unsigned char *pk,unsigned char *sk)
#define ge_p3_batchtobytes_destructive_1 ge25519_batchpack_destructive_1
#define ge_p3_batchtobytes_destructive_finish ge25519_batchpack_destructive_finish
#define GEZ(x) ((x).z)
DONNA_INLINE static void ge_add(ge25519_p1p1 *r,const ge25519 *p,const ge25519_pniels *q)
{
ge25519_pnielsadd_p1p1(r,p,q,0);

View file

@ -53,7 +53,7 @@ extern void fe_sq(fe,const fe);
extern void fe_sq2(fe,const fe);
extern void fe_mul121666(fe,const fe);
extern void fe_invert(fe,const fe);
extern void fe_batchinvert(fe *out[],fe tmp[],fe * const in[], size_t num);
extern void fe_batchinvert(fe *out,fe *in,fe *tmp,size_t num,size_t shift);
extern void fe_pow22523(fe,const fe);
#endif

View file

@ -1,26 +1,34 @@
#include "fe.h"
// tmp MUST != out
// tmp MUST != out or in
// in MAY == out
//
// Batch inversion: produces the inverse of each of the num input elements
// using a single fe_invert call plus multiplications (presumably
// fe_mul / fe_invert are field multiply / field inverse).
// In the five-argument form, in and out are walked with a byte stride:
// element k is located shift*k bytes past the in / out pointer, so the
// elements can be members of a larger struct. tmp is indexed as an ordinary
// array and needs num entries.
// NOTE(review): a zero input makes the running product zero, which would
// presumably spoil every output of the batch - confirm callers never pass 0.
// NOTE(review): this is a diff listing; the four-argument signature and the
// lines using *in[i] / *out[i] appear to be the removed pointer-array
// version shown next to its replacement.
void fe_batchinvert(fe *out[],fe tmp[],fe * const in[], size_t num)
void fe_batchinvert(fe *out,fe *in,fe *tmp,size_t num,size_t shift)
{
fe acc;
fe tmpacc;
size_t i;
fe *inp;
fe *outp;
// acc accumulates the running product of the inputs
// (presumably fe_1 sets it to one)
fe_1(acc);
inp = in;
for (i = 0;i < num;++i) {
// tmp[i] = product of all inputs that come before element i
fe_copy(tmp[i],acc);
fe_mul(acc,acc,*in[i]);
fe_mul(acc,acc,*inp);
// advance to the next element by shift bytes, not by sizeof(fe)
inp = (fe *)((char *)inp + shift);
}
// acc becomes the inverse of the product of all inputs
fe_invert(acc,acc);
i = num;
// start one element past the end; the loop steps back before each access
inp = (fe *)((char *)in + shift * num);
outp = (fe *)((char *)out + shift * num);
while (i--) {
fe_mul(tmpacc,acc,*in[i]);
fe_mul(*out[i],acc,tmp[i]);
inp = (fe *)((char *)inp - shift);
outp = (fe *)((char *)outp - shift);
// the input is read into tmpacc before the output is stored, which is
// what allows in == out
fe_mul(tmpacc,acc,*inp);
// output i = (inverse of product up to i) * (product before i)
fe_mul(*outp,acc,tmp[i]);
// acc now holds the inverse of the product of the inputs before i
fe_copy(acc,tmpacc);
}
}

View file

@ -77,7 +77,7 @@ typedef unsigned char bytes32[32];
extern void ge_tobytes(unsigned char *,const ge_p2 *);
extern void ge_p3_tobytes(unsigned char *,const ge_p3 *);
extern void ge_p3_batchtobytes_destructive_1(bytes32 out[],ge_p3 in[],fe *inz[],fe tmp[],size_t num);
extern void ge_p3_batchtobytes_destructive_1(bytes32 *out,ge_p3 *in,fe *tmp,size_t num);
extern void ge_p3_batchtobytes_destructive_finish(bytes32 out,ge_p3 *unf);
extern int ge_frombytes_negate_vartime(ge_p3 *,const unsigned char *);

View file

@ -2,11 +2,11 @@
// the .Z of every in[] element will be overwritten (formerly addressed through the inz pointer array)
// NOTE: leaves in unfinished state
void ge_p3_batchtobytes_destructive_1(bytes32 out[],ge_p3 in[],fe *inz[],fe tmp[],size_t num)
void ge_p3_batchtobytes_destructive_1(bytes32 *out,ge_p3 *in,fe *tmp,size_t num)
{
fe y;
fe_batchinvert(inz,tmp,inz,num);
fe_batchinvert(&in->Z,&in->Z,tmp,num,sizeof(ge_p3));
for (size_t i = 0;i < num;++i) {
fe_mul(y,in[i].Y,in[i].Z);

8
main.c
View file

@ -186,6 +186,8 @@ enum worker_type {
WT_BATCH,
};
#define TATTR 0
int main(int argc,char **argv)
{
const char *outfile = 0;
@ -518,7 +520,7 @@ int main(int argc,char **argv)
VEC_ZERO(tstats);
#endif
#if 0
#if TATTR
pthread_attr_t tattr,*tattrp = &tattr;
tret = pthread_attr_init(tattrp);
if (tret) {
@ -526,7 +528,7 @@ int main(int argc,char **argv)
tattrp = 0;
}
else {
tret = pthread_attr_setstacksize(tattrp,80<<10);
tret = pthread_attr_setstacksize(tattrp,2<<20);
if (tret)
perror("pthread_attr_setstacksize");
}
@ -560,7 +562,7 @@ int main(int argc,char **argv)
}
}
#if 0
#if TATTR
if (tattrp) {
tret = pthread_attr_destroy(tattrp);
if (tret)

View file

@ -13,7 +13,6 @@ void *worker_batch(void *task)
// state to keep batch data
ge_p3 ge_batch[BATCHNUM];
fe *(batchgez)[BATCHNUM];
fe tmp_batch[BATCHNUM];
bytes32 pk_batch[BATCHNUM];
@ -24,10 +23,6 @@ void *worker_batch(void *task)
struct statstruct *st = (struct statstruct *)task;
#endif
// set up right pointers
for (size_t b = 0;b < BATCHNUM;++b)
batchgez[b] = &GEZ(ge_batch[b]);
PREFILTER
memcpy(secret,skprefix,SKPREFIX_SIZE);
@ -64,7 +59,7 @@ initseed:
ge_p1p1_to_p3(&ge_public,&sum);
}
// NOTE: leaves unfinished one bit at the very end
ge_p3_batchtobytes_destructive_1(pk_batch,ge_batch,batchgez,tmp_batch,BATCHNUM);
ge_p3_batchtobytes_destructive_1(pk_batch,ge_batch,tmp_batch,BATCHNUM);
#ifdef STATISTICS
st->numcalc.v += BATCHNUM;

View file

@ -14,7 +14,6 @@ void *worker_batch_pass(void *task)
// state to keep batch data
ge_p3 ge_batch[BATCHNUM];
fe *(batchgez)[BATCHNUM];
fe tmp_batch[BATCHNUM];
bytes32 pk_batch[BATCHNUM];
@ -25,10 +24,6 @@ void *worker_batch_pass(void *task)
struct statstruct *st = (struct statstruct *)task;
#endif
// set up right pointers
for (size_t b = 0;b < BATCHNUM;++b)
batchgez[b] = &GEZ(ge_batch[b]);
PREFILTER
memcpy(secret,skprefix,SKPREFIX_SIZE);
@ -70,7 +65,7 @@ initseed:
ge_p1p1_to_p3(&ge_public,&sum);
}
// NOTE: leaves unfinished one bit at the very end
ge_p3_batchtobytes_destructive_1(pk_batch,ge_batch,batchgez,tmp_batch,BATCHNUM);
ge_p3_batchtobytes_destructive_1(pk_batch,ge_batch,tmp_batch,BATCHNUM);
#ifdef STATISTICS
st->numcalc.v += BATCHNUM;