From ae55e850c704b216e15f5fd6af4b52319f727495 Mon Sep 17 00:00:00 2001
From: Pieter Wuille
Date: Sun, 22 Feb 2015 05:54:56 -0800
Subject: [PATCH] Use faster byteswapping and avoid alignment-increasing casts.

---
 configure.ac    |  2 ++
 src/hash.h      |  2 +-
 src/hash_impl.h | 95 +++++++++++++++++++++----------------------------
 3 files changed, 44 insertions(+), 55 deletions(-)

diff --git a/configure.ac b/configure.ac
index 62f2229362..51f7511bae 100644
--- a/configure.ac
+++ b/configure.ac
@@ -305,6 +305,8 @@ if test x"$use_endomorphism" = x"yes"; then
   AC_DEFINE(USE_ENDOMORPHISM, 1, [Define this symbol to use endomorphism optimization])
 fi
 
+AC_C_BIGENDIAN()
+
 AC_MSG_NOTICE([Using assembly optimizations: $set_asm])
 AC_MSG_NOTICE([Using field implementation: $set_field])
 AC_MSG_NOTICE([Using bignum implementation: $set_bignum])
diff --git a/src/hash.h b/src/hash.h
index d1e65b968a..ee625b8e58 100644
--- a/src/hash.h
+++ b/src/hash.h
@@ -12,7 +12,7 @@
 
 typedef struct {
     uint32_t s[32];
-    unsigned char buf[64];
+    uint32_t buf[16]; /* In big endian */
     size_t bytes;
 } secp256k1_sha256_t;
 
diff --git a/src/hash_impl.h b/src/hash_impl.h
index 72b70037f6..6ceb9aa13e 100644
--- a/src/hash_impl.h
+++ b/src/hash_impl.h
@@ -27,8 +27,11 @@
     (h) = t1 + t2; \
 } while(0)
 
-#define ReadBE32(p) (((uint32_t)((p)[0])) << 24 | ((uint32_t)((p)[1])) << 16 | ((uint32_t)((p)[2])) << 8 | ((uint32_t)((p)[3])))
-#define WriteBE32(p, v) do { (p)[0] = (v) >> 24; (p)[1] = (v) >> 16; (p)[2] = (v) >> 8; (p)[3] = (v); } while(0)
+#ifdef WORDS_BIGENDIAN
+#define BE32(x) (x)
+#else
+#define BE32(p) ((((p) & 0xFF) << 24) | (((p) & 0xFF00) << 8) | (((p) & 0xFF0000) >> 8) | (((p) & 0xFF000000) >> 24))
+#endif
 
 static void secp256k1_sha256_initialize(secp256k1_sha256_t *hash) {
     hash->s[0] = 0x6a09e667ul;
@@ -42,27 +45,27 @@ static void secp256k1_sha256_initialize(secp256k1_sha256_t *hash) {
     hash->bytes = 0;
 }
 
-/** Perform one SHA-256 transformation, processing a 64-byte chunk. */
-static void secp256k1_sha256_transform(uint32_t* s, const unsigned char* chunk) {
+/** Perform one SHA-256 transformation, processing 16 big endian 32-bit words. */
+static void secp256k1_sha256_transform(uint32_t* s, const uint32_t* chunk) {
     uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f = s[5], g = s[6], h = s[7];
     uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;
 
-    Round(a, b, c, d, e, f, g, h, 0x428a2f98, w0 = ReadBE32(chunk + 0));
-    Round(h, a, b, c, d, e, f, g, 0x71374491, w1 = ReadBE32(chunk + 4));
-    Round(g, h, a, b, c, d, e, f, 0xb5c0fbcf, w2 = ReadBE32(chunk + 8));
-    Round(f, g, h, a, b, c, d, e, 0xe9b5dba5, w3 = ReadBE32(chunk + 12));
-    Round(e, f, g, h, a, b, c, d, 0x3956c25b, w4 = ReadBE32(chunk + 16));
-    Round(d, e, f, g, h, a, b, c, 0x59f111f1, w5 = ReadBE32(chunk + 20));
-    Round(c, d, e, f, g, h, a, b, 0x923f82a4, w6 = ReadBE32(chunk + 24));
-    Round(b, c, d, e, f, g, h, a, 0xab1c5ed5, w7 = ReadBE32(chunk + 28));
-    Round(a, b, c, d, e, f, g, h, 0xd807aa98, w8 = ReadBE32(chunk + 32));
-    Round(h, a, b, c, d, e, f, g, 0x12835b01, w9 = ReadBE32(chunk + 36));
-    Round(g, h, a, b, c, d, e, f, 0x243185be, w10 = ReadBE32(chunk + 40));
-    Round(f, g, h, a, b, c, d, e, 0x550c7dc3, w11 = ReadBE32(chunk + 44));
-    Round(e, f, g, h, a, b, c, d, 0x72be5d74, w12 = ReadBE32(chunk + 48));
-    Round(d, e, f, g, h, a, b, c, 0x80deb1fe, w13 = ReadBE32(chunk + 52));
-    Round(c, d, e, f, g, h, a, b, 0x9bdc06a7, w14 = ReadBE32(chunk + 56));
-    Round(b, c, d, e, f, g, h, a, 0xc19bf174, w15 = ReadBE32(chunk + 60));
+    Round(a, b, c, d, e, f, g, h, 0x428a2f98, w0 = BE32(chunk[0]));
+    Round(h, a, b, c, d, e, f, g, 0x71374491, w1 = BE32(chunk[1]));
+    Round(g, h, a, b, c, d, e, f, 0xb5c0fbcf, w2 = BE32(chunk[2]));
+    Round(f, g, h, a, b, c, d, e, 0xe9b5dba5, w3 = BE32(chunk[3]));
+    Round(e, f, g, h, a, b, c, d, 0x3956c25b, w4 = BE32(chunk[4]));
+    Round(d, e, f, g, h, a, b, c, 0x59f111f1, w5 = BE32(chunk[5]));
+    Round(c, d, e, f, g, h, a, b, 0x923f82a4, w6 = BE32(chunk[6]));
+    Round(b, c, d, e, f, g, h, a, 0xab1c5ed5, w7 = BE32(chunk[7]));
+    Round(a, b, c, d, e, f, g, h, 0xd807aa98, w8 = BE32(chunk[8]));
+    Round(h, a, b, c, d, e, f, g, 0x12835b01, w9 = BE32(chunk[9]));
+    Round(g, h, a, b, c, d, e, f, 0x243185be, w10 = BE32(chunk[10]));
+    Round(f, g, h, a, b, c, d, e, 0x550c7dc3, w11 = BE32(chunk[11]));
+    Round(e, f, g, h, a, b, c, d, 0x72be5d74, w12 = BE32(chunk[12]));
+    Round(d, e, f, g, h, a, b, c, 0x80deb1fe, w13 = BE32(chunk[13]));
+    Round(c, d, e, f, g, h, a, b, 0x9bdc06a7, w14 = BE32(chunk[14]));
+    Round(b, c, d, e, f, g, h, a, 0xc19bf174, w15 = BE32(chunk[15]));
 
     Round(a, b, c, d, e, f, g, h, 0xe49b69c1, w0 += sigma1(w14) + w9 + sigma0(w1));
     Round(h, a, b, c, d, e, f, g, 0xefbe4786, w1 += sigma1(w15) + w10 + sigma0(w2));
@@ -126,52 +129,36 @@ static void secp256k1_sha256_transform(uint32_t* s, const unsigned char* chunk)
 }
 
 static void secp256k1_sha256_write(secp256k1_sha256_t *hash, const unsigned char *data, size_t len) {
-    const unsigned char* end = data + len;
-    size_t bufsize = hash->bytes % 64;
-    if (bufsize && bufsize + len >= 64) {
+    size_t bufsize = hash->bytes & 0x3F;
+    hash->bytes += len;
+    while (bufsize + len >= 64) {
         /* Fill the buffer, and process it. */
-        memcpy(hash->buf + bufsize, data, 64 - bufsize);
-        hash->bytes += 64 - bufsize;
+        memcpy(((unsigned char*)hash->buf) + bufsize, data, 64 - bufsize);
         data += 64 - bufsize;
+        len -= 64 - bufsize;
         secp256k1_sha256_transform(hash->s, hash->buf);
         bufsize = 0;
     }
-    while (end >= data + 64) {
-        /* Process full chunks directly from the source. */
-        secp256k1_sha256_transform(hash->s, data);
-        hash->bytes += 64;
-        data += 64;
-    }
-    if (end > data) {
+    if (len) {
         /* Fill the buffer with what remains. */
-        memcpy(hash->buf + bufsize, data, end - data);
-        hash->bytes += end - data;
+        memcpy(((unsigned char*)hash->buf) + bufsize, data, len);
     }
 }
 
 static void secp256k1_sha256_finalize(secp256k1_sha256_t *hash, unsigned char *out32) {
     static const unsigned char pad[64] = {0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-    unsigned char sizedesc[8];
-    WriteBE32(sizedesc, hash->bytes >> 29);
-    WriteBE32(sizedesc + 4, hash->bytes << 3);
+    uint32_t sizedesc[2];
+    uint32_t out[8];
+    int i = 0;
+    sizedesc[0] = BE32(hash->bytes >> 29);
+    sizedesc[1] = BE32(hash->bytes << 3);
     secp256k1_sha256_write(hash, pad, 1 + ((119 - (hash->bytes % 64)) % 64));
-    secp256k1_sha256_write(hash, sizedesc, 8);
-    WriteBE32(out32, hash->s[0]);
-    hash->s[0] = 0;
-    WriteBE32(out32 + 4, hash->s[1]);
-    hash->s[1] = 0;
-    WriteBE32(out32 + 8, hash->s[2]);
-    hash->s[2] = 0;
-    WriteBE32(out32 + 12, hash->s[3]);
-    hash->s[3] = 0;
-    WriteBE32(out32 + 16, hash->s[4]);
-    hash->s[4] = 0;
-    WriteBE32(out32 + 20, hash->s[5]);
-    hash->s[5] = 0;
-    WriteBE32(out32 + 24, hash->s[6]);
-    hash->s[6] = 0;
-    WriteBE32(out32 + 28, hash->s[7]);
-    hash->s[7] = 0;
+    secp256k1_sha256_write(hash, (const unsigned char*)sizedesc, 8);
+    for (i = 0; i < 8; i++) {
+        out[i] = BE32(hash->s[i]);
+        hash->s[i] = 0;
+    }
+    memcpy(out32, (const unsigned char*)out, 32);
 }
 
 static void secp256k1_hmac_sha256_initialize(secp256k1_hmac_sha256_t *hash, const unsigned char *key, size_t keylen) {