mirror of
https://github.com/bitcoin/bitcoin.git
synced 2025-01-12 12:52:35 -03:00
Merge #18553: Avoid non-trivial global constants in SHA-NI code
8508473094
Avoid non-trivial global constants in SHA-NI code (Pieter Wuille) Pull request description: This is a potential solution for #18456. It seems that the compiler cannot turn `_mm_set_epi64x(<constant>,<constnant>)` into a constant itself, and thus emits a global initializer for the `MASK`, `INIT0`, and `INIT1` global constants in the sha-ni SHA256 implementation. Change this by turning them into dumb byte arrays, loading them into an SSE variable whenever needed. Tested on a SHA-NI capable machine. I do not observe any obvious performance impact (but this is hard to measure, it's already very fast...). ACKs for top commit: laanwj: Code review ACK8508473094
elichai: ACK8508473094
Tree-SHA512: 07049cf1a33624c22df2be48b814d5636c037b368861eb13ee073bdce2b7c902a56e96518218961f55a2a1631a40825ded6dbbc28d7fe0e7fec267d704e39112
This commit is contained in:
commit
ce4e1f0282
1 changed files with 17 additions and 19 deletions
|
@ -11,13 +11,11 @@
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
const __m128i MASK = _mm_set_epi64x(0x0c0d0e0f08090a0bULL, 0x0405060700010203ULL);
|
alignas(__m128i) const uint8_t MASK[16] = {0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04, 0x0b, 0x0a, 0x09, 0x08, 0x0f, 0x0e, 0x0d, 0x0c};
|
||||||
const __m128i INIT0 = _mm_set_epi64x(0x6a09e667bb67ae85ull, 0x510e527f9b05688cull);
|
alignas(__m128i) const uint8_t INIT0[16] = {0x8c, 0x68, 0x05, 0x9b, 0x7f, 0x52, 0x0e, 0x51, 0x85, 0xae, 0x67, 0xbb, 0x67, 0xe6, 0x09, 0x6a};
|
||||||
const __m128i INIT1 = _mm_set_epi64x(0x3c6ef372a54ff53aull, 0x1f83d9ab5be0cd19ull);
|
alignas(__m128i) const uint8_t INIT1[16] = {0x19, 0xcd, 0xe0, 0x5b, 0xab, 0xd9, 0x83, 0x1f, 0x3a, 0xf5, 0x4f, 0xa5, 0x72, 0xf3, 0x6e, 0x3c};
|
||||||
|
|
||||||
void inline __attribute__((always_inline)) QuadRound(__m128i& state0, __m128i& state1, uint64_t k1, uint64_t k0)
|
void inline __attribute__((always_inline)) QuadRound(__m128i& state0, __m128i& state1, uint64_t k1, uint64_t k0)
|
||||||
{
|
{
|
||||||
|
@ -67,12 +65,12 @@ void inline __attribute__((always_inline)) Unshuffle(__m128i& s0, __m128i& s1)
|
||||||
|
|
||||||
__m128i inline __attribute__((always_inline)) Load(const unsigned char* in)
|
__m128i inline __attribute__((always_inline)) Load(const unsigned char* in)
|
||||||
{
|
{
|
||||||
return _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)in), MASK);
|
return _mm_shuffle_epi8(_mm_loadu_si128((const __m128i*)in), _mm_load_si128((const __m128i*)MASK));
|
||||||
}
|
}
|
||||||
|
|
||||||
void inline __attribute__((always_inline)) Save(unsigned char* out, __m128i s)
|
void inline __attribute__((always_inline)) Save(unsigned char* out, __m128i s)
|
||||||
{
|
{
|
||||||
_mm_storeu_si128((__m128i*)out, _mm_shuffle_epi8(s, MASK));
|
_mm_storeu_si128((__m128i*)out, _mm_shuffle_epi8(s, _mm_load_si128((const __m128i*)MASK)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -149,8 +147,8 @@ void Transform_2way(unsigned char* out, const unsigned char* in)
|
||||||
__m128i bm0, bm1, bm2, bm3, bs0, bs1, bso0, bso1;
|
__m128i bm0, bm1, bm2, bm3, bs0, bs1, bso0, bso1;
|
||||||
|
|
||||||
/* Transform 1 */
|
/* Transform 1 */
|
||||||
bs0 = as0 = INIT0;
|
bs0 = as0 = _mm_load_si128((const __m128i*)INIT0);
|
||||||
bs1 = as1 = INIT1;
|
bs1 = as1 = _mm_load_si128((const __m128i*)INIT1);
|
||||||
am0 = Load(in);
|
am0 = Load(in);
|
||||||
bm0 = Load(in + 64);
|
bm0 = Load(in + 64);
|
||||||
QuadRound(as0, as1, am0, 0xe9b5dba5b5c0fbcfull, 0x71374491428a2f98ull);
|
QuadRound(as0, as1, am0, 0xe9b5dba5b5c0fbcfull, 0x71374491428a2f98ull);
|
||||||
|
@ -219,10 +217,10 @@ void Transform_2way(unsigned char* out, const unsigned char* in)
|
||||||
ShiftMessageC(bm1, bm2, bm3);
|
ShiftMessageC(bm1, bm2, bm3);
|
||||||
QuadRound(as0, as1, am3, 0xc67178f2bef9A3f7ull, 0xa4506ceb90befffaull);
|
QuadRound(as0, as1, am3, 0xc67178f2bef9A3f7ull, 0xa4506ceb90befffaull);
|
||||||
QuadRound(bs0, bs1, bm3, 0xc67178f2bef9A3f7ull, 0xa4506ceb90befffaull);
|
QuadRound(bs0, bs1, bm3, 0xc67178f2bef9A3f7ull, 0xa4506ceb90befffaull);
|
||||||
as0 = _mm_add_epi32(as0, INIT0);
|
as0 = _mm_add_epi32(as0, _mm_load_si128((const __m128i*)INIT0));
|
||||||
bs0 = _mm_add_epi32(bs0, INIT0);
|
bs0 = _mm_add_epi32(bs0, _mm_load_si128((const __m128i*)INIT0));
|
||||||
as1 = _mm_add_epi32(as1, INIT1);
|
as1 = _mm_add_epi32(as1, _mm_load_si128((const __m128i*)INIT1));
|
||||||
bs1 = _mm_add_epi32(bs1, INIT1);
|
bs1 = _mm_add_epi32(bs1, _mm_load_si128((const __m128i*)INIT1));
|
||||||
|
|
||||||
/* Transform 2 */
|
/* Transform 2 */
|
||||||
aso0 = as0;
|
aso0 = as0;
|
||||||
|
@ -275,8 +273,8 @@ void Transform_2way(unsigned char* out, const unsigned char* in)
|
||||||
bm1 = bs1;
|
bm1 = bs1;
|
||||||
|
|
||||||
/* Transform 3 */
|
/* Transform 3 */
|
||||||
bs0 = as0 = INIT0;
|
bs0 = as0 = _mm_load_si128((const __m128i*)INIT0);
|
||||||
bs1 = as1 = INIT1;
|
bs1 = as1 = _mm_load_si128((const __m128i*)INIT1);
|
||||||
QuadRound(as0, as1, am0, 0xe9b5dba5B5c0fbcfull, 0x71374491428a2f98ull);
|
QuadRound(as0, as1, am0, 0xe9b5dba5B5c0fbcfull, 0x71374491428a2f98ull);
|
||||||
QuadRound(bs0, bs1, bm0, 0xe9b5dba5B5c0fbcfull, 0x71374491428a2f98ull);
|
QuadRound(bs0, bs1, bm0, 0xe9b5dba5B5c0fbcfull, 0x71374491428a2f98ull);
|
||||||
QuadRound(as0, as1, am1, 0xab1c5ed5923f82a4ull, 0x59f111f13956c25bull);
|
QuadRound(as0, as1, am1, 0xab1c5ed5923f82a4ull, 0x59f111f13956c25bull);
|
||||||
|
@ -339,10 +337,10 @@ void Transform_2way(unsigned char* out, const unsigned char* in)
|
||||||
ShiftMessageC(bm1, bm2, bm3);
|
ShiftMessageC(bm1, bm2, bm3);
|
||||||
QuadRound(as0, as1, am3, 0xc67178f2bef9a3f7ull, 0xa4506ceb90befffaull);
|
QuadRound(as0, as1, am3, 0xc67178f2bef9a3f7ull, 0xa4506ceb90befffaull);
|
||||||
QuadRound(bs0, bs1, bm3, 0xc67178f2bef9a3f7ull, 0xa4506ceb90befffaull);
|
QuadRound(bs0, bs1, bm3, 0xc67178f2bef9a3f7ull, 0xa4506ceb90befffaull);
|
||||||
as0 = _mm_add_epi32(as0, INIT0);
|
as0 = _mm_add_epi32(as0, _mm_load_si128((const __m128i*)INIT0));
|
||||||
bs0 = _mm_add_epi32(bs0, INIT0);
|
bs0 = _mm_add_epi32(bs0, _mm_load_si128((const __m128i*)INIT0));
|
||||||
as1 = _mm_add_epi32(as1, INIT1);
|
as1 = _mm_add_epi32(as1, _mm_load_si128((const __m128i*)INIT1));
|
||||||
bs1 = _mm_add_epi32(bs1, INIT1);
|
bs1 = _mm_add_epi32(bs1, _mm_load_si128((const __m128i*)INIT1));
|
||||||
|
|
||||||
/* Extract hash into out */
|
/* Extract hash into out */
|
||||||
Unshuffle(as0, as1);
|
Unshuffle(as0, as1);
|
||||||
|
|
Loading…
Reference in a new issue