From 104d5dd778e7bacba9613d70afc422ff676e5111 Mon Sep 17 00:00:00 2001 From: Martin Leitner-Ankerl Date: Sat, 22 Mar 2025 15:16:17 +0100 Subject: [PATCH 1/3] SaltedOutpointHasher uses rapidhash SipHashUint256Extra is rather slow. For the purpose of generating a hash from a COutPoint and some seed that is only used inside a hashmap, it is sufficient to use a non-cryptographic hash. rapidhash [1] is a well tested and very fast hash function. This implementation strips down this hash function, originally implemented for a memory buffer, to be used with the COutPoint + 2*64bit seed as the input. [1] https://github.com/Nicoshev/rapidhash --- src/crypto/siphash.cpp | 50 ++++++++++++++++++++++++++++++++++++++++++ src/crypto/siphash.h | 1 + src/util/hasher.h | 2 +- 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/src/crypto/siphash.cpp b/src/crypto/siphash.cpp index 8004a0548ec..434c504ebdd 100644 --- a/src/crypto/siphash.cpp +++ b/src/crypto/siphash.cpp @@ -172,3 +172,53 @@ uint64_t SipHashUint256Extra(uint64_t k0, uint64_t k1, const uint256& val, uint3 SIPROUND; return v0 ^ v1 ^ v2 ^ v3; } + + +uint64_t ModifiedRapidHash(uint64_t k0, uint64_t k1, const uint256& val, uint32_t extra) +{ + auto const rapid_mum = [](uint64_t* a, uint64_t* b) { +#if defined(__SIZEOF_INT128__) + __uint128_t r = *a; + r *= *b; + *a = (uint64_t)r; + *b = (uint64_t)(r >> 64); +#elif defined(_MSC_VER) && (defined(_WIN64) || defined(_M_HYBRID_CHPE_ARM64)) +#if defined(_M_X64) + *a = _umul128(*a, *b, b); +#else + uint64_t c = __umulh(*a, *b); + *a = *a * *b; + *b = c; +#endif +#else + uint64_t ha = *a >> 32, hb = *b >> 32, la = (uint32_t)*a, lb = (uint32_t)*b, hi, lo; + uint64_t rh = ha * hb, rm0 = ha * lb, rm1 = hb * la, rl = la * lb, t = rl + (rm0 << 32), c = t < rl; + lo = t + (rm1 << 32); + c += lo < t; + hi = rh + (rm0 >> 32) + (rm1 >> 32) + c; + *a = lo; + *b = hi; +#endif + }; + + auto const rapid_mix = [&rapid_mum](uint64_t a, uint64_t b) -> uint64_t { + rapid_mum(&a, &b); + 
return a ^ b; + }; + + // This effectively behaves like rapidhash with that input: + // seed: k0, data: [val, k1, extra] + // So it hashes 32+8+4 = 44 bytes, plus uses the 8 bytes of k0 as the seed. + + // Default secret parameters. + static constexpr uint64_t secret[3] = {0x2d358dccaa6c78a5ull, 0x8bb84b93962eacc9ull, 0x4b33a62ed433d4a3ull}; + + // no need to mix the seed itself, as it is purely random. + uint64_t seed = k0; + seed = rapid_mix(val.GetUint64(0) ^ secret[2], val.GetUint64(1) ^ seed ^ secret[1]); + seed = rapid_mix(val.GetUint64(2) ^ secret[2], val.GetUint64(3) ^ seed); + uint64_t a = k1 ^ secret[1]; + uint64_t b = extra ^ seed; + rapid_mum(&a, &b); + return rapid_mix(a ^ secret[0], b ^ secret[1]); +} diff --git a/src/crypto/siphash.h b/src/crypto/siphash.h index 4fb3dc2f258..e1d4e266c9a 100644 --- a/src/crypto/siphash.h +++ b/src/crypto/siphash.h @@ -44,5 +44,6 @@ public: */ uint64_t SipHashUint256(uint64_t k0, uint64_t k1, const uint256& val); uint64_t SipHashUint256Extra(uint64_t k0, uint64_t k1, const uint256& val, uint32_t extra); +uint64_t ModifiedRapidHash(uint64_t k0, uint64_t k1, const uint256& val, uint32_t extra); #endif // BITCOIN_CRYPTO_SIPHASH_H diff --git a/src/util/hasher.h b/src/util/hasher.h index e4594c7ddaf..050e089300d 100644 --- a/src/util/hasher.h +++ b/src/util/hasher.h @@ -47,7 +47,7 @@ public: * @see https://gcc.gnu.org/onlinedocs/gcc-13.2.0/libstdc++/manual/manual/unordered_associative.html */ size_t operator()(const COutPoint& id) const noexcept { - return SipHashUint256Extra(k0, k1, id.hash, id.n); + return ModifiedRapidHash(k0, k1, id.hash, id.n); } }; From 723c49b63bb10da843fbb6efc6928dca415cc47f Mon Sep 17 00:00:00 2001 From: Martin Leitner-Ankerl Date: Sat, 22 Mar 2025 18:24:33 +0100 Subject: [PATCH 2/3] CCoinsViewCache::BatchWrite lookup optimization In the case when parent cache does not have an entry while child cache does, this replaces the double hash lookup (find + try_emplace) with a single try_emplace. 
--- src/coins.cpp | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/src/coins.cpp b/src/coins.cpp index 24a102b0bc1..4350d6699a4 100644 --- a/src/coins.cpp +++ b/src/coins.cpp @@ -186,30 +186,34 @@ bool CCoinsViewCache::BatchWrite(CoinsViewCacheCursor& cursor, const uint256 &ha if (!it->second.IsDirty()) { continue; } - CCoinsMap::iterator itUs = cacheCoins.find(it->first); - if (itUs == cacheCoins.end()) { + CCoinsMap::iterator itUs; + bool isInserted = false; + if (!(it->second.IsFresh() && it->second.coin.IsSpent())) { + std::tie(itUs, isInserted) = cacheCoins.try_emplace(it->first); + } else { + itUs = cacheCoins.find(it->first); + } + + if (isInserted) { // The parent cache does not have an entry, while the child cache does. // We can ignore it if it's both spent and FRESH in the child - if (!(it->second.IsFresh() && it->second.coin.IsSpent())) { - // Create the coin in the parent cache, move the data up - // and mark it as dirty. - itUs = cacheCoins.try_emplace(it->first).first; - CCoinsCacheEntry& entry{itUs->second}; - if (cursor.WillErase(*it)) { - // Since this entry will be erased, - // we can move the coin into us instead of copying it - entry.coin = std::move(it->second.coin); - } else { - entry.coin = it->second.coin; - } - cachedCoinsUsage += entry.coin.DynamicMemoryUsage(); - CCoinsCacheEntry::SetDirty(*itUs, m_sentinel); - // We can mark it FRESH in the parent if it was FRESH in the child - // Otherwise it might have just been flushed from the parent's cache - // and already exist in the grandparent - if (it->second.IsFresh()) CCoinsCacheEntry::SetFresh(*itUs, m_sentinel); + // Create the coin in the parent cache, move the data up + // and mark it as dirty. 
+ CCoinsCacheEntry& entry{itUs->second}; + if (cursor.WillErase(*it)) { + // Since this entry will be erased, + // we can move the coin into us instead of copying it + entry.coin = std::move(it->second.coin); + } else { + entry.coin = it->second.coin; } - } else { + cachedCoinsUsage += entry.coin.DynamicMemoryUsage(); + CCoinsCacheEntry::SetDirty(*itUs, m_sentinel); + // We can mark it FRESH in the parent if it was FRESH in the child + // Otherwise it might have just been flushed from the parent's cache + // and already exist in the grandparent + if (it->second.IsFresh()) CCoinsCacheEntry::SetFresh(*itUs, m_sentinel); + } else if (itUs != cacheCoins.end()) { // Found the entry in the parent cache if (it->second.IsFresh() && !itUs->second.coin.IsSpent()) { // The coin was marked FRESH in the child cache, but the coin From 18b2c263aab12c49350b20fa2bef407d8a9eff61 Mon Sep 17 00:00:00 2001 From: Martin Leitner-Ankerl Date: Sun, 23 Mar 2025 11:27:25 +0100 Subject: [PATCH 3/3] Use boost::unordered_node_map for CCoinsMap boost::unordered_node_map is a highly optimized hashmap, available since boost 1.82, that is API-compatible with std::unordered_map for our use case. It can also use the existing PoolAllocator. 
--- cmake/module/AddBoostIfNeeded.cmake | 2 +- doc/dependencies.md | 2 +- src/coins.h | 13 +++++++------ src/memusage.h | 20 ++++++++++++++++++++ src/test/pool_tests.cpp | 2 +- src/util/hasher.h | 4 ++++ 6 files changed, 34 insertions(+), 9 deletions(-) diff --git a/cmake/module/AddBoostIfNeeded.cmake b/cmake/module/AddBoostIfNeeded.cmake index ecd0d6f2aba..5cf2f8b0c46 100644 --- a/cmake/module/AddBoostIfNeeded.cmake +++ b/cmake/module/AddBoostIfNeeded.cmake @@ -26,7 +26,7 @@ function(add_boost_if_needed) cmake_policy(SET CMP0167 OLD) endif() set(Boost_NO_BOOST_CMAKE ON) - find_package(Boost 1.73.0 REQUIRED) + find_package(Boost 1.82.0 REQUIRED) mark_as_advanced(Boost_INCLUDE_DIR) set_target_properties(Boost::headers PROPERTIES IMPORTED_GLOBAL TRUE) target_compile_definitions(Boost::headers INTERFACE diff --git a/doc/dependencies.md b/doc/dependencies.md index 7c866a433db..23fc2b5214a 100644 --- a/doc/dependencies.md +++ b/doc/dependencies.md @@ -18,7 +18,7 @@ Bitcoin Core requires one of the following compilers. 
| Dependency | Releases | Version used | Minimum required | Runtime | | --- | --- | --- | --- | --- | | CMake | [link](https://cmake.org/) | N/A | [3.22](https://github.com/bitcoin/bitcoin/pull/30454) | No | -| [Boost](../depends/packages/boost.mk) | [link](https://www.boost.org/users/download/) | [1.81.0](https://github.com/bitcoin/bitcoin/pull/26557) | [1.73.0](https://github.com/bitcoin/bitcoin/pull/29066) | No | +| [Boost](../depends/packages/boost.mk) | [link](https://www.boost.org/users/download/) | [1.82.0](https://github.com/bitcoin/bitcoin/pull/26557) | [1.82.0](https://github.com/bitcoin/bitcoin/pull/29066) | No | | [libevent](../depends/packages/libevent.mk) | [link](https://github.com/libevent/libevent/releases) | [2.1.12-stable](https://github.com/bitcoin/bitcoin/pull/21991) | [2.1.8](https://github.com/bitcoin/bitcoin/pull/24681) | No | | glibc | [link](https://www.gnu.org/software/libc/) | N/A | [2.31](https://github.com/bitcoin/bitcoin/pull/29987) | Yes | | Linux Kernel (if building that platform) | [link](https://www.kernel.org/) | N/A | [3.17.0](https://github.com/bitcoin/bitcoin/pull/27699) | Yes | diff --git a/src/coins.h b/src/coins.h index 61fb4af6420..704ab3d6560 100644 --- a/src/coins.h +++ b/src/coins.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -221,12 +222,12 @@ public: * Using an additional sizeof(void*)*4 for MAX_BLOCK_SIZE_BYTES should thus be sufficient so that * all implementations can allocate the nodes from the PoolAllocator. 
*/ -using CCoinsMap = std::unordered_map, - PoolAllocator>; +using CCoinsMap = boost::unordered_node_map, + PoolAllocator>; using CCoinsMapMemoryResource = CCoinsMap::allocator_type::ResourceType; diff --git a/src/memusage.h b/src/memusage.h index 9d9e549ef22..c0945e3fce0 100644 --- a/src/memusage.h +++ b/src/memusage.h @@ -19,6 +19,7 @@ #include #include #include +#include namespace memusage @@ -215,6 +216,25 @@ static inline size_t DynamicUsage(const std::unordered_map +static inline size_t DynamicUsage(const boost::unordered_node_map, + MAX_BLOCK_SIZE_BYTES, + ALIGN_BYTES>>& m) +{ + auto* pool_resource = m.get_allocator().resource(); + + // The allocated chunks are stored in a std::list. Size per node should + // therefore be 3 pointers: next, previous, and a pointer to the chunk. + size_t estimated_list_node_size = MallocUsage(sizeof(void*) * 3); + size_t usage_resource = estimated_list_node_size * pool_resource->NumAllocatedChunks(); + size_t usage_chunks = MallocUsage(pool_resource->ChunkSizeBytes()) * pool_resource->NumAllocatedChunks(); + return usage_resource + usage_chunks + MallocUsage(sizeof(void*) * m.bucket_count()); +} + } // namespace memusage #endif // BITCOIN_MEMUSAGE_H diff --git a/src/test/pool_tests.cpp b/src/test/pool_tests.cpp index 9d15660126a..834b05c4c5a 100644 --- a/src/test/pool_tests.cpp +++ b/src/test/pool_tests.cpp @@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE(memusage_test) { auto std_map = std::unordered_map{}; - using Map = std::unordered_map, std::equal_to, diff --git a/src/util/hasher.h b/src/util/hasher.h index 050e089300d..4b379660193 100644 --- a/src/util/hasher.h +++ b/src/util/hasher.h @@ -35,6 +35,10 @@ private: const uint64_t k0, k1; public: + // instructs Boost.Unordered to not use post-mixing. We can do this because the hash is of high quality. + // This should have a slight performance benefit. + using is_avalanching = std::true_type; + SaltedOutpointHasher(bool deterministic = false); /**