From 3d203c2acfc55b84a7aa29272454b6b196051e43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Thu, 24 Oct 2024 08:37:57 +0200 Subject: [PATCH 01/24] test: Compare util::Xor with randomized inputs against simple impl Since production code only uses keys of length 8, we're not testing with other values anymore --- src/test/streams_tests.cpp | 93 ++++++++++++++++++++++++++++++++------ 1 file changed, 79 insertions(+), 14 deletions(-) diff --git a/src/test/streams_tests.cpp b/src/test/streams_tests.cpp index c7b5cd353e0..c29f34fab58 100644 --- a/src/test/streams_tests.cpp +++ b/src/test/streams_tests.cpp @@ -13,19 +13,81 @@ #include using namespace std::string_literals; +using namespace util::hex_literals; BOOST_FIXTURE_TEST_SUITE(streams_tests, BasicTestingSetup) +// Test that obfuscation can be properly reversed even with random chunk sizes. +BOOST_AUTO_TEST_CASE(xor_roundtrip_random_chunks) +{ + auto apply_random_xor_chunks{[&](std::span write, const std::span key) { + for (size_t offset{0}; offset < write.size();) { + const size_t chunk_size{1 + m_rng.randrange(write.size() - offset)}; + util::Xor(write.subspan(offset, chunk_size), key, offset); + offset += chunk_size; + } + }}; + + for (size_t test{0}; test < 100; ++test) { + const size_t write_size{1 + m_rng.randrange(100U)}; + const std::vector original{m_rng.randbytes(write_size)}; + std::vector roundtrip{original}; + + std::vector key_bytes{m_rng.randbytes(sizeof(uint64_t))}; + uint64_t xor_key; + std::memcpy(&xor_key, key_bytes.data(), sizeof(xor_key)); + apply_random_xor_chunks(roundtrip, key_bytes); + + // Verify intermediate state is different from original (unless key is zero) + const bool all_zero = (xor_key == 0) || (HexStr(key_bytes).find_first_not_of('0') >= write_size * 2); + BOOST_CHECK_EQUAL(original != roundtrip, !all_zero); + + apply_random_xor_chunks(roundtrip, key_bytes); + BOOST_CHECK(original == roundtrip); + } +} + +// Compares optimized obfuscation against a trivial byte-by-byte reference implementation +// with random offsets to ensure proper handling of key wrapping. +BOOST_AUTO_TEST_CASE(xor_bytes_reference) +{ + auto expected_xor{[](std::span write, const std::span key, size_t key_offset) { + for (auto& b : write) { + b ^= key[key_offset++ % key.size()]; + } + }}; + + for (size_t test{0}; test < 100; ++test) { + const size_t write_size{1 + m_rng.randrange(100U)}; + const size_t key_offset{m_rng.randrange(3 * 8U)}; // Should wrap around + + std::vector key_bytes{m_rng.randbytes(sizeof(uint64_t))}; + uint64_t xor_key; + std::memcpy(&xor_key, key_bytes.data(), sizeof(xor_key)); + + std::vector expected{m_rng.randbytes(write_size)}; + std::vector actual{expected}; + + expected_xor(expected, key_bytes, key_offset); + util::Xor(actual, key_bytes, key_offset); + + BOOST_CHECK_EQUAL_COLLECTIONS(expected.begin(), expected.end(), actual.begin(), actual.end()); + } +} + BOOST_AUTO_TEST_CASE(xor_file) { fs::path xor_path{m_args.GetDataDirBase() / "test_xor.bin"}; auto raw_file{[&](const auto& mode) { return fsbridge::fopen(xor_path, mode); }}; const std::vector test1{1, 2, 3}; const std::vector test2{4, 5}; - const std::vector xor_pat{std::byte{0xff}, std::byte{0x00}}; + auto key_bytes{"ff00ff00ff00ff00"_hex_v}; + uint64_t xor_key; + std::memcpy(&xor_key, key_bytes.data(), sizeof(xor_key)); + { // Check errors for missing file - AutoFile xor_file{raw_file("rb"), xor_pat}; + AutoFile xor_file{raw_file("rb"), key_bytes}; BOOST_CHECK_EXCEPTION(xor_file << std::byte{}, std::ios_base::failure, HasReason{"AutoFile::write: file handle is nullpt"}); BOOST_CHECK_EXCEPTION(xor_file >> std::byte{}, std::ios_base::failure, HasReason{"AutoFile::read: file handle is nullpt"}); BOOST_CHECK_EXCEPTION(xor_file.ignore(1), std::ios_base::failure, HasReason{"AutoFile::ignore: file handle is nullpt"}); @@ -37,7 +99,7 @@ BOOST_AUTO_TEST_CASE(xor_file) #else const char* mode = "wbx"; #endif - AutoFile xor_file{raw_file(mode), xor_pat}; + AutoFile xor_file{raw_file(mode), key_bytes}; xor_file << test1 << test2; } { @@ -50,7 +112,7 @@ BOOST_AUTO_TEST_CASE(xor_file) BOOST_CHECK_EXCEPTION(non_xor_file.ignore(1), std::ios_base::failure, HasReason{"AutoFile::ignore: end of file"}); } { - AutoFile xor_file{raw_file("rb"), xor_pat}; + AutoFile xor_file{raw_file("rb"), key_bytes}; std::vector read1, read2; xor_file >> read1 >> read2; BOOST_CHECK_EQUAL(HexStr(read1), HexStr(test1)); @@ -59,7 +121,7 @@ BOOST_AUTO_TEST_CASE(xor_file) BOOST_CHECK_EXCEPTION(xor_file >> std::byte{}, std::ios_base::failure, HasReason{"AutoFile::read: end of file"}); } { - AutoFile xor_file{raw_file("rb"), xor_pat}; + AutoFile xor_file{raw_file("rb"), key_bytes}; std::vector read2; // Check that ignore works xor_file.ignore(4); @@ -75,7 +137,7 @@ BOOST_AUTO_TEST_CASE(streams_vector_writer) { unsigned char a(1); unsigned char b(2); - unsigned char bytes[] = { 3, 4, 5, 6 }; + unsigned char bytes[] = {3, 4, 5, 6}; std::vector vch; // Each test runs twice. Serializing a second time at the same starting @@ -227,29 +289,32 @@ BOOST_AUTO_TEST_CASE(streams_serializedata_xor) // Degenerate case { DataStream ds{in}; - ds.Xor({0x00, 0x00}); + ds.Xor("0000000000000000"_hex_v_u8); BOOST_CHECK_EQUAL(""s, ds.str()); } in.push_back(std::byte{0x0f}); in.push_back(std::byte{0xf0}); - // Single character key { + const auto key_bytes{"ffffffffffffffff"_hex_v}; + uint64_t xor_key; + std::memcpy(&xor_key, key_bytes.data(), sizeof(xor_key)); + DataStream ds{in}; - ds.Xor({0xff}); + ds.Xor("ffffffffffffffff"_hex_v_u8); BOOST_CHECK_EQUAL("\xf0\x0f"s, ds.str()); } - // Multi character key - in.clear(); in.push_back(std::byte{0xf0}); in.push_back(std::byte{0x0f}); { + const auto key_bytes{"ff0fff0fff0fff0f"_hex_v}; + DataStream ds{in}; - ds.Xor({0xff, 0x0f}); + ds.Xor("ff0fff0fff0fff0f"_hex_v_u8); BOOST_CHECK_EQUAL("\x0f\x00"s, ds.str()); } } @@ -272,7 +337,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file) BOOST_CHECK(false); } catch (const std::exception& e) { BOOST_CHECK(strstr(e.what(), - "Rewind limit must be less than buffer size") != nullptr); + "Rewind limit must be less than buffer size") != nullptr); } // The buffer is 25 bytes, allow rewinding 10 bytes. @@ -361,7 +426,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file) BOOST_CHECK(false); } catch (const std::exception& e) { BOOST_CHECK(strstr(e.what(), - "BufferedFile::Fill: end of file") != nullptr); + "BufferedFile::Fill: end of file") != nullptr); } // Attempting to read beyond the end sets the EOF indicator. BOOST_CHECK(bf.eof()); From 5d5f3d06dd845b4b4f3a7839dd86d8d8fcc4b098 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Fri, 6 Dec 2024 16:18:03 +0100 Subject: [PATCH 02/24] bench: Make XorObfuscationBench more representative Since another PR solves the tiny byte xors during serialization, we're only concentrating on big continuous chunks now. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release \ && cmake --build build -j$(nproc) \ && build/bin/bench_bitcoin -filter='XorObfuscationBench' -min-time=10000 C++ compiler .......................... AppleClang 17.0.0.17000013 | ns/MiB | MiB/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 731,927.62 | 1,366.26 | 0.2% | 10.67 | `XorObfuscationBench` C++ compiler .......................... GNU 13.3.0 | ns/MiB | MiB/s | err% | ins/MiB | cyc/MiB | IPC | bra/MiB | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 941,015.26 | 1,062.68 | 0.0% | 9,437,186.97 | 3,378,911.52 | 2.793 | 1,048,577.15 | 0.0% | 10.99 | `XorObfuscationBench` --- src/bench/xor.cpp | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/bench/xor.cpp b/src/bench/xor.cpp index fc9dc5d1721..c76103e8851 100644 --- a/src/bench/xor.cpp +++ b/src/bench/xor.cpp @@ -6,19 +6,28 @@ #include #include #include +#include +#include #include +#include #include -static void Xor(benchmark::Bench& bench) +static void XorObfuscationBench(benchmark::Bench& bench) { - FastRandomContext frc{/*fDeterministic=*/true}; - auto data{frc.randbytes(1024)}; - auto key{frc.randbytes(31)}; + FastRandomContext rng{/*fDeterministic=*/true}; + constexpr size_t bytes{10_MiB}; + auto test_data{rng.randbytes(bytes)}; - bench.batch(data.size()).unit("byte").run([&] { - util::Xor(data, key); + std::vector key_bytes{rng.randbytes(8)}; + uint64_t key; + std::memcpy(&key, key_bytes.data(), 8); + + size_t offset{0}; + bench.batch(bytes / 1_MiB).unit("MiB").run([&] { + util::Xor(test_data, key_bytes, offset++); + ankerl::nanobench::doNotOptimizeAway(test_data); }); } -BENCHMARK(Xor, benchmark::PriorityLevel::HIGH); +BENCHMARK(XorObfuscationBench, benchmark::PriorityLevel::HIGH); From 8e6e0acd36eb60685b2340e00861cb13fcef1111 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sat, 5 Apr 2025 18:25:20 +0200 Subject: [PATCH 03/24] refactor: prepare dbwrapper for obfuscation key change Since `CDBWrapper::Read` will still work with vectors, we won't be able to use the obfuscation key field to read into it directly. This commit cleans up this part of the code, obviating that writing `obfuscate_key` is needed since following methods will actually use it implicitly, simplifying the `if (!key_exists` condition to extract the negation into the name of the boolean and inline the single-use `CreateObfuscateKey` which will just complicate the transition. --- src/dbwrapper.cpp | 49 +++++++++++++++++------------------------------ src/dbwrapper.h | 2 -- 2 files changed, 18 insertions(+), 33 deletions(-) diff --git a/src/dbwrapper.cpp b/src/dbwrapper.cpp index 1c35e11863b..38b75b3b340 100644 --- a/src/dbwrapper.cpp +++ b/src/dbwrapper.cpp @@ -213,7 +213,10 @@ struct LevelDBContext { }; CDBWrapper::CDBWrapper(const DBParams& params) - : m_db_context{std::make_unique()}, m_name{fs::PathToString(params.path.stem())}, m_path{params.path}, m_is_memory{params.memory_only} + : m_db_context{std::make_unique()}, + m_name{fs::PathToString(params.path.stem())}, + m_path{params.path}, + m_is_memory{params.memory_only} { DBContext().penv = nullptr; DBContext().readoptions.verify_checksums = true; @@ -248,24 +251,23 @@ CDBWrapper::CDBWrapper(const DBParams& params) LogPrintf("Finished database compaction of %s\n", fs::PathToString(params.path)); } - // The base-case obfuscation key, which is a noop. - obfuscate_key = std::vector(OBFUSCATE_KEY_NUM_BYTES, '\000'); + { + obfuscate_key = std::vector(OBFUSCATE_KEY_NUM_BYTES, '\000'); // Needed for unobfuscated Read + const bool key_missing{!Read(OBFUSCATE_KEY_KEY, obfuscate_key)}; + if (key_missing && params.obfuscate && IsEmpty()) { + // Initialize non-degenerate obfuscation if it won't upset existing, non-obfuscated data. + std::vector new_key(OBFUSCATE_KEY_NUM_BYTES); + GetRandBytes(new_key); - bool key_exists = Read(OBFUSCATE_KEY_KEY, obfuscate_key); + // Write `new_key` so we don't obfuscate the key with itself + Write(OBFUSCATE_KEY_KEY, new_key); + obfuscate_key = new_key; - if (!key_exists && params.obfuscate && IsEmpty()) { - // Initialize non-degenerate obfuscation if it won't upset - // existing, non-obfuscated data. - std::vector new_key = CreateObfuscateKey(); + LogPrintf("Wrote new obfuscate key for %s: %s\n", fs::PathToString(params.path), HexStr(obfuscate_key)); + } - // Write `new_key` so we don't obfuscate the key with itself - Write(OBFUSCATE_KEY_KEY, new_key); - obfuscate_key = new_key; - - LogPrintf("Wrote new obfuscate key for %s: %s\n", fs::PathToString(params.path), HexStr(obfuscate_key)); + LogPrintf("Using obfuscation key for %s: %s\n", fs::PathToString(params.path), HexStr(obfuscate_key)); } - - LogPrintf("Using obfuscation key for %s: %s\n", fs::PathToString(params.path), HexStr(obfuscate_key)); } CDBWrapper::~CDBWrapper() @@ -318,17 +320,6 @@ const std::string CDBWrapper::OBFUSCATE_KEY_KEY("\000obfuscate_key", 14); const unsigned int CDBWrapper::OBFUSCATE_KEY_NUM_BYTES = 8; -/** - * Returns a string (consisting of 8 random bytes) suitable for use as an - * obfuscating XOR key. - */ -std::vector CDBWrapper::CreateObfuscateKey() const -{ - std::vector ret(OBFUSCATE_KEY_NUM_BYTES); - GetRandBytes(ret); - return ret; -} - std::optional CDBWrapper::ReadImpl(std::span key) const { leveldb::Slice slKey(CharCast(key.data()), key.size()); @@ -411,10 +402,6 @@ void CDBIterator::SeekToFirst() { m_impl_iter->iter->SeekToFirst(); } void CDBIterator::Next() { m_impl_iter->iter->Next(); } namespace dbwrapper_private { - -const std::vector& GetObfuscateKey(const CDBWrapper &w) -{ - return w.obfuscate_key; -} +const std::vector& GetObfuscateKey(const CDBWrapper& w) { return w.obfuscate_key; } } // namespace dbwrapper_private diff --git a/src/dbwrapper.h b/src/dbwrapper.h index 789b5be8fc7..33e009833cb 100644 --- a/src/dbwrapper.h +++ b/src/dbwrapper.h @@ -196,8 +196,6 @@ private: //! the length of the obfuscate key in number of bytes static const unsigned int OBFUSCATE_KEY_NUM_BYTES; - std::vector CreateObfuscateKey() const; - //! path to filesystem storage const fs::path m_path; From e50732d25fcdeb73747f27a5341116f454badc9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sat, 5 Apr 2025 19:01:09 +0200 Subject: [PATCH 04/24] refactor: prepare mempool_persist for obfuscation key change --- src/node/mempool_persist.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/node/mempool_persist.cpp b/src/node/mempool_persist.cpp index ff7de8c64a2..320a8d60967 100644 --- a/src/node/mempool_persist.cpp +++ b/src/node/mempool_persist.cpp @@ -58,15 +58,18 @@ bool LoadMempool(CTxMemPool& pool, const fs::path& load_path, Chainstate& active try { uint64_t version; file >> version; - std::vector xor_key; + if (version == MEMPOOL_DUMP_VERSION_NO_XOR_KEY) { - // Leave XOR-key empty + std::vector xor_key; + file.SetXor(xor_key); } else if (version == MEMPOOL_DUMP_VERSION) { + std::vector xor_key; file >> xor_key; + file.SetXor(xor_key); } else { return false; } - file.SetXor(xor_key); + uint64_t total_txns_to_load; file >> total_txns_to_load; uint64_t txns_tried = 0; @@ -177,12 +180,15 @@ bool DumpMempool(const CTxMemPool& pool, const fs::path& dump_path, FopenFn mock const uint64_t version{pool.m_opts.persist_v1_dat ? MEMPOOL_DUMP_VERSION_NO_XOR_KEY : MEMPOOL_DUMP_VERSION}; file << version; - std::vector xor_key(8); if (!pool.m_opts.persist_v1_dat) { + std::vector xor_key(8); FastRandomContext{}.fillrand(xor_key); file << xor_key; + file.SetXor(xor_key); + } else { + std::vector xor_key(8); + file.SetXor(xor_key); } - file.SetXor(xor_key); uint64_t mempool_transactions_to_write(vinfo.size()); file << mempool_transactions_to_write; From c5e866b190d6d77302c7bf5f89f8ecace53fc604 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sat, 21 Dec 2024 16:15:04 +0100 Subject: [PATCH 05/24] optimization: Migrate fixed-size obfuscation end-to-end from `std::vector` to `uint64_t` Since `util::Xor` accepts `uint64_t` values, we're eliminating any repeated vector-to-uint64_t conversions going back to the loading/saving of these values (we're still serializing them as vectors, but converting as soon as possible to `uint64_t`). This is the reason the tests still generate vector values and convert to `uint64_t` later instead of generating it directly. We're also short-circuit `Xor` calls with 0 key values early to avoid unnecessary calculations (e.g. `MakeWritableByteSpan`) - even assuming that XOR is never called for 0. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release \ && cmake --build build -j$(nproc) \ && build/bin/bench_bitcoin -filter='XorObfuscationBench' -min-time=10000 C++ compiler .......................... AppleClang 17.0.0.17000013 | ns/MiB | MiB/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 14,730.40 | 67,886.80 | 0.1% | 11.01 | `XorObfuscationBench` C++ compiler .......................... GNU 13.3.0 | ns/MiB | MiB/s | err% | ins/MiB | cyc/MiB | IPC | bra/MiB | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 51,187.17 | 19,536.15 | 0.0% | 327,683.95 | 183,747.58 | 1.783 | 65,536.55 | 0.0% | 11.00 | `XorObfuscationBench` ---- A few other benchmarks that seem to have improved as well (tested with Clang only): Before: | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 2,202,618.49 | 454.01 | 0.2% | 11.01 | `ReadBlockBench` | 734,444.92 | 1,361.57 | 0.3% | 10.66 | `ReadRawBlockBench` After: | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 1,912,308.06 | 522.93 | 0.4% | 10.98 | `ReadBlockBench` | 49,092.93 | 20,369.53 | 0.2% | 10.99 | `ReadRawBlockBench` --- src/bench/xor.cpp | 7 ++- src/dbwrapper.cpp | 24 ++++----- src/dbwrapper.h | 14 ++--- src/node/blockstorage.cpp | 10 ++-- src/node/blockstorage.h | 2 +- src/node/mempool_persist.cpp | 19 +++---- src/obfuscation.h | 85 ++++++++++++++++++++++++++++++ src/streams.cpp | 19 ++++--- src/streams.h | 40 +++----------- src/test/dbwrapper_tests.cpp | 18 ++----- src/test/fuzz/autofile.cpp | 2 +- src/test/fuzz/buffered_file.cpp | 2 +- src/test/streams_tests.cpp | 92 ++++++++++++++++++++++++--------- 13 files changed, 209 insertions(+), 125 deletions(-) create mode 100644 src/obfuscation.h diff --git a/src/bench/xor.cpp b/src/bench/xor.cpp index c76103e8851..b04f08bba5e 100644 --- a/src/bench/xor.cpp +++ b/src/bench/xor.cpp @@ -19,13 +19,12 @@ static void XorObfuscationBench(benchmark::Bench& bench) constexpr size_t bytes{10_MiB}; auto test_data{rng.randbytes(bytes)}; - std::vector key_bytes{rng.randbytes(8)}; - uint64_t key; - std::memcpy(&key, key_bytes.data(), 8); + const Obfuscation obfuscation{rng.rand64()}; + assert(obfuscation); size_t offset{0}; bench.batch(bytes / 1_MiB).unit("MiB").run([&] { - util::Xor(test_data, key_bytes, offset++); + obfuscation(test_data, offset++); ankerl::nanobench::doNotOptimizeAway(test_data); }); } diff --git a/src/dbwrapper.cpp b/src/dbwrapper.cpp index 38b75b3b340..00317138d5d 100644 --- a/src/dbwrapper.cpp +++ b/src/dbwrapper.cpp @@ -173,7 +173,7 @@ void CDBBatch::Clear() void CDBBatch::WriteImpl(std::span key, DataStream& ssValue) { leveldb::Slice slKey(CharCast(key.data()), key.size()); - ssValue.Xor(dbwrapper_private::GetObfuscateKey(parent)); + dbwrapper_private::GetObfuscation(parent)(ssValue); leveldb::Slice slValue(CharCast(ssValue.data()), ssValue.size()); m_impl_batch->batch.Put(slKey, slValue); } @@ -215,6 +215,7 @@ struct LevelDBContext { CDBWrapper::CDBWrapper(const DBParams& params) : m_db_context{std::make_unique()}, m_name{fs::PathToString(params.path.stem())}, + m_obfuscation{0}, m_path{params.path}, m_is_memory{params.memory_only} { @@ -252,21 +253,22 @@ CDBWrapper::CDBWrapper(const DBParams& params) } { - obfuscate_key = std::vector(OBFUSCATE_KEY_NUM_BYTES, '\000'); // Needed for unobfuscated Read - const bool key_missing{!Read(OBFUSCATE_KEY_KEY, obfuscate_key)}; + m_obfuscation = 0; // Needed for unobfuscated Read + std::vector obfuscate_key_vector(Obfuscation::SIZE_BYTES, '\000'); + const bool key_missing{!Read(OBFUSCATE_KEY_KEY, obfuscate_key_vector)}; if (key_missing && params.obfuscate && IsEmpty()) { // Initialize non-degenerate obfuscation if it won't upset existing, non-obfuscated data. - std::vector new_key(OBFUSCATE_KEY_NUM_BYTES); + std::vector new_key(Obfuscation::SIZE_BYTES); GetRandBytes(new_key); // Write `new_key` so we don't obfuscate the key with itself Write(OBFUSCATE_KEY_KEY, new_key); - obfuscate_key = new_key; + obfuscate_key_vector = new_key; - LogPrintf("Wrote new obfuscate key for %s: %s\n", fs::PathToString(params.path), HexStr(obfuscate_key)); + LogPrintf("Wrote new obfuscate key for %s: %s\n", fs::PathToString(params.path), HexStr(obfuscate_key_vector)); } - - LogPrintf("Using obfuscation key for %s: %s\n", fs::PathToString(params.path), HexStr(obfuscate_key)); + LogPrintf("Using obfuscation key for %s: %s\n", fs::PathToString(params.path), HexStr(obfuscate_key_vector)); + m_obfuscation = obfuscate_key_vector; } } @@ -317,9 +319,6 @@ size_t CDBWrapper::DynamicMemoryUsage() const // We must use a string constructor which specifies length so that we copy // past the null-terminator. const std::string CDBWrapper::OBFUSCATE_KEY_KEY("\000obfuscate_key", 14); - -const unsigned int CDBWrapper::OBFUSCATE_KEY_NUM_BYTES = 8; - std::optional CDBWrapper::ReadImpl(std::span key) const { leveldb::Slice slKey(CharCast(key.data()), key.size()); @@ -402,6 +401,5 @@ void CDBIterator::SeekToFirst() { m_impl_iter->iter->SeekToFirst(); } void CDBIterator::Next() { m_impl_iter->iter->Next(); } namespace dbwrapper_private { -const std::vector& GetObfuscateKey(const CDBWrapper& w) { return w.obfuscate_key; } - +Obfuscation GetObfuscation(const CDBWrapper& w) { return w.m_obfuscation; } } // namespace dbwrapper_private diff --git a/src/dbwrapper.h b/src/dbwrapper.h index 33e009833cb..5f23f1dc433 100644 --- a/src/dbwrapper.h +++ b/src/dbwrapper.h @@ -63,8 +63,7 @@ namespace dbwrapper_private { * Database obfuscation should be considered an implementation detail of the * specific database. */ -const std::vector& GetObfuscateKey(const CDBWrapper &w); - +Obfuscation GetObfuscation(const CDBWrapper&); }; // namespace dbwrapper_private bool DestroyDB(const std::string& path_str); @@ -166,7 +165,7 @@ public: template bool GetValue(V& value) { try { DataStream ssValue{GetValueImpl()}; - ssValue.Xor(dbwrapper_private::GetObfuscateKey(parent)); + dbwrapper_private::GetObfuscation(parent)(ssValue); ssValue >> value; } catch (const std::exception&) { return false; @@ -179,7 +178,7 @@ struct LevelDBContext; class CDBWrapper { - friend const std::vector& dbwrapper_private::GetObfuscateKey(const CDBWrapper &w); + friend Obfuscation dbwrapper_private::GetObfuscation(const CDBWrapper&); private: //! holds all leveldb-specific fields of this class std::unique_ptr m_db_context; @@ -188,14 +187,11 @@ private: std::string m_name; //! a key used for optional XOR-obfuscation of the database - std::vector obfuscate_key; + Obfuscation m_obfuscation; //! the key under which the obfuscation key is stored static const std::string OBFUSCATE_KEY_KEY; - //! the length of the obfuscate key in number of bytes - static const unsigned int OBFUSCATE_KEY_NUM_BYTES; - //! path to filesystem storage const fs::path m_path; @@ -226,7 +222,7 @@ public: } try { DataStream ssValue{MakeByteSpan(*strValue)}; - ssValue.Xor(obfuscate_key); + m_obfuscation(ssValue); ssValue >> value; } catch (const std::exception&) { return false; diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp index 3d73459bf6c..d2c1f332054 100644 --- a/src/node/blockstorage.cpp +++ b/src/node/blockstorage.cpp @@ -777,13 +777,13 @@ void BlockManager::UnlinkPrunedFiles(const std::set& setFilesToPrune) const AutoFile BlockManager::OpenBlockFile(const FlatFilePos& pos, bool fReadOnly) const { - return AutoFile{m_block_file_seq.Open(pos, fReadOnly), m_xor_key}; + return AutoFile{m_block_file_seq.Open(pos, fReadOnly), m_obfuscation}; } /** Open an undo file (rev?????.dat) */ AutoFile BlockManager::OpenUndoFile(const FlatFilePos& pos, bool fReadOnly) const { - return AutoFile{m_undo_file_seq.Open(pos, fReadOnly), m_xor_key}; + return AutoFile{m_undo_file_seq.Open(pos, fReadOnly), m_obfuscation}; } fs::path BlockManager::GetBlockPosFilename(const FlatFilePos& pos) const @@ -1103,7 +1103,7 @@ FlatFilePos BlockManager::WriteBlock(const CBlock& block, int nHeight) return pos; } -static auto InitBlocksdirXorKey(const BlockManager::Options& opts) +static Obfuscation InitBlocksdirXorKey(const BlockManager::Options& opts) { // Bytes are serialized without length indicator, so this is also the exact // size of the XOR-key file. @@ -1152,12 +1152,12 @@ static auto InitBlocksdirXorKey(const BlockManager::Options& opts) }; } LogInfo("Using obfuscation key for blocksdir *.dat files (%s): '%s'\n", fs::PathToString(opts.blocks_dir), HexStr(xor_key)); - return std::vector{xor_key.begin(), xor_key.end()}; + return Obfuscation{xor_key}; } BlockManager::BlockManager(const util::SignalInterrupt& interrupt, Options opts) : m_prune_mode{opts.prune_target > 0}, - m_xor_key{InitBlocksdirXorKey(opts)}, + m_obfuscation{InitBlocksdirXorKey(opts)}, m_opts{std::move(opts)}, m_block_file_seq{FlatFileSeq{m_opts.blocks_dir, "blk", m_opts.fast_prune ? 0x4000 /* 16kB */ : BLOCKFILE_CHUNK_SIZE}}, m_undo_file_seq{FlatFileSeq{m_opts.blocks_dir, "rev", UNDOFILE_CHUNK_SIZE}}, diff --git a/src/node/blockstorage.h b/src/node/blockstorage.h index 324f8e68605..d5ae34d72ed 100644 --- a/src/node/blockstorage.h +++ b/src/node/blockstorage.h @@ -235,7 +235,7 @@ private: const bool m_prune_mode; - const std::vector m_xor_key; + const Obfuscation m_obfuscation; /** Dirty block index entries. */ std::set m_dirty_blockindex; diff --git a/src/node/mempool_persist.cpp b/src/node/mempool_persist.cpp index 320a8d60967..5ef5d3c0913 100644 --- a/src/node/mempool_persist.cpp +++ b/src/node/mempool_persist.cpp @@ -60,12 +60,11 @@ bool LoadMempool(CTxMemPool& pool, const fs::path& load_path, Chainstate& active file >> version; if (version == MEMPOOL_DUMP_VERSION_NO_XOR_KEY) { - std::vector xor_key; - file.SetXor(xor_key); + file.SetObfuscation(0); } else if (version == MEMPOOL_DUMP_VERSION) { - std::vector xor_key; - file >> xor_key; - file.SetXor(xor_key); + Obfuscation obfuscation{0}; + file >> obfuscation; + file.SetObfuscation(obfuscation); } else { return false; } @@ -181,13 +180,11 @@ bool DumpMempool(const CTxMemPool& pool, const fs::path& dump_path, FopenFn mock file << version; if (!pool.m_opts.persist_v1_dat) { - std::vector xor_key(8); - FastRandomContext{}.fillrand(xor_key); - file << xor_key; - file.SetXor(xor_key); + const Obfuscation obfuscation{FastRandomContext{}.rand64()}; + file << obfuscation; + file.SetObfuscation(obfuscation); } else { - std::vector xor_key(8); - file.SetXor(xor_key); + file.SetObfuscation(0); } uint64_t mempool_transactions_to_write(vinfo.size()); diff --git a/src/obfuscation.h b/src/obfuscation.h new file mode 100644 index 00000000000..70bc844c6d5 --- /dev/null +++ b/src/obfuscation.h @@ -0,0 +1,85 @@ +// Copyright (c) 2009-present The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#ifndef BITCOIN_OBFUSCATION_H +#define BITCOIN_OBFUSCATION_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +class Obfuscation +{ +public: + static constexpr size_t SIZE_BYTES{sizeof(uint64_t)}; + +private: + std::array rotations; // Cached key rotations + void SetRotations(const uint64_t key) + { + for (size_t i{0}; i < SIZE_BYTES; ++i) { + size_t key_rotation_bits{CHAR_BIT * i}; + if constexpr (std::endian::native == std::endian::big) key_rotation_bits *= -1; + rotations[i] = std::rotr(key, key_rotation_bits); + } + } + + static uint64_t ToUint64(const std::span key_span) + { + uint64_t key{}; + std::memcpy(&key, key_span.data(), SIZE_BYTES); + return key; + } + + static void Xor(std::span write, const uint64_t key, const size_t size) + { + assert(size <= write.size()); + uint64_t raw{}; + std::memcpy(&raw, write.data(), size); + raw ^= key; + std::memcpy(write.data(), &raw, size); + } + +public: + Obfuscation(const uint64_t key) { SetRotations(key); } + Obfuscation(const std::span key_span) : Obfuscation(ToUint64(key_span)) {} + Obfuscation(const std::vector& key_vec) : Obfuscation(MakeByteSpan(key_vec).first()) {} + Obfuscation(const std::vector& key_vec) : Obfuscation(std::span(key_vec).first()) {} + + uint64_t Key() const { return rotations[0]; } + operator bool() const { return Key() != 0; } + void operator()(std::span write, const size_t key_offset_bytes = 0) const + { + if (!*this) return; + const uint64_t rot_key{rotations[key_offset_bytes % SIZE_BYTES]}; // Continue obfuscation from where we left off + for (; write.size() >= SIZE_BYTES; write = write.subspan(SIZE_BYTES)) { // Process multiple bytes at a time + Xor(write, rot_key, SIZE_BYTES); + } + Xor(write, rot_key, write.size()); + } + + template + void Serialize(Stream& s) const + { + std::vector bytes(SIZE_BYTES); + std::memcpy(bytes.data(), &rotations[0], SIZE_BYTES); + s << bytes; + } + + template + void Unserialize(Stream& s) + { + std::vector bytes(SIZE_BYTES); + s >> bytes; + SetRotations(ToUint64(MakeByteSpan(bytes).first())); + } +}; + +#endif // BITCOIN_OBFUSCATION_H diff --git a/src/streams.cpp b/src/streams.cpp index 19c2b474452..2bd142aba8c 100644 --- a/src/streams.cpp +++ b/src/streams.cpp @@ -9,8 +9,7 @@ #include -AutoFile::AutoFile(std::FILE* file, std::vector data_xor) - : m_file{file}, m_xor{std::move(data_xor)} +AutoFile::AutoFile(std::FILE* file, const Obfuscation& obfuscation) : m_file{file}, m_obfuscation{obfuscation} { if (!IsNull()) { auto pos{std::ftell(m_file)}; @@ -21,12 +20,12 @@ AutoFile::AutoFile(std::FILE* file, std::vector data_xor) std::size_t AutoFile::detail_fread(std::span dst) { if (!m_file) throw std::ios_base::failure("AutoFile::read: file handle is nullptr"); - size_t ret = std::fread(dst.data(), 1, dst.size(), m_file); - if (!m_xor.empty()) { - if (!m_position.has_value()) throw std::ios_base::failure("AutoFile::read: position unknown"); - util::Xor(dst.subspan(0, ret), m_xor, *m_position); + const size_t ret = std::fread(dst.data(), 1, dst.size(), m_file); + if (m_obfuscation) { + if (!m_position) throw std::ios_base::failure("AutoFile::read: position unknown"); + m_obfuscation(dst, *m_position); } - if (m_position.has_value()) *m_position += ret; + if (m_position) *m_position += ret; return ret; } @@ -81,7 +80,7 @@ void AutoFile::ignore(size_t nSize) void AutoFile::write(std::span src) { if (!m_file) throw std::ios_base::failure("AutoFile::write: file handle is nullptr"); - if (m_xor.empty()) { + if (!m_obfuscation) { if (std::fwrite(src.data(), 1, src.size(), m_file) != src.size()) { throw std::ios_base::failure("AutoFile::write: write failed"); } @@ -100,9 +99,9 @@ void AutoFile::write(std::span src) void AutoFile::write_buffer(std::span src) { if (!m_file) throw std::ios_base::failure("AutoFile::write_buffer: file handle is nullptr"); - if (m_xor.size()) { + if (m_obfuscation) { if (!m_position) throw std::ios_base::failure("AutoFile::write_buffer: obfuscation position unknown"); - util::Xor(src, m_xor, *m_position); // obfuscate in-place + m_obfuscation(src, *m_position); // obfuscate in-place } if (std::fwrite(src.data(), 1, src.size(), m_file) != src.size()) { throw std::ios_base::failure("AutoFile::write_buffer: write failed"); diff --git a/src/streams.h b/src/streams.h index 1ebcff3671f..0a1e7002c5f 100644 --- a/src/streams.h +++ b/src/streams.h @@ -6,6 +6,7 @@ #ifndef BITCOIN_STREAMS_H #define BITCOIN_STREAMS_H +#include #include #include #include @@ -21,30 +22,8 @@ #include #include #include -#include #include -namespace util { -inline void Xor(std::span write, std::span key, size_t key_offset = 0) -{ - if (key.size() == 0) { - return; - } - key_offset %= key.size(); - - for (size_t i = 0, j = key_offset; i != write.size(); i++) { - write[i] ^= key[j++]; - - // This potentially acts on very many bytes of data, so it's - // important that we calculate `j`, i.e. the `key` index in this - // way instead of doing a %, which would effectively be a division - // for each byte Xor'd -- much slower than need be. - if (j == key.size()) - j = 0; - } -} -} // namespace util - /* Minimal stream for overwriting and/or appending to an existing byte vector * * The referenced vector will grow as necessary @@ -261,21 +240,16 @@ public: return (*this); } - template + template DataStream& operator>>(T&& obj) { ::Unserialize(*this, obj); return (*this); } - /** - * XOR the contents of this stream with a certain key. - * - * @param[in] key The key used to XOR the data in this stream. - */ - void Xor(const std::vector& key) + void Obfuscate(const Obfuscation& obfuscation) { - util::Xor(MakeWritableByteSpan(*this), MakeByteSpan(key)); + if (obfuscation) obfuscation(MakeWritableByteSpan(*this)); } /** Compute total memory usage of this object (own memory + any dynamic memory). */ @@ -392,11 +366,11 @@ class AutoFile { protected: std::FILE* m_file; - std::vector m_xor; + Obfuscation m_obfuscation; std::optional m_position; public: - explicit AutoFile(std::FILE* file, std::vector data_xor={}); + explicit AutoFile(std::FILE* file, const Obfuscation& obfuscation = 0); ~AutoFile() { fclose(); } @@ -428,7 +402,7 @@ public: bool IsNull() const { return m_file == nullptr; } /** Continue with a different XOR key */ - void SetXor(std::vector data_xor) { m_xor = data_xor; } + void SetObfuscation(const Obfuscation& obfuscation) { m_obfuscation = obfuscation; } /** Implementation detail, only used internally. */ std::size_t detail_fread(std::span dst); diff --git a/src/test/dbwrapper_tests.cpp b/src/test/dbwrapper_tests.cpp index 3a86036327c..7d9135baf04 100644 --- a/src/test/dbwrapper_tests.cpp +++ b/src/test/dbwrapper_tests.cpp @@ -14,16 +14,6 @@ using util::ToString; -// Test if a string consists entirely of null characters -static bool is_null_key(const std::vector& key) { - bool isnull = true; - - for (unsigned int i = 0; i < key.size(); i++) - isnull &= (key[i] == '\x00'); - - return isnull; -} - BOOST_FIXTURE_TEST_SUITE(dbwrapper_tests, BasicTestingSetup) BOOST_AUTO_TEST_CASE(dbwrapper) @@ -37,7 +27,7 @@ BOOST_AUTO_TEST_CASE(dbwrapper) uint256 res; // Ensure that we're doing real obfuscation when obfuscate=true - BOOST_CHECK(obfuscate != is_null_key(dbwrapper_private::GetObfuscateKey(dbw))); + BOOST_CHECK(obfuscate == dbwrapper_private::GetObfuscation(dbw)); BOOST_CHECK(dbw.Write(key, in)); BOOST_CHECK(dbw.Read(key, res)); @@ -57,7 +47,7 @@ BOOST_AUTO_TEST_CASE(dbwrapper_basic_data) bool res_bool; // Ensure that we're doing real obfuscation when obfuscate=true - BOOST_CHECK(obfuscate != is_null_key(dbwrapper_private::GetObfuscateKey(dbw))); + BOOST_CHECK(obfuscate == dbwrapper_private::GetObfuscation(dbw)); //Simulate block raw data - "b + block hash" std::string key_block = "b" + m_rng.rand256().ToString(); @@ -232,7 +222,7 @@ BOOST_AUTO_TEST_CASE(existing_data_no_obfuscate) BOOST_CHECK_EQUAL(res2.ToString(), in.ToString()); BOOST_CHECK(!odbw.IsEmpty()); // There should be existing data - BOOST_CHECK(is_null_key(dbwrapper_private::GetObfuscateKey(odbw))); // The key should be an empty string + BOOST_CHECK(!dbwrapper_private::GetObfuscation(odbw)); uint256 in2 = m_rng.rand256(); uint256 res3; @@ -269,7 +259,7 @@ BOOST_AUTO_TEST_CASE(existing_data_reindex) // Check that the key/val we wrote with unobfuscated wrapper doesn't exist uint256 res2; BOOST_CHECK(!odbw.Read(key, res2)); - BOOST_CHECK(!is_null_key(dbwrapper_private::GetObfuscateKey(odbw))); + BOOST_CHECK(dbwrapper_private::GetObfuscation(odbw)); uint256 in2 = m_rng.rand256(); uint256 res3; diff --git a/src/test/fuzz/autofile.cpp b/src/test/fuzz/autofile.cpp index 81761c7bf96..c76899cda82 100644 --- a/src/test/fuzz/autofile.cpp +++ b/src/test/fuzz/autofile.cpp @@ -20,7 +20,7 @@ FUZZ_TARGET(autofile) FuzzedFileProvider fuzzed_file_provider{fuzzed_data_provider}; AutoFile auto_file{ fuzzed_file_provider.open(), - ConsumeRandomLengthByteVector(fuzzed_data_provider), + fuzzed_data_provider.ConsumeIntegral() }; LIMITED_WHILE(fuzzed_data_provider.ConsumeBool(), 100) { diff --git a/src/test/fuzz/buffered_file.cpp b/src/test/fuzz/buffered_file.cpp index a6a042a25cb..d8c09f88ed8 100644 --- a/src/test/fuzz/buffered_file.cpp +++ b/src/test/fuzz/buffered_file.cpp @@ -22,7 +22,7 @@ FUZZ_TARGET(buffered_file) std::optional opt_buffered_file; AutoFile fuzzed_file{ fuzzed_file_provider.open(), - ConsumeRandomLengthByteVector(fuzzed_data_provider), + fuzzed_data_provider.ConsumeIntegral() }; try { auto n_buf_size = fuzzed_data_provider.ConsumeIntegralInRange(0, 4096); diff --git a/src/test/streams_tests.cpp b/src/test/streams_tests.cpp index c29f34fab58..ab64825d981 100644 --- a/src/test/streams_tests.cpp +++ b/src/test/streams_tests.cpp @@ -20,10 +20,10 @@ BOOST_FIXTURE_TEST_SUITE(streams_tests, BasicTestingSetup) // Test that obfuscation can be properly reversed even with random chunk sizes. BOOST_AUTO_TEST_CASE(xor_roundtrip_random_chunks) { - auto apply_random_xor_chunks{[&](std::span write, const std::span key) { + auto apply_random_xor_chunks{[&](std::span write, const Obfuscation& obfuscation) { for (size_t offset{0}; offset < write.size();) { const size_t chunk_size{1 + m_rng.randrange(write.size() - offset)}; - util::Xor(write.subspan(offset, chunk_size), key, offset); + obfuscation(write.subspan(offset, chunk_size), offset); offset += chunk_size; } }}; @@ -33,16 +33,15 @@ BOOST_AUTO_TEST_CASE(xor_roundtrip_random_chunks) const std::vector original{m_rng.randbytes(write_size)}; std::vector roundtrip{original}; - std::vector key_bytes{m_rng.randbytes(sizeof(uint64_t))}; - uint64_t xor_key; - std::memcpy(&xor_key, key_bytes.data(), sizeof(xor_key)); - apply_random_xor_chunks(roundtrip, key_bytes); + const auto key_bytes{m_rng.randbytes(Obfuscation::SIZE_BYTES)}; + const Obfuscation obfuscation{key_bytes}; + apply_random_xor_chunks(roundtrip, obfuscation); // Verify intermediate state is different from original (unless key is zero) - const bool all_zero = (xor_key == 0) || (HexStr(key_bytes).find_first_not_of('0') >= write_size * 2); + const bool all_zero = !obfuscation || (HexStr(key_bytes).find_first_not_of('0') >= write_size * 2); BOOST_CHECK_EQUAL(original != roundtrip, !all_zero); - apply_random_xor_chunks(roundtrip, key_bytes); + apply_random_xor_chunks(roundtrip, obfuscation); BOOST_CHECK(original == roundtrip); } } @@ -61,20 +60,69 @@ BOOST_AUTO_TEST_CASE(xor_bytes_reference) const size_t write_size{1 + m_rng.randrange(100U)}; const size_t key_offset{m_rng.randrange(3 * 8U)}; // Should wrap around - std::vector key_bytes{m_rng.randbytes(sizeof(uint64_t))}; - uint64_t xor_key; - std::memcpy(&xor_key, key_bytes.data(), sizeof(xor_key)); - + const auto key_bytes{m_rng.randbytes(Obfuscation::SIZE_BYTES)}; + const Obfuscation obfuscation{key_bytes}; std::vector expected{m_rng.randbytes(write_size)}; std::vector actual{expected}; expected_xor(expected, key_bytes, key_offset); - util::Xor(actual, key_bytes, key_offset); + obfuscation(actual, key_offset); BOOST_CHECK_EQUAL_COLLECTIONS(expected.begin(), expected.end(), actual.begin(), actual.end()); } } + +BOOST_AUTO_TEST_CASE(obfuscation_constructors) +{ + constexpr uint64_t test_key = 0x0123456789ABCDEF; + + // Direct uint64_t constructor + const Obfuscation obf1{test_key}; + BOOST_CHECK_EQUAL(obf1.Key(), test_key); + + // std::span constructor + std::array key_bytes{}; + std::memcpy(key_bytes.data(), &test_key, Obfuscation::SIZE_BYTES); + const Obfuscation obf2{std::span{key_bytes}}; + BOOST_CHECK_EQUAL(obf2.Key(), test_key); + + // std::vector constructor + std::vector uint8_key(Obfuscation::SIZE_BYTES); + std::memcpy(uint8_key.data(), &test_key, uint8_key.size()); + const Obfuscation obf4{uint8_key}; + BOOST_CHECK_EQUAL(obf4.Key(), test_key); + + // std::vector constructor + std::vector byte_vector_key(Obfuscation::SIZE_BYTES); + std::memcpy(byte_vector_key.data(), &test_key, byte_vector_key.size()); + const Obfuscation obf5{byte_vector_key}; + BOOST_CHECK_EQUAL(obf5.Key(), test_key); +} + +BOOST_AUTO_TEST_CASE(obfuscation_serialize) +{ + const Obfuscation original{0xDEADBEEF}; + + // Serialize + DataStream ds; + ds << original; + + BOOST_CHECK_EQUAL(ds.size(), 1 + Obfuscation::SIZE_BYTES); // serialized as a vector + + // Deserialize + Obfuscation recovered{0}; + ds >> recovered; + + BOOST_CHECK_EQUAL(recovered.Key(), original.Key()); +} + +BOOST_AUTO_TEST_CASE(obfuscation_empty) +{ + const Obfuscation null_obf{0}; + BOOST_CHECK(!null_obf); +} + BOOST_AUTO_TEST_CASE(xor_file) { fs::path xor_path{m_args.GetDataDirBase() / "test_xor.bin"}; @@ -99,7 +147,7 @@ BOOST_AUTO_TEST_CASE(xor_file) #else const char* mode = "wbx"; #endif - AutoFile xor_file{raw_file(mode), key_bytes}; + AutoFile xor_file{raw_file(mode), xor_key}; xor_file << test1 << test2; } { @@ -112,7 +160,7 @@ BOOST_AUTO_TEST_CASE(xor_file) BOOST_CHECK_EXCEPTION(non_xor_file.ignore(1), std::ios_base::failure, HasReason{"AutoFile::ignore: end of file"}); } { - AutoFile xor_file{raw_file("rb"), key_bytes}; + AutoFile xor_file{raw_file("rb"), xor_key}; std::vector read1, read2; xor_file >> read1 >> read2; BOOST_CHECK_EQUAL(HexStr(read1), HexStr(test1)); @@ -121,7 +169,7 @@ BOOST_AUTO_TEST_CASE(xor_file) BOOST_CHECK_EXCEPTION(xor_file >> std::byte{}, std::ios_base::failure, HasReason{"AutoFile::read: end of file"}); } { - AutoFile xor_file{raw_file("rb"), key_bytes}; + AutoFile xor_file{raw_file("rb"), xor_key}; std::vector read2; // Check that ignore works xor_file.ignore(4); @@ -289,7 +337,7 @@ BOOST_AUTO_TEST_CASE(streams_serializedata_xor) // Degenerate case { DataStream ds{in}; - ds.Xor("0000000000000000"_hex_v_u8); + Obfuscation{0}(ds); BOOST_CHECK_EQUAL(""s, ds.str()); } @@ -297,12 +345,10 @@ BOOST_AUTO_TEST_CASE(streams_serializedata_xor) in.push_back(std::byte{0xf0}); { - const auto key_bytes{"ffffffffffffffff"_hex_v}; - uint64_t xor_key; - std::memcpy(&xor_key, key_bytes.data(), sizeof(xor_key)); + const Obfuscation obfuscation{"ffffffffffffffff"_hex_v}; DataStream ds{in}; - ds.Xor("ffffffffffffffff"_hex_v_u8); + obfuscation(ds); BOOST_CHECK_EQUAL("\xf0\x0f"s, ds.str()); } @@ -311,10 +357,10 @@ BOOST_AUTO_TEST_CASE(streams_serializedata_xor) in.push_back(std::byte{0x0f}); { - const auto key_bytes{"ff0fff0fff0fff0f"_hex_v}; + const Obfuscation obfuscation{"ff0fff0fff0fff0f"_hex_v}; DataStream ds{in}; - ds.Xor("ff0fff0fff0fff0f"_hex_v_u8); + obfuscation(ds); BOOST_CHECK_EQUAL("\x0f\x00"s, ds.str()); } } From c497ca6e91fd692756384e1f11e2a856793b98f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Fri, 21 Jun 2024 18:00:14 +0200 Subject: [PATCH 06/24] bench: Add COutPoint and SaltedOutpointHasher benchmarks This commit introduces new benchmarks to measure the performance of various operations using SaltedOutpointHasher, including hash computation, set operations, and set creation. These benchmarks are intended to provide insights about coin caching performance (e.g. during IBD). > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='SaltedOutpointHasherBench.*' -min-time=10000 > C++ compiler .......................... AppleClang 16.0.0.16000026 | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 58.60 | 17,065,922.04 | 0.3% | 11.02 | `SaltedOutpointHasherBench_create_set` | 11.97 | 83,576,684.83 | 0.1% | 11.01 | `SaltedOutpointHasherBench_hash` | 14.50 | 68,985,850.12 | 0.3% | 10.96 | `SaltedOutpointHasherBench_match` | 13.90 | 71,942,033.47 | 0.4% | 11.03 | `SaltedOutpointHasherBench_mismatch` > C++ compiler .......................... GNU 13.3.0 | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 136.76 | 7,312,133.16 | 0.0% | 1,086.67 | 491.12 | 2.213 | 119.54 | 1.1% | 11.01 | `SaltedOutpointHasherBench_create_set` | 23.82 | 41,978,882.62 | 0.0% | 252.01 | 85.57 | 2.945 | 4.00 | 0.0% | 11.00 | `SaltedOutpointHasherBench_hash` | 60.42 | 16,549,695.42 | 0.1% | 460.51 | 217.04 | 2.122 | 21.00 | 1.4% | 10.99 | `SaltedOutpointHasherBench_match` | 78.66 | 12,713,595.35 | 0.1% | 555.59 | 282.52 | 1.967 | 20.19 | 2.2% | 10.74 | `SaltedOutpointHasherBench_mismatch` --- src/bench/crypto_hash.cpp | 101 +++++++++++++++++++++++++++++++++++++- 1 file changed, 99 insertions(+), 2 deletions(-) diff --git a/src/bench/crypto_hash.cpp b/src/bench/crypto_hash.cpp index 2f1ff564388..54aa4a44ef5 100644 --- a/src/bench/crypto_hash.cpp +++ b/src/bench/crypto_hash.cpp @@ -2,7 +2,6 @@ // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. - #include #include #include @@ -12,9 +11,11 @@ #include #include #include -#include #include #include +#include +#include +#include #include #include @@ -205,6 +206,98 @@ static void SipHash_32b(benchmark::Bench& bench) }); } +static void SaltedOutpointHasherBench_hash(benchmark::Bench& bench) +{ + FastRandomContext rng{/*fDeterministic=*/true}; + constexpr size_t size{1000}; + + std::vector outpoints(size); + for (auto& outpoint : outpoints) { + outpoint = {Txid::FromUint256(rng.rand256()), rng.rand32()}; + } + + const SaltedOutpointHasher hasher; + bench.batch(size).run([&] { + size_t result{0}; + for (const auto& outpoint : outpoints) { + result ^= hasher(outpoint); + } + ankerl::nanobench::doNotOptimizeAway(result); + }); +} + +static void SaltedOutpointHasherBench_match(benchmark::Bench& bench) +{ + FastRandomContext rng{/*fDeterministic=*/true}; + constexpr size_t size{1000}; + + std::unordered_set values; + std::vector value_vector; + values.reserve(size); + value_vector.reserve(size); + + for (size_t i{0}; i < size; ++i) { + COutPoint outpoint{Txid::FromUint256(rng.rand256()), rng.rand32()}; + values.emplace(outpoint); + value_vector.push_back(outpoint); + assert(values.contains(outpoint)); + } + + bench.batch(size).run([&] { + bool result{true}; + for (const auto& outpoint : value_vector) { + result ^= values.contains(outpoint); + } + ankerl::nanobench::doNotOptimizeAway(result); + }); +} + +static void SaltedOutpointHasherBench_mismatch(benchmark::Bench& bench) +{ + FastRandomContext rng{/*fDeterministic=*/true}; + constexpr size_t size{1000}; + + std::unordered_set values; + std::vector missing_value_vector; + values.reserve(size); + missing_value_vector.reserve(size); + + for (size_t i{0}; i < size; ++i) { + values.emplace(Txid::FromUint256(rng.rand256()), rng.rand32()); + COutPoint missing_outpoint{Txid::FromUint256(rng.rand256()), rng.rand32()}; + missing_value_vector.push_back(missing_outpoint); + assert(!values.contains(missing_outpoint)); + } + + bench.batch(size).run([&] { + bool result{false}; + for (const auto& outpoint : missing_value_vector) { + result ^= values.contains(outpoint); + } + ankerl::nanobench::doNotOptimizeAway(result); + }); +} + +static void SaltedOutpointHasherBench_create_set(benchmark::Bench& bench) +{ + FastRandomContext rng{/*fDeterministic=*/true}; + constexpr size_t size{1000}; + + std::vector outpoints(size); + for (auto& outpoint : outpoints) { + outpoint = {Txid::FromUint256(rng.rand256()), rng.rand32()}; + } + + bench.batch(size).run([&] { + std::unordered_set set; + set.reserve(size); + for (const auto& outpoint : outpoints) { + set.emplace(outpoint); + } + ankerl::nanobench::doNotOptimizeAway(set.size()); + }); +} + static void MuHash(benchmark::Bench& bench) { MuHash3072 acc; @@ -276,6 +369,10 @@ BENCHMARK(SHA256_32b_SSE4, benchmark::PriorityLevel::HIGH); BENCHMARK(SHA256_32b_AVX2, benchmark::PriorityLevel::HIGH); BENCHMARK(SHA256_32b_SHANI, benchmark::PriorityLevel::HIGH); BENCHMARK(SipHash_32b, benchmark::PriorityLevel::HIGH); +BENCHMARK(SaltedOutpointHasherBench_hash, benchmark::PriorityLevel::HIGH); +BENCHMARK(SaltedOutpointHasherBench_match, benchmark::PriorityLevel::HIGH); +BENCHMARK(SaltedOutpointHasherBench_mismatch, benchmark::PriorityLevel::HIGH); +BENCHMARK(SaltedOutpointHasherBench_create_set, benchmark::PriorityLevel::HIGH); BENCHMARK(SHA256D64_1024_STANDARD, benchmark::PriorityLevel::HIGH); BENCHMARK(SHA256D64_1024_SSE4, benchmark::PriorityLevel::HIGH); BENCHMARK(SHA256D64_1024_AVX2, benchmark::PriorityLevel::HIGH); From ae87260d299ab91a98afa7c28ddc69d2bf178a94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sat, 1 Feb 2025 19:01:36 +0100 Subject: [PATCH 07/24] test: Rename k1/k2 to k0/k1 for consistency --- src/test/hash_tests.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/test/hash_tests.cpp b/src/test/hash_tests.cpp index 2fe44960f46..f08f37e077a 100644 --- a/src/test/hash_tests.cpp +++ b/src/test/hash_tests.cpp @@ -133,18 +133,18 @@ BOOST_AUTO_TEST_CASE(siphash) // Check consistency between CSipHasher and SipHashUint256[Extra]. FastRandomContext ctx; for (int i = 0; i < 16; ++i) { + uint64_t k0 = ctx.rand64(); uint64_t k1 = ctx.rand64(); - uint64_t k2 = ctx.rand64(); uint256 x = m_rng.rand256(); uint32_t n = ctx.rand32(); uint8_t nb[4]; WriteLE32(nb, n); - CSipHasher sip256(k1, k2); + CSipHasher sip256(k0, k1); sip256.Write(x); CSipHasher sip288 = sip256; sip288.Write(nb); - BOOST_CHECK_EQUAL(SipHashUint256(k1, k2, x), sip256.Finalize()); - BOOST_CHECK_EQUAL(SipHashUint256Extra(k1, k2, x, n), sip288.Finalize()); + BOOST_CHECK_EQUAL(SipHashUint256(k0, k1, x), sip256.Finalize()); + BOOST_CHECK_EQUAL(SipHashUint256Extra(k0, k1, x, n), sip288.Finalize()); // TODO modified in follow-up commit } } From 155ba7c349ca748ba6d0cfb6224eeb99fed8262a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sat, 1 Feb 2025 19:16:17 +0100 Subject: [PATCH 08/24] refactor: Extract C0-C3 Siphash constants --- src/crypto/siphash.cpp | 25 +++++++++++++------------ src/crypto/siphash.h | 5 +++++ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/crypto/siphash.cpp b/src/crypto/siphash.cpp index dda28a6ced4..ad3d2f768d5 100644 --- a/src/crypto/siphash.cpp +++ b/src/crypto/siphash.cpp @@ -17,10 +17,10 @@ CSipHasher::CSipHasher(uint64_t k0, uint64_t k1) { - v[0] = 0x736f6d6570736575ULL ^ k0; - v[1] = 0x646f72616e646f6dULL ^ k1; - v[2] = 0x6c7967656e657261ULL ^ k0; - v[3] = 0x7465646279746573ULL ^ k1; + v[0] = C0 ^ k0; + v[1] = C1 ^ k1; + v[2] = C2 ^ k0; + v[3] = C3 ^ k1; count = 0; tmp = 0; } @@ -97,10 +97,10 @@ uint64_t SipHashUint256(uint64_t k0, uint64_t k1, const uint256& val) /* Specialized implementation for efficiency */ uint64_t d = val.GetUint64(0); - uint64_t v0 = 0x736f6d6570736575ULL ^ k0; - uint64_t v1 = 0x646f72616e646f6dULL ^ k1; - uint64_t v2 = 0x6c7967656e657261ULL ^ k0; - uint64_t v3 = 0x7465646279746573ULL ^ k1 ^ d; + uint64_t v0 = CSipHasher::C0 ^ k0; + uint64_t v1 = CSipHasher::C1 ^ k1; + uint64_t v2 = CSipHasher::C2 ^ k0; + uint64_t v3 = CSipHasher::C3 ^ k1 ^ d; SIPROUND; SIPROUND; @@ -137,10 +137,11 @@ uint64_t SipHashUint256Extra(uint64_t k0, uint64_t k1, const uint256& val, uint3 /* Specialized implementation for efficiency */ uint64_t d = val.GetUint64(0); - uint64_t v0 = 0x736f6d6570736575ULL ^ k0; - uint64_t v1 = 0x646f72616e646f6dULL ^ k1; - uint64_t v2 = 0x6c7967656e657261ULL ^ k0; - uint64_t v3 = 0x7465646279746573ULL ^ k1 ^ d; + // TODO moved in next commit + uint64_t v0 = CSipHasher::C0 ^ k0; + uint64_t v1 = CSipHasher::C1 ^ k1; + uint64_t v2 = CSipHasher::C2 ^ k0; + uint64_t v3 = CSipHasher::C3 ^ k1 ^ d; SIPROUND; SIPROUND; diff --git a/src/crypto/siphash.h b/src/crypto/siphash.h index c388c94951b..61f4ab08ca1 100644 --- a/src/crypto/siphash.h +++ b/src/crypto/siphash.h @@ -19,6 +19,11 @@ private: uint8_t count; // Only the low 8 bits of the input size matter. public: + static constexpr uint64_t C0{0x736f6d6570736575ULL}; + static constexpr uint64_t C1{0x646f72616e646f6dULL}; + static constexpr uint64_t C2{0x6c7967656e657261ULL}; + static constexpr uint64_t C3{0x7465646279746573ULL}; + /** Construct a SipHash calculator initialized with 128-bit key (k0, k1) */ CSipHasher(uint64_t k0, uint64_t k1); /** Hash a 64-bit integer worth of data From 73cfebb08bda9f2e48148f6cd2481a8c3b07a43c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sat, 1 Feb 2025 19:21:34 +0100 Subject: [PATCH 09/24] optimization: refactor: Introduce Uint256ExtraSipHasher to cache SipHash constant state Previously, only k0 and k1 were stored, causing the constant xor operations to be recomputed in every call to `SipHashUint256Extra`. This commit adds a dedicated `Uint256ExtraSipHasher` class that caches the initial state (v0-v3) and to perform the `SipHash` computation on a `uint256` (with an extra parameter), hiding the constant computation details from higher-level code and improving efficiency. This basically brings the precalculations in the `CSipHasher` constructor to the `uint256` specialized SipHash implementation. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='SaltedOutpointHasherBench.*' -min-time=10000 > C++ compiler .......................... AppleClang 16.0.0.16000026 | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 57.27 | 17,462,299.19 | 0.1% | 11.02 | `SaltedOutpointHasherBench_create_set` | 11.24 | 88,997,888.48 | 0.3% | 11.04 | `SaltedOutpointHasherBench_hash` | 13.91 | 71,902,014.20 | 0.2% | 11.01 | `SaltedOutpointHasherBench_match` | 13.29 | 75,230,390.31 | 0.1% | 11.00 | `SaltedOutpointHasherBench_mismatch` compared to master: create_set - 17,462,299.19/17,065,922.04 - 2.3% faster hash - 88,997,888.48/83,576,684.83 - 6.4% faster match - 71,902,014.20/68,985,850.12 - 4.2% faster mismatch - 75,230,390.31/71,942,033.47 - 4.5% faster > C++ compiler .......................... GNU 13.3.0 | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 135.38 | 7,386,349.49 | 0.0% | 1,078.19 | 486.16 | 2.218 | 119.56 | 1.1% | 11.00 | `SaltedOutpointHasherBench_create_set` | 23.67 | 42,254,558.08 | 0.0% | 247.01 | 85.01 | 2.906 | 4.00 | 0.0% | 11.00 | `SaltedOutpointHasherBench_hash` | 58.95 | 16,962,220.14 | 0.1% | 446.55 | 211.74 | 2.109 | 20.86 | 1.4% | 11.01 | `SaltedOutpointHasherBench_match` | 76.98 | 12,991,047.69 | 0.1% | 548.93 | 276.50 | 1.985 | 20.25 | 2.3% | 10.72 | `SaltedOutpointHasherBench_mismatch` compared to master: create_set - 7,386,349.49/7,312,133.16 - 1% faster hash - 42,254,558.08/41,978,882.62 - 0.6% faster match - 16,962,220.14/16,549,695.42 - 2.4% faster mismatch - 12,991,047.69/12,713,595.35 - 2% faster Co-authored-by: sipa --- src/crypto/siphash.cpp | 13 ++++--------- src/crypto/siphash.h | 15 ++++++++++++++- src/test/fuzz/integer.cpp | 2 +- src/test/hash_tests.cpp | 4 ++-- src/util/hasher.cpp | 6 +++--- src/util/hasher.h | 8 +++----- 6 files changed, 27 insertions(+), 21 deletions(-) diff --git a/src/crypto/siphash.cpp b/src/crypto/siphash.cpp index ad3d2f768d5..15287f3283b 100644 --- a/src/crypto/siphash.cpp +++ b/src/crypto/siphash.cpp @@ -132,17 +132,12 @@ uint64_t SipHashUint256(uint64_t k0, uint64_t k1, const uint256& val) return v0 ^ v1 ^ v2 ^ v3; } -uint64_t SipHashUint256Extra(uint64_t k0, uint64_t k1, const uint256& val, uint32_t extra) +/* Specialized implementation for efficiency */ +uint64_t Uint256ExtraSipHasher::operator()(const uint256& val, uint32_t extra) const noexcept { - /* Specialized implementation for efficiency */ + uint64_t v0 = v[0], v1 = v[1], v2 = v[2], v3 = v[3]; uint64_t d = val.GetUint64(0); - - // TODO moved in next commit - uint64_t v0 = CSipHasher::C0 ^ k0; - uint64_t v1 = CSipHasher::C1 ^ k1; - uint64_t v2 = CSipHasher::C2 ^ k0; - uint64_t v3 = CSipHasher::C3 ^ k1 ^ d; - + v3 ^= d; SIPROUND; SIPROUND; v0 ^= d; diff --git a/src/crypto/siphash.h b/src/crypto/siphash.h index 61f4ab08ca1..a8dd6da3ecd 100644 --- a/src/crypto/siphash.h +++ b/src/crypto/siphash.h @@ -48,6 +48,19 @@ public: * .Finalize() */ uint64_t SipHashUint256(uint64_t k0, uint64_t k1, const uint256& val); -uint64_t SipHashUint256Extra(uint64_t k0, uint64_t k1, const uint256& val, uint32_t extra); + +class Uint256ExtraSipHasher { + uint64_t v[4]; + +public: + Uint256ExtraSipHasher(const uint64_t k0, const uint64_t k1) noexcept { + v[0] = CSipHasher::C0 ^ k0; + v[1] = CSipHasher::C1 ^ k1; + v[2] = CSipHasher::C2 ^ k0; + v[3] = CSipHasher::C3 ^ k1; + } + + uint64_t operator()(const uint256& val, uint32_t extra) const noexcept; +}; #endif // BITCOIN_CRYPTO_SIPHASH_H diff --git a/src/test/fuzz/integer.cpp b/src/test/fuzz/integer.cpp index a6729155d1f..66a95533ee1 100644 --- a/src/test/fuzz/integer.cpp +++ b/src/test/fuzz/integer.cpp @@ -119,7 +119,7 @@ FUZZ_TARGET(integer, .init = initialize_integer) (void)MillisToTimeval(i64); (void)SighashToStr(uch); (void)SipHashUint256(u64, u64, u256); - (void)SipHashUint256Extra(u64, u64, u256, u32); + (void)Uint256ExtraSipHasher(u64, u64)(u256, u32); (void)ToLower(ch); (void)ToUpper(ch); { diff --git a/src/test/hash_tests.cpp b/src/test/hash_tests.cpp index f08f37e077a..60f4bbc078b 100644 --- a/src/test/hash_tests.cpp +++ b/src/test/hash_tests.cpp @@ -130,7 +130,7 @@ BOOST_AUTO_TEST_CASE(siphash) ss << TX_WITH_WITNESS(tx); BOOST_CHECK_EQUAL(SipHashUint256(1, 2, ss.GetHash()), 0x79751e980c2a0a35ULL); - // Check consistency between CSipHasher and SipHashUint256[Extra]. + // Check consistency between CSipHasher and SipHashUint256 and Uint256ExtraSipHasher. FastRandomContext ctx; for (int i = 0; i < 16; ++i) { uint64_t k0 = ctx.rand64(); @@ -144,7 +144,7 @@ BOOST_AUTO_TEST_CASE(siphash) CSipHasher sip288 = sip256; sip288.Write(nb); BOOST_CHECK_EQUAL(SipHashUint256(k0, k1, x), sip256.Finalize()); - BOOST_CHECK_EQUAL(SipHashUint256Extra(k0, k1, x, n), sip288.Finalize()); // TODO modified in follow-up commit + BOOST_CHECK_EQUAL(Uint256ExtraSipHasher(k0, k1)(x, n), sip288.Finalize()); } } diff --git a/src/util/hasher.cpp b/src/util/hasher.cpp index 117cfe8dcd1..6ee3134cb7c 100644 --- a/src/util/hasher.cpp +++ b/src/util/hasher.cpp @@ -11,9 +11,9 @@ SaltedTxidHasher::SaltedTxidHasher() : k0{FastRandomContext().rand64()}, k1{FastRandomContext().rand64()} {} -SaltedOutpointHasher::SaltedOutpointHasher(bool deterministic) : - k0{deterministic ? 0x8e819f2607a18de6 : FastRandomContext().rand64()}, - k1{deterministic ? 0xf4020d2e3983b0eb : FastRandomContext().rand64()} +SaltedOutpointHasher::SaltedOutpointHasher(bool deterministic) : hasher{ + deterministic ? 0x8e819f2607a18de6 : FastRandomContext().rand64(), + deterministic ? 0xf4020d2e3983b0eb : FastRandomContext().rand64()} {} SaltedSipHasher::SaltedSipHasher() : diff --git a/src/util/hasher.h b/src/util/hasher.h index 3a75c91a3ef..eb5061ce2fb 100644 --- a/src/util/hasher.h +++ b/src/util/hasher.h @@ -30,12 +30,10 @@ public: class SaltedOutpointHasher { -private: - /** Salt */ - const uint64_t k0, k1; + const Uint256ExtraSipHasher hasher; public: - SaltedOutpointHasher(bool deterministic = false); + explicit SaltedOutpointHasher(bool deterministic = false); /** * Having the hash noexcept allows libstdc++'s unordered_map to recalculate @@ -47,7 +45,7 @@ public: * @see https://gcc.gnu.org/onlinedocs/gcc-13.2.0/libstdc++/manual/manual/unordered_associative.html */ size_t operator()(const COutPoint& id) const noexcept { - return SipHashUint256Extra(k0, k1, id.hash, id.n); + return hasher(id.hash, id.n); } }; From 3d7c8ae9fb4dacfa6126efdce6bc8c17122218f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Tue, 19 Nov 2024 16:11:56 +0100 Subject: [PATCH 10/24] bench: measure block (size)serialization speed The SizeComputer is a special serializer which returns what the exact final size will be of the serialized content. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/bin/bench_bitcoin -filter='SizeComputerBlock|SerializeBlock' --min-time=10000 > C compiler ............................ AppleClang 16.0.0.16000026 | ns/block | block/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 195,610.62 | 5,112.20 | 0.3% | 11.00 | `SerializeBlock` | 12,061.83 | 82,906.19 | 0.1% | 11.01 | `SizeComputerBlock` > C++ compiler .......................... GNU 13.3.0 | ns/block | block/s | err% | ins/block | cyc/block | IPC | bra/block | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 867,857.55 | 1,152.26 | 0.0% | 8,015,883.90 | 3,116,099.08 | 2.572 | 1,517,035.87 | 0.5% | 10.81 | `SerializeBlock` | 30,928.27 | 32,332.88 | 0.0% | 221,683.03 | 111,055.84 | 1.996 | 53,037.03 | 0.8% | 11.03 | `SizeComputerBlock` --- src/bench/checkblock.cpp | 33 +++++++++++++++++++++++++++++---- src/streams.h | 1 + 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/bench/checkblock.cpp b/src/bench/checkblock.cpp index 9558d64f199..ba26457b31a 100644 --- a/src/bench/checkblock.cpp +++ b/src/bench/checkblock.cpp @@ -21,11 +21,34 @@ #include #include +static void SizeComputerBlock(benchmark::Bench& bench) { + CBlock block; + DataStream(benchmark::data::block413567) >> TX_WITH_WITNESS(block); + + bench.unit("block").run([&] { + SizeComputer size_computer; + size_computer << TX_WITH_WITNESS(block); + assert(size_computer.size() == benchmark::data::block413567.size()); + }); +} + +static void SerializeBlock(benchmark::Bench& bench) { + CBlock block; + DataStream(benchmark::data::block413567) >> TX_WITH_WITNESS(block); + + // Create output stream and verify first serialization matches input + bench.unit("block").run([&] { + DataStream output_stream(benchmark::data::block413567.size()); + output_stream << TX_WITH_WITNESS(block); + assert(output_stream.size() == benchmark::data::block413567.size()); + }); +} + // These are the two major time-sinks which happen after we have fully received // a block off the wire, but before we can relay the block on to peers using // compact block relay. -static void DeserializeBlockTest(benchmark::Bench& bench) +static void DeserializeBlock(benchmark::Bench& bench) { DataStream stream(benchmark::data::block413567); std::byte a{0}; @@ -39,7 +62,7 @@ static void DeserializeBlockTest(benchmark::Bench& bench) }); } -static void DeserializeAndCheckBlockTest(benchmark::Bench& bench) +static void DeserializeAndCheckBlock(benchmark::Bench& bench) { DataStream stream(benchmark::data::block413567); std::byte a{0}; @@ -60,5 +83,7 @@ static void DeserializeAndCheckBlockTest(benchmark::Bench& bench) }); } -BENCHMARK(DeserializeBlockTest, benchmark::PriorityLevel::HIGH); -BENCHMARK(DeserializeAndCheckBlockTest, benchmark::PriorityLevel::HIGH); +BENCHMARK(SizeComputerBlock, benchmark::PriorityLevel::HIGH); +BENCHMARK(SerializeBlock, benchmark::PriorityLevel::HIGH); +BENCHMARK(DeserializeBlock, benchmark::PriorityLevel::HIGH); +BENCHMARK(DeserializeAndCheckBlock, benchmark::PriorityLevel::HIGH); diff --git a/src/streams.h b/src/streams.h index 0a1e7002c5f..81f4dc8c78e 100644 --- a/src/streams.h +++ b/src/streams.h @@ -141,6 +141,7 @@ public: typedef vector_type::reverse_iterator reverse_iterator; explicit DataStream() = default; + explicit DataStream(size_type n) { reserve(n); } explicit DataStream(std::span sp) : DataStream{std::as_bytes(sp)} {} explicit DataStream(std::span sp) : vch(sp.data(), sp.data() + sp.size()) {} From 9f15d4da35f1e4601bdc59810cfd58479c4c822f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Thu, 13 Feb 2025 22:49:51 +0100 Subject: [PATCH 11/24] cleanup: remove unused `ser_writedata16be` and `ser_readdata16be` --- src/serialize.h | 11 ----------- src/test/fuzz/integer.cpp | 4 ---- 2 files changed, 15 deletions(-) diff --git a/src/serialize.h b/src/serialize.h index 98851056bdb..6807a2be229 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -60,11 +60,6 @@ template inline void ser_writedata16(Stream &s, uint16_t obj) obj = htole16_internal(obj); s.write(std::as_bytes(std::span{&obj, 1})); } -template inline void ser_writedata16be(Stream &s, uint16_t obj) -{ - obj = htobe16_internal(obj); - s.write(std::as_bytes(std::span{&obj, 1})); -} template inline void ser_writedata32(Stream &s, uint32_t obj) { obj = htole32_internal(obj); @@ -92,12 +87,6 @@ template inline uint16_t ser_readdata16(Stream &s) s.read(std::as_writable_bytes(std::span{&obj, 1})); return le16toh_internal(obj); } -template inline uint16_t ser_readdata16be(Stream &s) -{ - uint16_t obj; - s.read(std::as_writable_bytes(std::span{&obj, 1})); - return be16toh_internal(obj); -} template inline uint32_t ser_readdata32(Stream &s) { uint32_t obj; diff --git a/src/test/fuzz/integer.cpp b/src/test/fuzz/integer.cpp index 66a95533ee1..ddf49be86ae 100644 --- a/src/test/fuzz/integer.cpp +++ b/src/test/fuzz/integer.cpp @@ -236,10 +236,6 @@ FUZZ_TARGET(integer, .init = initialize_integer) const uint16_t deserialized_u16 = ser_readdata16(stream); assert(u16 == deserialized_u16 && stream.empty()); - ser_writedata16be(stream, u16); - const uint16_t deserialized_u16be = ser_readdata16be(stream); - assert(u16 == deserialized_u16be && stream.empty()); - ser_writedata8(stream, u8); const uint8_t deserialized_u8 = ser_readdata8(stream); assert(u8 == deserialized_u8 && stream.empty()); From 5559eb68a9a5857422d06b3a9f15ea539b7581d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Fri, 17 Jan 2025 14:29:33 +0100 Subject: [PATCH 12/24] refactor: reduce template bloat in primitive serialization Merged multiple template methods into single constexpr-delimited implementation to reduce template bloat (i.e. related functionality is grouped into a single method, but can be optimized because of C++20 constexpr conditions). This unifies related methods that were only bound before by similar signatures - and enables `SizeComputer` optimizations later --- src/serialize.h | 53 +++++++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/src/serialize.h b/src/serialize.h index 6807a2be229..48ddd4d565a 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -241,38 +241,47 @@ const Out& AsBase(const In& x) template concept CharNotInt8 = std::same_as && !std::same_as; +template +concept ByteOrIntegral = std::is_same_v || + (std::is_integral_v && !std::is_same_v); + template void Serialize(Stream&, V) = delete; // char serialization forbidden. Use uint8_t or int8_t -template void Serialize(Stream& s, std::byte a) { ser_writedata8(s, uint8_t(a)); } -template inline void Serialize(Stream& s, int8_t a ) { ser_writedata8(s, a); } -template inline void Serialize(Stream& s, uint8_t a ) { ser_writedata8(s, a); } -template inline void Serialize(Stream& s, int16_t a ) { ser_writedata16(s, a); } -template inline void Serialize(Stream& s, uint16_t a) { ser_writedata16(s, a); } -template inline void Serialize(Stream& s, int32_t a ) { ser_writedata32(s, a); } -template inline void Serialize(Stream& s, uint32_t a) { ser_writedata32(s, a); } -template inline void Serialize(Stream& s, int64_t a ) { ser_writedata64(s, a); } -template inline void Serialize(Stream& s, uint64_t a) { ser_writedata64(s, a); } +template void Serialize(Stream& s, T a) +{ + if constexpr (sizeof(T) == 1) { + ser_writedata8(s, static_cast(a)); // (u)int8_t or std::byte or bool + } else if constexpr (sizeof(T) == 2) { + ser_writedata16(s, static_cast(a)); // (u)int16_t + } else if constexpr (sizeof(T) == 4) { + ser_writedata32(s, static_cast(a)); // (u)int32_t + } else { + static_assert(sizeof(T) == 8); + ser_writedata64(s, static_cast(a)); // (u)int64_t + } +} template void Serialize(Stream& s, const B (&a)[N]) { s.write(MakeByteSpan(a)); } template void Serialize(Stream& s, const std::array& a) { s.write(MakeByteSpan(a)); } template void Serialize(Stream& s, std::span span) { s.write(std::as_bytes(span)); } template void Serialize(Stream& s, std::span span) { s.write(std::as_bytes(span)); } template void Unserialize(Stream&, V) = delete; // char serialization forbidden. Use uint8_t or int8_t -template void Unserialize(Stream& s, std::byte& a) { a = std::byte{ser_readdata8(s)}; } -template inline void Unserialize(Stream& s, int8_t& a ) { a = ser_readdata8(s); } -template inline void Unserialize(Stream& s, uint8_t& a ) { a = ser_readdata8(s); } -template inline void Unserialize(Stream& s, int16_t& a ) { a = ser_readdata16(s); } -template inline void Unserialize(Stream& s, uint16_t& a) { a = ser_readdata16(s); } -template inline void Unserialize(Stream& s, int32_t& a ) { a = ser_readdata32(s); } -template inline void Unserialize(Stream& s, uint32_t& a) { a = ser_readdata32(s); } -template inline void Unserialize(Stream& s, int64_t& a ) { a = ser_readdata64(s); } -template inline void Unserialize(Stream& s, uint64_t& a) { a = ser_readdata64(s); } +template void Unserialize(Stream& s, T& a) +{ + if constexpr (sizeof(T) == 1) { + a = static_cast(ser_readdata8(s)); // (u)int8_t or std::byte or bool + } else if constexpr (sizeof(T) == 2) { + a = static_cast(ser_readdata16(s)); // (u)int16_t + } else if constexpr (sizeof(T) == 4) { + a = static_cast(ser_readdata32(s)); // (u)int32_t + } else { + static_assert(sizeof(T) == 8); + a = static_cast(ser_readdata64(s)); // (u)int64_t + } +} template void Unserialize(Stream& s, B (&a)[N]) { s.read(MakeWritableByteSpan(a)); } template void Unserialize(Stream& s, std::array& a) { s.read(MakeWritableByteSpan(a)); } template void Unserialize(Stream& s, std::span span) { s.read(std::as_writable_bytes(span)); } template void Unserialize(Stream& s, std::span span) { s.read(std::as_writable_bytes(span)); } - -template inline void Serialize(Stream& s, bool a) { uint8_t f = a; ser_writedata8(s, f); } -template inline void Unserialize(Stream& s, bool& a) { uint8_t f = ser_readdata8(s); a = f; } // clang-format on @@ -478,7 +487,7 @@ public: * serialization, and Unser(stream, object&) for deserialization. Serialization routines (inside * READWRITE, or directly with << and >> operators), can then use Using(object). * - * This works by constructing a Wrapper-wrapped version of object, where T is + * This works by constructing a Wrapper-wrapped version of object, where T is * const during serialization, and non-const during deserialization, which maintains const * correctness. */ From 8f71de5f8f983a42732887067215e5444e7f413e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Thu, 20 Mar 2025 10:28:52 +0100 Subject: [PATCH 13/24] refactor: add explicit static extent to spans --- src/serialize.h | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/serialize.h b/src/serialize.h index 48ddd4d565a..c1c145fa4b5 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -53,56 +53,56 @@ constexpr deserialize_type deserialize {}; */ template inline void ser_writedata8(Stream &s, uint8_t obj) { - s.write(std::as_bytes(std::span{&obj, 1})); + s.write(std::as_bytes(std::span{&obj, 1})); } template inline void ser_writedata16(Stream &s, uint16_t obj) { obj = htole16_internal(obj); - s.write(std::as_bytes(std::span{&obj, 1})); + s.write(std::as_bytes(std::span{&obj, 1})); } template inline void ser_writedata32(Stream &s, uint32_t obj) { obj = htole32_internal(obj); - s.write(std::as_bytes(std::span{&obj, 1})); + s.write(std::as_bytes(std::span{&obj, 1})); } template inline void ser_writedata32be(Stream &s, uint32_t obj) { obj = htobe32_internal(obj); - s.write(std::as_bytes(std::span{&obj, 1})); + s.write(std::as_bytes(std::span{&obj, 1})); } template inline void ser_writedata64(Stream &s, uint64_t obj) { obj = htole64_internal(obj); - s.write(std::as_bytes(std::span{&obj, 1})); + s.write(std::as_bytes(std::span{&obj, 1})); } template inline uint8_t ser_readdata8(Stream &s) { uint8_t obj; - s.read(std::as_writable_bytes(std::span{&obj, 1})); + s.read(std::as_writable_bytes(std::span{&obj, 1})); return obj; } template inline uint16_t ser_readdata16(Stream &s) { uint16_t obj; - s.read(std::as_writable_bytes(std::span{&obj, 1})); + s.read(std::as_writable_bytes(std::span{&obj, 1})); return le16toh_internal(obj); } template inline uint32_t ser_readdata32(Stream &s) { uint32_t obj; - s.read(std::as_writable_bytes(std::span{&obj, 1})); + s.read(std::as_writable_bytes(std::span{&obj, 1})); return le32toh_internal(obj); } template inline uint32_t ser_readdata32be(Stream &s) { uint32_t obj; - s.read(std::as_writable_bytes(std::span{&obj, 1})); + s.read(std::as_writable_bytes(std::span{&obj, 1})); return be32toh_internal(obj); } template inline uint64_t ser_readdata64(Stream &s) { uint64_t obj; - s.read(std::as_writable_bytes(std::span{&obj, 1})); + s.read(std::as_writable_bytes(std::span{&obj, 1})); return le64toh_internal(obj); } @@ -281,7 +281,6 @@ template void Unserialize(Stream& s, T& a) template void Unserialize(Stream& s, B (&a)[N]) { s.read(MakeWritableByteSpan(a)); } template void Unserialize(Stream& s, std::array& a) { s.read(MakeWritableByteSpan(a)); } template void Unserialize(Stream& s, std::span span) { s.read(std::as_writable_bytes(span)); } -template void Unserialize(Stream& s, std::span span) { s.read(std::as_writable_bytes(span)); } // clang-format on @@ -534,10 +533,10 @@ struct CustomUintFormatter if (v < 0 || v > MAX) throw std::ios_base::failure("CustomUintFormatter value out of range"); if (BigEndian) { uint64_t raw = htobe64_internal(v); - s.write(std::as_bytes(std::span{&raw, 1}).last(Bytes)); + s.write(std::as_bytes(std::span{&raw, 1}).template last()); } else { uint64_t raw = htole64_internal(v); - s.write(std::as_bytes(std::span{&raw, 1}).first(Bytes)); + s.write(std::as_bytes(std::span{&raw, 1}).template first()); } } @@ -547,10 +546,10 @@ struct CustomUintFormatter static_assert(std::numeric_limits::max() >= MAX && std::numeric_limits::min() <= 0, "Assigned type too small"); uint64_t raw = 0; if (BigEndian) { - s.read(std::as_writable_bytes(std::span{&raw, 1}).last(Bytes)); + s.read(std::as_writable_bytes(std::span{&raw, 1}).last()); v = static_cast(be64toh_internal(raw)); } else { - s.read(std::as_writable_bytes(std::span{&raw, 1}).first(Bytes)); + s.read(std::as_writable_bytes(std::span{&raw, 1}).first()); v = static_cast(le64toh_internal(raw)); } } From 2bf6c56cabf4ae0860fd98b46c4808bf242774d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Fri, 14 Feb 2025 13:54:57 +0100 Subject: [PATCH 14/24] optimization: merge SizeComputer specializations + add new ones Endianness doesn't affect the final size, we can skip it for `SizeComputer`. We can `if constexpr` previous calls into existing method, short-circuiting existing logic when we only need their serialized sizes. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='SizeComputerBlock|SerializeBlock' --min-time=10000 > C compiler ............................ AppleClang 16.0.0.16000026 | ns/block | block/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 191,652.29 | 5,217.78 | 0.4% | 10.96 | `SerializeBlock` | 10,323.55 | 96,865.92 | 0.2% | 11.01 | `SizeComputerBlock` > C++ compiler .......................... GNU 13.3.0 | ns/block | block/s | err% | ins/block | cyc/block | IPC | bra/block | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 614,847.32 | 1,626.42 | 0.0% | 8,015,883.64 | 2,207,628.07 | 3.631 | 1,517,035.62 | 0.5% | 10.56 | `SerializeBlock` | 26,020.31 | 38,431.52 | 0.0% | 159,390.03 | 93,438.33 | 1.706 | 42,131.03 | 0.9% | 11.00 | `SizeComputerBlock` --- src/serialize.h | 115 ++++++++++++++++++++++++++++++------------------ 1 file changed, 73 insertions(+), 42 deletions(-) diff --git a/src/serialize.h b/src/serialize.h index c1c145fa4b5..611263ac086 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -48,6 +48,16 @@ static const unsigned int MAX_VECTOR_ALLOCATE = 5000000; struct deserialize_type {}; constexpr deserialize_type deserialize {}; +class SizeComputer; + +//! Check if type contains a stream by seeing if it has a GetStream() method. +template +concept ContainsStream = requires(T t) { t.GetStream(); }; + +template +concept ContainsSizeComputer = ContainsStream && + std::is_same_v().GetStream())>, SizeComputer>; + /* * Lowest-level serialization and conversion. */ @@ -107,8 +117,6 @@ template inline uint64_t ser_readdata64(Stream &s) } -class SizeComputer; - /** * Convert any argument to a reference to X, maintaining constness. * @@ -248,7 +256,9 @@ concept ByteOrIntegral = std::is_same_v || template void Serialize(Stream&, V) = delete; // char serialization forbidden. Use uint8_t or int8_t template void Serialize(Stream& s, T a) { - if constexpr (sizeof(T) == 1) { + if constexpr (ContainsSizeComputer) { + s.GetStream().seek(sizeof(T)); + } else if constexpr (sizeof(T) == 1) { ser_writedata8(s, static_cast(a)); // (u)int8_t or std::byte or bool } else if constexpr (sizeof(T) == 2) { ser_writedata16(s, static_cast(a)); // (u)int16_t @@ -259,10 +269,38 @@ template void Serialize(Stream& s, T a) ser_writedata64(s, static_cast(a)); // (u)int64_t } } -template void Serialize(Stream& s, const B (&a)[N]) { s.write(MakeByteSpan(a)); } -template void Serialize(Stream& s, const std::array& a) { s.write(MakeByteSpan(a)); } -template void Serialize(Stream& s, std::span span) { s.write(std::as_bytes(span)); } -template void Serialize(Stream& s, std::span span) { s.write(std::as_bytes(span)); } +template void Serialize(Stream& s, const B (&a)[N]) +{ + if constexpr (ContainsSizeComputer) { + s.GetStream().seek(N); + } else { + s.write(MakeByteSpan(a)); + } +} +template void Serialize(Stream& s, const std::array& a) +{ + if constexpr (ContainsSizeComputer) { + s.GetStream().seek(N); + } else { + s.write(MakeByteSpan(a)); + } +} +template void Serialize(Stream& s, std::span span) +{ + if constexpr (ContainsSizeComputer) { + s.GetStream().seek(N); + } else { + s.write(std::as_bytes(span)); + } +} +template void Serialize(Stream& s, std::span span) +{ + if constexpr (ContainsSizeComputer) { + s.GetStream().seek(span.size()); + } else { + s.write(std::as_bytes(span)); + } +} template void Unserialize(Stream&, V) = delete; // char serialization forbidden. Use uint8_t or int8_t template void Unserialize(Stream& s, T& a) @@ -299,12 +337,14 @@ constexpr inline unsigned int GetSizeOfCompactSize(uint64_t nSize) else return sizeof(unsigned char) + sizeof(uint64_t); } -inline void WriteCompactSize(SizeComputer& os, uint64_t nSize); - template void WriteCompactSize(Stream& os, uint64_t nSize) { - if (nSize < 253) + if constexpr (ContainsSizeComputer) + { + os.GetStream().seek(GetSizeOfCompactSize(nSize)); + } + else if (nSize < 253) { ser_writedata8(os, nSize); } @@ -411,7 +451,7 @@ struct CheckVarIntMode { }; template -inline unsigned int GetSizeOfVarInt(I n) +constexpr unsigned int GetSizeOfVarInt(I n) { CheckVarIntMode(); int nRet = 0; @@ -424,25 +464,26 @@ inline unsigned int GetSizeOfVarInt(I n) return nRet; } -template -inline void WriteVarInt(SizeComputer& os, I n); - template void WriteVarInt(Stream& os, I n) { - CheckVarIntMode(); - unsigned char tmp[(sizeof(n)*8+6)/7]; - int len=0; - while(true) { - tmp[len] = (n & 0x7F) | (len ? 0x80 : 0x00); - if (n <= 0x7F) - break; - n = (n >> 7) - 1; - len++; + if constexpr (ContainsSizeComputer) { + os.GetStream().seek(GetSizeOfVarInt(n)); + } else { + CheckVarIntMode(); + unsigned char tmp[(sizeof(n)*8+6)/7]; + int len=0; + while(true) { + tmp[len] = (n & 0x7F) | (len ? 0x80 : 0x00); + if (n <= 0x7F) + break; + n = (n >> 7) - 1; + len++; + } + do { + ser_writedata8(os, tmp[len]); + } while(len--); } - do { - ser_writedata8(os, tmp[len]); - } while(len--); } template @@ -531,7 +572,9 @@ struct CustomUintFormatter template void Ser(Stream& s, I v) { if (v < 0 || v > MAX) throw std::ios_base::failure("CustomUintFormatter value out of range"); - if (BigEndian) { + if constexpr (ContainsSizeComputer) { + s.GetStream().seek(Bytes); + } else if (BigEndian) { uint64_t raw = htobe64_internal(v); s.write(std::as_bytes(std::span{&raw, 1}).template last()); } else { @@ -1062,6 +1105,9 @@ protected: public: SizeComputer() = default; + SizeComputer& GetStream() { return *this; } + const SizeComputer& GetStream() const { return *this; }; + void write(std::span src) { this->nSize += src.size(); @@ -1085,27 +1131,12 @@ public: } }; -template -inline void WriteVarInt(SizeComputer &s, I n) -{ - s.seek(GetSizeOfVarInt(n)); -} - -inline void WriteCompactSize(SizeComputer &s, uint64_t nSize) -{ - s.seek(GetSizeOfCompactSize(nSize)); -} - template size_t GetSerializeSize(const T& t) { return (SizeComputer() << t).size(); } -//! Check if type contains a stream by seeing if has a GetStream() method. -template -concept ContainsStream = requires(T t) { t.GetStream(); }; - /** Wrapper that overrides the GetParams() function of a stream. */ template class ParamsStream From 6269067bc35452b8af89b36607bf982d4c542f2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sun, 9 Mar 2025 21:33:36 +0100 Subject: [PATCH 15/24] optimization: add single byte writes Single byte writes are used very often (used for every (u)int8_t or std::byte or bool and for every VarInt's first byte which is also needed for every (pre)Vector). It makes sense to avoid the generalized serialization infrastructure that isn't needed: * AutoFile write doesn't need to allocate 4k buffer for a single byte now; * `VectorWriter` and `DataStream` avoids memcpy/insert calls. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/bin/bench_bitcoin -filter='SizeComputerBlock|SerializeBlock' --min-time=10000 > C compiler ............................ AppleClang 16.0.0.16000026 | ns/block | block/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 174,569.19 | 5,728.39 | 0.6% | 10.89 | `SerializeBlock` | 10,241.16 | 97,645.21 | 0.0% | 11.00 | `SizeComputerBlock` > C++ compiler .......................... GNU 13.3.0 | ns/block | block/s | err% | ins/block | cyc/block | IPC | bra/block | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 615,000.56 | 1,626.01 | 0.0% | 8,015,883.64 | 2,208,340.88 | 3.630 | 1,517,035.62 | 0.5% | 10.56 | `SerializeBlock` | 25,676.76 | 38,945.72 | 0.0% | 159,390.03 | 92,202.10 | 1.729 | 42,131.03 | 0.9% | 11.00 | `SizeComputerBlock` --- src/bench/checkblock.cpp | 4 ++-- src/bench/rpc_blockchain.cpp | 2 +- src/crypto/sha256.cpp | 15 +++++++++++++++ src/crypto/sha256.h | 1 + src/hash.h | 24 +++++++++++++++++++++++- src/serialize.h | 6 ++++++ src/streams.cpp | 25 +++++++++++++++++++++++++ src/streams.h | 17 +++++++++++++++++ src/test/crypto_tests.cpp | 2 +- src/test/fuzz/autofile.cpp | 4 ++-- 10 files changed, 93 insertions(+), 7 deletions(-) diff --git a/src/bench/checkblock.cpp b/src/bench/checkblock.cpp index ba26457b31a..2d9eac12339 100644 --- a/src/bench/checkblock.cpp +++ b/src/bench/checkblock.cpp @@ -52,7 +52,7 @@ static void DeserializeBlock(benchmark::Bench& bench) { DataStream stream(benchmark::data::block413567); std::byte a{0}; - stream.write({&a, 1}); // Prevent compaction + stream.write(std::span{&a, 1}); // Prevent compaction bench.unit("block").run([&] { CBlock block; @@ -66,7 +66,7 @@ static void DeserializeAndCheckBlock(benchmark::Bench& bench) { DataStream stream(benchmark::data::block413567); std::byte a{0}; - stream.write({&a, 1}); // Prevent compaction + stream.write(std::span{&a, 1}); // Prevent compaction ArgsManager bench_args; const auto chainParams = CreateChainParams(bench_args, ChainType::MAIN); diff --git a/src/bench/rpc_blockchain.cpp b/src/bench/rpc_blockchain.cpp index df951a14e49..a5d28b4a60b 100644 --- a/src/bench/rpc_blockchain.cpp +++ b/src/bench/rpc_blockchain.cpp @@ -33,7 +33,7 @@ struct TestBlockAndIndex { { DataStream stream{benchmark::data::block413567}; std::byte a{0}; - stream.write({&a, 1}); // Prevent compaction + stream.write(std::span{&a, 1}); // Prevent compaction stream >> TX_WITH_WITNESS(block); diff --git a/src/crypto/sha256.cpp b/src/crypto/sha256.cpp index 09c5d3123e8..6cf77849aac 100644 --- a/src/crypto/sha256.cpp +++ b/src/crypto/sha256.cpp @@ -723,6 +723,21 @@ CSHA256& CSHA256::Write(const unsigned char* data, size_t len) } return *this; } +CSHA256& CSHA256::Write(unsigned char data) +{ + size_t bufsize = bytes % 64; + + // Add the single byte to the buffer + buf[bufsize] = data; + bytes += 1; + + if (bufsize == 63) { + // Process the buffer if full + Transform(s, buf, 1); + } + + return *this; +} void CSHA256::Finalize(unsigned char hash[OUTPUT_SIZE]) { diff --git a/src/crypto/sha256.h b/src/crypto/sha256.h index b1348631d32..16aae54aa23 100644 --- a/src/crypto/sha256.h +++ b/src/crypto/sha256.h @@ -22,6 +22,7 @@ public: CSHA256(); CSHA256& Write(const unsigned char* data, size_t len); + CSHA256& Write(unsigned char data); void Finalize(unsigned char hash[OUTPUT_SIZE]); CSHA256& Reset(); }; diff --git a/src/hash.h b/src/hash.h index 34486af64a1..da3b1ab1145 100644 --- a/src/hash.h +++ b/src/hash.h @@ -38,6 +38,10 @@ public: sha.Write(input.data(), input.size()); return *this; } + CHash256& Write(std::span input) { + sha.Write(input[0]); + return *this; + } CHash256& Reset() { sha.Reset(); @@ -63,6 +67,10 @@ public: sha.Write(input.data(), input.size()); return *this; } + CHash160& Write(std::span input) { + sha.Write(input[0]); + return *this; + } CHash160& Reset() { sha.Reset(); @@ -107,6 +115,10 @@ public: { ctx.Write(UCharCast(src.data()), src.size()); } + void write(std::span src) + { + ctx.Write(*UCharCast(&src[0])); + } /** Compute the double-SHA256 hash of all data written to this object. * @@ -160,13 +172,18 @@ public: m_source.read(dst); this->write(dst); } + void read(std::span dst) + { + m_source.read(dst); + this->write(std::span{dst}); + } void ignore(size_t num_bytes) { std::byte data[1024]; while (num_bytes > 0) { size_t now = std::min(num_bytes, 1024); - read({data, now}); + read(std::span{data, now}); num_bytes -= now; } } @@ -194,6 +211,11 @@ public: m_source.write(src); HashWriter::write(src); } + void write(std::span src) + { + m_source.write(src); + HashWriter::write(src); + } template HashedSourceWriter& operator<<(const T& obj) diff --git a/src/serialize.h b/src/serialize.h index 611263ac086..5ec8b24df80 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -1112,6 +1112,10 @@ public: { this->nSize += src.size(); } + void write(std::span) + { + this->nSize += 1; + } /** Pretend _nSize bytes are written, without specifying them. */ void seek(size_t _nSize) @@ -1161,7 +1165,9 @@ public: template ParamsStream& operator<<(const U& obj) { ::Serialize(*this, obj); return *this; } template ParamsStream& operator>>(U&& obj) { ::Unserialize(*this, obj); return *this; } void write(std::span src) { GetStream().write(src); } + void write(std::span src) { GetStream().write(src); } void read(std::span dst) { GetStream().read(dst); } + void read(std::span dst) { GetStream().read(dst); } void ignore(size_t num) { GetStream().ignore(num); } bool eof() const { return GetStream().eof(); } size_t size() const { return GetStream().size(); } diff --git a/src/streams.cpp b/src/streams.cpp index 2bd142aba8c..60c0e1767db 100644 --- a/src/streams.cpp +++ b/src/streams.cpp @@ -62,6 +62,12 @@ void AutoFile::read(std::span dst) throw std::ios_base::failure(feof() ? "AutoFile::read: end of file" : "AutoFile::read: fread failed"); } } +void AutoFile::read(std::span dst) +{ + if (detail_fread(dst) != 1) { + throw std::ios_base::failure(feof() ? "AutoFile::read: end of file" : "AutoFile::read: fread failed"); + } +} void AutoFile::ignore(size_t nSize) { @@ -95,6 +101,25 @@ void AutoFile::write(std::span src) } } } +void AutoFile::write(std::span src) +{ + if (!m_file) throw std::ios_base::failure("AutoFile::write: file handle is nullptr"); + if (!m_obfuscation) { + if (std::fwrite(src.data(), 1, 1, m_file) != 1) { + throw std::ios_base::failure("AutoFile::write: write failed"); + } + if (m_position.has_value()) *m_position += 1; + } else { + if (!m_position.has_value()) throw std::ios_base::failure("AutoFile::write: position unknown"); + std::byte temp_byte = src[0]; + std::span val(&temp_byte, 1); + m_obfuscation(val, *m_position); + if (fwrite(val.data(), 1, 1, m_file) != 1) { + throw std::ios_base::failure{"XorFile::write: failed"}; + } + *m_position += 1; + } +} void AutoFile::write_buffer(std::span src) { diff --git a/src/streams.h b/src/streams.h index 81f4dc8c78e..5e2bd68af0b 100644 --- a/src/streams.h +++ b/src/streams.h @@ -62,6 +62,17 @@ public: } nPos += src.size(); } + void write(std::span src) + { + assert(nPos <= vchData.size()); + const auto byte{*UCharCast(&src[0])}; + if (nPos < vchData.size()) { + vchData[nPos] = byte; + } else { + vchData.push_back(byte); + } + nPos += 1; + } template VectorWriter& operator<<(const T& obj) { @@ -233,6 +244,10 @@ public: // Write to the end of the buffer vch.insert(vch.end(), src.begin(), src.end()); } + void write(std::span src) + { + vch.push_back(src[0]); + } template DataStream& operator<<(const T& obj) @@ -427,8 +442,10 @@ public: // Stream subset // void read(std::span dst); + void read(std::span dst); void ignore(size_t nSize); void write(std::span src); + void write(std::span src); template AutoFile& operator<<(const T& obj) diff --git a/src/test/crypto_tests.cpp b/src/test/crypto_tests.cpp index 5588d4cdbc6..0aab9ef0e77 100644 --- a/src/test/crypto_tests.cpp +++ b/src/test/crypto_tests.cpp @@ -1079,7 +1079,7 @@ BOOST_AUTO_TEST_CASE(sha256d64) in[j] = m_rng.randbits(8); } for (int j = 0; j < i; ++j) { - CHash256().Write({in + 64 * j, 64}).Finalize({out1 + 32 * j, 32}); + CHash256().Write(std::span{in + 64 * j, 64}).Finalize({out1 + 32 * j, 32}); } SHA256D64(out2, in, i); BOOST_CHECK(memcmp(out1, out2, 32 * i) == 0); diff --git a/src/test/fuzz/autofile.cpp b/src/test/fuzz/autofile.cpp index c76899cda82..43ad9fab264 100644 --- a/src/test/fuzz/autofile.cpp +++ b/src/test/fuzz/autofile.cpp @@ -29,14 +29,14 @@ FUZZ_TARGET(autofile) [&] { std::array arr{}; try { - auto_file.read({arr.data(), fuzzed_data_provider.ConsumeIntegralInRange(0, 4096)}); + auto_file.read(std::span{arr.data(), fuzzed_data_provider.ConsumeIntegralInRange(0, 4096)}); } catch (const std::ios_base::failure&) { } }, [&] { const std::array arr{}; try { - auto_file.write({arr.data(), fuzzed_data_provider.ConsumeIntegralInRange(0, 4096)}); + auto_file.write(std::span{arr.data(), fuzzed_data_provider.ConsumeIntegralInRange(0, 4096)}); } catch (const std::ios_base::failure&) { } }, From c15d13075220c3ce3b16485c4c630be00ca403bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sat, 18 Jan 2025 15:37:08 +0100 Subject: [PATCH 16/24] test: validate duplicate detection in `CheckTransaction` The `CheckTransaction` validation function in https://github.com/bitcoin/bitcoin/blob/master/src/consensus/tx_check.cpp#L41-L45 relies on a correct ordering relation for detecting duplicate transaction inputs. This update to the tests ensures that: * Accurate detection of duplicates: Beyond trivial cases (e.g., two identical inputs), duplicates are detected correctly in more complex scenarios. * Consistency across methods: Both sorted sets and hash-based sets behave identically when detecting duplicates for `COutPoint` and related values. * Robust ordering and equality relations: The function maintains expected behavior for ordering and equality checks. Using randomized testing with shuffled inputs (to avoid any remaining bias introduced), the enhanced test validates that `CheckTransaction` remains robust and reliable across various input configurations. It confirms identical behavior to a hashing-based duplicate detection mechanism, ensuring consistency and correctness. To make sure the new branches in the follow-up commits will be covered, `basic_transaction_tests` was extended a randomized test one comparing against the old implementation (and also an alternative duplicate). The iterations and ranges were chosen such that every new branch is expected to be hit once. --- src/test/transaction_tests.cpp | 220 +++++++++++++++++++++++++++++++-- src/test/util/setup_common.cpp | 5 + src/test/util/setup_common.h | 1 + 3 files changed, 217 insertions(+), 9 deletions(-) diff --git a/src/test/transaction_tests.cpp b/src/test/transaction_tests.cpp index 1375672a418..5ebf096a82f 100644 --- a/src/test/transaction_tests.cpp +++ b/src/test/transaction_tests.cpp @@ -406,20 +406,110 @@ BOOST_AUTO_TEST_CASE(tx_oversized) } } -BOOST_AUTO_TEST_CASE(basic_transaction_tests) +static CMutableTransaction CreateTransaction() { - // Random real transaction (e2769b09e784f32f62ef849763d4f45b98e07ba658647343b915ff832b110436) - unsigned char ch[] = {0x01, 0x00, 0x00, 0x00, 0x01, 0x6b, 0xff, 0x7f, 0xcd, 0x4f, 0x85, 0x65, 0xef, 0x40, 0x6d, 0xd5, 0xd6, 0x3d, 0x4f, 0xf9, 0x4f, 0x31, 0x8f, 0xe8, 0x20, 0x27, 0xfd, 0x4d, 0xc4, 0x51, 0xb0, 0x44, 0x74, 0x01, 0x9f, 0x74, 0xb4, 0x00, 0x00, 0x00, 0x00, 0x8c, 0x49, 0x30, 0x46, 0x02, 0x21, 0x00, 0xda, 0x0d, 0xc6, 0xae, 0xce, 0xfe, 0x1e, 0x06, 0xef, 0xdf, 0x05, 0x77, 0x37, 0x57, 0xde, 0xb1, 0x68, 0x82, 0x09, 0x30, 0xe3, 0xb0, 0xd0, 0x3f, 0x46, 0xf5, 0xfc, 0xf1, 0x50, 0xbf, 0x99, 0x0c, 0x02, 0x21, 0x00, 0xd2, 0x5b, 0x5c, 0x87, 0x04, 0x00, 0x76, 0xe4, 0xf2, 0x53, 0xf8, 0x26, 0x2e, 0x76, 0x3e, 0x2d, 0xd5, 0x1e, 0x7f, 0xf0, 0xbe, 0x15, 0x77, 0x27, 0xc4, 0xbc, 0x42, 0x80, 0x7f, 0x17, 0xbd, 0x39, 0x01, 0x41, 0x04, 0xe6, 0xc2, 0x6e, 0xf6, 0x7d, 0xc6, 0x10, 0xd2, 0xcd, 0x19, 0x24, 0x84, 0x78, 0x9a, 0x6c, 0xf9, 0xae, 0xa9, 0x93, 0x0b, 0x94, 0x4b, 0x7e, 0x2d, 0xb5, 0x34, 0x2b, 0x9d, 0x9e, 0x5b, 0x9f, 0xf7, 0x9a, 0xff, 0x9a, 0x2e, 0xe1, 0x97, 0x8d, 0xd7, 0xfd, 0x01, 0xdf, 0xc5, 0x22, 0xee, 0x02, 0x28, 0x3d, 0x3b, 0x06, 0xa9, 0xd0, 0x3a, 0xcf, 0x80, 0x96, 0x96, 0x8d, 0x7d, 0xbb, 0x0f, 0x91, 0x78, 0xff, 0xff, 0xff, 0xff, 0x02, 0x8b, 0xa7, 0x94, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x19, 0x76, 0xa9, 0x14, 0xba, 0xde, 0xec, 0xfd, 0xef, 0x05, 0x07, 0x24, 0x7f, 0xc8, 0xf7, 0x42, 0x41, 0xd7, 0x3b, 0xc0, 0x39, 0x97, 0x2d, 0x7b, 0x88, 0xac, 0x40, 0x94, 0xa8, 0x02, 0x00, 0x00, 0x00, 0x00, 0x19, 0x76, 0xa9, 0x14, 0xc1, 0x09, 0x32, 0x48, 0x3f, 0xec, 0x93, 0xed, 0x51, 0xf5, 0xfe, 0x95, 0xe7, 0x25, 0x59, 0xf2, 0xcc, 0x70, 0x43, 0xf9, 0x88, 0xac, 0x00, 0x00, 0x00, 0x00, 0x00}; - std::vector vch(ch, ch + sizeof(ch) -1); - DataStream stream(vch); + // Serialized random real transaction (e2769b09e784f32f62ef849763d4f45b98e07ba658647343b915ff832b110436) + static constexpr auto ser_tx{"01000000016bff7fcd4f8565ef406dd5d63d4ff94f318fe82027fd4dc451b04474019f74b4000000008c493046022100da0dc6aecefe1e06efdf05773757deb168820930e3b0d03f46f5fcf150bf990c022100d25b5c87040076e4f253f8262e763e2dd51e7ff0be157727c4bc42807f17bd39014104e6c26ef67dc610d2cd192484789a6cf9aea9930b944b7e2db5342b9d9e5b9ff79aff9a2ee1978dd7fd01dfc522ee02283d3b06a9d03acf8096968d7dbb0f9178ffffffff028ba7940e000000001976a914badeecfdef0507247fc8f74241d73bc039972d7b88ac4094a802000000001976a914c10932483fec93ed51f5fe95e72559f2cc7043f988ac0000000000"_hex}; CMutableTransaction tx; - stream >> TX_WITH_WITNESS(tx); + DataStream(ser_tx) >> TX_WITH_WITNESS(tx); + return tx; +} + +BOOST_AUTO_TEST_CASE(transaction_duplicate_input_test) +{ + auto tx{CreateTransaction()}; + TxValidationState state; BOOST_CHECK_MESSAGE(CheckTransaction(CTransaction(tx), state) && state.IsValid(), "Simple deserialized transaction should be valid."); - // Check that duplicate txins fail - tx.vin.push_back(tx.vin[0]); - BOOST_CHECK_MESSAGE(!CheckTransaction(CTransaction(tx), state) || !state.IsValid(), "Transaction with duplicate txins should be invalid."); + // Add duplicate input + tx.vin.emplace_back(tx.vin[0]); + std::ranges::shuffle(tx.vin, m_rng); + BOOST_CHECK_MESSAGE(!CheckTransaction(CTransaction(tx), state) || !state.IsValid(), "Transaction with 2 duplicate txins should be invalid."); + + // ... add a valid input for more complex check + tx.vin.emplace_back(COutPoint(Txid::FromUint256(uint256{1}), 1)); + std::ranges::shuffle(tx.vin, m_rng); + BOOST_CHECK_MESSAGE(!CheckTransaction(CTransaction(tx), state) || !state.IsValid(), "Transaction with 3 inputs (2 valid, 1 duplicate) should be invalid."); +} + +BOOST_AUTO_TEST_CASE(transaction_duplicate_detection_test) +{ + // Randomized testing against hash- and tree-based duplicate check + auto reference_duplicate_check_hash{[](const std::vector& vin) { + std::unordered_set vInOutPoints; + for (const auto& txin : vin) { + if (!vInOutPoints.insert(txin.prevout).second) { + return false; + } + } + return true; + }}; + auto reference_duplicate_check_tree{[](const std::vector& vin) { + std::set vInOutPoints; + for (const auto& txin : vin) { + if (!vInOutPoints.insert(txin.prevout).second) { + return false; + } + } + return true; + }}; + + std::vector hashes; + std::vector ns; + for (int i = 0; i < 10; ++i) { + hashes.emplace_back(Txid::FromUint256(m_rng.rand256())); + ns.emplace_back(m_rng.rand32()); + } + auto tx{CreateTransaction()}; + TxValidationState state; + for (int i{0}; i < 100; ++i) { + if (m_rng.randbool()) { + tx.vin.clear(); + } + for (int j{0}, num_inputs{1 + m_rng.randrange(5)}; j < num_inputs; ++j) { + if (COutPoint outpoint(hashes[m_rng.randrange(hashes.size())], ns[m_rng.randrange(ns.size())]); !outpoint.IsNull()) { + tx.vin.emplace_back(outpoint); + } + } + std::ranges::shuffle(tx.vin, m_rng); + + bool actual{CheckTransaction(CTransaction(tx), state)}; + BOOST_CHECK_EQUAL(actual, reference_duplicate_check_hash(tx.vin)); + BOOST_CHECK_EQUAL(actual, reference_duplicate_check_tree(tx.vin)); + } +} + +BOOST_AUTO_TEST_CASE(transaction_null_prevout_detection_test) +{ + // Randomized testing against linear null prevout check + auto reference_null_prevout_check_hash{[](const std::vector& vin) { + for (const auto& txin : vin) { + if (txin.prevout.IsNull()) { + return false; + } + } + return true; + }}; + + auto tx{CreateTransaction()}; + TxValidationState state; + for (int i{0}; i < 100; ++i) { + if (m_rng.randbool()) { + tx.vin.clear(); + } + for (int j{0}, num_inputs{1 + m_rng.randrange(5)}; j < num_inputs; ++j) { + switch (m_rng.randrange(5)) { + case 0: tx.vin.emplace_back(COutPoint()); break; // Null prevout + case 1: tx.vin.emplace_back(Txid::FromUint256(uint256::ZERO), m_rng.rand32()); break; // Null hash, random index + case 2: tx.vin.emplace_back(Txid::FromUint256(m_rng.rand256()), COutPoint::NULL_INDEX); break; // Random hash, Null index + default: tx.vin.emplace_back(Txid::FromUint256(m_rng.rand256()), m_rng.rand32()); // Random prevout + } + } + std::ranges::shuffle(tx.vin, m_rng); + + BOOST_CHECK_EQUAL(CheckTransaction(CTransaction(tx), state), reference_null_prevout_check_hash(tx.vin)); + } } BOOST_AUTO_TEST_CASE(test_Get) @@ -1048,4 +1138,116 @@ BOOST_AUTO_TEST_CASE(test_IsStandard) CheckIsNotStandard(t, "dust"); } +BOOST_AUTO_TEST_CASE(test_uint256_sorting) +{ + // Sorting + std::vector original{ + uint256{1}, + uint256{2}, + uint256{3} + }; + + std::vector shuffled{original}; + std::ranges::shuffle(shuffled, m_rng); + std::sort(shuffled.begin(), shuffled.end()); + + BOOST_CHECK_EQUAL_COLLECTIONS(original.begin(), original.end(), shuffled.begin(), shuffled.end()); + + // Operators + constexpr auto a{uint256{1}}, + b{uint256{2}}, + c{uint256{3}}; + + BOOST_CHECK(a == a); + BOOST_CHECK(a == uint256{1}); + BOOST_CHECK(b == b); + BOOST_CHECK(c == c); + BOOST_CHECK(a != b); + BOOST_CHECK(a != uint256{10}); + BOOST_CHECK(a != c); + BOOST_CHECK(b != c); + + BOOST_CHECK(a < b); + BOOST_CHECK(a < uint256{10}); + BOOST_CHECK(b < c); + BOOST_CHECK(a < c); +} + +BOOST_AUTO_TEST_CASE(test_transaction_identifier_sorting) +{ + std::vector original{ + Txid::FromUint256(uint256{1}), + Txid::FromUint256(uint256{2}), + Txid::FromUint256(uint256{3}) + }; + + std::vector shuffled{original}; + std::ranges::shuffle(shuffled, m_rng); + std::sort(shuffled.begin(), shuffled.end()); + + BOOST_CHECK_EQUAL_COLLECTIONS(original.begin(), original.end(), shuffled.begin(), shuffled.end()); + + // Operators + const auto a(Txid::FromUint256(uint256{1})), + b(Txid::FromUint256(uint256{2})), + c(Txid::FromUint256(uint256{3})); + + BOOST_CHECK(a == uint256{1}); + + BOOST_CHECK(a == a); + BOOST_CHECK(a == Txid::FromUint256(uint256{1})); + BOOST_CHECK(b == b); + BOOST_CHECK(c == c); + BOOST_CHECK(a != b); + BOOST_CHECK(a != Txid::FromUint256(uint256{10})); + BOOST_CHECK(a != c); + BOOST_CHECK(b != c); + + BOOST_CHECK(a < b); + BOOST_CHECK(a < Txid::FromUint256(uint256{10})); + BOOST_CHECK(b < c); + BOOST_CHECK(a < c); +} + +BOOST_AUTO_TEST_CASE(test_coutpoint_sorting) +{ + // Sorting + std::vector original{ + COutPoint(Txid::FromUint256(uint256{1}), 1), + COutPoint(Txid::FromUint256(uint256{1}), 2), + COutPoint(Txid::FromUint256(uint256{1}), 3), + COutPoint(Txid::FromUint256(uint256{2}), 1), + COutPoint(Txid::FromUint256(uint256{2}), 2), + COutPoint(Txid::FromUint256(uint256{2}), 3), + COutPoint(Txid::FromUint256(uint256{3}), 1), + COutPoint(Txid::FromUint256(uint256{3}), 2), + COutPoint(Txid::FromUint256(uint256{3}), 3) + }; + + std::vector shuffled{original}; + std::ranges::shuffle(shuffled, m_rng); + std::sort(shuffled.begin(), shuffled.end()); + + BOOST_CHECK_EQUAL_COLLECTIONS(original.begin(), original.end(), shuffled.begin(), shuffled.end()); + + // Operators + const auto a{COutPoint(Txid::FromUint256(uint256{1}), 1)}, + b{COutPoint(Txid::FromUint256(uint256{1}), 2)}, + c{COutPoint(Txid::FromUint256(uint256{2}), 1)}; + + BOOST_CHECK(a == a); + BOOST_CHECK(a == COutPoint(Txid::FromUint256(uint256{1}), 1)); + BOOST_CHECK(b == b); + BOOST_CHECK(c == c); + BOOST_CHECK(a != b); + BOOST_CHECK(a != COutPoint(Txid::FromUint256(uint256{1}), 10)); + BOOST_CHECK(a != c); + BOOST_CHECK(b != c); + + BOOST_CHECK(a < b); + BOOST_CHECK(a < COutPoint(Txid::FromUint256(uint256{1}), 10)); + BOOST_CHECK(b < c); + BOOST_CHECK(a < c); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/util/setup_common.cpp b/src/test/util/setup_common.cpp index 87f0b31cad8..66c36151023 100644 --- a/src/test/util/setup_common.cpp +++ b/src/test/util/setup_common.cpp @@ -628,3 +628,8 @@ std::ostream& operator<<(std::ostream& os, const uint256& num) { return os << num.ToString(); } + +std::ostream& operator<<(std::ostream& os, const COutPoint& outpoint) +{ + return os << outpoint.hash << ", " << outpoint.n; +} diff --git a/src/test/util/setup_common.h b/src/test/util/setup_common.h index 57bea9086b9..d10da4a726e 100644 --- a/src/test/util/setup_common.h +++ b/src/test/util/setup_common.h @@ -291,6 +291,7 @@ inline std::ostream& operator<<(std::ostream& os, const std::optional& v) std::ostream& operator<<(std::ostream& os, const arith_uint256& num); std::ostream& operator<<(std::ostream& os, const uint160& num); std::ostream& operator<<(std::ostream& os, const uint256& num); +std::ostream& operator<<(std::ostream& os, const COutPoint& outpoint); // @} /** From bf580d7b4e6376c69dc2a7cabe25f2223b2ffb04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sat, 18 Jan 2025 11:43:26 +0100 Subject: [PATCH 17/24] bench: measure `CheckBlock` speed separately from serialization > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='CheckBlockBench|DuplicateInputs' -min-time=10000 > C++ compiler .......................... AppleClang 16.0.0.16000026 | ns/block | block/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 372,743.63 | 2,682.81 | 1.1% | 10.99 | `CheckBlockBench` | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 3,304,694.54 | 302.60 | 0.5% | 11.05 | `DuplicateInputs` > C++ compiler .......................... GNU 13.3.0 | ns/block | block/s | err% | ins/block | cyc/block | IPC | bra/block | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 1,096,261.84 | 912.19 | 0.1% | 7,963,390.88 | 3,487,375.26 | 2.283 | 1,266,941.00 | 1.8% | 11.03 | `CheckBlockBench` | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 8,366,309.48 | 119.53 | 0.0% | 23,865,177.67 | 26,620,160.23 | 0.897 | 5,972,887.41 | 4.0% | 10.78 | `DuplicateInputs` --- src/bench/checkblock.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/bench/checkblock.cpp b/src/bench/checkblock.cpp index 2d9eac12339..c5ddebfd0ad 100644 --- a/src/bench/checkblock.cpp +++ b/src/bench/checkblock.cpp @@ -21,7 +21,7 @@ #include #include -static void SizeComputerBlock(benchmark::Bench& bench) { +static void SizeComputerBlockBench(benchmark::Bench& bench) { CBlock block; DataStream(benchmark::data::block413567) >> TX_WITH_WITNESS(block); @@ -32,7 +32,7 @@ static void SizeComputerBlock(benchmark::Bench& bench) { }); } -static void SerializeBlock(benchmark::Bench& bench) { +static void SerializeBlockBench(benchmark::Bench& bench) { CBlock block; DataStream(benchmark::data::block413567) >> TX_WITH_WITNESS(block); @@ -48,7 +48,7 @@ static void SerializeBlock(benchmark::Bench& bench) { // a block off the wire, but before we can relay the block on to peers using // compact block relay. -static void DeserializeBlock(benchmark::Bench& bench) +static void DeserializeBlockBench(benchmark::Bench& bench) { DataStream stream(benchmark::data::block413567); std::byte a{0}; @@ -83,7 +83,7 @@ static void DeserializeAndCheckBlock(benchmark::Bench& bench) }); } -BENCHMARK(SizeComputerBlock, benchmark::PriorityLevel::HIGH); -BENCHMARK(SerializeBlock, benchmark::PriorityLevel::HIGH); -BENCHMARK(DeserializeBlock, benchmark::PriorityLevel::HIGH); +BENCHMARK(SizeComputerBlockBench, benchmark::PriorityLevel::HIGH); +BENCHMARK(SerializeBlockBench, benchmark::PriorityLevel::HIGH); +BENCHMARK(DeserializeBlockBench, benchmark::PriorityLevel::HIGH); BENCHMARK(DeserializeAndCheckBlock, benchmark::PriorityLevel::HIGH); From a300325c5ba30a3a4d709c93fa879afa8a7354f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Tue, 21 Jan 2025 14:01:31 +0100 Subject: [PATCH 18/24] bench: add `ProcessTransactionBench` to measure `CheckBlock` in context The newly introduced `ProcessTransactionBench` incorporates multiple steps in the validation pipeline, offering a more comprehensive view of `CheckBlock` performance within a realistic transaction validation context. Previous microbenchmarks, such as DeserializeAndCheckBlockTest and DuplicateInputs, focused on isolated aspects of transaction and block validation. While these tests provided valuable insights for targeted profiling, they lacked context regarding the broader validation process, where interactions between components play a critical role. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='ProcessTransactionBench' -min-time=10000 > C++ compiler .......................... AppleClang 16.0.0.16000026 | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 9,585.10 | 104,328.55 | 0.1% | 11.03 | `ProcessTransactionBench` > C++ compiler .......................... GNU 13.3.0 | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 56,199.57 | 17,793.73 | 0.1% | 229,263.01 | 178,766.31 | 1.282 | 15,509.97 | 0.5% | 10.91 | `ProcessTransactionBench` --- src/bench/mempool_stress.cpp | 57 ++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/src/bench/mempool_stress.cpp b/src/bench/mempool_stress.cpp index fbac25db5f5..2fe0622d18b 100644 --- a/src/bench/mempool_stress.cpp +++ b/src/bench/mempool_stress.cpp @@ -13,10 +13,19 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include +#include #include class CCoinsViewCache; @@ -126,5 +135,53 @@ static void MempoolCheck(benchmark::Bench& bench) }); } +static void ProcessTransactionBench(benchmark::Bench& bench) +{ + const auto testing_setup{MakeNoLogFileContext()}; + CTxMemPool& pool{*Assert(testing_setup->m_node.mempool)}; + ChainstateManager& chainman{*testing_setup->m_node.chainman}; + + CBlock block; + DataStream(benchmark::data::block413567) >> TX_WITH_WITNESS(block); + + std::vector txs(block.vtx.size() - 1); + for (size_t i{1}; i < block.vtx.size(); ++i) { + CMutableTransaction mtx{*block.vtx[i]}; + for (auto& txin : mtx.vin) { + txin.nSequence = CTxIn::SEQUENCE_FINAL; + txin.scriptSig.clear(); + txin.scriptWitness.stack = {WITNESS_STACK_ELEM_OP_TRUE}; + } + txs[i - 1] = MakeTransactionRef(std::move(mtx)); + } + + CCoinsViewCache* coins_tip{nullptr}; + size_t cached_coin_count{0}; + { + LOCK(cs_main); + coins_tip = &chainman.ActiveChainstate().CoinsTip(); + for (const auto& tx : txs) { + const Coin coin(CTxOut(2 * tx->GetValueOut(), P2WSH_OP_TRUE), 1, /*fCoinBaseIn=*/false); + for (const auto& in : tx->vin) { + coins_tip->AddCoin(in.prevout, Coin{coin}, /*possible_overwrite=*/false); + cached_coin_count++; + } + } + } + + bench.batch(txs.size()).run([&] { + LOCK2(cs_main, pool.cs); + assert(coins_tip->GetCacheSize() == cached_coin_count); + for (const auto& tx : txs) pool.removeRecursive(*tx, MemPoolRemovalReason::REPLACED); + assert(pool.size() == 0); + + for (const auto& tx : txs) { + const auto res{chainman.ProcessTransaction(tx, /*test_accept=*/true)}; + assert(res.m_result_type == MempoolAcceptResult::ResultType::VALID); + } + }); +} + BENCHMARK(ComplexMemPool, benchmark::PriorityLevel::HIGH); BENCHMARK(MempoolCheck, benchmark::PriorityLevel::HIGH); +BENCHMARK(ProcessTransactionBench, benchmark::PriorityLevel::HIGH); From 7b576a440d675dd29525efd6b3254552ce049834 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sat, 25 Jan 2025 14:07:24 +0100 Subject: [PATCH 19/24] optimization: move duplicate checks outside of coinbase branch `IsCoinBase` means single input with NULL prevout, so it makes sense to restrict duplicate check to non-coinbase transactions only. The behavior is the same as before, except that single-input-transactions aren't checked for duplicates anymore (~70-90% of the cases, see https://transactionfee.info/charts/transactions-1in). I've added braces to the conditions and loops to simplify review of followup commits. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='CheckBlockBench|DuplicateInputs|ProcessTransactionBench' -min-time=10000 > C++ compiler .......................... AppleClang 16.0.0.16000026 | ns/block | block/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 335,917.12 | 2,976.92 | 1.3% | 11.01 | `CheckBlockBench` | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 3,286,337.42 | 304.29 | 1.1% | 10.90 | `DuplicateInputs` | 9,561.02 | 104,591.35 | 0.2% | 11.02 | `ProcessTransactionBench` --- src/consensus/tx_check.cpp | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/consensus/tx_check.cpp b/src/consensus/tx_check.cpp index b3fee1e8b1a..050bca84d47 100644 --- a/src/consensus/tx_check.cpp +++ b/src/consensus/tx_check.cpp @@ -38,22 +38,25 @@ bool CheckTransaction(const CTransaction& tx, TxValidationState& state) // of a tx as spent, it does not check if the tx has duplicate inputs. // Failure to run this check will result in either a crash or an inflation bug, depending on the implementation of // the underlying coins database. - std::set vInOutPoints; - for (const auto& txin : tx.vin) { - if (!vInOutPoints.insert(txin.prevout).second) - return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-inputs-duplicate"); - } + if (tx.vin.size() == 1) { + if (tx.IsCoinBase()) { + if (tx.vin[0].scriptSig.size() < 2 || tx.vin[0].scriptSig.size() > 100) { + return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-cb-length"); + } + } + } else { + std::set vInOutPoints; + for (const auto& txin : tx.vin) { + if (!vInOutPoints.insert(txin.prevout).second) { + return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-inputs-duplicate"); + } + } - if (tx.IsCoinBase()) - { - if (tx.vin[0].scriptSig.size() < 2 || tx.vin[0].scriptSig.size() > 100) - return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-cb-length"); - } - else - { - for (const auto& txin : tx.vin) - if (txin.prevout.IsNull()) + for (const auto& txin : tx.vin) { + if (txin.prevout.IsNull()) { return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-prevout-null"); + } + } } return true; From 765b71b90b0333c3531f313781edd7ad77abebdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Fri, 17 Jan 2025 22:39:59 +0100 Subject: [PATCH 20/24] optimization: simplify duplicate checks for trivial inputs No need to create a set for checking duplicates for two-input-transactions. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='CheckBlockBench|DuplicateInputs|ProcessTransactionBench' -min-time=10000 > C++ compiler .......................... AppleClang 16.0.0.16000026 | ns/block | block/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 314,137.30 | 3,183.32 | 1.2% | 11.04 | `CheckBlockBench` | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 3,220,592.73 | 310.50 | 1.3% | 10.92 | `DuplicateInputs` | 9,425.98 | 106,089.77 | 0.3% | 11.00 | `ProcessTransactionBench` --- src/consensus/tx_check.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/consensus/tx_check.cpp b/src/consensus/tx_check.cpp index 050bca84d47..cd465e56d44 100644 --- a/src/consensus/tx_check.cpp +++ b/src/consensus/tx_check.cpp @@ -45,11 +45,17 @@ bool CheckTransaction(const CTransaction& tx, TxValidationState& state) } } } else { - std::set vInOutPoints; - for (const auto& txin : tx.vin) { - if (!vInOutPoints.insert(txin.prevout).second) { + if (tx.vin.size() == 2) { + if (tx.vin[0].prevout == tx.vin[1].prevout) { return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-inputs-duplicate"); } + } else { + std::set vInOutPoints; + for (const auto& txin : tx.vin) { + if (!vInOutPoints.insert(txin.prevout).second) { + return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-inputs-duplicate"); + } + } } for (const auto& txin : tx.vin) { From cb8c012b87ac45cf17dafe13914767c0dfd9071c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sat, 18 Jan 2025 11:54:47 +0100 Subject: [PATCH 21/24] optimization: replace tree with sorted vector A pre-sized vector retains locality (enabling SIMD operations), speeding up sorting and equality checks. It's also simpler (therefore more reliable) than a sorted set. It also causes less memory fragmentation. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='CheckBlockBench|DuplicateInputs|ProcessTransactionBench' -min-time=10000 > C++ compiler .......................... AppleClang 16.0.0.16000026 | ns/block | block/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 181,922.54 | 5,496.85 | 0.2% | 10.98 | `CheckBlockBench` | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 997,739.30 | 1,002.27 | 1.0% | 10.94 | `DuplicateInputs` | 9,449.28 | 105,828.15 | 0.3% | 10.99 | `ProcessTransactionBench` Co-authored-by: Pieter Wuille --- src/consensus/tx_check.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/consensus/tx_check.cpp b/src/consensus/tx_check.cpp index cd465e56d44..9457596fbb9 100644 --- a/src/consensus/tx_check.cpp +++ b/src/consensus/tx_check.cpp @@ -50,11 +50,14 @@ bool CheckTransaction(const CTransaction& tx, TxValidationState& state) return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-inputs-duplicate"); } } else { - std::set vInOutPoints; + std::vector sortedPrevouts; + sortedPrevouts.reserve(tx.vin.size()); for (const auto& txin : tx.vin) { - if (!vInOutPoints.insert(txin.prevout).second) { - return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-inputs-duplicate"); - } + sortedPrevouts.push_back(txin.prevout); + } + std::sort(sortedPrevouts.begin(), sortedPrevouts.end()); + if (std::ranges::adjacent_find(sortedPrevouts) != sortedPrevouts.end()) { + return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-inputs-duplicate"); } } From f07c79f034f72cb7d0885444854f162dc5c2df55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sat, 25 Jan 2025 14:24:02 +0100 Subject: [PATCH 22/24] optimization: look for NULL prevouts in the sorted values For the 2 input case we simply check them both, like we did with equality. For the general case, we take advantage of sorting, making invalid value detection constant time instead of linear in the worst case. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='CheckBlockBench|DuplicateInputs|ProcessTransactionBench' -min-time=10000 > C++ compiler .......................... AppleClang 16.0.0.16000026 | ns/block | block/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 179,971.00 | 5,556.45 | 0.3% | 11.02 | `CheckBlockBench` | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 963,177.98 | 1,038.23 | 1.7% | 10.92 | `DuplicateInputs` | 9,410.90 | 106,259.75 | 0.3% | 11.01 | `ProcessTransactionBench` > C++ compiler .......................... GNU 13.3.0 | ns/block | block/s | err% | ins/block | cyc/block | IPC | bra/block | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 834,855.94 | 1,197.81 | 0.0% | 6,518,548.86 | 2,656,039.78 | 2.454 | 919,160.84 | 1.5% | 10.78 | `CheckBlockBench` | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 4,261,492.75 | 234.66 | 0.0% | 17,379,823.40 | 13,559,793.33 | 1.282 | 4,265,714.28 | 3.4% | 11.00 | `DuplicateInputs` | 55,819.53 | 17,914.88 | 0.1% | 227,828.15 | 177,520.09 | 1.283 | 15,184.36 | 0.4% | 10.91 | `ProcessTransactionBench` --- src/consensus/tx_check.cpp | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/consensus/tx_check.cpp b/src/consensus/tx_check.cpp index 9457596fbb9..0a471deda24 100644 --- a/src/consensus/tx_check.cpp +++ b/src/consensus/tx_check.cpp @@ -44,25 +44,27 @@ bool CheckTransaction(const CTransaction& tx, TxValidationState& state) return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-cb-length"); } } + } else if (tx.vin.size() == 2) { + if (tx.vin[0].prevout == tx.vin[1].prevout) { + return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-inputs-duplicate"); + } + if (tx.vin[0].prevout.IsNull() || tx.vin[1].prevout.IsNull()) { + return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-prevout-null"); + } } else { - if (tx.vin.size() == 2) { - if (tx.vin[0].prevout == tx.vin[1].prevout) { - return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-inputs-duplicate"); - } - } else { - std::vector sortedPrevouts; - sortedPrevouts.reserve(tx.vin.size()); - for (const auto& txin : tx.vin) { - sortedPrevouts.push_back(txin.prevout); - } - std::sort(sortedPrevouts.begin(), sortedPrevouts.end()); - if (std::ranges::adjacent_find(sortedPrevouts) != sortedPrevouts.end()) { - return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-inputs-duplicate"); - } + std::vector sortedPrevouts; + sortedPrevouts.reserve(tx.vin.size()); + for (const auto& txin : tx.vin) { + sortedPrevouts.push_back(txin.prevout); + } + std::sort(sortedPrevouts.begin(), sortedPrevouts.end()); + if (std::ranges::adjacent_find(sortedPrevouts) != sortedPrevouts.end()) { + return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-inputs-duplicate"); } - for (const auto& txin : tx.vin) { - if (txin.prevout.IsNull()) { + for (const auto& in : sortedPrevouts) { + if (!in.hash.IsNull()) break; // invalid values can only be at the beginning + if (in.IsNull()) { return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-prevout-null"); } } From b5dc42874d20a23429293d9037a1fda99e2282c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Tue, 15 Apr 2025 12:31:20 +0200 Subject: [PATCH 23/24] test: assert CScript allocation characteristics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verifies that script types are correctly allocated using prevector's direct (stack) or indirect (heap) storage based on their size: Direct (stack) allocated script types (size ≤ 28 bytes): * OP_RETURN (small) * P2WPKH * P2SH * P2PKH Indirect (heap) allocated script types (size > 28 bytes): * P2WSH * P2TR * P2PK * MULTISIG (small) This test provides a baseline for verifying changes to prevector's inline capacity. --- src/test/script_tests.cpp | 85 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/src/test/script_tests.cpp b/src/test/script_tests.cpp index 4a5dc561cc3..700a0fd3427 100644 --- a/src/test/script_tests.cpp +++ b/src/test/script_tests.cpp @@ -1129,6 +1129,91 @@ BOOST_AUTO_TEST_CASE(script_CHECKMULTISIG23) BOOST_CHECK_MESSAGE(err == SCRIPT_ERR_INVALID_STACK_OPERATION, ScriptErrorString(err)); } +BOOST_AUTO_TEST_CASE(script_size_and_capacity_test) +{ + BOOST_CHECK_EQUAL(sizeof(prevector<28, unsigned char>), 32); + BOOST_CHECK_EQUAL(sizeof(CScriptBase), 32); + BOOST_CHECK_EQUAL(sizeof(CScript), 32); + BOOST_CHECK_EQUAL(sizeof(CTxOut), 40); + + CKey dummyKey; + dummyKey.MakeNewKey(true); + + std::vector> dummyVSolutions; + + // Small OP_RETURN is stack allocated + { + const auto scriptSmallOpReturn{CScript() << OP_RETURN << std::vector(10, 0xaa)}; + BOOST_CHECK_EQUAL(Solver(scriptSmallOpReturn, dummyVSolutions), TxoutType::NULL_DATA); + BOOST_CHECK_EQUAL(scriptSmallOpReturn.size(), 12); + BOOST_CHECK_EQUAL(scriptSmallOpReturn.capacity(), 28); + BOOST_CHECK_EQUAL(scriptSmallOpReturn.allocated_memory(), 0); + } + + // P2WPKH is stack allocated + { + const auto scriptP2WPKH{GetScriptForDestination(WitnessV0KeyHash{PKHash{CKeyID{CPubKey{dummyKey.GetPubKey()}.GetID()}}})}; + BOOST_CHECK_EQUAL(Solver(scriptP2WPKH, dummyVSolutions), TxoutType::WITNESS_V0_KEYHASH); + BOOST_CHECK_EQUAL(scriptP2WPKH.size(), 22); + BOOST_CHECK_EQUAL(scriptP2WPKH.capacity(), 28); + BOOST_CHECK_EQUAL(scriptP2WPKH.allocated_memory(), 0); + } + + // P2SH is stack allocated + { + const auto scriptP2SH{GetScriptForDestination(ScriptHash{CScript{} << OP_TRUE})}; + BOOST_CHECK(scriptP2SH.IsPayToScriptHash()); + BOOST_CHECK_EQUAL(scriptP2SH.size(), 23); + BOOST_CHECK_EQUAL(scriptP2SH.capacity(), 28); + BOOST_CHECK_EQUAL(scriptP2SH.allocated_memory(), 0); + } + + // P2PKH is stack allocated + { + const auto scriptP2PKH{GetScriptForDestination(PKHash{CKeyID{CPubKey{dummyKey.GetPubKey()}.GetID()}})}; + BOOST_CHECK_EQUAL(Solver(scriptP2PKH, dummyVSolutions), TxoutType::PUBKEYHASH); + BOOST_CHECK_EQUAL(scriptP2PKH.size(), 25); + BOOST_CHECK_EQUAL(scriptP2PKH.capacity(), 28); + BOOST_CHECK_EQUAL(scriptP2PKH.allocated_memory(), 0); + } + + // P2WSH is heap allocated + { + const auto scriptP2WSH{GetScriptForDestination(WitnessV0ScriptHash{CScript{} << OP_TRUE})}; + BOOST_CHECK(scriptP2WSH.IsPayToWitnessScriptHash()); + BOOST_CHECK_EQUAL(scriptP2WSH.size(), 34); + BOOST_CHECK_EQUAL(scriptP2WSH.capacity(), 34); + BOOST_CHECK_EQUAL(scriptP2WSH.allocated_memory(), 34); + } + + // P2TR is heap allocated + { + const auto scriptTaproot{GetScriptForDestination(WitnessV1Taproot{XOnlyPubKey{CPubKey{dummyKey.GetPubKey()}}})}; + BOOST_CHECK_EQUAL(Solver(scriptTaproot, dummyVSolutions), TxoutType::WITNESS_V1_TAPROOT); + BOOST_CHECK_EQUAL(scriptTaproot.size(), 34); + BOOST_CHECK_EQUAL(scriptTaproot.capacity(), 34); + BOOST_CHECK_EQUAL(scriptTaproot.allocated_memory(), 34); + } + + // P2PK is heap allocated + { + const auto scriptPubKey{GetScriptForRawPubKey(CPubKey{dummyKey.GetPubKey()})}; + BOOST_CHECK_EQUAL(Solver(scriptPubKey, dummyVSolutions), TxoutType::PUBKEY); + BOOST_CHECK_EQUAL(scriptPubKey.size(), 35); + BOOST_CHECK_EQUAL(scriptPubKey.capacity(), 35); + BOOST_CHECK_EQUAL(scriptPubKey.allocated_memory(), 35); + } + + // MULTISIG is always heap allocated + { + const auto scriptMultisig{GetScriptForMultisig(1, std::vector{2, CPubKey{dummyKey.GetPubKey()}})}; + BOOST_CHECK_EQUAL(Solver(scriptMultisig, dummyVSolutions), TxoutType::MULTISIG); + BOOST_CHECK_EQUAL(scriptMultisig.size(), 71); + BOOST_CHECK_EQUAL(scriptMultisig.capacity(), 103); + BOOST_CHECK_EQUAL(scriptMultisig.allocated_memory(), 103); + } +} + /* Wrapper around ProduceSignature to combine two scriptsigs */ SignatureData CombineSignatures(const CTxOut& txout, const CMutableTransaction& tx, const SignatureData& scriptSig1, const SignatureData& scriptSig2) { From b6b4235c143eda6f0fa4ed86f2dcd47709b3b6ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Thu, 27 Mar 2025 18:40:08 +0100 Subject: [PATCH 24/24] Allocate `P2WSH`/`P2TR`/`P2PK` scripts on stack The current `prevector` size of 28 bytes (chosen to fill the `sizeof(CScript)` aligned size) was introduced in 2015 (https://github.com/bitcoin/bitcoin/pull/6914) before SegWit and TapRoot. However, the increasingly common `P2WSH` and `P2TR` scripts are both 34 bytes, and are forced to use heap (re)allocation rather than efficient inline storage. The core trade-off of this change is to eliminate heap allocations for common 34-36 byte scripts at the cost of increasing the base memory footprint of all `CScript` objects by 8 bytes (while still respecting peak memory usage defined by `-dbcache`). Increasing the `prevector` size allows these scripts to be stored on the stack, avoiding heap allocations, reducing potential memory fragmentation, and improving performance during cache flushes. Massif analysis confirms a lower stable memory usage after flushing, suggesting the elimination of heap allocations outweighs the larger base size for common workloads. Due to memory alignment, increasing the `prevector` size to 36 bytes doesn't change the overall `sizeof(CScript)` compared to an increase to 34 bytes, allowing us to include `P2PK` scripts as well at no additional memory cost. Performance benchmarks for AssumeUTXO load and flush show: - Small dbcache (450MB): ~1% performance penalty due to more frequent flushes - Large dbcache (4500-4500MB+): ~6-7% performance improvement due to fewer heap allocations Full IBD and reindex-chainstate with larger `dbcache` values also show an overall ~3% speedup. Co-authored-by: Ava Chow Co-authored-by: Andrew Toth --- src/bench/checkqueue.cpp | 2 +- src/bench/prevector.cpp | 36 ++++++++++++++--------------- src/script/script.h | 2 +- src/test/script_tests.cpp | 34 +++++++++++++-------------- src/test/validation_flush_tests.cpp | 6 ++--- 5 files changed, 39 insertions(+), 41 deletions(-) diff --git a/src/bench/checkqueue.cpp b/src/bench/checkqueue.cpp index 8134154eb11..72eaa6588ad 100644 --- a/src/bench/checkqueue.cpp +++ b/src/bench/checkqueue.cpp @@ -16,7 +16,7 @@ static const size_t BATCHES = 101; static const size_t BATCH_SIZE = 30; -static const int PREVECTOR_SIZE = 28; +static const int PREVECTOR_SIZE = 36; static const unsigned int QUEUE_BATCH_SIZE = 128; // This Benchmark tests the CheckQueue with a slightly realistic workload, diff --git a/src/bench/prevector.cpp b/src/bench/prevector.cpp index 60c43e86e1b..97e897519d8 100644 --- a/src/bench/prevector.cpp +++ b/src/bench/prevector.cpp @@ -27,22 +27,22 @@ template static void PrevectorDestructor(benchmark::Bench& bench) { bench.batch(2).run([&] { - prevector<28, T> t0; - prevector<28, T> t1; - t0.resize(28); - t1.resize(29); + prevector<36, T> t0; + prevector<36, T> t1; + t0.resize(36); + t1.resize(37); }); } template static void PrevectorClear(benchmark::Bench& bench) { - prevector<28, T> t0; - prevector<28, T> t1; + prevector<36, T> t0; + prevector<36, T> t1; bench.batch(2).run([&] { - t0.resize(28); + t0.resize(36); t0.clear(); - t1.resize(29); + t1.resize(37); t1.clear(); }); } @@ -50,12 +50,12 @@ static void PrevectorClear(benchmark::Bench& bench) template static void PrevectorResize(benchmark::Bench& bench) { - prevector<28, T> t0; - prevector<28, T> t1; + prevector<36, T> t0; + prevector<36, T> t1; bench.batch(4).run([&] { - t0.resize(28); + t0.resize(36); t0.resize(0); - t1.resize(29); + t1.resize(37); t1.resize(0); }); } @@ -64,8 +64,8 @@ template static void PrevectorDeserialize(benchmark::Bench& bench) { DataStream s0{}; - prevector<28, T> t0; - t0.resize(28); + prevector<36, T> t0; + t0.resize(36); for (auto x = 0; x < 900; ++x) { s0 << t0; } @@ -74,7 +74,7 @@ static void PrevectorDeserialize(benchmark::Bench& bench) s0 << t0; } bench.batch(1000).run([&] { - prevector<28, T> t1; + prevector<36, T> t1; for (auto x = 0; x < 1000; ++x) { s0 >> t1; } @@ -86,7 +86,7 @@ template static void PrevectorFillVectorDirect(benchmark::Bench& bench) { bench.run([&] { - std::vector> vec; + std::vector> vec; vec.reserve(260); for (size_t i = 0; i < 260; ++i) { vec.emplace_back(); @@ -99,11 +99,11 @@ template static void PrevectorFillVectorIndirect(benchmark::Bench& bench) { bench.run([&] { - std::vector> vec; + std::vector> vec; vec.reserve(260); for (size_t i = 0; i < 260; ++i) { // force allocation - vec.emplace_back(29, T{}); + vec.emplace_back(37, T{}); } }); } diff --git a/src/script/script.h b/src/script/script.h index f4579849803..9c6d25eb157 100644 --- a/src/script/script.h +++ b/src/script/script.h @@ -406,7 +406,7 @@ private: * Tests in October 2015 showed use of this reduced dbcache memory usage by 23% * and made an initial sync 13% faster. */ -typedef prevector<28, unsigned char> CScriptBase; +typedef prevector<36, unsigned char> CScriptBase; bool GetScriptOp(CScriptBase::const_iterator& pc, CScriptBase::const_iterator end, opcodetype& opcodeRet, std::vector* pvchRet); diff --git a/src/test/script_tests.cpp b/src/test/script_tests.cpp index 700a0fd3427..f240fe8949b 100644 --- a/src/test/script_tests.cpp +++ b/src/test/script_tests.cpp @@ -1131,10 +1131,10 @@ BOOST_AUTO_TEST_CASE(script_CHECKMULTISIG23) BOOST_AUTO_TEST_CASE(script_size_and_capacity_test) { - BOOST_CHECK_EQUAL(sizeof(prevector<28, unsigned char>), 32); - BOOST_CHECK_EQUAL(sizeof(CScriptBase), 32); - BOOST_CHECK_EQUAL(sizeof(CScript), 32); - BOOST_CHECK_EQUAL(sizeof(CTxOut), 40); + BOOST_CHECK_EQUAL(sizeof(prevector<34, uint8_t>), sizeof(prevector<36, uint8_t>)); + BOOST_CHECK_EQUAL(sizeof(CScriptBase), 40); + BOOST_CHECK_EQUAL(sizeof(CScript), 40); + BOOST_CHECK_EQUAL(sizeof(CTxOut), 48); CKey dummyKey; dummyKey.MakeNewKey(true); @@ -1146,7 +1146,7 @@ BOOST_AUTO_TEST_CASE(script_size_and_capacity_test) const auto scriptSmallOpReturn{CScript() << OP_RETURN << std::vector(10, 0xaa)}; BOOST_CHECK_EQUAL(Solver(scriptSmallOpReturn, dummyVSolutions), TxoutType::NULL_DATA); BOOST_CHECK_EQUAL(scriptSmallOpReturn.size(), 12); - BOOST_CHECK_EQUAL(scriptSmallOpReturn.capacity(), 28); + BOOST_CHECK_EQUAL(scriptSmallOpReturn.capacity(), 36); BOOST_CHECK_EQUAL(scriptSmallOpReturn.allocated_memory(), 0); } @@ -1155,7 +1155,7 @@ BOOST_AUTO_TEST_CASE(script_size_and_capacity_test) const auto scriptP2WPKH{GetScriptForDestination(WitnessV0KeyHash{PKHash{CKeyID{CPubKey{dummyKey.GetPubKey()}.GetID()}}})}; BOOST_CHECK_EQUAL(Solver(scriptP2WPKH, dummyVSolutions), TxoutType::WITNESS_V0_KEYHASH); BOOST_CHECK_EQUAL(scriptP2WPKH.size(), 22); - BOOST_CHECK_EQUAL(scriptP2WPKH.capacity(), 28); + BOOST_CHECK_EQUAL(scriptP2WPKH.capacity(), 36); BOOST_CHECK_EQUAL(scriptP2WPKH.allocated_memory(), 0); } @@ -1164,7 +1164,7 @@ BOOST_AUTO_TEST_CASE(script_size_and_capacity_test) const auto scriptP2SH{GetScriptForDestination(ScriptHash{CScript{} << OP_TRUE})}; BOOST_CHECK(scriptP2SH.IsPayToScriptHash()); BOOST_CHECK_EQUAL(scriptP2SH.size(), 23); - BOOST_CHECK_EQUAL(scriptP2SH.capacity(), 28); + BOOST_CHECK_EQUAL(scriptP2SH.capacity(), 36); BOOST_CHECK_EQUAL(scriptP2SH.allocated_memory(), 0); } @@ -1173,35 +1173,35 @@ BOOST_AUTO_TEST_CASE(script_size_and_capacity_test) const auto scriptP2PKH{GetScriptForDestination(PKHash{CKeyID{CPubKey{dummyKey.GetPubKey()}.GetID()}})}; BOOST_CHECK_EQUAL(Solver(scriptP2PKH, dummyVSolutions), TxoutType::PUBKEYHASH); BOOST_CHECK_EQUAL(scriptP2PKH.size(), 25); - BOOST_CHECK_EQUAL(scriptP2PKH.capacity(), 28); + BOOST_CHECK_EQUAL(scriptP2PKH.capacity(), 36); BOOST_CHECK_EQUAL(scriptP2PKH.allocated_memory(), 0); } - // P2WSH is heap allocated + // P2WSH is stack allocated { const auto scriptP2WSH{GetScriptForDestination(WitnessV0ScriptHash{CScript{} << OP_TRUE})}; BOOST_CHECK(scriptP2WSH.IsPayToWitnessScriptHash()); BOOST_CHECK_EQUAL(scriptP2WSH.size(), 34); - BOOST_CHECK_EQUAL(scriptP2WSH.capacity(), 34); - BOOST_CHECK_EQUAL(scriptP2WSH.allocated_memory(), 34); + BOOST_CHECK_EQUAL(scriptP2WSH.capacity(), 36); + BOOST_CHECK_EQUAL(scriptP2WSH.allocated_memory(), 0); } - // P2TR is heap allocated + // P2TR is stack allocated { const auto scriptTaproot{GetScriptForDestination(WitnessV1Taproot{XOnlyPubKey{CPubKey{dummyKey.GetPubKey()}}})}; BOOST_CHECK_EQUAL(Solver(scriptTaproot, dummyVSolutions), TxoutType::WITNESS_V1_TAPROOT); BOOST_CHECK_EQUAL(scriptTaproot.size(), 34); - BOOST_CHECK_EQUAL(scriptTaproot.capacity(), 34); - BOOST_CHECK_EQUAL(scriptTaproot.allocated_memory(), 34); + BOOST_CHECK_EQUAL(scriptTaproot.capacity(), 36); + BOOST_CHECK_EQUAL(scriptTaproot.allocated_memory(), 0); } - // P2PK is heap allocated + // P2PK is stack allocated { const auto scriptPubKey{GetScriptForRawPubKey(CPubKey{dummyKey.GetPubKey()})}; BOOST_CHECK_EQUAL(Solver(scriptPubKey, dummyVSolutions), TxoutType::PUBKEY); BOOST_CHECK_EQUAL(scriptPubKey.size(), 35); - BOOST_CHECK_EQUAL(scriptPubKey.capacity(), 35); - BOOST_CHECK_EQUAL(scriptPubKey.allocated_memory(), 35); + BOOST_CHECK_EQUAL(scriptPubKey.capacity(), 36); + BOOST_CHECK_EQUAL(scriptPubKey.allocated_memory(), 0); } // MULTISIG is always heap allocated diff --git a/src/test/validation_flush_tests.cpp b/src/test/validation_flush_tests.cpp index c325f7deb2b..b109c5be078 100644 --- a/src/test/validation_flush_tests.cpp +++ b/src/test/validation_flush_tests.cpp @@ -26,10 +26,8 @@ BOOST_AUTO_TEST_CASE(getcoinscachesizestate) LOCK(::cs_main); auto& view = chainstate.CoinsTip(); - // The number of bytes consumed by coin's heap data, i.e. CScript - // (prevector<28, unsigned char>) when assigned 56 bytes of data per above. - // - // See also: Coin::DynamicMemoryUsage(). + // The number of bytes consumed by coin's heap data, i.e. CScript (prevector<36, unsigned char>) + // when assigned 56 bytes of data per above. See also: Coin::DynamicMemoryUsage(). constexpr unsigned int COIN_SIZE = is_64_bit ? 80 : 64; auto print_view_mem_usage = [](CCoinsViewCache& view) {