From c77e3107b813ccb638480f100c5ab2a1d9043a6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Fri, 14 Mar 2025 22:08:25 +0100 Subject: [PATCH 1/8] refactor: rename leftover WriteBlockBench The benchmark was referencing the old name of the method --- src/bench/readwriteblock.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bench/readwriteblock.cpp b/src/bench/readwriteblock.cpp index cdf86185ae9..10434b15d20 100644 --- a/src/bench/readwriteblock.cpp +++ b/src/bench/readwriteblock.cpp @@ -27,7 +27,7 @@ static CBlock CreateTestBlock() return block; } -static void SaveBlockBench(benchmark::Bench& bench) +static void WriteBlockBench(benchmark::Bench& bench) { const auto testing_setup{MakeNoLogFileContext(ChainType::MAIN)}; auto& blockman{testing_setup->m_node.chainman->m_blockman}; @@ -63,6 +63,6 @@ static void ReadRawBlockBench(benchmark::Bench& bench) }); } -BENCHMARK(SaveBlockBench, benchmark::PriorityLevel::HIGH); +BENCHMARK(WriteBlockBench, benchmark::PriorityLevel::HIGH); BENCHMARK(ReadBlockBench, benchmark::PriorityLevel::HIGH); BENCHMARK(ReadRawBlockBench, benchmark::PriorityLevel::HIGH); From 3197155f91a48bdf760ad4242ff7c75f66e47c32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Fri, 14 Mar 2025 20:11:52 +0100 Subject: [PATCH 2/8] refactor: collect block read operations into try block Reorganized error handling in block-related operations by grouping related operations together within the same scope. In `ReadBlockUndo()` and `ReadBlock()`, moved all deserialization operations, comments and checksum verification inside a single try/catch block for cleaner error handling. In `WriteBlockUndo()`, consolidated hash calculation and data writing operations within a common block to better express their logical relationship. --- src/node/blockstorage.cpp | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp index fe44aa8a3f2..2e49443b569 100644 --- a/src/node/blockstorage.cpp +++ b/src/node/blockstorage.cpp @@ -666,24 +666,26 @@ bool BlockManager::ReadBlockUndo(CBlockUndo& blockundo, const CBlockIndex& index return false; } - // Read block - uint256 hashChecksum; - HashVerifier verifier{filein}; // Use HashVerifier as reserializing may lose data, c.f. commit d342424301013ec47dc146a4beb49d5c9319d80a try { + // Read block + HashVerifier verifier{filein}; // Use HashVerifier, as reserializing may lose data, c.f. 
commit d3424243 + verifier << index.pprev->GetBlockHash(); verifier >> blockundo; + + uint256 hashChecksum; filein >> hashChecksum; + + // Verify checksum + if (hashChecksum != verifier.GetHash()) { + LogError("%s: Checksum mismatch at %s\n", __func__, pos.ToString()); + return false; + } } catch (const std::exception& e) { LogError("%s: Deserialize or I/O error - %s at %s\n", __func__, e.what(), pos.ToString()); return false; } - // Verify checksum - if (hashChecksum != verifier.GetHash()) { - LogError("%s: Checksum mismatch at %s\n", __func__, pos.ToString()); - return false; - } - return true; } @@ -945,15 +947,14 @@ bool BlockManager::WriteBlockUndo(const CBlockUndo& blockundo, BlockValidationSt // Write index header fileout << GetParams().MessageStart() << blockundo_size; - // Write undo data pos.nPos += BLOCK_SERIALIZATION_HEADER_SIZE; - fileout << blockundo; - - // Calculate & write checksum - HashWriter hasher{}; - hasher << block.pprev->GetBlockHash(); - hasher << blockundo; - fileout << hasher.GetHash(); + { + // Calculate checksum + HashWriter hasher{}; + hasher << block.pprev->GetBlockHash() << blockundo; + // Write undo data & checksum + fileout << blockundo << hasher.GetHash(); + } // rev files are written in block height order, whereas blk files are written as blocks come in (often out of order) // we want to flush the rev (undo) file once we've written the last block, which is indicated by the last height @@ -992,8 +993,8 @@ bool BlockManager::ReadBlock(CBlock& block, const FlatFilePos& pos) const return false; } - // Read block try { + // Read block filein >> TX_WITH_WITNESS(block); } catch (const std::exception& e) { LogError("%s: Deserialize or I/O error - %s at %s\n", __func__, e.what(), pos.ToString()); @@ -1091,8 +1092,8 @@ FlatFilePos BlockManager::WriteBlock(const CBlock& block, int nHeight) // Write index header fileout << GetParams().MessageStart() << block_size; - // Write block pos.nPos += BLOCK_SERIALIZATION_HEADER_SIZE; + // Write block fileout << TX_WITH_WITNESS(block); return pos; } From 6640dd52c9fcb85d77f081780c02ee37b8089091 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Mon, 7 Apr 2025 17:47:57 +0200 Subject: [PATCH 3/8] Narrow scope of undofile write to avoid possible resource management issue `AutoFile{OpenUndoFile(pos)}` was still in scope when `FlushUndoFile(pos.nFile)` was called, which could lead to file handle conflicts or other unexpected behavior. 
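
The shape of the fix, reduced to a sketch (identifiers follow the surrounding code, but the snippet is illustrative rather than the exact diff below):

    {
        AutoFile fileout{OpenUndoFile(pos)};   // owns the rev file handle
        fileout << blockundo;                  // ... header, undo data, checksum
        if (fileout.fclose()) {                // release the handle before leaving the scope
            return false;
        }
    }
    FlushUndoFile(pos.nFile);                  // now runs without a competing open handle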
Co-authored-by: Hodlinator <172445034+hodlinator@users.noreply.github.com> Co-authored-by: maflcko <6399679+maflcko@users.noreply.github.com> --- src/node/blockstorage.cpp | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp index 2e49443b569..e65d461b906 100644 --- a/src/node/blockstorage.cpp +++ b/src/node/blockstorage.cpp @@ -938,22 +938,31 @@ bool BlockManager::WriteBlockUndo(const CBlockUndo& blockundo, BlockValidationSt LogError("FindUndoPos failed"); return false; } - // Open history file to append - AutoFile fileout{OpenUndoFile(pos)}; - if (fileout.IsNull()) { - LogError("OpenUndoFile failed"); - return FatalError(m_opts.notifications, state, _("Failed to write undo data.")); - } - // Write index header - fileout << GetParams().MessageStart() << blockundo_size; - pos.nPos += BLOCK_SERIALIZATION_HEADER_SIZE; { - // Calculate checksum - HashWriter hasher{}; - hasher << block.pprev->GetBlockHash() << blockundo; - // Write undo data & checksum - fileout << blockundo << hasher.GetHash(); + // Open history file to append + AutoFile fileout{OpenUndoFile(pos)}; + if (fileout.IsNull()) { + LogError("OpenUndoFile failed"); + return FatalError(m_opts.notifications, state, _("Failed to write undo data.")); + } + + // Write index header + fileout << GetParams().MessageStart() << blockundo_size; + pos.nPos += BLOCK_SERIALIZATION_HEADER_SIZE; + { + // Calculate checksum + HashWriter hasher{}; + hasher << block.pprev->GetBlockHash() << blockundo; + // Write undo data & checksum + fileout << blockundo << hasher.GetHash(); + } + + // Make sure `AutoFile` goes out of scope before we call `FlushUndoFile` + if (fileout.fclose()) { + LogError("WriteBlockUndo: fclose failed"); + return false; + } } // rev files are written in block height order, whereas blk files are written as blocks come in (often out of order) From a4de16049222d0a0f5530f4e366254478a21ab44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Fri, 14 Mar 2025 22:09:38 +0100 Subject: [PATCH 4/8] scripted-diff: shorten BLOCK_SERIALIZATION_HEADER_SIZE constant Renames the constant to be less verbose and better reflect its purpose: it represents the size of the storage header that precedes serialized block data on disk, not to be confused with a block's own header. 
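
For reference, the on-disk layout the constant describes (derived from the existing definitions; comment-only sketch):

    // blk?????.dat / rev?????.dat record:
    //   [4 bytes]  network message start (magic)
    //   [4 bytes]  serialized payload size (unsigned int)
    //   [N bytes]  serialized CBlock or CBlockUndo
    //   [32 bytes] checksum (undo records only, hence UNDO_DATA_DISK_OVERHEAD = 8 + 32)
    // STORAGE_HEADER_BYTES = 4 + 4 = 8 bytes preceding every payload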
-BEGIN VERIFY SCRIPT- git grep -q "STORAGE_HEADER_BYTES" $(git ls-files) && echo "Error: Target name STORAGE_HEADER_BYTES already exists in the codebase" && exit 1 sed -i 's/BLOCK_SERIALIZATION_HEADER_SIZE/STORAGE_HEADER_BYTES/g' $(git grep -l 'BLOCK_SERIALIZATION_HEADER_SIZE') -END VERIFY SCRIPT- --- src/node/blockstorage.cpp | 6 +++--- src/node/blockstorage.h | 6 +++--- src/test/blockmanager_tests.cpp | 12 ++++++------ 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp index e65d461b906..d6d0274c681 100644 --- a/src/node/blockstorage.cpp +++ b/src/node/blockstorage.cpp @@ -949,7 +949,7 @@ bool BlockManager::WriteBlockUndo(const CBlockUndo& blockundo, BlockValidationSt // Write index header fileout << GetParams().MessageStart() << blockundo_size; - pos.nPos += BLOCK_SERIALIZATION_HEADER_SIZE; + pos.nPos += STORAGE_HEADER_BYTES; { // Calculate checksum HashWriter hasher{}; @@ -1087,7 +1087,7 @@ bool BlockManager::ReadRawBlock(std::vector& block, const FlatFilePos& FlatFilePos BlockManager::WriteBlock(const CBlock& block, int nHeight) { const unsigned int block_size{static_cast(GetSerializeSize(TX_WITH_WITNESS(block)))}; - FlatFilePos pos{FindNextBlockPos(block_size + BLOCK_SERIALIZATION_HEADER_SIZE, nHeight, block.GetBlockTime())}; + FlatFilePos pos{FindNextBlockPos(block_size + STORAGE_HEADER_BYTES, nHeight, block.GetBlockTime())}; if (pos.IsNull()) { LogError("FindNextBlockPos failed"); return FlatFilePos(); @@ -1101,7 +1101,7 @@ FlatFilePos BlockManager::WriteBlock(const CBlock& block, int nHeight) // Write index header fileout << GetParams().MessageStart() << block_size; - pos.nPos += BLOCK_SERIALIZATION_HEADER_SIZE; + pos.nPos += STORAGE_HEADER_BYTES; // Write block fileout << TX_WITH_WITNESS(block); return pos; diff --git a/src/node/blockstorage.h b/src/node/blockstorage.h index 665c2ccd834..e4da408afb0 100644 --- a/src/node/blockstorage.h +++ b/src/node/blockstorage.h @@ -75,10 +75,10 @@ static const unsigned int UNDOFILE_CHUNK_SIZE = 0x100000; // 1 MiB static const unsigned int MAX_BLOCKFILE_SIZE = 0x8000000; // 128 MiB /** Size of header written by WriteBlock before a serialized CBlock (8 bytes) */ -static constexpr size_t BLOCK_SERIALIZATION_HEADER_SIZE{std::tuple_size_v + sizeof(unsigned int)}; +static constexpr size_t STORAGE_HEADER_BYTES{std::tuple_size_v + sizeof(unsigned int)}; /** Total overhead when writing undo data: header (8 bytes) plus checksum (32 bytes) */ -static constexpr size_t UNDO_DATA_DISK_OVERHEAD{BLOCK_SERIALIZATION_HEADER_SIZE + uint256::size()}; +static constexpr size_t UNDO_DATA_DISK_OVERHEAD{STORAGE_HEADER_BYTES + uint256::size()}; // Because validation code takes pointers to the map's CBlockIndex objects, if // we ever switch to another associative container, we need to either use a @@ -164,7 +164,7 @@ private: * blockfile info, and checks if there is enough disk space to save the block. * * The nAddSize argument passed to this function should include not just the size of the serialized CBlock, but also the size of - * separator fields (BLOCK_SERIALIZATION_HEADER_SIZE). + * separator fields (STORAGE_HEADER_BYTES). 
*/ [[nodiscard]] FlatFilePos FindNextBlockPos(unsigned int nAddSize, unsigned int nHeight, uint64_t nTime); [[nodiscard]] bool FlushChainstateBlockFile(int tip_height); diff --git a/src/test/blockmanager_tests.cpp b/src/test/blockmanager_tests.cpp index 8f8cce687f5..26850d0305b 100644 --- a/src/test/blockmanager_tests.cpp +++ b/src/test/blockmanager_tests.cpp @@ -17,7 +17,7 @@ #include #include -using node::BLOCK_SERIALIZATION_HEADER_SIZE; +using node::STORAGE_HEADER_BYTES; using node::BlockManager; using node::KernelNotifications; using node::MAX_BLOCKFILE_SIZE; @@ -40,12 +40,12 @@ BOOST_AUTO_TEST_CASE(blockmanager_find_block_pos) }; BlockManager blockman{*Assert(m_node.shutdown_signal), blockman_opts}; // simulate adding a genesis block normally - BOOST_CHECK_EQUAL(blockman.WriteBlock(params->GenesisBlock(), 0).nPos, BLOCK_SERIALIZATION_HEADER_SIZE); + BOOST_CHECK_EQUAL(blockman.WriteBlock(params->GenesisBlock(), 0).nPos, STORAGE_HEADER_BYTES); // simulate what happens during reindex // simulate a well-formed genesis block being found at offset 8 in the blk00000.dat file // the block is found at offset 8 because there is an 8 byte serialization header // consisting of 4 magic bytes + 4 length bytes before each block in a well-formed blk file. - const FlatFilePos pos{0, BLOCK_SERIALIZATION_HEADER_SIZE}; + const FlatFilePos pos{0, STORAGE_HEADER_BYTES}; blockman.UpdateBlockInfo(params->GenesisBlock(), 0, pos); // now simulate what happens after reindex for the first new block processed // the actual block contents don't matter, just that it's a block. @@ -54,7 +54,7 @@ BOOST_AUTO_TEST_CASE(blockmanager_find_block_pos) // 8 bytes (for serialization header) + 285 (for serialized genesis block) = 293 // add another 8 bytes for the second block's serialization header and we get 293 + 8 = 301 FlatFilePos actual{blockman.WriteBlock(params->GenesisBlock(), 1)}; - BOOST_CHECK_EQUAL(actual.nPos, BLOCK_SERIALIZATION_HEADER_SIZE + ::GetSerializeSize(TX_WITH_WITNESS(params->GenesisBlock())) + BLOCK_SERIALIZATION_HEADER_SIZE); + BOOST_CHECK_EQUAL(actual.nPos, STORAGE_HEADER_BYTES + ::GetSerializeSize(TX_WITH_WITNESS(params->GenesisBlock())) + STORAGE_HEADER_BYTES); } BOOST_FIXTURE_TEST_CASE(blockmanager_scan_unlink_already_pruned_files, TestChain100Setup) @@ -172,7 +172,7 @@ BOOST_AUTO_TEST_CASE(blockmanager_flush_block_file) FlatFilePos pos2{blockman.WriteBlock(block2, /*nHeight=*/2)}; // Two blocks in the file - BOOST_CHECK_EQUAL(blockman.CalculateCurrentUsage(), (TEST_BLOCK_SIZE + BLOCK_SERIALIZATION_HEADER_SIZE) * 2); + BOOST_CHECK_EQUAL(blockman.CalculateCurrentUsage(), (TEST_BLOCK_SIZE + STORAGE_HEADER_BYTES) * 2); // First two blocks are written as expected // Errors are expected because block data is junk, thrown AFTER successful read @@ -199,7 +199,7 @@ BOOST_AUTO_TEST_CASE(blockmanager_flush_block_file) // Metadata is updated... 
BOOST_CHECK_EQUAL(block_data->nBlocks, 3); // ...but there are still only two blocks in the file - BOOST_CHECK_EQUAL(blockman.CalculateCurrentUsage(), (TEST_BLOCK_SIZE + BLOCK_SERIALIZATION_HEADER_SIZE) * 2); + BOOST_CHECK_EQUAL(blockman.CalculateCurrentUsage(), (TEST_BLOCK_SIZE + STORAGE_HEADER_BYTES) * 2); // Block 2 was not overwritten: blockman.ReadBlock(read_block, pos2); From 67fcc64802385a6224bb3e9b2069272b9994bf9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Fri, 24 Jan 2025 15:12:28 +0100 Subject: [PATCH 5/8] log: unify error messages for (read/write)[undo]block Co-authored-by: maflcko <6399679+maflcko@users.noreply.github.com> --- src/node/blockstorage.cpp | 39 ++++++++++++++++----------------- src/streams.cpp | 2 +- src/test/blockmanager_tests.cpp | 4 ++-- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp index d6d0274c681..085a2460a6f 100644 --- a/src/node/blockstorage.cpp +++ b/src/node/blockstorage.cpp @@ -662,7 +662,7 @@ bool BlockManager::ReadBlockUndo(CBlockUndo& blockundo, const CBlockIndex& index // Open history file to read AutoFile filein{OpenUndoFile(pos, true)}; if (filein.IsNull()) { - LogError("OpenUndoFile failed for %s", pos.ToString()); + LogError("OpenUndoFile failed for %s while reading block undo", pos.ToString()); return false; } @@ -678,11 +678,11 @@ bool BlockManager::ReadBlockUndo(CBlockUndo& blockundo, const CBlockIndex& index // Verify checksum if (hashChecksum != verifier.GetHash()) { - LogError("%s: Checksum mismatch at %s\n", __func__, pos.ToString()); + LogError("Checksum mismatch at %s while reading block undo", pos.ToString()); return false; } } catch (const std::exception& e) { - LogError("%s: Deserialize or I/O error - %s at %s\n", __func__, e.what(), pos.ToString()); + LogError("Deserialize or I/O error - %s at %s while reading block undo", e.what(), pos.ToString()); return false; } @@ -935,7 +935,7 @@ bool BlockManager::WriteBlockUndo(const CBlockUndo& blockundo, BlockValidationSt FlatFilePos pos; const unsigned int blockundo_size{static_cast(GetSerializeSize(blockundo))}; if (!FindUndoPos(state, block.nFile, pos, blockundo_size + UNDO_DATA_DISK_OVERHEAD)) { - LogError("FindUndoPos failed"); + LogError("FindUndoPos failed for %s while writing block undo", pos.ToString()); return false; } @@ -943,7 +943,7 @@ bool BlockManager::WriteBlockUndo(const CBlockUndo& blockundo, BlockValidationSt // Open history file to append AutoFile fileout{OpenUndoFile(pos)}; if (fileout.IsNull()) { - LogError("OpenUndoFile failed"); + LogError("OpenUndoFile failed for %s while writing block undo", pos.ToString()); return FatalError(m_opts.notifications, state, _("Failed to write undo data.")); } @@ -998,7 +998,7 @@ bool BlockManager::ReadBlock(CBlock& block, const FlatFilePos& pos) const // Open history file to read AutoFile filein{OpenBlockFile(pos, true)}; if (filein.IsNull()) { - LogError("%s: OpenBlockFile failed for %s\n", __func__, pos.ToString()); + LogError("OpenBlockFile failed for %s while reading block", pos.ToString()); return false; } @@ -1006,19 +1006,19 @@ bool BlockManager::ReadBlock(CBlock& block, const FlatFilePos& pos) const // Read block filein >> TX_WITH_WITNESS(block); } catch (const std::exception& e) { - LogError("%s: Deserialize or I/O error - %s at %s\n", __func__, e.what(), pos.ToString()); + LogError("Deserialize or I/O error - %s at %s while reading block", e.what(), pos.ToString()); return false; } // Check the header if 
(!CheckProofOfWork(block.GetHash(), block.nBits, GetConsensus())) { - LogError("%s: Errors in block header at %s\n", __func__, pos.ToString()); + LogError("Errors in block header at %s while reading block", pos.ToString()); return false; } // Signet only: check block solution if (GetConsensus().signet_blocks && !CheckSignetBlockSolution(block, GetConsensus())) { - LogError("%s: Errors in block solution at %s\n", __func__, pos.ToString()); + LogError("Errors in block solution at %s while reading block", pos.ToString()); return false; } @@ -1033,7 +1033,7 @@ bool BlockManager::ReadBlock(CBlock& block, const CBlockIndex& index) const return false; } if (block.GetHash() != index.GetBlockHash()) { - LogError("%s: GetHash() doesn't match index for %s at %s\n", __func__, index.ToString(), block_pos.ToString()); + LogError("GetHash() doesn't match index for %s at %s while reading block", index.ToString(), block_pos.ToString()); return false; } return true; @@ -1045,13 +1045,13 @@ bool BlockManager::ReadRawBlock(std::vector& block, const FlatFilePos& // If nPos is less than 8 the pos is null and we don't have the block data // Return early to prevent undefined behavior of unsigned int underflow if (hpos.nPos < 8) { - LogError("%s: OpenBlockFile failed for %s\n", __func__, pos.ToString()); + LogError("Failed for %s while reading raw block", pos.ToString()); return false; } hpos.nPos -= 8; // Seek back 8 bytes for meta header AutoFile filein{OpenBlockFile(hpos, true)}; if (filein.IsNull()) { - LogError("%s: OpenBlockFile failed for %s\n", __func__, pos.ToString()); + LogError("OpenBlockFile failed for %s while reading raw block", pos.ToString()); return false; } @@ -1062,22 +1062,21 @@ bool BlockManager::ReadRawBlock(std::vector& block, const FlatFilePos& filein >> blk_start >> blk_size; if (blk_start != GetParams().MessageStart()) { - LogError("%s: Block magic mismatch for %s: %s versus expected %s\n", __func__, pos.ToString(), - HexStr(blk_start), - HexStr(GetParams().MessageStart())); + LogError("Block magic mismatch for %s: %s versus expected %s while reading raw block", + pos.ToString(), HexStr(blk_start), HexStr(GetParams().MessageStart())); return false; } if (blk_size > MAX_SIZE) { - LogError("%s: Block data is larger than maximum deserialization size for %s: %s versus %s\n", __func__, pos.ToString(), - blk_size, MAX_SIZE); + LogError("Block data is larger than maximum deserialization size for %s: %s versus %s while reading raw block", + pos.ToString(), blk_size, MAX_SIZE); return false; } block.resize(blk_size); // Zeroing of memory is intentional here filein.read(MakeWritableByteSpan(block)); } catch (const std::exception& e) { - LogError("%s: Read from block file failed: %s for %s\n", __func__, e.what(), pos.ToString()); + LogError("Read from block file failed: %s for %s while reading raw block", e.what(), pos.ToString()); return false; } @@ -1089,12 +1088,12 @@ FlatFilePos BlockManager::WriteBlock(const CBlock& block, int nHeight) const unsigned int block_size{static_cast(GetSerializeSize(TX_WITH_WITNESS(block)))}; FlatFilePos pos{FindNextBlockPos(block_size + STORAGE_HEADER_BYTES, nHeight, block.GetBlockTime())}; if (pos.IsNull()) { - LogError("FindNextBlockPos failed"); + LogError("FindNextBlockPos failed for %s while writing block", pos.ToString()); return FlatFilePos(); } AutoFile fileout{OpenBlockFile(pos)}; if (fileout.IsNull()) { - LogError("OpenBlockFile failed"); + LogError("OpenBlockFile failed for %s while writing block", pos.ToString()); 
m_opts.notifications.fatalError(_("Failed to write block.")); return FlatFilePos(); } diff --git a/src/streams.cpp b/src/streams.cpp index d82824ee583..d26f526edb9 100644 --- a/src/streams.cpp +++ b/src/streams.cpp @@ -94,7 +94,7 @@ void AutoFile::write(std::span src) std::copy(src.begin(), src.begin() + buf_now.size(), buf_now.begin()); util::Xor(buf_now, m_xor, *m_position); if (std::fwrite(buf_now.data(), 1, buf_now.size(), m_file) != buf_now.size()) { - throw std::ios_base::failure{"XorFile::write: failed"}; + throw std::ios_base::failure{"AutoFile::write: failed"}; } src = src.subspan(buf_now.size()); *m_position += buf_now.size(); diff --git a/src/test/blockmanager_tests.cpp b/src/test/blockmanager_tests.cpp index 26850d0305b..49e49b2d536 100644 --- a/src/test/blockmanager_tests.cpp +++ b/src/test/blockmanager_tests.cpp @@ -179,12 +179,12 @@ BOOST_AUTO_TEST_CASE(blockmanager_flush_block_file) CBlock read_block; BOOST_CHECK_EQUAL(read_block.nVersion, 0); { - ASSERT_DEBUG_LOG("ReadBlock: Errors in block header"); + ASSERT_DEBUG_LOG("Errors in block header"); BOOST_CHECK(!blockman.ReadBlock(read_block, pos1)); BOOST_CHECK_EQUAL(read_block.nVersion, 1); } { - ASSERT_DEBUG_LOG("ReadBlock: Errors in block header"); + ASSERT_DEBUG_LOG("Errors in block header"); BOOST_CHECK(!blockman.ReadBlock(read_block, pos2)); BOOST_CHECK_EQUAL(read_block.nVersion, 2); } From 056cb3c0d2efb86447b7ff7788c206e2e7d72c12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Wed, 26 Mar 2025 12:40:20 +0100 Subject: [PATCH 6/8] refactor: clear up blockstorage/streams in preparation for optimization Made every OpenBlockFile#fReadOnly value explicit. Replaced hard-coded values in ReadRawBlock with STORAGE_HEADER_BYTES. Changed `STORAGE_HEADER_BYTES` and `UNDO_DATA_DISK_OVERHEAD` to `uint32_t` to avoid casts. Also added `LIFETIMEBOUND` to the `AutoFile` parameter of `BufferedFile`, which stores a reference to the underlying `AutoFile`, allowing Clang to emit warnings if the referenced `AutoFile` might be destroyed while `BufferedFile` still exists. Without this attribute, code with lifetime violations wouldn't trigger compiler warnings. 
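
A minimal example of the kind of misuse this is meant to catch (hypothetical caller, not code from this change; the exact diagnostic text depends on the Clang version):

    BufferedFile MakeReader(const fs::path& path)
    {
        AutoFile file{fsbridge::fopen(path, "rb")};
        // With LIFETIMEBOUND on the constructor parameter, Clang can warn here that the
        // returned BufferedFile refers to the local `file`, which is destroyed on return.
        return BufferedFile{file, /*nBufSize=*/1 << 16, /*nRewindIn=*/1 << 10};
    }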
Co-authored-by: maflcko <6399679+maflcko@users.noreply.github.com> --- src/node/blockstorage.cpp | 23 +++++++++++------------ src/node/blockstorage.h | 6 +++--- src/streams.h | 2 +- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp index 085a2460a6f..cf2e388c174 100644 --- a/src/node/blockstorage.cpp +++ b/src/node/blockstorage.cpp @@ -529,7 +529,7 @@ bool BlockManager::LoadBlockIndexDB(const std::optional& snapshot_block } for (std::set::iterator it = setBlkDataFiles.begin(); it != setBlkDataFiles.end(); it++) { FlatFilePos pos(*it, 0); - if (OpenBlockFile(pos, true).IsNull()) { + if (OpenBlockFile(pos, /*fReadOnly=*/true).IsNull()) { return false; } } @@ -933,7 +933,7 @@ bool BlockManager::WriteBlockUndo(const CBlockUndo& blockundo, BlockValidationSt // Write undo information to disk if (block.GetUndoPos().IsNull()) { FlatFilePos pos; - const unsigned int blockundo_size{static_cast(GetSerializeSize(blockundo))}; + const auto blockundo_size{static_cast(GetSerializeSize(blockundo))}; if (!FindUndoPos(state, block.nFile, pos, blockundo_size + UNDO_DATA_DISK_OVERHEAD)) { LogError("FindUndoPos failed for %s while writing block undo", pos.ToString()); return false; @@ -996,7 +996,7 @@ bool BlockManager::ReadBlock(CBlock& block, const FlatFilePos& pos) const block.SetNull(); // Open history file to read - AutoFile filein{OpenBlockFile(pos, true)}; + AutoFile filein{OpenBlockFile(pos, /*fReadOnly=*/true)}; if (filein.IsNull()) { LogError("OpenBlockFile failed for %s while reading block", pos.ToString()); return false; @@ -1041,15 +1041,14 @@ bool BlockManager::ReadBlock(CBlock& block, const CBlockIndex& index) const bool BlockManager::ReadRawBlock(std::vector& block, const FlatFilePos& pos) const { - FlatFilePos hpos = pos; - // If nPos is less than 8 the pos is null and we don't have the block data - // Return early to prevent undefined behavior of unsigned int underflow - if (hpos.nPos < 8) { - LogError("Failed for %s while reading raw block", pos.ToString()); + if (pos.nPos < STORAGE_HEADER_BYTES) { + // If nPos is less than STORAGE_HEADER_BYTES, we can't read the header that precedes the block data + // This would cause an unsigned integer underflow when trying to position the file cursor + // This can happen after pruning or default constructed positions + LogError("Failed for %s while reading raw block storage header", pos.ToString()); return false; } - hpos.nPos -= 8; // Seek back 8 bytes for meta header - AutoFile filein{OpenBlockFile(hpos, true)}; + AutoFile filein{OpenBlockFile({pos.nFile, pos.nPos - STORAGE_HEADER_BYTES}, /*fReadOnly=*/true)}; if (filein.IsNull()) { LogError("OpenBlockFile failed for %s while reading raw block", pos.ToString()); return false; @@ -1091,7 +1090,7 @@ FlatFilePos BlockManager::WriteBlock(const CBlock& block, int nHeight) LogError("FindNextBlockPos failed for %s while writing block", pos.ToString()); return FlatFilePos(); } - AutoFile fileout{OpenBlockFile(pos)}; + AutoFile fileout{OpenBlockFile(pos, /*fReadOnly=*/false)}; if (fileout.IsNull()) { LogError("OpenBlockFile failed for %s while writing block", pos.ToString()); m_opts.notifications.fatalError(_("Failed to write block.")); @@ -1210,7 +1209,7 @@ void ImportBlocks(ChainstateManager& chainman, std::span import_ if (!fs::exists(chainman.m_blockman.GetBlockPosFilename(pos))) { break; // No block files left to reindex } - AutoFile file{chainman.m_blockman.OpenBlockFile(pos, true)}; + AutoFile file{chainman.m_blockman.OpenBlockFile(pos, 
/*fReadOnly=*/true)}; if (file.IsNull()) { break; // This error is logged in OpenBlockFile } diff --git a/src/node/blockstorage.h b/src/node/blockstorage.h index e4da408afb0..324f8e68605 100644 --- a/src/node/blockstorage.h +++ b/src/node/blockstorage.h @@ -75,10 +75,10 @@ static const unsigned int UNDOFILE_CHUNK_SIZE = 0x100000; // 1 MiB static const unsigned int MAX_BLOCKFILE_SIZE = 0x8000000; // 128 MiB /** Size of header written by WriteBlock before a serialized CBlock (8 bytes) */ -static constexpr size_t STORAGE_HEADER_BYTES{std::tuple_size_v + sizeof(unsigned int)}; +static constexpr uint32_t STORAGE_HEADER_BYTES{std::tuple_size_v + sizeof(unsigned int)}; /** Total overhead when writing undo data: header (8 bytes) plus checksum (32 bytes) */ -static constexpr size_t UNDO_DATA_DISK_OVERHEAD{STORAGE_HEADER_BYTES + uint256::size()}; +static constexpr uint32_t UNDO_DATA_DISK_OVERHEAD{STORAGE_HEADER_BYTES + uint256::size()}; // Because validation code takes pointers to the map's CBlockIndex objects, if // we ever switch to another associative container, we need to either use a @@ -400,7 +400,7 @@ public: void UpdatePruneLock(const std::string& name, const PruneLockInfo& lock_info) EXCLUSIVE_LOCKS_REQUIRED(::cs_main); /** Open a block file (blk?????.dat) */ - AutoFile OpenBlockFile(const FlatFilePos& pos, bool fReadOnly = false) const; + AutoFile OpenBlockFile(const FlatFilePos& pos, bool fReadOnly) const; /** Translation to a filesystem path */ fs::path GetBlockPosFilename(const FlatFilePos& pos) const; diff --git a/src/streams.h b/src/streams.h index 20bdaf2c060..81d95feaa8e 100644 --- a/src/streams.h +++ b/src/streams.h @@ -523,7 +523,7 @@ private: } public: - BufferedFile(AutoFile& file, uint64_t nBufSize, uint64_t nRewindIn) + BufferedFile(AutoFile& file LIFETIMEBOUND, uint64_t nBufSize, uint64_t nRewindIn) : m_src{file}, nReadLimit{std::numeric_limits::max()}, nRewind{nRewindIn}, vchBuf(nBufSize, std::byte{0}) { if (nRewindIn >= nBufSize) From 520965e2939567e0e5b7bcf598f3891bf4a806c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Wed, 26 Mar 2025 12:52:29 +0100 Subject: [PATCH 7/8] optimization: bulk serialization reads in `UndoRead`, `ReadBlock` The obfuscation (XOR) operations are currently done byte-by-byte during serialization. Buffering the reads will enable batching the obfuscation operations later. Different operating systems handle file caching differently, so reading larger batches (and processing them from memory) is measurably faster, likely because of fewer native fread calls and reduced lock contention. Note that `ReadRawBlock` doesn't need buffering since it already reads the whole block directly. Unlike `ReadBlockUndo`, the new `ReadBlock` implementation delegates to `ReadRawBlock`, which uses more memory than a buffered alternative but results in slightly simpler code and a small performance increase (~0.4%). This approach also clearly documents that `ReadRawBlock` is a logical subset of `ReadBlock` functionality. The current implementation, which iterates over a fixed-size buffer, provides a more general alternative to Cory Fields' solution of reading the entire block size in advance. Buffer sizes were selected based on benchmarking to ensure the buffered reader produces performance similar to reading the whole block into memory. Smaller buffers were slower, while larger ones showed diminishing returns. ------ > macOS Sequoia 15.3.1 > C++ compiler .......................... 
Clang 19.1.7 > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ && cmake --build build -j$(nproc) && build/bin/bench_bitcoin -filter='ReadBlockBench' -min-time=10000 Before: | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 2,271,441.67 | 440.25 | 0.1% | 11.00 | `ReadBlockBench` After: | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 1,738,971.29 | 575.05 | 0.2% | 10.97 | `ReadBlockBench` ------ > Ubuntu 24.04.2 LTS > C++ compiler .......................... GNU 13.3.0 > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ && cmake --build build -j$(nproc) && build/bin/bench_bitcoin -filter='ReadBlockBench' -min-time=20000 Before: | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 6,895,987.11 | 145.01 | 0.0% | 71,055,269.86 | 23,977,374.37 | 2.963 | 5,074,828.78 | 0.4% | 22.00 | `ReadBlockBench` After: | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 5,771,882.71 | 173.25 | 0.0% | 65,741,889.82 | 20,453,232.33 | 3.214 | 3,971,321.75 | 0.3% | 22.01 | `ReadBlockBench` Co-authored-by: maflcko <6399679+maflcko@users.noreply.github.com> Co-authored-by: Ryan Ofsky Co-authored-by: Martin Leitner-Ankerl Co-authored-by: Cory Fields --- src/node/blockstorage.cpp | 12 ++++----- src/streams.h | 46 +++++++++++++++++++++++++++++++- src/test/streams_tests.cpp | 54 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+), 7 deletions(-) diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp index cf2e388c174..5e1de9f0af4 100644 --- a/src/node/blockstorage.cpp +++ b/src/node/blockstorage.cpp @@ -660,11 +660,12 @@ bool BlockManager::ReadBlockUndo(CBlockUndo& blockundo, const CBlockIndex& index const FlatFilePos pos{WITH_LOCK(::cs_main, return index.GetUndoPos())}; // Open history file to read - AutoFile filein{OpenUndoFile(pos, true)}; - if (filein.IsNull()) { + AutoFile file{OpenUndoFile(pos, true)}; + if (file.IsNull()) { LogError("OpenUndoFile failed for %s while reading block undo", pos.ToString()); return false; } + BufferedReader filein{std::move(file)}; try { // Read block @@ -996,15 +997,14 @@ bool BlockManager::ReadBlock(CBlock& block, const FlatFilePos& pos) const block.SetNull(); // Open history file to read - AutoFile filein{OpenBlockFile(pos, /*fReadOnly=*/true)}; - if (filein.IsNull()) { - LogError("OpenBlockFile failed for %s while reading block", pos.ToString()); + std::vector block_data; + if (!ReadRawBlock(block_data, pos)) { return false; } try { // Read block - filein >> TX_WITH_WITNESS(block); + SpanReader{block_data} >> TX_WITH_WITNESS(block); } catch (const std::exception& e) { LogError("Deserialize or I/O error - %s at %s while reading block", e.what(), pos.ToString()); return false; diff --git a/src/streams.h b/src/streams.h index 81d95feaa8e..f19bef5df22 100644 --- a/src/streams.h +++ b/src/streams.h @@ -467,6 +467,8 @@ public: } }; +using DataBuffer = std::vector; + /** Wrapper around an AutoFile& that implements a ring buffer 
to * deserialize from. It guarantees the ability to rewind a given number of bytes. * @@ -481,7 +483,7 @@ private: uint64_t m_read_pos{0}; //!< how many bytes have been read from this uint64_t nReadLimit; //!< up to which position we're allowed to read uint64_t nRewind; //!< how many bytes we guarantee to rewind - std::vector vchBuf; //!< the buffer + DataBuffer vchBuf; //! read data from the source to fill the buffer bool Fill() { @@ -614,4 +616,46 @@ public: } }; +/** + * Wrapper that buffers reads from an underlying stream. + * Requires underlying stream to support read() and detail_fread() calls + * to support fixed-size and variable-sized reads, respectively. + */ +template +class BufferedReader +{ + S& m_src; + DataBuffer m_buf; + size_t m_buf_pos; + +public: + //! Requires stream ownership to prevent leaving the stream at an unexpected position after buffered reads. + explicit BufferedReader(S&& stream LIFETIMEBOUND, size_t size = 1 << 16) + requires std::is_rvalue_reference_v + : m_src{stream}, m_buf(size), m_buf_pos{size} {} + + void read(std::span dst) + { + if (const auto available{std::min(dst.size(), m_buf.size() - m_buf_pos)}) { + std::copy_n(m_buf.begin() + m_buf_pos, available, dst.begin()); + m_buf_pos += available; + dst = dst.subspan(available); + } + if (dst.size()) { + assert(m_buf_pos == m_buf.size()); + m_src.read(dst); + + m_buf_pos = 0; + m_buf.resize(m_src.detail_fread(m_buf)); + } + } + + template + BufferedReader& operator>>(T&& obj) + { + Unserialize(*this, obj); + return *this; + } +}; + #endif // BITCOIN_STREAMS_H diff --git a/src/test/streams_tests.cpp b/src/test/streams_tests.cpp index 1a44e66932c..6f47b98f297 100644 --- a/src/test/streams_tests.cpp +++ b/src/test/streams_tests.cpp @@ -2,6 +2,8 @@ // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. +#include +#include #include #include #include @@ -553,6 +555,58 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file_rand) fs::remove(streams_test_filename); } +BOOST_AUTO_TEST_CASE(buffered_reader_matches_autofile_random_content) +{ + const size_t file_size{1 + m_rng.randrange(1 << 17)}; + const size_t buf_size{1 + m_rng.randrange(file_size)}; + const FlatFilePos pos{0, 0}; + + const FlatFileSeq test_file{m_args.GetDataDirBase(), "buffered_file_test_random", node::BLOCKFILE_CHUNK_SIZE}; + const std::vector obfuscation{m_rng.randbytes(8)}; + + // Write out the file with random content + { + AutoFile{test_file.Open(pos, /*read_only=*/false), obfuscation}.write(m_rng.randbytes(file_size)); + } + BOOST_CHECK_EQUAL(fs::file_size(test_file.FileName(pos)), file_size); + + { + AutoFile direct_file{test_file.Open(pos, /*read_only=*/true), obfuscation}; + + AutoFile buffered_file{test_file.Open(pos, /*read_only=*/true), obfuscation}; + BufferedReader buffered_reader{std::move(buffered_file), buf_size}; + + for (size_t total_read{0}; total_read < file_size;) { + const size_t read{Assert(std::min(1 + m_rng.randrange(m_rng.randbool() ? 
buf_size : 2 * buf_size), file_size - total_read))}; + + DataBuffer direct_file_buffer{read}; + direct_file.read(direct_file_buffer); + + DataBuffer buffered_buffer{read}; + buffered_reader.read(buffered_buffer); + + BOOST_CHECK_EQUAL_COLLECTIONS( + direct_file_buffer.begin(), direct_file_buffer.end(), + buffered_buffer.begin(), buffered_buffer.end() + ); + + total_read += read; + } + + { + DataBuffer excess_byte{1}; + BOOST_CHECK_EXCEPTION(direct_file.read(excess_byte), std::ios_base::failure, HasReason{"end of file"}); + } + + { + DataBuffer excess_byte{1}; + BOOST_CHECK_EXCEPTION(buffered_reader.read(excess_byte), std::ios_base::failure, HasReason{"end of file"}); + } + } + + fs::remove(test_file.FileName(pos)); +} + BOOST_AUTO_TEST_CASE(streams_hashed) { DataStream stream{}; From 8d801e3efbf1e3b1f9a0060b777788f271cb21c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Wed, 26 Mar 2025 12:54:07 +0100 Subject: [PATCH 8/8] optimization: bulk serialization writes in `WriteBlockUndo` and `WriteBlock` Similarly to the serialization reads optimization, buffered writes will enable batched XOR calculations. This is especially beneficial since the current implementation requires copying the write input's `std::span` to perform obfuscation. Batching allows us to apply XOR operations on the internal buffer instead, reducing unnecessary data copying and improving performance. ------ > macOS Sequoia 15.3.1 > C++ compiler .......................... Clang 19.1.7 > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ && cmake --build build -j$(nproc) && build/bin/bench_bitcoin -filter='WriteBlockBench' -min-time=10000 Before: | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 5,149,564.31 | 194.19 | 0.8% | 10.95 | `WriteBlockBench` After: | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 2,990,564.63 | 334.39 | 1.5% | 11.27 | `WriteBlockBench` ------ > Ubuntu 24.04.2 LTS > C++ compiler .......................... 
GNU 13.3.0 > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ && cmake --build build -j$(nproc) && build/bin/bench_bitcoin -filter='WriteBlockBench' -min-time=20000 Before: | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 5,152,973.58 | 194.06 | 2.2% | 19,350,886.41 | 8,784,539.75 | 2.203 | 3,079,335.21 | 0.4% | 23.18 | `WriteBlockBench` After: | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 4,145,681.13 | 241.21 | 4.0% | 15,337,596.85 | 5,732,186.47 | 2.676 | 2,239,662.64 | 0.1% | 23.94 | `WriteBlockBench` Co-authored-by: Ryan Ofsky Co-authored-by: Cory Fields --- src/node/blockstorage.cpp | 16 +++---- src/streams.cpp | 24 +++++++---- src/streams.h | 44 +++++++++++++++++++ src/test/streams_tests.cpp | 88 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 155 insertions(+), 17 deletions(-) diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp index 5e1de9f0af4..3d73459bf6c 100644 --- a/src/node/blockstorage.cpp +++ b/src/node/blockstorage.cpp @@ -942,11 +942,12 @@ bool BlockManager::WriteBlockUndo(const CBlockUndo& blockundo, BlockValidationSt { // Open history file to append - AutoFile fileout{OpenUndoFile(pos)}; - if (fileout.IsNull()) { + AutoFile file{OpenUndoFile(pos)}; + if (file.IsNull()) { LogError("OpenUndoFile failed for %s while writing block undo", pos.ToString()); return FatalError(m_opts.notifications, state, _("Failed to write undo data.")); } + BufferedWriter fileout{file}; // Write index header fileout << GetParams().MessageStart() << blockundo_size; @@ -959,11 +960,7 @@ bool BlockManager::WriteBlockUndo(const CBlockUndo& blockundo, BlockValidationSt fileout << blockundo << hasher.GetHash(); } - // Make sure `AutoFile` goes out of scope before we call `FlushUndoFile` - if (fileout.fclose()) { - LogError("WriteBlockUndo: fclose failed"); - return false; - } + fileout.flush(); // Make sure `AutoFile`/`BufferedWriter` go out of scope before we call `FlushUndoFile` } // rev files are written in block height order, whereas blk files are written as blocks come in (often out of order) @@ -1090,12 +1087,13 @@ FlatFilePos BlockManager::WriteBlock(const CBlock& block, int nHeight) LogError("FindNextBlockPos failed for %s while writing block", pos.ToString()); return FlatFilePos(); } - AutoFile fileout{OpenBlockFile(pos, /*fReadOnly=*/false)}; - if (fileout.IsNull()) { + AutoFile file{OpenBlockFile(pos, /*fReadOnly=*/false)}; + if (file.IsNull()) { LogError("OpenBlockFile failed for %s while writing block", pos.ToString()); m_opts.notifications.fatalError(_("Failed to write block.")); return FlatFilePos(); } + BufferedWriter fileout{file}; // Write index header fileout << GetParams().MessageStart() << block_size; diff --git a/src/streams.cpp b/src/streams.cpp index d26f526edb9..19c2b474452 100644 --- a/src/streams.cpp +++ b/src/streams.cpp @@ -87,21 +87,29 @@ void AutoFile::write(std::span src) } if (m_position.has_value()) *m_position += src.size(); } else { - if (!m_position.has_value()) throw std::ios_base::failure("AutoFile::write: position unknown"); std::array buf; - while (src.size() > 0) { + while (src.size()) { auto 
buf_now{std::span{buf}.first(std::min(src.size(), buf.size()))}; - std::copy(src.begin(), src.begin() + buf_now.size(), buf_now.begin()); - util::Xor(buf_now, m_xor, *m_position); - if (std::fwrite(buf_now.data(), 1, buf_now.size(), m_file) != buf_now.size()) { - throw std::ios_base::failure{"AutoFile::write: failed"}; - } + std::copy_n(src.begin(), buf_now.size(), buf_now.begin()); + write_buffer(buf_now); src = src.subspan(buf_now.size()); - *m_position += buf_now.size(); } } } +void AutoFile::write_buffer(std::span src) +{ + if (!m_file) throw std::ios_base::failure("AutoFile::write_buffer: file handle is nullptr"); + if (m_xor.size()) { + if (!m_position) throw std::ios_base::failure("AutoFile::write_buffer: obfuscation position unknown"); + util::Xor(src, m_xor, *m_position); // obfuscate in-place + } + if (std::fwrite(src.data(), 1, src.size(), m_file) != src.size()) { + throw std::ios_base::failure("AutoFile::write_buffer: write failed"); + } + if (m_position) *m_position += src.size(); +} + bool AutoFile::Commit() { return ::FileCommit(m_file); diff --git a/src/streams.h b/src/streams.h index f19bef5df22..1ebcff3671f 100644 --- a/src/streams.h +++ b/src/streams.h @@ -445,6 +445,9 @@ public: /** Wrapper around TruncateFile(). */ bool Truncate(unsigned size); + //! Write a mutable buffer more efficiently than write(), obfuscating the buffer in-place. + void write_buffer(std::span src); + // // Stream subset // @@ -658,4 +661,45 @@ public: } }; +/** + * Wrapper that buffers writes to an underlying stream. + * Requires underlying stream to support write_buffer() method + * for efficient buffer flushing and obfuscation. + */ +template +class BufferedWriter +{ + S& m_dst; + DataBuffer m_buf; + size_t m_buf_pos{0}; + +public: + explicit BufferedWriter(S& stream LIFETIMEBOUND, size_t size = 1 << 16) : m_dst{stream}, m_buf(size) {} + + ~BufferedWriter() { flush(); } + + void flush() + { + if (m_buf_pos) m_dst.write_buffer(std::span{m_buf}.first(m_buf_pos)); + m_buf_pos = 0; + } + + void write(std::span src) + { + while (const auto available{std::min(src.size(), m_buf.size() - m_buf_pos)}) { + std::copy_n(src.begin(), available, m_buf.begin() + m_buf_pos); + m_buf_pos += available; + if (m_buf_pos == m_buf.size()) flush(); + src = src.subspan(available); + } + } + + template + BufferedWriter& operator<<(const T& obj) + { + Serialize(*this, obj); + return *this; + } +}; + #endif // BITCOIN_STREAMS_H diff --git a/src/test/streams_tests.cpp b/src/test/streams_tests.cpp index 6f47b98f297..c7b5cd353e0 100644 --- a/src/test/streams_tests.cpp +++ b/src/test/streams_tests.cpp @@ -607,6 +607,94 @@ BOOST_AUTO_TEST_CASE(buffered_reader_matches_autofile_random_content) fs::remove(test_file.FileName(pos)); } +BOOST_AUTO_TEST_CASE(buffered_writer_matches_autofile_random_content) +{ + const size_t file_size{1 + m_rng.randrange(1 << 17)}; + const size_t buf_size{1 + m_rng.randrange(file_size)}; + const FlatFilePos pos{0, 0}; + + const FlatFileSeq test_buffered{m_args.GetDataDirBase(), "buffered_write_test", node::BLOCKFILE_CHUNK_SIZE}; + const FlatFileSeq test_direct{m_args.GetDataDirBase(), "direct_write_test", node::BLOCKFILE_CHUNK_SIZE}; + const std::vector obfuscation{m_rng.randbytes(8)}; + + { + DataBuffer test_data{m_rng.randbytes(file_size)}; + + AutoFile direct_file{test_direct.Open(pos, /*read_only=*/false), obfuscation}; + + AutoFile buffered_file{test_buffered.Open(pos, /*read_only=*/false), obfuscation}; + BufferedWriter buffered{buffered_file, buf_size}; + + for (size_t total_written{0}; 
total_written < file_size;) { + const size_t write_size{Assert(std::min(1 + m_rng.randrange(m_rng.randbool() ? buf_size : 2 * buf_size), file_size - total_written))}; + + auto current_span = std::span{test_data}.subspan(total_written, write_size); + direct_file.write(current_span); + buffered.write(current_span); + + total_written += write_size; + } + // Destructors of AutoFile and BufferedWriter will flush/close here + } + + // Compare the resulting files + DataBuffer direct_result{file_size}; + { + AutoFile verify_direct{test_direct.Open(pos, /*read_only=*/true), obfuscation}; + verify_direct.read(direct_result); + + DataBuffer excess_byte{1}; + BOOST_CHECK_EXCEPTION(verify_direct.read(excess_byte), std::ios_base::failure, HasReason{"end of file"}); + } + + DataBuffer buffered_result{file_size}; + { + AutoFile verify_buffered{test_buffered.Open(pos, /*read_only=*/true), obfuscation}; + verify_buffered.read(buffered_result); + + DataBuffer excess_byte{1}; + BOOST_CHECK_EXCEPTION(verify_buffered.read(excess_byte), std::ios_base::failure, HasReason{"end of file"}); + } + + BOOST_CHECK_EQUAL_COLLECTIONS( + direct_result.begin(), direct_result.end(), + buffered_result.begin(), buffered_result.end() + ); + + fs::remove(test_direct.FileName(pos)); + fs::remove(test_buffered.FileName(pos)); +} + +BOOST_AUTO_TEST_CASE(buffered_writer_reader) +{ + const uint32_t v1{m_rng.rand32()}, v2{m_rng.rand32()}, v3{m_rng.rand32()}; + const fs::path test_file{m_args.GetDataDirBase() / "test_buffered_write_read.bin"}; + + // Write out the values through a precisely sized BufferedWriter + { + AutoFile file{fsbridge::fopen(test_file, "w+b")}; + BufferedWriter f(file, sizeof(v1) + sizeof(v2) + sizeof(v3)); + f << v1 << v2; + f.write(std::as_bytes(std::span{&v3, 1})); + } + // Read back and verify using BufferedReader + { + uint32_t _v1{0}, _v2{0}, _v3{0}; + AutoFile file{fsbridge::fopen(test_file, "rb")}; + BufferedReader f(std::move(file), sizeof(v1) + sizeof(v2) + sizeof(v3)); + f >> _v1 >> _v2; + f.read(std::as_writable_bytes(std::span{&_v3, 1})); + BOOST_CHECK_EQUAL(_v1, v1); + BOOST_CHECK_EQUAL(_v2, v2); + BOOST_CHECK_EQUAL(_v3, v3); + + DataBuffer excess_byte{1}; + BOOST_CHECK_EXCEPTION(f.read(excess_byte), std::ios_base::failure, HasReason{"end of file"}); + } + + fs::remove(test_file); +} + BOOST_AUTO_TEST_CASE(streams_hashed) { DataStream stream{};