From 1d99994da8483e8a68b51a7884310c906eadb010 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Tue, 17 Dec 2024 13:05:41 +0100 Subject: [PATCH] optimization: Buffer serialization writes in `SaveBlockUndo` and `SaveBlock` Similarly to the serialization reads, buffered writes will enable batched xor calculations - especially since currently we need to copy the write inputs Span to do the obfuscation on it, batching will enable doing the xor on the internal buffer instead. All write operations are delegated to `AutoFile`. Xor key offsets are also calculated based on where we are in the underlying file. To avoid the buffered write of 4096 in `AutoFile.write`, we're disabling obfuscation for the underlying `m_dest` and doing it ourselves for the whole buffer (instead of byte-by-byte) before writing it to file. We can't obfuscate the write's src directly since it would mutate the method's input (for very big writes it would avoid copying), but we can fill our own buffer and xor all of that safely. `BufferedFile` wasn't used here because it does too many unrelated operations (and might be removed in the future). Before: | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 5,260,934.43 | 190.08 | 1.2% | 11.08 | `SaveBlockToDiskBench` After: | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 1,804,208.61 | 554.26 | 1.4% | 10.89 | `SaveBlockToDiskBench` --- src/node/blockstorage.cpp | 6 ++--- src/streams.cpp | 2 +- src/streams.h | 49 ++++++++++++++++++++++++++++++++++++- src/test/streams_tests.cpp | 50 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 101 insertions(+), 6 deletions(-) diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp index aa0fda39ce..054a11dc71 100644 --- a/src/node/blockstorage.cpp +++ b/src/node/blockstorage.cpp @@ -942,7 +942,6 @@ bool BlockManager::SaveBlockUndo(const CBlockUndo& blockundo, BlockValidationSta const BlockfileType type = BlockfileTypeForHeight(block.nHeight); auto& cursor = *Assert(WITH_LOCK(cs_LastBlockFile, return m_blockfile_cursors[type])); - // Write undo information to disk if (block.GetUndoPos().IsNull()) { FlatFilePos pos; const uint32_t blockundo_size{static_cast(GetSerializeSize(blockundo))}; @@ -950,8 +949,7 @@ bool BlockManager::SaveBlockUndo(const CBlockUndo& blockundo, BlockValidationSta LogError("%s: FindUndoPos failed\n", __func__); return false; } - // Open history file to append - AutoFile fileout{OpenUndoFile(pos)}; + BufferedWriteOnlyFile fileout{m_undo_file_seq, pos, m_xor_key}; if (fileout.IsNull()) { LogError("%s: OpenUndoFile failed\n", __func__); return FatalError(m_opts.notifications, state, _("Failed to write undo data.")); @@ -1095,7 +1093,7 @@ FlatFilePos BlockManager::SaveBlock(const CBlock& block, int nHeight) LogError("%s: FindNextBlockPos failed\n", __func__); return FlatFilePos(); } - AutoFile fileout{OpenBlockFile(pos)}; + BufferedWriteOnlyFile fileout{m_block_file_seq, pos, m_xor_key}; if (fileout.IsNull()) { LogError("%s: OpenBlockFile failed\n", __func__); m_opts.notifications.fatalError(_("Failed to write block.")); diff --git a/src/streams.cpp b/src/streams.cpp index cd496ee2be..7ff2dd9fdf 100644 --- a/src/streams.cpp +++ b/src/streams.cpp @@ -51,7 +51,7 @@ void AutoFile::seek(int64_t offset, int origin) } } -int64_t AutoFile::tell() +int64_t AutoFile::tell() const { if (!m_position.has_value()) throw std::ios_base::failure("AutoFile::tell: position unknown"); return *m_position; diff --git a/src/streams.h b/src/streams.h index 102e00be0b..611ce6d6aa 100644 --- a/src/streams.h +++ b/src/streams.h @@ -439,7 +439,7 @@ public: void seek(int64_t offset, int origin); /** Find position within the file. Will throw if unknown. */ - int64_t tell(); + int64_t tell() const; /** Wrapper around FileCommit(). */ bool Commit(); @@ -652,4 +652,51 @@ public: template void operator>>(T&& obj) { ::Unserialize(*this, obj); } }; +class BufferedWriteOnlyFile { + const std::vector& m_xor_key; + AutoFile m_dest; + std::vector m_buf; + size_t m_buf_pos{0}; + + void flush() { + if (m_buf_pos == 0) return; + + const auto bytes = (m_buf_pos == m_buf.size()) ? m_buf : Span{m_buf}.first(m_buf_pos); + util::Xor(bytes, m_xor_key, m_dest.tell()); + m_dest.write(bytes); + + m_buf_pos = 0; + } + +public: + explicit BufferedWriteOnlyFile(const FlatFileSeq& block_file_seq, + const FlatFilePos& pos, + const std::vector& m_xor_key, + const size_t buf_size = 1 << 20) + : m_xor_key{m_xor_key}, + m_dest{block_file_seq.Open(pos, /*read_only=*/false), {}}, // We'll handle obfuscation internally + m_buf{buf_size} {} + + ~BufferedWriteOnlyFile() { flush(); } + + void write(Span src) { + while (!src.empty()) { + if (m_buf_pos == m_buf.size()) flush(); + + const size_t chunk = Assert(std::min(src.size(), m_buf.size() - m_buf_pos)); + std::memcpy(m_buf.data() + m_buf_pos, src.data(), chunk); + m_buf_pos += chunk; + src = src.subspan(chunk); + } + } + + bool IsNull() const { return m_dest.IsNull(); } + int64_t tell() const { return m_dest.tell() + m_buf_pos; } + + template BufferedWriteOnlyFile& operator<<(const T& obj) { + ::Serialize(*this, obj); + return *this; + } +}; + #endif // BITCOIN_STREAMS_H diff --git a/src/test/streams_tests.cpp b/src/test/streams_tests.cpp index 0f5d03db2f..8ed1165734 100644 --- a/src/test/streams_tests.cpp +++ b/src/test/streams_tests.cpp @@ -590,6 +590,56 @@ BOOST_AUTO_TEST_CASE(buffered_read_only_file_matches_autofile_random_content) try { fs::remove(test_file.FileName(pos)); } catch (...) {} } +BOOST_AUTO_TEST_CASE(buffered_write_only_file_matches_autofile_random_content) +{ + const FlatFileSeq test_buffered{m_args.GetDataDirBase(), "buffered_write_test", node::BLOCKFILE_CHUNK_SIZE}; + const FlatFileSeq test_direct{m_args.GetDataDirBase(), "direct_write_test", node::BLOCKFILE_CHUNK_SIZE}; + constexpr size_t file_size{1 << 20}; + constexpr size_t max_write_length{100}; + const FlatFilePos pos{0, 0}; + + FastRandomContext rng{/*fDeterministic=*/false}; + const std::vector obfuscation{rng.randbytes(8)}; + + { + std::vector test_data{rng.randbytes(file_size)}; + AutoFile direct_file{test_direct.Open(pos, false), obfuscation}; + BufferedWriteOnlyFile buffered{test_buffered, pos, obfuscation}; + BOOST_CHECK_EQUAL(direct_file.tell(), buffered.tell()); + + for (size_t total_written{0}; total_written < file_size;) { + const size_t write_size{Assert(std::min(rng.randrange(max_write_length) + 1, file_size - total_written))}; + + auto current_span = Span{test_data}.subspan(total_written, write_size); + direct_file.write(current_span); + buffered.write(current_span); + + BOOST_CHECK_EQUAL(direct_file.tell(), buffered.tell()); + + total_written += write_size; + } + } + + // Compare the resulting files + AutoFile verify_direct{test_direct.Open(pos, true), obfuscation}; + std::vector direct_result{file_size}; + verify_direct.read(direct_result); + + AutoFile verify_buffered{test_buffered.Open(pos, true), obfuscation}; + std::vector buffered_result{file_size}; + verify_buffered.read(buffered_result); + + BOOST_CHECK_EQUAL_COLLECTIONS( + direct_result.begin(), direct_result.end(), + buffered_result.begin(), buffered_result.end() + ); + + try { + fs::remove(test_direct.FileName(pos)); + fs::remove(test_buffered.FileName(pos)); + } catch (...) {} +} + BOOST_AUTO_TEST_CASE(streams_hashed) { DataStream stream{};