From 751077c6e25e010cff85fe9793a8c5b843350f98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Tue, 1 Apr 2025 15:46:23 +0200 Subject: [PATCH 1/3] Coins: Add `kHeader` to `CDBBatch::size_estimate` The initialization of the manual `size_estimate` in `CDBBatch::Clear()` is corrected from `0` to `kHeader` (LevelDB's fixed batch header size). This aligns the manual estimate with LevelDB's actual size immediately after clearing, fixing discrepancies that would otherwise be caught by tests in the next commit (e.g., `coins_tests`, `validation_chainstatemanager_tests`). --- src/dbwrapper.cpp | 8 ++++++-- src/dbwrapper.h | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/dbwrapper.cpp b/src/dbwrapper.cpp index b3faff10cea..f4935c4f357 100644 --- a/src/dbwrapper.cpp +++ b/src/dbwrapper.cpp @@ -158,14 +158,18 @@ struct CDBBatch::WriteBatchImpl { CDBBatch::CDBBatch(const CDBWrapper& _parent) : parent{_parent}, - m_impl_batch{std::make_unique()} {}; + m_impl_batch{std::make_unique()} +{ + Clear(); +}; CDBBatch::~CDBBatch() = default; void CDBBatch::Clear() { m_impl_batch->batch.Clear(); - size_estimate = 0; + assert(m_impl_batch->batch.ApproximateSize() == kHeader); + size_estimate = kHeader; // TODO remove } void CDBBatch::WriteImpl(std::span key, DataStream& ssValue) diff --git a/src/dbwrapper.h b/src/dbwrapper.h index 98446361e4e..1ff7dc1d2ee 100644 --- a/src/dbwrapper.h +++ b/src/dbwrapper.h @@ -75,6 +75,8 @@ class CDBBatch friend class CDBWrapper; private: + static constexpr size_t kHeader{12}; // See: src/leveldb/db/write_batch.cc#L27 + const CDBWrapper &parent; struct WriteBatchImpl; From 8b5e19d8b5bf59ae7e178fd7c27ab724d1a433be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Tue, 1 Apr 2025 14:06:10 +0200 Subject: [PATCH 2/3] refactor: Delegate to LevelDB for CDBBatch size estimation Serialized batch size can be queried via the underlying LevelDB implementation calling the native `leveldb::WriteBatch::ApproximateSize()`. The previous manual calculation was added in https://github.com/bitcoin/bitcoin/pull/10195/commits/e66dbde6d14cb5f253b3bf8850a98f4fda2d9f49 as part of https://github.com/bitcoin/bitcoin/pull/10195. At that time (April 2017), the version of LevelDB used by Bitcoin Core (and even the latest source) lacked a native function for this. LevelDB added this capability in https://github.com/google/leveldb/commit/69e2bd224b7f11e021527cb95bab18f1ee6e1b3b, merged later that year. The old manual estimation method (`SizeEstimate()`) is kept temporarily in this commit, and assertions are added in `txdb.cpp` to verify its results against `ApproximateSize()` during batch writes. This ensures the native function behaves as expected before removing the manual calculation in the subsequent commit. --- src/dbwrapper.cpp | 5 +++++ src/dbwrapper.h | 3 ++- src/txdb.cpp | 23 +++++++++++++++++++++-- 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/src/dbwrapper.cpp b/src/dbwrapper.cpp index f4935c4f357..9b1d043b335 100644 --- a/src/dbwrapper.cpp +++ b/src/dbwrapper.cpp @@ -200,6 +200,11 @@ void CDBBatch::EraseImpl(std::span key) size_estimate += 2 + (slKey.size() > 127) + slKey.size(); } +size_t CDBBatch::ApproximateSize() const +{ + return m_impl_batch->batch.ApproximateSize(); +} + struct LevelDBContext { //! custom environment this database is using (may be nullptr in case of default environment) leveldb::Env* penv; diff --git a/src/dbwrapper.h b/src/dbwrapper.h index 1ff7dc1d2ee..6dd8575b55b 100644 --- a/src/dbwrapper.h +++ b/src/dbwrapper.h @@ -119,7 +119,8 @@ public: ssKey.clear(); } - size_t SizeEstimate() const { return size_estimate; } + size_t ApproximateSize() const; + size_t SizeEstimate() const { return size_estimate; } // TODO replace with ApproximateSize }; class CDBIterator diff --git a/src/txdb.cpp b/src/txdb.cpp index 1622039d63b..a1e2aa7feb6 100644 --- a/src/txdb.cpp +++ b/src/txdb.cpp @@ -113,24 +113,39 @@ bool CCoinsViewDB::BatchWrite(CoinsViewCacheCursor& cursor, const uint256 &hashB // transition from old_tip to hashBlock. // A vector is used for future extensibility, as we may want to support // interrupting after partial writes from multiple independent reorgs. + assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove batch.Erase(DB_BEST_BLOCK); + assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove batch.Write(DB_HEAD_BLOCKS, Vector(hashBlock, old_tip)); + assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove for (auto it{cursor.Begin()}; it != cursor.End();) { if (it->second.IsDirty()) { CoinEntry entry(&it->first); - if (it->second.coin.IsSpent()) + if (it->second.coin.IsSpent()) { + assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove batch.Erase(entry); - else + assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove + } else { + assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove batch.Write(entry, it->second.coin); + assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove + } + changed++; } count++; it = cursor.NextAndMaybeErase(*it); + assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove if (batch.SizeEstimate() > m_options.batch_write_bytes) { LogDebug(BCLog::COINDB, "Writing partial batch of %.2f MiB\n", batch.SizeEstimate() * (1.0 / 1048576.0)); + + assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove m_db->WriteBatch(batch); + assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove batch.Clear(); + assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove + if (m_options.simulate_crash_ratio) { static FastRandomContext rng; if (rng.randrange(m_options.simulate_crash_ratio) == 0) { @@ -142,11 +157,15 @@ bool CCoinsViewDB::BatchWrite(CoinsViewCacheCursor& cursor, const uint256 &hashB } // In the last batch, mark the database as consistent with hashBlock again. + assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove batch.Erase(DB_HEAD_BLOCKS); + assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove batch.Write(DB_BEST_BLOCK, hashBlock); + assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove LogDebug(BCLog::COINDB, "Writing final batch of %.2f MiB\n", batch.SizeEstimate() * (1.0 / 1048576.0)); bool ret = m_db->WriteBatch(batch); + assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove LogDebug(BCLog::COINDB, "Committed %u changed transaction outputs (out of %u) to coin database...\n", (unsigned int)changed, (unsigned int)count); return ret; } From e419b0e17f8acfe577c35c62a8a71a19aad249f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Tue, 1 Apr 2025 14:46:15 +0200 Subject: [PATCH 3/3] refactor: Remove manual CDBBatch size estimation Remove the manual batch size estimation logic (`SizeEstimate()` method and `size_estimate` member) from `CDBBatch`. Size is now determined solely by the `ApproximateSize()` method introduced in the previous commit, which delegates to the native LevelDB function. The manual calculation is no longer necessary as LevelDB now provides this functionality directly, and the previous commit verified that the native function's results matched the manual estimation. Assertions comparing the two methods are removed from `txdb.cpp`. Co-authored-by: Wladimir J. van der Laan --- src/dbwrapper.cpp | 16 ---------------- src/dbwrapper.h | 5 ----- src/txdb.cpp | 22 +++------------------- 3 files changed, 3 insertions(+), 40 deletions(-) diff --git a/src/dbwrapper.cpp b/src/dbwrapper.cpp index 9b1d043b335..1c35e11863b 100644 --- a/src/dbwrapper.cpp +++ b/src/dbwrapper.cpp @@ -168,8 +168,6 @@ CDBBatch::~CDBBatch() = default; void CDBBatch::Clear() { m_impl_batch->batch.Clear(); - assert(m_impl_batch->batch.ApproximateSize() == kHeader); - size_estimate = kHeader; // TODO remove } void CDBBatch::WriteImpl(std::span key, DataStream& ssValue) @@ -178,26 +176,12 @@ void CDBBatch::WriteImpl(std::span key, DataStream& ssValue) ssValue.Xor(dbwrapper_private::GetObfuscateKey(parent)); leveldb::Slice slValue(CharCast(ssValue.data()), ssValue.size()); m_impl_batch->batch.Put(slKey, slValue); - // LevelDB serializes writes as: - // - byte: header - // - varint: key length (1 byte up to 127B, 2 bytes up to 16383B, ...) - // - byte[]: key - // - varint: value length - // - byte[]: value - // The formula below assumes the key and value are both less than 16k. - size_estimate += 3 + (slKey.size() > 127) + slKey.size() + (slValue.size() > 127) + slValue.size(); } void CDBBatch::EraseImpl(std::span key) { leveldb::Slice slKey(CharCast(key.data()), key.size()); m_impl_batch->batch.Delete(slKey); - // LevelDB serializes erases as: - // - byte: header - // - varint: key length - // - byte[]: key - // The formula below assumes the key is less than 16kB. - size_estimate += 2 + (slKey.size() > 127) + slKey.size(); } size_t CDBBatch::ApproximateSize() const diff --git a/src/dbwrapper.h b/src/dbwrapper.h index 6dd8575b55b..789b5be8fc7 100644 --- a/src/dbwrapper.h +++ b/src/dbwrapper.h @@ -75,8 +75,6 @@ class CDBBatch friend class CDBWrapper; private: - static constexpr size_t kHeader{12}; // See: src/leveldb/db/write_batch.cc#L27 - const CDBWrapper &parent; struct WriteBatchImpl; @@ -85,8 +83,6 @@ private: DataStream ssKey{}; DataStream ssValue{}; - size_t size_estimate{0}; - void WriteImpl(std::span key, DataStream& ssValue); void EraseImpl(std::span key); @@ -120,7 +116,6 @@ public: } size_t ApproximateSize() const; - size_t SizeEstimate() const { return size_estimate; } // TODO replace with ApproximateSize }; class CDBIterator diff --git a/src/txdb.cpp b/src/txdb.cpp index a1e2aa7feb6..bb6ee2eb524 100644 --- a/src/txdb.cpp +++ b/src/txdb.cpp @@ -113,39 +113,27 @@ bool CCoinsViewDB::BatchWrite(CoinsViewCacheCursor& cursor, const uint256 &hashB // transition from old_tip to hashBlock. // A vector is used for future extensibility, as we may want to support // interrupting after partial writes from multiple independent reorgs. - assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove batch.Erase(DB_BEST_BLOCK); - assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove batch.Write(DB_HEAD_BLOCKS, Vector(hashBlock, old_tip)); - assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove for (auto it{cursor.Begin()}; it != cursor.End();) { if (it->second.IsDirty()) { CoinEntry entry(&it->first); if (it->second.coin.IsSpent()) { - assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove batch.Erase(entry); - assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove } else { - assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove batch.Write(entry, it->second.coin); - assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove } changed++; } count++; it = cursor.NextAndMaybeErase(*it); - assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove - if (batch.SizeEstimate() > m_options.batch_write_bytes) { - LogDebug(BCLog::COINDB, "Writing partial batch of %.2f MiB\n", batch.SizeEstimate() * (1.0 / 1048576.0)); + if (batch.ApproximateSize() > m_options.batch_write_bytes) { + LogDebug(BCLog::COINDB, "Writing partial batch of %.2f MiB\n", batch.ApproximateSize() * (1.0 / 1048576.0)); - assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove m_db->WriteBatch(batch); - assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove batch.Clear(); - assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove - if (m_options.simulate_crash_ratio) { static FastRandomContext rng; if (rng.randrange(m_options.simulate_crash_ratio) == 0) { @@ -157,15 +145,11 @@ bool CCoinsViewDB::BatchWrite(CoinsViewCacheCursor& cursor, const uint256 &hashB } // In the last batch, mark the database as consistent with hashBlock again. - assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove batch.Erase(DB_HEAD_BLOCKS); - assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove batch.Write(DB_BEST_BLOCK, hashBlock); - assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove - LogDebug(BCLog::COINDB, "Writing final batch of %.2f MiB\n", batch.SizeEstimate() * (1.0 / 1048576.0)); + LogDebug(BCLog::COINDB, "Writing final batch of %.2f MiB\n", batch.ApproximateSize() * (1.0 / 1048576.0)); bool ret = m_db->WriteBatch(batch); - assert(batch.ApproximateSize() == batch.SizeEstimate()); // TODO remove LogDebug(BCLog::COINDB, "Committed %u changed transaction outputs (out of %u) to coin database...\n", (unsigned int)changed, (unsigned int)count); return ret; }