Mirror of https://github.com/bitcoin/bitcoin.git
Merge bitcoin/bitcoin#32185: coins: replace manual CDBBatch size estimation with LevelDB's native ApproximateSize

e419b0e17f refactor: Remove manual CDBBatch size estimation (Lőrinc)
8b5e19d8b5 refactor: Delegate to LevelDB for CDBBatch size estimation (Lőrinc)
751077c6e2 Coins: Add `kHeader` to `CDBBatch::size_estimate` (Lőrinc)

Pull request description:

### Summary

The manual estimation of `CDBBatch`'s serialized size was [added](e66dbde6d1) when LevelDB [did not yet expose this functionality](https://github.com/google/leveldb/commit/69e2bd2). This PR refactors the logic to use the native `leveldb::WriteBatch::ApproximateSize()` method, structured in three focused commits that replace the old behavior incrementally and safely.

### Context

The previous manual size calculation initialized the estimate to 0 instead of to the size of LevelDB's batch header, which contains an 8-byte sequence number followed by a 4-byte record count (illustrated in the sketch after the commit message). This PR corrects that and then transitions to the now-available native LevelDB method for improved accuracy and maintainability.

### Approach

The fix and refactor follow a strangler pattern over three commits:

* correct the initialization bug in the existing manual calculation, isolating the fix and ensuring the subsequent assertions use the corrected logic;
* introduce the native `ApproximateSize()` method alongside the corrected manual one, adding assertions to verify their equivalence at runtime;
* remove the verified manual calculation logic and assertions, leaving only the native method.

ACKs for top commit:
  sipa:
    utACK e419b0e17f
  TheCharlatan:
    ACK e419b0e17f
  laanwj:
    Code review ACK e419b0e17f

Tree-SHA512: a12b973dd480d4ffec4ec89a119bf0b6f73bde4e634329d6e4cc3454b867f2faf3742b78ec4a3b6d98ac4fb28fb2174f44ede42d6c701ed871987a7274560691
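As a standalone illustration of the equivalence the middle commit asserts, the sketch below builds a raw `leveldb::WriteBatch` and checks the corrected manual formula (starting from the 12-byte header) against the native estimate. This is a minimal example written for this summary, not Bitcoin Core's actual assertion code; the key and value sizes are arbitrary.

```cpp
#include <cassert>
#include <cstddef>
#include <string>

#include <leveldb/write_batch.h>

int main()
{
    // The fix from the first commit: start the estimate at the batch
    // header size (8-byte sequence number + 4-byte record count), not 0.
    constexpr size_t kHeader{8 + 4};

    leveldb::WriteBatch batch;
    size_t manual_estimate{kHeader};

    const std::string key(5, 'k');
    const std::string value(200, 'v');
    batch.Put(key, value);

    // Each Put record is serialized as: 1 tag byte, varint key length,
    // key bytes, varint value length, value bytes. The varints occupy
    // 1 byte up to 127 B and 2 bytes up to 16383 B, hence the
    // (size > 127) correction terms for sub-16 KiB keys and values.
    manual_estimate += 3 + (key.size() > 127) + key.size()
                         + (value.size() > 127) + value.size();

    // With the corrected initialization the two estimates agree exactly.
    assert(batch.ApproximateSize() == manual_estimate);
    return 0;
}
```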
Commit 8d2ead2a86

3 changed files with 18 additions and 24 deletions
src/dbwrapper.cpp

```diff
@@ -158,14 +158,16 @@ struct CDBBatch::WriteBatchImpl {
 CDBBatch::CDBBatch(const CDBWrapper& _parent)
     : parent{_parent},
-      m_impl_batch{std::make_unique<CDBBatch::WriteBatchImpl>()} {};
+      m_impl_batch{std::make_unique<CDBBatch::WriteBatchImpl>()}
+{
+    Clear();
+};
 
 CDBBatch::~CDBBatch() = default;
 
 void CDBBatch::Clear()
 {
     m_impl_batch->batch.Clear();
-    size_estimate = 0;
 }
 
 void CDBBatch::WriteImpl(std::span<const std::byte> key, DataStream& ssValue)
@@ -174,26 +176,17 @@ void CDBBatch::WriteImpl(std::span<const std::byte> key, DataStream& ssValue)
     ssValue.Xor(dbwrapper_private::GetObfuscateKey(parent));
     leveldb::Slice slValue(CharCast(ssValue.data()), ssValue.size());
     m_impl_batch->batch.Put(slKey, slValue);
-    // LevelDB serializes writes as:
-    // - byte: header
-    // - varint: key length (1 byte up to 127B, 2 bytes up to 16383B, ...)
-    // - byte[]: key
-    // - varint: value length
-    // - byte[]: value
-    // The formula below assumes the key and value are both less than 16k.
-    size_estimate += 3 + (slKey.size() > 127) + slKey.size() + (slValue.size() > 127) + slValue.size();
 }
 
 void CDBBatch::EraseImpl(std::span<const std::byte> key)
 {
     leveldb::Slice slKey(CharCast(key.data()), key.size());
     m_impl_batch->batch.Delete(slKey);
-    // LevelDB serializes erases as:
-    // - byte: header
-    // - varint: key length
-    // - byte[]: key
-    // The formula below assumes the key is less than 16kB.
-    size_estimate += 2 + (slKey.size() > 127) + slKey.size();
 }
 
+size_t CDBBatch::ApproximateSize() const
+{
+    return m_impl_batch->batch.ApproximateSize();
+}
+
 struct LevelDBContext {
```
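For reference, the LevelDB method that the new `CDBBatch::ApproximateSize()` delegates to is itself a one-liner (as introduced in google/leveldb commit 69e2bd2, quoted here from memory): it returns the length of the batch's internal serialized representation, i.e. exactly the quantity the removed manual formula was trying to reproduce.

```cpp
// db/write_batch.cc in LevelDB (for reference):
size_t WriteBatch::ApproximateSize() const { return rep_.size(); }
```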
src/dbwrapper.h

```diff
@@ -83,8 +83,6 @@ private:
     DataStream ssKey{};
     DataStream ssValue{};
 
-    size_t size_estimate{0};
-
     void WriteImpl(std::span<const std::byte> key, DataStream& ssValue);
     void EraseImpl(std::span<const std::byte> key);
 
@@ -117,7 +115,7 @@ public:
         ssKey.clear();
     }
 
-    size_t SizeEstimate() const { return size_estimate; }
+    size_t ApproximateSize() const;
 };
 
 class CDBIterator
```
src/txdb.cpp (13 changed lines)

```diff
@@ -119,16 +119,19 @@ bool CCoinsViewDB::BatchWrite(CoinsViewCacheCursor& cursor, const uint256 &hashBlock)
     for (auto it{cursor.Begin()}; it != cursor.End();) {
         if (it->second.IsDirty()) {
             CoinEntry entry(&it->first);
-            if (it->second.coin.IsSpent())
+            if (it->second.coin.IsSpent()) {
                 batch.Erase(entry);
-            else
+            } else {
                 batch.Write(entry, it->second.coin);
+            }
+
             changed++;
         }
         count++;
         it = cursor.NextAndMaybeErase(*it);
-        if (batch.SizeEstimate() > m_options.batch_write_bytes) {
-            LogDebug(BCLog::COINDB, "Writing partial batch of %.2f MiB\n", batch.SizeEstimate() * (1.0 / 1048576.0));
+        if (batch.ApproximateSize() > m_options.batch_write_bytes) {
+            LogDebug(BCLog::COINDB, "Writing partial batch of %.2f MiB\n", batch.ApproximateSize() * (1.0 / 1048576.0));
             m_db->WriteBatch(batch);
             batch.Clear();
             if (m_options.simulate_crash_ratio) {
@@ -145,7 +148,7 @@ bool CCoinsViewDB::BatchWrite(CoinsViewCacheCursor& cursor, const uint256 &hashBlock)
     batch.Erase(DB_HEAD_BLOCKS);
     batch.Write(DB_BEST_BLOCK, hashBlock);
 
-    LogDebug(BCLog::COINDB, "Writing final batch of %.2f MiB\n", batch.SizeEstimate() * (1.0 / 1048576.0));
+    LogDebug(BCLog::COINDB, "Writing final batch of %.2f MiB\n", batch.ApproximateSize() * (1.0 / 1048576.0));
     bool ret = m_db->WriteBatch(batch);
     LogDebug(BCLog::COINDB, "Committed %u changed transaction outputs (out of %u) to coin database...\n", (unsigned int)changed, (unsigned int)count);
     return ret;
```
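The caller-side pattern in `BatchWrite` — commit and clear the batch whenever its approximate size crosses a threshold — looks roughly like the following when sketched against raw LevelDB. The helper function, its name, and the entry container are hypothetical stand-ins for Bitcoin Core's `CDBWrapper`/`CDBBatch` machinery and `m_options.batch_write_bytes`; error handling is elided.

```cpp
#include <cstddef>
#include <string>
#include <utility>
#include <vector>

#include <leveldb/db.h>
#include <leveldb/write_batch.h>

// Hypothetical helper: stream entries into the database, flushing a
// partial batch whenever the native size estimate exceeds the cap.
void WriteAll(leveldb::DB& db,
              const std::vector<std::pair<std::string, std::string>>& entries,
              size_t batch_write_bytes)
{
    leveldb::WriteBatch batch;
    for (const auto& [key, value] : entries) {
        batch.Put(key, value);
        if (batch.ApproximateSize() > batch_write_bytes) {
            db.Write(leveldb::WriteOptions{}, &batch); // partial batch
            batch.Clear();
        }
    }
    db.Write(leveldb::WriteOptions{}, &batch); // final batch
}
```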