Merge bitcoin/bitcoin#32185: coins: replace manual CDBBatch size estimation with LevelDB's native ApproximateSize

e419b0e17f refactor: Remove manual CDBBatch size estimation (Lőrinc)
8b5e19d8b5 refactor: Delegate to LevelDB for CDBBatch size estimation (Lőrinc)
751077c6e2 Coins: Add `kHeader` to `CDBBatch::size_estimate` (Lőrinc)

Pull request description:

  ### Summary

  The manual estimation of `CDBBatch`'s serialized size was [added](e66dbde6d1) when LevelDB [didn't expose this functionality yet](https://github.com/google/leveldb/commit/69e2bd2).
  This PR refactors the logic to use the native `leveldb::WriteBatch::ApproximateSize()` function. The change is structured as 3 focused commits that incrementally and safely replace the old behavior.

  ### Context

  The previous manual size calculation initialized the estimate to 0 instead of LevelDB's header size (12 bytes: an 8-byte sequence number followed by a 4-byte count).
  This PR corrects that and transitions to the now-available native LevelDB function for improved accuracy and maintainability.

  ### Approach
  The fix and refactor follow a strangler pattern over three commits:
  * correct the initialization bug in the existing manual calculation, isolating the fix and ensuring the subsequent assertions use the corrected logic;
  * introduce the native `ApproximateSize()` method alongside the corrected manual one, adding assertions to verify their equivalence at runtime;
  * remove the verified manual calculation logic and assertions, leaving only the native method.

ACKs for top commit:
  sipa:
    utACK e419b0e17f
  TheCharlatan:
    ACK e419b0e17f
  laanwj:
    Code review ACK e419b0e17f

Tree-SHA512: a12b973dd480d4ffec4ec89a119bf0b6f73bde4e634329d6e4cc3454b867f2faf3742b78ec4a3b6d98ac4fb28fb2174f44ede42d6c701ed871987a7274560691
This commit is contained in:
Ryan Ofsky 2025-04-08 15:44:34 -04:00
commit 8d2ead2a86
No known key found for this signature in database
GPG key ID: 46800E30FC748A66
3 changed files with 18 additions and 24 deletions

View file

@ -158,14 +158,16 @@ struct CDBBatch::WriteBatchImpl {
CDBBatch::CDBBatch(const CDBWrapper& _parent)
: parent{_parent},
m_impl_batch{std::make_unique<CDBBatch::WriteBatchImpl>()} {};
m_impl_batch{std::make_unique<CDBBatch::WriteBatchImpl>()}
{
Clear();
};
CDBBatch::~CDBBatch() = default;
void CDBBatch::Clear()
{
m_impl_batch->batch.Clear();
size_estimate = 0;
}
void CDBBatch::WriteImpl(std::span<const std::byte> key, DataStream& ssValue)
@ -174,26 +176,17 @@ void CDBBatch::WriteImpl(std::span<const std::byte> key, DataStream& ssValue)
ssValue.Xor(dbwrapper_private::GetObfuscateKey(parent));
leveldb::Slice slValue(CharCast(ssValue.data()), ssValue.size());
m_impl_batch->batch.Put(slKey, slValue);
// LevelDB serializes writes as:
// - byte: header
// - varint: key length (1 byte up to 127B, 2 bytes up to 16383B, ...)
// - byte[]: key
// - varint: value length
// - byte[]: value
// The formula below assumes the key and value are both less than 16k.
size_estimate += 3 + (slKey.size() > 127) + slKey.size() + (slValue.size() > 127) + slValue.size();
}
void CDBBatch::EraseImpl(std::span<const std::byte> key)
{
leveldb::Slice slKey(CharCast(key.data()), key.size());
m_impl_batch->batch.Delete(slKey);
// LevelDB serializes erases as:
// - byte: header
// - varint: key length
// - byte[]: key
// The formula below assumes the key is less than 16kB.
size_estimate += 2 + (slKey.size() > 127) + slKey.size();
}
// Returns the batch size as reported by the wrapped LevelDB write batch
// (m_impl_batch->batch), delegating directly to its ApproximateSize() rather
// than maintaining a separate, manually tracked estimate.
size_t CDBBatch::ApproximateSize() const
{
return m_impl_batch->batch.ApproximateSize();
}
struct LevelDBContext {

View file

@ -83,8 +83,6 @@ private:
DataStream ssKey{};
DataStream ssValue{};
size_t size_estimate{0};
void WriteImpl(std::span<const std::byte> key, DataStream& ssValue);
void EraseImpl(std::span<const std::byte> key);
@ -117,7 +115,7 @@ public:
ssKey.clear();
}
size_t SizeEstimate() const { return size_estimate; }
size_t ApproximateSize() const;
};
class CDBIterator

View file

@ -119,16 +119,19 @@ bool CCoinsViewDB::BatchWrite(CoinsViewCacheCursor& cursor, const uint256 &hashB
for (auto it{cursor.Begin()}; it != cursor.End();) {
if (it->second.IsDirty()) {
CoinEntry entry(&it->first);
if (it->second.coin.IsSpent())
if (it->second.coin.IsSpent()) {
batch.Erase(entry);
else
} else {
batch.Write(entry, it->second.coin);
}
changed++;
}
count++;
it = cursor.NextAndMaybeErase(*it);
if (batch.SizeEstimate() > m_options.batch_write_bytes) {
LogDebug(BCLog::COINDB, "Writing partial batch of %.2f MiB\n", batch.SizeEstimate() * (1.0 / 1048576.0));
if (batch.ApproximateSize() > m_options.batch_write_bytes) {
LogDebug(BCLog::COINDB, "Writing partial batch of %.2f MiB\n", batch.ApproximateSize() * (1.0 / 1048576.0));
m_db->WriteBatch(batch);
batch.Clear();
if (m_options.simulate_crash_ratio) {
@ -145,7 +148,7 @@ bool CCoinsViewDB::BatchWrite(CoinsViewCacheCursor& cursor, const uint256 &hashB
batch.Erase(DB_HEAD_BLOCKS);
batch.Write(DB_BEST_BLOCK, hashBlock);
LogDebug(BCLog::COINDB, "Writing final batch of %.2f MiB\n", batch.SizeEstimate() * (1.0 / 1048576.0));
LogDebug(BCLog::COINDB, "Writing final batch of %.2f MiB\n", batch.ApproximateSize() * (1.0 / 1048576.0));
bool ret = m_db->WriteBatch(batch);
LogDebug(BCLog::COINDB, "Committed %u changed transaction outputs (out of %u) to coin database...\n", (unsigned int)changed, (unsigned int)count);
return ret;