From 0ad7d7abdbcffc11a46413545a214a716f56dc95 Mon Sep 17 00:00:00 2001 From: Andrew Toth Date: Sat, 31 Aug 2024 13:41:51 -0400 Subject: [PATCH 1/5] test: chainstate write test for periodic chainstate flush --- src/test/CMakeLists.txt | 1 + src/test/chainstate_write_tests.cpp | 44 +++++++++++++++++++++++++++++ src/validation.cpp | 4 +-- src/validation.h | 5 ++-- 4 files changed, 50 insertions(+), 4 deletions(-) create mode 100644 src/test/chainstate_write_tests.cpp diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 859b9132067..95b43a0aade 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -47,6 +47,7 @@ add_executable(test_bitcoin blockmanager_tests.cpp bloom_tests.cpp bswap_tests.cpp + chainstate_write_tests.cpp checkqueue_tests.cpp cluster_linearize_tests.cpp coins_tests.cpp diff --git a/src/test/chainstate_write_tests.cpp b/src/test/chainstate_write_tests.cpp new file mode 100644 index 00000000000..3d285eb3b01 --- /dev/null +++ b/src/test/chainstate_write_tests.cpp @@ -0,0 +1,44 @@ +// Copyright (c) The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include +#include +#include + +#include + +BOOST_AUTO_TEST_SUITE(chainstate_write_tests) + +BOOST_FIXTURE_TEST_CASE(chainstate_write_interval, TestingSetup) +{ + struct TestSubscriber final : CValidationInterface { + bool m_did_flush{false}; + void ChainStateFlushed(ChainstateRole, const CBlockLocator&) override + { + m_did_flush = true; + } + }; + + const auto sub{std::make_shared()}; + m_node.validation_signals->RegisterSharedValidationInterface(sub); + auto& chainstate{Assert(m_node.chainman)->ActiveChainstate()}; + BlockValidationState state_dummy{}; + + // The first periodic flush sets m_last_flush and does not flush + chainstate.FlushStateToDisk(state_dummy, FlushStateMode::PERIODIC); + m_node.validation_signals->SyncWithValidationInterfaceQueue(); + BOOST_CHECK(!sub->m_did_flush); + + SetMockTime(GetTime() + 23h + 59min); + chainstate.FlushStateToDisk(state_dummy, FlushStateMode::PERIODIC); + m_node.validation_signals->SyncWithValidationInterfaceQueue(); + BOOST_CHECK(!sub->m_did_flush); + + SetMockTime(GetTime() + 24h); + chainstate.FlushStateToDisk(state_dummy, FlushStateMode::PERIODIC); + m_node.validation_signals->SyncWithValidationInterfaceQueue(); + BOOST_CHECK(sub->m_did_flush); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/validation.cpp b/src/validation.cpp index 191d80ef06f..c5e603e9322 100644 --- a/src/validation.cpp +++ b/src/validation.cpp @@ -2878,7 +2878,7 @@ bool Chainstate::FlushStateToDisk( } } } - const auto nNow{SteadyClock::now()}; + const auto nNow{NodeClock::now()}; // Avoid writing/flushing immediately after startup. if (m_last_write == decltype(m_last_write){}) { m_last_write = nNow; @@ -2951,7 +2951,7 @@ bool Chainstate::FlushStateToDisk( m_last_flush = nNow; full_flush_completed = true; TRACEPOINT(utxocache, flush, - int64_t{Ticks(SteadyClock::now() - nNow)}, + int64_t{Ticks(NodeClock::now() - nNow)}, (uint32_t)mode, (uint64_t)coins_count, (uint64_t)coins_mem_usage, diff --git a/src/validation.h b/src/validation.h index e2ff5925c58..3add3df4810 100644 --- a/src/validation.h +++ b/src/validation.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -802,8 +803,8 @@ private: void UpdateTip(const CBlockIndex* pindexNew) EXCLUSIVE_LOCKS_REQUIRED(::cs_main); - SteadyClock::time_point m_last_write{}; - SteadyClock::time_point m_last_flush{}; + NodeClock::time_point m_last_write{}; + NodeClock::time_point m_last_flush{}; /** * In case of an invalid snapshot, rename the coins leveldb directory so From 7998eda03402a8dd8227a506c022f8bef81f38ef Mon Sep 17 00:00:00 2001 From: Andrew Toth Date: Sat, 31 Aug 2024 11:13:11 -0400 Subject: [PATCH 2/5] validation: write chainstate to disk every hour Remove the 24 hour periodic flush interval and write the chainstate along with blocks and block index every hour --- src/test/chainstate_write_tests.cpp | 7 +-- src/validation.cpp | 68 +++++++++++++---------------- src/validation.h | 1 - 3 files changed, 34 insertions(+), 42 deletions(-) diff --git a/src/test/chainstate_write_tests.cpp b/src/test/chainstate_write_tests.cpp index 3d285eb3b01..c76164099c2 100644 --- a/src/test/chainstate_write_tests.cpp +++ b/src/test/chainstate_write_tests.cpp @@ -25,17 +25,18 @@ BOOST_FIXTURE_TEST_CASE(chainstate_write_interval, TestingSetup) auto& chainstate{Assert(m_node.chainman)->ActiveChainstate()}; BlockValidationState state_dummy{}; - // The first periodic flush sets m_last_flush and does not flush + // The first periodic flush sets m_last_write and does not flush chainstate.FlushStateToDisk(state_dummy, FlushStateMode::PERIODIC); m_node.validation_signals->SyncWithValidationInterfaceQueue(); BOOST_CHECK(!sub->m_did_flush); - SetMockTime(GetTime() + 23h + 59min); + // The periodic flush interval is 1 hour + SetMockTime(GetTime() + 59min); chainstate.FlushStateToDisk(state_dummy, FlushStateMode::PERIODIC); m_node.validation_signals->SyncWithValidationInterfaceQueue(); BOOST_CHECK(!sub->m_did_flush); - SetMockTime(GetTime() + 24h); + SetMockTime(GetTime() + 1h); chainstate.FlushStateToDisk(state_dummy, FlushStateMode::PERIODIC); m_node.validation_signals->SyncWithValidationInterfaceQueue(); BOOST_CHECK(sub->m_did_flush); diff --git a/src/validation.cpp b/src/validation.cpp index c5e603e9322..d530f45c042 100644 --- a/src/validation.cpp +++ b/src/validation.cpp @@ -90,10 +90,8 @@ using node::SnapshotMetadata; /** Size threshold for warning about slow UTXO set flush to disk. */ static constexpr size_t WARN_FLUSH_COINS_SIZE = 1 << 30; // 1 GiB -/** Time to wait between writing blocks/block index to disk. */ +/** Time to wait between writing blocks/block index and chainstate to disk. */ static constexpr std::chrono::hours DATABASE_WRITE_INTERVAL{1}; -/** Time to wait between flushing chainstate to disk. */ -static constexpr std::chrono::hours DATABASE_FLUSH_INTERVAL{24}; /** Maximum age of our tip for us to be considered current for fee estimation */ static constexpr std::chrono::hours MAX_FEE_ESTIMATION_TIP_AGE{3}; const std::vector CHECKLEVEL_DOC { @@ -2883,21 +2881,16 @@ bool Chainstate::FlushStateToDisk( if (m_last_write == decltype(m_last_write){}) { m_last_write = nNow; } - if (m_last_flush == decltype(m_last_flush){}) { - m_last_flush = nNow; - } // The cache is large and we're within 10% and 10 MiB of the limit, but we have time now (not in the middle of a block processing). bool fCacheLarge = mode == FlushStateMode::PERIODIC && cache_state >= CoinsCacheSizeState::LARGE; // The cache is over the limit, we have to write now. bool fCacheCritical = mode == FlushStateMode::IF_NEEDED && cache_state >= CoinsCacheSizeState::CRITICAL; - // It's been a while since we wrote the block index to disk. Do this frequently, so we don't need to redownload after a crash. + // It's been a while since we wrote the block index and chain state to disk. Do this frequently, so we don't need to redownload or reindex after a crash. bool fPeriodicWrite = mode == FlushStateMode::PERIODIC && nNow > m_last_write + DATABASE_WRITE_INTERVAL; - // It's been very long since we flushed the cache. Do this infrequently, to optimize cache usage. - bool fPeriodicFlush = mode == FlushStateMode::PERIODIC && nNow > m_last_flush + DATABASE_FLUSH_INTERVAL; // Combine all conditions that result in a full cache flush. - fDoFullFlush = (mode == FlushStateMode::ALWAYS) || fCacheLarge || fCacheCritical || fPeriodicFlush || fFlushForPrune; - // Write blocks and block index to disk. - if (fDoFullFlush || fPeriodicWrite) { + fDoFullFlush = (mode == FlushStateMode::ALWAYS) || fCacheLarge || fCacheCritical || fPeriodicWrite || fFlushForPrune; + // Write blocks, block index and best chain related state to disk. + if (fDoFullFlush) { // Ensure we can write block index if (!CheckDiskSpace(m_blockman.m_opts.blocks_dir)) { return FatalError(m_chainman.GetNotifications(), state, _("Disk space is too low!")); @@ -2928,34 +2921,33 @@ bool Chainstate::FlushStateToDisk( m_blockman.UnlinkPrunedFiles(setFilesToPrune); } m_last_write = nNow; - } - // Flush best chain related state. This can only be done if the blocks / block index write was also done. - if (fDoFullFlush && !CoinsTip().GetBestBlock().IsNull()) { - if (coins_mem_usage >= WARN_FLUSH_COINS_SIZE) LogWarning("Flushing large (%d GiB) UTXO set to disk, it may take several minutes", coins_mem_usage >> 30); - LOG_TIME_MILLIS_WITH_CATEGORY(strprintf("write coins cache to disk (%d coins, %.2fKiB)", - coins_count, coins_mem_usage >> 10), BCLog::BENCH); - // Typical Coin structures on disk are around 48 bytes in size. - // Pushing a new one to the database can cause it to be written - // twice (once in the log, and once in the tables). This is already - // an overestimation, as most will delete an existing entry or - // overwrite one. Still, use a conservative safety factor of 2. - if (!CheckDiskSpace(m_chainman.m_options.datadir, 48 * 2 * 2 * CoinsTip().GetCacheSize())) { - return FatalError(m_chainman.GetNotifications(), state, _("Disk space is too low!")); + if (!CoinsTip().GetBestBlock().IsNull()) { + if (coins_mem_usage >= WARN_FLUSH_COINS_SIZE) LogWarning("Flushing large (%d GiB) UTXO set to disk, it may take several minutes", coins_mem_usage >> 30); + LOG_TIME_MILLIS_WITH_CATEGORY(strprintf("write coins cache to disk (%d coins, %.2fKiB)", + coins_count, coins_mem_usage >> 10), BCLog::BENCH); + + // Typical Coin structures on disk are around 48 bytes in size. + // Pushing a new one to the database can cause it to be written + // twice (once in the log, and once in the tables). This is already + // an overestimation, as most will delete an existing entry or + // overwrite one. Still, use a conservative safety factor of 2. + if (!CheckDiskSpace(m_chainman.m_options.datadir, 48 * 2 * 2 * CoinsTip().GetCacheSize())) { + return FatalError(m_chainman.GetNotifications(), state, _("Disk space is too low!")); + } + // Flush the chainstate (which may refer to block index entries). + const auto empty_cache{(mode == FlushStateMode::ALWAYS) || fCacheLarge || fCacheCritical}; + if (empty_cache ? !CoinsTip().Flush() : !CoinsTip().Sync()) { + return FatalError(m_chainman.GetNotifications(), state, _("Failed to write to coin database.")); + } + full_flush_completed = true; + TRACEPOINT(utxocache, flush, + int64_t{Ticks(NodeClock::now() - nNow)}, + (uint32_t)mode, + (uint64_t)coins_count, + (uint64_t)coins_mem_usage, + (bool)fFlushForPrune); } - // Flush the chainstate (which may refer to block index entries). - const auto empty_cache{(mode == FlushStateMode::ALWAYS) || fCacheLarge || fCacheCritical}; - if (empty_cache ? !CoinsTip().Flush() : !CoinsTip().Sync()) { - return FatalError(m_chainman.GetNotifications(), state, _("Failed to write to coin database.")); - } - m_last_flush = nNow; - full_flush_completed = true; - TRACEPOINT(utxocache, flush, - int64_t{Ticks(NodeClock::now() - nNow)}, - (uint32_t)mode, - (uint64_t)coins_count, - (uint64_t)coins_mem_usage, - (bool)fFlushForPrune); } } if (full_flush_completed && m_chainman.m_options.signals) { diff --git a/src/validation.h b/src/validation.h index 3add3df4810..df04b4af29d 100644 --- a/src/validation.h +++ b/src/validation.h @@ -804,7 +804,6 @@ private: EXCLUSIVE_LOCKS_REQUIRED(::cs_main); NodeClock::time_point m_last_write{}; - NodeClock::time_point m_last_flush{}; /** * In case of an invalid snapshot, rename the coins leveldb directory so From a33aaadb2d3da75512b80c1793d92c7536c0b801 Mon Sep 17 00:00:00 2001 From: Andrew Toth Date: Sat, 31 Aug 2024 11:14:18 -0400 Subject: [PATCH 3/5] refactor: rename fDoFullFlush to should_write --- src/validation.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/validation.cpp b/src/validation.cpp index d530f45c042..3ef5fbf665a 100644 --- a/src/validation.cpp +++ b/src/validation.cpp @@ -2831,7 +2831,6 @@ bool Chainstate::FlushStateToDisk( try { { bool fFlushForPrune = false; - bool fDoFullFlush = false; CoinsCacheSizeState cache_state = GetCoinsCacheSizeState(); LOCK(m_blockman.cs_LastBlockFile); @@ -2887,10 +2886,10 @@ bool Chainstate::FlushStateToDisk( bool fCacheCritical = mode == FlushStateMode::IF_NEEDED && cache_state >= CoinsCacheSizeState::CRITICAL; // It's been a while since we wrote the block index and chain state to disk. Do this frequently, so we don't need to redownload or reindex after a crash. bool fPeriodicWrite = mode == FlushStateMode::PERIODIC && nNow > m_last_write + DATABASE_WRITE_INTERVAL; - // Combine all conditions that result in a full cache flush. - fDoFullFlush = (mode == FlushStateMode::ALWAYS) || fCacheLarge || fCacheCritical || fPeriodicWrite || fFlushForPrune; + // Combine all conditions that result in a write to disk. + bool should_write = (mode == FlushStateMode::ALWAYS) || fCacheLarge || fCacheCritical || fPeriodicWrite || fFlushForPrune; // Write blocks, block index and best chain related state to disk. - if (fDoFullFlush) { + if (should_write) { // Ensure we can write block index if (!CheckDiskSpace(m_blockman.m_opts.blocks_dir)) { return FatalError(m_chainman.GetNotifications(), state, _("Disk space is too low!")); From 690729577db06d3ef7d948e5f08c1534c2122f5e Mon Sep 17 00:00:00 2001 From: Andrew Toth Date: Sun, 8 Sep 2024 11:48:53 -0400 Subject: [PATCH 4/5] refactor: replace m_last_write with m_next_write Co-Authored-By: l0rinc --- src/validation.cpp | 12 ++++++------ src/validation.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/validation.cpp b/src/validation.cpp index 3ef5fbf665a..379e8604131 100644 --- a/src/validation.cpp +++ b/src/validation.cpp @@ -2876,18 +2876,19 @@ bool Chainstate::FlushStateToDisk( } } const auto nNow{NodeClock::now()}; - // Avoid writing/flushing immediately after startup. - if (m_last_write == decltype(m_last_write){}) { - m_last_write = nNow; - } // The cache is large and we're within 10% and 10 MiB of the limit, but we have time now (not in the middle of a block processing). bool fCacheLarge = mode == FlushStateMode::PERIODIC && cache_state >= CoinsCacheSizeState::LARGE; // The cache is over the limit, we have to write now. bool fCacheCritical = mode == FlushStateMode::IF_NEEDED && cache_state >= CoinsCacheSizeState::CRITICAL; // It's been a while since we wrote the block index and chain state to disk. Do this frequently, so we don't need to redownload or reindex after a crash. - bool fPeriodicWrite = mode == FlushStateMode::PERIODIC && nNow > m_last_write + DATABASE_WRITE_INTERVAL; + bool fPeriodicWrite = mode == FlushStateMode::PERIODIC && nNow >= m_next_write; // Combine all conditions that result in a write to disk. bool should_write = (mode == FlushStateMode::ALWAYS) || fCacheLarge || fCacheCritical || fPeriodicWrite || fFlushForPrune; + + if (should_write || m_next_write == NodeClock::time_point::max()) { + m_next_write = nNow + DATABASE_WRITE_INTERVAL; + } + // Write blocks, block index and best chain related state to disk. if (should_write) { // Ensure we can write block index @@ -2919,7 +2920,6 @@ bool Chainstate::FlushStateToDisk( m_blockman.UnlinkPrunedFiles(setFilesToPrune); } - m_last_write = nNow; if (!CoinsTip().GetBestBlock().IsNull()) { if (coins_mem_usage >= WARN_FLUSH_COINS_SIZE) LogWarning("Flushing large (%d GiB) UTXO set to disk, it may take several minutes", coins_mem_usage >> 30); diff --git a/src/validation.h b/src/validation.h index df04b4af29d..5b14e466c2e 100644 --- a/src/validation.h +++ b/src/validation.h @@ -803,7 +803,7 @@ private: void UpdateTip(const CBlockIndex* pindexNew) EXCLUSIVE_LOCKS_REQUIRED(::cs_main); - NodeClock::time_point m_last_write{}; + NodeClock::time_point m_next_write{NodeClock::time_point::max()}; /** * In case of an invalid snapshot, rename the coins leveldb directory so From b7f3b6fd96ca2d5d1f9538affef682f778bd07e8 Mon Sep 17 00:00:00 2001 From: Andrew Toth Date: Sun, 8 Sep 2024 11:51:13 -0400 Subject: [PATCH 5/5] validation: add randomness to periodic write interval Co-Authored-By: Pieter Wuille Co-Authored-By: l0rinc --- src/test/chainstate_write_tests.cpp | 8 ++++---- src/validation.cpp | 11 ++++++++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/test/chainstate_write_tests.cpp b/src/test/chainstate_write_tests.cpp index c76164099c2..ccca2f9be10 100644 --- a/src/test/chainstate_write_tests.cpp +++ b/src/test/chainstate_write_tests.cpp @@ -25,18 +25,18 @@ BOOST_FIXTURE_TEST_CASE(chainstate_write_interval, TestingSetup) auto& chainstate{Assert(m_node.chainman)->ActiveChainstate()}; BlockValidationState state_dummy{}; - // The first periodic flush sets m_last_write and does not flush + // The first periodic flush sets m_next_write and does not flush chainstate.FlushStateToDisk(state_dummy, FlushStateMode::PERIODIC); m_node.validation_signals->SyncWithValidationInterfaceQueue(); BOOST_CHECK(!sub->m_did_flush); - // The periodic flush interval is 1 hour - SetMockTime(GetTime() + 59min); + // The periodic flush interval is between 50 and 70 minutes (inclusive) + SetMockTime(GetTime() + 49min); chainstate.FlushStateToDisk(state_dummy, FlushStateMode::PERIODIC); m_node.validation_signals->SyncWithValidationInterfaceQueue(); BOOST_CHECK(!sub->m_did_flush); - SetMockTime(GetTime() + 1h); + SetMockTime(GetTime() + 70min); chainstate.FlushStateToDisk(state_dummy, FlushStateMode::PERIODIC); m_node.validation_signals->SyncWithValidationInterfaceQueue(); BOOST_CHECK(sub->m_did_flush); diff --git a/src/validation.cpp b/src/validation.cpp index 379e8604131..c7b82678f16 100644 --- a/src/validation.cpp +++ b/src/validation.cpp @@ -90,8 +90,12 @@ using node::SnapshotMetadata; /** Size threshold for warning about slow UTXO set flush to disk. */ static constexpr size_t WARN_FLUSH_COINS_SIZE = 1 << 30; // 1 GiB -/** Time to wait between writing blocks/block index and chainstate to disk. */ -static constexpr std::chrono::hours DATABASE_WRITE_INTERVAL{1}; +/** Time window to wait between writing blocks/block index and chainstate to disk. + * Randomize writing time inside the window to prevent a situation where the + * network over time settles into a few cohorts of synchronized writers. +*/ +static constexpr auto DATABASE_WRITE_INTERVAL_MIN{50min}; +static constexpr auto DATABASE_WRITE_INTERVAL_MAX{70min}; /** Maximum age of our tip for us to be considered current for fee estimation */ static constexpr std::chrono::hours MAX_FEE_ESTIMATION_TIP_AGE{3}; const std::vector CHECKLEVEL_DOC { @@ -2886,7 +2890,8 @@ bool Chainstate::FlushStateToDisk( bool should_write = (mode == FlushStateMode::ALWAYS) || fCacheLarge || fCacheCritical || fPeriodicWrite || fFlushForPrune; if (should_write || m_next_write == NodeClock::time_point::max()) { - m_next_write = nNow + DATABASE_WRITE_INTERVAL; + constexpr auto range{DATABASE_WRITE_INTERVAL_MAX - DATABASE_WRITE_INTERVAL_MIN}; + m_next_write = FastRandomContext().rand_uniform_delay(nNow + DATABASE_WRITE_INTERVAL_MIN, range); } // Write blocks, block index and best chain related state to disk.