Merge bitcoin/bitcoin#25571: refactor: Make mapBlocksUnknownParent local, and rename it

dd065dae9f refactor: Make mapBlocksUnknownParent local, and rename it (Hennadii Stepanov)

Pull request description:

  This PR is a second attempt at #19594. This PR has two motivations:

  - Improve code hygiene by eliminating a global variable, `mapBlocksUnknownParent`
  - Fix fuzz test OOM when running too long ([see #19594 comment](https://github.com/bitcoin/bitcoin/pull/19594#issuecomment-958801638))

  A minor added advantage is to release `mapBlocksUnknownParent` memory when the reindexing phase is done. The current situation is somewhat similar to a memory leak because this map exists unused for the remaining lifetime of the process. It's true that this map should be empty of data elements after use, but its internal metadata (indexing structures, etc.) can have non-trivial size because there can be many thousands of simultaneous elements in this map.

  This PR helps our efforts to reduce the use of global variables. This variable isn't just global, it's hidden inside a function (it looks like a local variable but has the `static` attribute).

  This global variable exists because the `-reindex` processing code calls `LoadExternalBlockFile()` multiple times (once for each block file), but that function must preserve some state between calls (the `mapBlocksUnknownParent` map). This PR fixes this by allocating this map as a local variable in the caller's scope and passing it in on each call. When reindexing completes, the map goes out of scope and is deallocated.

  I tested this manually by reindexing on mainnet and signet. Also, the existing `feature_reindex.py` functional test passes.

ACKs for top commit:
  mzumsande:
    re-ACK dd065dae9f
  theStack:
    re-ACK dd065dae9f
  shaavan:
    reACK dd065dae9f

Tree-SHA512: 9cd20e44d2fa1096dd405bc107bc065ea8f904f5b3f63080341b08d8cf57b790df565f58815c2f331377d044d5306708b4bf6bdfc5ef8d0ed85d8e97d744732c
This commit is contained in:
fanquake 2022-07-29 15:43:21 +01:00
commit 5871b5b5ab
No known key found for this signature in database
GPG key ID: 2EEB9F5CC09526C1
4 changed files with 60 additions and 12 deletions

View file

@ -21,6 +21,7 @@
#include <util/system.h>
#include <validation.h>
#include <map>
#include <unordered_map>
namespace node {
@ -834,6 +835,9 @@ void ThreadImport(ChainstateManager& chainman, std::vector<fs::path> vImportFile
// -reindex
if (fReindex) {
int nFile = 0;
// Map of disk positions for blocks with unknown parent (only used for reindex);
// parent hash -> child disk position, multiple children can have the same parent.
std::multimap<uint256, FlatFilePos> blocks_with_unknown_parent;
while (true) {
FlatFilePos pos(nFile, 0);
if (!fs::exists(GetBlockPosFilename(pos))) {
@ -844,7 +848,7 @@ void ThreadImport(ChainstateManager& chainman, std::vector<fs::path> vImportFile
break; // This error is logged in OpenBlockFile
}
LogPrintf("Reindexing block file blk%05u.dat...\n", (unsigned int)nFile);
chainman.ActiveChainstate().LoadExternalBlockFile(file, &pos);
chainman.ActiveChainstate().LoadExternalBlockFile(file, &pos, &blocks_with_unknown_parent);
if (ShutdownRequested()) {
LogPrintf("Shutdown requested. Exit %s\n", __func__);
return;

View file

@ -31,6 +31,13 @@ FUZZ_TARGET_INIT(load_external_block_file, initialize_load_external_block_file)
if (fuzzed_block_file == nullptr) {
return;
}
FlatFilePos flat_file_pos;
g_setup->m_node.chainman->ActiveChainstate().LoadExternalBlockFile(fuzzed_block_file, fuzzed_data_provider.ConsumeBool() ? &flat_file_pos : nullptr);
if (fuzzed_data_provider.ConsumeBool()) {
// Corresponds to the -reindex case (track orphan blocks across files).
FlatFilePos flat_file_pos;
std::multimap<uint256, FlatFilePos> blocks_with_unknown_parent;
g_setup->m_node.chainman->ActiveChainstate().LoadExternalBlockFile(fuzzed_block_file, &flat_file_pos, &blocks_with_unknown_parent);
} else {
// Corresponds to the -loadblock= case (orphan blocks aren't tracked across files).
g_setup->m_node.chainman->ActiveChainstate().LoadExternalBlockFile(fuzzed_block_file);
}
}

View file

@ -57,6 +57,7 @@
#include <warnings.h>
#include <algorithm>
#include <cassert>
#include <chrono>
#include <deque>
#include <numeric>
@ -4256,11 +4257,16 @@ bool CChainState::LoadGenesisBlock()
return true;
}
void CChainState::LoadExternalBlockFile(FILE* fileIn, FlatFilePos* dbp)
void CChainState::LoadExternalBlockFile(
FILE* fileIn,
FlatFilePos* dbp,
std::multimap<uint256, FlatFilePos>* blocks_with_unknown_parent)
{
AssertLockNotHeld(m_chainstate_mutex);
// Map of disk positions for blocks with unknown parent (only used for reindex)
static std::multimap<uint256, FlatFilePos> mapBlocksUnknownParent;
// Either both should be specified (-reindex), or neither (-loadblock).
assert(!dbp == !blocks_with_unknown_parent);
int64_t nStart = GetTimeMillis();
int nLoaded = 0;
@ -4310,8 +4316,9 @@ void CChainState::LoadExternalBlockFile(FILE* fileIn, FlatFilePos* dbp)
if (hash != m_params.GetConsensus().hashGenesisBlock && !m_blockman.LookupBlockIndex(block.hashPrevBlock)) {
LogPrint(BCLog::REINDEX, "%s: Out of order block %s, parent %s not known\n", __func__, hash.ToString(),
block.hashPrevBlock.ToString());
if (dbp)
mapBlocksUnknownParent.insert(std::make_pair(block.hashPrevBlock, *dbp));
if (dbp && blocks_with_unknown_parent) {
blocks_with_unknown_parent->emplace(block.hashPrevBlock, *dbp);
}
continue;
}
@ -4340,13 +4347,15 @@ void CChainState::LoadExternalBlockFile(FILE* fileIn, FlatFilePos* dbp)
NotifyHeaderTip(*this);
if (!blocks_with_unknown_parent) continue;
// Recursively process earlier encountered successors of this block
std::deque<uint256> queue;
queue.push_back(hash);
while (!queue.empty()) {
uint256 head = queue.front();
queue.pop_front();
std::pair<std::multimap<uint256, FlatFilePos>::iterator, std::multimap<uint256, FlatFilePos>::iterator> range = mapBlocksUnknownParent.equal_range(head);
auto range = blocks_with_unknown_parent->equal_range(head);
while (range.first != range.second) {
std::multimap<uint256, FlatFilePos>::iterator it = range.first;
std::shared_ptr<CBlock> pblockrecursive = std::make_shared<CBlock>();
@ -4361,7 +4370,7 @@ void CChainState::LoadExternalBlockFile(FILE* fileIn, FlatFilePos* dbp)
}
}
range.first++;
mapBlocksUnknownParent.erase(it);
blocks_with_unknown_parent->erase(it);
NotifyHeaderTip(*this);
}
}

View file

@ -575,8 +575,36 @@ public:
bool ResizeCoinsCaches(size_t coinstip_size, size_t coinsdb_size)
EXCLUSIVE_LOCKS_REQUIRED(::cs_main);
/** Import blocks from an external file */
void LoadExternalBlockFile(FILE* fileIn, FlatFilePos* dbp = nullptr)
/**
* Import blocks from an external file
*
* During reindexing, this function is called for each block file (datadir/blocks/blk?????.dat).
* It reads all blocks contained in the given file and attempts to process them (add them to the
* block index). The blocks may be out of order within each file and across files. Often this
* function reads a block but finds that its parent hasn't been read yet, so the block can't be
* processed yet. The function will add an entry to the blocks_with_unknown_parent map (which is
* passed as an argument), so that when the block's parent is later read and processed, this
* function can re-read the child block from disk and process it.
*
* Because a block's parent may be in a later file, not just later in the same file, the
* blocks_with_unknown_parent map must be passed in and out with each call. It's a multimap,
* rather than just a map, because multiple blocks may have the same parent (when chain splits
* or stale blocks exist). It maps from parent-hash to child-disk-position.
*
* This function can also be used to read blocks from user-specified block files using the
* -loadblock= option. There's no unknown-parent tracking, so the last two arguments are omitted.
*
*
* @param[in] fileIn FILE handle to file containing blocks to read
* @param[in] dbp (optional) Disk block position (only for reindex)
* @param[in,out] blocks_with_unknown_parent (optional) Map of disk positions for blocks with
* unknown parent, key is parent block hash
* (only used for reindex)
* */
void LoadExternalBlockFile(
FILE* fileIn,
FlatFilePos* dbp = nullptr,
std::multimap<uint256, FlatFilePos>* blocks_with_unknown_parent = nullptr)
EXCLUSIVE_LOCKS_REQUIRED(!m_chainstate_mutex);
/**