mirror of
https://github.com/bitcoin/bitcoin.git
synced 2025-01-25 10:43:19 -03:00
Merge bitcoin/bitcoin#27866: blockstorage: Return on fatal flush errors
d8041d4e04
blockstorage: Return on fatal undo file flush error (TheCharlatan)f0207e0030
blockstorage: Return on fatal block file flush error (TheCharlatan)5671c15f45
blockstorage: Mark FindBlockPos as nodiscard (TheCharlatan) Pull request description: The goal of this PR is to establish that fatal blockstorage flush errors should be treated as errors at their call site. Prior to this patch `FlushBlockFile` may have failed without returning in `Chainstate::FlushStateToDisk`, leading to a potential write from `WriteBlockIndexDB` that may refer to a block that is not fully flushed to disk yet. By returning if either `FlushUndoFile` or `FlushBlockFile` fail, we ensure that no further write operations take place that may lead to an inconsistent database when crashing. Add `[[nodiscard]]` annotations to them such that they are not ignored in future. Functions that call either `FlushUndoFile` or `FlushBlockFile`, need to handle these extra abort cases properly. Since `Chainstate::FlushStateToDisk` already produces an abort error in case of `WriteBlockIndexDB` failing, no extra logic for functions calling `Chainstate::FlushStateToDisk` is required. Besides `Chainstate::FlushStateToDisk`, `FlushBlockFile` is also called by `FindBlockPos`, while `FlushUndoFile` is only called by `FlushBlockFile` and `WriteUndoDataForBlock`. For both these cases, the flush error is not further bubbled up. Instead, the error is logged and a comment is provided why bubbling up an error would be less desirable in these cases. --- This pull request is part of a larger effort towards improving the shutdown / abort / fatal error handling in validation code. It is a first step towards implementing proper fatal error return type enforcement similar as proposed by theuni in this pull request [comment](https://github.com/bitcoin/bitcoin/pull/27711#issuecomment-1563561502). For ease of review of these critical changes, a first step would be checking that `AbortNode` leads to early and error-conveying returns at its call site. Further work for enforcing returns when `AbortNode` is called is done in https://github.com/bitcoin/bitcoin/pull/27862. ACKs for top commit: stickies-v: re-ACKd8041d4
ryanofsky: Code review ACKd8041d4e04
Tree-SHA512: 47ade9b873b15e567c8f60ca538d5a0daf32163e1031be3212a3a45eb492b866664b225f2787c9e40f3e0c089140157d8fd1039abc00c7bdfeec1b52ecd7e219
This commit is contained in:
commit
f562856d02
3 changed files with 47 additions and 10 deletions
|
@ -651,16 +651,19 @@ bool BlockManager::UndoReadFromDisk(CBlockUndo& blockundo, const CBlockIndex& in
|
|||
return true;
|
||||
}
|
||||
|
||||
void BlockManager::FlushUndoFile(int block_file, bool finalize)
|
||||
bool BlockManager::FlushUndoFile(int block_file, bool finalize)
|
||||
{
|
||||
FlatFilePos undo_pos_old(block_file, m_blockfile_info[block_file].nUndoSize);
|
||||
if (!UndoFileSeq().Flush(undo_pos_old, finalize)) {
|
||||
m_opts.notifications.flushError("Flushing undo file to disk failed. This is likely the result of an I/O error.");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void BlockManager::FlushBlockFile(bool fFinalize, bool finalize_undo)
|
||||
bool BlockManager::FlushBlockFile(bool fFinalize, bool finalize_undo)
|
||||
{
|
||||
bool success = true;
|
||||
LOCK(cs_LastBlockFile);
|
||||
|
||||
if (m_blockfile_info.size() < 1) {
|
||||
|
@ -668,17 +671,23 @@ void BlockManager::FlushBlockFile(bool fFinalize, bool finalize_undo)
|
|||
// chainstate init, when we call ChainstateManager::MaybeRebalanceCaches() (which
|
||||
// then calls FlushStateToDisk()), resulting in a call to this function before we
|
||||
// have populated `m_blockfile_info` via LoadBlockIndexDB().
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
assert(static_cast<int>(m_blockfile_info.size()) > m_last_blockfile);
|
||||
|
||||
FlatFilePos block_pos_old(m_last_blockfile, m_blockfile_info[m_last_blockfile].nSize);
|
||||
if (!BlockFileSeq().Flush(block_pos_old, fFinalize)) {
|
||||
m_opts.notifications.flushError("Flushing block file to disk failed. This is likely the result of an I/O error.");
|
||||
success = false;
|
||||
}
|
||||
// we do not always flush the undo file, as the chain tip may be lagging behind the incoming blocks,
|
||||
// e.g. during IBD or a sync after a node going offline
|
||||
if (!fFinalize || finalize_undo) FlushUndoFile(m_last_blockfile, finalize_undo);
|
||||
if (!fFinalize || finalize_undo) {
|
||||
if (!FlushUndoFile(m_last_blockfile, finalize_undo)) {
|
||||
success = false;
|
||||
}
|
||||
}
|
||||
return success;
|
||||
}
|
||||
|
||||
uint64_t BlockManager::CalculateCurrentUsage()
|
||||
|
@ -771,7 +780,19 @@ bool BlockManager::FindBlockPos(FlatFilePos& pos, unsigned int nAddSize, unsigne
|
|||
if (!fKnown) {
|
||||
LogPrint(BCLog::BLOCKSTORAGE, "Leaving block file %i: %s\n", m_last_blockfile, m_blockfile_info[m_last_blockfile].ToString());
|
||||
}
|
||||
FlushBlockFile(!fKnown, finalize_undo);
|
||||
|
||||
// Do not propagate the return code. The flush concerns a previous block
|
||||
// and undo file that has already been written to. If a flush fails
|
||||
// here, and we crash, there is no expected additional block data
|
||||
// inconsistency arising from the flush failure here. However, the undo
|
||||
// data may be inconsistent after a crash if the flush is called during
|
||||
// a reindex. A flush error might also leave some of the data files
|
||||
// untrimmed.
|
||||
if (!FlushBlockFile(!fKnown, finalize_undo)) {
|
||||
LogPrintLevel(BCLog::BLOCKSTORAGE, BCLog::Level::Warning,
|
||||
"Failed to flush previous block file %05i (finalize=%i, finalize_undo=%i) before opening new block file %05i\n",
|
||||
m_last_blockfile, !fKnown, finalize_undo, nFile);
|
||||
}
|
||||
m_last_blockfile = nFile;
|
||||
m_undo_height_in_last_blockfile = 0; // No undo data yet in the new file, so reset our undo-height tracking.
|
||||
}
|
||||
|
@ -862,7 +883,14 @@ bool BlockManager::WriteUndoDataForBlock(const CBlockUndo& blockundo, BlockValid
|
|||
// with the block writes (usually when a synced up node is getting newly mined blocks) -- this case is caught in
|
||||
// the FindBlockPos function
|
||||
if (_pos.nFile < m_last_blockfile && static_cast<uint32_t>(block.nHeight) == m_blockfile_info[_pos.nFile].nHeightLast) {
|
||||
FlushUndoFile(_pos.nFile, true);
|
||||
// Do not propagate the return code, a failed flush here should not
|
||||
// be an indication for a failed write. If it were propagated here,
|
||||
// the caller would assume the undo data not to be written, when in
|
||||
// fact it is. Note though, that a failed flush might leave the data
|
||||
// file untrimmed.
|
||||
if (!FlushUndoFile(_pos.nFile, true)) {
|
||||
LogPrintLevel(BCLog::BLOCKSTORAGE, BCLog::Level::Warning, "Failed to flush undo file %05i\n", _pos.nFile);
|
||||
}
|
||||
} else if (_pos.nFile == m_last_blockfile && static_cast<uint32_t>(block.nHeight) > m_undo_height_in_last_blockfile) {
|
||||
m_undo_height_in_last_blockfile = block.nHeight;
|
||||
}
|
||||
|
|
|
@ -119,9 +119,14 @@ private:
|
|||
*/
|
||||
bool LoadBlockIndex()
|
||||
EXCLUSIVE_LOCKS_REQUIRED(cs_main);
|
||||
void FlushBlockFile(bool fFinalize = false, bool finalize_undo = false);
|
||||
void FlushUndoFile(int block_file, bool finalize = false);
|
||||
bool FindBlockPos(FlatFilePos& pos, unsigned int nAddSize, unsigned int nHeight, uint64_t nTime, bool fKnown);
|
||||
|
||||
/** Return false if block file or undo file flushing fails. */
|
||||
[[nodiscard]] bool FlushBlockFile(bool fFinalize = false, bool finalize_undo = false);
|
||||
|
||||
/** Return false if undo file flushing fails. */
|
||||
[[nodiscard]] bool FlushUndoFile(int block_file, bool finalize = false);
|
||||
|
||||
[[nodiscard]] bool FindBlockPos(FlatFilePos& pos, unsigned int nAddSize, unsigned int nHeight, uint64_t nTime, bool fKnown);
|
||||
bool FindUndoPos(BlockValidationState& state, int nFile, FlatFilePos& pos, unsigned int nAddSize);
|
||||
|
||||
FlatFileSeq BlockFileSeq() const;
|
||||
|
|
|
@ -2594,7 +2594,11 @@ bool Chainstate::FlushStateToDisk(
|
|||
LOG_TIME_MILLIS_WITH_CATEGORY("write block and undo data to disk", BCLog::BENCH);
|
||||
|
||||
// First make sure all block and undo data is flushed to disk.
|
||||
m_blockman.FlushBlockFile();
|
||||
// TODO: Handle return error, or add detailed comment why it is
|
||||
// safe to not return an error upon failure.
|
||||
if (!m_blockman.FlushBlockFile()) {
|
||||
LogPrintLevel(BCLog::VALIDATION, BCLog::Level::Warning, "%s: Failed to flush block file.\n", __func__);
|
||||
}
|
||||
}
|
||||
|
||||
// Then update all block file information (which may refer to block and undo files).
|
||||
|
|
Loading…
Add table
Reference in a new issue