txgraph: (feature) destroying Ref means removing transaction

Before this commit, if a TxGraph::Ref object is destroyed, it becomes impossible
to refer to, but the actual corresponding transaction node in the TxGraph remains,
and remains indefinitely as there is no way to remove it.

Fix this by making the destruction of TxGraph::Ref trigger immediate removal of
the corresponding transaction in TxGraph, both in main and staging if it exists.
This commit is contained in:
Pieter Wuille 2024-12-03 11:25:49 -05:00
parent 972df15af2
commit f7cc1ed7d2
3 changed files with 168 additions and 65 deletions

View file

@ -148,6 +148,32 @@ struct SimTxGraph
if (oversized.has_value() && *oversized) oversized = std::nullopt;
}
/** Destroy the transaction from the graph, including from the removed set. This will
* trigger TxGraph::Ref::~Ref. reset_oversize controls whether the cached oversized
* value is cleared (destroying does not clear oversizedness in TxGraph of the main
* graph while staging exists). */
void DestroyTransaction(TxGraph::Ref& ref, bool reset_oversize)
{
// Special case the empty Ref.
if (!ref) return;
auto pos = Find(ref);
if (pos == MISSING) {
// Wipe the ref, if it exists, from the removed vector. Use std::partition rather
// than std::erase because we don't care about the order of the entries that
// remain.
auto remove = std::partition(removed.begin(), removed.end(), [&](auto& arg) { return arg.get() != &ref; });
removed.erase(remove, removed.end());
} else {
graph.RemoveTransactions(SetType::Singleton(pos));
simrevmap.erase(simmap[pos].get());
simmap[pos].reset();
// This may invalidate our cached oversized value.
if (reset_oversize && oversized.has_value() && *oversized) {
oversized = std::nullopt;
}
}
}
/** Construct the set with all positions in this graph corresponding to the specified
* TxGraph::Refs. All of them must occur in this graph and not be removed. */
SetType MakeSet(std::span<TxGraph::Ref* const> arg)
@ -333,6 +359,28 @@ FUZZ_TARGET(txgraph)
sim.SetTransactionFee(ref, fee);
}
break;
} else if (command-- == 0) {
// ~Ref.
std::vector<TxGraph::Ref*> to_destroy;
to_destroy.push_back(&pick_fn());
while (true) {
// Keep adding either the ancestors or descendants the already picked
// transactions have in both graphs (main and staging) combined. Destroying
// will trigger deletions in both, so to have consistent TxGraph behavior, the
// set must be closed under ancestors, or descendants, in both graphs.
auto old_size = to_destroy.size();
for (auto& sim : sims) sim.IncludeAncDesc(to_destroy, alt);
if (to_destroy.size() == old_size) break;
}
// The order in which these ancestors/descendants are destroyed should not matter;
// randomly shuffle them.
std::shuffle(to_destroy.begin(), to_destroy.end(), rng);
for (TxGraph::Ref* ptr : to_destroy) {
for (size_t level = 0; level < sims.size(); ++level) {
sims[level].DestroyTransaction(*ptr, level == sims.size() - 1);
}
}
break;
} else if (command-- == 0) {
// Cleanup.
auto cleaned = real->Cleanup();
@ -446,6 +494,10 @@ FUZZ_TARGET(txgraph)
if (alt) {
real->AbortStaging();
sims.pop_back();
// Reset the cached oversized value (if TxGraph::Ref destructions triggered
// removals of main transactions while staging was active, then aborting will
// cause it to be re-evaluated in TxGraph).
sims.back().oversized = std::nullopt;
} else {
real->CommitStaging();
sims.erase(sims.begin());

View file

@ -236,7 +236,7 @@ private:
* transaction. */
struct Entry
{
/** Pointer to the corresponding Ref object, if any. */
/** Pointer to the corresponding Ref object, or nullptr if none. */
Ref* m_ref;
/** Which Cluster and position therein this Entry appears in. ([0] = main, [1] = staged). */
Locator m_locator[MAX_LEVELS];
@ -311,19 +311,24 @@ public:
auto& entry = m_entries[idx];
Assume(entry.m_ref != nullptr);
entry.m_ref = nullptr;
if (!entry.IsWiped()) {
for (size_t level = 0; level < m_clustersets.size(); ++level) {
m_clustersets[level].m_to_remove.push_back(idx);
}
}
}
// Functions related to various normalization/application steps.
/** If cluster is not in the top level, copy it there, and return a pointer to it. */
Cluster* PullIn(Cluster* cluster) noexcept;
/** If cluster is not in to_level, copy it there, and return a pointer to it. */
Cluster* PullIn(Cluster* cluster, int to_level) noexcept;
/** Apply all removals queued up in m_to_remove to the relevant Clusters (which get a
* NEEDS_SPLIT* QualityLevel). */
void ApplyRemovals() noexcept;
/** Split an individual cluster (which must be in the top-level ClusterSet). */
void ApplyRemovals(int up_to_level) noexcept;
/** Split an individual cluster. */
void Split(Cluster& cluster) noexcept;
/** Split all clusters that need splitting in the top ClusterSet. */
void SplitAll() noexcept;
/** Populate the top ClusterSet's m_group_data (and m_oversized) based on m_deps_to_add. */
/** Split all clusters that need splitting in ClusterSets up to the specified level. */
void SplitAll(int up_to_level) noexcept;
/** Populate the top ClusterSet's m_group_data (and m_oversized) based on its m_deps_to_add. */
void GroupClusters() noexcept;
/** Merge the specified clusters. */
void Merge(std::span<Cluster*> to_merge) noexcept;
@ -370,6 +375,17 @@ void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept
}
// Update the transaction count.
--m_clustersets[level].m_txcount;
// Adjust the status of Locators of this transaction at higher levels.
for (size_t after_level = level + 1; after_level < m_clustersets.size(); ++after_level) {
if (entry.m_locator[after_level].IsPresent()) {
break;
} else if (entry.m_locator[after_level].IsRemoved()) {
entry.m_locator[after_level].SetMissing();
break;
} else {
--m_clustersets[after_level].m_txcount;
}
}
// If this was the last level the Locator was Present at, add it to the m_wiped list (which
// will be processed by Cleanup).
if (entry.IsWiped()) m_wiped.push_back(idx);
@ -424,6 +440,7 @@ std::vector<Cluster*> TxGraphImpl::GetConflicts() const noexcept
// All Clusters at level-1 containing transactions in m_removed are conflicts.
for (auto i : m_clustersets[level].m_removed) {
auto& entry = m_entries[i];
if (entry.IsWiped()) continue;
Assume(entry.m_locator[level - 1].IsPresent());
ret.push_back(entry.m_locator[level - 1].cluster);
}
@ -776,9 +793,8 @@ Cluster* TxGraphImpl::FindCluster(GraphIndex idx, int level) const noexcept
return nullptr;
}
Cluster* TxGraphImpl::PullIn(Cluster* cluster) noexcept
Cluster* TxGraphImpl::PullIn(Cluster* cluster, int to_level) noexcept
{
int to_level = m_clustersets.size() - 1;
if (to_level == 0) return cluster;
int level = cluster->m_level;
Assume(level <= to_level);
@ -794,40 +810,49 @@ Cluster* TxGraphImpl::PullIn(Cluster* cluster) noexcept
return cluster;
}
void TxGraphImpl::ApplyRemovals() noexcept
void TxGraphImpl::ApplyRemovals(int up_to_level) noexcept
{
int level = m_clustersets.size() - 1;
auto& clusterset = m_clustersets[level];
auto& to_remove = clusterset.m_to_remove;
// Skip if there is nothing to remove.
if (to_remove.empty()) return;
// Wipe cached m_group_data and m_oversized, as they may be invalidated by removals.
clusterset.m_group_data = std::nullopt;
clusterset.m_group_clusters.clear();
clusterset.m_oversized = false;
// Pull in all Clusters that are not in the top ClusterSet.
for (GraphIndex index : clusterset.m_to_remove) {
auto cluster = FindCluster(index, level);
PullIn(cluster);
}
// Group the set of to-be-removed entries by Cluster*.
std::sort(to_remove.begin(), to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept {
return std::less{}(m_entries[a].m_locator[level].cluster, m_entries[b].m_locator[level].cluster);
});
// Process per Cluster.
std::span to_remove_span{to_remove};
while (!to_remove_span.empty()) {
Cluster* cluster = m_entries[to_remove_span.front()].m_locator[level].cluster;
if (cluster != nullptr) {
// If the first to_remove_span entry's Cluster exists, hand to_remove_span to it, so it
// can pop off whatever applies to it.
cluster->ApplyRemovals(*this, to_remove_span);
} else {
// Otherwise, skip this already-removed entry.
to_remove_span = to_remove_span.subspan(1);
Assume(up_to_level >= 0 && size_t(up_to_level) < m_clustersets.size());
for (int level = 0; level <= up_to_level; ++level) {
auto& clusterset = m_clustersets[level];
auto& to_remove = clusterset.m_to_remove;
// Skip if there is nothing to remove in this level.
if (to_remove.empty()) continue;
// Wipe cached m_group_data and m_oversized, as they may be invalidated by removals.
clusterset.m_group_data = std::nullopt;
clusterset.m_group_clusters.clear();
if (size_t(level) == m_clustersets.size() - 1) {
// Do not wipe the oversized state of a lower level graph (main) if a higher level
// one (staging) exists. The reason for this is that the alternative would mean that
// cluster merges may need to be applied to a formerly-oversized main graph while
// staging exists (to satisfy chunk feerate queries into main, for example), and such
// merges could conflict with pulls of some of their constituents into staging.
clusterset.m_oversized = false;
}
// Pull in all Clusters that are not in the ClusterSet at level level.
for (GraphIndex index : clusterset.m_to_remove) {
auto cluster = FindCluster(index, level);
if (cluster != nullptr) PullIn(cluster, level);
}
// Group the set of to-be-removed entries by Cluster*.
std::sort(to_remove.begin(), to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept {
return std::less{}(m_entries[a].m_locator[level].cluster, m_entries[b].m_locator[level].cluster);
});
// Process per Cluster.
std::span to_remove_span{to_remove};
while (!to_remove_span.empty()) {
Cluster* cluster = m_entries[to_remove_span.front()].m_locator[level].cluster;
if (cluster != nullptr) {
// If the first to_remove_span entry's Cluster exists, hand to_remove_span to it, so it
// can pop off whatever applies to it.
cluster->ApplyRemovals(*this, to_remove_span);
} else {
// Otherwise, skip this already-removed entry.
to_remove_span = to_remove_span.subspan(1);
}
}
clusterset.m_to_remove.clear();
}
clusterset.m_to_remove.clear();
}
void TxGraphImpl::SwapIndexes(GraphIndex a, GraphIndex b) noexcept
@ -840,7 +865,7 @@ void TxGraphImpl::SwapIndexes(GraphIndex a, GraphIndex b) noexcept
for (int i = 0; i < 2; ++i) {
GraphIndex idx = i ? b : a;
Entry& entry = m_entries[idx];
// Update linked Ref.
// Update linked Ref, if any exists.
if (entry.m_ref) GetRefIndex(*entry.m_ref) = idx;
// Update the locators for both levels. The rest of the Entry information will not change,
// so no need to invoke Cluster::Updated().
@ -898,7 +923,7 @@ void TxGraphImpl::Split(Cluster& cluster) noexcept
{
// To split a Cluster, first make sure all removals are applied (as we might need to split
// again afterwards otherwise).
ApplyRemovals();
ApplyRemovals(cluster.m_level);
bool del = cluster.Split(*this);
if (del) {
// Cluster::Split reports whether the Cluster is to be deleted.
@ -906,14 +931,17 @@ void TxGraphImpl::Split(Cluster& cluster) noexcept
}
}
void TxGraphImpl::SplitAll() noexcept
void TxGraphImpl::SplitAll(int up_to_level) noexcept
{
Assume(up_to_level >= 0 && size_t(up_to_level) < m_clustersets.size());
// Before splitting all Cluster, first make sure all removals are applied.
ApplyRemovals();
for (auto quality : {QualityLevel::NEEDS_SPLIT, QualityLevel::NEEDS_SPLIT_ACCEPTABLE, QualityLevel::NEEDS_SPLIT_OPTIMAL}) {
auto& queue = m_clustersets.back().m_clusters[int(quality)];
while (!queue.empty()) {
Split(*queue.back().get());
ApplyRemovals(up_to_level);
for (int level = 0; level <= up_to_level; ++level) {
for (auto quality : {QualityLevel::NEEDS_SPLIT, QualityLevel::NEEDS_SPLIT_ACCEPTABLE, QualityLevel::NEEDS_SPLIT_OPTIMAL}) {
auto& queue = m_clustersets[level].m_clusters[int(quality)];
while (!queue.empty()) {
Split(*queue.back().get());
}
}
}
}
@ -925,7 +953,7 @@ void TxGraphImpl::GroupClusters() noexcept
// Before computing which Clusters need to be merged together, first apply all removals and
// split the Clusters into connected components. If we would group first, we might end up
// with inefficient and/or oversized Clusters which just end up being split again anyway.
SplitAll();
SplitAll(level);
auto& clusterset = m_clustersets[level];
// If the groupings have been computed already, nothing is left to be done.
@ -1166,7 +1194,7 @@ void TxGraphImpl::ApplyDependencies() noexcept
// Pull in all the Clusters the dependencies are in.
for (Cluster*& cluster : clusterset.m_group_clusters) {
cluster = PullIn(cluster);
cluster = PullIn(cluster, level);
}
// For each group of to-be-merged Clusters.
@ -1292,7 +1320,7 @@ bool TxGraphImpl::Exists(const Ref& arg, bool main_only) noexcept
Assume(GetRefGraph(arg) == this);
size_t level = main_only ? 0 : m_clustersets.size() - 1;
// Make sure the transaction isn't scheduled for removal.
if (level == m_clustersets.size() - 1) ApplyRemovals();
ApplyRemovals(level);
auto cluster = FindCluster(GetRefIndex(arg), level);
return cluster != nullptr;
}
@ -1303,6 +1331,7 @@ std::vector<TxGraph::Ref*> Cluster::GetAncestorRefs(const TxGraphImpl& graph, Cl
// Translate all ancestors (in arbitrary order) to Refs (if they have any), and return them.
for (auto idx : m_depgraph.Ancestors(idx)) {
const auto& entry = graph.m_entries[m_mapping[idx]];
Assume(entry.m_ref != nullptr);
ret.push_back(entry.m_ref);
}
return ret;
@ -1314,6 +1343,7 @@ std::vector<TxGraph::Ref*> Cluster::GetDescendantRefs(const TxGraphImpl& graph,
// Translate all descendants (in arbitrary order) to Refs (if they have any), and return them.
for (auto idx : m_depgraph.Descendants(idx)) {
const auto& entry = graph.m_entries[m_mapping[idx]];
Assume(entry.m_ref != nullptr);
ret.push_back(entry.m_ref);
}
return ret;
@ -1325,6 +1355,7 @@ std::vector<TxGraph::Ref*> Cluster::GetClusterRefs(const TxGraphImpl& graph) noe
// Translate all transactions in the Cluster (in linearization order) to Refs.
for (auto idx : m_linearization) {
const auto& entry = graph.m_entries[m_mapping[idx]];
Assume(entry.m_ref != nullptr);
ret.push_back(entry.m_ref);
}
return ret;
@ -1355,7 +1386,8 @@ std::vector<TxGraph::Ref*> TxGraphImpl::GetAncestors(const Ref& arg, bool main_o
Assume(GetRefGraph(arg) == this);
// Apply all dependencies, as the result might be incorrect otherwise.
size_t level = main_only ? 0 : m_clustersets.size() - 1;
ApplyDependencies();
ApplyRemovals(level);
if (level == m_clustersets.size() - 1) ApplyDependencies();
// Ancestry cannot be known if unapplied dependencies remain.
Assume(!m_clustersets[level].m_oversized);
// Find the Cluster the argument is in, and return the empty vector if it isn't in any.
@ -1373,7 +1405,8 @@ std::vector<TxGraph::Ref*> TxGraphImpl::GetDescendants(const Ref& arg, bool main
Assume(GetRefGraph(arg) == this);
// Apply all dependencies, as the result might be incorrect otherwise.
size_t level = main_only ? 0 : m_clustersets.size() - 1;
ApplyDependencies();
ApplyRemovals(level);
if (level == m_clustersets.size() - 1) ApplyDependencies();
// Ancestry cannot be known if unapplied dependencies remain.
Assume(!m_clustersets[level].m_oversized);
// Find the Cluster the argument is in, and return the empty vector if it isn't in any.
@ -1391,7 +1424,8 @@ std::vector<TxGraph::Ref*> TxGraphImpl::GetCluster(const Ref& arg, bool main_onl
Assume(GetRefGraph(arg) == this);
// Apply all dependencies, as the result might be incorrect otherwise.
size_t level = main_only ? 0 : m_clustersets.size() - 1;
ApplyDependencies();
SplitAll(level);
if (level == m_clustersets.size() - 1) ApplyDependencies();
// Cluster linearization cannot be known if unapplied dependencies remain.
Assume(!m_clustersets[level].m_oversized);
// Find the Cluster the argument is in, and return the empty vector if it isn't in any.
@ -1405,7 +1439,7 @@ std::vector<TxGraph::Ref*> TxGraphImpl::GetCluster(const Ref& arg, bool main_onl
TxGraph::GraphIndex TxGraphImpl::GetTransactionCount(bool main_only) noexcept
{
size_t level = main_only ? 0 : m_clustersets.size() - 1;
ApplyRemovals();
ApplyRemovals(level);
return m_clustersets[level].m_txcount;
}
@ -1415,12 +1449,12 @@ FeeFrac TxGraphImpl::GetIndividualFeerate(const Ref& arg) noexcept
// transaction having been removed already.
if (GetRefGraph(arg) == nullptr) return {};
Assume(GetRefGraph(arg) == this);
// Apply removals, so that we can correctly report FeeFrac{} for non-existing transaction.
ApplyRemovals();
// Find the cluster the argument is in (the level does not matter as individual feerates will
// be identical if it occurs in both), and return the empty FeeFrac if it isn't in any.
Cluster* cluster{nullptr};
for (int level = 0; size_t(level) < m_clustersets.size(); ++level) {
// Apply removals, so that we can correctly report FeeFrac{} for non-existing transaction.
ApplyRemovals(level);
if (m_entries[GetRefIndex(arg)].m_locator[level].IsPresent()) {
cluster = m_entries[GetRefIndex(arg)].m_locator[level].cluster;
break;
@ -1438,7 +1472,8 @@ FeeFrac TxGraphImpl::GetMainChunkFeerate(const Ref& arg) noexcept
if (GetRefGraph(arg) == nullptr) return {};
Assume(GetRefGraph(arg) == this);
// Apply all dependencies, as the result might be inaccurate otherwise.
ApplyDependencies();
SplitAll(0);
if (m_clustersets.size() == 1) ApplyDependencies();
// Chunk feerates cannot be accurately known if unapplied dependencies remain.
Assume(!m_clustersets[0].m_oversized);
// Find the cluster the argument is in, and return the empty FeeFrac if it isn't in any.
@ -1456,7 +1491,7 @@ bool TxGraphImpl::IsOversized(bool main_only) noexcept
size_t level = main_only ? 0 : m_clustersets.size() - 1;
// Find which Clusters will need to be merged together, as that is where the oversize
// property is assessed.
GroupClusters();
if (level == m_clustersets.size() - 1) GroupClusters();
return m_clustersets[level].m_oversized;
}
@ -1499,6 +1534,11 @@ void TxGraphImpl::AbortStaging() noexcept
}
// Destroy the staging graph data.
m_clustersets.pop_back();
if (!m_clustersets.back().m_group_data.has_value()) {
// In case m_oversized in main was kept after a Ref destruction while staging exists, we
// need to re-evaluate m_oversized now.
m_clustersets.back().m_oversized = false;
}
}
void TxGraphImpl::CommitStaging() noexcept
@ -1627,9 +1667,9 @@ void TxGraphImpl::SanityCheck() const
if (entry.IsWiped()) {
// If the Entry is not IsPresent anywhere, it should be in m_wiped.
expected_wiped.insert(idx);
} else {
// Every non-wiped Entry must have a Ref that points back to it.
assert(entry.m_ref != nullptr);
}
if (entry.m_ref != nullptr) {
// If a Ref is pointed to, it must point back to this GraphIndex in this TxGraphImpl.
assert(GetRefGraph(*entry.m_ref) == this);
assert(GetRefIndex(*entry.m_ref) == idx);
}
@ -1690,6 +1730,14 @@ void TxGraphImpl::SanityCheck() const
// Verify that the contents of m_removed matches what was expected based on the Entry vector.
std::set<GraphIndex> actual_removed(clusterset.m_removed.begin(), clusterset.m_removed.end());
for (auto i : expected_wiped) {
// If a transaction exists in both main and staging, and is removed from staging (adding
// it to m_removed there), and consequently destroyed (wiping the locator completely),
// it can remain in m_removed despite not having an IsRemoved() locator. Exclude those
// transactions from the comparison here.
actual_removed.erase(i);
expected_removed[level].erase(i);
}
assert(actual_removed == expected_removed[level]);
}

View file

@ -42,7 +42,8 @@ public:
Ref() noexcept = default;
/** Test if this Ref is not empty. */
explicit operator bool() const noexcept { return m_graph != nullptr; }
/** Destroy this Ref. */
/** Destroy this Ref. If it is not empty, the corresponding transaction is removed (in both
* main and staging, if it exists). */
virtual ~Ref();
// Support moving a Ref.
Ref& operator=(Ref&& other) noexcept;
@ -119,7 +120,9 @@ public:
/** Determine whether the graph is oversized (contains a connected component of more than the
* configured maximum cluster count). If main_only is false and a staging graph exists, it is
* queried; otherwise the main graph is queried. Some of the functions below are not available
* for oversized graphs. The mutators above are always available. */
* for oversized graphs. The mutators above are always available. Removing a transaction by
* destroying its Ref while staging exists will not clear main's oversizedness until staging
* is aborted or committed. */
virtual bool IsOversized(bool main_only = false) noexcept = 0;
/** Get the feerate of the chunk which transaction arg is in in the main graph. Returns the
* empty FeeFrac if arg does not exist in the main graph. The main graph must not be