From 29e3d06975de1c1a2e8c694da1dea5d964a3aa40 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Fri, 25 Oct 2024 14:11:50 -0400 Subject: [PATCH 01/20] clusterlin: add FixLinearization function + fuzz test This function takes an existing ordering for transactions in a DepGraph, and makes it a valid linearization for it (i.e., topological). Any topological prefix of the input remains untouched. --- src/cluster_linearize.h | 29 +++++++++++++++ src/test/fuzz/cluster_linearize.cpp | 55 +++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index 50b121d9e4c..870106e0fcd 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -1336,6 +1336,35 @@ std::vector MergeLinearizations(const DepGraph& depgraph, return ret; } +/** Make linearization topological, retaining its ordering where possible. */ +template +void FixLinearization(const DepGraph& depgraph, Span linearization) noexcept +{ + // This algorithm can be summarized as moving every element in the linearization backwards + // until it is placed after all its ancestors. + SetType done; + const auto len = linearization.size(); + // Iterate over the elements of linearization from back to front (i is distance from back). + for (ClusterIndex i = 0; i < len; ++i) { + /** The element at that position. */ + ClusterIndex elem = linearization[len - 1 - i]; + /** j represents how far from the back of the linearization elem should be placed. */ + ClusterIndex j = i; + // Figure out which already-processed elements need to be moved before elem. + SetType place_before = done & depgraph.Ancestors(elem); + // Find which position to place elem in (updating j), continuously moving the elements + // in between forward. + while (place_before.Any()) { + auto to_swap = linearization[len - 1 - (j - 1)]; + place_before.Reset(to_swap); + linearization[len - 1 - (j--)] = to_swap; + } + // Put elem in its final position and mark it as done. 
+ linearization[len - 1 - j] = elem; + done.Set(elem); + } +} + } // namespace cluster_linearize #endif // BITCOIN_CLUSTER_LINEARIZE_H diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index 5b3770636ab..3560f3a5770 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -1118,3 +1118,58 @@ FUZZ_TARGET(clusterlin_merge) auto cmp2 = CompareChunks(chunking_merged, chunking2); assert(cmp2 >= 0); } + +FUZZ_TARGET(clusterlin_fix_linearization) +{ + // Verify expected properties of FixLinearization() on arbitrary linearizations. + + // Retrieve a depgraph from the fuzz input. + SpanReader reader(buffer); + DepGraph depgraph; + try { + reader >> Using(depgraph); + } catch (const std::ios_base::failure&) {} + + // Construct an arbitrary linearization (not necessarily topological for depgraph). + std::vector linearization; + /** Which transactions of depgraph are yet to be included in linearization. */ + TestBitSet todo = depgraph.Positions(); + /** Whether the linearization constructed so far is topological. */ + bool topological{true}; + /** How long the prefix of the constructed linearization is which is topological. */ + size_t topo_prefix = 0; + while (todo.Any()) { + // Figure out the index in all elements of todo to append to linearization next. + uint64_t val{0}; + try { + reader >> VARINT(val); + } catch (const std::ios_base::failure&) {} + val %= todo.Count(); + // Find which element in todo that corresponds to. + for (auto i : todo) { + if (val == 0) { + // Found it. + linearization.push_back(i); + // Track whether or not the linearization is topological for depgraph. + todo.Reset(i); + if (todo.Overlaps(depgraph.Ancestors(i))) topological = false; + topo_prefix += topological; + break; + } + --val; + } + } + assert(linearization.size() == depgraph.TxCount()); + + // Then make a fixed copy of linearization. 
+ auto linearization_fixed = linearization; + FixLinearization(depgraph, linearization_fixed); + // Sanity check it (which includes testing whether it is topological). + SanityCheck(depgraph, linearization_fixed); + + // If the linearization was topological already, FixLinearization cannot have modified it. + if (topological) assert(linearization_fixed == linearization); + // In any case, the topo_prefix long prefix of linearization cannot be changed. + assert(std::equal(linearization.begin(), linearization.begin() + topo_prefix, + linearization_fixed.begin())); +} From 5f3d8d1f401590409b6d421209d3356e54a918c4 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Tue, 12 Nov 2024 15:13:58 -0500 Subject: [PATCH 02/20] clusterlin: make IsAcyclic() a DepGraph member function ... instead of being a separate test-only function. --- src/cluster_linearize.h | 11 +++++++++++ src/test/fuzz/cluster_linearize.cpp | 2 +- src/test/util/cluster_linearize.h | 14 +------------- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index 870106e0fcd..3b8893ccaec 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -309,6 +309,17 @@ public: return a < b; }); } + + /** Check if this graph is acyclic. */ + bool IsAcyclic() const noexcept + { + for (auto i : Positions()) { + if ((Ancestors(i) & Descendants(i)) != SetType::Singleton(i)) { + return false; + } + } + return true; + } }; /** A set of transactions together with their aggregate feerate. */ diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index 3560f3a5770..68f8dcd183e 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -407,7 +407,7 @@ FUZZ_TARGET(clusterlin_depgraph_serialization) SanityCheck(depgraph); // Verify the graph is a DAG. 
- assert(IsAcyclic(depgraph)); + assert(depgraph.IsAcyclic()); } FUZZ_TARGET(clusterlin_components) diff --git a/src/test/util/cluster_linearize.h b/src/test/util/cluster_linearize.h index 871aa9d74ed..cdde421637a 100644 --- a/src/test/util/cluster_linearize.h +++ b/src/test/util/cluster_linearize.h @@ -23,18 +23,6 @@ using namespace cluster_linearize; using TestBitSet = BitSet<32>; -/** Check if a graph is acyclic. */ -template -bool IsAcyclic(const DepGraph& depgraph) noexcept -{ - for (ClusterIndex i : depgraph.Positions()) { - if ((depgraph.Ancestors(i) & depgraph.Descendants(i)) != SetType::Singleton(i)) { - return false; - } - } - return true; -} - /** A formatter for a bespoke serialization for acyclic DepGraph objects. * * The serialization format outputs information about transactions in a topological order (parents @@ -337,7 +325,7 @@ void SanityCheck(const DepGraph& depgraph) assert((depgraph.Descendants(child) & children).IsSubsetOf(SetType::Singleton(child))); } } - if (IsAcyclic(depgraph)) { + if (depgraph.IsAcyclic()) { // If DepGraph is acyclic, serialize + deserialize must roundtrip. std::vector ser; VectorWriter writer(ser, 0); From b48703029761b1eeff6a42fc30a52e8aa4cdf110 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 21 Aug 2024 14:37:00 -0400 Subject: [PATCH 03/20] txgraph: (feature) add initial version This adds an initial version of the txgraph module, with the TxGraph class. It encapsulates knowledge about the fees, sizes, and dependencies between all mempool transactions, but nothing else. In particular, it lacks knowledge about txids, inputs, outputs, CTransactions, ... and so for. Instead, it exposes a generic TxGraph::Ref type to reference nodes in the TxGraph, which can be passed around and stored by layers on top. 
--- src/CMakeLists.txt | 1 + src/txgraph.cpp | 1193 ++++++++++++++++++++++++++++++++++++++++++++ src/txgraph.h | 125 +++++ 3 files changed, 1319 insertions(+) create mode 100644 src/txgraph.cpp create mode 100644 src/txgraph.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 889c00c7832..a03c2b1bb01 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -280,6 +280,7 @@ add_library(bitcoin_node STATIC EXCLUDE_FROM_ALL signet.cpp torcontrol.cpp txdb.cpp + txgraph.cpp txmempool.cpp txorphanage.cpp txrequest.cpp diff --git a/src/txgraph.cpp b/src/txgraph.cpp new file mode 100644 index 00000000000..cc63dbde9fc --- /dev/null +++ b/src/txgraph.cpp @@ -0,0 +1,1193 @@ +// Copyright (c) The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace { + +using namespace cluster_linearize; + +// Forward declare the TxGraph implementation class. +class TxGraphImpl; + +/** Position of a ClusterIndex within a Cluster::m_linearization. */ +using LinearizationIndex = uint32_t; +/** Position of a Cluster within Graph::m_clusters. */ +using ClusterSetIndex = uint32_t; + +/** Quality levels for cached linearizations. */ +enum class QualityLevel +{ + /** This cluster may have multiple disconnected components, which are all NEEDS_RELINEARIZE. */ + NEEDS_SPLIT, + /** This cluster has undergone changes that warrant re-linearization. */ + NEEDS_RELINEARIZE, + /** The minimal level of linearization has been performed, but it is not known to be optimal. */ + ACCEPTABLE, + /** The linearization is known to be optimal. */ + OPTIMAL, + /** This cluster is not registered in any m_clusters. + * This must be the last entry in QualityLevel as m_clusters is sized using it. */ + NONE, +}; + +/** A grouping of connected transactions inside a TxGraphImpl. 
*/ +class Cluster +{ + friend class TxGraphImpl; + using GraphIndex = TxGraph::GraphIndex; + using SetType = BitSet; + /** The DepGraph for this cluster, holding all feerates, and ancestors/descendants. */ + DepGraph m_depgraph; + /** m_mapping[i] gives the GraphIndex for the position i transaction in m_depgraph. */ + std::vector m_mapping; + /** The current linearization of the cluster. Size equals m_depgraph.TxCount(). + * This is always kept topological. */ + std::vector m_linearization; + /** The quality level of m_linearization. */ + QualityLevel m_quality{QualityLevel::NONE}; + /** Which position this Cluster has in TxGraphImpl::m_clusters[m_quality]. */ + ClusterSetIndex m_setindex{ClusterSetIndex(-1)}; + +public: + /** Construct an empty Cluster. */ + Cluster() noexcept = default; + /** Construct a singleton Cluster. */ + explicit Cluster(TxGraphImpl& graph, const FeeFrac& feerate, GraphIndex graph_index) noexcept; + + // Cannot move or copy (would invalidate Cluster* in Locator and TxGraphImpl). + Cluster(const Cluster&) = delete; + Cluster& operator=(const Cluster&) = delete; + Cluster(Cluster&&) = delete; + Cluster& operator=(Cluster&&) = delete; + + // Generic helper functions. + + /** Get the number of transactions in this Cluster. */ + LinearizationIndex GetTxCount() const noexcept { return m_linearization.size(); } + /** Only called by TxGraphImpl::SwapIndexes. */ + void UpdateMapping(ClusterIndex cluster_idx, GraphIndex graph_idx) noexcept { m_mapping[cluster_idx] = graph_idx; } + /** Push changes to Cluster and its linearization to the TxGraphImpl Entry objects. */ + void Updated(TxGraphImpl& graph) noexcept; + + // Functions that implement the Cluster-specific side of internal TxGraphImpl mutations. + + /** Apply any number of removals from the front of to_remove, popping them off. */ + void ApplyRemovals(TxGraphImpl& graph, std::span& to_remove) noexcept; + /** Split this cluster (must have a NEEDS_SPLIT* quality). 
Returns whether to delete this + * Cluster afterwards. */ + [[nodiscard]] bool Split(TxGraphImpl& graph) noexcept; + /** Move all transactions from cluster to *this (as separate components). */ + void Merge(TxGraphImpl& graph, Cluster& cluster) noexcept; + /** Given a span of (parent, child) pairs that all belong to this Cluster (or be removed), + apply them. */ + void ApplyDependencies(TxGraphImpl& graph, std::span> to_apply) noexcept; + /** Improve the linearization of this Cluster. */ + void Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept; + + // Functions that implement the Cluster-specific side of public TxGraph functions. + + /** Get a vector of Refs for the ancestors of a given Cluster element. */ + std::vector GetAncestorRefs(const TxGraphImpl& graph, ClusterIndex idx) noexcept; + /** Get a vector of Refs for the descendants of a given Cluster element. */ + std::vector GetDescendantRefs(const TxGraphImpl& graph, ClusterIndex idx) noexcept; + /** Get a vector of Refs for all elements of this Cluster, in linearization order. */ + std::vector GetClusterRefs(const TxGraphImpl& graph) noexcept; + /** Get the individual transaction feerate of a Cluster element. */ + FeeFrac GetIndividualFeerate(ClusterIndex idx) noexcept; + /** Modify the fee of a Cluster element. */ + void SetFee(TxGraphImpl& graph, ClusterIndex idx, int64_t fee) noexcept; +}; + +/** The transaction graph. + * + * The overall design of the data structure consists of 3 interlinked representations: + * - The transactions (held as a vector of TxGraphImpl::Entry inside TxGraphImpl). + * - The clusters (Cluster objects in per-quality vectors inside TxGraphImpl). + * - The Refs (TxGraph::Ref objects, held externally by users of the TxGraph class) + * + * Clusters and Refs contain the index of the Entry objects they refer to, and the Entry objects + * refer back to the Clusters and Refs the corresponding transaction is contained in. 
+ * + * While redundant, this permits moving all of them independently, without invalidating things + * or costly iteration to fix up everything: + * - Entry objects can be moved to fill holes left by removed transactions in the Entry vector + * (see TxGraphImpl::Cleanup). + * - Clusters can be rewritten continuously (removals can cause them to split, new dependencies + * can cause them to be merged). + * - Ref objects can be held outside the class, while permitting them to be moved around, and + * inherited from. + */ +class TxGraphImpl final : public TxGraph +{ + friend class Cluster; +private: + /** Internal RNG. */ + FastRandomContext m_rng; + + /** Information about one group of Clusters to be merged. */ + struct GroupData + { + /** Where the clusters to be merged start in m_group_clusters. */ + uint32_t m_cluster_offset; + /** How many clusters to merge. */ + uint32_t m_cluster_count; + /** Where the dependencies for this cluster group in m_deps_to_add start. */ + uint32_t m_deps_offset; + /** How many dependencies to add. */ + uint32_t m_deps_count; + }; + + /** The vectors of clusters, one vector per quality level. ClusterSetIndex indexes into each. */ + std::vector> m_clusters[int(QualityLevel::NONE)]; + /** Which removals have yet to be applied. */ + std::vector m_to_remove; + /** Which dependencies are to be added ((parent,child) pairs). GroupData::m_deps_offset indexes + * into this. */ + std::vector> m_deps_to_add; + /** Which clusters are to be merged. GroupData::m_cluster_offset indexes into this. */ + std::vector m_group_clusters; + /** Information about the merges to be performed, if known. */ + std::optional> m_group_data = std::vector{}; + /** Total number of transactions in this ClusterSet (explicit + implicit). */ + GraphIndex m_txcount{0}; + + /** A Locator that describes whether, where, and in which Cluster an Entry appears. */ + struct Locator + { + /** Which Cluster the Entry appears in (nullptr = missing). 
*/ + Cluster* cluster{nullptr}; + /** Where in the Cluster it appears (only if cluster != nullptr). */ + ClusterIndex index{0}; + + /** Mark this Locator as missing. */ + void SetMissing() noexcept { cluster = nullptr; index = 0; } + /** Mark this Locator as present, in the specified Cluster. */ + void SetPresent(Cluster* c, ClusterIndex i) noexcept { cluster = c; index = i; } + /** Check if this Locator is present (in some Cluster). */ + bool IsPresent() const noexcept { return cluster != nullptr; } + }; + + /** A class of objects held internally in TxGraphImpl, with information about a single + * transaction. */ + struct Entry + { + /** Pointer to the corresponding Ref object, if any. */ + Ref* m_ref; + /** Which Cluster and position therein this Entry appears in. */ + Locator m_locator; + /** The chunk feerate of this transaction (if not missing) */ + FeeFrac m_chunk_feerate; + + /** Check whether this Entry is not present in any Cluster. */ + bool IsWiped() const noexcept + { + return !m_locator.IsPresent(); + } + }; + + /** The set of all transactions. GraphIndex values index into this. */ + std::vector m_entries; + + /** Set of Entries that have no IsPresent locators left, and need to be cleaned up. */ + std::vector m_wiped; + +public: + /** Construct a new TxGraphImpl. */ + explicit TxGraphImpl() noexcept {} + + // Cannot move or copy (would invalidate TxGraphImpl* in Ref, MiningOrder, EvictionOrder). + TxGraphImpl(const TxGraphImpl&) = delete; + TxGraphImpl& operator=(const TxGraphImpl&) = delete; + TxGraphImpl(TxGraphImpl&&) = delete; + TxGraphImpl& operator=(TxGraphImpl&&) = delete; + + // Simple helper functions. + + /** Swap the Entrys referred to by a and b. */ + void SwapIndexes(GraphIndex a, GraphIndex b) noexcept; + /** Extract a Cluster. */ + std::unique_ptr ExtractCluster(QualityLevel quality, ClusterSetIndex setindex) noexcept; + /** Delete a Cluster. */ + void DeleteCluster(Cluster& cluster) noexcept; + /** Insert a Cluster. 
*/ + ClusterSetIndex InsertCluster(std::unique_ptr&& cluster, QualityLevel quality) noexcept; + /** Change the QualityLevel of a Cluster (identified by old_quality and old_index). */ + void SetClusterQuality(QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept; + + // Functions for handling Refs. + + /** Only called by Ref's move constructor/assignment to update Ref locations. */ + void UpdateRef(GraphIndex idx, Ref& new_location) noexcept final + { + auto& entry = m_entries[idx]; + Assume(entry.m_ref != nullptr); + entry.m_ref = &new_location; + } + + /** Only called by Ref::~Ref to unlink Refs. */ + void UnlinkRef(GraphIndex idx) noexcept final + { + auto& entry = m_entries[idx]; + Assume(entry.m_ref != nullptr); + entry.m_ref = nullptr; + } + + // Functions related to various normalization/application steps. + /** Apply all removals queued up in m_to_remove to the relevant Clusters (which get a + * NEEDS_SPLIT* QualityLevel). */ + void ApplyRemovals() noexcept; + /** Split an individual cluster. */ + void Split(Cluster& cluster) noexcept; + /** Split all clusters that need splitting. */ + void SplitAll() noexcept; + /** Populate m_group_data based on m_deps_to_add. */ + void GroupClusters() noexcept; + /** Merge the specified clusters. */ + void Merge(std::span to_merge) noexcept; + /** Apply all m_deps_to_add to the relevant Clusters. */ + void ApplyDependencies() noexcept; + /** Make a specified Cluster have quality ACCEPTABLE or OPTIMAL. */ + void MakeAcceptable(Cluster& cluster) noexcept; + + // Implementations for the public TxGraph interface. 
+ + Ref AddTransaction(const FeeFrac& feerate) noexcept final; + void RemoveTransaction(Ref& arg) noexcept final; + void AddDependency(Ref& parent, Ref& child) noexcept final; + void SetTransactionFee(Ref&, int64_t fee) noexcept final; + std::vector Cleanup() noexcept final; + + bool Exists(const Ref& arg) noexcept final; + FeeFrac GetChunkFeerate(const Ref& arg) noexcept final; + FeeFrac GetIndividualFeerate(const Ref& arg) noexcept final; + std::vector GetCluster(const Ref& arg) noexcept final; + std::vector GetAncestors(const Ref& arg) noexcept final; + std::vector GetDescendants(const Ref& arg) noexcept final; + GraphIndex GetTransactionCount() noexcept final; +}; + +void Cluster::Updated(TxGraphImpl& graph) noexcept +{ + // Update all the Locators for this Cluster's Entrys. + for (ClusterIndex idx : m_linearization) { + auto& entry = graph.m_entries[m_mapping[idx]]; + entry.m_locator.SetPresent(this, idx); + } + + // Compute its chunking and store its information in the Entry's m_chunk_feerate. + LinearizationChunking chunking(m_depgraph, m_linearization); + LinearizationIndex lin_idx{0}; + // Iterate over the chunks. + for (unsigned chunk_idx = 0; chunk_idx < chunking.NumChunksLeft(); ++chunk_idx) { + auto chunk = chunking.GetChunk(chunk_idx); + // Iterate over the transactions in the linearization, which must match those in chunk. + while (true) { + ClusterIndex idx = m_linearization[lin_idx++]; + GraphIndex graph_idx = m_mapping[idx]; + auto& entry = graph.m_entries[graph_idx]; + entry.m_chunk_feerate = chunk.feerate; + chunk.transactions.Reset(idx); + if (chunk.transactions.None()) break; + } + } +} + +void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove) noexcept +{ + // Iterate over the prefix of to_remove that applies to this cluster. + SetType todo; + do { + GraphIndex idx = to_remove.front(); + auto& entry = graph.m_entries[idx]; + auto& locator = entry.m_locator; + // Stop once we hit an entry that applies to another Cluster. 
+ if (locator.cluster != this) break; + // - Remember it in a set of to-remove ClusterIndexes. + todo.Set(locator.index); + // - Remove from m_mapping. + m_mapping[locator.index] = GraphIndex(-1); + // - Mark it as removed in the Entry's locator. + locator.SetMissing(); + if (entry.IsWiped()) graph.m_wiped.push_back(idx); + to_remove = to_remove.subspan(1); + --graph.m_txcount; + } while(!to_remove.empty()); + + Assume(todo.Any()); + // Wipe from the Cluster's DepGraph (this is O(n) regardless of the number of entries + // removed, so we benefit from batching all the removals). + m_depgraph.RemoveTransactions(todo); + m_mapping.resize(m_depgraph.PositionRange()); + + // Filter removals out of m_linearization. + m_linearization.erase(std::remove_if( + m_linearization.begin(), + m_linearization.end(), + [&](auto pos) { return todo[pos]; }), m_linearization.end()); + + graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_SPLIT); + Updated(graph); +} + +bool Cluster::Split(TxGraphImpl& graph) noexcept +{ + // This function can only be called when the Cluster needs splitting. + Assume(m_quality == QualityLevel::NEEDS_SPLIT); + /** Which positions are still left in this Cluster. */ + auto todo = m_depgraph.Positions(); + /** Mapping from transaction positions in this Cluster to the Cluster where it ends up, and + * its position therein. */ + std::vector> remap(m_depgraph.PositionRange()); + std::vector new_clusters; + bool first{true}; + // Iterate over the connected components of this Cluster's m_depgraph. + while (todo.Any()) { + auto component = m_depgraph.FindConnectedComponent(todo); + if (first && component == todo) { + // The existing Cluster is an entire component. Leave it be, but update its quality. + graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); + // We need to recompute and cache its chunking. + Updated(graph); + return false; + } + first = false; + // Construct a new Cluster to hold the found component. 
+ auto new_cluster = std::make_unique(); + new_clusters.push_back(new_cluster.get()); + // Remember that all the component's transaction go to this new Cluster. The positions + // will be determined below, so use -1 for now. + for (auto i : component) { + remap[i] = {new_cluster.get(), ClusterIndex(-1)}; + } + graph.InsertCluster(std::move(new_cluster), QualityLevel::NEEDS_RELINEARIZE); + todo -= component; + } + // Redistribute the transactions. + for (auto i : m_linearization) { + /** The cluster which transaction originally in position i is moved to. */ + Cluster* new_cluster = remap[i].first; + // Copy the transaction to the new cluster's depgraph, and remember the position. + remap[i].second = new_cluster->m_depgraph.AddTransaction(m_depgraph.FeeRate(i)); + // Create new mapping entry. + new_cluster->m_mapping.push_back(m_mapping[i]); + // Create a new linearization entry. As we're only appending transactions, they equal the + // ClusterIndex. + new_cluster->m_linearization.push_back(remap[i].second); + } + // Redistribute the dependencies. + for (auto i : m_linearization) { + /** The cluster transaction in position i is moved to. */ + Cluster* new_cluster = remap[i].first; + // Copy its parents, translating positions. + SetType new_parents; + for (auto par : m_depgraph.GetReducedParents(i)) new_parents.Set(remap[par].second); + new_cluster->m_depgraph.AddDependencies(new_parents, remap[i].second); + } + // Update all the Locators of moved transactions. + for (Cluster* new_cluster : new_clusters) { + new_cluster->Updated(graph); + } + // Wipe this Cluster, and return that it needs to be deleted. + m_depgraph = DepGraph{}; + m_mapping.clear(); + m_linearization.clear(); + return true; +} + +void Cluster::Merge(TxGraphImpl& graph, Cluster& other) noexcept +{ + /** Vector to store the positions in this Cluster for each position in other. 
*/ + std::vector remap(other.m_depgraph.PositionRange()); + // Iterate over all transactions in the other Cluster (the one being absorbed). + for (auto pos : other.m_linearization) { + auto idx = other.m_mapping[pos]; + // Copy the transaction into this Cluster, and remember its position. + auto new_pos = m_depgraph.AddTransaction(other.m_depgraph.FeeRate(pos)); + remap[pos] = new_pos; + if (new_pos == m_mapping.size()) { + m_mapping.push_back(idx); + } else { + m_mapping[new_pos] = idx; + } + m_linearization.push_back(new_pos); + // Copy the transaction's dependencies, translating them using remap. + SetType parents; + for (auto par : other.m_depgraph.GetReducedParents(pos)) { + parents.Set(remap[par]); + } + m_depgraph.AddDependencies(parents, remap[pos]); + // Update the transaction's Locator. There is no need to call Updated() to update chunk + // feerates, as Updated() will be invoked by Cluster::ApplyDependencies on the resulting + // merged Cluster later anyway). + graph.m_entries[idx].m_locator.SetPresent(this, new_pos); + } + // Purge the other Cluster, now that everything has been moved. + other.m_depgraph = DepGraph{}; + other.m_linearization.clear(); + other.m_mapping.clear(); +} + +void Cluster::ApplyDependencies(TxGraphImpl& graph, std::span> to_apply) noexcept +{ + // This function is invoked by TxGraphImpl::ApplyDependencies after merging groups of Clusters + // between which dependencies are added, which simply concatenates their linearizations. Invoke + // PostLinearize, which has the effect that the linearization becomes a merge-sort of the + // constituent linearizations. Do this here rather than in Cluster::Merge, because this + // function is only invoked once per merged Cluster, rather than once per constituent one. + // This concatenation + post-linearization could be replaced with an explicit merge-sort. + PostLinearize(m_depgraph, m_linearization); + + // Sort the list of dependencies to apply by child, so those can be applied in batch. 
+ std::sort(to_apply.begin(), to_apply.end(), [](auto& a, auto& b) { return a.second < b.second; }); + // Iterate over groups of to-be-added dependencies with the same child. + auto it = to_apply.begin(); + while (it != to_apply.end()) { + auto& first_child = graph.m_entries[it->second].m_locator; + ClusterIndex child_idx = first_child.index; + // Iterate over all to-be-added dependencies within that same child, gather the relevant + // parents. + SetType parents; + while (it != to_apply.end()) { + auto& child = graph.m_entries[it->second].m_locator; + auto& parent = graph.m_entries[it->first].m_locator; + Assume(child.cluster == this && parent.cluster == this); + if (child.index != child_idx) break; + parents.Set(parent.index); + ++it; + } + // Push all dependencies to the underlying DepGraph. Note that this is O(N) in the size of + // the cluster, regardless of the number of parents being added, so batching them together + // has a performance benefit. + m_depgraph.AddDependencies(parents, child_idx); + } + + // Finally fix the linearization, as the new dependencies may have invalidated the + // linearization, and post-linearize it to fix up the worst problems with it. + FixLinearization(m_depgraph, m_linearization); + PostLinearize(m_depgraph, m_linearization); + + // Finally push the changes to graph.m_entries. + Updated(graph); +} + +std::unique_ptr TxGraphImpl::ExtractCluster(QualityLevel quality, ClusterSetIndex setindex) noexcept +{ + Assume(quality != QualityLevel::NONE); + + auto& quality_clusters = m_clusters[int(quality)]; + Assume(setindex < quality_clusters.size()); + + // Extract the Cluster-owning unique_ptr. + std::unique_ptr ret = std::move(quality_clusters[setindex]); + ret->m_quality = QualityLevel::NONE; + ret->m_setindex = ClusterSetIndex(-1); + + // Clean up space in quality_cluster. 
+ auto max_setindex = quality_clusters.size() - 1; + if (setindex != max_setindex) { + // If the cluster was not the last element of quality_clusters, move that to take its place. + quality_clusters.back()->m_quality = quality; + quality_clusters.back()->m_setindex = setindex; + quality_clusters[setindex] = std::move(quality_clusters.back()); + } + // The last element of quality_clusters is now unused; drop it. + quality_clusters.pop_back(); + + return ret; +} + +ClusterSetIndex TxGraphImpl::InsertCluster(std::unique_ptr&& cluster, QualityLevel quality) noexcept +{ + // Cannot insert with quality level NONE (as that would mean not inserted). + Assume(quality != QualityLevel::NONE); + // The passed-in Cluster must not currently be in the TxGraphImpl. + Assume(cluster->m_quality == QualityLevel::NONE); + + // Append it at the end of the relevant TxGraphImpl::m_cluster. + auto& quality_clusters = m_clusters[int(quality)]; + ClusterSetIndex ret = quality_clusters.size(); + cluster->m_quality = quality; + cluster->m_setindex = ret; + quality_clusters.push_back(std::move(cluster)); + return ret; +} + +void TxGraphImpl::SetClusterQuality(QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept +{ + Assume(new_quality != QualityLevel::NONE); + + // Don't do anything if the quality did not change. + if (old_quality == new_quality) return; + // Extract the cluster from where it currently resides. + auto cluster_ptr = ExtractCluster(old_quality, old_index); + // And re-insert it where it belongs. + InsertCluster(std::move(cluster_ptr), new_quality); +} + +void TxGraphImpl::DeleteCluster(Cluster& cluster) noexcept +{ + // Extract the cluster from where it currently resides. + auto cluster_ptr = ExtractCluster(cluster.m_quality, cluster.m_setindex); + // And throw it away. + cluster_ptr.reset(); +} + +void TxGraphImpl::ApplyRemovals() noexcept +{ + auto& to_remove = m_to_remove; + // Skip if there is nothing to remove. 
+ if (to_remove.empty()) return; + // Wipe cached m_group_data, as it may be invalidated by removals. + m_group_data = std::nullopt; + m_group_clusters.clear(); + // Group the set of to-be-removed entries by Cluster*. + std::sort(m_to_remove.begin(), m_to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept { + return std::less{}(m_entries[a].m_locator.cluster, m_entries[b].m_locator.cluster); + }); + // Process per Cluster. + std::span to_remove_span{m_to_remove}; + while (!to_remove_span.empty()) { + Cluster* cluster = m_entries[to_remove_span.front()].m_locator.cluster; + if (cluster != nullptr) { + // If the first to_remove_span entry's Cluster exists, hand to_remove_span to it, so it + // can pop off whatever applies to it. + cluster->ApplyRemovals(*this, to_remove_span); + } else { + // Otherwise, skip this already-removed entry. + to_remove_span = to_remove_span.subspan(1); + } + } + m_to_remove.clear(); +} + +void TxGraphImpl::SwapIndexes(GraphIndex a, GraphIndex b) noexcept +{ + Assume(a < m_entries.size()); + Assume(b < m_entries.size()); + // Swap the Entry objects. + std::swap(m_entries[a], m_entries[b]); + // Iterate over both objects. + for (int i = 0; i < 2; ++i) { + GraphIndex idx = i ? b : a; + Entry& entry = m_entries[idx]; + // Update linked Ref. + if (entry.m_ref) GetRefIndex(*entry.m_ref) = idx; + // Update the locator. The rest of the Entry information will not change, so no need to + // invoke Cluster::Updated(). + Locator& locator = entry.m_locator; + if (locator.IsPresent()) { + locator.cluster->UpdateMapping(locator.index, idx); + } + } +} + +std::vector TxGraphImpl::Cleanup() noexcept +{ + ApplyDependencies(); + std::vector ret; + // Sort the GraphIndex that need to be cleaned up. This groups them (so duplicates can be + // processed just once). They are sorted in reverse, so the last ones get processed first. + // This means processing a GraphIndex will not cause later-processed ones to move (which + // might invalidate them). 
+ std::sort(m_wiped.begin(), m_wiped.end(), std::greater{}); + GraphIndex last(-1); + for (GraphIndex idx : m_wiped) { + // m_wiped should never contain the same GraphIndex twice (the code below would fail + // if so, because GraphIndexes get invalidated by removing them). + Assume(idx != last); + last = idx; + Entry& entry = m_entries[idx]; + // Gather Ref pointers that are being unlinked. + if (entry.m_ref != nullptr) { + ret.push_back(entry.m_ref); + GetRefGraph(*entry.m_ref) = nullptr; + m_entries[idx].m_ref = nullptr; + } + // Verify removed entries don't have anything that could hold a reference back. + Assume(!entry.m_locator.IsPresent()); + if (idx != m_entries.size() - 1) SwapIndexes(idx, m_entries.size() - 1); + m_entries.pop_back(); + } + m_wiped.clear(); + return ret; +} + +void TxGraphImpl::Split(Cluster& cluster) noexcept +{ + // To split a Cluster, first make sure all removals are applied (as we might need to split + // again afterwards otherwise). + ApplyRemovals(); + bool del = cluster.Split(*this); + if (del) { + // Cluster::Split reports whether the Cluster is to be deleted. + DeleteCluster(cluster); + } +} + +void TxGraphImpl::SplitAll() noexcept +{ + // Before splitting all Clusters, first make sure all removals are applied. + ApplyRemovals(); + auto& queue = m_clusters[int(QualityLevel::NEEDS_SPLIT)]; + while (!queue.empty()) { + Split(*queue.back().get()); + } +} + +void TxGraphImpl::GroupClusters() noexcept +{ + // Before computing which Clusters need to be merged together, first apply all removals and + // split the Clusters into connected components. If we would group first, we might end up + // with inefficient Clusters which just end up being split again anyway. + SplitAll(); + + // If the groupings have been computed already, nothing is left to be done.
+ if (m_group_data.has_value()) return; + + /** Annotated clusters: an entry for each Cluster, together with the representative for the + * partition it is in if known, or with nullptr if not yet known. */ + std::vector> an_clusters; + /** Annotated dependencies: an entry for each m_deps_to_add entry (excluding ones that apply + * to removed transactions), together with the representative root of the partition of + * Clusters it applies to. */ + std::vector, Cluster*>> an_deps; + + // Construct an an_clusters entry for every parent and child in the to-be-applied dependencies. + for (const auto& [par, chl] : m_deps_to_add) { + auto par_cluster = m_entries[par].m_locator.cluster; + auto chl_cluster = m_entries[chl].m_locator.cluster; + // Skip dependencies for which the parent or child transaction is removed. + if (par_cluster == nullptr || chl_cluster == nullptr) continue; + an_clusters.emplace_back(par_cluster, nullptr); + // Do not include a duplicate when the parent and child Clusters are identical, as it'll be removed + // below anyway. + if (chl_cluster != par_cluster) an_clusters.emplace_back(chl_cluster, nullptr); + } + // Sort and deduplicate an_clusters, so we end up with a sorted list of all involved Clusters + // to which dependencies apply. + std::sort(an_clusters.begin(), an_clusters.end()); + an_clusters.erase(std::unique(an_clusters.begin(), an_clusters.end()), an_clusters.end()); + + // Sort the dependencies by child Cluster. + std::sort(m_deps_to_add.begin(), m_deps_to_add.end(), [&](auto& a, auto& b) noexcept { + auto [_a_par, a_chl] = a; + auto [_b_par, b_chl] = b; + auto a_chl_cluster = m_entries[a_chl].m_locator.cluster; + auto b_chl_cluster = m_entries[b_chl].m_locator.cluster; + return std::less{}(a_chl_cluster, b_chl_cluster); + }); + + // Run the union-find algorithm to find partitions of the input Clusters which need to be + // grouped together. See https://en.wikipedia.org/wiki/Disjoint-set_data_structure.
+ { + /** Each PartitionData entry contains information about a single input Cluster. */ + struct PartitionData + { + /** The cluster this holds information for. */ + Cluster* cluster; + /** All PartitionData entries belonging to the same partition are organized in a tree. + * Each element points to its parent, or to itself if it is the root. The root is then + * a representative for the entire tree, and can be found by walking upwards from any + * element. */ + PartitionData* parent; + /** (only if this is a root, so when parent == this) An upper bound on the height of the + * tree for this partition. */ + unsigned rank; + }; + /** Information about each input Cluster. Sorted by Cluster* pointer. */ + std::vector partition_data; + + /** Given a Cluster, find its corresponding PartitionData. */ + auto locate_uf = [&](Cluster* arg) noexcept -> PartitionData* { + auto it = std::lower_bound(partition_data.begin(), partition_data.end(), arg, + [](auto& a, Cluster* ptr) noexcept { return a.cluster < ptr; }); + Assume(it != partition_data.end()); + Assume(it->cluster == arg); + return &*it; + }; + + /** Given a PartitionData, find the root of the tree it is in (its representative). */ + static constexpr auto find_uf = [](PartitionData* data) noexcept -> PartitionData* { + while (data->parent != data) { + // Replace pointers to parents with pointers to grandparents. + // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Finding_set_representatives. + auto par = data->parent; + data->parent = par->parent; + data = par; + } + return data; + }; + + /** Given two PartitionDatas, union the partitions they are in, and return their + * representative. */ + static constexpr auto union_uf = [](PartitionData* arg1, PartitionData* arg2) noexcept { + // Find the roots of the trees, and bail out if they are already equal (which would + // mean they are in the same partition already).
+ auto rep1 = find_uf(arg1); + auto rep2 = find_uf(arg2); + if (rep1 == rep2) return rep1; + // Pick the lower-rank root to become a child of the higher-rank one. + // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Union_by_rank. + if (rep1->rank < rep2->rank) std::swap(rep1, rep2); + rep2->parent = rep1; + rep1->rank += (rep1->rank == rep2->rank); + return rep1; + }; + + // Start by initializing every Cluster as its own singleton partition. + partition_data.resize(an_clusters.size()); + for (size_t i = 0; i < an_clusters.size(); ++i) { + partition_data[i].cluster = an_clusters[i].first; + partition_data[i].parent = &partition_data[i]; + partition_data[i].rank = 0; + } + + // Run through all parent/child pairs in m_deps_to_add, and union + // the partitions their Clusters are in. + Cluster* last_chl_cluster{nullptr}; + PartitionData* last_partition{nullptr}; + for (const auto& [par, chl] : m_deps_to_add) { + auto par_cluster = m_entries[par].m_locator.cluster; + auto chl_cluster = m_entries[chl].m_locator.cluster; + // Nothing to do if parent and child are in the same Cluster. + if (par_cluster == chl_cluster) continue; + // Nothing to do if either parent or child transaction is removed already. + if (par_cluster == nullptr || chl_cluster == nullptr) continue; + Assume(par != chl); + if (chl_cluster == last_chl_cluster) { + // If the child Cluster is the same as in the previous iteration, union with the + // tree they were in, avoiding the need for another lookup. Note that m_deps_to_add + // is sorted by child Cluster, so batches with the same child Cluster are expected.
+ last_partition = union_uf(locate_uf(par_cluster), last_partition); + } else { + last_chl_cluster = chl_cluster; + last_partition = union_uf(locate_uf(par_cluster), locate_uf(chl_cluster)); + } + } + + // Populate the an_clusters and an_deps data structures with the list of input Clusters, + // and the input dependencies, annotated with the representative of the Cluster partition + // it applies to. + an_deps.reserve(m_deps_to_add.size()); + auto deps_it = m_deps_to_add.begin(); + for (size_t i = 0; i < partition_data.size(); ++i) { + auto& data = partition_data[i]; + // Find the representative of the partition Cluster i is in, and store it with the + // Cluster. + auto rep = find_uf(&data)->cluster; + an_clusters[i].second = rep; + // Find all dependencies whose child Cluster is Cluster i, and annotate them with rep. + while (deps_it != m_deps_to_add.end()) { + auto [par, chl] = *deps_it; + auto chl_cluster = m_entries[chl].m_locator.cluster; + // Skip dependencies that apply to earlier Clusters (those are necessarily for + // deleted transactions, as otherwise we'd have processed them already). + if (!std::less{}(chl_cluster, data.cluster)) { + if (chl_cluster != data.cluster) break; + auto par_cluster = m_entries[par].m_locator.cluster; + // Also filter out dependencies applying to a removed parent. + if (par_cluster != nullptr) an_deps.emplace_back(*deps_it, rep); + } + ++deps_it; + } + } + } + + // Sort both an_clusters and an_deps by representative of the partition they are in, grouping + // all those applying to the same partition together. + std::sort(an_deps.begin(), an_deps.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); + std::sort(an_clusters.begin(), an_clusters.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); + + // Translate the resulting cluster groups to the m_group_data structure.
+ m_group_data = std::vector{}; + m_group_clusters.clear(); + m_group_clusters.reserve(an_clusters.size()); + m_deps_to_add.clear(); + m_deps_to_add.reserve(an_deps.size()); + auto an_deps_it = an_deps.begin(); + auto an_clusters_it = an_clusters.begin(); + while (an_deps_it != an_deps.end()) { + auto rep = an_deps_it->second; + // Create and initialize a new GroupData entry for the partition. + auto& new_entry = m_group_data->emplace_back(); + new_entry.m_cluster_offset = m_group_clusters.size(); + new_entry.m_cluster_count = 0; + new_entry.m_deps_offset = m_deps_to_add.size(); + new_entry.m_deps_count = 0; + // Add all its clusters to it (copying those from an_clusters to m_group_clusters). + while (an_clusters_it != an_clusters.end() && an_clusters_it->second == rep) { + m_group_clusters.push_back(an_clusters_it->first); + ++an_clusters_it; + ++new_entry.m_cluster_count; + } + // Add all its dependencies to it (copying those back from an_deps to m_deps_to_add). + while (an_deps_it != an_deps.end() && an_deps_it->second == rep) { + m_deps_to_add.push_back(an_deps_it->first); + ++an_deps_it; + ++new_entry.m_deps_count; + } + } + Assume(an_deps_it == an_deps.end()); + Assume(an_clusters_it == an_clusters.end()); +} + +void TxGraphImpl::Merge(std::span to_merge) noexcept +{ + Assume(!to_merge.empty()); + // Nothing to do if a group consists of just a single Cluster. + if (to_merge.size() == 1) return; + + // Move the largest Cluster to the front of to_merge. As all transactions in other to-be-merged + // Clusters will be moved to that one, putting the largest one first minimizes the number of + // moves. 
+ size_t max_size_pos{0}; + ClusterIndex max_size = to_merge[max_size_pos]->GetTxCount(); + for (size_t i = 1; i < to_merge.size(); ++i) { + ClusterIndex size = to_merge[i]->GetTxCount(); + if (size > max_size) { + max_size_pos = i; + max_size = size; + } + } + if (max_size_pos != 0) std::swap(to_merge[0], to_merge[max_size_pos]); + + // Merge all further Clusters in the group into the first one, and delete them. + for (size_t i = 1; i < to_merge.size(); ++i) { + to_merge[0]->Merge(*this, *to_merge[i]); + DeleteCluster(*to_merge[i]); + } +} + +void TxGraphImpl::ApplyDependencies() noexcept +{ + // Compute the groups of to-be-merged Clusters (which also applies all removals, and splits). + GroupClusters(); + Assume(m_group_data.has_value()); + // Nothing to do if there are no dependencies to be added. + if (m_deps_to_add.empty()) return; + + // For each group of to-be-merged Clusters. + Assume(m_group_data.has_value()); + for (const auto& group_data : *m_group_data) { + // Invoke Merge() to merge them into a single Cluster. + auto cluster_span = std::span{m_group_clusters} + .subspan(group_data.m_cluster_offset, group_data.m_cluster_count); + Merge(cluster_span); + // Actually apply all to-be-added dependencies (for each, parent and child belong to the + // same Cluster because of the merging above). + auto deps_span = std::span{m_deps_to_add} + .subspan(group_data.m_deps_offset, group_data.m_deps_count); + Assume(!deps_span.empty()); + const auto& loc = m_entries[deps_span[0].second].m_locator; + Assume(loc.IsPresent()); + loc.cluster->ApplyDependencies(*this, deps_span); + } + + // Wipe the list of to-be-added dependencies now that they are applied. + m_deps_to_add.clear(); + // Also no further Cluster mergings are needed (note that we clear, but don't set to + // std::nullopt, as that would imply the groupings are unknown). 
+ m_group_data->clear(); + m_group_clusters.clear(); +} + +void Cluster::Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept +{ + // We can only relinearize Clusters that do not need splitting. + Assume(m_quality == QualityLevel::OPTIMAL || m_quality == QualityLevel::ACCEPTABLE || + m_quality == QualityLevel::NEEDS_RELINEARIZE); + // No work is required for Clusters which are already optimally linearized. + if (m_quality == QualityLevel::OPTIMAL) return; + // Invoke the actual linearization algorithm (passing in the existing one). + uint64_t rng_seed = graph.m_rng.rand64(); + auto [linearization, optimal] = Linearize(m_depgraph, max_iters, rng_seed, m_linearization); + // Postlinearize if the result isn't optimal already. This guarantees (among other things) + // that the chunks of the resulting linearization are all connected. + if (!optimal) PostLinearize(m_depgraph, linearization); + // Update the linearization. + m_linearization = std::move(linearization); + // Update the Cluster's quality. + auto new_quality = optimal ? QualityLevel::OPTIMAL : QualityLevel::ACCEPTABLE; + graph.SetClusterQuality(m_quality, m_setindex, new_quality); + // Update the Entry objects. + Updated(graph); +} + +void TxGraphImpl::MakeAcceptable(Cluster& cluster) noexcept +{ + // Relinearize the Cluster if needed. + if (cluster.m_quality == QualityLevel::NEEDS_RELINEARIZE) { + cluster.Relinearize(*this, 10000); + } +} + +Cluster::Cluster(TxGraphImpl& graph, const FeeFrac& feerate, GraphIndex graph_index) noexcept +{ + // Create a new transaction in the DepGraph, and remember its position in m_mapping. + auto cluster_idx = m_depgraph.AddTransaction(feerate); + m_mapping.push_back(graph_index); + m_linearization.push_back(cluster_idx); +} + +TxGraph::Ref TxGraphImpl::AddTransaction(const FeeFrac& feerate) noexcept +{ + // Construct a new Ref. + Ref ret; + // Construct a new Entry, and link it with the Ref. 
+ auto idx = m_entries.size(); + m_entries.emplace_back(); + auto& entry = m_entries.back(); + entry.m_ref = &ret; + GetRefGraph(ret) = this; + GetRefIndex(ret) = idx; + // Construct a new singleton Cluster (which is necessarily optimally linearized). + auto cluster = std::make_unique(*this, feerate, idx); + auto cluster_ptr = cluster.get(); + InsertCluster(std::move(cluster), QualityLevel::OPTIMAL); + cluster_ptr->Updated(*this); + ++m_txcount; + // Return the Ref. + return ret; +} + +void TxGraphImpl::RemoveTransaction(Ref& arg) noexcept +{ + // Don't do anything if the Ref is empty (which may be indicative of the transaction already + // having been removed). + if (GetRefGraph(arg) == nullptr) return; + Assume(GetRefGraph(arg) == this); + // Find the Cluster the transaction is in, and stop if it isn't in any. + auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + if (cluster == nullptr) return; + // Remember that the transaction is to be removed. + m_to_remove.push_back(GetRefIndex(arg)); +} + +void TxGraphImpl::AddDependency(Ref& parent, Ref& child) noexcept +{ + // Don't do anything if either Ref is empty (which may be indicative of it having already been + // removed). + if (GetRefGraph(parent) == nullptr || GetRefGraph(child) == nullptr) return; + Assume(GetRefGraph(parent) == this && GetRefGraph(child) == this); + // Find the Cluster the parent and child transaction are in, and stop if either appears to be + // already removed. + auto par_cluster = m_entries[GetRefIndex(parent)].m_locator.cluster; + if (par_cluster == nullptr) return; + auto chl_cluster = m_entries[GetRefIndex(child)].m_locator.cluster; + if (chl_cluster == nullptr) return; + // Wipe m_group_data (as it will need to be recomputed). + m_group_data.reset(); + m_group_clusters.clear(); + // Remember that this dependency is to be applied. 
+ m_deps_to_add.emplace_back(GetRefIndex(parent), GetRefIndex(child)); +} + +bool TxGraphImpl::Exists(const Ref& arg) noexcept +{ + if (GetRefGraph(arg) == nullptr) return false; + Assume(GetRefGraph(arg) == this); + // Make sure the transaction isn't scheduled for removal. + ApplyRemovals(); + return m_entries[GetRefIndex(arg)].m_locator.IsPresent(); +} + +std::vector Cluster::GetAncestorRefs(const TxGraphImpl& graph, ClusterIndex idx) noexcept +{ + std::vector ret; + // Translate all ancestors (in arbitrary order) to Refs (if they have any), and return them. + for (auto anc : m_depgraph.Ancestors(idx)) { + const auto& entry = graph.m_entries[m_mapping[anc]]; + ret.push_back(entry.m_ref); + } + return ret; +} + +std::vector Cluster::GetDescendantRefs(const TxGraphImpl& graph, ClusterIndex idx) noexcept +{ + std::vector ret; + // Translate all descendants (in arbitrary order) to Refs (if they have any), and return them. + for (auto desc : m_depgraph.Descendants(idx)) { + const auto& entry = graph.m_entries[m_mapping[desc]]; + ret.push_back(entry.m_ref); + } + return ret; +} + +std::vector Cluster::GetClusterRefs(const TxGraphImpl& graph) noexcept +{ + std::vector ret; + // Translate all transactions in the Cluster (in linearization order) to Refs. + for (auto idx : m_linearization) { + const auto& entry = graph.m_entries[m_mapping[idx]]; + ret.push_back(entry.m_ref); + } + return ret; +} + +FeeFrac Cluster::GetIndividualFeerate(ClusterIndex idx) noexcept +{ + return m_depgraph.FeeRate(idx); +} + +std::vector TxGraphImpl::GetAncestors(const Ref& arg) noexcept +{ + // Return the empty vector if the Ref is empty (which may be indicative of the transaction + // having been removed already). + if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Apply all dependencies, as the result might be incorrect otherwise. + ApplyDependencies(); + // Find the Cluster the argument is in, and return the empty vector if it isn't in any.
+ auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + if (cluster == nullptr) return {}; + // Dispatch to the Cluster. + return cluster->GetAncestorRefs(*this, m_entries[GetRefIndex(arg)].m_locator.index); +} + +std::vector TxGraphImpl::GetDescendants(const Ref& arg) noexcept +{ + // Return the empty vector if the Ref is empty (which may be indicative of the transaction + // having been removed already). + if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Apply all dependencies, as the result might be incorrect otherwise. + ApplyDependencies(); + // Find the Cluster the argument is in, and return the empty vector if it isn't in any. + auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + if (cluster == nullptr) return {}; + // Dispatch to the Cluster. + return cluster->GetDescendantRefs(*this, m_entries[GetRefIndex(arg)].m_locator.index); +} + +std::vector TxGraphImpl::GetCluster(const Ref& arg) noexcept +{ + // Return the empty vector if the Ref is empty (which may be indicative of the transaction + // having been removed already). + if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Apply all dependencies, as the result might be incorrect otherwise. + ApplyDependencies(); + // Find the Cluster the argument is in, and return the empty vector if it isn't in any. + auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + if (cluster == nullptr) return {}; + // Make sure the Cluster has an acceptable quality level, and then dispatch to it. + MakeAcceptable(*cluster); + return cluster->GetClusterRefs(*this); +} + +TxGraph::GraphIndex TxGraphImpl::GetTransactionCount() noexcept +{ + ApplyRemovals(); + return m_txcount; +} + +FeeFrac TxGraphImpl::GetIndividualFeerate(const Ref& arg) noexcept +{ + // Return the empty FeeFrac if the passed Ref is empty (which may be indicative of the + // transaction having been removed already).
+ if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Apply removals, so that we can correctly report FeeFrac{} for non-existing transactions. + ApplyRemovals(); + // Find the cluster the argument is in, and return the empty FeeFrac if it isn't in any. + auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + if (cluster == nullptr) return {}; + // Dispatch to the Cluster. + return cluster->GetIndividualFeerate(m_entries[GetRefIndex(arg)].m_locator.index); +} + +FeeFrac TxGraphImpl::GetChunkFeerate(const Ref& arg) noexcept +{ + // Return the empty FeeFrac if the passed Ref is empty (which may be indicative of the + // transaction having been removed already). + if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Apply all dependencies, as the result might be inaccurate otherwise. + ApplyDependencies(); + // Find the cluster the argument is in, and return the empty FeeFrac if it isn't in any. + auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + if (cluster == nullptr) return {}; + // Make sure the Cluster has an acceptable quality level, and then return the transaction's + // chunk feerate. + MakeAcceptable(*cluster); + const auto& entry = m_entries[GetRefIndex(arg)]; + return entry.m_chunk_feerate; +} + +void Cluster::SetFee(TxGraphImpl& graph, ClusterIndex idx, int64_t fee) noexcept +{ + // Make sure the specified ClusterIndex exists in this Cluster. + Assume(m_depgraph.Positions()[idx]); + // Bail out if the fee isn't actually being changed. + if (m_depgraph.FeeRate(idx).fee == fee) return; + // Update the fee, remember that relinearization will be necessary, and update the Entries + // in the same Cluster.
+ m_depgraph.FeeRate(idx).fee = fee; + if (m_quality != QualityLevel::NEEDS_SPLIT) { + graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); + } + Updated(graph); +} + +void TxGraphImpl::SetTransactionFee(Ref& ref, int64_t fee) noexcept +{ + // Don't do anything if the passed Ref is empty (which may be indicative of the + // transaction having been removed already). + if (GetRefGraph(ref) == nullptr) return; + Assume(GetRefGraph(ref) == this); + // Find the entry, its locator, and inform its Cluster (if any) about the new fee. + auto& entry = m_entries[GetRefIndex(ref)]; + auto& locator = entry.m_locator; + if (locator.IsPresent()) { + locator.cluster->SetFee(*this, locator.index, fee); + } +} + +} // namespace + +TxGraph::Ref::~Ref() +{ + if (m_graph) { + // Inform the TxGraph about the Ref being destroyed. + m_graph->UnlinkRef(m_index); + m_graph = nullptr; + } +} + +TxGraph::Ref& TxGraph::Ref::operator=(Ref&& other) noexcept +{ + // Inform both TxGraphs about the Refs being swapped. + if (m_graph) m_graph->UpdateRef(m_index, other); + if (other.m_graph) other.m_graph->UpdateRef(other.m_index, *this); + // Actually swap the contents. + std::swap(m_graph, other.m_graph); + std::swap(m_index, other.m_index); + return *this; +} + +TxGraph::Ref::Ref(Ref&& other) noexcept +{ + // Inform the TxGraph of other that its Ref is being moved. + if (other.m_graph) other.m_graph->UpdateRef(other.m_index, *this); + // Actually move the contents. + std::swap(m_graph, other.m_graph); + std::swap(m_index, other.m_index); +} + +std::unique_ptr MakeTxGraph() noexcept +{ + return std::make_unique(); +} diff --git a/src/txgraph.h b/src/txgraph.h new file mode 100644 index 00000000000..8f27a859467 --- /dev/null +++ b/src/txgraph.h @@ -0,0 +1,125 @@ +// Copyright (c) The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+ +#include +#include +#include +#include + +#include + +#ifndef BITCOIN_TXGRAPH_H +#define BITCOIN_TXGRAPH_H + +/** No connected component within TxGraph is allowed to exceed this number of transactions. */ +static constexpr unsigned CLUSTER_COUNT_LIMIT{64}; + +/** Data structure to encapsulate fees, sizes, and dependencies for a set of transactions. */ +class TxGraph +{ +public: + /** Internal identifier for a transaction within a TxGraph. */ + using GraphIndex = uint32_t; + + /** Data type used to reference transactions within a TxGraph. + * + * Every transaction within a TxGraph has exactly one corresponding TxGraph::Ref, held by users + * of the class. Destroying the TxGraph::Ref removes the corresponding transaction. + * + * Users of the class can inherit from TxGraph::Ref. If all Refs are inherited this way, the + * Ref* pointers returned by TxGraph functions can be used as this inherited type. + */ + class Ref + { + // Allow TxGraph's GetRefGraph and GetRefIndex to access internals. + friend class TxGraph; + /** Which Graph the Entry lives in. nullptr if this Ref is empty. */ + TxGraph* m_graph = nullptr; + /** Index into the Graph's m_entries. Only used if m_graph != nullptr. */ + GraphIndex m_index = GraphIndex(-1); + public: + /** Construct an empty Ref (not pointing to any Entry). */ + Ref() noexcept = default; + /** Test if this Ref is not empty. */ + explicit operator bool() const noexcept { return m_graph != nullptr; } + /** Destroy this Ref. */ + virtual ~Ref(); + // Support moving a Ref. + Ref& operator=(Ref&& other) noexcept; + Ref(Ref&& other) noexcept; + // Do not permit copy constructing or copy assignment. A TxGraph entry can have at most one + // Ref pointing to it. + Ref& operator=(const Ref&) = delete; + Ref(const Ref&) = delete; + }; + +protected: + // Allow TxGraph::Ref to call UpdateRef and UnlinkRef. + friend class TxGraph::Ref; + /** Inform the TxGraph implementation that a TxGraph::Ref has moved. 
*/ + virtual void UpdateRef(GraphIndex index, Ref& new_location) noexcept = 0; + /** Inform the TxGraph implementation that a TxGraph::Ref was destroyed. */ + virtual void UnlinkRef(GraphIndex index) noexcept = 0; + // Allow TxGraph implementations (inheriting from it) to access Ref internals. + static TxGraph*& GetRefGraph(Ref& arg) noexcept { return arg.m_graph; } + static TxGraph* GetRefGraph(const Ref& arg) noexcept { return arg.m_graph; } + static GraphIndex& GetRefIndex(Ref& arg) noexcept { return arg.m_index; } + static GraphIndex GetRefIndex(const Ref& arg) noexcept { return arg.m_index; } + +public: + /** Virtual destructor, so inheriting is safe. */ + virtual ~TxGraph() = default; + /** Construct a new transaction with the specified feerate, and return a Ref to it. */ + [[nodiscard]] virtual Ref AddTransaction(const FeeFrac& feerate) noexcept = 0; + /** Remove the specified transaction. This is a no-op if the transaction was already removed. + * + * TxGraph may internally reorder transaction removals with dependency additions for + * performance reasons. If together with any transaction removal all its descendants, or all + * its ancestors, are removed as well (which is what always happens in realistic scenarios), + * this reordering will not affect the behavior of TxGraph. + * + * As an example, imagine 3 transactions A,B,C where B depends on A. If a dependency of C on B + * is added, and then B is deleted, C will still depend on A. If the deletion of B is reordered + * before the C->B dependency is added, it has no effect instead. If, together with the + * deletion of B also either A or C is deleted, there is no distinction. + */ + virtual void RemoveTransaction(Ref& arg) noexcept = 0; + /** Add a dependency between two specified transactions. Parent may not be a descendant of + * child already (but may be an ancestor of it already, in which case this is a no-op). If + * either transaction is already removed, this is a no-op. 
*/ + virtual void AddDependency(Ref& parent, Ref& child) noexcept = 0; + /** Modify the fee of the specified transaction. If the transaction does not exist (or was + * removed), this has no effect. */ + virtual void SetTransactionFee(Ref& arg, int64_t fee) noexcept = 0; + /** Return a vector of pointers to Ref objects for transactions which have been removed from + * the graph, and have not been destroyed yet. Each transaction is only reported once by + * Cleanup(). Afterwards, all Refs will be empty. */ + [[nodiscard]] virtual std::vector Cleanup() noexcept = 0; + + /** Determine whether arg exists in this graph (i.e., was not removed). */ + virtual bool Exists(const Ref& arg) noexcept = 0; + /** Get the feerate of the chunk which transaction arg is in. Returns the empty FeeFrac if arg + * does not exist. */ + virtual FeeFrac GetChunkFeerate(const Ref& arg) noexcept = 0; + /** Get the individual transaction feerate of transaction arg. Returns the empty FeeFrac if + * arg does not exist. */ + virtual FeeFrac GetIndividualFeerate(const Ref& arg) noexcept = 0; + /** Get pointers to all transactions in the connected component ("cluster") which arg is in. + * The transactions will be returned in a topologically-valid order of acceptable quality. + * Returns {} if arg does not exist in the queried graph. */ + virtual std::vector GetCluster(const Ref& arg) noexcept = 0; + /** Get pointers to all ancestors of the specified transaction. Returns {} if arg does not + * exist. */ + virtual std::vector GetAncestors(const Ref& arg) noexcept = 0; + /** Get pointers to all descendants of the specified transaction. Returns {} if arg does not + * exist in the graph. */ + virtual std::vector GetDescendants(const Ref& arg) noexcept = 0; + /** Get the total number of transactions in the graph. */ + virtual GraphIndex GetTransactionCount() noexcept = 0; +}; + +/** Construct a new TxGraph. 
*/ +std::unique_ptr MakeTxGraph() noexcept; + +#endif // BITCOIN_TXGRAPH_H From 543a981912c94160b264fdd7a88d0b52236813e8 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Fri, 15 Nov 2024 14:15:12 -0500 Subject: [PATCH 04/20] txgraph: (tests) add simulation fuzz test This adds a simulation fuzz test for txgraph, by comparing with a naive reimplementation that models the entire graph as a single DepGraph, and clusters in TxGraph as connected components within that DepGraph. --- src/test/fuzz/CMakeLists.txt | 1 + src/test/fuzz/txgraph.cpp | 427 +++++++++++++++++++++++++++++++++++ 2 files changed, 428 insertions(+) create mode 100644 src/test/fuzz/txgraph.cpp diff --git a/src/test/fuzz/CMakeLists.txt b/src/test/fuzz/CMakeLists.txt index f65ed62b2d0..7396fb77b57 100644 --- a/src/test/fuzz/CMakeLists.txt +++ b/src/test/fuzz/CMakeLists.txt @@ -122,6 +122,7 @@ add_executable(fuzz tx_in.cpp tx_out.cpp tx_pool.cpp + txgraph.cpp txorphan.cpp txrequest.cpp # Visual Studio 2022 version 17.12 introduced a bug diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp new file mode 100644 index 00000000000..31ad17693b1 --- /dev/null +++ b/src/test/fuzz/txgraph.cpp @@ -0,0 +1,427 @@ +// Copyright (c) The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +using namespace cluster_linearize; + +namespace { + +/** Data type representing a naive simulated TxGraph, keeping all transactions (even from + * disconnected components) in a single DepGraph. */ +struct SimTxGraph +{ + /** Maximum number of transactions to support simultaneously. Set this higher than txgraph's + * cluster count, so we can exercise situations with more transactions than fit in one + * cluster. 
*/ + static constexpr unsigned MAX_TRANSACTIONS = CLUSTER_COUNT_LIMIT * 2; + /** Set type to use in the simulation. */ + using SetType = BitSet; + /** Data type for representing positions within SimTxGraph::graph. */ + using Pos = ClusterIndex; + /** Constant to mean "missing in this graph". */ + static constexpr auto MISSING = Pos(-1); + + /** The dependency graph (for all transactions in the simulation, regardless of + * connectivity/clustering). */ + DepGraph graph; + /** For each position in graph, which TxGraph::Ref it corresponds with (if any). */ + std::array, MAX_TRANSACTIONS> simmap; + /** For each TxGraph::Ref in graph, the position it corresponds with. */ + std::map simrevmap; + /** The set of TxGraph::Ref entries that have been removed, but not yet Cleanup()'ed in + * the real TxGraph. */ + std::vector> removed; + + /** Determine the number of (non-removed) transactions in the graph. */ + ClusterIndex GetTransactionCount() const { return graph.TxCount(); } + + /** Get the position where ref occurs in this simulated graph, or -1 if it does not. */ + Pos Find(const TxGraph::Ref& ref) const + { + if (!ref) return MISSING; + auto it = simrevmap.find(&ref); + if (it != simrevmap.end()) return it->second; + return MISSING; + } + + /** Given a position in this simulated graph, get the corresponding TxGraph::Ref. */ + TxGraph::Ref& GetRef(Pos pos) + { + assert(graph.Positions()[pos]); + assert(simmap[pos]); + return *simmap[pos].get(); + } + + /** Add a new transaction to the simulation. */ + TxGraph::Ref& AddTransaction(const FeeFrac& feerate) + { + assert(graph.TxCount() < MAX_TRANSACTIONS); + auto simpos = graph.AddTransaction(feerate); + assert(graph.Positions()[simpos]); + simmap[simpos] = std::make_unique(); + auto ptr = simmap[simpos].get(); + simrevmap[ptr] = simpos; + return *ptr; + } + + /** Add a dependency between two positions in this graph. 
*/ + void AddDependency(TxGraph::Ref& parent, TxGraph::Ref& child) + { + auto par_pos = Find(parent); + if (par_pos == MISSING) return; + auto chl_pos = Find(child); + if (chl_pos == MISSING) return; + graph.AddDependencies(SetType::Singleton(par_pos), chl_pos); + } + + /** Modify the transaction fee of a ref, if it exists. */ + void SetTransactionFee(TxGraph::Ref& ref, int64_t fee) + { + auto pos = Find(ref); + if (pos == MISSING) return; + graph.FeeRate(pos).fee = fee; + } + + /** Remove the transaction in the specified position from the graph. */ + void RemoveTransaction(TxGraph::Ref& ref) + { + auto pos = Find(ref); + if (pos == MISSING) return; + graph.RemoveTransactions(SetType::Singleton(pos)); + simrevmap.erase(simmap[pos].get()); + // Remember the TxGraph::Ref corresponding to this position, because we still expect + // to see it when calling Cleanup(). + removed.push_back(std::move(simmap[pos])); + simmap[pos].reset(); + } + + /** Construct the set with all positions in this graph corresponding to the specified + * TxGraph::Refs. All of them must occur in this graph and not be removed. */ + SetType MakeSet(std::span arg) + { + SetType ret; + for (TxGraph::Ref* ptr : arg) { + auto pos = Find(*ptr); + assert(pos != Pos(-1)); + ret.Set(pos); + } + return ret; + } + + /** Get the set of ancestors (desc=false) or descendants (desc=true) in this graph. */ + SetType GetAncDesc(TxGraph::Ref& arg, bool desc) + { + auto pos = Find(arg); + if (pos == MISSING) return {}; + return desc ? graph.Descendants(pos) : graph.Ancestors(pos); + } + + /** Given a set of Refs (given as a vector of pointers), expand the set to include all its + * ancestors (desc=false) or all its descendants (desc=true) in this graph. */ + void IncludeAncDesc(std::vector& arg, bool desc) + { + std::vector ret; + for (auto ptr : arg) { + auto simpos = Find(*ptr); + if (simpos != MISSING) { + for (auto i : desc ? 
graph.Descendants(simpos) : graph.Ancestors(simpos)) { + ret.push_back(simmap[i].get()); + } + } else { + ret.push_back(ptr); + } + } + // Deduplicate. + std::sort(ret.begin(), ret.end()); + ret.erase(std::unique(ret.begin(), ret.end()), ret.end()); + // Replace input. + arg = std::move(ret); + } +}; + +} // namespace + +FUZZ_TARGET(txgraph) +{ + SeedRandomStateForTest(SeedRand::ZEROS); + FuzzedDataProvider provider(buffer.data(), buffer.size()); + + /** Internal test RNG, used only for decisions which would require significant amount of data + * to be read from the provider, without realistically impacting test sensitivity. */ + InsecureRandomContext rng(0xdecade2009added + buffer.size()); + + /** Variable used whenever an empty TxGraph::Ref is needed. */ + TxGraph::Ref empty_ref; + + // Construct a real and a simulated graph. + auto real = MakeTxGraph(); + SimTxGraph sim; + + /** Function to pick any Ref (in sim real, sim.removed, or empty). */ + auto pick_fn = [&]() noexcept -> TxGraph::Ref& { + auto tx_count = sim.GetTransactionCount(); + /** The number of possible choices. */ + size_t choices = tx_count + sim.removed.size() + 1; + /** Pick one of them. */ + auto choice = provider.ConsumeIntegralInRange(0, choices - 1); + if (choice < tx_count) { + // Return from real. + for (auto i : sim.graph.Positions()) { + if (choice == 0) return sim.GetRef(i); + --choice; + } + assert(false); + } else { + choice -= tx_count; + } + if (choice < sim.removed.size()) { + // Return from removed. + return *sim.removed[choice]; + } else { + choice -= sim.removed.size(); + } + // Return empty. + assert(choice == 0); + return empty_ref; + }; + + LIMITED_WHILE(provider.remaining_bytes() > 0, 200) { + // Read a one-byte command. + int command = provider.ConsumeIntegral(); + // Treat its lowest bit as a flag (which selects a variant of some of the operations), and + // leave the rest of the bits in command. 
+ bool alt = command & 1; + command >>= 1; + + // Keep decrementing command for each applicable operation, until one is hit. Multiple + // iterations may be necessary. + while (true) { + if (sim.GetTransactionCount() < SimTxGraph::MAX_TRANSACTIONS && command-- == 0) { + // AddTransaction. + int64_t fee; + int32_t size; + if (alt) { + fee = provider.ConsumeIntegralInRange(-0x8000000000000, 0x7ffffffffffff); + size = provider.ConsumeIntegralInRange(1, 0x3fffff); + } else { + fee = provider.ConsumeIntegral(); + size = provider.ConsumeIntegral() + 1; + } + FeeFrac feerate{fee, size}; + // Create a real TxGraph::Ref. + auto ref = real->AddTransaction(feerate); + // Create a unique_ptr place in the simulation to put the Ref in. + auto& ref_loc = sim.AddTransaction(feerate); + // Move it in place. + ref_loc = std::move(ref); + break; + } else if (sim.GetTransactionCount() + sim.removed.size() > 1 && command-- == 0) { + // AddDependency. + auto& par = pick_fn(); + auto& chl = pick_fn(); + auto pos_par = sim.Find(par); + auto pos_chl = sim.Find(chl); + if (pos_par != SimTxGraph::MISSING && pos_chl != SimTxGraph::MISSING) { + // Determine if adding this would introduce a cycle (not allowed by TxGraph), + // and if so, skip. + if (sim.graph.Ancestors(pos_par)[pos_chl]) break; + // Determine if adding this would violate CLUSTER_COUNT_LIMIT, and if so, skip. + auto temp_depgraph = sim.graph; + temp_depgraph.AddDependencies(SimTxGraph::SetType::Singleton(pos_par), pos_chl); + auto todo = temp_depgraph.Positions(); + bool oversize{false}; + while (todo.Any()) { + auto component = temp_depgraph.FindConnectedComponent(todo); + if (component.Count() > CLUSTER_COUNT_LIMIT) oversize = true; + todo -= component; + } + if (oversize) break; + } + sim.AddDependency(par, chl); + real->AddDependency(par, chl); + break; + } else if (sim.removed.size() < 100 && command-- == 0) { + // RemoveTransaction. 
Either all its ancestors or all its descendants are also + // removed (if any), to make sure TxGraph's reordering of removals and dependencies + // has no effect. + std::vector to_remove; + to_remove.push_back(&pick_fn()); + sim.IncludeAncDesc(to_remove, alt); + // The order in which these ancestors/descendants are removed should not matter; + // randomly shuffle them. + std::shuffle(to_remove.begin(), to_remove.end(), rng); + for (TxGraph::Ref* ptr : to_remove) { + real->RemoveTransaction(*ptr); + sim.RemoveTransaction(*ptr); + } + break; + } else if (sim.GetTransactionCount() > 0 && command-- == 0) { + // SetTransactionFee. + int64_t fee; + if (alt) { + fee = provider.ConsumeIntegralInRange(-0x8000000000000, 0x7ffffffffffff); + } else { + fee = provider.ConsumeIntegral(); + } + auto& ref = pick_fn(); + real->SetTransactionFee(ref, fee); + sim.SetTransactionFee(ref, fee); + break; + } else if (command-- == 0) { + // Cleanup. + auto cleaned = real->Cleanup(); + assert(sim.removed.size() == cleaned.size()); + std::sort(cleaned.begin(), cleaned.end()); + std::sort(sim.removed.begin(), sim.removed.end()); + for (size_t i = 0; i < sim.removed.size(); ++i) { + assert(cleaned[i] == sim.removed[i].get()); + } + sim.removed.clear(); + break; + } else if (command-- == 0) { + // GetTransactionCount. + assert(real->GetTransactionCount() == sim.GetTransactionCount()); + break; + } else if (command-- == 0) { + // Exists. + auto& ref = pick_fn(); + bool exists = real->Exists(ref); + bool should_exist = sim.Find(ref) != SimTxGraph::MISSING; + assert(exists == should_exist); + break; + } else if (command-- == 0) { + // GetIndividualFeerate. + auto& ref = pick_fn(); + auto feerate = real->GetIndividualFeerate(ref); + auto simpos = sim.Find(ref); + if (simpos == SimTxGraph::MISSING) { + assert(feerate.IsEmpty()); + } else { + assert(feerate == sim.graph.FeeRate(simpos)); + } + break; + } else if (command-- == 0) { + // GetChunkFeerate. 
+ auto& ref = pick_fn(); + auto feerate = real->GetChunkFeerate(ref); + auto simpos = sim.Find(ref); + if (simpos == SimTxGraph::MISSING) { + assert(feerate.IsEmpty()); + } else { + // Just do some quick checks that the reported value is in range. A full + // recomputation of expected chunk feerates is done at the end. + assert(feerate.size >= sim.graph.FeeRate(simpos).size); + } + break; + } else if (command-- == 0) { + // GetAncestors/GetDescendants. + auto& ref = pick_fn(); + auto result_set = sim.MakeSet(alt ? real->GetDescendants(ref) : + real->GetAncestors(ref)); + auto expect_set = sim.GetAncDesc(ref, alt); + assert(result_set == expect_set); + break; + } else if (command-- == 0) { + // GetCluster. + auto& ref = pick_fn(); + auto result = real->GetCluster(ref); + // Check cluster count limit. + assert(result.size() <= CLUSTER_COUNT_LIMIT); + // Require the result to be topologically valid and not contain duplicates. + auto left = sim.graph.Positions(); + for (auto refptr : result) { + auto simpos = sim.Find(*refptr); + assert(simpos != SimTxGraph::MISSING); + assert(left[simpos]); + left.Reset(simpos); + assert(!sim.graph.Ancestors(simpos).Overlaps(left)); + } + // Require the set to be connected. + auto result_set = sim.MakeSet(result); + assert(sim.graph.IsConnected(result_set)); + // If ref exists, the result must contain it. If not, it must be empty. + auto simpos = sim.Find(ref); + if (simpos != SimTxGraph::MISSING) { + assert(result_set[simpos]); + } else { + assert(result_set.None()); + } + // Require the set not to have ancestors or descendants outside of it. + for (auto i : result_set) { + assert(sim.graph.Ancestors(i).IsSubsetOf(result_set)); + assert(sim.graph.Descendants(i).IsSubsetOf(result_set)); + } + break; + } + } + } + // Compare simple properties of the graph with the simulation. + assert(real->GetTransactionCount() == sim.GetTransactionCount()); + + // Perform a full comparison. 
+ auto todo = sim.graph.Positions(); + // Iterate over all connected components of the resulting (simulated) graph, each of which + // should correspond to a cluster in the real one. + while (todo.Any()) { + auto component = sim.graph.FindConnectedComponent(todo); + todo -= component; + // Iterate over the transactions in that component. + for (auto i : component) { + // Check its individual feerate against simulation. + assert(sim.graph.FeeRate(i) == real->GetIndividualFeerate(sim.GetRef(i))); + // Check its ancestors against simulation. + auto expect_anc = sim.graph.Ancestors(i); + auto anc = sim.MakeSet(real->GetAncestors(sim.GetRef(i))); + assert(anc == expect_anc); + // Check its descendants against simulation. + auto expect_desc = sim.graph.Descendants(i); + auto desc = sim.MakeSet(real->GetDescendants(sim.GetRef(i))); + assert(desc == expect_desc); + // Check the cluster the transaction is part of. + auto cluster = real->GetCluster(sim.GetRef(i)); + assert(sim.MakeSet(cluster) == component); + // Check that the cluster is reported in a valid topological order (its + // linearization). + std::vector simlin; + SimTxGraph::SetType done; + for (TxGraph::Ref* ptr : cluster) { + auto simpos = sim.Find(*ptr); + assert(sim.graph.Descendants(simpos).IsSubsetOf(component - done)); + done.Set(simpos); + assert(sim.graph.Ancestors(simpos).IsSubsetOf(done)); + simlin.push_back(simpos); + } + // Construct a chunking object for the simulated graph, using the reported cluster + // linearization as ordering, and compare it against the reported chunk feerates. + cluster_linearize::LinearizationChunking simlinchunk(sim.graph, simlin); + ClusterIndex idx{0}; + for (unsigned chunknum = 0; chunknum < simlinchunk.NumChunksLeft(); ++chunknum) { + auto chunk = simlinchunk.GetChunk(chunknum); + // Require that the chunks of cluster linearizations are connected (this must + // be the case as all linearizations inside are PostLinearized). 
+ assert(sim.graph.IsConnected(chunk.transactions)); + // Check the chunk feerates of all transactions in the cluster. + while (chunk.transactions.Any()) { + assert(chunk.transactions[simlin[idx]]); + chunk.transactions.Reset(simlin[idx]); + assert(chunk.feerate == real->GetChunkFeerate(*cluster[idx])); + ++idx; + } + } + } + } +} From 17b76ed4e1254efa7225e3b3d1ce6a6990039ef3 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 14 Nov 2024 22:45:46 -0500 Subject: [PATCH 05/20] txgraph: (tests) add internal sanity check function To make testing more powerful, expose a function to perform an internal sanity check on the state of a TxGraph. This is especially important as TxGraphImpl contains many redundantly represented pieces of information: * graph contains clusters, which refer to entries, but the entries refer back * graph maintains pointers to Ref objects, which point back to the graph. This lets us make sure they are always in sync. --- src/test/fuzz/txgraph.cpp | 8 +++ src/txgraph.cpp | 109 ++++++++++++++++++++++++++++++++++++++ src/txgraph.h | 3 ++ 3 files changed, 120 insertions(+) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index 31ad17693b1..c9b777d8968 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -369,6 +369,11 @@ FUZZ_TARGET(txgraph) } } } + + // After running all modifications, perform an internal sanity check (before invoking + // inspectors that may modify the internal state). + real->SanityCheck(); + // Compare simple properties of the graph with the simulation. assert(real->GetTransactionCount() == sim.GetTransactionCount()); @@ -424,4 +429,7 @@ FUZZ_TARGET(txgraph) } } } + + // Sanity check again (because invoking inspectors may modify internal unobservable state). 
+ real->SanityCheck(); } diff --git a/src/txgraph.cpp b/src/txgraph.cpp index cc63dbde9fc..bf3e5621b93 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -77,6 +78,8 @@ public: /** Get the number of transactions in this Cluster. */ LinearizationIndex GetTxCount() const noexcept { return m_linearization.size(); } + /** Given a ClusterIndex into this Cluster, find the corresponding GraphIndex. */ + GraphIndex GetClusterEntry(ClusterIndex index) const noexcept { return m_mapping[index]; } /** Only called by Graph::SwapIndexes. */ void UpdateMapping(ClusterIndex cluster_idx, GraphIndex graph_idx) noexcept { m_mapping[cluster_idx] = graph_idx; } /** Push changes to Cluster and its linearization to the TxGraphImpl Entry objects. */ @@ -109,6 +112,10 @@ public: FeeFrac GetIndividualFeerate(ClusterIndex idx) noexcept; /** Modify the fee of a Cluster element. */ void SetFee(TxGraphImpl& graph, ClusterIndex idx, int64_t fee) noexcept; + + // Debugging functions. + + void SanityCheck(const TxGraphImpl& graph) const; }; /** The transaction graph. @@ -176,6 +183,8 @@ private: void SetMissing() noexcept { cluster = nullptr; index = 0; } /** Mark this Locator as present, in the specified Cluster. */ void SetPresent(Cluster* c, ClusterIndex i) noexcept { cluster = c; index = i; } + /** Check if this Locator is missing. */ + bool IsMissing() const noexcept { return cluster == nullptr && index == 0; } /** Check if this Locator is present (in some Cluster). 
*/ bool IsPresent() const noexcept { return cluster != nullptr; } }; @@ -277,6 +286,8 @@ public: std::vector GetAncestors(const Ref& arg) noexcept final; std::vector GetDescendants(const Ref& arg) noexcept final; GraphIndex GetTransactionCount() noexcept final; + + void SanityCheck() const final; }; void Cluster::Updated(TxGraphImpl& graph) noexcept @@ -1156,6 +1167,104 @@ void TxGraphImpl::SetTransactionFee(Ref& ref, int64_t fee) noexcept } } +void Cluster::SanityCheck(const TxGraphImpl& graph) const +{ + // There must be an m_mapping for each m_depgraph position (including holes). + assert(m_depgraph.PositionRange() == m_mapping.size()); + // The linearization for this Cluster must contain every transaction once. + assert(m_depgraph.TxCount() == m_linearization.size()); + // m_quality and m_setindex are checked in TxGraphImpl::SanityCheck. + + // Compute the chunking of m_linearization. + LinearizationChunking linchunking(m_depgraph, m_linearization); + + // Verify m_linearization. + SetType m_done; + assert(m_depgraph.IsAcyclic()); + for (auto lin_pos : m_linearization) { + assert(lin_pos < m_mapping.size()); + const auto& entry = graph.m_entries[m_mapping[lin_pos]]; + // Check that the linearization is topological. + m_done.Set(lin_pos); + assert(m_done.IsSupersetOf(m_depgraph.Ancestors(lin_pos))); + // Check that the Entry has a locator pointing back to this Cluster & position within it. + assert(entry.m_locator.cluster == this); + assert(entry.m_locator.index == lin_pos); + // Check linearization position and chunk feerate. + if (!linchunking.GetChunk(0).transactions[lin_pos]) { + linchunking.MarkDone(linchunking.GetChunk(0).transactions); + } + assert(entry.m_chunk_feerate == linchunking.GetChunk(0).feerate); + // If this Cluster has an acceptable quality level, its chunks must be connected. 
+ if (m_quality == QualityLevel::ACCEPTABLE || m_quality == QualityLevel::OPTIMAL) { + assert(m_depgraph.IsConnected(linchunking.GetChunk(0).transactions)); + } + } + // Verify that each element of m_depgraph occurred in m_linearization. + assert(m_done == m_depgraph.Positions()); +} + +void TxGraphImpl::SanityCheck() const +{ + /** Which GraphIndexes ought to occur in m_wiped, based on m_entries. */ + std::set expected_wiped; + /** Which Clusters ought to occur in m_clusters, based on m_entries. */ + std::set expected_clusters; + + // Go over all Entry objects in m_entries. + for (GraphIndex idx = 0; idx < m_entries.size(); ++idx) { + const auto& entry = m_entries[idx]; + if (entry.IsWiped()) { + // If the Entry is not IsPresent anywhere, it should be in m_wiped. + expected_wiped.insert(idx); + } else { + // Every non-wiped Entry must have a Ref that points back to it. + assert(entry.m_ref != nullptr); + assert(GetRefGraph(*entry.m_ref) == this); + assert(GetRefIndex(*entry.m_ref) == idx); + } + // Verify the Entry m_locator. + const auto& locator = entry.m_locator; + // Every Locator must be in exactly one of these 2 states. + assert(locator.IsMissing() + locator.IsPresent() == 1); + if (locator.IsPresent()) { + // Verify that the Cluster agrees with where the Locator claims the transaction is. + assert(locator.cluster->GetClusterEntry(locator.index) == idx); + // Remember that we expect said Cluster to appear in the m_clusters. + expected_clusters.insert(locator.cluster); + } + + } + + std::set actual_clusters; + // For all quality levels... + for (int qual = 0; qual < int(QualityLevel::NONE); ++qual) { + QualityLevel quality{qual}; + const auto& quality_clusters = m_clusters[qual]; + // ... for all clusters in them ... 
+ for (ClusterSetIndex setindex = 0; setindex < quality_clusters.size(); ++setindex) { + const auto& cluster = *quality_clusters[setindex]; + // Remember we saw this Cluster (only if it is non-empty; empty Clusters aren't + // expected to be referenced by the Entry vector). + if (cluster.GetTxCount() != 0) { + actual_clusters.insert(&cluster); + } + // Sanity check the cluster, according to the Cluster's internal rules. + cluster.SanityCheck(*this); + // Check that the cluster's quality and setindex matches its position in the quality list. + assert(cluster.m_quality == quality); + assert(cluster.m_setindex == setindex); + } + } + + // Verify that the actually encountered clusters match the ones occurring in Entry vector. + assert(actual_clusters == expected_clusters); + + // Verify that the contents of m_wiped matches what was expected based on the Entry vector. + std::set actual_wiped(m_wiped.begin(), m_wiped.end()); + assert(actual_wiped == expected_wiped); +} + } // namespace TxGraph::Ref::~Ref() diff --git a/src/txgraph.h b/src/txgraph.h index 8f27a859467..8a51cac9ce1 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -117,6 +117,9 @@ public: virtual std::vector GetDescendants(const Ref& arg) noexcept = 0; /** Get the total number of transactions in the graph. */ virtual GraphIndex GetTransactionCount() noexcept = 0; + + /** Perform an internal consistency check on this object. */ + virtual void SanityCheck() const = 0; }; /** Construct a new TxGraph. 
*/ From 2d2cb1dc4c907133dbc80991fda449e4cebb9cf5 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Sun, 24 Nov 2024 08:53:50 -0500 Subject: [PATCH 06/20] txgraph: (feature) make max cluster count configurable and "oversize" state Instead of leaving the responsibility on higher layers to guarantee that no connected component within TxGraph (a barely exposed concept, except through GetCluster()) exceeds the cluster count limit, move this responsibility to TxGraph itself: * TxGraph retains a cluster count limit, but it becomes configurable at construction time (this primarily helps with testing that it is properly enforced). * It is always allowed to perform mutators on TxGraph, even if they would cause the cluster count limit to be exceeded. Instead, TxGraph exposes an IsOversized() function, which queries whether it is in a special "oversize" state. * During oversize state, many inspectors are unavailable, but mutators remain valid, so the higher layer can "fix" the oversize state before continuing. --- src/test/fuzz/txgraph.cpp | 185 +++++++++++++++++++++++--------------- src/txgraph.cpp | 99 +++++++++++++------- src/txgraph.h | 32 ++++--- 3 files changed, 199 insertions(+), 117 deletions(-) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index c9b777d8968..6ce4807a8b1 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -27,7 +27,7 @@ struct SimTxGraph /** Maximum number of transactions to support simultaneously. Set this higher than txgraph's * cluster count, so we can exercise situations with more transactions than fit in one * cluster. */ - static constexpr unsigned MAX_TRANSACTIONS = CLUSTER_COUNT_LIMIT * 2; + static constexpr unsigned MAX_TRANSACTIONS = MAX_CLUSTER_COUNT_LIMIT * 2; /** Set type to use in the simulation. */ using SetType = BitSet; /** Data type for representing positions within SimTxGraph::graph. 
*/ @@ -45,6 +45,31 @@ struct SimTxGraph /** The set of TxGraph::Ref entries that have been removed, but not yet Cleanup()'ed in * the real TxGraph. */ std::vector> removed; + /** Whether the graph is oversized (true = yes, false = no, std::nullopt = unknown). */ + std::optional oversized; + /** The configured maximum number of transactions per cluster. */ + ClusterIndex max_cluster_count; + + /** Construct a new SimTxGraph with the specified maximum cluster count. */ + explicit SimTxGraph(ClusterIndex max_cluster) : max_cluster_count(max_cluster) {} + + /** Check whether this graph is oversized (contains a connected component whose number of + * transactions exceeds max_cluster_count). */ + bool IsOversized() + { + if (!oversized.has_value()) { + // Only recompute when oversized isn't already known. + oversized = false; + auto todo = graph.Positions(); + // Iterate over all connected components of the graph. + while (todo.Any()) { + auto component = graph.FindConnectedComponent(todo); + if (component.Count() > max_cluster_count) oversized = true; + todo -= component; + } + } + return *oversized; + } /** Determine the number of (non-removed) transactions in the graph. */ ClusterIndex GetTransactionCount() const { return graph.TxCount(); } @@ -86,6 +111,8 @@ struct SimTxGraph auto chl_pos = Find(child); if (chl_pos == MISSING) return; graph.AddDependencies(SetType::Singleton(par_pos), chl_pos); + // This may invalidate our cached oversized value. + if (oversized.has_value() && !*oversized) oversized = std::nullopt; } /** Modify the transaction fee of a ref, if it exists. */ @@ -107,6 +134,8 @@ struct SimTxGraph // to see it when calling Cleanup(). removed.push_back(std::move(simmap[pos])); simmap[pos].reset(); + // This may invalidate our cached oversized value. 
+ if (oversized.has_value() && *oversized) oversized = std::nullopt; } /** Construct the set with all positions in this graph corresponding to the specified @@ -167,9 +196,12 @@ FUZZ_TARGET(txgraph) /** Variable used whenever an empty TxGraph::Ref is needed. */ TxGraph::Ref empty_ref; + // Decide the maximum number of transactions per cluster we will use in this simulation. + auto max_count = provider.ConsumeIntegralInRange(1, MAX_CLUSTER_COUNT_LIMIT); + // Construct a real and a simulated graph. - auto real = MakeTxGraph(); - SimTxGraph sim; + auto real = MakeTxGraph(max_count); + SimTxGraph sim(max_count); /** Function to pick any Ref (in sim real, sim.removed, or empty). */ auto pick_fn = [&]() noexcept -> TxGraph::Ref& { @@ -239,17 +271,6 @@ FUZZ_TARGET(txgraph) // Determine if adding this would introduce a cycle (not allowed by TxGraph), // and if so, skip. if (sim.graph.Ancestors(pos_par)[pos_chl]) break; - // Determine if adding this would violate CLUSTER_COUNT_LIMIT, and if so, skip. - auto temp_depgraph = sim.graph; - temp_depgraph.AddDependencies(SimTxGraph::SetType::Singleton(pos_par), pos_chl); - auto todo = temp_depgraph.Positions(); - bool oversize{false}; - while (todo.Any()) { - auto component = temp_depgraph.FindConnectedComponent(todo); - if (component.Count() > CLUSTER_COUNT_LIMIT) oversize = true; - todo -= component; - } - if (oversize) break; } sim.AddDependency(par, chl); real->AddDependency(par, chl); @@ -284,13 +305,17 @@ FUZZ_TARGET(txgraph) } else if (command-- == 0) { // Cleanup. 
auto cleaned = real->Cleanup(); - assert(sim.removed.size() == cleaned.size()); - std::sort(cleaned.begin(), cleaned.end()); - std::sort(sim.removed.begin(), sim.removed.end()); - for (size_t i = 0; i < sim.removed.size(); ++i) { - assert(cleaned[i] == sim.removed[i].get()); + if (!sim.IsOversized()) { + assert(sim.removed.size() == cleaned.size()); + std::sort(cleaned.begin(), cleaned.end()); + std::sort(sim.removed.begin(), sim.removed.end()); + for (size_t i = 0; i < sim.removed.size(); ++i) { + assert(cleaned[i] == sim.removed[i].get()); + } + sim.removed.clear(); + } else { + assert(cleaned.empty()); } - sim.removed.clear(); break; } else if (command-- == 0) { // GetTransactionCount. @@ -303,6 +328,10 @@ FUZZ_TARGET(txgraph) bool should_exist = sim.Find(ref) != SimTxGraph::MISSING; assert(exists == should_exist); break; + } else if (command-- == 0) { + // IsOversized. + assert(sim.IsOversized() == real->IsOversized()); + break; } else if (command-- == 0) { // GetIndividualFeerate. auto& ref = pick_fn(); @@ -314,7 +343,7 @@ FUZZ_TARGET(txgraph) assert(feerate == sim.graph.FeeRate(simpos)); } break; - } else if (command-- == 0) { + } else if (!sim.IsOversized() && command-- == 0) { // GetChunkFeerate. auto& ref = pick_fn(); auto feerate = real->GetChunkFeerate(ref); @@ -327,20 +356,22 @@ FUZZ_TARGET(txgraph) assert(feerate.size >= sim.graph.FeeRate(simpos).size); } break; - } else if (command-- == 0) { + } else if (!sim.IsOversized() && command-- == 0) { // GetAncestors/GetDescendants. auto& ref = pick_fn(); - auto result_set = sim.MakeSet(alt ? real->GetDescendants(ref) : - real->GetAncestors(ref)); + auto result = alt ? 
real->GetDescendants(ref) : real->GetAncestors(ref); + assert(result.size() <= max_count); + auto result_set = sim.MakeSet(result); + assert(result.size() == result_set.Count()); auto expect_set = sim.GetAncDesc(ref, alt); assert(result_set == expect_set); break; - } else if (command-- == 0) { + } else if (!sim.IsOversized() && command-- == 0) { // GetCluster. auto& ref = pick_fn(); auto result = real->GetCluster(ref); // Check cluster count limit. - assert(result.size() <= CLUSTER_COUNT_LIMIT); + assert(result.size() <= max_count); // Require the result to be topologically valid and not contain duplicates. auto left = sim.graph.Positions(); for (auto refptr : result) { @@ -375,56 +406,62 @@ FUZZ_TARGET(txgraph) real->SanityCheck(); // Compare simple properties of the graph with the simulation. + assert(real->IsOversized() == sim.IsOversized()); assert(real->GetTransactionCount() == sim.GetTransactionCount()); - // Perform a full comparison. - auto todo = sim.graph.Positions(); - // Iterate over all connected components of the resulting (simulated) graph, each of which - // should correspond to a cluster in the real one. - while (todo.Any()) { - auto component = sim.graph.FindConnectedComponent(todo); - todo -= component; - // Iterate over the transactions in that component. - for (auto i : component) { - // Check its individual feerate against simulation. - assert(sim.graph.FeeRate(i) == real->GetIndividualFeerate(sim.GetRef(i))); - // Check its ancestors against simulation. - auto expect_anc = sim.graph.Ancestors(i); - auto anc = sim.MakeSet(real->GetAncestors(sim.GetRef(i))); - assert(anc == expect_anc); - // Check its descendants against simulation. - auto expect_desc = sim.graph.Descendants(i); - auto desc = sim.MakeSet(real->GetDescendants(sim.GetRef(i))); - assert(desc == expect_desc); - // Check the cluster the transaction is part of. 
- auto cluster = real->GetCluster(sim.GetRef(i)); - assert(sim.MakeSet(cluster) == component); - // Check that the cluster is reported in a valid topological order (its - // linearization). - std::vector simlin; - SimTxGraph::SetType done; - for (TxGraph::Ref* ptr : cluster) { - auto simpos = sim.Find(*ptr); - assert(sim.graph.Descendants(simpos).IsSubsetOf(component - done)); - done.Set(simpos); - assert(sim.graph.Ancestors(simpos).IsSubsetOf(done)); - simlin.push_back(simpos); - } - // Construct a chunking object for the simulated graph, using the reported cluster - // linearization as ordering, and compare it against the reported chunk feerates. - cluster_linearize::LinearizationChunking simlinchunk(sim.graph, simlin); - ClusterIndex idx{0}; - for (unsigned chunknum = 0; chunknum < simlinchunk.NumChunksLeft(); ++chunknum) { - auto chunk = simlinchunk.GetChunk(chunknum); - // Require that the chunks of cluster linearizations are connected (this must - // be the case as all linearizations inside are PostLinearized). - assert(sim.graph.IsConnected(chunk.transactions)); - // Check the chunk feerates of all transactions in the cluster. - while (chunk.transactions.Any()) { - assert(chunk.transactions[simlin[idx]]); - chunk.transactions.Reset(simlin[idx]); - assert(chunk.feerate == real->GetChunkFeerate(*cluster[idx])); - ++idx; + // If the graph (and the simulation) are not oversized, perform a full comparison. + if (!sim.IsOversized()) { + auto todo = sim.graph.Positions(); + // Iterate over all connected components of the resulting (simulated) graph, each of which + // should correspond to a cluster in the real one. + while (todo.Any()) { + auto component = sim.graph.FindConnectedComponent(todo); + todo -= component; + // Iterate over the transactions in that component. + for (auto i : component) { + // Check its individual feerate against simulation. 
+ assert(sim.graph.FeeRate(i) == real->GetIndividualFeerate(sim.GetRef(i))); + // Check its ancestors against simulation. + auto expect_anc = sim.graph.Ancestors(i); + auto anc = sim.MakeSet(real->GetAncestors(sim.GetRef(i))); + assert(anc.Count() <= max_count); + assert(anc == expect_anc); + // Check its descendants against simulation. + auto expect_desc = sim.graph.Descendants(i); + auto desc = sim.MakeSet(real->GetDescendants(sim.GetRef(i))); + assert(desc.Count() <= max_count); + assert(desc == expect_desc); + // Check the cluster the transaction is part of. + auto cluster = real->GetCluster(sim.GetRef(i)); + assert(cluster.size() <= max_count); + assert(sim.MakeSet(cluster) == component); + // Check that the cluster is reported in a valid topological order (its + // linearization). + std::vector simlin; + SimTxGraph::SetType done; + for (TxGraph::Ref* ptr : cluster) { + auto simpos = sim.Find(*ptr); + assert(sim.graph.Descendants(simpos).IsSubsetOf(component - done)); + done.Set(simpos); + assert(sim.graph.Ancestors(simpos).IsSubsetOf(done)); + simlin.push_back(simpos); + } + // Construct a chunking object for the simulated graph, using the reported cluster + // linearization as ordering, and compare it against the reported chunk feerates. + cluster_linearize::LinearizationChunking simlinchunk(sim.graph, simlin); + ClusterIndex idx{0}; + for (unsigned chunknum = 0; chunknum < simlinchunk.NumChunksLeft(); ++chunknum) { + auto chunk = simlinchunk.GetChunk(chunknum); + // Require that the chunks of cluster linearizations are connected (this must + // be the case as all linearizations inside are PostLinearized). + assert(sim.graph.IsConnected(chunk.transactions)); + // Check the chunk feerates of all transactions in the cluster. 
+ while (chunk.transactions.Any()) { + assert(chunk.transactions[simlin[idx]]); + chunk.transactions.Reset(simlin[idx]); + assert(chunk.feerate == real->GetChunkFeerate(*cluster[idx])); + ++idx; + } } } } diff --git a/src/txgraph.cpp b/src/txgraph.cpp index bf3e5621b93..9df705a0f0d 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -49,7 +49,7 @@ class Cluster { friend class TxGraphImpl; using GraphIndex = TxGraph::GraphIndex; - using SetType = BitSet; + using SetType = BitSet; /** The DepGraph for this cluster, holding all feerates, and ancestors/descendants. */ DepGraph m_depgraph; /** m_mapping[i] gives the GraphIndex for the position i transaction in m_depgraph. */ @@ -143,6 +143,8 @@ class TxGraphImpl final : public TxGraph private: /** Internal RNG. */ FastRandomContext m_rng; + /** This TxGraphImpl's maximum cluster count limit. */ + const ClusterIndex m_max_cluster_count; /** Information about one group of Clusters to be merged. */ struct GroupData @@ -170,6 +172,9 @@ private: std::optional> m_group_data = std::vector{}; /** Total number of transactions in this ClusterSet (explicit + implicit). */ GraphIndex m_txcount{0}; + /** Whether we know that merging clusters (as determined by m_to_merge) would exceed the max + cluster size. */ + bool m_oversized{false}; /** A Locator that describes whether, where, and in which Cluster an Entry appears. */ struct Locator @@ -214,8 +219,12 @@ private: std::vector m_wiped; public: - /** Construct a new TxGraphImpl. */ - explicit TxGraphImpl() noexcept {} + /** Construct a new TxGraphImpl with the specified maximum cluster count. */ + explicit TxGraphImpl(ClusterIndex max_cluster_count) noexcept : + m_max_cluster_count(max_cluster_count) + { + Assume(max_cluster_count <= MAX_CLUSTER_COUNT_LIMIT); + } // Cannot move or copy (would invalidate TxGraphImpl* in Ref, MiningOrder, EvictionOrder). 
TxGraphImpl(const TxGraphImpl&) = delete; @@ -262,7 +271,7 @@ public: void Split(Cluster& cluster) noexcept; /** Split all clusters that need splitting. */ void SplitAll() noexcept; - /** Populate m_group_data based on m_deps_to_add. */ + /** Populate m_group_data (and m_oversized) based on m_deps_to_add. */ void GroupClusters() noexcept; /** Merge the specified clusters. */ void Merge(std::span to_merge) noexcept; @@ -286,6 +295,7 @@ public: std::vector GetAncestors(const Ref& arg) noexcept final; std::vector GetDescendants(const Ref& arg) noexcept final; GraphIndex GetTransactionCount() noexcept final; + bool IsOversized() noexcept final; void SanityCheck() const final; }; @@ -561,9 +571,10 @@ void TxGraphImpl::ApplyRemovals() noexcept auto& to_remove = m_to_remove; // Skip if there is nothing to remove. if (to_remove.empty()) return; - // Wipe cached m_group_data, as it may be invalidated by removals. + // Wipe cached m_group_data and m_oversized, as it may be invalidated by removals. m_group_data = std::nullopt; m_group_clusters.clear(); + m_oversized = false; // Group the set of to-be-removed entries by Cluster*. std::sort(m_to_remove.begin(), m_to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept { return std::less{}(m_entries[a].m_locator.cluster, m_entries[b].m_locator.cluster); @@ -609,30 +620,32 @@ std::vector TxGraphImpl::Cleanup() noexcept { ApplyDependencies(); std::vector ret; - // Sort the GraphIndex that need to be cleaned up. This groups them (so duplicates can be - // processed just once). They are sorted in reverse, so the last ones get processed first. - // This means earlier-processed GraphIndexes will not move of later-processed ones (which - // might invalidate them). - std::sort(m_wiped.begin(), m_wiped.end(), std::greater{}); - GraphIndex last(-1); - for (GraphIndex idx : m_wiped) { - // m_wiped should never contain the same GraphIndex twice (the code below would fail - // if so, because GraphIndexes get invalidated by removing them). 
- Assume(idx != last); - last = idx; - Entry& entry = m_entries[idx]; - // Gather Ref pointers that are being unlinked. - if (entry.m_ref != nullptr) { - ret.push_back(entry.m_ref); - GetRefGraph(*entry.m_ref) = nullptr; - m_entries[idx].m_ref = nullptr; + if (!m_oversized) { + // Sort the GraphIndex that need to be cleaned up. This groups them (so duplicates can be + // processed just once). They are sorted in reverse, so the last ones get processed first. + // This means earlier-processed GraphIndexes will not move of later-processed ones (which + // might invalidate them). + std::sort(m_wiped.begin(), m_wiped.end(), std::greater{}); + GraphIndex last = GraphIndex(-1); + for (GraphIndex idx : m_wiped) { + // m_wiped should never contain the same GraphIndex twice (the code below would fail + // if so, because GraphIndexes get invalidated by removing them). + Assume(idx != last); + last = idx; + Entry& entry = m_entries[idx]; + // Gather Ref pointers that are being unlinked. + if (entry.m_ref != nullptr) { + ret.push_back(entry.m_ref); + GetRefGraph(*entry.m_ref) = nullptr; + m_entries[idx].m_ref = nullptr; + } + // Verify removed entries don't have anything that could hold a reference back. + Assume(!entry.m_locator.IsPresent()); + if (idx != m_entries.size() - 1) SwapIndexes(idx, m_entries.size() - 1); + m_entries.pop_back(); } - // Verify removed entries don't have anything that could hold a reference back. - Assume(!entry.m_locator.IsPresent()); - if (idx != m_entries.size() - 1) SwapIndexes(idx, m_entries.size() - 1); - m_entries.pop_back(); + m_wiped.clear(); } - m_wiped.clear(); return ret; } @@ -662,7 +675,7 @@ void TxGraphImpl::GroupClusters() noexcept { // Before computing which Clusters need to be merged together, first apply all removals and // split the Clusters into connected components. If we would group first, we might end up - // with inefficient Clusters which just end up being split again anyway. 
+ // with inefficient and/or oversized Clusters which just end up being split again anyway. SplitAll(); // If the groupings have been computed already, nothing is left to be done. @@ -838,9 +851,11 @@ void TxGraphImpl::GroupClusters() noexcept new_entry.m_cluster_count = 0; new_entry.m_deps_offset = m_deps_to_add.size(); new_entry.m_deps_count = 0; + uint32_t total_count{0}; // Add all its clusters to it (copying those from an_clusters to m_group_clusters). while (an_clusters_it != an_clusters.end() && an_clusters_it->second == rep) { m_group_clusters.push_back(an_clusters_it->first); + total_count += an_clusters_it->first->GetTxCount(); ++an_clusters_it; ++new_entry.m_cluster_count; } @@ -850,6 +865,10 @@ void TxGraphImpl::GroupClusters() noexcept ++an_deps_it; ++new_entry.m_deps_count; } + // Detect oversizedness. + if (total_count > m_max_cluster_count) { + m_oversized = true; + } } Assume(an_deps_it == an_deps.end()); Assume(an_clusters_it == an_clusters.end()); @@ -889,6 +908,8 @@ void TxGraphImpl::ApplyDependencies() noexcept Assume(m_group_data.has_value()); // Nothing to do if there are no dependencies to be added. if (m_deps_to_add.empty()) return; + // Dependencies cannot be applied if it would result in oversized clusters. + if (m_oversized) return; // For each group of to-be-merged Clusters. Assume(m_group_data.has_value()); @@ -1061,6 +1082,8 @@ std::vector TxGraphImpl::GetAncestors(const Ref& arg) noexcept Assume(GetRefGraph(arg) == this); // Apply all dependencies, as the result might be incorrect otherwise. ApplyDependencies(); + // Ancestry cannot be known if unapplied dependencies remain. + Assume(!m_oversized); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. 
auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1076,6 +1099,8 @@ std::vector TxGraphImpl::GetDescendants(const Ref& arg) noexcept Assume(GetRefGraph(arg) == this); // Apply all dependencies, as the result might be incorrect otherwise. ApplyDependencies(); + // Ancestry cannot be known if unapplied dependencies remain. + Assume(!m_oversized); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1091,6 +1116,8 @@ std::vector TxGraphImpl::GetCluster(const Ref& arg) noexcept Assume(GetRefGraph(arg) == this); // Apply all dependencies, as the result might be incorrect otherwise. ApplyDependencies(); + // Cluster linearization cannot be known if unapplied dependencies remain. + Assume(!m_oversized); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1128,6 +1155,8 @@ FeeFrac TxGraphImpl::GetChunkFeerate(const Ref& arg) noexcept Assume(GetRefGraph(arg) == this); // Apply all dependencies, as the result might be inaccurate otherwise. ApplyDependencies(); + // Chunk feerates cannot be accurately known if unapplied dependencies remain. + Assume(!m_oversized); // Find the cluster the argument is in, and return the empty FeeFrac if it isn't in any. auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1138,6 +1167,14 @@ FeeFrac TxGraphImpl::GetChunkFeerate(const Ref& arg) noexcept return entry.m_chunk_feerate; } +bool TxGraphImpl::IsOversized() noexcept +{ + // Find which Clusters will need to be merged together, as that is where the oversize + // property is assessed. 
+ GroupClusters(); + return m_oversized; +} + void Cluster::SetFee(TxGraphImpl& graph, ClusterIndex idx, int64_t fee) noexcept { // Make sure the specified ClusterIndex exists in this Cluster. @@ -1173,6 +1210,8 @@ void Cluster::SanityCheck(const TxGraphImpl& graph) const assert(m_depgraph.PositionRange() == m_mapping.size()); // The linearization for this Cluster must contain every transaction once. assert(m_depgraph.TxCount() == m_linearization.size()); + // The number of transactions in a Cluster cannot exceed m_max_cluster_count. + assert(m_linearization.size() <= graph.m_max_cluster_count); // m_quality and m_setindex are checked in TxGraphImpl::SanityCheck. // Compute the chunking of m_linearization. @@ -1296,7 +1335,7 @@ TxGraph::Ref::Ref(Ref&& other) noexcept std::swap(m_index, other.m_index); } -std::unique_ptr MakeTxGraph() noexcept +std::unique_ptr MakeTxGraph(unsigned max_cluster_count) noexcept { - return std::make_unique(); + return std::make_unique(max_cluster_count); } diff --git a/src/txgraph.h b/src/txgraph.h index 8a51cac9ce1..a4588631404 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -12,8 +12,7 @@ #ifndef BITCOIN_TXGRAPH_H #define BITCOIN_TXGRAPH_H -/** No connected component within TxGraph is allowed to exceed this number of transactions. */ -static constexpr unsigned CLUSTER_COUNT_LIMIT{64}; +static constexpr unsigned MAX_CLUSTER_COUNT_LIMIT{64}; /** Data structure to encapsulate fees, sizes, and dependencies for a set of transactions. */ class TxGraph @@ -93,36 +92,43 @@ public: * removed), this has no effect. */ virtual void SetTransactionFee(Ref& arg, int64_t fee) noexcept = 0; /** Return a vector of pointers to Ref objects for transactions which have been removed from - * the graph, and have not been destroyed yet. Each transaction is only reported once by - * Cleanup(). Afterwards, all Refs will be empty. */ + * the graph, and have not been destroyed yet. This has no effect if the graph is oversized + * (see below). 
Each transaction is only reported once by Cleanup(). Afterwards, all Refs will + * be empty. */ [[nodiscard]] virtual std::vector Cleanup() noexcept = 0; /** Determine whether arg exists in this graph (i.e., was not removed). */ virtual bool Exists(const Ref& arg) noexcept = 0; + /** Determine whether the graph is oversized (contains a connected component of more than the + * configured maximum cluster count). Some of the functions below are not available + * for oversized graphs. The mutators above are always available. */ + virtual bool IsOversized() noexcept = 0; /** Get the feerate of the chunk which transaction arg is in. Returns the empty FeeFrac if arg - * does not exist. */ + * does not exist. The graph must not be oversized. */ virtual FeeFrac GetChunkFeerate(const Ref& arg) noexcept = 0; /** Get the individual transaction feerate of transaction arg. Returns the empty FeeFrac if - * arg does not exist. */ + * arg does not exist. This is available even for oversized graphs. */ virtual FeeFrac GetIndividualFeerate(const Ref& arg) noexcept = 0; /** Get pointers to all transactions in the connected component ("cluster") which arg is in. * The transactions will be returned in a topologically-valid order of acceptable quality. * Returns {} if arg does not exist in the queried graph. */ virtual std::vector GetCluster(const Ref& arg) noexcept = 0; - /** Get pointers to all ancestors of the specified transaction. Returns {} if arg does not - * exist. */ + /** Get pointers to all ancestors of the specified transaction. The queried graph must not be + * oversized. Returns {} if arg does not exist. */ virtual std::vector GetAncestors(const Ref& arg) noexcept = 0; - /** Get pointers to all descendants of the specified transaction. Returns {} if arg does not - * exist in the graph. */ + /** Get pointers to all descendants of the specified transaction. The graph must not be + * oversized. Returns {} if arg does not exist in the graph. 
*/ virtual std::vector GetDescendants(const Ref& arg) noexcept = 0; - /** Get the total number of transactions in the graph. */ + /** Get the total number of transactions in the graph. This is available even for oversized + * graphs. */ virtual GraphIndex GetTransactionCount() noexcept = 0; /** Perform an internal consistency check on this object. */ virtual void SanityCheck() const = 0; }; -/** Construct a new TxGraph. */ -std::unique_ptr MakeTxGraph() noexcept; +/** Construct a new TxGraph with the specified limit on transactions within a cluster. That + * number cannot exceed MAX_CLUSTER_COUNT_LIMIT. */ +std::unique_ptr MakeTxGraph(unsigned max_cluster_count) noexcept; #endif // BITCOIN_TXGRAPH_H From 60f4e41254149e0f79edf99c4e3ab8e7888eb35a Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Fri, 15 Nov 2024 13:31:23 -0500 Subject: [PATCH 07/20] txgraph: (optimization) delay chunking while sub-acceptable Chunk-based information (primarily, chunk feerates) is never accessed without first bringing the relevant Clusters to an "acceptable" quality level. Thus, while operations are ongoing and Clusters are not acceptable, we can omit computing the chunkings and chunk feerates for Clusters. --- src/txgraph.cpp | 49 +++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 9df705a0f0d..aad6ac1cd6f 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -307,21 +307,25 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept auto& entry = graph.m_entries[m_mapping[idx]]; entry.m_locator.SetPresent(this, idx); } - - // Compute its chunking and store its information in the Entry's m_chunk_feerate. - LinearizationChunking chunking(m_depgraph, m_linearization); - LinearizationIndex lin_idx{0}; - // Iterate over the chunks. 
- for (unsigned chunk_idx = 0; chunk_idx < chunking.NumChunksLeft(); ++chunk_idx) { - auto chunk = chunking.GetChunk(chunk_idx); - // Iterate over the transactions in the linearization, which must match those in chunk. - while (true) { - ClusterIndex idx = m_linearization[lin_idx++]; - GraphIndex graph_idx = m_mapping[idx]; - auto& entry = graph.m_entries[graph_idx]; - entry.m_chunk_feerate = chunk.feerate; - chunk.transactions.Reset(idx); - if (chunk.transactions.None()) break; + // If the Cluster's quality is ACCEPTABLE or OPTIMAL, compute its chunking and store its + // information in the Entry's m_chunk_feerate. These fields are only accessed after making + // the entire graph ACCEPTABLE, so it is pointless to compute these if we haven't reached that + // quality level yet. + if (m_quality == QualityLevel::OPTIMAL || m_quality == QualityLevel::ACCEPTABLE) { + LinearizationChunking chunking(m_depgraph, m_linearization); + LinearizationIndex lin_idx{0}; + // Iterate over the chunks. + for (unsigned chunk_idx = 0; chunk_idx < chunking.NumChunksLeft(); ++chunk_idx) { + auto chunk = chunking.GetChunk(chunk_idx); + // Iterate over the transactions in the linearization, which must match those in chunk. + while (true) { + ClusterIndex idx = m_linearization[lin_idx++]; + GraphIndex graph_idx = m_mapping[idx]; + auto& entry = graph.m_entries[graph_idx]; + entry.m_chunk_feerate = chunk.feerate; + chunk.transactions.Reset(idx); + if (chunk.transactions.None()) break; + } } } } @@ -380,7 +384,8 @@ bool Cluster::Split(TxGraphImpl& graph) noexcept if (first && component == todo) { // The existing Cluster is an entire component. Leave it be, but update its quality. graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); - // We need to recompute and cache its chunking. + // If this made the quality ACCEPTABLE or OPTIMAL, we need to compute and cache its + // chunking. 
Updated(graph); return false; } @@ -1230,12 +1235,12 @@ void Cluster::SanityCheck(const TxGraphImpl& graph) const assert(entry.m_locator.cluster == this); assert(entry.m_locator.index == lin_pos); // Check linearization position and chunk feerate. - if (!linchunking.GetChunk(0).transactions[lin_pos]) { - linchunking.MarkDone(linchunking.GetChunk(0).transactions); - } - assert(entry.m_chunk_feerate == linchunking.GetChunk(0).feerate); - // If this Cluster has an acceptable quality level, its chunks must be connected. - if (m_quality == QualityLevel::ACCEPTABLE || m_quality == QualityLevel::OPTIMAL) { + if (m_quality == QualityLevel::OPTIMAL || m_quality == QualityLevel::ACCEPTABLE) { + if (!linchunking.GetChunk(0).transactions[lin_pos]) { + linchunking.MarkDone(linchunking.GetChunk(0).transactions); + } + assert(entry.m_chunk_feerate == linchunking.GetChunk(0).feerate); + // If this Cluster has an acceptable quality level, its chunks must be connected. assert(m_depgraph.IsConnected(linchunking.GetChunk(0).transactions)); } } From b37d322447321a491468ce0aabf608d08bc4fb43 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 14 Nov 2024 18:10:24 -0500 Subject: [PATCH 08/20] txgraph: (optimization) special-case removal of tail of cluster When transactions are removed from the tail of a cluster, we know the existing linearization remains acceptable/optimal (if it already was), but may just need splitting, so special case these into separate quality levels. --- src/txgraph.cpp | 66 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 14 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index aad6ac1cd6f..2b6bfe863c2 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -33,6 +33,10 @@ enum class QualityLevel { /** This cluster may have multiple disconnected components, which are all NEEDS_RELINEARIZE. */ NEEDS_SPLIT, + /** This cluster may have multiple disconnected components, which are all ACCEPTABLE. 
*/ + NEEDS_SPLIT_ACCEPTABLE, + /** This cluster may have multiple disconnected components, which are all OPTIMAL. */ + NEEDS_SPLIT_OPTIMAL, /** This cluster has undergone changes that warrant re-linearization. */ NEEDS_RELINEARIZE, /** The minimal level of linearization has been performed, but it is not known to be optimal. */ @@ -351,26 +355,56 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove --graph.m_txcount; } while(!to_remove.empty()); + auto quality = m_quality; Assume(todo.Any()); // Wipe from the Cluster's DepGraph (this is O(n) regardless of the number of entries // removed, so we benefit from batching all the removals). m_depgraph.RemoveTransactions(todo); m_mapping.resize(m_depgraph.PositionRange()); - // Filter removals out of m_linearization. - m_linearization.erase(std::remove_if( - m_linearization.begin(), - m_linearization.end(), - [&](auto pos) { return todo[pos]; }), m_linearization.end()); - - graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_SPLIT); + // First remove all removals at the end of the linearization. + while (!m_linearization.empty() && todo[m_linearization.back()]) { + todo.Reset(m_linearization.back()); + m_linearization.pop_back(); + } + if (todo.None()) { + // If no further removals remain, and thus all removals were at the end, we may be able + // to leave the cluster at a better quality level. + if (quality == QualityLevel::OPTIMAL || quality == QualityLevel::NEEDS_SPLIT_OPTIMAL) { + quality = QualityLevel::NEEDS_SPLIT_OPTIMAL; + } else if (quality == QualityLevel::ACCEPTABLE || quality == QualityLevel::NEEDS_SPLIT_ACCEPTABLE) { + quality = QualityLevel::NEEDS_SPLIT_ACCEPTABLE; + } else if (quality == QualityLevel::NEEDS_RELINEARIZE) { + quality = QualityLevel::NEEDS_SPLIT; + } + } else { + // If more removals remain, filter those out of m_linearization. 
+ m_linearization.erase(std::remove_if( + m_linearization.begin(), + m_linearization.end(), + [&](auto pos) { return todo[pos]; }), m_linearization.end()); + quality = QualityLevel::NEEDS_SPLIT; + } + graph.SetClusterQuality(m_quality, m_setindex, quality); Updated(graph); } bool Cluster::Split(TxGraphImpl& graph) noexcept { // This function can only be called when the Cluster needs splitting. - Assume(m_quality == QualityLevel::NEEDS_SPLIT); + Assume(m_quality == QualityLevel::NEEDS_SPLIT || m_quality == QualityLevel::NEEDS_SPLIT_OPTIMAL || + m_quality == QualityLevel::NEEDS_SPLIT_ACCEPTABLE); + // Determine the new quality the split-off Clusters will have. + QualityLevel new_quality = m_quality == QualityLevel::NEEDS_SPLIT ? QualityLevel::NEEDS_RELINEARIZE : + m_quality == QualityLevel::NEEDS_SPLIT_OPTIMAL ? QualityLevel::OPTIMAL : + QualityLevel::ACCEPTABLE; + // If the cluster was NEEDS_SPLIT_OPTIMAL, and we're thus going to produce OPTIMAL clusters, we + // need to post-linearize first to make sure the split-out versions are all connected. This is + // not necessary for other quality levels as they will be subject to LIMO and PostLinearization + // anyway in MakeAcceptable(). + if (m_quality == QualityLevel::NEEDS_SPLIT_OPTIMAL) { + PostLinearize(m_depgraph, m_linearization); + } /** Which positions are still left in this Cluster. */ auto todo = m_depgraph.Positions(); /** Mapping from transaction positions in this Cluster to the Cluster where it ends up, and @@ -383,7 +417,7 @@ bool Cluster::Split(TxGraphImpl& graph) noexcept auto component = m_depgraph.FindConnectedComponent(todo); if (first && component == todo) { // The existing Cluster is an entire component. Leave it be, but update its quality. - graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); + graph.SetClusterQuality(m_quality, m_setindex, new_quality); // If this made the quality ACCEPTABLE or OPTIMAL, we need to compute and cache its // chunking. 
Updated(graph); @@ -398,7 +432,7 @@ bool Cluster::Split(TxGraphImpl& graph) noexcept for (auto i : component) { remap[i] = {new_cluster.get(), ClusterIndex(-1)}; } - graph.InsertCluster(std::move(new_cluster), QualityLevel::NEEDS_RELINEARIZE); + graph.InsertCluster(std::move(new_cluster), new_quality); todo -= component; } // Redistribute the transactions. @@ -670,9 +704,11 @@ void TxGraphImpl::SplitAll() noexcept { // Before splitting all Cluster, first make sure all removals are applied. ApplyRemovals(); - auto& queue = m_clusters[int(QualityLevel::NEEDS_SPLIT)]; - while (!queue.empty()) { - Split(*queue.back().get()); + for (auto quality : {QualityLevel::NEEDS_SPLIT, QualityLevel::NEEDS_SPLIT_ACCEPTABLE, QualityLevel::NEEDS_SPLIT_OPTIMAL}) { + auto& queue = m_clusters[int(quality)]; + while (!queue.empty()) { + Split(*queue.back().get()); + } } } @@ -1189,8 +1225,10 @@ void Cluster::SetFee(TxGraphImpl& graph, ClusterIndex idx, int64_t fee) noexcept // Update the fee, remember that relinearization will be necessary, and update the Entries // in the same Cluster. m_depgraph.FeeRate(idx).fee = fee; - if (m_quality != QualityLevel::NEEDS_SPLIT) { + if (m_quality == QualityLevel::OPTIMAL || m_quality == QualityLevel::ACCEPTABLE) { graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); + } else if (m_quality == QualityLevel::NEEDS_SPLIT_OPTIMAL || m_quality == QualityLevel::NEEDS_SPLIT_ACCEPTABLE) { + graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_SPLIT); } Updated(graph); } From 4c9fa2278b684d12c36492b3f0bf4580afe86c96 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Sun, 24 Nov 2024 08:37:53 -0500 Subject: [PATCH 09/20] txgraph: (refactor) group per-graph data in ClusterSet This is a preparation for a next commit where a TxGraph will start representing potentially two distinct graphs (a main one, and a staging one with proposed changes). 
--- src/txgraph.cpp | 161 ++++++++++++++++++++++++++---------------------- 1 file changed, 86 insertions(+), 75 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 2b6bfe863c2..dcb9e156581 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -25,7 +25,7 @@ class TxGraphImpl; /** Position of a ClusterIndex within a Cluster::m_linearization. */ using LinearizationIndex = uint32_t; -/** Position of a Cluster within Graph::m_clusters. */ +/** Position of a Cluster within Graph::ClusterSet::m_clusters. */ using ClusterSetIndex = uint32_t; /** Quality levels for cached linearizations. */ @@ -43,12 +43,12 @@ enum class QualityLevel ACCEPTABLE, /** The linearization is known to be optimal. */ OPTIMAL, - /** This cluster is not registered in any m_clusters. - * This must be the last entry in QualityLevel as m_clusters is sized using it. */ + /** This cluster is not registered in any ClusterSet::m_clusters. + * This must be the last entry in QualityLevel as ClusterSet::m_clusters is sized using it. */ NONE, }; -/** A grouping of connected transactions inside a TxGraphImpl. */ +/** A grouping of connected transactions inside a TxGraphImpl::ClusterSet. */ class Cluster { friend class TxGraphImpl; @@ -63,7 +63,7 @@ class Cluster std::vector m_linearization; /** The quality level of m_linearization. */ QualityLevel m_quality{QualityLevel::NONE}; - /** Which position this Cluster has in Graph::m_clusters[m_quality]. */ + /** Which position this Cluster has in Graph::ClusterSet::m_clusters[m_quality]. */ ClusterSetIndex m_setindex{ClusterSetIndex(-1)}; public: @@ -72,7 +72,7 @@ public: /** Construct a singleton Cluster. */ explicit Cluster(TxGraphImpl& graph, const FeeFrac& feerate, GraphIndex graph_index) noexcept; - // Cannot move or copy (would invalidate Cluster* in Locator and TxGraphImpl). */ + // Cannot move or copy (would invalidate Cluster* in Locator and TxGraphImpl::ClusterSet). 
*/ Cluster(const Cluster&) = delete; Cluster& operator=(const Cluster&) = delete; Cluster(Cluster&&) = delete; @@ -163,22 +163,29 @@ private: uint32_t m_deps_count; }; - /** The vectors of clusters, one vector per quality level. ClusterSetIndex indexes into each. */ - std::vector> m_clusters[int(QualityLevel::NONE)]; - /** Which removals have yet to be applied. */ - std::vector m_to_remove; - /** Which dependencies are to be added ((parent,child) pairs). GroupData::m_deps_offset indexes - * into this. */ - std::vector> m_deps_to_add; - /** Which clusters are to be merged. GroupData::m_cluster_offset indexes into this. */ - std::vector m_group_clusters; - /** Information about the merges to be performed, if known. */ - std::optional> m_group_data = std::vector{}; - /** Total number of transactions in this ClusterSet (explicit + implicit). */ - GraphIndex m_txcount{0}; - /** Whether we know that merging clusters (as determined by m_to_merge) would exceed the max - cluster size. */ - bool m_oversized{false}; + /** The collection of all Clusters in main or staged. */ + struct ClusterSet + { + /** The vectors of clusters, one vector per quality level. ClusterSetIndex indexes into each. */ + std::vector> m_clusters[int(QualityLevel::NONE)]; + /** Which removals have yet to be applied. */ + std::vector m_to_remove; + /** Which dependencies are to be added ((parent,child) pairs). GroupData::m_deps_offset indexes + * into this. */ + std::vector> m_deps_to_add; + /** Which clusters are to be merged. GroupData::m_cluster_offset indexes into this. */ + std::vector m_group_clusters; + /** Information about the merges to be performed, if known. */ + std::optional> m_group_data = std::vector{}; + /** Total number of transactions in this ClusterSet (explicit + implicit). */ + GraphIndex m_txcount{0}; + /** Whether we know that merging clusters (as determined by m_to_merge) would exceed the max + cluster size. 
*/ + bool m_oversized{false}; + }; + + /** The ClusterSet for this TxGraphImpl. */ + ClusterSet m_clusterset; /** A Locator that describes whether, where, and in which Cluster an Entry appears. */ struct Locator @@ -352,7 +359,7 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove locator.SetMissing(); if (entry.IsWiped()) graph.m_wiped.push_back(idx); to_remove = to_remove.subspan(1); - --graph.m_txcount; + --graph.m_clusterset.m_txcount; } while(!to_remove.empty()); auto quality = m_quality; @@ -547,7 +554,7 @@ std::unique_ptr TxGraphImpl::ExtractCluster(QualityLevel quality, Clust { Assume(quality != QualityLevel::NONE); - auto& quality_clusters = m_clusters[int(quality)]; + auto& quality_clusters = m_clusterset.m_clusters[int(quality)]; Assume(setindex < quality_clusters.size()); // Extract the Cluster-owning unique_ptr. @@ -577,7 +584,7 @@ ClusterSetIndex TxGraphImpl::InsertCluster(std::unique_ptr&& cluster, Q Assume(cluster->m_quality == QualityLevel::NONE); // Append it at the end of the relevant TxGraphImpl::m_cluster. - auto& quality_clusters = m_clusters[int(quality)]; + auto& quality_clusters = m_clusterset.m_clusters[int(quality)]; ClusterSetIndex ret = quality_clusters.size(); cluster->m_quality = quality; cluster->m_setindex = ret; @@ -607,19 +614,20 @@ void TxGraphImpl::DeleteCluster(Cluster& cluster) noexcept void TxGraphImpl::ApplyRemovals() noexcept { - auto& to_remove = m_to_remove; + auto& clusterset = m_clusterset; + auto& to_remove = clusterset.m_to_remove; // Skip if there is nothing to remove. if (to_remove.empty()) return; - // Wipe cached m_group_data and m_oversized, as it may be invalidated by removals. - m_group_data = std::nullopt; - m_group_clusters.clear(); - m_oversized = false; + // Wipe cached m_group_data and m_oversized, as they may be invalidated by removals. 
+ clusterset.m_group_data = std::nullopt; + clusterset.m_group_clusters.clear(); + clusterset.m_oversized = false; // Group the set of to-be-removed entries by Cluster*. - std::sort(m_to_remove.begin(), m_to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept { + std::sort(to_remove.begin(), to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept { return std::less{}(m_entries[a].m_locator.cluster, m_entries[b].m_locator.cluster); }); // Process per Cluster. - std::span to_remove_span{m_to_remove}; + std::span to_remove_span{to_remove}; while (!to_remove_span.empty()) { Cluster* cluster = m_entries[to_remove_span.front()].m_locator.cluster; if (cluster != nullptr) { @@ -631,7 +639,7 @@ void TxGraphImpl::ApplyRemovals() noexcept to_remove_span = to_remove_span.subspan(1); } } - m_to_remove.clear(); + to_remove.clear(); } void TxGraphImpl::SwapIndexes(GraphIndex a, GraphIndex b) noexcept @@ -659,7 +667,7 @@ std::vector TxGraphImpl::Cleanup() noexcept { ApplyDependencies(); std::vector ret; - if (!m_oversized) { + if (!m_clusterset.m_oversized) { // Sort the GraphIndex that need to be cleaned up. This groups them (so duplicates can be // processed just once). They are sorted in reverse, so the last ones get processed first. // This means earlier-processed GraphIndexes will not move of later-processed ones (which @@ -705,7 +713,7 @@ void TxGraphImpl::SplitAll() noexcept // Before splitting all Cluster, first make sure all removals are applied. ApplyRemovals(); for (auto quality : {QualityLevel::NEEDS_SPLIT, QualityLevel::NEEDS_SPLIT_ACCEPTABLE, QualityLevel::NEEDS_SPLIT_OPTIMAL}) { - auto& queue = m_clusters[int(quality)]; + auto& queue = m_clusterset.m_clusters[int(quality)]; while (!queue.empty()) { Split(*queue.back().get()); } @@ -719,8 +727,9 @@ void TxGraphImpl::GroupClusters() noexcept // with inefficient and/or oversized Clusters which just end up being split again anyway. 
SplitAll(); + auto& clusterset = m_clusterset; // If the groupings have been computed already, nothing is left to be done. - if (m_group_data.has_value()) return; + if (clusterset.m_group_data.has_value()) return; /** Annotated clusters: an entry for each Cluster, together with the representative for the * partition it is in if known, or with nullptr if not yet known. */ @@ -731,7 +740,7 @@ void TxGraphImpl::GroupClusters() noexcept std::vector, Cluster*>> an_deps; // Construct a an_clusters entry for every parent and child in the to-be-applied dependencies. - for (const auto& [par, chl] : m_deps_to_add) { + for (const auto& [par, chl] : clusterset.m_deps_to_add) { auto par_cluster = m_entries[par].m_locator.cluster; auto chl_cluster = m_entries[chl].m_locator.cluster; // Skip dependencies for which the parent or child transaction is removed. @@ -747,7 +756,7 @@ void TxGraphImpl::GroupClusters() noexcept an_clusters.erase(std::unique(an_clusters.begin(), an_clusters.end()), an_clusters.end()); // Sort the dependencies by child Cluster. - std::sort(m_deps_to_add.begin(), m_deps_to_add.end(), [&](auto& a, auto& b) noexcept { + std::sort(clusterset.m_deps_to_add.begin(), clusterset.m_deps_to_add.end(), [&](auto& a, auto& b) noexcept { auto [_a_par, a_chl] = a; auto [_b_par, b_chl] = b; auto a_chl_cluster = m_entries[a_chl].m_locator.cluster; @@ -824,7 +833,7 @@ void TxGraphImpl::GroupClusters() noexcept // the partitions their Clusters are in. Cluster* last_chl_cluster{nullptr}; PartitionData* last_partition{nullptr}; - for (const auto& [par, chl] : m_deps_to_add) { + for (const auto& [par, chl] : clusterset.m_deps_to_add) { auto par_cluster = m_entries[par].m_locator.cluster; auto chl_cluster = m_entries[chl].m_locator.cluster; // Nothing to do if parent and child are in the same Cluster. 
@@ -846,8 +855,8 @@ void TxGraphImpl::GroupClusters() noexcept // Populate the an_clusters and an_deps data structures with the list of input Clusters, // and the input dependencies, annotated with the representative of the Cluster partition // it applies to. - an_deps.reserve(m_deps_to_add.size()); - auto deps_it = m_deps_to_add.begin(); + an_deps.reserve(clusterset.m_deps_to_add.size()); + auto deps_it = clusterset.m_deps_to_add.begin(); for (size_t i = 0; i < partition_data.size(); ++i) { auto& data = partition_data[i]; // Find the representative of the partition Cluster i is in, and store it with the @@ -855,7 +864,7 @@ void TxGraphImpl::GroupClusters() noexcept auto rep = find_uf(&data)->cluster; an_clusters[i].second = rep; // Find all dependencies whose child Cluster is Cluster i, and annotate them with rep. - while (deps_it != m_deps_to_add.end()) { + while (deps_it != clusterset.m_deps_to_add.end()) { auto [par, chl] = *deps_it; auto chl_cluster = m_entries[chl].m_locator.cluster; // Skip dependencies that apply to earlier Clusters (those necessary are for @@ -877,38 +886,38 @@ void TxGraphImpl::GroupClusters() noexcept std::sort(an_clusters.begin(), an_clusters.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); // Translate the resulting cluster groups to the m_group_data structure. - m_group_data = std::vector{}; - m_group_clusters.clear(); - m_group_clusters.reserve(an_clusters.size()); - m_deps_to_add.clear(); - m_deps_to_add.reserve(an_deps.size()); + clusterset.m_group_data = std::vector{}; + clusterset.m_group_clusters.clear(); + clusterset.m_group_clusters.reserve(an_clusters.size()); + clusterset.m_deps_to_add.clear(); + clusterset.m_deps_to_add.reserve(an_deps.size()); auto an_deps_it = an_deps.begin(); auto an_clusters_it = an_clusters.begin(); while (an_deps_it != an_deps.end()) { auto rep = an_deps_it->second; // Create and initialize a new GroupData entry for the partition. 
- auto& new_entry = m_group_data->emplace_back(); - new_entry.m_cluster_offset = m_group_clusters.size(); + auto& new_entry = clusterset.m_group_data->emplace_back(); + new_entry.m_cluster_offset = clusterset.m_group_clusters.size(); new_entry.m_cluster_count = 0; - new_entry.m_deps_offset = m_deps_to_add.size(); + new_entry.m_deps_offset = clusterset.m_deps_to_add.size(); new_entry.m_deps_count = 0; uint32_t total_count{0}; // Add all its clusters to it (copying those from an_clusters to m_group_clusters). while (an_clusters_it != an_clusters.end() && an_clusters_it->second == rep) { - m_group_clusters.push_back(an_clusters_it->first); + clusterset.m_group_clusters.push_back(an_clusters_it->first); total_count += an_clusters_it->first->GetTxCount(); ++an_clusters_it; ++new_entry.m_cluster_count; } // Add all its dependencies to it (copying those back from an_deps to m_deps_to_add). while (an_deps_it != an_deps.end() && an_deps_it->second == rep) { - m_deps_to_add.push_back(an_deps_it->first); + clusterset.m_deps_to_add.push_back(an_deps_it->first); ++an_deps_it; ++new_entry.m_deps_count; } // Detect oversizedness. if (total_count > m_max_cluster_count) { - m_oversized = true; + clusterset.m_oversized = true; } } Assume(an_deps_it == an_deps.end()); @@ -946,22 +955,23 @@ void TxGraphImpl::ApplyDependencies() noexcept { // Compute the groups of to-be-merged Clusters (which also applies all removals, and splits). GroupClusters(); - Assume(m_group_data.has_value()); + auto& clusterset = m_clusterset; + Assume(clusterset.m_group_data.has_value()); // Nothing to do if there are no dependencies to be added. - if (m_deps_to_add.empty()) return; + if (clusterset.m_deps_to_add.empty()) return; // Dependencies cannot be applied if it would result in oversized clusters. - if (m_oversized) return; + if (clusterset.m_oversized) return; // For each group of to-be-merged Clusters. 
- Assume(m_group_data.has_value()); - for (const auto& group_data : *m_group_data) { + Assume(clusterset.m_group_data.has_value()); + for (const auto& group_data : *clusterset.m_group_data) { // Invoke Merge() to merge them into a single Cluster. - auto cluster_span = std::span{m_group_clusters} + auto cluster_span = std::span{clusterset.m_group_clusters} .subspan(group_data.m_cluster_offset, group_data.m_cluster_count); Merge(cluster_span); // Actually apply all to-be-added dependencies (for each, parent and child belong to the // same Cluster because of the merging above). - auto deps_span = std::span{m_deps_to_add} + auto deps_span = std::span{clusterset.m_deps_to_add} .subspan(group_data.m_deps_offset, group_data.m_deps_count); Assume(!deps_span.empty()); const auto& loc = m_entries[deps_span[0].second].m_locator; @@ -970,11 +980,11 @@ void TxGraphImpl::ApplyDependencies() noexcept } // Wipe the list of to-be-added dependencies now that they are applied. - m_deps_to_add.clear(); + clusterset.m_deps_to_add.clear(); // Also no further Cluster mergings are needed (note that we clear, but don't set to // std::nullopt, as that would imply the groupings are unknown). - m_group_data->clear(); - m_group_clusters.clear(); + clusterset.m_group_data->clear(); + clusterset.m_group_clusters.clear(); } void Cluster::Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept @@ -1031,7 +1041,7 @@ TxGraph::Ref TxGraphImpl::AddTransaction(const FeeFrac& feerate) noexcept auto cluster_ptr = cluster.get(); InsertCluster(std::move(cluster), QualityLevel::OPTIMAL); cluster_ptr->Updated(*this); - ++m_txcount; + ++m_clusterset.m_txcount; // Return the Ref. return ret; } @@ -1046,7 +1056,7 @@ void TxGraphImpl::RemoveTransaction(Ref& arg) noexcept auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return; // Remember that the transaction is to be removed. 
- m_to_remove.push_back(GetRefIndex(arg)); + m_clusterset.m_to_remove.push_back(GetRefIndex(arg)); } void TxGraphImpl::AddDependency(Ref& parent, Ref& child) noexcept @@ -1062,10 +1072,10 @@ void TxGraphImpl::AddDependency(Ref& parent, Ref& child) noexcept auto chl_cluster = m_entries[GetRefIndex(child)].m_locator.cluster; if (chl_cluster == nullptr) return; // Wipe m_group_data (as it will need to be recomputed). - m_group_data.reset(); - m_group_clusters.clear(); + m_clusterset.m_group_data.reset(); + m_clusterset.m_group_clusters.clear(); // Remember that this dependency is to be applied. - m_deps_to_add.emplace_back(GetRefIndex(parent), GetRefIndex(child)); + m_clusterset.m_deps_to_add.emplace_back(GetRefIndex(parent), GetRefIndex(child)); } bool TxGraphImpl::Exists(const Ref& arg) noexcept @@ -1124,7 +1134,7 @@ std::vector TxGraphImpl::GetAncestors(const Ref& arg) noexcept // Apply all dependencies, as the result might be incorrect otherwise. ApplyDependencies(); // Ancestry cannot be known if unapplied dependencies remain. - Assume(!m_oversized); + Assume(!m_clusterset.m_oversized); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1141,7 +1151,7 @@ std::vector TxGraphImpl::GetDescendants(const Ref& arg) noexcept // Apply all dependencies, as the result might be incorrect otherwise. ApplyDependencies(); // Ancestry cannot be known if unapplied dependencies remain. - Assume(!m_oversized); + Assume(!m_clusterset.m_oversized); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1158,7 +1168,7 @@ std::vector TxGraphImpl::GetCluster(const Ref& arg) noexcept // Apply all dependencies, as the result might be incorrect otherwise. 
ApplyDependencies(); // Cluster linearization cannot be known if unapplied dependencies remain. - Assume(!m_oversized); + Assume(!m_clusterset.m_oversized); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1170,7 +1180,7 @@ std::vector TxGraphImpl::GetCluster(const Ref& arg) noexcept TxGraph::GraphIndex TxGraphImpl::GetTransactionCount() noexcept { ApplyRemovals(); - return m_txcount; + return m_clusterset.m_txcount; } FeeFrac TxGraphImpl::GetIndividualFeerate(const Ref& arg) noexcept @@ -1197,7 +1207,7 @@ FeeFrac TxGraphImpl::GetChunkFeerate(const Ref& arg) noexcept // Apply all dependencies, as the result might be inaccurate otherwise. ApplyDependencies(); // Chunk feerates cannot be accurately known if unapplied dependencies remain. - Assume(!m_oversized); + Assume(!m_clusterset.m_oversized); // Find the cluster the argument is in, and return the empty FeeFrac if it isn't in any. auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1213,7 +1223,7 @@ bool TxGraphImpl::IsOversized() noexcept // Find which Clusters will need to be merged together, as that is where the oversize // property is assessed. GroupClusters(); - return m_oversized; + return m_clusterset.m_oversized; } void Cluster::SetFee(TxGraphImpl& graph, ClusterIndex idx, int64_t fee) noexcept @@ -1318,11 +1328,12 @@ void TxGraphImpl::SanityCheck() const } + auto& clusterset = m_clusterset; std::set actual_clusters; // For all quality levels... for (int qual = 0; qual < int(QualityLevel::NONE); ++qual) { QualityLevel quality{qual}; - const auto& quality_clusters = m_clusters[qual]; + const auto& quality_clusters = clusterset.m_clusters[qual]; // ... for all clusters in them ... 
for (ClusterSetIndex setindex = 0; setindex < quality_clusters.size(); ++setindex) { const auto& cluster = *quality_clusters[setindex]; From b6a14f74c3f9f89c609865c315cc3f032151f2cc Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Sun, 24 Nov 2024 10:00:59 -0500 Subject: [PATCH 10/20] txgraph: (refactor) abstract out ClearLocator Move a number of related modifications to TxGraphImpl into a separate function for removal of transactions. This is preparation for a later commit where this will be useful in more than one place. --- src/txgraph.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index dcb9e156581..11a6daa0e22 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -255,6 +255,8 @@ public: ClusterSetIndex InsertCluster(std::unique_ptr&& cluster, QualityLevel quality) noexcept; /** Change the QualityLevel of a Cluster (identified by old_quality and old_index). */ void SetClusterQuality(QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept; + /** Make a transaction not exist. */ + void ClearLocator(GraphIndex index) noexcept; // Functions for handling Refs. @@ -311,6 +313,18 @@ public: void SanityCheck() const final; }; +void TxGraphImpl::ClearLocator(GraphIndex idx) noexcept +{ + auto& entry = m_entries[idx]; + Assume(entry.m_locator.IsPresent()); + // Change the locator from Present to Missing. + entry.m_locator.SetMissing(); + // Update the transaction count. + --m_clusterset.m_txcount; + // Add it to the m_wiped list (which will be processed by Cleanup). + if (entry.IsWiped()) m_wiped.push_back(idx); +} + void Cluster::Updated(TxGraphImpl& graph) noexcept { // Update all the Locators for this Cluster's Entrys. @@ -356,10 +370,8 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove // - Remove from m_mapping. m_mapping[locator.index] = GraphIndex(-1); // - Mark it as removed in the Entry's locator. 
- locator.SetMissing(); - if (entry.IsWiped()) graph.m_wiped.push_back(idx); + graph.ClearLocator(idx); to_remove = to_remove.subspan(1); - --graph.m_clusterset.m_txcount; } while(!to_remove.empty()); auto quality = m_quality; From 82c947f165c4c592773fa16ac648d6302d0ead8c Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 4 Dec 2024 09:40:53 -0500 Subject: [PATCH 11/20] txgraph: (feature) add staging support In order to make it easy to evaluate proposed changes to a TxGraph, introduce a "staging" mode, where mutators (AddTransaction, AddDependency, RemoveTransaction) do not modify the actual graph, but just a staging version of it. That staging graph can then be committed (replacing the main one with it), or aborted (discarding the staging). --- src/test/fuzz/txgraph.cpp | 323 ++++++++++-------- src/txgraph.cpp | 682 ++++++++++++++++++++++++++++---------- src/txgraph.h | 83 +++-- 3 files changed, 751 insertions(+), 337 deletions(-) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index 6ce4807a8b1..faef8bb2377 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -21,7 +22,8 @@ using namespace cluster_linearize; namespace { /** Data type representing a naive simulated TxGraph, keeping all transactions (even from - * disconnected components) in a single DepGraph. */ + * disconnected components) in a single DepGraph. Unlike the real TxGraph, this only models + * a single graph, and multiple instances are used to simulate main/staging. */ struct SimTxGraph { /** Maximum number of transactions to support simultaneously. Set this higher than txgraph's @@ -38,21 +40,29 @@ struct SimTxGraph /** The dependency graph (for all transactions in the simulation, regardless of * connectivity/clustering). */ DepGraph graph; - /** For each position in graph, which TxGraph::Ref it corresponds with (if any). 
*/ - std::array, MAX_TRANSACTIONS> simmap; + /** For each position in graph, which TxGraph::Ref it corresponds with (if any). Use shared_ptr + * so that a SimTxGraph can be copied to create a staging one, while sharing Refs with + * the main graph. */ + std::array, MAX_TRANSACTIONS> simmap; /** For each TxGraph::Ref in graph, the position it corresponds with. */ std::map simrevmap; /** The set of TxGraph::Ref entries that have been removed, but not yet Cleanup()'ed in * the real TxGraph. */ - std::vector> removed; + std::vector> removed; /** Whether the graph is oversized (true = yes, false = no, std::nullopt = unknown). */ std::optional oversized; /** The configured maximum number of transactions per cluster. */ ClusterIndex max_cluster_count; - /** Construct a new SimData with the specified maximum cluster count. */ + /** Construct a new SimTxGraph with the specified maximum cluster count. */ explicit SimTxGraph(ClusterIndex max_cluster) : max_cluster_count(max_cluster) {} + // Permit copying and moving. + SimTxGraph(const SimTxGraph&) noexcept = default; + SimTxGraph& operator=(const SimTxGraph&) noexcept = default; + SimTxGraph(SimTxGraph&&) noexcept = default; + SimTxGraph& operator=(SimTxGraph&&) noexcept = default; + /** Check whether this graph is oversized (contains a connected component whose number of * transactions exceeds max_cluster_count. */ bool IsOversized() @@ -97,7 +107,7 @@ struct SimTxGraph assert(graph.TxCount() < MAX_TRANSACTIONS); auto simpos = graph.AddTransaction(feerate); assert(graph.Positions()[simpos]); - simmap[simpos] = std::make_unique(); + simmap[simpos] = std::make_shared(); auto ptr = simmap[simpos].get(); simrevmap[ptr] = simpos; return *ptr; @@ -199,32 +209,42 @@ FUZZ_TARGET(txgraph) // Decide the maximum number of transactions per cluster we will use in this simulation. auto max_count = provider.ConsumeIntegralInRange(1, MAX_CLUSTER_COUNT_LIMIT); - // Construct a real and a simulated graph. 
+ // Construct a real graph, and a vector of simulated graphs (main, and possibly staging). auto real = MakeTxGraph(max_count); - SimTxGraph sim(max_count); + std::vector sims; + sims.reserve(2); + sims.emplace_back(max_count); - /** Function to pick any Ref (in sim real, sim.removed, or empty). */ + /** Function to pick any Ref (in either sim graph, either sim.removed, or empty). */ auto pick_fn = [&]() noexcept -> TxGraph::Ref& { - auto tx_count = sim.GetTransactionCount(); + size_t tx_count[2] = {sims[0].GetTransactionCount(), 0}; /** The number of possible choices. */ - size_t choices = tx_count + sim.removed.size() + 1; + size_t choices = tx_count[0] + sims[0].removed.size() + 1; + if (sims.size() == 2) { + tx_count[1] = sims[1].GetTransactionCount(); + choices += tx_count[1] + sims[1].removed.size(); + } /** Pick one of them. */ auto choice = provider.ConsumeIntegralInRange(0, choices - 1); - if (choice < tx_count) { - // Return from real. - for (auto i : sim.graph.Positions()) { - if (choice == 0) return sim.GetRef(i); - --choice; + // Consider both main and (if it exists) staging. + for (size_t level = 0; level < sims.size(); ++level) { + auto& sim = sims[level]; + if (choice < tx_count[level]) { + // Return from graph. + for (auto i : sim.graph.Positions()) { + if (choice == 0) return sim.GetRef(i); + --choice; + } + assert(false); + } else { + choice -= tx_count[level]; + } + if (choice < sim.removed.size()) { + // Return from removed. + return *sim.removed[choice]; + } else { + choice -= sim.removed.size(); } - assert(false); - } else { - choice -= tx_count; - } - if (choice < sim.removed.size()) { - // Return from removed. - return *sim.removed[choice]; - } else { - choice -= sim.removed.size(); } // Return empty. assert(choice == 0); @@ -234,15 +254,24 @@ FUZZ_TARGET(txgraph) LIMITED_WHILE(provider.remaining_bytes() > 0, 200) { // Read a one-byte command. 
int command = provider.ConsumeIntegral(); - // Treat it lowest bit as a flag (which selects a variant of some of the operations), and - // leave the rest of the bits in command. + // Treat the lowest bit of a command as a flag (which selects a variant of some of the + // operations), and the second-lowest bit as a way of selecting main vs. staging, and leave + // the rest of the bits in command. bool alt = command & 1; - command >>= 1; + bool use_main = command & 2; + command >>= 2; + + // Provide convenient aliases for the top simulated graph (main, or staging if it exists), + // one for the simulated graph selected based on use_main (for operations that can operate + // on both graphs), and one that always refers to the main graph. + auto& top_sim = sims.back(); + auto& sel_sim = use_main ? sims[0] : top_sim; + auto& main_sim = sims[0]; // Keep decrementing command for each applicable operation, until one is hit. Multiple // iterations may be necessary. while (true) { - if (sim.GetTransactionCount() < SimTxGraph::MAX_TRANSACTIONS && command-- == 0) { + if (top_sim.GetTransactionCount() < SimTxGraph::MAX_TRANSACTIONS && command-- == 0) { // AddTransaction. int64_t fee; int32_t size; @@ -256,41 +285,41 @@ FUZZ_TARGET(txgraph) FeeFrac feerate{fee, size}; // Create a real TxGraph::Ref. auto ref = real->AddTransaction(feerate); - // Create a unique_ptr place in the simulation to put the Ref in. - auto& ref_loc = sim.AddTransaction(feerate); + // Create a shared_ptr place in the simulation to put the Ref in. + auto& ref_loc = top_sim.AddTransaction(feerate); // Move it in place. ref_loc = std::move(ref); break; - } else if (sim.GetTransactionCount() + sim.removed.size() > 1 && command-- == 0) { + } else if (top_sim.GetTransactionCount() + top_sim.removed.size() > 1 && command-- == 0) { // AddDependency. 
auto& par = pick_fn(); auto& chl = pick_fn(); - auto pos_par = sim.Find(par); - auto pos_chl = sim.Find(chl); + auto pos_par = top_sim.Find(par); + auto pos_chl = top_sim.Find(chl); if (pos_par != SimTxGraph::MISSING && pos_chl != SimTxGraph::MISSING) { // Determine if adding this would introduce a cycle (not allowed by TxGraph), // and if so, skip. - if (sim.graph.Ancestors(pos_par)[pos_chl]) break; + if (top_sim.graph.Ancestors(pos_par)[pos_chl]) break; } - sim.AddDependency(par, chl); + top_sim.AddDependency(par, chl); real->AddDependency(par, chl); break; - } else if (sim.removed.size() < 100 && command-- == 0) { + } else if (top_sim.removed.size() < 100 && command-- == 0) { // RemoveTransaction. Either all its ancestors or all its descendants are also // removed (if any), to make sure TxGraph's reordering of removals and dependencies // has no effect. std::vector to_remove; to_remove.push_back(&pick_fn()); - sim.IncludeAncDesc(to_remove, alt); + top_sim.IncludeAncDesc(to_remove, alt); // The order in which these ancestors/descendants are removed should not matter; // randomly shuffle them. std::shuffle(to_remove.begin(), to_remove.end(), rng); for (TxGraph::Ref* ptr : to_remove) { real->RemoveTransaction(*ptr); - sim.RemoveTransaction(*ptr); + top_sim.RemoveTransaction(*ptr); } break; - } else if (sim.GetTransactionCount() > 0 && command-- == 0) { + } else if (sel_sim.GetTransactionCount() > 0 && command-- == 0) { // SetTransactionFee. int64_t fee; if (alt) { @@ -300,92 +329,98 @@ FUZZ_TARGET(txgraph) } auto& ref = pick_fn(); real->SetTransactionFee(ref, fee); - sim.SetTransactionFee(ref, fee); + for (auto& sim : sims) { + sim.SetTransactionFee(ref, fee); + } break; } else if (command-- == 0) { // Cleanup. 
auto cleaned = real->Cleanup(); - if (!sim.IsOversized()) { - assert(sim.removed.size() == cleaned.size()); + if (sims.size() == 1 && !top_sim.IsOversized()) { + assert(top_sim.removed.size() == cleaned.size()); std::sort(cleaned.begin(), cleaned.end()); - std::sort(sim.removed.begin(), sim.removed.end()); - for (size_t i = 0; i < sim.removed.size(); ++i) { - assert(cleaned[i] == sim.removed[i].get()); + std::sort(top_sim.removed.begin(), top_sim.removed.end()); + for (size_t i = 0; i < top_sim.removed.size(); ++i) { + assert(cleaned[i] == top_sim.removed[i].get()); } - sim.removed.clear(); + top_sim.removed.clear(); } else { assert(cleaned.empty()); } break; } else if (command-- == 0) { // GetTransactionCount. - assert(real->GetTransactionCount() == sim.GetTransactionCount()); + assert(real->GetTransactionCount(use_main) == sel_sim.GetTransactionCount()); break; } else if (command-- == 0) { // Exists. auto& ref = pick_fn(); - bool exists = real->Exists(ref); - bool should_exist = sim.Find(ref) != SimTxGraph::MISSING; + bool exists = real->Exists(ref, use_main); + bool should_exist = sel_sim.Find(ref) != SimTxGraph::MISSING; assert(exists == should_exist); break; } else if (command-- == 0) { // IsOversized. - assert(sim.IsOversized() == real->IsOversized()); + assert(sel_sim.IsOversized() == real->IsOversized(use_main)); break; } else if (command-- == 0) { // GetIndividualFeerate. auto& ref = pick_fn(); auto feerate = real->GetIndividualFeerate(ref); - auto simpos = sim.Find(ref); - if (simpos == SimTxGraph::MISSING) { - assert(feerate.IsEmpty()); - } else { - assert(feerate == sim.graph.FeeRate(simpos)); + bool found{false}; + for (auto& sim : sims) { + auto simpos = sim.Find(ref); + if (simpos != SimTxGraph::MISSING) { + found = true; + assert(feerate == sim.graph.FeeRate(simpos)); + } } + if (!found) assert(feerate.IsEmpty()); break; - } else if (!sim.IsOversized() && command-- == 0) { - // GetChunkFeerate. 
+ } else if (!main_sim.IsOversized() && command-- == 0) { + // GetMainChunkFeerate. auto& ref = pick_fn(); - auto feerate = real->GetChunkFeerate(ref); - auto simpos = sim.Find(ref); + auto feerate = real->GetMainChunkFeerate(ref); + auto simpos = main_sim.Find(ref); if (simpos == SimTxGraph::MISSING) { assert(feerate.IsEmpty()); } else { // Just do some quick checks that the reported value is in range. A full // recomputation of expected chunk feerates is done at the end. - assert(feerate.size >= sim.graph.FeeRate(simpos).size); + assert(feerate.size >= main_sim.graph.FeeRate(simpos).size); } break; - } else if (!sim.IsOversized() && command-- == 0) { + } else if (!sel_sim.IsOversized() && command-- == 0) { // GetAncestors/GetDescendants. auto& ref = pick_fn(); - auto result = alt ? real->GetDescendants(ref) : real->GetAncestors(ref); + auto result = alt ? real->GetDescendants(ref, use_main) + : real->GetAncestors(ref, use_main); assert(result.size() <= max_count); - auto result_set = sim.MakeSet(result); + auto result_set = sel_sim.MakeSet(result); assert(result.size() == result_set.Count()); - auto expect_set = sim.GetAncDesc(ref, alt); + auto expect_set = sel_sim.GetAncDesc(ref, alt); assert(result_set == expect_set); break; - } else if (!sim.IsOversized() && command-- == 0) { + } else if (!sel_sim.IsOversized() && command-- == 0) { // GetCluster. auto& ref = pick_fn(); - auto result = real->GetCluster(ref); + auto result = real->GetCluster(ref, use_main); // Check cluster count limit. assert(result.size() <= max_count); // Require the result to be topologically valid and not contain duplicates. 
- auto left = sim.graph.Positions(); + auto left = sel_sim.graph.Positions(); for (auto refptr : result) { - auto simpos = sim.Find(*refptr); + auto simpos = sel_sim.Find(*refptr); assert(simpos != SimTxGraph::MISSING); assert(left[simpos]); left.Reset(simpos); - assert(!sim.graph.Ancestors(simpos).Overlaps(left)); + assert(!sel_sim.graph.Ancestors(simpos).Overlaps(left)); } // Require the set to be connected. - auto result_set = sim.MakeSet(result); - assert(sim.graph.IsConnected(result_set)); + auto result_set = sel_sim.MakeSet(result); + assert(sel_sim.graph.IsConnected(result_set)); // If ref exists, the result must contain it. If not, it must be empty. - auto simpos = sim.Find(ref); + auto simpos = sel_sim.Find(ref); if (simpos != SimTxGraph::MISSING) { assert(result_set[simpos]); } else { @@ -393,8 +428,27 @@ FUZZ_TARGET(txgraph) } // Require the set not to have ancestors or descendants outside of it. for (auto i : result_set) { - assert(sim.graph.Ancestors(i).IsSubsetOf(result_set)); - assert(sim.graph.Descendants(i).IsSubsetOf(result_set)); + assert(sel_sim.graph.Ancestors(i).IsSubsetOf(result_set)); + assert(sel_sim.graph.Descendants(i).IsSubsetOf(result_set)); + } + break; + } else if (command-- == 0) { + // HaveStaging. + assert((sims.size() == 2) == real->HaveStaging()); + break; + } else if (sims.size() < 2 && command-- == 0) { + // StartStaging. + sims.emplace_back(sims.back()); + real->StartStaging(); + break; + } else if (sims.size() > 1 && command-- == 0) { + // AbortStaging/CommitStaging. + if (alt) { + real->AbortStaging(); + sims.pop_back(); + } else { + real->CommitStaging(); + sims.erase(sims.begin()); } break; } @@ -404,63 +458,70 @@ FUZZ_TARGET(txgraph) // After running all modifications, perform an internal sanity check (before invoking // inspectors that may modify the internal state). real->SanityCheck(); + assert(real->HaveStaging() == (sims.size() > 1)); - // Compare simple properties of the graph with the simulation. 
- assert(real->IsOversized() == sim.IsOversized()); - assert(real->GetTransactionCount() == sim.GetTransactionCount()); - - // If the graph (and the simulation) are not oversized, perform a full comparison. - if (!sim.IsOversized()) { - auto todo = sim.graph.Positions(); - // Iterate over all connected components of the resulting (simulated) graph, each of which - // should correspond to a cluster in the real one. - while (todo.Any()) { - auto component = sim.graph.FindConnectedComponent(todo); - todo -= component; - // Iterate over the transactions in that component. - for (auto i : component) { - // Check its individual feerate against simulation. - assert(sim.graph.FeeRate(i) == real->GetIndividualFeerate(sim.GetRef(i))); - // Check its ancestors against simulation. - auto expect_anc = sim.graph.Ancestors(i); - auto anc = sim.MakeSet(real->GetAncestors(sim.GetRef(i))); - assert(anc.Count() <= max_count); - assert(anc == expect_anc); - // Check its descendants against simulation. - auto expect_desc = sim.graph.Descendants(i); - auto desc = sim.MakeSet(real->GetDescendants(sim.GetRef(i))); - assert(desc.Count() <= max_count); - assert(desc == expect_desc); - // Check the cluster the transaction is part of. - auto cluster = real->GetCluster(sim.GetRef(i)); - assert(cluster.size() <= max_count); - assert(sim.MakeSet(cluster) == component); - // Check that the cluster is reported in a valid topological order (its - // linearization). - std::vector simlin; - SimTxGraph::SetType done; - for (TxGraph::Ref* ptr : cluster) { - auto simpos = sim.Find(*ptr); - assert(sim.graph.Descendants(simpos).IsSubsetOf(component - done)); - done.Set(simpos); - assert(sim.graph.Ancestors(simpos).IsSubsetOf(done)); - simlin.push_back(simpos); - } - // Construct a chunking object for the simulated graph, using the reported cluster - // linearization as ordering, and compare it against the reported chunk feerates. 
- cluster_linearize::LinearizationChunking simlinchunk(sim.graph, simlin); - ClusterIndex idx{0}; - for (unsigned chunknum = 0; chunknum < simlinchunk.NumChunksLeft(); ++chunknum) { - auto chunk = simlinchunk.GetChunk(chunknum); - // Require that the chunks of cluster linearizations are connected (this must - // be the case as all linearizations inside are PostLinearized). - assert(sim.graph.IsConnected(chunk.transactions)); - // Check the chunk feerates of all transactions in the cluster. - while (chunk.transactions.Any()) { - assert(chunk.transactions[simlin[idx]]); - chunk.transactions.Reset(simlin[idx]); - assert(chunk.feerate == real->GetChunkFeerate(*cluster[idx])); - ++idx; + // Try to run a full comparison, for both main_only=false and main_only=true in TxGraph + // inspector functions that support both. + for (int main_only = 0; main_only < 2; ++main_only) { + auto& sim = main_only ? sims[0] : sims.back(); + // Compare simple properties of the graph with the simulation. + assert(real->IsOversized(main_only) == sim.IsOversized()); + assert(real->GetTransactionCount(main_only) == sim.GetTransactionCount()); + // If the graph (and the simulation) are not oversized, perform a full comparison. + if (!sim.IsOversized()) { + auto todo = sim.graph.Positions(); + // Iterate over all connected components of the resulting (simulated) graph, each of which + // should correspond to a cluster in the real one. + while (todo.Any()) { + auto component = sim.graph.FindConnectedComponent(todo); + todo -= component; + // Iterate over the transactions in that component. + for (auto i : component) { + // Check its individual feerate against simulation. + assert(sim.graph.FeeRate(i) == real->GetIndividualFeerate(sim.GetRef(i))); + // Check its ancestors against simulation. 
+ auto expect_anc = sim.graph.Ancestors(i); + auto anc = sim.MakeSet(real->GetAncestors(sim.GetRef(i), main_only)); + assert(anc.Count() <= max_count); + assert(anc == expect_anc); + // Check its descendants against simulation. + auto expect_desc = sim.graph.Descendants(i); + auto desc = sim.MakeSet(real->GetDescendants(sim.GetRef(i), main_only)); + assert(desc.Count() <= max_count); + assert(desc == expect_desc); + // Check the cluster the transaction is part of. + auto cluster = real->GetCluster(sim.GetRef(i), main_only); + assert(cluster.size() <= max_count); + assert(sim.MakeSet(cluster) == component); + // Check that the cluster is reported in a valid topological order (its + // linearization). + std::vector simlin; + SimTxGraph::SetType done; + for (TxGraph::Ref* ptr : cluster) { + auto simpos = sim.Find(*ptr); + assert(sim.graph.Descendants(simpos).IsSubsetOf(component - done)); + done.Set(simpos); + assert(sim.graph.Ancestors(simpos).IsSubsetOf(done)); + simlin.push_back(simpos); + } + // Construct a chunking object for the simulated graph, using the reported cluster + // linearization as ordering, and compare it against the reported chunk feerates. + if (sims.size() == 1 || main_only) { + cluster_linearize::LinearizationChunking simlinchunk(sim.graph, simlin); + ClusterIndex idx{0}; + for (unsigned chunknum = 0; chunknum < simlinchunk.NumChunksLeft(); ++chunknum) { + auto chunk = simlinchunk.GetChunk(chunknum); + // Require that the chunks of cluster linearizations are connected (this must + // be the case as all linearizations inside are PostLinearized). + assert(sim.graph.IsConnected(chunk.transactions)); + // Check the chunk feerates of all transactions in the cluster. 
+ while (chunk.transactions.Any()) { + assert(chunk.transactions[simlin[idx]]); + chunk.transactions.Reset(simlin[idx]); + assert(chunk.feerate == real->GetMainChunkFeerate(*cluster[idx])); + ++idx; + } + } } } } diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 11a6daa0e22..7c4da0111f5 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -20,6 +20,9 @@ namespace { using namespace cluster_linearize; +/** The maximum number of levels a TxGraph can have (0 = main, 1 = staging). */ +static constexpr int MAX_LEVELS{2}; + // Forward declare the TxGraph implementation class. class TxGraphImpl; @@ -65,6 +68,8 @@ class Cluster QualityLevel m_quality{QualityLevel::NONE}; /** Which position this Cluster has in Graph::ClusterSet::m_clusters[m_quality]. */ ClusterSetIndex m_setindex{ClusterSetIndex(-1)}; + /** Which level this Cluster is at in the graph (-1=not inserted, 0=main, 1=staging). */ + int m_level{-1}; public: /** Construct an empty Cluster. */ @@ -72,7 +77,7 @@ public: /** Construct a singleton Cluster. */ explicit Cluster(TxGraphImpl& graph, const FeeFrac& feerate, GraphIndex graph_index) noexcept; - // Cannot move or copy (would invalidate Cluster* in Locator and TxGraphImpl::ClusterSet). */ + // Cannot move or copy (would invalidate Cluster* in Locator and ClusterSet). */ Cluster(const Cluster&) = delete; Cluster& operator=(const Cluster&) = delete; Cluster(Cluster&&) = delete; @@ -88,6 +93,17 @@ public: void UpdateMapping(ClusterIndex cluster_idx, GraphIndex graph_idx) noexcept { m_mapping[cluster_idx] = graph_idx; } /** Push changes to Cluster and its linearization to the TxGraphImpl Entry objects. */ void Updated(TxGraphImpl& graph) noexcept; + /** Create a copy of this Cluster, returning a pointer to it (used by PullIn). */ + Cluster* CopyTo(TxGraphImpl& graph, int to_level) const noexcept; + /** Get the list of Clusters that conflict with this one (at level-1). 
*/ + void GetConflicts(const TxGraphImpl& graph, std::vector& out) const noexcept; + /** Mark all the Entry objects belonging to this Cluster as missing. The Cluster must be + * deleted immediately after. */ + void MakeTransactionsMissing(TxGraphImpl& graph) noexcept; + /** Remove all transactions in a Cluster. */ + void Clear(TxGraphImpl& graph) noexcept; + /** Change a Cluster's level from level to level-1. */ + void LevelDown(TxGraphImpl& graph) noexcept; // Functions that implement the Cluster-specific side of internal TxGraphImpl mutations. @@ -119,16 +135,20 @@ public: // Debugging functions. - void SanityCheck(const TxGraphImpl& graph) const; + void SanityCheck(const TxGraphImpl& graph, int level) const; }; -/** The transaction graph. +/** The transaction graph, including staged changes. * * The overall design of the data structure consists of 3 interlinked representations: * - The transactions (held as a vector of TxGraphImpl::Entry inside TxGraphImpl). - * - The clusters (Cluster objects in per-quality vectors inside TxGraphImpl). + * - The clusters (Cluster objects in per-quality vectors inside TxGraphImpl::ClusterSet). * - The Refs (TxGraph::Ref objects, held externally by users of the TxGraph class) * + * The Clusters are kept in one or two ClusterSet objects, one for the "main" graph, and one for + * the proposed changes ("staging"). If a transaction occurs in both, they share the same Entry, + * but there will be a separate Cluster per graph. + * * Clusters and Refs contain the index of the Entry objects they refer to, and the Entry objects * refer back to the Clusters and Refs the corresponding transaction is contained in. * @@ -177,6 +197,8 @@ private: std::vector m_group_clusters; /** Information about the merges to be performed, if known. */ std::optional> m_group_data = std::vector{}; + /** Which entries were removed in this ClusterSet (so they can be wiped on abort). 
*/ + std::vector m_removed; /** Total number of transactions in this ClusterSet (explicit + implicit). */ GraphIndex m_txcount{0}; /** Whether we know that merging clusters (as determined by m_to_merge) would exceed the max @@ -184,23 +206,28 @@ private: bool m_oversized{false}; }; - /** The ClusterSet for this TxGraphImpl. */ - ClusterSet m_clusterset; + /** The ClusterSets in this TxGraphImpl. Has exactly 1 (main) or exactly 2 elements (main and staged). */ + std::vector m_clustersets; - /** A Locator that describes whether, where, and in which Cluster an Entry appears. */ + /** A Locator that describes whether, where, and in which Cluster an Entry appears. + * Every Entry has MAX_LEVELS locators, as it may appear in one Cluster per level. */ struct Locator { /** Which Cluster the Entry appears in (nullptr = missing). */ Cluster* cluster{nullptr}; - /** Where in the Cluster it appears (only if cluster != nullptr). */ + /** Where in the Cluster it appears (if cluster == nullptr: 0 = missing, -1 = removed). */ ClusterIndex index{0}; - /** Mark this Locator as missing. */ + /** Mark this Locator as missing (= same as lower level, or non-existing if level 0). */ void SetMissing() noexcept { cluster = nullptr; index = 0; } + /** Mark this Locator as removed (not allowed in level 0). */ + void SetRemoved() noexcept { cluster = nullptr; index = ClusterIndex(-1); } /** Mark this Locator as present, in the specified Cluster. */ void SetPresent(Cluster* c, ClusterIndex i) noexcept { cluster = c; index = i; } /** Check if this Locator is missing. */ bool IsMissing() const noexcept { return cluster == nullptr && index == 0; } + /** Check if this Locator is removed. */ + bool IsRemoved() const noexcept { return cluster == nullptr && index == ClusterIndex(-1); } /** Check if this Locator is present (in some Cluster). */ bool IsPresent() const noexcept { return cluster != nullptr; } }; @@ -211,19 +238,22 @@ private: { /** Pointer to the corresponding Ref object, if any. 
*/ Ref* m_ref; - /** Which Cluster and position therein this Entry appears in. */ - Locator m_locator; - /** The chunk feerate of this transaction (if not missing) */ - FeeFrac m_chunk_feerate; + /** Which Cluster and position therein this Entry appears in. ([0] = main, [1] = staged). */ + Locator m_locator[MAX_LEVELS]; + /** The chunk feerate of this transaction in main (if present in m_locator[0]) */ + FeeFrac m_main_chunk_feerate; /** Check whether this Entry is not present in any Cluster. */ bool IsWiped() const noexcept { - return !m_locator.IsPresent(); + for (int level = 0; level < MAX_LEVELS; ++level) { + if (m_locator[level].IsPresent()) return false; + } + return true; } }; - /** The set of all transactions. GraphIndex values index into this. */ + /** The set of all transactions (in all levels combined). GraphIndex values index into this. */ std::vector m_entries; /** Set of Entries that have no IsPresent locators left, and need to be cleaned up. */ @@ -235,6 +265,8 @@ public: m_max_cluster_count(max_cluster_count) { Assume(max_cluster_count <= MAX_CLUSTER_COUNT_LIMIT); + m_clustersets.reserve(MAX_LEVELS); + m_clustersets.emplace_back(); } // Cannot move or copy (would invalidate TxGraphImpl* in Ref, MiningOrder, EvictionOrder). @@ -247,16 +279,21 @@ public: /** Swap the Entrys referred to by a and b. */ void SwapIndexes(GraphIndex a, GraphIndex b) noexcept; - /** Extract a Cluster. */ - std::unique_ptr ExtractCluster(QualityLevel quality, ClusterSetIndex setindex) noexcept; + /** If idx exists in the specified level ClusterSet (explicitly or implicitly), return the + * Cluster it is in. Otherwise, return nullptr. */ + Cluster* FindCluster(GraphIndex idx, int level) const noexcept; + /** Extract a Cluster from its ClusterSet. */ + std::unique_ptr ExtractCluster(int level, QualityLevel quality, ClusterSetIndex setindex) noexcept; /** Delete a Cluster. */ void DeleteCluster(Cluster& cluster) noexcept; - /** Insert a Cluster. 
*/ - ClusterSetIndex InsertCluster(std::unique_ptr&& cluster, QualityLevel quality) noexcept; + /** Insert a Cluster into its ClusterSet. */ + ClusterSetIndex InsertCluster(int level, std::unique_ptr&& cluster, QualityLevel quality) noexcept; /** Change the QualityLevel of a Cluster (identified by old_quality and old_index). */ - void SetClusterQuality(QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept; - /** Make a transaction not exist. */ - void ClearLocator(GraphIndex index) noexcept; + void SetClusterQuality(int level, QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept; + /** Make a transaction not exist at a specified level. */ + void ClearLocator(int level, GraphIndex index) noexcept; + /** Find which Clusters conflict with the top level. */ + std::vector GetConflicts() const noexcept; // Functions for handling Refs. @@ -277,14 +314,16 @@ public: } // Functions related to various normalization/application steps. + /** If cluster is not in the top level, copy it there, and return a pointer to it. */ + Cluster* PullIn(Cluster* cluster) noexcept; /** Apply all removals queued up in m_to_remove to the relevant Clusters (which get a * NEEDS_SPLIT* QualityLevel). */ void ApplyRemovals() noexcept; - /** Split an individual cluster. */ + /** Split an individual cluster (which must be in the top-level ClusterSet). */ void Split(Cluster& cluster) noexcept; - /** Split all clusters that need splitting. */ + /** Split all clusters that need splitting in the top ClusterSet. */ void SplitAll() noexcept; - /** Populate m_group_data (and m_oversized) based on m_deps_to_add. */ + /** Populate the top ClusterSet's m_group_data (and m_oversized) based on m_deps_to_add. */ void GroupClusters() noexcept; /** Merge the specified clusters. 
*/ void Merge(std::span to_merge) noexcept; @@ -301,27 +340,38 @@ public: void SetTransactionFee(Ref&, int64_t fee) noexcept final; std::vector Cleanup() noexcept final; - bool Exists(const Ref& arg) noexcept final; - FeeFrac GetChunkFeerate(const Ref& arg) noexcept final; + void StartStaging() noexcept final; + void CommitStaging() noexcept final; + void AbortStaging() noexcept final; + bool HaveStaging() const noexcept final { return m_clustersets.size() > 1; } + + bool Exists(const Ref& arg, bool main_only = false) noexcept final; + FeeFrac GetMainChunkFeerate(const Ref& arg) noexcept final; FeeFrac GetIndividualFeerate(const Ref& arg) noexcept final; - std::vector GetCluster(const Ref& arg) noexcept final; - std::vector GetAncestors(const Ref& arg) noexcept final; - std::vector GetDescendants(const Ref& arg) noexcept final; - GraphIndex GetTransactionCount() noexcept final; - bool IsOversized() noexcept final; + std::vector GetCluster(const Ref& arg, bool main_only = false) noexcept final; + std::vector GetAncestors(const Ref& arg, bool main_only = false) noexcept final; + std::vector GetDescendants(const Ref& arg, bool main_only = false) noexcept final; + GraphIndex GetTransactionCount(bool main_only = false) noexcept final; + bool IsOversized(bool main_only = false) noexcept final; void SanityCheck() const final; }; -void TxGraphImpl::ClearLocator(GraphIndex idx) noexcept +void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept { auto& entry = m_entries[idx]; - Assume(entry.m_locator.IsPresent()); - // Change the locator from Present to Missing. - entry.m_locator.SetMissing(); + Assume(entry.m_locator[level].IsPresent()); + // Change the locator from Present to Missing or Removed. + if (level == 0 || !entry.m_locator[level - 1].IsPresent()) { + entry.m_locator[level].SetMissing(); + } else { + entry.m_locator[level].SetRemoved(); + m_clustersets[level].m_removed.push_back(idx); + } // Update the transaction count. 
- --m_clusterset.m_txcount; - // Add it to the m_wiped list (which will be processed by Cleanup). + --m_clustersets[level].m_txcount; + // If this was the last level the Locator was Present at, add it to the m_wiped list (which + // will be processed by Cleanup). if (entry.IsWiped()) m_wiped.push_back(idx); } @@ -330,13 +380,13 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept // Update all the Locators for this Cluster's Entrys. for (ClusterIndex idx : m_linearization) { auto& entry = graph.m_entries[m_mapping[idx]]; - entry.m_locator.SetPresent(this, idx); + entry.m_locator[m_level].SetPresent(this, idx); } - // If the Cluster's quality is ACCEPTABLE or OPTIMAL, compute its chunking and store its - // information in the Entry's m_chunk_feerate. These fields are only accessed after making - // the entire graph ACCEPTABLE, so it is pointless to compute these if we haven't reached that - // quality level yet. - if (m_quality == QualityLevel::OPTIMAL || m_quality == QualityLevel::ACCEPTABLE) { + // If this is for the main graph (level = 0), and the Cluster's quality is ACCEPTABLE or + // OPTIMAL, compute its chunking and store its information in the Entry's m_main_chunk_feerate. + // These fields are only accessed after making the entire graph ACCEPTABLE, so it is pointless + // to compute these if we haven't reached that quality level yet. + if (m_level == 0 && (m_quality == QualityLevel::OPTIMAL || m_quality == QualityLevel::ACCEPTABLE)) { LinearizationChunking chunking(m_depgraph, m_linearization); LinearizationIndex lin_idx{0}; // Iterate over the chunks. 
@@ -347,7 +397,7 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept ClusterIndex idx = m_linearization[lin_idx++]; GraphIndex graph_idx = m_mapping[idx]; auto& entry = graph.m_entries[graph_idx]; - entry.m_chunk_feerate = chunk.feerate; + entry.m_main_chunk_feerate = chunk.feerate; chunk.transactions.Reset(idx); if (chunk.transactions.None()) break; } @@ -355,6 +405,57 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept } } +void Cluster::GetConflicts(const TxGraphImpl& graph, std::vector& out) const noexcept +{ + for (auto i : m_linearization) { + auto& entry = graph.m_entries[m_mapping[i]]; + // For every transaction Entry in this Cluster, if it also exists in a lower-level Cluster, + // then that Cluster conflicts. + if (entry.m_locator[m_level - 1].IsPresent()) { + out.push_back(entry.m_locator[m_level - 1].cluster); + } + } +} + +std::vector TxGraphImpl::GetConflicts() const noexcept +{ + int level = m_clustersets.size() - 1; + std::vector ret; + // All Clusters at level-1 containing transactions in m_removed are conflicts. + for (auto i : m_clustersets[level].m_removed) { + auto& entry = m_entries[i]; + Assume(entry.m_locator[level - 1].IsPresent()); + ret.push_back(entry.m_locator[level - 1].cluster); + } + // Then go over all Clusters at this level, and find their conflicts. + for (int quality = 0; quality < int(QualityLevel::NONE); ++quality) { + auto& clusters = m_clustersets[level].m_clusters[quality]; + for (const auto& cluster : clusters) { + cluster->GetConflicts(*this, ret); + } + } + // Deduplicate the result (the same Cluster may appear multiple times). + std::sort(ret.begin(), ret.end()); + ret.erase(std::unique(ret.begin(), ret.end()), ret.end()); + return ret; +} + +Cluster* Cluster::CopyTo(TxGraphImpl& graph, int to_level) const noexcept +{ + // Construct an empty Cluster. 
+ auto ret = std::make_unique(); + auto ptr = ret.get(); + // Copy depgraph, mapping, and linearization. + ptr->m_depgraph = m_depgraph; + ptr->m_mapping = m_mapping; + ptr->m_linearization = m_linearization; + // Insert the new Cluster into the graph. + graph.InsertCluster(to_level, std::move(ret), m_quality); + // Update its Locators (and possibly linearization data in its Entrys). + ptr->Updated(graph); + return ptr; +} + void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove) noexcept { // Iterate over the prefix of to_remove that applies to this cluster. @@ -362,15 +463,15 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove do { GraphIndex idx = to_remove.front(); auto& entry = graph.m_entries[idx]; - auto& locator = entry.m_locator; + auto& locator = entry.m_locator[m_level]; // Stop once we hit an entry that applies to another Cluster. if (locator.cluster != this) break; // - Remember it in a set of to-remove ClusterIndexes. todo.Set(locator.index); // - Remove from m_mapping. m_mapping[locator.index] = GraphIndex(-1); - // - Mark it as removed in the Entry's locator. - graph.ClearLocator(idx); + // - Mark it as missing/removed in the Entry's locator. 
+ graph.ClearLocator(m_level, idx); to_remove = to_remove.subspan(1); } while(!to_remove.empty()); @@ -404,7 +505,32 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove [&](auto pos) { return todo[pos]; }), m_linearization.end()); quality = QualityLevel::NEEDS_SPLIT; } - graph.SetClusterQuality(m_quality, m_setindex, quality); + graph.SetClusterQuality(m_level, m_quality, m_setindex, quality); + Updated(graph); +} + +void Cluster::Clear(TxGraphImpl& graph) noexcept +{ + for (auto i : m_linearization) { + graph.ClearLocator(m_level, m_mapping[i]); + } + m_depgraph = {}; + m_linearization.clear(); + m_mapping.clear(); +} + +void Cluster::LevelDown(TxGraphImpl& graph) noexcept +{ + int level = m_level; + Assume(level > 0); + for (auto i : m_linearization) { + GraphIndex idx = m_mapping[i]; + auto& entry = graph.m_entries[idx]; + entry.m_locator[level].SetMissing(); + } + auto quality = m_quality; + auto cluster = graph.ExtractCluster(level, quality, m_setindex); + graph.InsertCluster(level - 1, std::move(cluster), quality); Updated(graph); } @@ -436,7 +562,7 @@ bool Cluster::Split(TxGraphImpl& graph) noexcept auto component = m_depgraph.FindConnectedComponent(todo); if (first && component == todo) { // The existing Cluster is an entire component. Leave it be, but update its quality. - graph.SetClusterQuality(m_quality, m_setindex, new_quality); + graph.SetClusterQuality(m_level, m_quality, m_setindex, new_quality); // If this made the quality ACCEPTABLE or OPTIMAL, we need to compute and cache its // chunking. Updated(graph); @@ -451,7 +577,7 @@ bool Cluster::Split(TxGraphImpl& graph) noexcept for (auto i : component) { remap[i] = {new_cluster.get(), ClusterIndex(-1)}; } - graph.InsertCluster(std::move(new_cluster), new_quality); + graph.InsertCluster(m_level, std::move(new_cluster), new_quality); todo -= component; } // Redistribute the transactions. 
@@ -511,7 +637,7 @@ void Cluster::Merge(TxGraphImpl& graph, Cluster& other) noexcept // Update the transaction's Locator. There is no need to call Updated() to update chunk // feerates, as Updated() will be invoked by Cluster::ApplyDependencies on the resulting // merged Cluster later anyway). - graph.m_entries[idx].m_locator.SetPresent(this, new_pos); + graph.m_entries[idx].m_locator[m_level].SetPresent(this, new_pos); } // Purge the other Cluster, now that everything has been moved. other.m_depgraph = DepGraph{}; @@ -534,14 +660,14 @@ void Cluster::ApplyDependencies(TxGraphImpl& graph, std::spansecond].m_locator; + auto& first_child = graph.m_entries[it->second].m_locator[m_level]; ClusterIndex child_idx = first_child.index; // Iterate over all to-be-added dependencies within that same child, gather the relevant // parents. SetType parents; while (it != to_apply.end()) { - auto& child = graph.m_entries[it->second].m_locator; - auto& parent = graph.m_entries[it->first].m_locator; + auto& child = graph.m_entries[it->second].m_locator[m_level]; + auto& parent = graph.m_entries[it->first].m_locator[m_level]; Assume(child.cluster == this && parent.cluster == this); if (child.index != child_idx) break; parents.Set(parent.index); @@ -562,17 +688,20 @@ void Cluster::ApplyDependencies(TxGraphImpl& graph, std::span TxGraphImpl::ExtractCluster(QualityLevel quality, ClusterSetIndex setindex) noexcept +std::unique_ptr TxGraphImpl::ExtractCluster(int level, QualityLevel quality, ClusterSetIndex setindex) noexcept { Assume(quality != QualityLevel::NONE); + Assume(level >= 0 && size_t(level) < m_clustersets.size()); - auto& quality_clusters = m_clusterset.m_clusters[int(quality)]; + auto& clusterset = m_clustersets[level]; + auto& quality_clusters = clusterset.m_clusters[int(quality)]; Assume(setindex < quality_clusters.size()); // Extract the Cluster-owning unique_ptr. 
std::unique_ptr ret = std::move(quality_clusters[setindex]); ret->m_quality = QualityLevel::NONE; ret->m_setindex = ClusterSetIndex(-1); + ret->m_level = -1; // Clean up space in quality_cluster. auto max_setindex = quality_clusters.size() - 1; @@ -580,6 +709,7 @@ std::unique_ptr TxGraphImpl::ExtractCluster(QualityLevel quality, Clust // If the cluster was not the last element of quality_clusters, move that to take its place. quality_clusters.back()->m_quality = quality; quality_clusters.back()->m_setindex = setindex; + quality_clusters.back()->m_level = level; quality_clusters[setindex] = std::move(quality_clusters.back()); } // The last element of quality_clusters is now unused; drop it. @@ -588,45 +718,86 @@ std::unique_ptr TxGraphImpl::ExtractCluster(QualityLevel quality, Clust return ret; } -ClusterSetIndex TxGraphImpl::InsertCluster(std::unique_ptr&& cluster, QualityLevel quality) noexcept +ClusterSetIndex TxGraphImpl::InsertCluster(int level, std::unique_ptr&& cluster, QualityLevel quality) noexcept { // Cannot insert with quality level NONE (as that would mean not inserted). Assume(quality != QualityLevel::NONE); // The passed-in Cluster must not currently be in the TxGraphImpl. Assume(cluster->m_quality == QualityLevel::NONE); + // The specified level must exist. + Assume(level >= 0 && size_t(level) < m_clustersets.size()); // Append it at the end of the relevant TxGraphImpl::m_cluster. 
- auto& quality_clusters = m_clusterset.m_clusters[int(quality)]; + auto& clusterset = m_clustersets[level]; + auto& quality_clusters = clusterset.m_clusters[int(quality)]; ClusterSetIndex ret = quality_clusters.size(); cluster->m_quality = quality; cluster->m_setindex = ret; + cluster->m_level = level; quality_clusters.push_back(std::move(cluster)); return ret; } -void TxGraphImpl::SetClusterQuality(QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept +void TxGraphImpl::SetClusterQuality(int level, QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept { Assume(new_quality != QualityLevel::NONE); + Assume(level >= 0 && size_t(level) < m_clustersets.size()); // Don't do anything if the quality did not change. if (old_quality == new_quality) return; // Extract the cluster from where it currently resides. - auto cluster_ptr = ExtractCluster(old_quality, old_index); + auto cluster_ptr = ExtractCluster(level, old_quality, old_index); // And re-insert it where it belongs. - InsertCluster(std::move(cluster_ptr), new_quality); + InsertCluster(level, std::move(cluster_ptr), new_quality); } void TxGraphImpl::DeleteCluster(Cluster& cluster) noexcept { // Extract the cluster from where it currently resides. - auto cluster_ptr = ExtractCluster(cluster.m_quality, cluster.m_setindex); + auto cluster_ptr = ExtractCluster(cluster.m_level, cluster.m_quality, cluster.m_setindex); // And throw it away. cluster_ptr.reset(); } +Cluster* TxGraphImpl::FindCluster(GraphIndex idx, int level) const noexcept +{ + Assume(level >= 0 && size_t(level) < m_clustersets.size()); + auto& entry = m_entries[idx]; + // Search the entry's locators from top to bottom. + for (int l = level; l >= 0; --l) { + // If the locator is missing, dig deeper; it may exist at a lower level. + if (entry.m_locator[l].IsMissing()) continue; + // If the locator has the entry marked as explicitly removed, stop. 
+ if (entry.m_locator[l].IsRemoved()) break; + // Otherwise, we have found the topmost ClusterSet that contains this entry. + return entry.m_locator[l].cluster; + } + // If no non-empty locator was found, or an explicitly removed was hit, return nothing. + return nullptr; +} + +Cluster* TxGraphImpl::PullIn(Cluster* cluster) noexcept +{ + int to_level = m_clustersets.size() - 1; + if (to_level == 0) return cluster; + int level = cluster->m_level; + Assume(level <= to_level); + // Copy the Cluster from the level it was found at to higher levels, if any. + while (level < to_level) { + // Make the Cluster Acceptable before copying. This isn't strictly necessary, but doing it + // now avoids doing double work later. + MakeAcceptable(*cluster); + ++level; + auto new_cluster = cluster->CopyTo(*this, level); + cluster = new_cluster; + } + return cluster; +} + void TxGraphImpl::ApplyRemovals() noexcept { - auto& clusterset = m_clusterset; + int level = m_clustersets.size() - 1; + auto& clusterset = m_clustersets[level]; auto& to_remove = clusterset.m_to_remove; // Skip if there is nothing to remove. if (to_remove.empty()) return; @@ -634,14 +805,19 @@ void TxGraphImpl::ApplyRemovals() noexcept clusterset.m_group_data = std::nullopt; clusterset.m_group_clusters.clear(); clusterset.m_oversized = false; + // Pull in all Clusters that are not in the top ClusterSet. + for (GraphIndex index : clusterset.m_to_remove) { + auto cluster = FindCluster(index, level); + PullIn(cluster); + } // Group the set of to-be-removed entries by Cluster*. std::sort(to_remove.begin(), to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept { - return std::less{}(m_entries[a].m_locator.cluster, m_entries[b].m_locator.cluster); + return std::less{}(m_entries[a].m_locator[level].cluster, m_entries[b].m_locator[level].cluster); }); // Process per Cluster. 
std::span to_remove_span{to_remove}; while (!to_remove_span.empty()) { - Cluster* cluster = m_entries[to_remove_span.front()].m_locator.cluster; + Cluster* cluster = m_entries[to_remove_span.front()].m_locator[level].cluster; if (cluster != nullptr) { // If the first to_remove_span entry's Cluster exists, hand to_remove_span to it, so it // can pop off whatever applies to it. @@ -651,7 +827,7 @@ void TxGraphImpl::ApplyRemovals() noexcept to_remove_span = to_remove_span.subspan(1); } } - to_remove.clear(); + clusterset.m_to_remove.clear(); } void TxGraphImpl::SwapIndexes(GraphIndex a, GraphIndex b) noexcept @@ -666,20 +842,28 @@ void TxGraphImpl::SwapIndexes(GraphIndex a, GraphIndex b) noexcept Entry& entry = m_entries[idx]; // Update linked Ref. if (entry.m_ref) GetRefIndex(*entry.m_ref) = idx; - // Update the locator. The rest of the Entry information will not change, so no need to - // invoke Cluster::Updated(). - Locator& locator = entry.m_locator; - if (locator.IsPresent()) { - locator.cluster->UpdateMapping(locator.index, idx); + // Update the locators for both levels. The rest of the Entry information will not change, + // so no need to invoke Cluster::Updated(). + for (int level = 0; level < MAX_LEVELS; ++level) { + Locator& locator = entry.m_locator[level]; + if (locator.IsPresent()) { + locator.cluster->UpdateMapping(locator.index, idx); + } } } } std::vector TxGraphImpl::Cleanup() noexcept { + // Don't do anything if more than 1 level exists. Cleaning up could invalidate higher levels' + // m_to_remove, m_removed, and m_deps_to_add. + if (m_clustersets.size() > 1) return {}; + + // Apply dependencies so that this level's m_to_remove, m_removed, and m_deps_to_add are + // empty, and oversizedness is determined. ApplyDependencies(); std::vector ret; - if (!m_clusterset.m_oversized) { + if (!m_clustersets[0].m_oversized) { // Sort the GraphIndex that need to be cleaned up. This groups them (so duplicates can be // processed just once). 
They are sorted in reverse, so the last ones get processed first. // This means earlier-processed GraphIndexes will not move of later-processed ones (which @@ -699,7 +883,9 @@ std::vector TxGraphImpl::Cleanup() noexcept m_entries[idx].m_ref = nullptr; } // Verify removed entries don't have anything that could hold a reference back. - Assume(!entry.m_locator.IsPresent()); + for (int level = 0; level < MAX_LEVELS; ++level) { + Assume(!entry.m_locator[level].IsPresent()); + } if (idx != m_entries.size() - 1) SwapIndexes(idx, m_entries.size() - 1); m_entries.pop_back(); } @@ -725,7 +911,7 @@ void TxGraphImpl::SplitAll() noexcept // Before splitting all Cluster, first make sure all removals are applied. ApplyRemovals(); for (auto quality : {QualityLevel::NEEDS_SPLIT, QualityLevel::NEEDS_SPLIT_ACCEPTABLE, QualityLevel::NEEDS_SPLIT_OPTIMAL}) { - auto& queue = m_clusterset.m_clusters[int(quality)]; + auto& queue = m_clustersets.back().m_clusters[int(quality)]; while (!queue.empty()) { Split(*queue.back().get()); } @@ -734,12 +920,14 @@ void TxGraphImpl::SplitAll() noexcept void TxGraphImpl::GroupClusters() noexcept { + int level = m_clustersets.size() - 1; + // Before computing which Clusters need to be merged together, first apply all removals and // split the Clusters into connected components. If we would group first, we might end up // with inefficient and/or oversized Clusters which just end up being split again anyway. SplitAll(); - auto& clusterset = m_clusterset; + auto& clusterset = m_clustersets[level]; // If the groupings have been computed already, nothing is left to be done. if (clusterset.m_group_data.has_value()) return; @@ -753,8 +941,8 @@ void TxGraphImpl::GroupClusters() noexcept // Construct a an_clusters entry for every parent and child in the to-be-applied dependencies. 
for (const auto& [par, chl] : clusterset.m_deps_to_add) { - auto par_cluster = m_entries[par].m_locator.cluster; - auto chl_cluster = m_entries[chl].m_locator.cluster; + auto par_cluster = FindCluster(par, level); + auto chl_cluster = FindCluster(chl, level); // Skip dependencies for which the parent or child transaction is removed. if (par_cluster == nullptr || chl_cluster == nullptr) continue; an_clusters.emplace_back(par_cluster, nullptr); @@ -771,8 +959,8 @@ void TxGraphImpl::GroupClusters() noexcept std::sort(clusterset.m_deps_to_add.begin(), clusterset.m_deps_to_add.end(), [&](auto& a, auto& b) noexcept { auto [_a_par, a_chl] = a; auto [_b_par, b_chl] = b; - auto a_chl_cluster = m_entries[a_chl].m_locator.cluster; - auto b_chl_cluster = m_entries[b_chl].m_locator.cluster; + auto a_chl_cluster = FindCluster(a_chl, level); + auto b_chl_cluster = FindCluster(b_chl, level); return std::less{}(a_chl_cluster, b_chl_cluster); }); @@ -846,8 +1034,8 @@ void TxGraphImpl::GroupClusters() noexcept Cluster* last_chl_cluster{nullptr}; PartitionData* last_partition{nullptr}; for (const auto& [par, chl] : clusterset.m_deps_to_add) { - auto par_cluster = m_entries[par].m_locator.cluster; - auto chl_cluster = m_entries[chl].m_locator.cluster; + auto par_cluster = FindCluster(par, level); + auto chl_cluster = FindCluster(chl, level); // Nothing to do if parent and child are in the same Cluster. if (par_cluster == chl_cluster) continue; // Nothing to do if either parent or child transaction is removed already. @@ -878,12 +1066,12 @@ void TxGraphImpl::GroupClusters() noexcept // Find all dependencies whose child Cluster is Cluster i, and annotate them with rep. 
while (deps_it != clusterset.m_deps_to_add.end()) { auto [par, chl] = *deps_it; - auto chl_cluster = m_entries[chl].m_locator.cluster; + auto chl_cluster = FindCluster(chl, level); // Skip dependencies that apply to earlier Clusters (those necessary are for // deleted transactions, as otherwise we'd have processed them already). if (!std::less{}(chl_cluster, data.cluster)) { if (chl_cluster != data.cluster) break; - auto par_cluster = m_entries[par].m_locator.cluster; + auto par_cluster = FindCluster(par, level); // Also filter out dependencies applying to a removed parent. if (par_cluster != nullptr) an_deps.emplace_back(*deps_it, rep); } @@ -965,15 +1153,22 @@ void TxGraphImpl::Merge(std::span to_merge) noexcept void TxGraphImpl::ApplyDependencies() noexcept { + int level = m_clustersets.size() - 1; + // Compute the groups of to-be-merged Clusters (which also applies all removals, and splits). + auto& clusterset = m_clustersets[level]; GroupClusters(); - auto& clusterset = m_clusterset; Assume(clusterset.m_group_data.has_value()); // Nothing to do if there are no dependencies to be added. if (clusterset.m_deps_to_add.empty()) return; // Dependencies cannot be applied if it would result in oversized clusters. if (clusterset.m_oversized) return; + // Pull in all the Clusters the dependencies are in. + for (Cluster*& cluster : clusterset.m_group_clusters) { + cluster = PullIn(cluster); + } + // For each group of to-be-merged Clusters. 
Assume(clusterset.m_group_data.has_value()); for (const auto& group_data : *clusterset.m_group_data) { @@ -986,7 +1181,7 @@ void TxGraphImpl::ApplyDependencies() noexcept auto deps_span = std::span{clusterset.m_deps_to_add} .subspan(group_data.m_deps_offset, group_data.m_deps_count); Assume(!deps_span.empty()); - const auto& loc = m_entries[deps_span[0].second].m_locator; + const auto& loc = m_entries[deps_span[0].second].m_locator[level]; Assume(loc.IsPresent()); loc.cluster->ApplyDependencies(*this, deps_span); } @@ -1016,7 +1211,7 @@ void Cluster::Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept m_linearization = std::move(linearization); // Update the Cluster's quality. auto new_quality = optimal ? QualityLevel::OPTIMAL : QualityLevel::ACCEPTABLE; - graph.SetClusterQuality(m_quality, m_setindex, new_quality); + graph.SetClusterQuality(m_level, m_quality, m_setindex, new_quality); // Update the Entry objects. Updated(graph); } @@ -1051,9 +1246,10 @@ TxGraph::Ref TxGraphImpl::AddTransaction(const FeeFrac& feerate) noexcept // Construct a new singleton Cluster (which is necessarily optimally linearized). auto cluster = std::make_unique(*this, feerate, idx); auto cluster_ptr = cluster.get(); - InsertCluster(std::move(cluster), QualityLevel::OPTIMAL); + int level = m_clustersets.size() - 1; + InsertCluster(level, std::move(cluster), QualityLevel::OPTIMAL); cluster_ptr->Updated(*this); - ++m_clusterset.m_txcount; + ++m_clustersets[level].m_txcount; // Return the Ref. return ret; } @@ -1065,10 +1261,10 @@ void TxGraphImpl::RemoveTransaction(Ref& arg) noexcept if (GetRefGraph(arg) == nullptr) return; Assume(GetRefGraph(arg) == this); // Find the Cluster the transaction is in, and stop if it isn't in any. - auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + auto cluster = FindCluster(GetRefIndex(arg), m_clustersets.size() - 1); if (cluster == nullptr) return; // Remember that the transaction is to be removed. 
- m_clusterset.m_to_remove.push_back(GetRefIndex(arg)); + m_clustersets.back().m_to_remove.push_back(GetRefIndex(arg)); } void TxGraphImpl::AddDependency(Ref& parent, Ref& child) noexcept @@ -1079,24 +1275,26 @@ void TxGraphImpl::AddDependency(Ref& parent, Ref& child) noexcept Assume(GetRefGraph(parent) == this && GetRefGraph(child) == this); // Find the Cluster the parent and child transaction are in, and stop if either appears to be // already removed. - auto par_cluster = m_entries[GetRefIndex(parent)].m_locator.cluster; + auto par_cluster = FindCluster(GetRefIndex(parent), m_clustersets.size() - 1); if (par_cluster == nullptr) return; - auto chl_cluster = m_entries[GetRefIndex(child)].m_locator.cluster; + auto chl_cluster = FindCluster(GetRefIndex(child), m_clustersets.size() - 1); if (chl_cluster == nullptr) return; // Wipe m_group_data (as it will need to be recomputed). - m_clusterset.m_group_data.reset(); - m_clusterset.m_group_clusters.clear(); + m_clustersets.back().m_group_data.reset(); + m_clustersets.back().m_group_clusters.clear(); // Remember that this dependency is to be applied. - m_clusterset.m_deps_to_add.emplace_back(GetRefIndex(parent), GetRefIndex(child)); + m_clustersets.back().m_deps_to_add.emplace_back(GetRefIndex(parent), GetRefIndex(child)); } -bool TxGraphImpl::Exists(const Ref& arg) noexcept +bool TxGraphImpl::Exists(const Ref& arg, bool main_only) noexcept { if (GetRefGraph(arg) == nullptr) return false; Assume(GetRefGraph(arg) == this); + size_t level = main_only ? 0 : m_clustersets.size() - 1; // Make sure the transaction isn't scheduled for removal. 
- ApplyRemovals(); - return m_entries[GetRefIndex(arg)].m_locator.IsPresent(); + if (level == m_clustersets.size() - 1) ApplyRemovals(); + auto cluster = FindCluster(GetRefIndex(arg), level); + return cluster != nullptr; } std::vector Cluster::GetAncestorRefs(const TxGraphImpl& graph, ClusterIndex idx) noexcept @@ -1137,62 +1335,78 @@ FeeFrac Cluster::GetIndividualFeerate(ClusterIndex idx) noexcept return m_depgraph.FeeRate(idx); } -std::vector TxGraphImpl::GetAncestors(const Ref& arg) noexcept +void Cluster::MakeTransactionsMissing(TxGraphImpl& graph) noexcept { - // Return the empty vector if the Ref is empty (which may be indicative of the transaction - // having been removed already. - if (GetRefGraph(arg) == nullptr) return {}; - Assume(GetRefGraph(arg) == this); - // Apply all dependencies, as the result might be incorrect otherwise. - ApplyDependencies(); - // Ancestry cannot be known if unapplied dependencies remain. - Assume(!m_clusterset.m_oversized); - // Find the Cluster the argument is in, and return the empty vector if it isn't in any. - auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; - if (cluster == nullptr) return {}; - // Dispatch to the Cluster. - return cluster->GetAncestorRefs(*this, m_entries[GetRefIndex(arg)].m_locator.index); + // Mark all transactions of a Cluster missing, needed when aborting staging, so that the + // corresponding Locators don't retain references into aborted Clusters. + for (auto ci : m_linearization) { + GraphIndex idx = m_mapping[ci]; + auto& entry = graph.m_entries[idx]; + entry.m_locator[m_level].SetMissing(); + if (entry.IsWiped()) graph.m_wiped.push_back(idx); + } } -std::vector TxGraphImpl::GetDescendants(const Ref& arg) noexcept +std::vector TxGraphImpl::GetAncestors(const Ref& arg, bool main_only) noexcept { // Return the empty vector if the Ref is empty (which may be indicative of the transaction // having been removed already. 
if (GetRefGraph(arg) == nullptr) return {}; Assume(GetRefGraph(arg) == this); // Apply all dependencies, as the result might be incorrect otherwise. + size_t level = main_only ? 0 : m_clustersets.size() - 1; ApplyDependencies(); // Ancestry cannot be known if unapplied dependencies remain. - Assume(!m_clusterset.m_oversized); + Assume(!m_clustersets[level].m_oversized); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. - auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + auto cluster = FindCluster(GetRefIndex(arg), level); if (cluster == nullptr) return {}; // Dispatch to the Cluster. - return cluster->GetDescendantRefs(*this, m_entries[GetRefIndex(arg)].m_locator.index); + return cluster->GetAncestorRefs(*this, m_entries[GetRefIndex(arg)].m_locator[cluster->m_level].index); } -std::vector TxGraphImpl::GetCluster(const Ref& arg) noexcept +std::vector TxGraphImpl::GetDescendants(const Ref& arg, bool main_only) noexcept { // Return the empty vector if the Ref is empty (which may be indicative of the transaction // having been removed already. if (GetRefGraph(arg) == nullptr) return {}; Assume(GetRefGraph(arg) == this); // Apply all dependencies, as the result might be incorrect otherwise. + size_t level = main_only ? 0 : m_clustersets.size() - 1; + ApplyDependencies(); + // Ancestry cannot be known if unapplied dependencies remain. + Assume(!m_clustersets[level].m_oversized); + // Find the Cluster the argument is in, and return the empty vector if it isn't in any. + auto cluster = FindCluster(GetRefIndex(arg), level); + if (cluster == nullptr) return {}; + // Dispatch to the Cluster. + return cluster->GetDescendantRefs(*this, m_entries[GetRefIndex(arg)].m_locator[cluster->m_level].index); +} + +std::vector TxGraphImpl::GetCluster(const Ref& arg, bool main_only) noexcept +{ + // Return the empty vector if the Ref is empty (which may be indicative of the transaction + // having been removed already. 
+ if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Apply all dependencies, as the result might be incorrect otherwise. + size_t level = main_only ? 0 : m_clustersets.size() - 1; ApplyDependencies(); // Cluster linearization cannot be known if unapplied dependencies remain. - Assume(!m_clusterset.m_oversized); + Assume(!m_clustersets[level].m_oversized); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. - auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + auto cluster = FindCluster(GetRefIndex(arg), level); if (cluster == nullptr) return {}; // Make sure the Cluster has an acceptable quality level, and then dispatch to it. MakeAcceptable(*cluster); return cluster->GetClusterRefs(*this); } -TxGraph::GraphIndex TxGraphImpl::GetTransactionCount() noexcept +TxGraph::GraphIndex TxGraphImpl::GetTransactionCount(bool main_only) noexcept { + size_t level = main_only ? 0 : m_clustersets.size() - 1; ApplyRemovals(); - return m_clusterset.m_txcount; + return m_clustersets[level].m_txcount; } FeeFrac TxGraphImpl::GetIndividualFeerate(const Ref& arg) noexcept @@ -1203,14 +1417,21 @@ FeeFrac TxGraphImpl::GetIndividualFeerate(const Ref& arg) noexcept Assume(GetRefGraph(arg) == this); // Apply removals, so that we can correctly report FeeFrac{} for non-existing transaction. ApplyRemovals(); - // Find the cluster the argument is in, and return the empty FeeFrac if it isn't in any. - auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + // Find the cluster the argument is in (the level does not matter as individual feerates will + // be identical if it occurs in both), and return the empty FeeFrac if it isn't in any. 
+ Cluster* cluster{nullptr}; + for (int level = 0; size_t(level) < m_clustersets.size(); ++level) { + if (m_entries[GetRefIndex(arg)].m_locator[level].IsPresent()) { + cluster = m_entries[GetRefIndex(arg)].m_locator[level].cluster; + break; + } + } if (cluster == nullptr) return {}; // Dispatch to the Cluster. - return cluster->GetIndividualFeerate(m_entries[GetRefIndex(arg)].m_locator.index); + return cluster->GetIndividualFeerate(m_entries[GetRefIndex(arg)].m_locator[cluster->m_level].index); } -FeeFrac TxGraphImpl::GetChunkFeerate(const Ref& arg) noexcept +FeeFrac TxGraphImpl::GetMainChunkFeerate(const Ref& arg) noexcept { // Return the empty FeeFrac if the passed Ref is empty (which may be indicative of the // transaction having been removed already. @@ -1219,23 +1440,102 @@ FeeFrac TxGraphImpl::GetChunkFeerate(const Ref& arg) noexcept // Apply all dependencies, as the result might be inaccurate otherwise. ApplyDependencies(); // Chunk feerates cannot be accurately known if unapplied dependencies remain. - Assume(!m_clusterset.m_oversized); + Assume(!m_clustersets[0].m_oversized); // Find the cluster the argument is in, and return the empty FeeFrac if it isn't in any. - auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + auto cluster = FindCluster(GetRefIndex(arg), 0); if (cluster == nullptr) return {}; // Make sure the Cluster has an acceptable quality level, and then return the transaction's // chunk feerate. MakeAcceptable(*cluster); const auto& entry = m_entries[GetRefIndex(arg)]; - return entry.m_chunk_feerate; + return entry.m_main_chunk_feerate; } -bool TxGraphImpl::IsOversized() noexcept +bool TxGraphImpl::IsOversized(bool main_only) noexcept { + size_t level = main_only ? 0 : m_clustersets.size() - 1; // Find which Clusters will need to be merged together, as that is where the oversize // property is assessed. 
GroupClusters(); - return m_clusterset.m_oversized; + return m_clustersets[level].m_oversized; +} + +void TxGraphImpl::StartStaging() noexcept +{ + Assume(m_clustersets.size() < MAX_LEVELS); + // Apply all remaining dependencies in main before creating a staging graph. Once staging + // exists, we cannot merge Clusters anymore (because of interference with Clusters being + // pulled into staging), so to make sure all inspectors are available (if not oversized), + // do all merging work now. This also involves applying all removals. + ApplyDependencies(); + // Construct a new graph. + m_clustersets.emplace_back(); + // Copy statistics, precomputed data, and to-be-applied dependencies (only if oversized) to + // the new graph. To-be-applied removals will always be empty at this point. + auto& stage = m_clustersets.back(); + auto& main = *(m_clustersets.rbegin() + 1); + stage.m_txcount = main.m_txcount; + stage.m_deps_to_add = main.m_deps_to_add; + stage.m_group_data = main.m_group_data; + stage.m_group_clusters = main.m_group_clusters; + stage.m_oversized = main.m_oversized; +} + +void TxGraphImpl::AbortStaging() noexcept +{ + Assume(m_clustersets.size() > 1); + int stage_level = m_clustersets.size() - 1; + auto& stage = m_clustersets[stage_level]; + // Mark all removed transactions as Missing (so the stage_level locator for these transactions + // can be reused if another staging is created). + for (auto idx : stage.m_removed) { + m_entries[idx].m_locator[stage_level].SetMissing(); + } + // Do the same with the non-removed transactions in staging Clusters. + for (int quality = 0; quality < int(QualityLevel::NONE); ++quality) { + for (auto& cluster : stage.m_clusters[quality]) { + cluster->MakeTransactionsMissing(*this); + } + } + // Destroy the staging graph data. 
+ m_clustersets.pop_back(); +} + +void TxGraphImpl::CommitStaging() noexcept +{ + Assume(m_clustersets.size() > 1); + int stage_level = m_clustersets.size() - 1; + int main_level = stage_level - 1; + auto& stage = m_clustersets[stage_level]; + auto& main = m_clustersets[main_level]; + // Delete all conflicting Clusters in main_level, to make place for moving the staging ones + // there. All of these have been PullIn()'d to stage_level before. + auto conflicts = GetConflicts(); + for (Cluster* conflict : conflicts) { + conflict->Clear(*this); + DeleteCluster(*conflict); + } + // Mark the removed transactions as Missing (so the stage_level locator for these transactions + // can be reused if another staging is created). + for (auto idx : stage.m_removed) { + m_entries[idx].m_locator[stage_level].SetMissing(); + } + // Then move all Clusters in staging to main. + for (int quality = 0; quality < int(QualityLevel::NONE); ++quality) { + auto& stage_sets = stage.m_clusters[quality]; + while (!stage_sets.empty()) { + stage_sets.back()->LevelDown(*this); + } + } + // Move all statistics, precomputed data, and to-be-applied removals and dependencies. + main.m_deps_to_add = std::move(stage.m_deps_to_add); + main.m_to_remove = std::move(stage.m_to_remove); + main.m_group_data = std::move(stage.m_group_data); + main.m_group_clusters = std::move(stage.m_group_clusters); + main.m_oversized = std::move(stage.m_oversized); + main.m_txcount = std::move(stage.m_txcount); + // Delete the old staging graph, after all its information was moved to main. + m_clustersets.pop_back(); +} void Cluster::SetFee(TxGraphImpl& graph, ClusterIndex idx, int64_t fee) noexcept @@ -1248,9 +1548,9 @@ void Cluster::SetFee(TxGraphImpl& graph, ClusterIndex idx, int64_t fee) noexcept // in the same Cluster. 
m_depgraph.FeeRate(idx).fee = fee; if (m_quality == QualityLevel::OPTIMAL || m_quality == QualityLevel::ACCEPTABLE) { - graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); + graph.SetClusterQuality(m_level, m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); } else if (m_quality == QualityLevel::NEEDS_SPLIT_OPTIMAL || m_quality == QualityLevel::NEEDS_SPLIT_ACCEPTABLE) { - graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_SPLIT); + graph.SetClusterQuality(m_level, m_quality, m_setindex, QualityLevel::NEEDS_SPLIT); } Updated(graph); } @@ -1263,13 +1563,15 @@ void TxGraphImpl::SetTransactionFee(Ref& ref, int64_t fee) noexcept Assume(GetRefGraph(ref) == this); // Find the entry, its locator, and inform its Cluster about the new feerate, if any. auto& entry = m_entries[GetRefIndex(ref)]; - auto& locator = entry.m_locator; - if (locator.IsPresent()) { - locator.cluster->SetFee(*this, locator.index, fee); + for (int level = 0; level < MAX_LEVELS; ++level) { + auto& locator = entry.m_locator[level]; + if (locator.IsPresent()) { + locator.cluster->SetFee(*this, locator.index, fee); + } } } -void Cluster::SanityCheck(const TxGraphImpl& graph) const +void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const { // There must be an m_mapping for each m_depgraph position (including holes). assert(m_depgraph.PositionRange() == m_mapping.size()); @@ -1277,6 +1579,8 @@ void Cluster::SanityCheck(const TxGraphImpl& graph) const assert(m_depgraph.TxCount() == m_linearization.size()); // The number of transactions in a Cluster cannot exceed m_max_cluster_count. assert(m_linearization.size() <= graph.m_max_cluster_count); + // The level must match the level the Cluster occurs in. + assert(m_level == level); // m_quality and m_setindex are checked in TxGraphImpl::SanityCheck. // Compute the chunking of m_linearization. 
@@ -1292,14 +1596,14 @@ void Cluster::SanityCheck(const TxGraphImpl& graph) const m_done.Set(lin_pos); assert(m_done.IsSupersetOf(m_depgraph.Ancestors(lin_pos))); // Check that the Entry has a locator pointing back to this Cluster & position within it. - assert(entry.m_locator.cluster == this); - assert(entry.m_locator.index == lin_pos); - // Check linearization position and chunk feerate. - if (m_quality == QualityLevel::OPTIMAL || m_quality == QualityLevel::ACCEPTABLE) { + assert(entry.m_locator[level].cluster == this); + assert(entry.m_locator[level].index == lin_pos); + // For top-level entries, check linearization position and chunk feerate. + if (level == 0 && (m_quality == QualityLevel::OPTIMAL || m_quality == QualityLevel::ACCEPTABLE)) { if (!linchunking.GetChunk(0).transactions[lin_pos]) { linchunking.MarkDone(linchunking.GetChunk(0).transactions); } - assert(entry.m_chunk_feerate == linchunking.GetChunk(0).feerate); + assert(entry.m_main_chunk_feerate == linchunking.GetChunk(0).feerate); // If this Cluster has an acceptable quality level, its chunks must be connected. assert(m_depgraph.IsConnected(linchunking.GetChunk(0).transactions)); } @@ -1312,8 +1616,10 @@ void TxGraphImpl::SanityCheck() const { /** Which GraphIndexes ought to occur in m_wiped, based on m_entries. */ std::set expected_wiped; - /** Which Clusters ought to occur in m_clusters, based on m_entries. */ - std::set expected_clusters; + /** Which Clusters ought to occur in ClusterSet::m_clusters, based on m_entries. */ + std::set expected_clusters[MAX_LEVELS]; + /** Which GraphIndexes ought to occur in ClusterSet::m_removed, based on m_entries. */ + std::set expected_removed[MAX_LEVELS]; // Go over all Entry objects in m_entries. for (GraphIndex idx = 0; idx < m_entries.size(); ++idx) { @@ -1327,43 +1633,65 @@ void TxGraphImpl::SanityCheck() const assert(GetRefGraph(*entry.m_ref) == this); assert(GetRefIndex(*entry.m_ref) == idx); } - // Verify the Entry m_locator. 
- const auto& locator = entry.m_locator; - // Every Locator must be in exactly one of these 2 states. - assert(locator.IsMissing() + locator.IsPresent() == 1); - if (locator.IsPresent()) { - // Verify that the Cluster agrees with where the Locator claims the transaction is. - assert(locator.cluster->GetClusterEntry(locator.index) == idx); - // Remember that we expect said Cluster to appear in the m_clusters. - expected_clusters.insert(locator.cluster); - } - - } - - auto& clusterset = m_clusterset; - std::set actual_clusters; - // For all quality levels... - for (int qual = 0; qual < int(QualityLevel::NONE); ++qual) { - QualityLevel quality{qual}; - const auto& quality_clusters = clusterset.m_clusters[qual]; - // ... for all clusters in them ... - for (ClusterSetIndex setindex = 0; setindex < quality_clusters.size(); ++setindex) { - const auto& cluster = *quality_clusters[setindex]; - // Remember we saw this Cluster (only if it is non-empty; empty Clusters aren't - // expected to be referenced by the Entry vector). - if (cluster.GetTxCount() != 0) { - actual_clusters.insert(&cluster); + // Verify the Entry m_locators. + bool was_present{false}, was_removed{false}; + for (int level = 0; level < MAX_LEVELS; ++level) { + const auto& locator = entry.m_locator[level]; + // Every Locator must be in exactly one of these 3 states. + assert(locator.IsMissing() + locator.IsRemoved() + locator.IsPresent() == 1); + if (locator.IsPresent()) { + // Once removed, a transaction cannot be revived. + assert(!was_removed); + // Verify that the Cluster agrees with where the Locator claims the transaction is. + assert(locator.cluster->GetClusterEntry(locator.index) == idx); + // Remember that we expect said Cluster to appear in the ClusterSet::m_clusters. + expected_clusters[level].insert(locator.cluster); + was_present = true; + } else if (locator.IsRemoved()) { + // Level 0 (main) cannot have IsRemoved locators (IsMissing there means non-existing). 
+ assert(level > 0); + // A Locator can only be IsRemoved if it was IsPresent before, and only once. + assert(was_present && !was_removed); + // Remember that we expect this GraphIndex to occur in the ClusterSet::m_removed. + expected_removed[level].insert(idx); + was_removed = true; } - // Sanity check the cluster, according to the Cluster's internal rules. - cluster.SanityCheck(*this); - // Check that the cluster's quality and setindex matches its position in the quality list. - assert(cluster.m_quality == quality); - assert(cluster.m_setindex == setindex); } } - // Verify that the actually encountered clusters match the ones occurring in Entry vector. - assert(actual_clusters == expected_clusters); + // For all levels (0 = main, 1 = staged)... + for (size_t level = 0; level < m_clustersets.size(); ++level) { + assert(level < MAX_LEVELS); + auto& clusterset = m_clustersets[level]; + std::set actual_clusters; + + // For all quality levels... + for (int qual = 0; qual < int(QualityLevel::NONE); ++qual) { + QualityLevel quality{qual}; + const auto& quality_clusters = clusterset.m_clusters[qual]; + // ... for all clusters in them ... + for (ClusterSetIndex setindex = 0; setindex < quality_clusters.size(); ++setindex) { + const auto& cluster = *quality_clusters[setindex]; + // Remember we saw this Cluster (only if it is non-empty; empty Clusters aren't + // expected to be referenced by the Entry vector). + if (cluster.GetTxCount() != 0) { + actual_clusters.insert(&cluster); + } + // Sanity check the cluster, according to the Cluster's internal rules. + cluster.SanityCheck(*this, level); + // Check that the cluster's quality and setindex matches its position in the quality list. + assert(cluster.m_quality == quality); + assert(cluster.m_setindex == setindex); + } + } + + // Verify that the actually encountered clusters match the ones occurring in Entry vector. 
+ assert(actual_clusters == expected_clusters[level]); + + // Verify that the contents of m_removed matches what was expected based on the Entry vector. + std::set actual_removed(clusterset.m_removed.begin(), clusterset.m_removed.end()); + assert(actual_removed == expected_removed[level]); + } // Verify that the contents of m_wiped matches what was expected based on the Entry vector. std::set actual_wiped(m_wiped.begin(), m_wiped.end()); diff --git a/src/txgraph.h b/src/txgraph.h index a4588631404..a3c7855c8fb 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -69,9 +69,11 @@ protected: public: /** Virtual destructor, so inheriting is safe. */ virtual ~TxGraph() = default; - /** Construct a new transaction with the specified feerate, and return a Ref to it. */ + /** Construct a new transaction with the specified feerate, and return a Ref to it. + * If a staging graph exists, the new transaction is only created there. */ [[nodiscard]] virtual Ref AddTransaction(const FeeFrac& feerate) noexcept = 0; - /** Remove the specified transaction. This is a no-op if the transaction was already removed. + /** Remove the specified transaction. If a staging graph exists, the removal only happens + * there. This is a no-op if the transaction was already removed. * * TxGraph may internally reorder transaction removals with dependency additions for * performance reasons. If together with any transaction removal all its descendants, or all @@ -84,44 +86,67 @@ public: * deletion of B also either A or C is deleted, there is no distinction. */ virtual void RemoveTransaction(Ref& arg) noexcept = 0; - /** Add a dependency between two specified transactions. Parent may not be a descendant of - * child already (but may be an ancestor of it already, in which case this is a no-op). If - * either transaction is already removed, this is a no-op. */ + /** Add a dependency between two specified transactions. If a staging graph exists, the + * dependency is only added there. 
Parent may not be a descendant of child already (but may + * be an ancestor of it already, in which case this is a no-op). If either transaction is + * already removed, this is a no-op. */ virtual void AddDependency(Ref& parent, Ref& child) noexcept = 0; - /** Modify the fee of the specified transaction. If the transaction does not exist (or was - * removed), this has no effect. */ + /** Modify the fee of the specified transaction, in both the main graph and the staging + * graph if it exists. Wherever the transaction does not exist (or was removed), this has no + * effect. */ virtual void SetTransactionFee(Ref& arg, int64_t fee) noexcept = 0; /** Return a vector of pointers to Ref objects for transactions which have been removed from - * the graph, and have not been destroyed yet. This has no effect if the graph is oversized - * (see below). Each transaction is only reported once by Cleanup(). Afterwards, all Refs will - * be empty. */ + * the graph, and have not been destroyed yet. This has no effect if a staging graph exists, + * or if the graph is oversized (see below). Each transaction is only reported once by + * Cleanup(). Afterwards, all Refs will be empty. */ [[nodiscard]] virtual std::vector Cleanup() noexcept = 0; - /** Determine whether arg exists in this graph (i.e., was not removed). */ - virtual bool Exists(const Ref& arg) noexcept = 0; + /** Create a staging graph (which cannot exist already). This acts as if a full copy of + * the transaction graph is made, upon which further modifications are made. This copy can + * be inspected, and then either discarded, or the main graph can be replaced by it by + * committing it. */ + virtual void StartStaging() noexcept = 0; + /** Discard the existing active staging graph (which must exist). */ + virtual void AbortStaging() noexcept = 0; + /** Replace the main graph with the staging graph (which must exist). */ + virtual void CommitStaging() noexcept = 0; + /** Check whether a staging graph exists. 
*/ + virtual bool HaveStaging() const noexcept = 0; + + /** Determine whether arg exists in the graph (i.e., was not removed). If main_only is false + * and a staging graph exists, it is queried; otherwise the main graph is queried. */ + virtual bool Exists(const Ref& arg, bool main_only = false) noexcept = 0; /** Determine whether the graph is oversized (contains a connected component of more than the - * configured maximum cluster count). Some of the functions below are not available + * configured maximum cluster count). If main_only is false and a staging graph exists, it is + * queried; otherwise the main graph is queried. Some of the functions below are not available * for oversized graphs. The mutators above are always available. */ - virtual bool IsOversized() noexcept = 0; - /** Get the feerate of the chunk which transaction arg is in. Returns the empty FeeFrac if arg - * does not exist. The graph must not be oversized. */ - virtual FeeFrac GetChunkFeerate(const Ref& arg) noexcept = 0; + virtual bool IsOversized(bool main_only = false) noexcept = 0; + /** Get the feerate of the chunk which transaction arg is in in the main graph. Returns the + * empty FeeFrac if arg does not exist in the main graph. The main graph must not be + * oversized. */ + virtual FeeFrac GetMainChunkFeerate(const Ref& arg) noexcept = 0; /** Get the individual transaction feerate of transaction arg. Returns the empty FeeFrac if - * arg does not exist. This is available even for oversized graphs. */ + * arg does not exist in either main or staging. This is available even for oversized + * graphs. */ virtual FeeFrac GetIndividualFeerate(const Ref& arg) noexcept = 0; /** Get pointers to all transactions in the connected component ("cluster") which arg is in. * The transactions will be returned in a topologically-valid order of acceptable quality. - * Returns {} if arg does not exist in the queried graph. 
*/ - virtual std::vector GetCluster(const Ref& arg) noexcept = 0; - /** Get pointers to all ancestors of the specified transaction. The queried graph must not be - * oversized. Returns {} if arg does not exist. */ - virtual std::vector GetAncestors(const Ref& arg) noexcept = 0; - /** Get pointers to all descendants of the specified transaction. The graph must not be - * oversized. Returns {} if arg does not exist in the graph. */ - virtual std::vector GetDescendants(const Ref& arg) noexcept = 0; - /** Get the total number of transactions in the graph. This is available even for oversized - * graphs. */ - virtual GraphIndex GetTransactionCount() noexcept = 0; + * If main_only is false and a staging graph exists, it is queried; otherwise the main graph + * is queried. The queried graph must not be oversized. Returns {} if arg does not exist in + * the queried graph. */ + virtual std::vector GetCluster(const Ref& arg, bool main_only = false) noexcept = 0; + /** Get pointers to all ancestors of the specified transaction. If main_only is false and a + * staging graph exists, it is queried; otherwise the main graph is queried. The queried + * graph must not be oversized. Returns {} if arg does not exist in the queried graph. */ + virtual std::vector GetAncestors(const Ref& arg, bool main_only = false) noexcept = 0; + /** Get pointers to all descendants of the specified transaction. If main_only is false and a + * staging graph exists, it is queried; otherwise the main graph is queried. The queried + * graph must not be oversized. Returns {} if arg does not exist in the queried graph. */ + virtual std::vector GetDescendants(const Ref& arg, bool main_only = false) noexcept = 0; + /** Get the total number of transactions in the graph. If main_only is false and a staging + * graph exists, it is queried; otherwise the main graph is queried. This is available even + * for oversized graphs. 
*/ + virtual GraphIndex GetTransactionCount(bool main_only = false) noexcept = 0; /** Perform an internal consistency check on this object. */ virtual void SanityCheck() const = 0; From a16630a49c412349597bcde2f51c298313e415a5 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Tue, 3 Dec 2024 11:25:49 -0500 Subject: [PATCH 12/20] txgraph: (feature) destroying Ref means removing transaction Before this commit, if a TxGraph::Ref object is destroyed, it becomes impossible to refer to, but the actual corresponding transaction node in the TxGraph remains, and remains indefinitely as there is no way to remove it. Fix this by making the destruction of TxGraph::Ref trigger immediate removal of the corresponding transaction in TxGraph, both in main and staging if it exists. --- src/test/fuzz/txgraph.cpp | 52 ++++++++++++ src/txgraph.cpp | 174 ++++++++++++++++++++++++-------------- src/txgraph.h | 7 +- 3 files changed, 168 insertions(+), 65 deletions(-) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index faef8bb2377..e537f546120 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -148,6 +148,32 @@ struct SimTxGraph if (oversized.has_value() && *oversized) oversized = std::nullopt; } + /** Destroy the transaction from the graph, including from the removed set. This will + * trigger TxGraph::Ref::~Ref. reset_oversize controls whether the cached oversized + * value is cleared (destroying does not clear oversizedness in TxGraph of the main + * graph while staging exists). */ + void DestroyTransaction(TxGraph::Ref& ref, bool reset_oversize) + { + // Special case the empty Ref. + if (!ref) return; + auto pos = Find(ref); + if (pos == MISSING) { + // Wipe the ref, if it exists, from the removed vector. Use std::partition rather + // than std::erase because we don't care about the order of the entries that + // remain. 
+ auto remove = std::partition(removed.begin(), removed.end(), [&](auto& arg) { return arg.get() != &ref; }); + removed.erase(remove, removed.end()); + } else { + graph.RemoveTransactions(SetType::Singleton(pos)); + simrevmap.erase(simmap[pos].get()); + simmap[pos].reset(); + // This may invalidate our cached oversized value. + if (reset_oversize && oversized.has_value() && *oversized) { + oversized = std::nullopt; + } + } + } + /** Construct the set with all positions in this graph corresponding to the specified * TxGraph::Refs. All of them must occur in this graph and not be removed. */ SetType MakeSet(std::span arg) @@ -333,6 +359,28 @@ FUZZ_TARGET(txgraph) sim.SetTransactionFee(ref, fee); } break; + } else if (command-- == 0) { + // ~Ref. + std::vector to_destroy; + to_destroy.push_back(&pick_fn()); + while (true) { + // Keep adding either the ancestors or descendants the already picked + // transactions have in both graphs (main and staging) combined. Destroying + // will trigger deletions in both, so to have consistent TxGraph behavior, the + // set must be closed under ancestors, or descendants, in both graphs. + auto old_size = to_destroy.size(); + for (auto& sim : sims) sim.IncludeAncDesc(to_destroy, alt); + if (to_destroy.size() == old_size) break; + } + // The order in which these ancestors/descendants are destroyed should not matter; + // randomly shuffle them. + std::shuffle(to_destroy.begin(), to_destroy.end(), rng); + for (TxGraph::Ref* ptr : to_destroy) { + for (size_t level = 0; level < sims.size(); ++level) { + sims[level].DestroyTransaction(*ptr, level == sims.size() - 1); + } + } + break; } else if (command-- == 0) { // Cleanup. 
auto cleaned = real->Cleanup(); @@ -446,6 +494,10 @@ FUZZ_TARGET(txgraph) if (alt) { real->AbortStaging(); sims.pop_back(); + // Reset the cached oversized value (if TxGraph::Ref destructions triggered + // removals of main transactions while staging was active, then aborting will + // cause it to be re-evaluated in TxGraph). + sims.back().oversized = std::nullopt; } else { real->CommitStaging(); sims.erase(sims.begin()); diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 7c4da0111f5..4d7eb4ac1a0 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -236,7 +236,7 @@ private: * transaction. */ struct Entry { - /** Pointer to the corresponding Ref object, if any. */ + /** Pointer to the corresponding Ref object, or nullptr if none. */ Ref* m_ref; /** Which Cluster and position therein this Entry appears in. ([0] = main, [1] = staged). */ Locator m_locator[MAX_LEVELS]; @@ -311,19 +311,24 @@ public: auto& entry = m_entries[idx]; Assume(entry.m_ref != nullptr); entry.m_ref = nullptr; + if (!entry.IsWiped()) { + for (size_t level = 0; level < m_clustersets.size(); ++level) { + m_clustersets[level].m_to_remove.push_back(idx); + } + } } // Functions related to various normalization/application steps. - /** If cluster is not in the top level, copy it there, and return a pointer to it. */ - Cluster* PullIn(Cluster* cluster) noexcept; + /** If cluster is not in to_level, copy it there, and return a pointer to it. */ + Cluster* PullIn(Cluster* cluster, int to_level) noexcept; /** Apply all removals queued up in m_to_remove to the relevant Clusters (which get a * NEEDS_SPLIT* QualityLevel). */ - void ApplyRemovals() noexcept; - /** Split an individual cluster (which must be in the top-level ClusterSet). */ + void ApplyRemovals(int up_to_level) noexcept; + /** Split an individual cluster. */ void Split(Cluster& cluster) noexcept; - /** Split all clusters that need splitting in the top ClusterSet. 
*/ - void SplitAll() noexcept; - /** Populate the top ClusterSet's m_group_data (and m_oversized) based on m_deps_to_add. */ + /** Split all clusters that need splitting in ClusterSets up to the specified level. */ + void SplitAll(int up_to_level) noexcept; + /** Populate the top ClusterSet's m_group_data (and m_oversized) based on its m_deps_to_add. */ void GroupClusters() noexcept; /** Merge the specified clusters. */ void Merge(std::span to_merge) noexcept; @@ -370,6 +375,17 @@ void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept } // Update the transaction count. --m_clustersets[level].m_txcount; + // Adjust the status of Locators of this transaction at higher levels. + for (size_t after_level = level + 1; after_level < m_clustersets.size(); ++after_level) { + if (entry.m_locator[after_level].IsPresent()) { + break; + } else if (entry.m_locator[after_level].IsRemoved()) { + entry.m_locator[after_level].SetMissing(); + break; + } else { + --m_clustersets[after_level].m_txcount; + } + } // If this was the last level the Locator was Present at, add it to the m_wiped list (which // will be processed by Cleanup). if (entry.IsWiped()) m_wiped.push_back(idx); @@ -424,6 +440,7 @@ std::vector TxGraphImpl::GetConflicts() const noexcept // All Clusters at level-1 containing transactions in m_removed are conflicts. 
for (auto i : m_clustersets[level].m_removed) { auto& entry = m_entries[i]; + if (entry.IsWiped()) continue; Assume(entry.m_locator[level - 1].IsPresent()); ret.push_back(entry.m_locator[level - 1].cluster); } @@ -776,9 +793,8 @@ Cluster* TxGraphImpl::FindCluster(GraphIndex idx, int level) const noexcept return nullptr; } -Cluster* TxGraphImpl::PullIn(Cluster* cluster) noexcept +Cluster* TxGraphImpl::PullIn(Cluster* cluster, int to_level) noexcept { - int to_level = m_clustersets.size() - 1; if (to_level == 0) return cluster; int level = cluster->m_level; Assume(level <= to_level); @@ -794,40 +810,49 @@ Cluster* TxGraphImpl::PullIn(Cluster* cluster) noexcept return cluster; } -void TxGraphImpl::ApplyRemovals() noexcept +void TxGraphImpl::ApplyRemovals(int up_to_level) noexcept { - int level = m_clustersets.size() - 1; - auto& clusterset = m_clustersets[level]; - auto& to_remove = clusterset.m_to_remove; - // Skip if there is nothing to remove. - if (to_remove.empty()) return; - // Wipe cached m_group_data and m_oversized, as they may be invalidated by removals. - clusterset.m_group_data = std::nullopt; - clusterset.m_group_clusters.clear(); - clusterset.m_oversized = false; - // Pull in all Clusters that are not in the top ClusterSet. - for (GraphIndex index : clusterset.m_to_remove) { - auto cluster = FindCluster(index, level); - PullIn(cluster); - } - // Group the set of to-be-removed entries by Cluster*. - std::sort(to_remove.begin(), to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept { - return std::less{}(m_entries[a].m_locator[level].cluster, m_entries[b].m_locator[level].cluster); - }); - // Process per Cluster. - std::span to_remove_span{to_remove}; - while (!to_remove_span.empty()) { - Cluster* cluster = m_entries[to_remove_span.front()].m_locator[level].cluster; - if (cluster != nullptr) { - // If the first to_remove_span entry's Cluster exists, hand to_remove_span to it, so it - // can pop off whatever applies to it. 
- cluster->ApplyRemovals(*this, to_remove_span); - } else { - // Otherwise, skip this already-removed entry. - to_remove_span = to_remove_span.subspan(1); + Assume(up_to_level >= 0 && size_t(up_to_level) < m_clustersets.size()); + for (int level = 0; level <= up_to_level; ++level) { + auto& clusterset = m_clustersets[level]; + auto& to_remove = clusterset.m_to_remove; + // Skip if there is nothing to remove in this level. + if (to_remove.empty()) continue; + // Wipe cached m_group_data and m_oversized, as they may be invalidated by removals. + clusterset.m_group_data = std::nullopt; + clusterset.m_group_clusters.clear(); + if (size_t(level) == m_clustersets.size() - 1) { + // Do not wipe the oversized state of a lower level graph (main) if a higher level + // one (staging) exists. The reason for this is that the alternative would mean that + // cluster merges may need to be applied to a formerly-oversized main graph while + // staging exists (to satisfy chunk feerate queries into main, for example), and such + // merges could conflict with pulls of some of their constituents into staging. + clusterset.m_oversized = false; } + // Pull in all Clusters that are not in the ClusterSet at level level. + for (GraphIndex index : clusterset.m_to_remove) { + auto cluster = FindCluster(index, level); + if (cluster != nullptr) PullIn(cluster, level); + } + // Group the set of to-be-removed entries by Cluster*. + std::sort(to_remove.begin(), to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept { + return std::less{}(m_entries[a].m_locator[level].cluster, m_entries[b].m_locator[level].cluster); + }); + // Process per Cluster. + std::span to_remove_span{to_remove}; + while (!to_remove_span.empty()) { + Cluster* cluster = m_entries[to_remove_span.front()].m_locator[level].cluster; + if (cluster != nullptr) { + // If the first to_remove_span entry's Cluster exists, hand to_remove_span to it, so it + // can pop off whatever applies to it. 
+ cluster->ApplyRemovals(*this, to_remove_span); + } else { + // Otherwise, skip this already-removed entry. + to_remove_span = to_remove_span.subspan(1); + } + } + clusterset.m_to_remove.clear(); } - clusterset.m_to_remove.clear(); } void TxGraphImpl::SwapIndexes(GraphIndex a, GraphIndex b) noexcept @@ -840,7 +865,7 @@ void TxGraphImpl::SwapIndexes(GraphIndex a, GraphIndex b) noexcept for (int i = 0; i < 2; ++i) { GraphIndex idx = i ? b : a; Entry& entry = m_entries[idx]; - // Update linked Ref. + // Update linked Ref, if any exists. if (entry.m_ref) GetRefIndex(*entry.m_ref) = idx; // Update the locators for both levels. The rest of the Entry information will not change, // so no need to invoke Cluster::Updated(). @@ -898,7 +923,7 @@ void TxGraphImpl::Split(Cluster& cluster) noexcept { // To split a Cluster, first make sure all removals are applied (as we might need to split // again afterwards otherwise). - ApplyRemovals(); + ApplyRemovals(cluster.m_level); bool del = cluster.Split(*this); if (del) { // Cluster::Split reports whether the Cluster is to be deleted. @@ -906,14 +931,17 @@ void TxGraphImpl::Split(Cluster& cluster) noexcept } } -void TxGraphImpl::SplitAll() noexcept +void TxGraphImpl::SplitAll(int up_to_level) noexcept { + Assume(up_to_level >= 0 && size_t(up_to_level) < m_clustersets.size()); // Before splitting all Cluster, first make sure all removals are applied. 
- ApplyRemovals(); - for (auto quality : {QualityLevel::NEEDS_SPLIT, QualityLevel::NEEDS_SPLIT_ACCEPTABLE, QualityLevel::NEEDS_SPLIT_OPTIMAL}) { - auto& queue = m_clustersets.back().m_clusters[int(quality)]; - while (!queue.empty()) { - Split(*queue.back().get()); + ApplyRemovals(up_to_level); + for (int level = 0; level <= up_to_level; ++level) { + for (auto quality : {QualityLevel::NEEDS_SPLIT, QualityLevel::NEEDS_SPLIT_ACCEPTABLE, QualityLevel::NEEDS_SPLIT_OPTIMAL}) { + auto& queue = m_clustersets[level].m_clusters[int(quality)]; + while (!queue.empty()) { + Split(*queue.back().get()); + } } } } @@ -925,7 +953,7 @@ void TxGraphImpl::GroupClusters() noexcept // Before computing which Clusters need to be merged together, first apply all removals and // split the Clusters into connected components. If we would group first, we might end up // with inefficient and/or oversized Clusters which just end up being split again anyway. - SplitAll(); + SplitAll(level); auto& clusterset = m_clustersets[level]; // If the groupings have been computed already, nothing is left to be done. @@ -1166,7 +1194,7 @@ void TxGraphImpl::ApplyDependencies() noexcept // Pull in all the Clusters the dependencies are in. for (Cluster*& cluster : clusterset.m_group_clusters) { - cluster = PullIn(cluster); + cluster = PullIn(cluster, level); } // For each group of to-be-merged Clusters. @@ -1292,7 +1320,7 @@ bool TxGraphImpl::Exists(const Ref& arg, bool main_only) noexcept Assume(GetRefGraph(arg) == this); size_t level = main_only ? 0 : m_clustersets.size() - 1; // Make sure the transaction isn't scheduled for removal. - if (level == m_clustersets.size() - 1) ApplyRemovals(); + ApplyRemovals(level); auto cluster = FindCluster(GetRefIndex(arg), level); return cluster != nullptr; } @@ -1303,6 +1331,7 @@ std::vector Cluster::GetAncestorRefs(const TxGraphImpl& graph, Cl // Translate all ancestors (in arbitrary order) to Refs (if they have any), and return them. 
for (auto idx : m_depgraph.Ancestors(idx)) { const auto& entry = graph.m_entries[m_mapping[idx]]; + Assume(entry.m_ref != nullptr); ret.push_back(entry.m_ref); } return ret; @@ -1314,6 +1343,7 @@ std::vector Cluster::GetDescendantRefs(const TxGraphImpl& graph, // Translate all descendants (in arbitrary order) to Refs (if they have any), and return them. for (auto idx : m_depgraph.Descendants(idx)) { const auto& entry = graph.m_entries[m_mapping[idx]]; + Assume(entry.m_ref != nullptr); ret.push_back(entry.m_ref); } return ret; @@ -1325,6 +1355,7 @@ std::vector Cluster::GetClusterRefs(const TxGraphImpl& graph) noe // Translate all transactions in the Cluster (in linearization order) to Refs. for (auto idx : m_linearization) { const auto& entry = graph.m_entries[m_mapping[idx]]; + Assume(entry.m_ref != nullptr); ret.push_back(entry.m_ref); } return ret; @@ -1355,7 +1386,8 @@ std::vector TxGraphImpl::GetAncestors(const Ref& arg, bool main_o Assume(GetRefGraph(arg) == this); // Apply all dependencies, as the result might be incorrect otherwise. size_t level = main_only ? 0 : m_clustersets.size() - 1; - ApplyDependencies(); + ApplyRemovals(level); + if (level == m_clustersets.size() - 1) ApplyDependencies(); // Ancestry cannot be known if unapplied dependencies remain. Assume(!m_clustersets[level].m_oversized); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. @@ -1373,7 +1405,8 @@ std::vector TxGraphImpl::GetDescendants(const Ref& arg, bool main Assume(GetRefGraph(arg) == this); // Apply all dependencies, as the result might be incorrect otherwise. size_t level = main_only ? 0 : m_clustersets.size() - 1; - ApplyDependencies(); + ApplyRemovals(level); + if (level == m_clustersets.size() - 1) ApplyDependencies(); // Ancestry cannot be known if unapplied dependencies remain. Assume(!m_clustersets[level].m_oversized); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. 
@@ -1391,7 +1424,8 @@ std::vector TxGraphImpl::GetCluster(const Ref& arg, bool main_onl Assume(GetRefGraph(arg) == this); // Apply all dependencies, as the result might be incorrect otherwise. size_t level = main_only ? 0 : m_clustersets.size() - 1; - ApplyDependencies(); + SplitAll(level); + if (level == m_clustersets.size() - 1) ApplyDependencies(); // Cluster linearization cannot be known if unapplied dependencies remain. Assume(!m_clustersets[level].m_oversized); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. @@ -1405,7 +1439,7 @@ std::vector TxGraphImpl::GetCluster(const Ref& arg, bool main_onl TxGraph::GraphIndex TxGraphImpl::GetTransactionCount(bool main_only) noexcept { size_t level = main_only ? 0 : m_clustersets.size() - 1; - ApplyRemovals(); + ApplyRemovals(level); return m_clustersets[level].m_txcount; } @@ -1415,12 +1449,12 @@ FeeFrac TxGraphImpl::GetIndividualFeerate(const Ref& arg) noexcept // transaction having been removed already. if (GetRefGraph(arg) == nullptr) return {}; Assume(GetRefGraph(arg) == this); - // Apply removals, so that we can correctly report FeeFrac{} for non-existing transaction. - ApplyRemovals(); // Find the cluster the argument is in (the level does not matter as individual feerates will // be identical if it occurs in both), and return the empty FeeFrac if it isn't in any. Cluster* cluster{nullptr}; for (int level = 0; size_t(level) < m_clustersets.size(); ++level) { + // Apply removals, so that we can correctly report FeeFrac{} for non-existing transaction. + ApplyRemovals(level); if (m_entries[GetRefIndex(arg)].m_locator[level].IsPresent()) { cluster = m_entries[GetRefIndex(arg)].m_locator[level].cluster; break; @@ -1438,7 +1472,8 @@ FeeFrac TxGraphImpl::GetMainChunkFeerate(const Ref& arg) noexcept if (GetRefGraph(arg) == nullptr) return {}; Assume(GetRefGraph(arg) == this); // Apply all dependencies, as the result might be inaccurate otherwise. 
- ApplyDependencies(); + SplitAll(0); + if (m_clustersets.size() == 1) ApplyDependencies(); // Chunk feerates cannot be accurately known if unapplied dependencies remain. Assume(!m_clustersets[0].m_oversized); // Find the cluster the argument is in, and return the empty FeeFrac if it isn't in any. @@ -1456,7 +1491,7 @@ bool TxGraphImpl::IsOversized(bool main_only) noexcept size_t level = main_only ? 0 : m_clustersets.size() - 1; // Find which Clusters will need to be merged together, as that is where the oversize // property is assessed. - GroupClusters(); + if (level == m_clustersets.size() - 1) GroupClusters(); return m_clustersets[level].m_oversized; } @@ -1499,6 +1534,11 @@ void TxGraphImpl::AbortStaging() noexcept } // Destroy the staging graph data. m_clustersets.pop_back(); + if (!m_clustersets.back().m_group_data.has_value()) { + // In case m_oversized in main was kept after a Ref destruction while staging exists, we + // need to re-evaluate m_oversized now. + m_clustersets.back().m_oversized = false; + } } void TxGraphImpl::CommitStaging() noexcept @@ -1627,9 +1667,9 @@ void TxGraphImpl::SanityCheck() const if (entry.IsWiped()) { // If the Entry is not IsPresent anywhere, it should be in m_wiped. expected_wiped.insert(idx); - } else { - // Every non-wiped Entry must have a Ref that points back to it. - assert(entry.m_ref != nullptr); + } + if (entry.m_ref != nullptr) { + // If a Ref is pointed to, it must point back to this GraphIndex in this TxGraphImpl. assert(GetRefGraph(*entry.m_ref) == this); assert(GetRefIndex(*entry.m_ref) == idx); } @@ -1690,6 +1730,14 @@ void TxGraphImpl::SanityCheck() const // Verify that the contents of m_removed matches what was expected based on the Entry vector. 
std::set actual_removed(clusterset.m_removed.begin(), clusterset.m_removed.end()); + for (auto i : expected_wiped) { + // If a transaction exists in both main and staging, and is removed from staging (adding + // it to m_removed there), and consequently destroyed (wiping the locator completely), + // it can remain in m_removed despite not having an IsRemoved() locator. Exclude those + // transactions from the comparison here. + actual_removed.erase(i); + expected_removed[level].erase(i); + } assert(actual_removed == expected_removed[level]); } diff --git a/src/txgraph.h b/src/txgraph.h index a3c7855c8fb..4042da0a8e6 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -42,7 +42,8 @@ public: Ref() noexcept = default; /** Test if this Ref is not empty. */ explicit operator bool() const noexcept { return m_graph != nullptr; } - /** Destroy this Ref. */ + /** Destroy this Ref. If it is not empty, the corresponding transaction is removed (in both + * main and staging, if it exists). */ virtual ~Ref(); // Support moving a Ref. Ref& operator=(Ref&& other) noexcept; @@ -119,7 +120,9 @@ public: /** Determine whether the graph is oversized (contains a connected component of more than the * configured maximum cluster count). If main_only is false and a staging graph exists, it is * queried; otherwise the main graph is queried. Some of the functions below are not available - * for oversized graphs. The mutators above are always available. */ + * for oversized graphs. The mutators above are always available. Removing a transaction by + * destroying its Ref while staging exists will not clear main's oversizedness until staging + * is aborted or committed. */ virtual bool IsOversized(bool main_only = false) noexcept = 0; /** Get the feerate of the chunk which transaction arg is in in the main graph. Returns the * empty FeeFrac if arg does not exist in the main graph. 
The main graph must not be From 72d3ca13b56b9ae17da1234f61a3e24874ab6d00 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 14 Nov 2024 16:16:59 -0500 Subject: [PATCH 13/20] txgraph: (feature) expose ability to compare transactions In order to make it possible for higher layers to compare transaction quality (ordering within the implicit total ordering on the mempool), expose a comparison function and test it. --- src/test/fuzz/txgraph.cpp | 66 +++++++++++++++++++++++++++++++++++++++ src/txgraph.cpp | 48 +++++++++++++++++++++++++--- src/txgraph.h | 4 +++ 3 files changed, 114 insertions(+), 4 deletions(-) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index e537f546120..45d7f7d02fe 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -503,6 +503,24 @@ FUZZ_TARGET(txgraph) sims.erase(sims.begin()); } break; + } else if (main_sim.GetTransactionCount() > 0 && !main_sim.IsOversized() && command-- == 0) { + // CompareMainOrder. + auto& ref_a = pick_fn(); + auto& ref_b = pick_fn(); + auto sim_a = main_sim.Find(ref_a); + auto sim_b = main_sim.Find(ref_b); + // Both transactions must exist in the main graph. + if (sim_a == SimTxGraph::MISSING || sim_b == SimTxGraph::MISSING) break; + auto cmp = real->CompareMainOrder(ref_a, ref_b); + // Distinct transactions have distinct places. + if (sim_a != sim_b) assert(cmp != 0); + // Ancestors go before descendants. + if (main_sim.graph.Ancestors(sim_a)[sim_b]) assert(cmp >= 0); + if (main_sim.graph.Descendants(sim_a)[sim_b]) assert(cmp <= 0); + // Do not verify consistency with chunk feerates, as we cannot easily determine + // these here without making more calls to real, which could affect its internal + // state. A full comparison is done at the end. + break; } } } @@ -510,6 +528,54 @@ FUZZ_TARGET(txgraph) // After running all modifications, perform an internal sanity check (before invoking // inspectors that may modify the internal state). 
real->SanityCheck(); + + if (!sims[0].IsOversized()) { + // If the main graph is not oversized, verify the total ordering implied by + // CompareMainOrder. + // First construct two distinct randomized permutations of the positions in sims[0]. + std::vector vec1; + for (auto i : sims[0].graph.Positions()) vec1.push_back(i); + std::shuffle(vec1.begin(), vec1.end(), rng); + auto vec2 = vec1; + std::shuffle(vec2.begin(), vec2.end(), rng); + if (vec1 == vec2) std::next_permutation(vec2.begin(), vec2.end()); + // Sort both according to CompareMainOrder. By having randomized starting points, the order + // of CompareMainOrder invocations is somewhat randomized as well. + auto cmp = [&](SimTxGraph::Pos a, SimTxGraph::Pos b) noexcept { + return real->CompareMainOrder(sims[0].GetRef(a), sims[0].GetRef(b)) < 0; + }; + std::sort(vec1.begin(), vec1.end(), cmp); + std::sort(vec2.begin(), vec2.end(), cmp); + + // Verify the resulting orderings are identical. This could only fail if the ordering was + // not total. + assert(vec1 == vec2); + + // Verify that the ordering is topological. + auto todo = sims[0].graph.Positions(); + for (auto i : vec1) { + todo.Reset(i); + assert(!sims[0].graph.Ancestors(i).Overlaps(todo)); + } + assert(todo.None()); + + // For every transaction in the total ordering, find a random one before it and after it, + // and compare their chunk feerates, which must be consistent with the ordering. 
+ for (size_t pos = 0; pos < vec1.size(); ++pos) { + auto pos_feerate = real->GetMainChunkFeerate(sims[0].GetRef(vec1[pos])); + if (pos > 0) { + size_t before = rng.randrange(pos); + auto before_feerate = real->GetMainChunkFeerate(sims[0].GetRef(vec1[before])); + assert(FeeRateCompare(before_feerate, pos_feerate) >= 0); + } + if (pos + 1 < vec1.size()) { + size_t after = pos + 1 + rng.randrange(vec1.size() - 1 - pos); + auto after_feerate = real->GetMainChunkFeerate(sims[0].GetRef(vec1[after])); + assert(FeeRateCompare(after_feerate, pos_feerate) <= 0); + } + } + } + assert(real->HaveStaging() == (sims.size() > 1)); // Try to run a full comparison, for both main_only=false and main_only=true in TxGraph diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 4d7eb4ac1a0..d85d5b9ab90 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -242,6 +242,8 @@ private: Locator m_locator[MAX_LEVELS]; /** The chunk feerate of this transaction in main (if present in m_locator[0]) */ FeeFrac m_main_chunk_feerate; + /** The position of this transaction in the main linearization (if present). */ + LinearizationIndex m_main_lin_index; /** Check whether this Entry is not present in any Cluster. */ bool IsWiped() const noexcept @@ -358,6 +360,7 @@ public: std::vector GetDescendants(const Ref& arg, bool main_only = false) noexcept final; GraphIndex GetTransactionCount(bool main_only = false) noexcept final; bool IsOversized(bool main_only = false) noexcept final; + std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept final; void SanityCheck() const final; }; @@ -399,9 +402,10 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept entry.m_locator[m_level].SetPresent(this, idx); } // If this is for the main graph (level = 0), and the Cluster's quality is ACCEPTABLE or - // OPTIMAL, compute its chunking and store its information in the Entry's m_main_chunk_feerate.
- // These fields are only accessed after making the entire graph ACCEPTABLE, so it is pointless - // to compute these if we haven't reached that quality level yet. + // OPTIMAL, compute its chunking and store its information in the Entry's m_main_lin_index + // and m_main_chunk_feerate. These fields are only accessed after making the entire graph + // ACCEPTABLE, so it is pointless to compute these if we haven't reached that quality level + // yet. if (m_level == 0 && (m_quality == QualityLevel::OPTIMAL || m_quality == QualityLevel::ACCEPTABLE)) { LinearizationChunking chunking(m_depgraph, m_linearization); LinearizationIndex lin_idx{0}; @@ -410,9 +414,10 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept auto chunk = chunking.GetChunk(chunk_idx); // Iterate over the transactions in the linearization, which must match those in chunk. while (true) { - ClusterIndex idx = m_linearization[lin_idx++]; + ClusterIndex idx = m_linearization[lin_idx]; GraphIndex graph_idx = m_mapping[idx]; auto& entry = graph.m_entries[graph_idx]; + entry.m_main_lin_index = lin_idx++; entry.m_main_chunk_feerate = chunk.feerate; chunk.transactions.Reset(idx); if (chunk.transactions.None()) break; @@ -487,6 +492,10 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove todo.Set(locator.index); // - Remove from m_mapping. m_mapping[locator.index] = GraphIndex(-1); + // - Remove its linearization index from the Entry (if in main). + if (m_level == 0) { + entry.m_main_lin_index = LinearizationIndex(-1); + } // - Mark it as missing/removed in the Entry's locator. graph.ClearLocator(m_level, idx); to_remove = to_remove.subspan(1); @@ -1611,6 +1620,35 @@ void TxGraphImpl::SetTransactionFee(Ref& ref, int64_t fee) noexcept } } +std::strong_ordering TxGraphImpl::CompareMainOrder(const Ref& a, const Ref& b) noexcept +{ + // The references must not be empty. 
+ Assume(GetRefGraph(a) == this); + Assume(GetRefGraph(b) == this); + // Apply dependencies if main is the only level (in every other case, they will have been + // applied already prior to the creation of staging, or main is oversized). + SplitAll(0); + if (m_clustersets.size() == 1) ApplyDependencies(); + Assume(!m_clustersets[0].m_oversized); + // Make both involved Clusters acceptable, so chunk feerates are relevant. + const auto& entry_a = m_entries[GetRefIndex(a)]; + const auto& entry_b = m_entries[GetRefIndex(b)]; + const auto& locator_a = entry_a.m_locator[0]; + const auto& locator_b = entry_b.m_locator[0]; + Assume(locator_a.IsPresent()); + Assume(locator_b.IsPresent()); + MakeAcceptable(*locator_a.cluster); + MakeAcceptable(*locator_b.cluster); + // Compare chunk feerates, and return result if it differs. + auto feerate_cmp = FeeRateCompare(entry_b.m_main_chunk_feerate, entry_a.m_main_chunk_feerate); + if (feerate_cmp < 0) return std::strong_ordering::less; + if (feerate_cmp > 0) return std::strong_ordering::greater; + // Compare Cluster* as tie-break for equal chunk feerates. + if (locator_a.cluster != locator_b.cluster) return locator_a.cluster <=> locator_b.cluster; + // As final tie-break, compare position within cluster linearization. + return entry_a.m_main_lin_index <=> entry_b.m_main_lin_index; +} + void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const { // There must be an m_mapping for each m_depgraph position (including holes). @@ -1628,6 +1666,7 @@ void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const // Verify m_linearization. SetType m_done; + LinearizationIndex linindex{0}; assert(m_depgraph.IsAcyclic()); for (auto lin_pos : m_linearization) { assert(lin_pos < m_mapping.size()); @@ -1640,6 +1679,7 @@ void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const assert(entry.m_locator[level].index == lin_pos); // For top-level entries, check linearization position and chunk feerate.
if (level == 0 && (m_quality == QualityLevel::OPTIMAL || m_quality == QualityLevel::ACCEPTABLE)) { + assert(entry.m_main_lin_index == linindex++); if (!linchunking.GetChunk(0).transactions[lin_pos]) { linchunking.MarkDone(linchunking.GetChunk(0).transactions); } diff --git a/src/txgraph.h b/src/txgraph.h index 4042da0a8e6..ab0815a2aca 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -150,6 +150,10 @@ public: * graph exists, it is queried; otherwise the main graph is queried. This is available even * for oversized graphs. */ virtual GraphIndex GetTransactionCount(bool main_only = false) noexcept = 0; + /** Compare two transactions according to the total order in the main graph (topological, and + * from high to low chunk feerate). Both transactions must be in the main graph. The main + * graph must not be oversized. */ + virtual std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept = 0; /** Perform an internal consistency check on this object. */ virtual void SanityCheck() const = 0; From b5055196fafe28d15bc62895d79397e1dbb362a2 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 28 Nov 2024 10:40:42 -0500 Subject: [PATCH 14/20] txgraph: (feature) Add GetMainStagingDiagrams function This allows determining whether the changes in a staging diagram unambiguously improve the graph, through CompareChunks(). --- src/test/fuzz/txgraph.cpp | 109 ++++++++++++++++++++++++++++++++++++++ src/txgraph.cpp | 42 +++++++++++++++ src/txgraph.h | 3 ++ 3 files changed, 154 insertions(+) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index 45d7f7d02fe..5a0925cb3ed 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -84,6 +85,16 @@ struct SimTxGraph /** Determine the number of (non-removed) transactions in the graph. */ ClusterIndex GetTransactionCount() const { return graph.TxCount(); } + /** Get the sum of all fees/sizes in the graph. 
*/ + FeeFrac SumAll() const + { + FeeFrac ret; + for (auto i : graph.Positions()) { + ret += graph.FeeRate(i); + } + return ret; + } + /** Get the position where ref occurs in this simulated graph, or -1 if it does not. */ Pos Find(const TxGraph::Ref& ref) const { @@ -277,6 +288,40 @@ FUZZ_TARGET(txgraph) return empty_ref; }; + /** Function to construct the full diagram for a simulated graph. This works by fetching the + * clusters and chunking them manually, so it works for both main and staging + * (GetMainChunkFeerate only works for main). */ + auto get_diagram_fn = [&](bool main_only) -> std::vector { + int level = main_only ? 0 : sims.size() - 1; + auto& sim = sims[level]; + // For every transaction in the graph, request its cluster, and throw them into a set. + std::set> clusters; + for (auto i : sim.graph.Positions()) { + auto& ref = sim.GetRef(i); + clusters.insert(real->GetCluster(ref, main_only)); + } + // Compute the chunkings of each (deduplicated) cluster. + size_t num_tx{0}; + std::vector ret; + for (const auto& cluster : clusters) { + num_tx += cluster.size(); + std::vector linearization; + linearization.reserve(cluster.size()); + for (auto refptr : cluster) linearization.push_back(sim.Find(*refptr)); + for (const FeeFrac& chunk_feerate : ChunkLinearization(sim.graph, linearization)) { + ret.push_back(chunk_feerate); + } + } + // Verify the number of transactions after deduplicating clusters. This implicitly verifies + // that GetCluster on each element of a cluster reports the cluster transactions in the same + // order. + assert(num_tx == sim.GetTransactionCount()); + // Sort by feerate (we don't care about respecting ordering within clusters, as these are + // just feerates). + std::sort(ret.begin(), ret.end(), std::greater{}); + return ret; + }; + LIMITED_WHILE(provider.remaining_bytes() > 0, 200) { // Read a one-byte command. 
int command = provider.ConsumeIntegral(); @@ -436,6 +481,7 @@ FUZZ_TARGET(txgraph) // Just do some quick checks that the reported value is in range. A full // recomputation of expected chunk feerates is done at the end. assert(feerate.size >= main_sim.graph.FeeRate(simpos).size); + assert(feerate.size <= main_sim.SumAll().size); } break; } else if (!sel_sim.IsOversized() && command-- == 0) { @@ -521,6 +567,25 @@ FUZZ_TARGET(txgraph) // these here without making more calls to real, which could affect its internal // state. A full comparison is done at the end. break; + } else if (sims.size() == 2 && !sims[0].IsOversized() && !sims[1].IsOversized() && command-- == 0) { + // GetMainStagingDiagrams() + auto [main_diagram, staged_diagram] = real->GetMainStagingDiagrams(); + auto sum_main = std::accumulate(main_diagram.begin(), main_diagram.end(), FeeFrac{}); + auto sum_staged = std::accumulate(staged_diagram.begin(), staged_diagram.end(), FeeFrac{}); + auto diagram_gain = sum_staged - sum_main; + auto real_gain = sims[1].SumAll() - sims[0].SumAll(); + // Just check that the total fee gained/lost and size gained/lost according to the + // diagram matches the difference in these values in the simulated graph. A more + // complete check of the GetMainStagingDiagrams result is performed at the end. + assert(diagram_gain == real_gain); + // Check that the feerates in each diagram are monotonically decreasing. + for (size_t i = 1; i < main_diagram.size(); ++i) { + assert(FeeRateCompare(main_diagram[i], main_diagram[i - 1]) <= 0); + } + for (size_t i = 1; i < staged_diagram.size(); ++i) { + assert(FeeRateCompare(staged_diagram[i], staged_diagram[i - 1]) <= 0); + } + break; } } } @@ -574,6 +639,50 @@ FUZZ_TARGET(txgraph) assert(FeeRateCompare(after_feerate, pos_feerate) <= 0); } } + + // Check that the implied ordering gives rise to a combined diagram that matches the + // diagram constructed from the individual cluster linearization chunkings. 
+ auto main_diagram = get_diagram_fn(true); + auto expected_main_diagram = ChunkLinearization(sims[0].graph, vec1); + assert(CompareChunks(main_diagram, expected_main_diagram) == 0); + + if (sims.size() >= 2 && !sims[1].IsOversized()) { + // When the staging graph is not oversized as well, call GetMainStagingDiagrams, and + // fully verify the result. + auto [main_cmp_diagram, stage_cmp_diagram] = real->GetMainStagingDiagrams(); + // Check that the feerates in each diagram are monotonically decreasing. + for (size_t i = 1; i < main_cmp_diagram.size(); ++i) { + assert(FeeRateCompare(main_cmp_diagram[i], main_cmp_diagram[i - 1]) <= 0); + } + for (size_t i = 1; i < stage_cmp_diagram.size(); ++i) { + assert(FeeRateCompare(stage_cmp_diagram[i], stage_cmp_diagram[i - 1]) <= 0); + } + // Apply total ordering on the feerate diagrams to make them comparable (the exact + // tie breaker among equal-feerate FeeFracs does not matter, but it has to be + // consistent with the one used in main_diagram and stage_diagram). + std::sort(main_cmp_diagram.begin(), main_cmp_diagram.end(), std::greater{}); + std::sort(stage_cmp_diagram.begin(), stage_cmp_diagram.end(), std::greater{}); + // Find the chunks that appear in main_diagram but are missing from main_cmp_diagram. + // This is allowed, because GetMainStagingDiagrams omits clusters in main unaffected + // by staging. + std::vector missing_main_cmp; + std::set_difference(main_diagram.begin(), main_diagram.end(), + main_cmp_diagram.begin(), main_cmp_diagram.end(), + std::inserter(missing_main_cmp, missing_main_cmp.end()), + std::greater{}); + assert(main_cmp_diagram.size() + missing_main_cmp.size() == main_diagram.size()); + // Do the same for chunks in stage_diagram missing from stage_cmp_diagram.
+ auto stage_diagram = get_diagram_fn(false); + std::vector missing_stage_cmp; + std::set_difference(stage_diagram.begin(), stage_diagram.end(), + stage_cmp_diagram.begin(), stage_cmp_diagram.end(), + std::inserter(missing_stage_cmp, missing_stage_cmp.end()), + std::greater{}); + assert(stage_cmp_diagram.size() + missing_stage_cmp.size() == stage_diagram.size()); + // The missing chunks must be equal across main & staging (otherwise they couldn't have + // been omitted). + assert(missing_main_cmp == missing_stage_cmp); + } } assert(real->HaveStaging() == (sims.size() > 1)); diff --git a/src/txgraph.cpp b/src/txgraph.cpp index d85d5b9ab90..7078970a5a3 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -119,6 +119,8 @@ public: void ApplyDependencies(TxGraphImpl& graph, std::span> to_apply) noexcept; /** Improve the linearization of this Cluster. */ void Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept; + /** For every chunk in the cluster, append its FeeFrac to ret. */ + void AppendChunkFeerates(std::vector& ret) const noexcept; // Functions that implement the Cluster-specific side of public TxGraph functions. @@ -338,6 +340,8 @@ public: void ApplyDependencies() noexcept; /** Make a specified Cluster have quality ACCEPTABLE or OPTIMAL. */ void MakeAcceptable(Cluster& cluster) noexcept; + /** Make all Clusters at the specified level have quality ACCEPTABLE or OPTIMAL. */ + void MakeAllAcceptable(int level) noexcept; // Implementations for the public TxGraph interface. 
@@ -361,6 +365,7 @@ public: GraphIndex GetTransactionCount(bool main_only = false) noexcept final; bool IsOversized(bool main_only = false) noexcept final; std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept final; + std::pair, std::vector> GetMainStagingDiagrams() noexcept final; void SanityCheck() const final; }; @@ -560,6 +565,12 @@ void Cluster::LevelDown(TxGraphImpl& graph) noexcept Updated(graph); } +void Cluster::AppendChunkFeerates(std::vector& ret) const noexcept +{ + auto chunk_feerates = ChunkLinearization(m_depgraph, m_linearization); + ret.insert(ret.end(), chunk_feerates.begin(), chunk_feerates.end()); +} + bool Cluster::Split(TxGraphImpl& graph) noexcept { // This function can only be called when the Cluster needs splitting. @@ -1261,6 +1272,15 @@ void TxGraphImpl::MakeAcceptable(Cluster& cluster) noexcept } } +void TxGraphImpl::MakeAllAcceptable(int level) noexcept +{ + if (size_t(level) == m_clustersets.size() - 1) ApplyDependencies(); + auto& queue = m_clustersets[level].m_clusters[int(QualityLevel::NEEDS_RELINEARIZE)]; + while (!queue.empty()) { + MakeAcceptable(*queue.back().get()); + } +} + Cluster::Cluster(TxGraphImpl& graph, const FeeFrac& feerate, GraphIndex graph_index) noexcept { // Create a new transaction in the DepGraph, and remember its position in m_mapping. 
@@ -1649,6 +1669,28 @@ std::strong_ordering TxGraphImpl::CompareMainOrder(const Ref& a, const Ref& b) n return entry_a.m_main_lin_index <=> entry_b.m_main_lin_index; } +std::pair, std::vector> TxGraphImpl::GetMainStagingDiagrams() noexcept +{ + Assume(m_clustersets.size() >= 2); + ApplyDependencies(); + MakeAllAcceptable(m_clustersets.size() - 2); + MakeAllAcceptable(m_clustersets.size() - 1); + auto main_clusters = GetConflicts(); + std::vector main_feerates, staging_feerates; + for (Cluster* cluster : main_clusters) { + cluster->AppendChunkFeerates(main_feerates); + } + const auto& staging = m_clustersets.back(); + for (int quality = 0; quality < int(QualityLevel::NONE); ++quality) { + for (const auto& cluster : staging.m_clusters[quality]) { + cluster->AppendChunkFeerates(staging_feerates); + } + } + std::sort(main_feerates.begin(), main_feerates.end(), [](auto& a, auto& b) { return FeeRateCompare(a, b) > 0; }); + std::sort(staging_feerates.begin(), staging_feerates.end(), [](auto& a, auto& b) { return FeeRateCompare(a, b) > 0; }); + return std::make_pair(std::move(main_feerates), std::move(staging_feerates)); +} + void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const { // There must be an m_mapping for each m_depgraph position (including holes). diff --git a/src/txgraph.h b/src/txgraph.h index ab0815a2aca..72481611f13 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -154,6 +154,9 @@ public: * from high to low chunk feerate). Both transactions must be in the main graph. The main * graph must not be oversized. */ virtual std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept = 0; + /** Get feerate diagrams (comparable using CompareChunks()) for both main and staging (which + * must both exist and not be oversized), ignoring unmodified components in both. */ + virtual std::pair, std::vector> GetMainStagingDiagrams() noexcept = 0; /** Perform an internal consistency check on this object. 
*/ virtual void SanityCheck() const = 0; From 913e7eaae9c980ed276b8477832e6732a794e68d Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 14 Nov 2024 15:54:03 -0500 Subject: [PATCH 15/20] txgraph: (preparation) maintain chunk index This is preparation for exposing mining and eviction functionality in TxGraph. --- src/txgraph.cpp | 113 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 110 insertions(+), 3 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 7078970a5a3..251c19a0fef 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -211,6 +211,49 @@ private: /** The ClusterSets in this TxGraphImpl. Has exactly 1 (main) or exactly 2 elements (main and staged). */ std::vector m_clustersets; + /** Information about a chunk in the main graph. */ + struct ChunkData + { + /** The Entry which is the last transaction of the chunk. */ + mutable GraphIndex m_graph_index; + /** How many transactions the chunk contains. */ + LinearizationIndex m_chunk_count; + + ChunkData(GraphIndex graph_index, LinearizationIndex chunk_count) noexcept : + m_graph_index{graph_index}, m_chunk_count{chunk_count} {} + }; + + /** Comparator for ChunkData objects in mining order. */ + class ChunkOrder + { + const TxGraphImpl* const m_graph; + public: + explicit ChunkOrder(const TxGraphImpl* graph) : m_graph(graph) {} + + bool operator()(const ChunkData& a, const ChunkData& b) const noexcept + { + const auto& a_entry = m_graph->m_entries[a.m_graph_index]; + const auto& b_entry = m_graph->m_entries[b.m_graph_index]; + // First sort from high feerate to low feerate. + auto cmp_feerate = FeeRateCompare(a_entry.m_main_chunk_feerate, b_entry.m_main_chunk_feerate); + if (cmp_feerate != 0) return cmp_feerate > 0; + // Then sort by increasing Cluster pointer. 
+ Assume(a_entry.m_locator[0].IsPresent()); + Assume(b_entry.m_locator[0].IsPresent()); + if (a_entry.m_locator[0].cluster != b_entry.m_locator[0].cluster) { + return std::less{}(a_entry.m_locator[0].cluster, b_entry.m_locator[0].cluster); + } + // Finally sort by position within the Cluster. + return a_entry.m_main_lin_index < b_entry.m_main_lin_index; + } + }; + + /** Definition for the mining index type. */ + using ChunkIndex = std::set; + + /** Index of ChunkData objects. */ + ChunkIndex m_chunkindex; + /** A Locator that describes whether, where, and in which Cluster an Entry appears. * Every Entry has MAX_LEVELS locators, as it may appear in one Cluster per level. */ struct Locator @@ -240,6 +283,8 @@ private: { /** Pointer to the corresponding Ref object, or nullptr if none. */ Ref* m_ref; + /** Iterator to the corresponding ChunkData, if any. */ + ChunkIndex::iterator m_chunkindex_iterator; /** Which Cluster and position therein this Entry appears in. ([0] = main, [1] = staged). */ Locator m_locator[MAX_LEVELS]; /** The chunk feerate of this transaction in main (if present in m_locator[0]) */ @@ -266,7 +311,8 @@ private: public: /** Construct a new TxGraphImpl with the specified maximum cluster count. */ explicit TxGraphImpl(ClusterIndex max_cluster_count) noexcept : - m_max_cluster_count(max_cluster_count) + m_max_cluster_count(max_cluster_count), + m_chunkindex(ChunkOrder(this)) { Assume(max_cluster_count <= MAX_CLUSTER_COUNT_LIMIT); m_clustersets.reserve(MAX_LEVELS); @@ -397,6 +443,10 @@ void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept // If this was the last level the Locator was Present at, add it to the m_wiped list (which // will be processed by Cleanup). 
if (entry.IsWiped()) m_wiped.push_back(idx); + if (level == 0 && entry.m_chunkindex_iterator != m_chunkindex.end()) { + m_chunkindex.erase(entry.m_chunkindex_iterator); + entry.m_chunkindex_iterator = m_chunkindex.end(); + } } void Cluster::Updated(TxGraphImpl& graph) noexcept @@ -404,6 +454,12 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept // Update all the Locators for this Cluster's Entrys. for (ClusterIndex idx : m_linearization) { auto& entry = graph.m_entries[m_mapping[idx]]; + if (m_level == 0 && entry.m_chunkindex_iterator != graph.m_chunkindex.end()) { + // Destroy any potential ChunkData prior to modifying the Cluster (as that could + // invalidate its ordering). + graph.m_chunkindex.erase(entry.m_chunkindex_iterator); + entry.m_chunkindex_iterator = graph.m_chunkindex.end(); + } entry.m_locator[m_level].SetPresent(this, idx); } // If this is for the main graph (level = 0), and the Cluster's quality is ACCEPTABLE or @@ -417,6 +473,7 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept // Iterate over the chunks. for (unsigned chunk_idx = 0; chunk_idx < chunking.NumChunksLeft(); ++chunk_idx) { auto chunk = chunking.GetChunk(chunk_idx); + auto chunk_count = chunk.transactions.Count(); // Iterate over the transactions in the linearization, which must match those in chunk. while (true) { ClusterIndex idx = m_linearization[lin_idx]; @@ -425,7 +482,13 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept entry.m_main_lin_index = lin_idx++; entry.m_main_chunk_feerate = chunk.feerate; chunk.transactions.Reset(idx); - if (chunk.transactions.None()) break; + if (chunk.transactions.None()) { + // Last transaction in the chunk. + auto [it, inserted] = graph.m_chunkindex.emplace(graph_idx, chunk_count); + Assume(inserted); + entry.m_chunkindex_iterator = it; + break; + } } } } @@ -674,7 +737,14 @@ void Cluster::Merge(TxGraphImpl& graph, Cluster& other) noexcept // Update the transaction's Locator. 
There is no need to call Updated() to update chunk // feerates, as Updated() will be invoked by Cluster::ApplyDependencies on the resulting // merged Cluster later anyway). - graph.m_entries[idx].m_locator[m_level].SetPresent(this, new_pos); + auto& entry = graph.m_entries[idx]; + if (m_level == 0 && entry.m_chunkindex_iterator != graph.m_chunkindex.end()) { + // Destroy any potential ChunkData prior to modifying the Cluster (as that could + // invalidate its ordering). + graph.m_chunkindex.erase(entry.m_chunkindex_iterator); + entry.m_chunkindex_iterator = graph.m_chunkindex.end(); + } + entry.m_locator[m_level].SetPresent(this, new_pos); } // Purge the other Cluster, now that everything has been moved. other.m_depgraph = DepGraph{}; @@ -887,6 +957,10 @@ void TxGraphImpl::SwapIndexes(GraphIndex a, GraphIndex b) noexcept Entry& entry = m_entries[idx]; // Update linked Ref, if any exists. if (entry.m_ref) GetRefIndex(*entry.m_ref) = idx; + // Update linked chunk index entries, if any exist. + if (entry.m_chunkindex_iterator != m_chunkindex.end()) { + entry.m_chunkindex_iterator->m_graph_index = idx; + } // Update the locators for both levels. The rest of the Entry information will not change, // so no need to invoke Cluster::Updated(). for (int level = 0; level < MAX_LEVELS; ++level) { @@ -928,6 +1002,7 @@ std::vector TxGraphImpl::Cleanup() noexcept m_entries[idx].m_ref = nullptr; } // Verify removed entries don't have anything that could hold a reference back. 
+ Assume(entry.m_chunkindex_iterator == m_chunkindex.end()); for (int level = 0; level < MAX_LEVELS; ++level) { Assume(!entry.m_locator[level].IsPresent()); } @@ -1297,6 +1372,7 @@ TxGraph::Ref TxGraphImpl::AddTransaction(const FeeFrac& feerate) noexcept auto idx = m_entries.size(); m_entries.emplace_back(); auto& entry = m_entries.back(); + entry.m_chunkindex_iterator = m_chunkindex.end(); entry.m_ref = &ret; GetRefGraph(ret) = this; GetRefIndex(ret) = idx; @@ -1404,6 +1480,10 @@ void Cluster::MakeTransactionsMissing(TxGraphImpl& graph) noexcept auto& entry = graph.m_entries[idx]; entry.m_locator[m_level].SetMissing(); if (entry.IsWiped()) graph.m_wiped.push_back(idx); + if (m_level == 0 && entry.m_chunkindex_iterator != graph.m_chunkindex.end()) { + graph.m_chunkindex.erase(entry.m_chunkindex_iterator); + entry.m_chunkindex_iterator = graph.m_chunkindex.end(); + } } } @@ -1709,6 +1789,7 @@ void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const // Verify m_linearization. SetType m_done; LinearizationIndex linindex{0}; + ClusterIndex chunk_pos{0}; //!< position within the current chunk assert(m_depgraph.IsAcyclic()); for (auto lin_pos : m_linearization) { assert(lin_pos < m_mapping.size()); @@ -1724,8 +1805,13 @@ void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const assert(entry.m_main_lin_index == linindex++); if (!linchunking.GetChunk(0).transactions[lin_pos]) { linchunking.MarkDone(linchunking.GetChunk(0).transactions); + chunk_pos = 0; } assert(entry.m_main_chunk_feerate == linchunking.GetChunk(0).feerate); + // Verify that an entry in the chunk index exists for every chunk-ending transaction. + ++chunk_pos; + bool is_chunk_end = (chunk_pos == linchunking.GetChunk(0).transactions.Count()); + assert((entry.m_chunkindex_iterator != graph.m_chunkindex.end()) == is_chunk_end); // If this Cluster has an acceptable quality level, its chunks must be connected. 
assert(m_depgraph.IsConnected(linchunking.GetChunk(0).transactions)); } @@ -1742,6 +1828,8 @@ void TxGraphImpl::SanityCheck() const std::set expected_clusters[MAX_LEVELS]; /** Which GraphIndexes ought to occur in ClusterSet::m_removed, based on m_entries. */ std::set expected_removed[MAX_LEVELS]; + /** Which GraphIndexes ought to occur in m_chunkindex, based on m_entries. */ + std::set expected_chunkindex; // Go over all Entry objects in m_entries. for (GraphIndex idx = 0; idx < m_entries.size(); ++idx) { @@ -1755,6 +1843,11 @@ void TxGraphImpl::SanityCheck() const assert(GetRefGraph(*entry.m_ref) == this); assert(GetRefIndex(*entry.m_ref) == idx); } + if (entry.m_chunkindex_iterator != m_chunkindex.end()) { + // Remember which entries we see a chunkindex entry for. + assert(entry.m_locator[0].IsPresent()); + expected_chunkindex.insert(idx); + } // Verify the Entry m_locators. bool was_present{false}, was_removed{false}; for (int level = 0; level < MAX_LEVELS; ++level) { @@ -1826,6 +1919,20 @@ void TxGraphImpl::SanityCheck() const // Verify that the contents of m_wiped matches what was expected based on the Entry vector. std::set actual_wiped(m_wiped.begin(), m_wiped.end()); assert(actual_wiped == expected_wiped); + + // Finally, check the chunk index. 
+ std::set actual_chunkindex; + FeeFrac last_chunk_feerate; + for (const auto& chunk : m_chunkindex) { + GraphIndex idx = chunk.m_graph_index; + actual_chunkindex.insert(idx); + auto chunk_feerate = m_entries[idx].m_main_chunk_feerate; + if (!last_chunk_feerate.IsEmpty()) { + assert(FeeRateCompare(last_chunk_feerate, chunk_feerate) >= 0); + } + last_chunk_feerate = chunk_feerate; + } + assert(actual_chunkindex == expected_chunkindex); } } // namespace From 02ab364247a96b5fd39f8d9ef8d6e3977a5675ae Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Mon, 25 Nov 2024 11:31:02 -0500 Subject: [PATCH 16/20] txgraph: (feature) introduce BlockBuilder interface This interface lets one iterate efficiently over the chunks of the main graph in a TxGraph, in the same order as CompareMainOrder. Each chunk can be marked as "included" or "skipped" (and in the latter case, dependent chunks will be skipped). --- src/test/fuzz/txgraph.cpp | 56 +++++++++++++++++ src/txgraph.cpp | 125 +++++++++++++++++++++++++++++++++++--- src/txgraph.h | 31 +++++++++- 3 files changed, 202 insertions(+), 10 deletions(-) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index 5a0925cb3ed..d8e5e9defd2 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -586,6 +586,41 @@ FUZZ_TARGET(txgraph) assert(FeeRateCompare(staged_diagram[i], staged_diagram[i - 1]) <= 0); } break; + } else if (!main_sim.IsOversized() && command-- == 0) { + // GetBlockBuilder. + uint8_t frac = provider.ConsumeIntegral(); + auto builder = real->GetBlockBuilder(); + SimTxGraph::SetType done; + FeeFrac prev_chunk_feerate; + while (*builder) { + // Chunk feerates must be monotonously decreasing. + if (!prev_chunk_feerate.IsEmpty()) { + assert(FeeRateCompare(builder->GetCurrentChunkFeerate(), prev_chunk_feerate) <= 0); + } + prev_chunk_feerate = builder->GetCurrentChunkFeerate(); + // Only include a fraction of frac/255 out of all chunks. 
+ if (rng.randrange(255) <= frac) { + FeeFrac sum_feerate; + for (TxGraph::Ref* ref : builder->GetCurrentChunk()) { + // Each transaction in the chunk must exist in the main graph. + auto simpos = main_sim.Find(*ref); + assert(simpos != SimTxGraph::MISSING); + // Verify the claimed chunk feerate. + sum_feerate += main_sim.graph.FeeRate(simpos); + // Make sure the chunk contains no duplicate transactions. + assert(!done[simpos]); + done.Set(simpos); + // The concatenation of all included chunks, in order, must be + // topologically valid. + assert(main_sim.graph.Ancestors(simpos).IsSubsetOf(done)); + } + assert(sum_feerate == builder->GetCurrentChunkFeerate()); + builder->Include(); + } else { + builder->Skip(); + } + } + break; } } } @@ -640,6 +675,27 @@ FUZZ_TARGET(txgraph) } } + // The same order should be obtained through a BlockBuilder, if nothing is skipped. + auto builder = real->GetBlockBuilder(); + std::vector vec_builder; + while (*builder) { + FeeFrac sum; + for (TxGraph::Ref* ref : builder->GetCurrentChunk()) { + // The reported chunk feerate must match the chunk feerate obtained by asking + // it for each of the chunk's transactions individually. + assert(real->GetMainChunkFeerate(*ref) == builder->GetCurrentChunkFeerate()); + // Verify the chunk feerate matches the sum of the reported individual feerates. + sum += real->GetIndividualFeerate(*ref); + // Chunks must contain transactions that exist in the graph. + auto simpos = sims[0].Find(*ref); + assert(simpos != SimTxGraph::MISSING); + vec_builder.push_back(simpos); + } + assert(sum == builder->GetCurrentChunkFeerate()); + builder->Include(); + } + assert(vec_builder == vec1); + // Check that the implied ordering gives rise to a combined diagram that matches the // diagram constructed from the individual cluster linearization chunkings. 
auto main_diagram = get_diagram_fn(true); diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 251c19a0fef..2e277555299 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -129,7 +129,7 @@ public: /** Get a vector of Refs for the descendants of a given Cluster element. */ std::vector GetDescendantRefs(const TxGraphImpl& graph, ClusterIndex idx) noexcept; /** Get a vector of Refs for all elements of this Cluster, in linearization order. */ - std::vector GetClusterRefs(const TxGraphImpl& graph) noexcept; + void GetClusterRefs(TxGraphImpl& graph, std::span range, LinearizationIndex start_pos) noexcept; /** Get the individual transaction feerate of a Cluster element. */ FeeFrac GetIndividualFeerate(ClusterIndex idx) noexcept; /** Modify the fee of a Cluster element. */ @@ -166,6 +166,7 @@ public: class TxGraphImpl final : public TxGraph { friend class Cluster; + friend class BlockBuilderImpl; private: /** Internal RNG. */ FastRandomContext m_rng; @@ -253,6 +254,8 @@ private: /** Index of ChunkData objects. */ ChunkIndex m_chunkindex; + /** Number of index-observing objects in existence (BlockBuilderImpl). */ + size_t m_chunkindex_observers{0}; /** A Locator that describes whether, where, and in which Cluster an Entry appears. * Every Entry has MAX_LEVELS locators, as it may appear in one Cluster per level. */ @@ -359,6 +362,7 @@ public: void UnlinkRef(GraphIndex idx) noexcept final { auto& entry = m_entries[idx]; + Assume(m_chunkindex_observers == 0); Assume(entry.m_ref != nullptr); entry.m_ref = nullptr; if (!entry.IsWiped()) { @@ -413,9 +417,42 @@ public: std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept final; std::pair, std::vector> GetMainStagingDiagrams() noexcept final; + std::unique_ptr GetBlockBuilder() noexcept final; + void SanityCheck() const final; }; +/** Implementation of the TxGraph::BlockBuilder interface. 
*/ +class BlockBuilderImpl final : public TxGraph::BlockBuilder +{ + /** Which TxGraphImpl this object is doing block building for. It will have its + * m_chunkindex_observers incremented as long as this BlockBuilderImpl exists. */ + TxGraphImpl* const m_graph; + /** Vector for actual storage pointed to by TxGraph::BlockBuilder::m_current_chunk. */ + std::vector m_chunkdata; + /** Which cluster the current chunk belongs to, so we can exclude further transactions from it + * when that chunk is skipped. */ + Cluster* m_remaining_cluster{nullptr}; + /** Clusters which we're not including further transactions from. */ + std::set m_excluded_clusters; + /** Iterator to the next chunk (after the current one) in the chunk index. end() if nothing + * further remains. */ + TxGraphImpl::ChunkIndex::const_iterator m_next_iter; + + /** Fill in information about the current chunk in m_current_chunk, m_chunkdata, + * m_remaining_cluster, and update m_next_iter. */ + void Next() noexcept; + +public: + /** Construct a new BlockBuilderImpl to build blocks for the provided graph. */ + BlockBuilderImpl(TxGraphImpl& graph) noexcept; + + // Implement the public interface. + ~BlockBuilderImpl() final; + void Include() noexcept final; + void Skip() noexcept final; +}; + void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept { auto& entry = m_entries[idx]; @@ -1366,6 +1403,7 @@ Cluster::Cluster(TxGraphImpl& graph, const FeeFrac& feerate, GraphIndex graph_in TxGraph::Ref TxGraphImpl::AddTransaction(const FeeFrac& feerate) noexcept { + Assume(m_chunkindex_observers == 0 || m_clustersets.size() > 1); // Construct a new Ref. Ref ret; // Construct a new Entry, and link it with the Ref. @@ -1393,6 +1431,7 @@ void TxGraphImpl::RemoveTransaction(Ref& arg) noexcept // having been removed).
if (GetRefGraph(arg) == nullptr) return; Assume(GetRefGraph(arg) == this); + Assume(m_chunkindex_observers == 0 || m_clustersets.size() > 1); // Find the Cluster the transaction is in, and stop if it isn't in any. auto cluster = FindCluster(GetRefIndex(arg), m_clustersets.size() - 1); if (cluster == nullptr) return; @@ -1406,6 +1445,7 @@ void TxGraphImpl::AddDependency(Ref& parent, Ref& child) noexcept // removed). if (GetRefGraph(parent) == nullptr || GetRefGraph(child) == nullptr) return; Assume(GetRefGraph(parent) == this && GetRefGraph(child) == this); + Assume(m_chunkindex_observers == 0 || m_clustersets.size() > 1); // Find the Cluster the parent and child transaction are in, and stop if either appears to be // already removed. auto par_cluster = FindCluster(GetRefIndex(parent), m_clustersets.size() - 1); @@ -1454,16 +1494,15 @@ std::vector Cluster::GetDescendantRefs(const TxGraphImpl& graph, return ret; } -std::vector Cluster::GetClusterRefs(const TxGraphImpl& graph) noexcept +void Cluster::GetClusterRefs(TxGraphImpl& graph, std::span range, LinearizationIndex start_pos) noexcept { - std::vector ret; - // Translate all transactions in the Cluster (in linearization order) to Refs. - for (auto idx : m_linearization) { - const auto& entry = graph.m_entries[m_mapping[idx]]; + // Translate the transactions in the Cluster (in linearization order, starting at start_pos in + // the linearization) to Refs, and fill them in range. + for (auto& ref : range) { + const auto& entry = graph.m_entries[m_mapping[m_linearization[start_pos++]]]; Assume(entry.m_ref != nullptr); - ret.push_back(entry.m_ref); + ref = entry.m_ref; } - return ret; } FeeFrac Cluster::GetIndividualFeerate(ClusterIndex idx) noexcept @@ -1542,7 +1581,9 @@ std::vector TxGraphImpl::GetCluster(const Ref& arg, bool main_onl if (cluster == nullptr) return {}; // Make sure the Cluster has an acceptable quality level, and then dispatch to it. 
MakeAcceptable(*cluster); - return cluster->GetClusterRefs(*this); + std::vector ret(cluster->GetTxCount()); + cluster->GetClusterRefs(*this, ret, 0); + return ret; } TxGraph::GraphIndex TxGraphImpl::GetTransactionCount(bool main_only) noexcept @@ -1657,6 +1698,7 @@ void TxGraphImpl::CommitStaging() noexcept int main_level = stage_level - 1; auto& stage = m_clustersets[stage_level]; auto& main = m_clustersets[main_level]; + Assume(m_chunkindex_observers == 0 || main_level > 0); // Delete all conflicting Clusters in main_level, to make place for moving the staging ones // there. All of these have been PullIn()'d to stage_level before. auto conflicts = GetConflicts(); @@ -1710,6 +1752,7 @@ void TxGraphImpl::SetTransactionFee(Ref& ref, int64_t fee) noexcept // transaction having been removed already. if (GetRefGraph(ref) == nullptr) return; Assume(GetRefGraph(ref) == this); + Assume(m_chunkindex_observers == 0); // Find the entry, its locator, and inform its Cluster about the new feerate, if any. auto& entry = m_entries[GetRefIndex(ref)]; for (int level = 0; level < MAX_LEVELS; ++level) { @@ -1935,6 +1978,70 @@ void TxGraphImpl::SanityCheck() const assert(actual_chunkindex == expected_chunkindex); } +void BlockBuilderImpl::Next() noexcept +{ + while (m_next_iter != m_graph->m_chunkindex.end()) { + // Find the cluster pointed to by m_next_iter (and advance it). + const auto& chunk_data = *(m_next_iter++); + const auto& chunk_end_entry = m_graph->m_entries[chunk_data.m_graph_index]; + Cluster* cluster = chunk_end_entry.m_locator[0].cluster; + // If we previously skipped a chunk from this cluster we cannot include more from it. + if (m_excluded_clusters.contains(cluster)) continue; + // Populate m_current_chunk. 
+ m_chunkdata.resize(chunk_data.m_chunk_count); + auto start_pos = chunk_end_entry.m_main_lin_index + 1 - chunk_data.m_chunk_count; + cluster->GetClusterRefs(*m_graph, m_chunkdata, start_pos); + m_remaining_cluster = cluster; + m_current_chunk.emplace(m_chunkdata, chunk_end_entry.m_main_chunk_feerate); + return; + } + // We reached the end of m_chunkindex. + m_current_chunk = std::nullopt; +} + +BlockBuilderImpl::BlockBuilderImpl(TxGraphImpl& graph) noexcept : m_graph(&graph) +{ + // Make sure all clusters in main are up to date, and acceptable. + m_graph->SplitAll(0); + if (m_graph->m_clustersets.size() == 1) m_graph->ApplyDependencies(); + m_graph->MakeAllAcceptable(0); + // The main graph cannot be oversized, as that implies unappliable dependencies. + Assume(!m_graph->m_clustersets[0].m_oversized); + // Remember that this object is observing the graph's index, so that we can detect concurrent + // modifications. + ++m_graph->m_chunkindex_observers; + // Find the first chunk. + m_next_iter = m_graph->m_chunkindex.begin(); + Next(); +} + +BlockBuilderImpl::~BlockBuilderImpl() +{ + Assume(m_graph->m_chunkindex_observers > 0); + // Permit modifications to the main graph again after destroying the BlockBuilderImpl. + --m_graph->m_chunkindex_observers; +} + +void BlockBuilderImpl::Include() noexcept +{ + // The actual inclusion of the chunk is done by the calling code. All we have to do is switch + // to the next chunk. + Next(); +} + +void BlockBuilderImpl::Skip() noexcept +{ + // When skipping a chunk we need to not include anything more of the cluster, as that could make + // the result topologically invalid. 
+ m_excluded_clusters.insert(m_remaining_cluster); + Next(); +} + +std::unique_ptr TxGraphImpl::GetBlockBuilder() noexcept +{ + return std::make_unique(*this); +} + } // namespace TxGraph::Ref::~Ref() diff --git a/src/txgraph.h b/src/txgraph.h index 72481611f13..0bb0e8fde25 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -3,9 +3,11 @@ // file COPYING or http://www.opensource.org/licenses/mit-license.php. #include -#include #include +#include +#include #include +#include #include @@ -54,6 +56,29 @@ public: Ref(const Ref&) = delete; }; + /** Interface returned by GetBlockBuilder. */ + class BlockBuilder + { + protected: + /** The next chunk, in topological order plus feerate, or std::nullopt if done. */ + std::optional, FeeFrac>> m_current_chunk; + /** Make constructor non-public (use TxGraph::GetBlockBuilder()). */ + BlockBuilder() noexcept = default; + public: + /** Support safe inheritance. */ + virtual ~BlockBuilder() = default; + /** Determine whether there are more transactions to be included. */ + explicit operator bool() noexcept { return m_current_chunk.has_value(); } + /** Get the chunk that is currently suggested to be included. */ + const std::span& GetCurrentChunk() noexcept { return m_current_chunk->first; } + /** Get the feerate of the currently suggested chunk. */ + const FeeFrac& GetCurrentChunkFeerate() noexcept { return m_current_chunk->second; } + /** Mark the current chunk as included, and progress to the next one. */ + virtual void Include() noexcept = 0; + /** Mark the current chunk as skipped, and progress to the next one. */ + virtual void Skip() noexcept = 0; + }; + protected: // Allow TxGraph::Ref to call UpdateRef and UnlinkRef. friend class TxGraph::Ref; @@ -158,6 +183,10 @@ public: * must both exist and not be oversized), ignoring unmodified components in both. */ virtual std::pair, std::vector> GetMainStagingDiagrams() noexcept = 0; + /** Construct a block builder, drawing from the main graph, which cannot be oversized. 
While + * the returned object exists, no mutators on the main graph are allowed. */ + virtual std::unique_ptr GetBlockBuilder() noexcept = 0; + /** Perform an internal consistency check on this object. */ virtual void SanityCheck() const = 0; }; From ac3f429f6c6902117ca7d73a48b6e6660a53412d Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 4 Dec 2024 16:14:19 -0500 Subject: [PATCH 17/20] txgraph: (feature) introduce Evictor interface Similar to the BlockBuilder interface, this lets one iterate the set of chunks in the entire graph. The iteration happens from low to high chunk feerate however, does not permit skipping chunks, but does permit destroying Refs of the chunks that are being iterated over. --- src/test/fuzz/txgraph.cpp | 71 ++++++++++++++++++++++++++++++++++++ src/txgraph.cpp | 75 +++++++++++++++++++++++++++++++++++++-- src/txgraph.h | 31 +++++++++++++--- 3 files changed, 170 insertions(+), 7 deletions(-) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index d8e5e9defd2..4b050d8d0b2 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -621,6 +621,53 @@ FUZZ_TARGET(txgraph) } } break; + } else if (/*sims.size() == 1 &&*/ !main_sim.IsOversized() && command-- == 0) { + // GetEvictor. + auto num_to_evict = provider.ConsumeIntegralInRange(0, main_sim.GetTransactionCount()); + auto evictor = real->GetEvictor(); + SimTxGraph::SetType done; + FeeFrac prev_chunk_feerate; + while (*evictor && num_to_evict >= 0) { + // Chunk feerates must be monotonously increasing. + if (!prev_chunk_feerate.IsEmpty()) { + assert(FeeRateCompare(evictor->GetCurrentChunkFeerate(), prev_chunk_feerate) >= 0); + } + prev_chunk_feerate = evictor->GetCurrentChunkFeerate(); + FeeFrac sum_feerate; + for (TxGraph::Ref* ref : evictor->GetCurrentChunk()) { + // Each transaction in the chunk must exist in the main graph. + auto simpos = main_sim.Find(*ref); + assert(simpos != SimTxGraph::MISSING); + // Verify the claimed chunk feerate. 
+ sum_feerate += main_sim.graph.FeeRate(simpos); + // Make sure the chunk contains no duplicate transactions. + assert(!done[simpos]); + done.Set(simpos); + // The concatenation of all reported transactions, in order, must be + // anti-topologically valid (all children before parents). + assert(main_sim.graph.Descendants(simpos).IsSubsetOf(done)); + if (num_to_evict > 0) { + // Before destroying Ref, also remove any descendants it may have in + // staging, so that dependencies are consistent. + if (sims.size() == 2) { + auto stage_simpos = top_sim.Find(*ref); + if (stage_simpos != SimTxGraph::MISSING) { + for (auto desc : top_sim.graph.Descendants(stage_simpos)) { + auto& desc_ref = top_sim.GetRef(desc); + top_sim.RemoveTransaction(desc_ref); + real->RemoveTransaction(desc_ref); + } + } + } + // Destroy the Ref for both sims. + for (auto& sim : sims) sim.DestroyTransaction(*ref, true); + --num_to_evict; + } + } + assert(sum_feerate == evictor->GetCurrentChunkFeerate()); + evictor->Next(); + } + break; } } } @@ -695,6 +742,30 @@ FUZZ_TARGET(txgraph) builder->Include(); } assert(vec_builder == vec1); + builder.reset(); + + // The reverse order should be obtained through an Evictor, if nothing is destroyed. + auto evictor = real->GetEvictor(); + std::vector vec_evictor; + while (*evictor) { + FeeFrac sum; + for (TxGraph::Ref* ref : evictor->GetCurrentChunk()) { + // The reported chunk feerate must match the chunk feerate obtained by asking + // it for each of the chunk's transactions individually. + assert(real->GetMainChunkFeerate(*ref) == evictor->GetCurrentChunkFeerate()); + // Verify the chunk feerate matches the sum of the reported individual feerates. + sum += real->GetIndividualFeerate(*ref); + // Chunks must contain transactions that exist in the graph. 
+ auto simpos = sims[0].Find(*ref); + assert(simpos != SimTxGraph::MISSING); + vec_evictor.push_back(simpos); + } + assert(sum == evictor->GetCurrentChunkFeerate()); + evictor->Next(); + } + std::reverse(vec_evictor.begin(), vec_evictor.end()); + assert(vec_evictor == vec1); + evictor.reset(); // Check that the implied ordering gives rise to a combined diagram that matches the // diagram constructed from the individual cluster linearization chunkings. diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 2e277555299..6b8648720ea 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -167,6 +167,7 @@ class TxGraphImpl final : public TxGraph { friend class Cluster; friend class BlockBuilderImpl; + friend class EvictorImpl; private: /** Internal RNG. */ FastRandomContext m_rng; @@ -254,7 +255,7 @@ private: /** Index of ChunkData objects. */ ChunkIndex m_chunkindex; - /** Number of index-observing objects in existence (BlockBuilderImpl). */ + /** Number of index-observing objects in existence (BlockBuilderImpl, EvictorImpl). */ size_t m_chunkindex_observers{0}; /** A Locator that describes whether, where, and in which Cluster an Entry appears. @@ -362,7 +363,6 @@ public: void UnlinkRef(GraphIndex idx) noexcept final { auto& entry = m_entries[idx]; - Assume(m_chunkindex_observers == 0); Assume(entry.m_ref != nullptr); entry.m_ref = nullptr; if (!entry.IsWiped()) { @@ -418,6 +418,7 @@ public: std::pair, std::vector> GetMainStagingDiagrams() noexcept final; std::unique_ptr GetBlockBuilder() noexcept final; + std::unique_ptr GetEvictor() noexcept final; void SanityCheck() const final; }; @@ -453,6 +454,27 @@ public: void Skip() noexcept final; }; +/** Implementation of the TxGraph::Evictor interface. */ +class EvictorImpl final : public TxGraph::Evictor +{ + /** Which TxGraphImpl this object is doing eviction. It will have its m_chunkindex_observers + * incremented as long as this EvictorImpl exists. 
*/ + TxGraphImpl* const m_graph; + /** Vector for actual storage pointed to by TxGraph::Evictor::m_current_chunk. */ + std::vector m_chunkdata; + /** Iterator to the next chunk (after the current one) in the chunk index. rend() if nothing + * further remains. */ + TxGraphImpl::ChunkIndex::const_reverse_iterator m_next_iter; + +public: + /** Construct a new EvictorImpl for the provided graph. */ + EvictorImpl(TxGraphImpl& graph) noexcept; + + // Implement the public interface. + ~EvictorImpl() final; + void Next() noexcept final; +}; + void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept { auto& entry = m_entries[idx]; @@ -2042,6 +2064,55 @@ std::unique_ptr TxGraphImpl::GetBlockBuilder() noexcept return std::make_unique(*this); } +void EvictorImpl::Next() noexcept +{ + while (m_next_iter != m_graph->m_chunkindex.rend()) { + // Find the cluster pointed to by m_next_iter (and advance it). + const auto& chunk_data = *(m_next_iter++); + const auto& chunk_end_entry = m_graph->m_entries[chunk_data.m_graph_index]; + Cluster* cluster = chunk_end_entry.m_locator[0].cluster; + // Populate m_current_chunk. + m_chunkdata.resize(chunk_data.m_chunk_count); + auto start_pos = chunk_end_entry.m_main_lin_index + 1 - chunk_data.m_chunk_count; + cluster->GetClusterRefs(*m_graph, m_chunkdata, start_pos); + m_current_chunk.emplace(m_chunkdata, chunk_end_entry.m_main_chunk_feerate); + // GetClusterRefs emits in topological order; Evictor interface expects children before + // parents, so reverse. + std::reverse(m_chunkdata.begin(), m_chunkdata.end()); + return; + } + // We reached the end of m_chunkindex. + m_current_chunk = std::nullopt; +} + +EvictorImpl::EvictorImpl(TxGraphImpl& graph) noexcept : m_graph(&graph) +{ + // Make sure all clusters in main are up to date, and acceptable. 
+ m_graph->SplitAll(0); + if (m_graph->m_clustersets.size() == 1) m_graph->ApplyDependencies(); + m_graph->MakeAllAcceptable(0); + // The main graph cannot be oversized, as that implies unappliable dependencies. + Assume(!m_graph->m_clustersets[0].m_oversized); + // Remember that this object is observing the graph's index, so that we can detect concurrent + // modifications. + ++m_graph->m_chunkindex_observers; + // Find the first chunk. + m_next_iter = m_graph->m_chunkindex.rbegin(); + Next(); +} + +EvictorImpl::~EvictorImpl() +{ + Assume(m_graph->m_chunkindex_observers > 0); + // Permit modifications to the main graph again after destroying the EvictorImpl. + --m_graph->m_chunkindex_observers; +} + +std::unique_ptr TxGraphImpl::GetEvictor() noexcept +{ + return std::make_unique(*this); +} + } // namespace TxGraph::Ref::~Ref() diff --git a/src/txgraph.h b/src/txgraph.h index 0bb0e8fde25..b752e157dd4 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -56,29 +56,46 @@ public: Ref(const Ref&) = delete; }; - /** Interface returned by GetBlockBuilder. */ - class BlockBuilder + /** Base class for BlockBuilder and Evictor. */ + class ChunkIterator { protected: /** The next chunk, in topological order plus feerate, or std::nullopt if done. */ std::optional, FeeFrac>> m_current_chunk; /** Make constructor non-public (use TxGraph::GetBlockBuilder()). */ - BlockBuilder() noexcept = default; + ChunkIterator() noexcept = default; public: /** Support safe inheritance. */ - virtual ~BlockBuilder() = default; - /** Determine whether there are more transactions to be included. */ + virtual ~ChunkIterator() = default; + /** Determine whether there are more transactions to be processed. */ explicit operator bool() noexcept { return m_current_chunk.has_value(); } /** Get the chunk that is currently suggested to be included. */ const std::span& GetCurrentChunk() noexcept { return m_current_chunk->first; } /** Get the feerate of the currently suggested chunk. 
*/ const FeeFrac& GetCurrentChunkFeerate() noexcept { return m_current_chunk->second; } + }; + + /** Interface returned by GetBlockBuilder. */ + class BlockBuilder : public ChunkIterator + { + public: /** Mark the current chunk as included, and progress to the next one. */ virtual void Include() noexcept = 0; /** Mark the current chunk as skipped, and progress to the next one. */ virtual void Skip() noexcept = 0; }; + /** Interface returned by GetEvictor. */ + class Evictor : public ChunkIterator + { + public: + /** Progress to the next chunk. It is allowed to destroy the Ref objects pointed to by + * GetCurrentChunk before calling Next(), but no other modifications to the main graph + * are allowed while the Evictor exists. Children will always be reported before parents. + */ + virtual void Next() noexcept = 0; + }; + protected: // Allow TxGraph::Ref to call UpdateRef and UnlinkRef. friend class TxGraph::Ref; @@ -186,6 +203,10 @@ public: /** Construct a block builder, drawing from the main graph, which cannot be oversized. While * the returned object exists, no mutators on the main graph are allowed. */ virtual std::unique_ptr GetBlockBuilder() noexcept = 0; + /** Construct an evictor, drawing from the main graph, which cannot be oversized. While + * the returned object exists, no mutators on the main graph are allowed, except destroying + * the Refs reported by Evictor::GetCurrentChunk */ + virtual std::unique_ptr GetEvictor() noexcept = 0; /** Perform an internal consistency check on this object. 
*/ virtual void SanityCheck() const = 0; From a9ce931394fc517d302dc6118834eb49563de079 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Mon, 2 Dec 2024 13:33:41 -0500 Subject: [PATCH 18/20] txgraph: (optimization) reuse discarded chunkindex entries --- src/txgraph.cpp | 72 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 23 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 6b8648720ea..8f266d10628 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -257,6 +258,8 @@ private: ChunkIndex m_chunkindex; /** Number of index-observing objects in existence (BlockBuilderImpl, EvictorImpl). */ size_t m_chunkindex_observers{0}; + /** Cache of discarded ChunkIndex node handles. */ + std::vector m_chunkindex_discarded; /** A Locator that describes whether, where, and in which Cluster an Entry appears. * Every Entry has MAX_LEVELS locators, as it may appear in one Cluster per level. */ @@ -348,6 +351,10 @@ public: void ClearLocator(int level, GraphIndex index) noexcept; /** Find which Clusters conflict with the top level. */ std::vector GetConflicts() const noexcept; + /** Clear an Entry's ChunkData. */ + void ClearChunkData(Entry& entry) noexcept; + /** Give an Entry a ChunkData object. */ + void CreateChunkData(GraphIndex idx, LinearizationIndex chunk_count) noexcept; // Functions for handling Refs. @@ -475,6 +482,36 @@ public: void Next() noexcept final; }; +void TxGraphImpl::ClearChunkData(Entry& entry) noexcept +{ + if (entry.m_chunkindex_iterator != m_chunkindex.end()) { + // If the Entry has a non-empty m_chunkindex_iterator, extract it, and move the handle + // to the cache of discarded chunkindex entries. 
+ m_chunkindex_discarded.emplace_back(m_chunkindex.extract(entry.m_chunkindex_iterator)); + entry.m_chunkindex_iterator = m_chunkindex.end(); + } +} + +void TxGraphImpl::CreateChunkData(GraphIndex idx, LinearizationIndex chunk_count) noexcept +{ + auto& entry = m_entries[idx]; + if (!m_chunkindex_discarded.empty()) { + // Reuse a discarded node handle. + auto& node = m_chunkindex_discarded.back().value(); + node.m_graph_index = idx; + node.m_chunk_count = chunk_count; + auto insert_result = m_chunkindex.insert(std::move(m_chunkindex_discarded.back())); + Assume(insert_result.inserted); + entry.m_chunkindex_iterator = insert_result.position; + m_chunkindex_discarded.pop_back(); + } else { + // Construct a new entry. + auto emplace_result = m_chunkindex.emplace(idx, chunk_count); + Assume(emplace_result.second); + entry.m_chunkindex_iterator = emplace_result.first; + } +} + void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept { auto& entry = m_entries[idx]; @@ -502,10 +539,7 @@ void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept // If this was the last level the Locator was Present at, add it to the m_wiped list (which // will be processed by Cleanup). if (entry.IsWiped()) m_wiped.push_back(idx); - if (level == 0 && entry.m_chunkindex_iterator != m_chunkindex.end()) { - m_chunkindex.erase(entry.m_chunkindex_iterator); - entry.m_chunkindex_iterator = m_chunkindex.end(); - } + if (level == 0) ClearChunkData(entry); } void Cluster::Updated(TxGraphImpl& graph) noexcept @@ -513,12 +547,9 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept // Update all the Locators for this Cluster's Entrys. for (ClusterIndex idx : m_linearization) { auto& entry = graph.m_entries[m_mapping[idx]]; - if (m_level == 0 && entry.m_chunkindex_iterator != graph.m_chunkindex.end()) { - // Destroy any potential ChunkData prior to modifying the Cluster (as that could - // invalidate its ordering). 
- graph.m_chunkindex.erase(entry.m_chunkindex_iterator); - entry.m_chunkindex_iterator = graph.m_chunkindex.end(); - } + // Discard any potential ChunkData prior to modifying the Cluster (as that could + // invalidate its ordering). + if (m_level == 0) graph.ClearChunkData(entry); entry.m_locator[m_level].SetPresent(this, idx); } // If this is for the main graph (level = 0), and the Cluster's quality is ACCEPTABLE or @@ -543,9 +574,7 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept chunk.transactions.Reset(idx); if (chunk.transactions.None()) { // Last transaction in the chunk. - auto [it, inserted] = graph.m_chunkindex.emplace(graph_idx, chunk_count); - Assume(inserted); - entry.m_chunkindex_iterator = it; + graph.CreateChunkData(graph_idx, chunk_count); break; } } @@ -797,12 +826,9 @@ void Cluster::Merge(TxGraphImpl& graph, Cluster& other) noexcept // feerates, as Updated() will be invoked by Cluster::ApplyDependencies on the resulting // merged Cluster later anyway). auto& entry = graph.m_entries[idx]; - if (m_level == 0 && entry.m_chunkindex_iterator != graph.m_chunkindex.end()) { - // Destroy any potential ChunkData prior to modifying the Cluster (as that could - // invalidate its ordering). - graph.m_chunkindex.erase(entry.m_chunkindex_iterator); - entry.m_chunkindex_iterator = graph.m_chunkindex.end(); - } + // Discard any potential ChunkData prior to modifying the Cluster (as that could + // invalidate its ordering). + if (m_level == 0) graph.ClearChunkData(entry); entry.m_locator[m_level].SetPresent(this, new_pos); } // Purge the other Cluster, now that everything has been moved. @@ -1033,6 +1059,9 @@ void TxGraphImpl::SwapIndexes(GraphIndex a, GraphIndex b) noexcept std::vector TxGraphImpl::Cleanup() noexcept { + // Release memory used by discarded ChunkData index entries. + ClearShrink(m_chunkindex_discarded); + // Don't do anything if more than 1 level exists. Cleaning up could invalidate higher levels' // m_to_remove, m_removed, and m_deps_to_add. 
if (m_clustersets.size() > 1) return {}; @@ -1541,10 +1570,7 @@ void Cluster::MakeTransactionsMissing(TxGraphImpl& graph) noexcept auto& entry = graph.m_entries[idx]; entry.m_locator[m_level].SetMissing(); if (entry.IsWiped()) graph.m_wiped.push_back(idx); - if (m_level == 0 && entry.m_chunkindex_iterator != graph.m_chunkindex.end()) { - graph.m_chunkindex.erase(entry.m_chunkindex_iterator); - entry.m_chunkindex_iterator = graph.m_chunkindex.end(); - } + if (m_level == 0) graph.ClearChunkData(entry); } } From cef85dd49a35ef1b3a81e7e46282becafe69ff38 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 27 Nov 2024 16:12:10 -0500 Subject: [PATCH 19/20] txgraph: (optimization) skipping end of cluster has no impact --- src/txgraph.cpp | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 8f266d10628..f43a42a4164 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -129,8 +129,9 @@ public: std::vector GetAncestorRefs(const TxGraphImpl& graph, ClusterIndex idx) noexcept; /** Get a vector of Refs for the descendants of a given Cluster element. */ std::vector GetDescendantRefs(const TxGraphImpl& graph, ClusterIndex idx) noexcept; - /** Get a vector of Refs for all elements of this Cluster, in linearization order. */ - void GetClusterRefs(TxGraphImpl& graph, std::span range, LinearizationIndex start_pos) noexcept; + /** Get a vector of Refs for all elements of this Cluster, in linearization order. Returns + * whether the range ends at the end of the cluster. */ + bool GetClusterRefs(TxGraphImpl& graph, std::span range, LinearizationIndex start_pos) noexcept; /** Get the individual transaction feerate of a Cluster element. */ FeeFrac GetIndividualFeerate(ClusterIndex idx) noexcept; /** Modify the fee of a Cluster element. */ @@ -439,8 +440,8 @@ class BlockBuilderImpl final : public TxGraph::BlockBuilder /** Vector for actual storage pointed to by TxGraph::BlockBuilder::m_current_chunk. 
*/ std::vector m_chunkdata; /** Which cluster the current chunk belongs to, so we can exclude further transaction from it - * when that chunk is skipped. */ - Cluster* m_remaining_cluster{nullptr}; + * when that chunk is skipped, or std::nullopt if we're at the end of the current cluster. */ + std::optional m_remaining_cluster{nullptr}; /** Clusters which we're not including further transactions from. */ std::set m_excluded_clusters; /** Iterator to the next chunk (after the current one) in the chunk index. end() if nothing @@ -1545,7 +1546,7 @@ std::vector Cluster::GetDescendantRefs(const TxGraphImpl& graph, return ret; } -void Cluster::GetClusterRefs(TxGraphImpl& graph, std::span range, LinearizationIndex start_pos) noexcept +bool Cluster::GetClusterRefs(TxGraphImpl& graph, std::span range, LinearizationIndex start_pos) noexcept { // Translate the transactions in the Cluster (in linearization order, starting at start_pos in // the linearization) to Refs, and fill them in range. @@ -1554,6 +1555,8 @@ void Cluster::GetClusterRefs(TxGraphImpl& graph, std::span range, Assume(entry.m_ref != nullptr); ref = entry.m_ref; } + // Return whether this was the end of the Cluster. + return start_pos == m_linearization.size(); } FeeFrac Cluster::GetIndividualFeerate(ClusterIndex idx) noexcept @@ -2038,8 +2041,12 @@ void BlockBuilderImpl::Next() noexcept // Populate m_current_chunk. 
m_chunkdata.resize(chunk_data.m_chunk_count); auto start_pos = chunk_end_entry.m_main_lin_index + 1 - chunk_data.m_chunk_count; - cluster->GetClusterRefs(*m_graph, m_chunkdata, start_pos); - m_remaining_cluster = cluster; + bool is_end = cluster->GetClusterRefs(*m_graph, m_chunkdata, start_pos); + if (is_end) { + m_remaining_cluster = std::nullopt; + } else { + m_remaining_cluster = cluster; + } m_current_chunk.emplace(m_chunkdata, chunk_end_entry.m_main_chunk_feerate); return; } @@ -2081,7 +2088,9 @@ void BlockBuilderImpl::Skip() noexcept { // When skipping a chunk we need to not include anything more of the cluster, as that could make // the result topologically invalid. - m_excluded_clusters.insert(m_remaining_cluster); + if (m_remaining_cluster.has_value()) { + m_excluded_clusters.insert(*m_remaining_cluster); + } Next(); } From 2dbcf9228756f2e29a80a618b709d0b4e8a09490 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 27 Nov 2024 15:29:40 -0500 Subject: [PATCH 20/20] txgraph: (optimization) special-case singletons in chunk index --- src/txgraph.cpp | 47 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index f43a42a4164..1ed3dec4b49 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -220,7 +220,7 @@ private: { /** The Entry which is the last transaction of the chunk. */ mutable GraphIndex m_graph_index; - /** How many transactions the chunk contains. */ + /** How many transactions the chunk contains (-1 = singleton tail of cluster). */ LinearizationIndex m_chunk_count; ChunkData(GraphIndex graph_index, LinearizationIndex chunk_count) noexcept : @@ -575,6 +575,12 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept chunk.transactions.Reset(idx); if (chunk.transactions.None()) { // Last transaction in the chunk. 
+ if (chunk_count == 1 && chunk_idx + 1 == chunking.NumChunksLeft()) { + // If this is the final chunk of the cluster, and it contains just a single + // transaction (which will always be true for the very common singleton + // clusters), store the special value -1 as chunk count. + chunk_count = LinearizationIndex(-1); + } graph.CreateChunkData(graph_idx, chunk_count); break; } @@ -2039,13 +2045,22 @@ void BlockBuilderImpl::Next() noexcept // If we previously skipped a chunk from this cluster we cannot include more from it. if (m_excluded_clusters.contains(cluster)) continue; // Populate m_current_chunk. - m_chunkdata.resize(chunk_data.m_chunk_count); - auto start_pos = chunk_end_entry.m_main_lin_index + 1 - chunk_data.m_chunk_count; - bool is_end = cluster->GetClusterRefs(*m_graph, m_chunkdata, start_pos); - if (is_end) { + if (chunk_data.m_chunk_count == LinearizationIndex(-1)) { + // Special case in case just a single transaction remains, avoiding the need to + // dispatch to and dereference Cluster. + m_chunkdata.resize(1); + Assume(chunk_end_entry.m_ref != nullptr); + m_chunkdata[0] = chunk_end_entry.m_ref; m_remaining_cluster = std::nullopt; } else { - m_remaining_cluster = cluster; + m_chunkdata.resize(chunk_data.m_chunk_count); + auto start_pos = chunk_end_entry.m_main_lin_index + 1 - chunk_data.m_chunk_count; + bool is_end = cluster->GetClusterRefs(*m_graph, m_chunkdata, start_pos); + if (is_end) { + m_remaining_cluster = std::nullopt; + } else { + m_remaining_cluster = cluster; + } } m_current_chunk.emplace(m_chunkdata, chunk_end_entry.m_main_chunk_feerate); return; @@ -2107,13 +2122,21 @@ void EvictorImpl::Next() noexcept const auto& chunk_end_entry = m_graph->m_entries[chunk_data.m_graph_index]; Cluster* cluster = chunk_end_entry.m_locator[0].cluster; // Populate m_current_chunk. 
- m_chunkdata.resize(chunk_data.m_chunk_count); - auto start_pos = chunk_end_entry.m_main_lin_index + 1 - chunk_data.m_chunk_count; - cluster->GetClusterRefs(*m_graph, m_chunkdata, start_pos); + if (chunk_data.m_chunk_count == LinearizationIndex(-1) || chunk_data.m_chunk_count == 1) { + // Special case for single-transaction chunks, avoiding the need to dispatch to and + // dereference Cluster. + m_chunkdata.resize(1); + Assume(chunk_end_entry.m_ref != nullptr); + m_chunkdata[0] = chunk_end_entry.m_ref; + } else { + m_chunkdata.resize(chunk_data.m_chunk_count); + auto start_pos = chunk_end_entry.m_main_lin_index + 1 - chunk_data.m_chunk_count; + cluster->GetClusterRefs(*m_graph, m_chunkdata, start_pos); + // GetClusterRefs emits in topological order; Evictor interface expects children before + // parents, so reverse. + std::reverse(m_chunkdata.begin(), m_chunkdata.end()); + } m_current_chunk.emplace(m_chunkdata, chunk_end_entry.m_main_chunk_feerate); - // GetClusterRefs emits in topological order; Evictor interface expects children before - // parents, so reverse. - std::reverse(m_chunkdata.begin(), m_chunkdata.end()); return; } // We reached the end of m_chunkindex.