From c72c8d5d45d9a87e4cf78e66f9737b9e6f371d2d Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Tue, 1 Apr 2025 14:05:56 -0400 Subject: [PATCH 1/2] txgraph: compare sequence numbers instead of Cluster* (bugfix) This makes fuzz testing more deterministic, by avoiding the (arbitrary) pointer value ordering in comparing transactions. --- src/test/fuzz/txgraph.cpp | 13 ++++--- src/txgraph.cpp | 74 ++++++++++++++++++++++++++++----------- 2 files changed, 61 insertions(+), 26 deletions(-) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index 7eabb013e2b..a2c65f2e0a6 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -206,11 +206,14 @@ struct SimTxGraph ret.push_back(ptr); } } - // Deduplicate. - std::sort(ret.begin(), ret.end()); - ret.erase(std::unique(ret.begin(), ret.end()), ret.end()); - // Replace input. - arg = std::move(ret); + // Construct deduplicated version in input (do not use std::sort/std::unique for + // deduplication as it'd rely on non-deterministic pointer comparison). + arg.clear(); + for (auto ptr : ret) { + if (std::find(arg.begin(), arg.end(), ptr) == arg.end()) { + arg.push_back(ptr); + } + } } }; diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 9e4bf116b02..880746d5dbe 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -70,12 +70,16 @@ class Cluster ClusterSetIndex m_setindex{ClusterSetIndex(-1)}; /** Which level this Cluster is at in the graph (-1=not inserted, 0=main, 1=staging). */ int m_level{-1}; + /** Sequence number for this Cluster (for tie-breaking comparison between equal-chunk-feerate + transactions in distinct clusters). */ + uint64_t m_sequence; public: + Cluster() noexcept = delete; /** Construct an empty Cluster. */ - Cluster() noexcept = default; + explicit Cluster(uint64_t sequence) noexcept; /** Construct a singleton Cluster. */ - explicit Cluster(TxGraphImpl& graph, const FeePerWeight& feerate, GraphIndex graph_index) noexcept; + explicit Cluster(uint64_t sequence, TxGraphImpl& graph, const FeePerWeight& feerate, GraphIndex graph_index) noexcept; // Cannot move or copy (would invalidate Cluster* in Locator and ClusterSet). */ Cluster(const Cluster&) = delete; @@ -157,6 +161,7 @@ public: void SanityCheck(const TxGraphImpl& graph, int level) const; }; + /** The transaction graph, including staged changes. * * The overall design of the data structure consists of 3 interlinked representations: @@ -244,6 +249,8 @@ private: ClusterSet m_main_clusterset; /** The staging ClusterSet, if any. */ std::optional m_staging_clusterset; + /** Next sequence number to assign to created Clusters. */ + uint64_t m_next_sequence_counter{0}; /** A Locator that describes whether, where, and in which Cluster an Entry appears. * Every Entry has MAX_LEVELS locators, as it may appear in one Cluster per level. @@ -315,6 +322,18 @@ private: /** Set of Entries which have no linked Ref anymore. */ std::vector m_unlinked; + /** Compare two Cluster* by their m_sequence value (while supporting nullptr). */ + static std::strong_ordering CompareClusters(Cluster* a, Cluster* b) noexcept + { + // The nullptr pointer compares before everything else. + if (a == nullptr || b == nullptr) { + return (a != nullptr) <=> (b != nullptr); + } + // If neither pointer is nullptr, compare the Clusters' sequence numbers. + Assume(a == b || a->m_sequence != b->m_sequence); + return a->m_sequence <=> b->m_sequence; + } + public: /** Construct a new TxGraphImpl with the specified maximum cluster count. */ explicit TxGraphImpl(DepGraphIndex max_cluster_count) noexcept : @@ -569,7 +588,7 @@ std::vector TxGraphImpl::GetConflicts() const noexcept } } // Deduplicate the result (the same Cluster may appear multiple times). - std::sort(ret.begin(), ret.end()); + std::sort(ret.begin(), ret.end(), [](Cluster* a, Cluster* b) noexcept { return CompareClusters(a, b) < 0; }); ret.erase(std::unique(ret.begin(), ret.end()), ret.end()); return ret; } @@ -577,7 +596,7 @@ std::vector TxGraphImpl::GetConflicts() const noexcept Cluster* Cluster::CopyToStaging(TxGraphImpl& graph) const noexcept { // Construct an empty Cluster. - auto ret = std::make_unique(); + auto ret = std::make_unique(graph.m_next_sequence_counter++); auto ptr = ret.get(); // Copy depgraph, mapping, and linearization/ ptr->m_depgraph = m_depgraph; @@ -710,7 +729,7 @@ bool Cluster::Split(TxGraphImpl& graph) noexcept } first = false; // Construct a new Cluster to hold the found component. - auto new_cluster = std::make_unique(); + auto new_cluster = std::make_unique(graph.m_next_sequence_counter++); new_clusters.push_back(new_cluster.get()); // Remember that all the component's transactions go to this new Cluster. The positions // will be determined below, so use -1 for now. @@ -956,9 +975,11 @@ void TxGraphImpl::ApplyRemovals(int up_to_level) noexcept if (cluster != nullptr) PullIn(cluster); } } - // Group the set of to-be-removed entries by Cluster*. + // Group the set of to-be-removed entries by Cluster::m_sequence. std::sort(to_remove.begin(), to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept { - return std::less{}(m_entries[a].m_locator[level].cluster, m_entries[b].m_locator[level].cluster); + Cluster* cluster_a = m_entries[a].m_locator[level].cluster; + Cluster* cluster_b = m_entries[b].m_locator[level].cluster; + return CompareClusters(cluster_a, cluster_b) < 0; }); // Process per Cluster. std::span to_remove_span{to_remove}; @@ -1103,16 +1124,16 @@ void TxGraphImpl::GroupClusters(int level) noexcept } // Sort and deduplicate an_clusters, so we end up with a sorted list of all involved Clusters // to which dependencies apply. - std::sort(an_clusters.begin(), an_clusters.end()); + std::sort(an_clusters.begin(), an_clusters.end(), [](auto& a, auto& b) noexcept { return CompareClusters(a.first, b.first) < 0; }); an_clusters.erase(std::unique(an_clusters.begin(), an_clusters.end()), an_clusters.end()); - // Sort the dependencies by child Cluster. + // Sort the dependencies by child Cluster::m_sequence. std::sort(clusterset.m_deps_to_add.begin(), clusterset.m_deps_to_add.end(), [&](auto& a, auto& b) noexcept { auto [_a_par, a_chl] = a; auto [_b_par, b_chl] = b; auto a_chl_cluster = FindCluster(a_chl, level); auto b_chl_cluster = FindCluster(b_chl, level); - return std::less{}(a_chl_cluster, b_chl_cluster); + return CompareClusters(a_chl_cluster, b_chl_cluster) < 0; }); // Run the union-find algorithm to to find partitions of the input Clusters which need to be @@ -1132,13 +1153,13 @@ void TxGraphImpl::GroupClusters(int level) noexcept * tree for this partition. */ unsigned rank; }; - /** Information about each input Cluster. Sorted by Cluster* pointer. */ + /** Information about each input Cluster. Sorted by Cluster::m_sequence. */ std::vector partition_data; /** Given a Cluster, find its corresponding PartitionData. */ auto locate_fn = [&](Cluster* arg) noexcept -> PartitionData* { auto it = std::lower_bound(partition_data.begin(), partition_data.end(), arg, - [](auto& a, Cluster* ptr) noexcept { return a.cluster < ptr; }); + [](auto& a, Cluster* ptr) noexcept { return CompareClusters(a.cluster, ptr) < 0; }); Assume(it != partition_data.end()); Assume(it->cluster == arg); return &*it; @@ -1219,7 +1240,7 @@ void TxGraphImpl::GroupClusters(int level) noexcept while (deps_it != clusterset.m_deps_to_add.end()) { auto [par, chl] = *deps_it; auto chl_cluster = FindCluster(chl, level); - if (std::greater{}(chl_cluster, data.cluster)) break; + if (CompareClusters(chl_cluster, data.cluster) > 0) break; // Skip dependencies that apply to earlier Clusters (those necessary are for // deleted transactions, as otherwise we'd have processed them already). if (chl_cluster == data.cluster) { @@ -1234,8 +1255,8 @@ void TxGraphImpl::GroupClusters(int level) noexcept // Sort both an_clusters and an_deps by representative of the partition they are in, grouping // all those applying to the same partition together. - std::sort(an_deps.begin(), an_deps.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); - std::sort(an_clusters.begin(), an_clusters.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); + std::sort(an_deps.begin(), an_deps.end(), [](auto& a, auto& b) noexcept { return CompareClusters(a.second, b.second) < 0; }); + std::sort(an_clusters.begin(), an_clusters.end(), [](auto& a, auto& b) noexcept { return CompareClusters(a.second, b.second) < 0; }); // Translate the resulting cluster groups to the m_group_data structure, and the dependencies // back to m_deps_to_add. @@ -1390,7 +1411,10 @@ void TxGraphImpl::MakeAllAcceptable(int level) noexcept } } -Cluster::Cluster(TxGraphImpl& graph, const FeePerWeight& feerate, GraphIndex graph_index) noexcept +Cluster::Cluster(uint64_t sequence) noexcept : m_sequence{sequence} {} + +Cluster::Cluster(uint64_t sequence, TxGraphImpl& graph, const FeePerWeight& feerate, GraphIndex graph_index) noexcept : + m_sequence{sequence} { // Create a new transaction in the DepGraph, and remember its position in m_mapping. auto cluster_idx = m_depgraph.AddTransaction(feerate); @@ -1410,7 +1434,7 @@ TxGraph::Ref TxGraphImpl::AddTransaction(const FeePerWeight& feerate) noexcept GetRefGraph(ret) = this; GetRefIndex(ret) = idx; // Construct a new singleton Cluster (which is necessarily optimally linearized). - auto cluster = std::make_unique(*this, feerate, idx); + auto cluster = std::make_unique(m_next_sequence_counter++, *this, feerate, idx); auto cluster_ptr = cluster.get(); int level = GetTopLevel(); auto& clusterset = GetClusterSet(level); @@ -1606,7 +1630,7 @@ std::vector TxGraphImpl::GetAncestorsUnion(std::spanm_level].index); } // Group by Cluster. - std::sort(matches.begin(), matches.end(), [](auto& a, auto& b) noexcept { return std::less{}(a.first, b.first); }); + std::sort(matches.begin(), matches.end(), [](auto& a, auto& b) noexcept { return CompareClusters(a.first, b.first) < 0; }); // Dispatch to the Clusters. std::span match_span(matches); std::vector ret; @@ -1639,7 +1663,7 @@ std::vector TxGraphImpl::GetDescendantsUnion(std::spanm_level].index); } // Group by Cluster. - std::sort(matches.begin(), matches.end(), [](auto& a, auto& b) noexcept { return std::less{}(a.first, b.first); }); + std::sort(matches.begin(), matches.end(), [](auto& a, auto& b) noexcept { return CompareClusters(a.first, b.first) < 0; }); // Dispatch to the Clusters. std::span match_span(matches); std::vector ret; @@ -1867,7 +1891,9 @@ std::strong_ordering TxGraphImpl::CompareMainOrder(const Ref& a, const Ref& b) n if (feerate_cmp < 0) return std::strong_ordering::less; if (feerate_cmp > 0) return std::strong_ordering::greater; // Compare Cluster* as tie-break for equal chunk feerates. - if (locator_a.cluster != locator_b.cluster) return locator_a.cluster <=> locator_b.cluster; + if (locator_a.cluster != locator_b.cluster) { + return CompareClusters(locator_a.cluster, locator_b.cluster); + } // As final tie-break, compare position within cluster linearization. return entry_a.m_main_lin_index <=> entry_b.m_main_lin_index; } @@ -1889,7 +1915,7 @@ TxGraph::GraphIndex TxGraphImpl::CountDistinctClusters(std::span expected_clusters[MAX_LEVELS]; /** Which GraphIndexes ought to occur in ClusterSet::m_removed, based on m_entries. */ std::set expected_removed[MAX_LEVELS]; + /** Which Cluster::m_sequence values have been encountered. */ + std::set sequences; /** Whether compaction is possible in the current state. */ bool compact_possible{true}; @@ -2004,6 +2032,10 @@ void TxGraphImpl::SanityCheck() const // ... for all clusters in them ... for (ClusterSetIndex setindex = 0; setindex < quality_clusters.size(); ++setindex) { const auto& cluster = *quality_clusters[setindex]; + // Check the sequence number. + assert(cluster.m_sequence < m_next_sequence_counter); + assert(sequences.count(cluster.m_sequence) == 0); + sequences.insert(cluster.m_sequence); // Remember we saw this Cluster (only if it is non-empty; empty Clusters aren't // expected to be referenced by the Entry vector). if (cluster.GetTxCount() != 0) { From 2835216ec09cc2d86b091824376b15601e7c7b8a Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Tue, 1 Apr 2025 16:33:14 -0400 Subject: [PATCH 2/2] txgraph: make GroupClusters use partition numbers directly (optimization) --- src/txgraph.cpp | 107 ++++++++++++++++++++++-------------------------- 1 file changed, 49 insertions(+), 58 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 880746d5dbe..7c74ae50ef3 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -1103,38 +1103,37 @@ void TxGraphImpl::GroupClusters(int level) noexcept // with inefficient and/or oversized Clusters which just end up being split again anyway. SplitAll(level); - /** Annotated clusters: an entry for each Cluster, together with the representative for the - * partition it is in if known, or with nullptr if not yet known. */ - std::vector> an_clusters; + /** Annotated clusters: an entry for each Cluster, together with the sequence number for the + * representative for the partition it is in (initially its own, later that of the + * to-be-merged group). */ + std::vector> an_clusters; /** Annotated dependencies: an entry for each m_deps_to_add entry (excluding ones that apply - * to removed transactions), together with the representative root of the partition of - * Clusters it applies to. */ - std::vector, Cluster*>> an_deps; + * to removed transactions), together with the sequence number of the representative root of + * Clusters it applies to (initially that of the child Cluster, later that of the + * to-be-merged group). */ + std::vector, uint64_t>> an_deps; - // Construct a an_clusters entry for every parent and child in the to-be-applied dependencies. + // Construct a an_clusters entry for every parent and child in the to-be-applied dependencies, + // and an an_deps entry for each dependency to be applied. + an_deps.reserve(clusterset.m_deps_to_add.size()); for (const auto& [par, chl] : clusterset.m_deps_to_add) { auto par_cluster = FindCluster(par, level); auto chl_cluster = FindCluster(chl, level); // Skip dependencies for which the parent or child transaction is removed. if (par_cluster == nullptr || chl_cluster == nullptr) continue; - an_clusters.emplace_back(par_cluster, nullptr); + an_clusters.emplace_back(par_cluster, par_cluster->m_sequence); // Do not include a duplicate when parent and child are identical, as it'll be removed // below anyway. - if (chl_cluster != par_cluster) an_clusters.emplace_back(chl_cluster, nullptr); + if (chl_cluster != par_cluster) an_clusters.emplace_back(chl_cluster, chl_cluster->m_sequence); + // Add entry to an_deps, using the child sequence number. + an_deps.emplace_back(std::pair{par, chl}, chl_cluster->m_sequence); } // Sort and deduplicate an_clusters, so we end up with a sorted list of all involved Clusters // to which dependencies apply. - std::sort(an_clusters.begin(), an_clusters.end(), [](auto& a, auto& b) noexcept { return CompareClusters(a.first, b.first) < 0; }); + std::sort(an_clusters.begin(), an_clusters.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); an_clusters.erase(std::unique(an_clusters.begin(), an_clusters.end()), an_clusters.end()); - - // Sort the dependencies by child Cluster::m_sequence. - std::sort(clusterset.m_deps_to_add.begin(), clusterset.m_deps_to_add.end(), [&](auto& a, auto& b) noexcept { - auto [_a_par, a_chl] = a; - auto [_b_par, b_chl] = b; - auto a_chl_cluster = FindCluster(a_chl, level); - auto b_chl_cluster = FindCluster(b_chl, level); - return CompareClusters(a_chl_cluster, b_chl_cluster) < 0; - }); + // Sort an_deps by applying the same order to the involved child cluster. + std::sort(an_deps.begin(), an_deps.end(), [&](auto& a, auto& b) noexcept { return a.second < b.second; }); // Run the union-find algorithm to to find partitions of the input Clusters which need to be // grouped together. See https://en.wikipedia.org/wiki/Disjoint-set_data_structure. @@ -1142,8 +1141,8 @@ void TxGraphImpl::GroupClusters(int level) noexcept /** Each PartitionData entry contains information about a single input Cluster. */ struct PartitionData { - /** The cluster this holds information for. */ - Cluster* cluster; + /** The sequence number of the cluster this holds information for. */ + uint64_t sequence; /** All PartitionData entries belonging to the same partition are organized in a tree. * Each element points to its parent, or to itself if it is the root. The root is then * a representative for the entire tree, and can be found by walking upwards from any @@ -1157,11 +1156,11 @@ void TxGraphImpl::GroupClusters(int level) noexcept std::vector partition_data; /** Given a Cluster, find its corresponding PartitionData. */ - auto locate_fn = [&](Cluster* arg) noexcept -> PartitionData* { - auto it = std::lower_bound(partition_data.begin(), partition_data.end(), arg, - [](auto& a, Cluster* ptr) noexcept { return CompareClusters(a.cluster, ptr) < 0; }); + auto locate_fn = [&](uint64_t sequence) noexcept -> PartitionData* { + auto it = std::lower_bound(partition_data.begin(), partition_data.end(), sequence, + [](auto& a, uint64_t seq) noexcept { return a.sequence < seq; }); Assume(it != partition_data.end()); - Assume(it->cluster == arg); + Assume(it->sequence == sequence); return &*it; }; @@ -1196,67 +1195,59 @@ void TxGraphImpl::GroupClusters(int level) noexcept // Start by initializing every Cluster as its own singleton partition. partition_data.resize(an_clusters.size()); for (size_t i = 0; i < an_clusters.size(); ++i) { - partition_data[i].cluster = an_clusters[i].first; + partition_data[i].sequence = an_clusters[i].first->m_sequence; partition_data[i].parent = &partition_data[i]; partition_data[i].rank = 0; } - // Run through all parent/child pairs in m_deps_to_add, and union the - // the partitions their Clusters are in. + // Run through all parent/child pairs in an_deps, and union the partitions their Clusters + // are in. Cluster* last_chl_cluster{nullptr}; PartitionData* last_partition{nullptr}; - for (const auto& [par, chl] : clusterset.m_deps_to_add) { + for (const auto& [dep, _] : an_deps) { + auto [par, chl] = dep; auto par_cluster = FindCluster(par, level); auto chl_cluster = FindCluster(chl, level); + Assume(chl_cluster != nullptr && par_cluster != nullptr); // Nothing to do if parent and child are in the same Cluster. if (par_cluster == chl_cluster) continue; - // Nothing to do if either parent or child transaction is removed already. - if (par_cluster == nullptr || chl_cluster == nullptr) continue; Assume(par != chl); if (chl_cluster == last_chl_cluster) { // If the child Clusters is the same as the previous iteration, union with the - // tree they were in, avoiding the need for another lookup. Note that m_deps_to_add + // tree they were in, avoiding the need for another lookup. Note that an_deps // is sorted by child Cluster, so batches with the same child are expected. - last_partition = union_fn(locate_fn(par_cluster), last_partition); + last_partition = union_fn(locate_fn(par_cluster->m_sequence), last_partition); } else { last_chl_cluster = chl_cluster; - last_partition = union_fn(locate_fn(par_cluster), locate_fn(chl_cluster)); + last_partition = union_fn(locate_fn(par_cluster->m_sequence), locate_fn(chl_cluster->m_sequence)); } } - // Populate the an_clusters and an_deps data structures with the list of input Clusters, - // and the input dependencies, annotated with the representative of the Cluster partition - // it applies to. - an_deps.reserve(clusterset.m_deps_to_add.size()); - auto deps_it = clusterset.m_deps_to_add.begin(); + // Update the sequence numbers in an_clusters and an_deps to be those of the partition + // representative. + auto deps_it = an_deps.begin(); for (size_t i = 0; i < partition_data.size(); ++i) { auto& data = partition_data[i]; - // Find the representative of the partition Cluster i is in, and store it with the - // Cluster. - auto rep = find_root_fn(&data)->cluster; - Assume(an_clusters[i].second == nullptr); - an_clusters[i].second = rep; + // Find the sequence of the representative of the partition Cluster i is in, and store + // it with the Cluster. + auto rep_seq = find_root_fn(&data)->sequence; + an_clusters[i].second = rep_seq; // Find all dependencies whose child Cluster is Cluster i, and annotate them with rep. - while (deps_it != clusterset.m_deps_to_add.end()) { - auto [par, chl] = *deps_it; + while (deps_it != an_deps.end()) { + auto [par, chl] = deps_it->first; auto chl_cluster = FindCluster(chl, level); - if (CompareClusters(chl_cluster, data.cluster) > 0) break; - // Skip dependencies that apply to earlier Clusters (those necessary are for - // deleted transactions, as otherwise we'd have processed them already). - if (chl_cluster == data.cluster) { - auto par_cluster = FindCluster(par, level); - // Also filter out dependencies applying to a removed parent. - if (par_cluster != nullptr) an_deps.emplace_back(*deps_it, rep); - } + Assume(chl_cluster != nullptr); + if (chl_cluster->m_sequence > data.sequence) break; + deps_it->second = rep_seq; ++deps_it; } } } - // Sort both an_clusters and an_deps by representative of the partition they are in, grouping - // all those applying to the same partition together. - std::sort(an_deps.begin(), an_deps.end(), [](auto& a, auto& b) noexcept { return CompareClusters(a.second, b.second) < 0; }); - std::sort(an_clusters.begin(), an_clusters.end(), [](auto& a, auto& b) noexcept { return CompareClusters(a.second, b.second) < 0; }); + // Sort both an_clusters and an_deps by sequence number of the representative of the + // partition they are in, grouping all those applying to the same partition together. + std::sort(an_deps.begin(), an_deps.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); + std::sort(an_clusters.begin(), an_clusters.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); // Translate the resulting cluster groups to the m_group_data structure, and the dependencies // back to m_deps_to_add.