From 777179bc27bf5bb9be28fd3912ceca8153e8cebd Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 26 Mar 2025 22:47:18 -0400 Subject: [PATCH 1/3] txgraph: rename group_data in ApplyDependencies --- src/txgraph.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index f6d9eec5666..0e7a4c9fdcf 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -1321,9 +1321,9 @@ void TxGraphImpl::ApplyDependencies(int level) noexcept if (clusterset.m_group_data->m_group_oversized) return; // For each group of to-be-merged Clusters. - for (const auto& group_data : clusterset.m_group_data->m_groups) { + for (const auto& group_entry : clusterset.m_group_data->m_groups) { auto cluster_span = std::span{clusterset.m_group_data->m_group_clusters} - .subspan(group_data.m_cluster_offset, group_data.m_cluster_count); + .subspan(group_entry.m_cluster_offset, group_entry.m_cluster_count); // Pull in all the Clusters that contain dependencies. if (level == 1) { for (Cluster*& cluster : cluster_span) { @@ -1335,7 +1335,7 @@ void TxGraphImpl::ApplyDependencies(int level) noexcept // Actually apply all to-be-added dependencies (all parents and children from this grouping // belong to the same Cluster at this point because of the merging above). auto deps_span = std::span{clusterset.m_deps_to_add} - .subspan(group_data.m_deps_offset, group_data.m_deps_count); + .subspan(group_entry.m_deps_offset, group_entry.m_deps_count); Assume(!deps_span.empty()); const auto& loc = m_entries[deps_span[0].second].m_locator[level]; Assume(loc.IsPresent()); From c7d5dcaa61471cc26c3fb1772dfc4c82b20e1f9d Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 26 Mar 2025 22:57:41 -0400 Subject: [PATCH 2/3] clusterlin: fix typos --- src/cluster_linearize.h | 2 +- src/txgraph.cpp | 10 +++++----- src/txgraph.h | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index b01daedf4bf..ad6f1a76032 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -1367,7 +1367,7 @@ void FixLinearization(const DepGraph& depgraph, std::span 0); auto to_swap = linearization[len - 1 - (j - 1)]; place_before.Reset(to_swap); diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 0e7a4c9fdcf..8fb5300a664 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -335,7 +335,7 @@ public: // Simple helper functions. - /** Swap the Entrys referred to by a and b. */ + /** Swap the Entry referred to by a and the one referred to by b. */ void SwapIndexes(GraphIndex a, GraphIndex b) noexcept; /** If idx exists in the specified level ClusterSet (explicitly, or in the level below and not * removed), return the Cluster it is in. Otherwise, return nullptr. */ @@ -408,8 +408,8 @@ public: // Functions related to various normalization/application steps. /** Get rid of unlinked Entry objects in m_entries, if possible (this changes the GraphIndex - * values for remaining Entrys, so this only does something when no to-be-applied operations - * or staged removals referring to GraphIndexes remain). */ + * values for remaining Entry objects, so this only does something when no to-be-applied + * operations or staged removals referring to GraphIndexes remain). */ void Compact() noexcept; /** If cluster is not in staging, copy it there, and return a pointer to it. This has no * effect if only a main graph exists, but if staging exists this modifies the locators of its @@ -505,7 +505,7 @@ void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept void Cluster::Updated(TxGraphImpl& graph) noexcept { - // Update all the Locators for this Cluster's Entrys. + // Update all the Locators for this Cluster's Entry objects. for (DepGraphIndex idx : m_linearization) { auto& entry = graph.m_entries[m_mapping[idx]]; entry.m_locator[m_level].SetPresent(this, idx); @@ -1937,7 +1937,7 @@ void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const assert(m_depgraph.IsConnected(linchunking.GetChunk(0).transactions)); } } - // Verify that each element of m_depgraph occured in m_linearization. + // Verify that each element of m_depgraph occurred in m_linearization. assert(m_done == m_depgraph.Positions()); } diff --git a/src/txgraph.h b/src/txgraph.h index eba983cb5b9..803696440c2 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -99,7 +99,7 @@ public: /** Create a staging graph (which cannot exist already). This acts as if a full copy of * the transaction graph is made, upon which further modifications are made. This copy can * be inspected, and then either discarded, or the main graph can be replaced by it by - * commiting it. */ + * committing it. */ virtual void StartStaging() noexcept = 0; /** Discard the existing active staging graph (which must exist). */ virtual void AbortStaging() noexcept = 0; From a52b53926b5c6a5b92255435e3c204cdf18665a2 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 27 Mar 2025 10:01:51 -0400 Subject: [PATCH 3/3] clusterlin: add GetConnectedComponent This abstracts out the finding of the connected component that includes a given element from FindConnectedComponent (which just finds any connected component). Use this in the txgraph fuzz test, which was effectively reimplementing this logic. At the same time, improve its performance by replacing a vector with a set. --- src/cluster_linearize.h | 26 +++++++++++++++++++------- src/test/fuzz/cluster_linearize.cpp | 21 +++++++++++++++++++-- src/test/fuzz/txgraph.cpp | 25 ++++++------------------- 3 files changed, 44 insertions(+), 28 deletions(-) diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index ad6f1a76032..217c4700afe 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -250,10 +250,8 @@ public: return ret; } - /** Find some connected component within the subset "todo" of this graph. - * - * Specifically, this finds the connected component which contains the first transaction of - * todo (if any). + /** Get the connected component within the subset "todo" that contains tx (which must be in + * todo). * * Two transactions are considered connected if they are both in `todo`, and one is an ancestor * of the other in the entire graph (so not just within `todo`), or transitively there is a @@ -262,10 +260,11 @@ public: * * Complexity: O(ret.Count()). */ - SetType FindConnectedComponent(const SetType& todo) const noexcept + SetType GetConnectedComponent(const SetType& todo, DepGraphIndex tx) const noexcept { - if (todo.None()) return todo; - auto to_add = SetType::Singleton(todo.First()); + Assume(todo[tx]); + Assume(todo.IsSubsetOf(m_used)); + auto to_add = SetType::Singleton(tx); SetType ret; do { SetType old = ret; @@ -279,6 +278,19 @@ public: return ret; } + /** Find some connected component within the subset "todo" of this graph. + * + * Specifically, this finds the connected component which contains the first transaction of + * todo (if any). + * + * Complexity: O(ret.Count()). + */ + SetType FindConnectedComponent(const SetType& todo) const noexcept + { + if (todo.None()) return todo; + return GetConnectedComponent(todo, todo.First()); + } + /** Determine if a subset is connected. * * Complexity: O(subset.Count()). diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index c7e40a833da..fb4bf3a719f 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -446,19 +446,36 @@ FUZZ_TARGET(clusterlin_components) // Construct a depgraph. SpanReader reader(buffer); DepGraph depgraph; + std::vector linearization; try { reader >> Using(depgraph); } catch (const std::ios_base::failure&) {} TestBitSet todo = depgraph.Positions(); while (todo.Any()) { - // Find a connected component inside todo. - auto component = depgraph.FindConnectedComponent(todo); + // Pick a transaction in todo, or nothing. + std::optional picked; + { + uint64_t picked_num{0}; + try { + reader >> VARINT(picked_num); + } catch (const std::ios_base::failure&) {} + if (picked_num < todo.Size() && todo[picked_num]) { + picked = picked_num; + } + } + + // Find a connected component inside todo, including picked if any. + auto component = picked ? depgraph.GetConnectedComponent(todo, *picked) + : depgraph.FindConnectedComponent(todo); // The component must be a subset of todo and non-empty. assert(component.IsSubsetOf(todo)); assert(component.Any()); + // If picked was provided, the component must include it. + if (picked) assert(component[*picked]); + // If todo is the entire graph, and the entire graph is connected, then the component must // be the entire graph. if (todo == depgraph.Positions()) { diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index 010c9e951ed..7eabb013e2b 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -561,36 +561,23 @@ FUZZ_TARGET(txgraph) std::shuffle(refs.begin(), refs.end(), rng); // Invoke the real function. auto result = real->CountDistinctClusters(refs, use_main); - // Build a vector with representatives of the clusters the Refs occur in in the + // Build a set with representatives of the clusters the Refs occur in in the // simulated graph. For each, remember the lowest-index transaction SimPos in the // cluster. - std::vector sim_reps; + SimTxGraph::SetType sim_reps; for (auto ref : refs) { // Skip Refs that do not occur in the simulated graph. auto simpos = sel_sim.Find(ref); if (simpos == SimTxGraph::MISSING) continue; - // Start with component equal to just the Ref's SimPos. - auto component = SimTxGraph::SetType::Singleton(simpos); - // Keep adding ancestors/descendants of all elements in component until it no - // longer changes. - while (true) { - auto old_component = component; - for (auto i : component) { - component |= sel_sim.graph.Ancestors(i); - component |= sel_sim.graph.Descendants(i); - } - if (component == old_component) break; - } + // Find the component that includes ref. + auto component = sel_sim.graph.GetConnectedComponent(sel_sim.graph.Positions(), simpos); // Remember the lowest-index SimPos in component, as a representative for it. assert(component.Any()); - sim_reps.push_back(component.First()); + sim_reps.Set(component.First()); } - // Remove duplicates from sim_reps. - std::sort(sim_reps.begin(), sim_reps.end()); - sim_reps.erase(std::unique(sim_reps.begin(), sim_reps.end()), sim_reps.end()); // Compare the number of deduplicated representatives with the value returned by // the real function. - assert(result == sim_reps.size()); + assert(result == sim_reps.Count()); break; } else if (command-- == 0) { // DoWork.