Merge bitcoin/bitcoin#32151: Follow-ups for txgraph #31363
Some checks are pending
CI / test each commit (push) Waiting to run
CI / macOS 14 native, arm64, no depends, sqlite only, gui (push) Waiting to run
CI / macOS 14 native, arm64, fuzz (push) Waiting to run
CI / Win64 native, VS 2022 (push) Waiting to run
CI / Win64 native fuzz, VS 2022 (push) Waiting to run
CI / ASan + LSan + UBSan + integer, no depends, USDT (push) Waiting to run

a52b53926b clusterlin: add GetConnectedComponent (Pieter Wuille)
c7d5dcaa61 clusterlin: fix typos (Pieter Wuille)
777179bc27 txgraph: rename group_data in ApplyDependencies (Pieter Wuille)

Pull request description:

  This addresses a few review comments in #31363 after merge:
  * https://github.com/bitcoin/bitcoin/pull/31363#discussion_r2012730654 (rename `group_data` to `group_entry`)
  * https://github.com/bitcoin/bitcoin/pull/31363#discussion_r2015001561 (introduce `GetConnectedComponent` and use it in the txgraph fuzz test).
  * https://github.com/bitcoin/bitcoin/pull/31363#discussion_r2015003889 (typo in `FixLinearization`)
  * https://github.com/bitcoin/bitcoin/pull/32151#discussion_r2015764185 (more typos)

ACKs for top commit:
  instagibbs:
    ACK a52b53926b
  glozow:
    ACK a52b53926b

Tree-SHA512: e8a98101ecb9c09b860306c7fbd22cf20bd12990768879753680dfbf7db31283506d9f30bb7ee726daebd90c34a1c565d07b3e1147b2a87426247acb19058ed3
This commit is contained in:
merge-script 2025-03-28 10:32:45 +08:00
commit 0a1e36effa
No known key found for this signature in database
GPG key ID: 2EEB9F5CC09526C1
5 changed files with 54 additions and 38 deletions

View file

@ -250,10 +250,8 @@ public:
return ret;
}
/** Find some connected component within the subset "todo" of this graph.
*
* Specifically, this finds the connected component which contains the first transaction of
* todo (if any).
/** Get the connected component within the subset "todo" that contains tx (which must be in
* todo).
*
* Two transactions are considered connected if they are both in `todo`, and one is an ancestor
* of the other in the entire graph (so not just within `todo`), or transitively there is a
@ -262,10 +260,11 @@ public:
*
* Complexity: O(ret.Count()).
*/
SetType FindConnectedComponent(const SetType& todo) const noexcept
SetType GetConnectedComponent(const SetType& todo, DepGraphIndex tx) const noexcept
{
if (todo.None()) return todo;
auto to_add = SetType::Singleton(todo.First());
Assume(todo[tx]);
Assume(todo.IsSubsetOf(m_used));
auto to_add = SetType::Singleton(tx);
SetType ret;
do {
SetType old = ret;
@ -279,6 +278,19 @@ public:
return ret;
}
/** Find some connected component within the subset "todo" of this graph.
*
* Specifically, this finds the connected component which contains the first transaction of
* todo (if any).
*
* Complexity: O(ret.Count()).
*/
SetType FindConnectedComponent(const SetType& todo) const noexcept
{
if (todo.None()) return todo;
return GetConnectedComponent(todo, todo.First());
}
/** Determine if a subset is connected.
*
* Complexity: O(subset.Count()).
@ -1367,7 +1379,7 @@ void FixLinearization(const DepGraph<SetType>& depgraph, std::span<DepGraphIndex
// in between forward.
while (place_before.Any()) {
// j cannot be 0 here; if it was, then there was necessarily nothing earlier which
// elem needs to be place before anymore, and place_before would be empty.
// elem needs to be placed before anymore, and place_before would be empty.
Assume(j > 0);
auto to_swap = linearization[len - 1 - (j - 1)];
place_before.Reset(to_swap);

View file

@ -446,19 +446,36 @@ FUZZ_TARGET(clusterlin_components)
// Construct a depgraph.
SpanReader reader(buffer);
DepGraph<TestBitSet> depgraph;
std::vector<DepGraphIndex> linearization;
try {
reader >> Using<DepGraphFormatter>(depgraph);
} catch (const std::ios_base::failure&) {}
TestBitSet todo = depgraph.Positions();
while (todo.Any()) {
// Find a connected component inside todo.
auto component = depgraph.FindConnectedComponent(todo);
// Pick a transaction in todo, or nothing.
std::optional<DepGraphIndex> picked;
{
uint64_t picked_num{0};
try {
reader >> VARINT(picked_num);
} catch (const std::ios_base::failure&) {}
if (picked_num < todo.Size() && todo[picked_num]) {
picked = picked_num;
}
}
// Find a connected component inside todo, including picked if any.
auto component = picked ? depgraph.GetConnectedComponent(todo, *picked)
: depgraph.FindConnectedComponent(todo);
// The component must be a subset of todo and non-empty.
assert(component.IsSubsetOf(todo));
assert(component.Any());
// If picked was provided, the component must include it.
if (picked) assert(component[*picked]);
// If todo is the entire graph, and the entire graph is connected, then the component must
// be the entire graph.
if (todo == depgraph.Positions()) {

View file

@ -561,36 +561,23 @@ FUZZ_TARGET(txgraph)
std::shuffle(refs.begin(), refs.end(), rng);
// Invoke the real function.
auto result = real->CountDistinctClusters(refs, use_main);
// Build a vector with representatives of the clusters the Refs occur in in the
// Build a set with representatives of the clusters the Refs occur in in the
// simulated graph. For each, remember the lowest-index transaction SimPos in the
// cluster.
std::vector<DepGraphIndex> sim_reps;
SimTxGraph::SetType sim_reps;
for (auto ref : refs) {
// Skip Refs that do not occur in the simulated graph.
auto simpos = sel_sim.Find(ref);
if (simpos == SimTxGraph::MISSING) continue;
// Start with component equal to just the Ref's SimPos.
auto component = SimTxGraph::SetType::Singleton(simpos);
// Keep adding ancestors/descendants of all elements in component until it no
// longer changes.
while (true) {
auto old_component = component;
for (auto i : component) {
component |= sel_sim.graph.Ancestors(i);
component |= sel_sim.graph.Descendants(i);
}
if (component == old_component) break;
}
// Find the component that includes ref.
auto component = sel_sim.graph.GetConnectedComponent(sel_sim.graph.Positions(), simpos);
// Remember the lowest-index SimPos in component, as a representative for it.
assert(component.Any());
sim_reps.push_back(component.First());
sim_reps.Set(component.First());
}
// Remove duplicates from sim_reps.
std::sort(sim_reps.begin(), sim_reps.end());
sim_reps.erase(std::unique(sim_reps.begin(), sim_reps.end()), sim_reps.end());
// Compare the number of deduplicated representatives with the value returned by
// the real function.
assert(result == sim_reps.size());
assert(result == sim_reps.Count());
break;
} else if (command-- == 0) {
// DoWork.

View file

@ -335,7 +335,7 @@ public:
// Simple helper functions.
/** Swap the Entrys referred to by a and b. */
/** Swap the Entry referred to by a and the one referred to by b. */
void SwapIndexes(GraphIndex a, GraphIndex b) noexcept;
/** If idx exists in the specified level ClusterSet (explicitly, or in the level below and not
* removed), return the Cluster it is in. Otherwise, return nullptr. */
@ -408,8 +408,8 @@ public:
// Functions related to various normalization/application steps.
/** Get rid of unlinked Entry objects in m_entries, if possible (this changes the GraphIndex
* values for remaining Entrys, so this only does something when no to-be-applied operations
* or staged removals referring to GraphIndexes remain). */
* values for remaining Entry objects, so this only does something when no to-be-applied
* operations or staged removals referring to GraphIndexes remain). */
void Compact() noexcept;
/** If cluster is not in staging, copy it there, and return a pointer to it. This has no
* effect if only a main graph exists, but if staging exists this modifies the locators of its
@ -505,7 +505,7 @@ void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept
void Cluster::Updated(TxGraphImpl& graph) noexcept
{
// Update all the Locators for this Cluster's Entrys.
// Update all the Locators for this Cluster's Entry objects.
for (DepGraphIndex idx : m_linearization) {
auto& entry = graph.m_entries[m_mapping[idx]];
entry.m_locator[m_level].SetPresent(this, idx);
@ -1321,9 +1321,9 @@ void TxGraphImpl::ApplyDependencies(int level) noexcept
if (clusterset.m_group_data->m_group_oversized) return;
// For each group of to-be-merged Clusters.
for (const auto& group_data : clusterset.m_group_data->m_groups) {
for (const auto& group_entry : clusterset.m_group_data->m_groups) {
auto cluster_span = std::span{clusterset.m_group_data->m_group_clusters}
.subspan(group_data.m_cluster_offset, group_data.m_cluster_count);
.subspan(group_entry.m_cluster_offset, group_entry.m_cluster_count);
// Pull in all the Clusters that contain dependencies.
if (level == 1) {
for (Cluster*& cluster : cluster_span) {
@ -1335,7 +1335,7 @@ void TxGraphImpl::ApplyDependencies(int level) noexcept
// Actually apply all to-be-added dependencies (all parents and children from this grouping
// belong to the same Cluster at this point because of the merging above).
auto deps_span = std::span{clusterset.m_deps_to_add}
.subspan(group_data.m_deps_offset, group_data.m_deps_count);
.subspan(group_entry.m_deps_offset, group_entry.m_deps_count);
Assume(!deps_span.empty());
const auto& loc = m_entries[deps_span[0].second].m_locator[level];
Assume(loc.IsPresent());
@ -1937,7 +1937,7 @@ void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const
assert(m_depgraph.IsConnected(linchunking.GetChunk(0).transactions));
}
}
// Verify that each element of m_depgraph occured in m_linearization.
// Verify that each element of m_depgraph occurred in m_linearization.
assert(m_done == m_depgraph.Positions());
}

View file

@ -99,7 +99,7 @@ public:
/** Create a staging graph (which cannot exist already). This acts as if a full copy of
* the transaction graph is made, upon which further modifications are made. This copy can
* be inspected, and then either discarded, or the main graph can be replaced by it by
* commiting it. */
* committing it. */
virtual void StartStaging() noexcept = 0;
/** Discard the existing active staging graph (which must exist). */
virtual void AbortStaging() noexcept = 0;