txgraph: Add CountDistinctClusters function (feature)

This commit is contained in:
Pieter Wuille 2025-01-16 16:00:10 -05:00
parent b685d322c9
commit aded047019
3 changed files with 77 additions and 0 deletions

View file

@ -526,6 +526,50 @@ FUZZ_TARGET(txgraph)
// these here without making more calls to real, which could affect its internal
// state. A full comparison is done at the end.
break;
} else if (!sel_sim.IsOversized() && command-- == 0) {
// CountDistinctClusters.
std::vector<TxGraph::Ref*> refs;
// Gather a list of up to 15 (or up to 255) Ref pointers.
auto count = provider.ConsumeIntegralInRange<size_t>(0, alt ? 255 : 15);
refs.resize(count);
for (size_t i = 0; i < count; ++i) {
refs[i] = pick_fn();
}
// Their order should not matter, shuffle them.
std::shuffle(refs.begin(), refs.end(), rng);
// Invoke the real function.
auto result = real->CountDistinctClusters(refs, use_main);
// Build a vector with representatives of the clusters that the Refs belong to in the
// simulated graph. For each, remember the lowest-index transaction SimPos in the
// cluster.
std::vector<DepGraphIndex> sim_reps;
for (auto ref : refs) {
// Skip Refs that do not occur in the simulated graph.
auto simpos = sel_sim.Find(ref);
if (simpos == SimTxGraph::MISSING) continue;
// Start with component equal to just the Ref's SimPos.
auto component = SimTxGraph::SetType::Singleton(simpos);
// Keep adding ancestors/descendants of all elements in component until it no
// longer changes.
while (true) {
auto old_component = component;
for (auto i : component) {
component |= sel_sim.graph.Ancestors(i);
component |= sel_sim.graph.Descendants(i);
}
if (component == old_component) break;
}
// Remember the lowest-index SimPos in component, as a representative for it.
assert(component.Any());
sim_reps.push_back(component.First());
}
// Remove duplicates from sim_reps.
std::sort(sim_reps.begin(), sim_reps.end());
sim_reps.erase(std::unique(sim_reps.begin(), sim_reps.end()), sim_reps.end());
// Compare the number of deduplicated representatives with the value returned by
// the real function.
assert(result == sim_reps.size());
break;
} else if (command-- == 0) {
// DoWork.
real->DoWork();

View file

@ -454,6 +454,7 @@ public:
GraphIndex GetTransactionCount(bool main_only = false) noexcept final;
bool IsOversized(bool main_only = false) noexcept final;
std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept final;
GraphIndex CountDistinctClusters(std::span<const Ref* const> refs, bool main_only = false) noexcept final;
void SanityCheck() const final;
};
@ -1781,6 +1782,33 @@ std::strong_ordering TxGraphImpl::CompareMainOrder(const Ref& a, const Ref& b) n
return entry_a.m_main_lin_index <=> entry_b.m_main_lin_index;
}
TxGraph::GraphIndex TxGraphImpl::CountDistinctClusters(std::span<const Ref* const> refs, bool main_only) noexcept
{
    // Pick the level to query, then apply its dependencies; afterwards none are expected
    // to remain queued for that level.
    const size_t level = GetSpecifiedLevel(main_only);
    ApplyDependencies(level);
    const auto& level_clusterset = GetClusterSet(level);
    Assume(level_clusterset.m_deps_to_add.empty());

    // Collect, for every usable Ref, the Cluster it is found in at the queried level.
    std::vector<Cluster*> found;
    found.reserve(refs.size());
    for (const Ref* ref : refs) {
        // Null entries and Refs that are not attached to any graph contribute nothing.
        if (ref == nullptr || GetRefGraph(*ref) == nullptr) continue;
        Assume(GetRefGraph(*ref) == this);
        // Refs for which no Cluster is found at this level are skipped as well.
        if (auto cluster = FindCluster(GetRefIndex(*ref), level); cluster != nullptr) {
            found.push_back(cluster);
        }
    }

    // Sorting places equal pointers next to each other, so the length of the deduplicated
    // prefix is the number of distinct clusters.
    std::sort(found.begin(), found.end());
    const auto distinct_end = std::unique(found.begin(), found.end());
    return static_cast<GraphIndex>(distinct_end - found.begin());
}
void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const
{
// There must be an m_mapping for each m_depgraph position (including holes).

View file

@ -149,6 +149,11 @@ public:
/** Compare two transactions according to their order in the main graph. Both transactions must
* be in the main graph. The main graph must not be oversized. */
virtual std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept = 0;
/** Count the number of distinct clusters that the specified transactions belong to. If
* main_only is false and a staging graph exists, staging clusters are counted. Otherwise,
* main clusters are counted. Refs that do not exist in the queried graph are ignored. The
* queried graph must not be oversized.
*
* A cluster is counted once no matter how many of the given Refs belong to it, so the
* result is at most the number of Refs passed in. The order of the Refs has no effect on
* the result. */
virtual GraphIndex CountDistinctClusters(std::span<const Ref* const>, bool main_only = false) noexcept = 0;
/** Perform an internal consistency check on this object. */
virtual void SanityCheck() const = 0;