clusterlin: separate initial search entries per component (optimization)

Before this commit, the worst case for linearization involves clusters which
break apart in several smaller components after the first candidate is
included in the output linearization.

Address this by never considering work items that span multiple components
of what remains of the cluster.
This commit is contained in:
Pieter Wuille 2024-05-13 16:50:34 -04:00
parent e4faea9ca7
commit 85a285a306
2 changed files with 67 additions and 16 deletions

View file

@ -613,11 +613,20 @@ public:
VecDeque<WorkItem> queue;
queue.reserve(std::max<size_t>(256, 2 * m_todo.Count()));
// Create an initial entry with m_todo as undecided. Also use it as best if not provided,
// so that during the work processing loop below, and during the add_fn/split_fn calls, we
// do not need to deal with the best=empty case.
if (best.feerate.IsEmpty()) best = SetInfo(m_depgraph, m_todo);
queue.emplace_back(SetInfo<SetType>{}, SetType{m_todo});
// Create initial entries per connected component of m_todo. While clusters themselves are
// generally connected, this is not necessarily true after some parts have already been
// removed from m_todo. Without this, effort can be wasted on searching "inc" sets that
// span multiple components.
auto to_cover = m_todo;
do {
auto component = m_depgraph.FindConnectedComponent(to_cover);
to_cover -= component;
// If best is not provided, set it to the first component, so that during the work
// processing loop below, and during the add_fn/split_fn calls, we do not need to deal
// with the best=empty case.
if (best.feerate.IsEmpty()) best = SetInfo(m_depgraph, component);
queue.emplace_back(/*inc=*/SetInfo<SetType>{}, /*und=*/std::move(component));
} while (to_cover.Any());
/** Local copy of the iteration limit. */
uint64_t iterations_left = max_iterations;
@ -645,7 +654,7 @@ public:
// space runs out (see below), we know that no reallocation of the queue should ever
// occur.
Assume(queue.size() < queue.capacity());
queue.emplace_back(std::move(inc), std::move(und));
queue.emplace_back(/*inc=*/std::move(inc), /*und=*/std::move(und));
};
/** Internal process function. It takes an existing work item, and splits it in two: one

View file

@ -165,6 +165,23 @@ std::pair<std::vector<ClusterIndex>, bool> SimpleLinearize(const DepGraph<SetTyp
return {std::move(linearization), optimal};
}
/** Stitch connected components together in a DepGraph, guaranteeing its corresponding cluster is connected. */
template<typename BS>
void MakeConnected(DepGraph<BS>& depgraph)
{
auto todo = BS::Fill(depgraph.TxCount());
auto comp = depgraph.FindConnectedComponent(todo);
Assume(depgraph.IsConnected(comp));
todo -= comp;
while (todo.Any()) {
auto nextcomp = depgraph.FindConnectedComponent(todo);
Assume(depgraph.IsConnected(nextcomp));
depgraph.AddDependency(comp.Last(), nextcomp.First());
todo -= nextcomp;
comp = nextcomp;
}
}
/** Given a dependency graph, and a todo set, read a topological subset of todo from reader. */
template<typename SetType>
SetType ReadTopologicalSet(const DepGraph<SetType>& depgraph, const SetType& todo, SpanReader& reader)
@ -369,6 +386,20 @@ FUZZ_TARGET(clusterlin_components)
assert(depgraph.FindConnectedComponent(todo).None());
}
FUZZ_TARGET(clusterlin_make_connected)
{
// Verify that MakeConnected makes graphs connected.
SpanReader reader(buffer);
DepGraph<TestBitSet> depgraph;
try {
reader >> Using<DepGraphFormatter>(depgraph);
} catch (const std::ios_base::failure&) {}
MakeConnected(depgraph);
SanityCheck(depgraph);
assert(depgraph.IsConnected());
}
FUZZ_TARGET(clusterlin_chunking)
{
// Verify the correctness of the ChunkLinearization function.
@ -468,13 +499,17 @@ FUZZ_TARGET(clusterlin_search_finder)
// and comparing with the results from SimpleCandidateFinder, ExhaustiveCandidateFinder, and
// AncestorCandidateFinder.
// Retrieve an RNG seed and a depgraph from the fuzz input.
// Retrieve an RNG seed, a depgraph, and whether to make it connected, from the fuzz input.
SpanReader reader(buffer);
DepGraph<TestBitSet> depgraph;
uint64_t rng_seed{0};
uint8_t make_connected{1};
try {
reader >> Using<DepGraphFormatter>(depgraph) >> rng_seed;
reader >> Using<DepGraphFormatter>(depgraph) >> rng_seed >> make_connected;
} catch (const std::ios_base::failure&) {}
// The most complicated graphs are connected ones (other ones just split up). Optionally force
// the graph to be connected.
if (make_connected) MakeConnected(depgraph);
// Instantiate ALL the candidate finders.
SearchCandidateFinder src_finder(depgraph, rng_seed);
@ -513,9 +548,11 @@ FUZZ_TARGET(clusterlin_search_finder)
assert(found.transactions.IsSupersetOf(depgraph.Ancestors(i) & todo));
}
// At most 2^N-1 iterations can be required: the number of non-empty subsets a graph with N
// transactions has.
assert(iterations_done <= ((uint64_t{1} << todo.Count()) - 1));
// At most 2^(N-1) iterations can be required: the maximum number of non-empty topological
// subsets a (connected) cluster with N transactions can have. Even when the cluster is no
// longer connected after removing certain transactions, this holds, because the connected
// components are searched separately.
assert(iterations_done <= (uint64_t{1} << (todo.Count() - 1)));
// Perform quality checks only if SearchCandidateFinder claims an optimal result.
if (iterations_done < max_iterations) {
@ -685,14 +722,19 @@ FUZZ_TARGET(clusterlin_linearize)
{
// Verify the behavior of Linearize().
// Retrieve an RNG seed, an iteration count, and a depgraph from the fuzz input.
// Retrieve an RNG seed, an iteration count, a depgraph, and whether to make it connected from
// the fuzz input.
SpanReader reader(buffer);
DepGraph<TestBitSet> depgraph;
uint64_t rng_seed{0};
uint64_t iter_count{0};
uint8_t make_connected{1};
try {
reader >> VARINT(iter_count) >> Using<DepGraphFormatter>(depgraph) >> rng_seed;
reader >> VARINT(iter_count) >> Using<DepGraphFormatter>(depgraph) >> rng_seed >> make_connected;
} catch (const std::ios_base::failure&) {}
// The most complicated graphs are connected ones (other ones just split up). Optionally force
// the graph to be connected.
if (make_connected) MakeConnected(depgraph);
// Optionally construct an old linearization for it.
std::vector<ClusterIndex> old_linearization;
@ -721,10 +763,10 @@ FUZZ_TARGET(clusterlin_linearize)
}
// If the iteration count is sufficiently high, an optimal linearization must be found.
// Each linearization step can use up to 2^k iterations, with steps k=1..n. That sum is
// 2 * (2^n - 1)
// Each linearization step can use up to 2^(k-1) iterations, with steps k=1..n. That sum is
// 2^n - 1.
const uint64_t n = depgraph.TxCount();
if (n <= 18 && iter_count > 2U * ((uint64_t{1} << n) - 1U)) {
if (n <= 19 && iter_count > (uint64_t{1} << n)) {
assert(optimal);
}