diff --git a/src/bench/cluster_linearize.cpp b/src/bench/cluster_linearize.cpp index 9987d376a53..269648f4e2f 100644 --- a/src/bench/cluster_linearize.cpp +++ b/src/bench/cluster_linearize.cpp @@ -169,6 +169,38 @@ void BenchLinearizeNoItersWorstCaseLIMO(ClusterIndex ntx, benchmark::Bench& benc }); } +template +void BenchPostLinearizeWorstCase(ClusterIndex ntx, benchmark::Bench& bench) +{ + DepGraph depgraph = MakeWideGraph(ntx); + std::vector lin(ntx); + bench.run([&] { + for (ClusterIndex i = 0; i < ntx; ++i) lin[i] = i; + PostLinearize(depgraph, lin); + }); +} + +template +void BenchMergeLinearizationsWorstCase(ClusterIndex ntx, benchmark::Bench& bench) +{ + DepGraph depgraph; + for (ClusterIndex i = 0; i < ntx; ++i) { + depgraph.AddTransaction({i, 1}); + if (i) depgraph.AddDependency(0, i); + } + std::vector lin1; + std::vector lin2; + lin1.push_back(0); + lin2.push_back(0); + for (ClusterIndex i = 1; i < ntx; ++i) { + lin1.push_back(i); + lin2.push_back(ntx - i); + } + bench.run([&] { + MergeLinearizations(depgraph, lin1, lin2); + }); +} + } // namespace static void LinearizePerIter16TxWorstCase(benchmark::Bench& bench) { BenchLinearizePerIterWorstCase>(16, bench); } @@ -192,6 +224,20 @@ static void LinearizeNoIters64TxWorstCaseLIMO(benchmark::Bench& bench) { BenchLi static void LinearizeNoIters75TxWorstCaseLIMO(benchmark::Bench& bench) { BenchLinearizeNoItersWorstCaseLIMO>(75, bench); } static void LinearizeNoIters99TxWorstCaseLIMO(benchmark::Bench& bench) { BenchLinearizeNoItersWorstCaseLIMO>(99, bench); } +static void PostLinearize16TxWorstCase(benchmark::Bench& bench) { BenchPostLinearizeWorstCase>(16, bench); } +static void PostLinearize32TxWorstCase(benchmark::Bench& bench) { BenchPostLinearizeWorstCase>(32, bench); } +static void PostLinearize48TxWorstCase(benchmark::Bench& bench) { BenchPostLinearizeWorstCase>(48, bench); } +static void PostLinearize64TxWorstCase(benchmark::Bench& bench) { BenchPostLinearizeWorstCase>(64, bench); } +static void PostLinearize75TxWorstCase(benchmark::Bench& bench) { BenchPostLinearizeWorstCase>(75, bench); } +static void PostLinearize99TxWorstCase(benchmark::Bench& bench) { BenchPostLinearizeWorstCase>(99, bench); } + +static void MergeLinearizations16TxWorstCase(benchmark::Bench& bench) { BenchMergeLinearizationsWorstCase>(16, bench); } +static void MergeLinearizations32TxWorstCase(benchmark::Bench& bench) { BenchMergeLinearizationsWorstCase>(32, bench); } +static void MergeLinearizations48TxWorstCase(benchmark::Bench& bench) { BenchMergeLinearizationsWorstCase>(48, bench); } +static void MergeLinearizations64TxWorstCase(benchmark::Bench& bench) { BenchMergeLinearizationsWorstCase>(64, bench); } +static void MergeLinearizations75TxWorstCase(benchmark::Bench& bench) { BenchMergeLinearizationsWorstCase>(75, bench); } +static void MergeLinearizations99TxWorstCase(benchmark::Bench& bench) { BenchMergeLinearizationsWorstCase>(99, bench); } + BENCHMARK(LinearizePerIter16TxWorstCase, benchmark::PriorityLevel::HIGH); BENCHMARK(LinearizePerIter32TxWorstCase, benchmark::PriorityLevel::HIGH); BENCHMARK(LinearizePerIter48TxWorstCase, benchmark::PriorityLevel::HIGH); @@ -212,3 +258,17 @@ BENCHMARK(LinearizeNoIters48TxWorstCaseLIMO, benchmark::PriorityLevel::HIGH); BENCHMARK(LinearizeNoIters64TxWorstCaseLIMO, benchmark::PriorityLevel::HIGH); BENCHMARK(LinearizeNoIters75TxWorstCaseLIMO, benchmark::PriorityLevel::HIGH); BENCHMARK(LinearizeNoIters99TxWorstCaseLIMO, benchmark::PriorityLevel::HIGH); + +BENCHMARK(PostLinearize16TxWorstCase, benchmark::PriorityLevel::HIGH); +BENCHMARK(PostLinearize32TxWorstCase, benchmark::PriorityLevel::HIGH); +BENCHMARK(PostLinearize48TxWorstCase, benchmark::PriorityLevel::HIGH); +BENCHMARK(PostLinearize64TxWorstCase, benchmark::PriorityLevel::HIGH); +BENCHMARK(PostLinearize75TxWorstCase, benchmark::PriorityLevel::HIGH); +BENCHMARK(PostLinearize99TxWorstCase, benchmark::PriorityLevel::HIGH); + +BENCHMARK(MergeLinearizations16TxWorstCase, benchmark::PriorityLevel::HIGH); +BENCHMARK(MergeLinearizations32TxWorstCase, benchmark::PriorityLevel::HIGH); +BENCHMARK(MergeLinearizations48TxWorstCase, benchmark::PriorityLevel::HIGH); +BENCHMARK(MergeLinearizations64TxWorstCase, benchmark::PriorityLevel::HIGH); +BENCHMARK(MergeLinearizations75TxWorstCase, benchmark::PriorityLevel::HIGH); +BENCHMARK(MergeLinearizations99TxWorstCase, benchmark::PriorityLevel::HIGH); diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index 07d28a9aa51..607ae681d25 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -122,6 +122,8 @@ public: auto TxCount() const noexcept { return entries.size(); } /** Get the feerate of a given transaction i. Complexity: O(1). */ const FeeFrac& FeeRate(ClusterIndex i) const noexcept { return entries[i].feerate; } + /** Get the mutable feerate of a given transaction i. Complexity: O(1). */ + FeeFrac& FeeRate(ClusterIndex i) noexcept { return entries[i].feerate; } /** Get the ancestors of a given transaction i. Complexity: O(1). */ const SetType& Ancestors(ClusterIndex i) const noexcept { return entries[i].ancestors; } /** Get the descendants of a given transaction i. Complexity: O(1). */ @@ -171,6 +173,50 @@ public: return ret; } + /** Find some connected component within the subset "todo" of this graph. + * + * Specifically, this finds the connected component which contains the first transaction of + * todo (if any). + * + * Two transactions are considered connected if they are both in `todo`, and one is an ancestor + * of the other in the entire graph (so not just within `todo`), or transitively there is a + * path of transactions connecting them. This does mean that if `todo` contains a transaction + * and a grandparent, but misses the parent, they will still be part of the same component. + * + * Complexity: O(ret.Count()). + */ + SetType FindConnectedComponent(const SetType& todo) const noexcept + { + if (todo.None()) return todo; + auto to_add = SetType::Singleton(todo.First()); + SetType ret; + do { + SetType old = ret; + for (auto add : to_add) { + ret |= Descendants(add); + ret |= Ancestors(add); + } + ret &= todo; + to_add = ret - old; + } while (to_add.Any()); + return ret; + } + + /** Determine if a subset is connected. + * + * Complexity: O(subset.Count()). + */ + bool IsConnected(const SetType& subset) const noexcept + { + return FindConnectedComponent(subset) == subset; + } + + /** Determine if this entire graph is connected. + * + * Complexity: O(TxCount()). + */ + bool IsConnected() const noexcept { return IsConnected(SetType::Fill(TxCount())); } + /** Append the entries of select to list in a topologically valid order. * * Complexity: O(select.Count() * log(select.Count())). @@ -264,22 +310,30 @@ class LinearizationChunking /** The depgraph this linearization is for. */ const DepGraph& m_depgraph; - /** The linearization we started from. */ + /** The linearization we started from, possibly with removed prefix stripped. */ Span m_linearization; /** Chunk sets and their feerates, of what remains of the linearization. */ std::vector> m_chunks; + /** How large a prefix of m_chunks corresponds to removed transactions. */ + ClusterIndex m_chunks_skip{0}; + /** Which transactions remain in the linearization. */ SetType m_todo; - /** Fill the m_chunks variable. */ + /** Fill the m_chunks variable, and remove the done prefix of m_linearization. */ void BuildChunks() noexcept { // Caller must clear m_chunks. Assume(m_chunks.empty()); - // Iterate over the entries in m_linearization. This is effectively the same + // Chop off the initial part of m_linearization that is already done. + while (!m_linearization.empty() && !m_todo[m_linearization.front()]) { + m_linearization = m_linearization.subspan(1); + } + + // Iterate over the remaining entries in m_linearization. This is effectively the same // algorithm as ChunkLinearization, but supports skipping parts of the linearization and // keeps track of the sets themselves instead of just their feerates. for (auto idx : m_linearization) { @@ -309,13 +363,13 @@ public: } /** Determine how many chunks remain in the linearization. */ - ClusterIndex NumChunksLeft() const noexcept { return m_chunks.size(); } + ClusterIndex NumChunksLeft() const noexcept { return m_chunks.size() - m_chunks_skip; } /** Access a chunk. Chunk 0 is the highest-feerate prefix of what remains. */ const SetInfo& GetChunk(ClusterIndex n) const noexcept { - Assume(n < m_chunks.size()); - return m_chunks[n]; + Assume(n + m_chunks_skip < m_chunks.size()); + return m_chunks[n + m_chunks_skip]; } /** Remove some subset of transactions from the linearization. */ @@ -324,21 +378,33 @@ public: Assume(subset.Any()); Assume(subset.IsSubsetOf(m_todo)); m_todo -= subset; - // Rechunk what remains of m_linearization. - m_chunks.clear(); - BuildChunks(); + if (GetChunk(0).transactions == subset) { + // If the newly done transactions exactly match the first chunk of the remainder of + // the linearization, we do not need to rechunk; just remember to skip one + // additional chunk. + ++m_chunks_skip; + // With subset marked done, some prefix of m_linearization will be done now. How long + // that prefix is depends on how many done elements were interspersed with subset, + // but at least as many transactions as there are in subset. + m_linearization = m_linearization.subspan(subset.Count()); + } else { + // Otherwise rechunk what remains of m_linearization. + m_chunks.clear(); + m_chunks_skip = 0; + BuildChunks(); + } } /** Find the shortest intersection between subset and the prefixes of remaining chunks * of the linearization that has a feerate not below subset's. * * This is a crucial operation in guaranteeing improvements to linearizations. If subset has - * a feerate not below GetChunk(0)'s, then moving Intersect(subset) to the front of (what - * remains of) the linearization is guaranteed not to make it worse at any point. + * a feerate not below GetChunk(0)'s, then moving IntersectPrefixes(subset) to the front of + * (what remains of) the linearization is guaranteed not to make it worse at any point. * * See https://delvingbitcoin.org/t/introduction-to-cluster-linearization/1032 for background. */ - SetInfo Intersect(const SetInfo& subset) const noexcept + SetInfo IntersectPrefixes(const SetInfo& subset) const noexcept { Assume(subset.transactions.IsSubsetOf(m_todo)); SetInfo accumulator; @@ -719,7 +785,7 @@ std::pair, bool> Linearize(const DepGraph& de // sure we don't pick something that makes us unable to reach further diagram points // of the old linearization. if (old_chunking.NumChunksLeft() > 0) { - best = old_chunking.Intersect(best); + best = old_chunking.IntersectPrefixes(best); } } @@ -738,6 +804,249 @@ std::pair, bool> Linearize(const DepGraph& de return {std::move(linearization), optimal}; } +/** Improve a given linearization. + * + * @param[in] depgraph Dependency graph of the cluster being linearized. + * @param[in,out] linearization On input, an existing linearization for depgraph. On output, a + * potentially better linearization for the same graph. + * + * Postlinearization guarantees: + * - The resulting chunks are connected. + * - If the input has a tree shape (either all transactions have at most one child, or all + * transactions have at most one parent), the result is optimal. + * - Given a linearization L1 and a leaf transaction T in it. Let L2 be L1 with T moved to the end, + * optionally with its fee increased. Let L3 be the postlinearization of L2. L3 will be at least + * as good as L1. This means that replacing transactions with same-size higher-fee transactions + * will not worsen linearizations through a "drop conflicts, append new transactions, + * postlinearize" process. + */ +template +void PostLinearize(const DepGraph& depgraph, Span linearization) +{ + // This algorithm performs a number of passes (currently 2); the even ones operate from back to + // front, the odd ones from front to back. Each results in an equal-or-better linearization + // than the one started from. + // - One pass in either direction guarantees that the resulting chunks are connected. + // - Each direction corresponds to one shape of tree being linearized optimally (forward passes + // guarantee this for graphs where each transaction has at most one child; backward passes + // guarantee this for graphs where each transaction has at most one parent). + // - Starting with a backward pass guarantees the moved-tree property. + // + // During an odd (forward) pass, the high-level operation is: + // - Start with an empty list of groups L=[]. + // - For every transaction i in the old linearization, from front to back: + // - Append a new group C=[i], containing just i, to the back of L. + // - While L has at least one group before C, and the group immediately before C has feerate + // lower than C: + // - If C depends on P: + // - Merge P into C, making C the concatenation of P+C, continuing with the combined C. + // - Otherwise: + // - Swap P with C, continuing with the now-moved C. + // - The output linearization is the concatenation of the groups in L. + // + // During even (backward) passes, i iterates from the back to the front of the existing + // linearization, and new groups are prepended instead of appended to the list L. To enable + // more code reuse, both passes append groups, but during even passes the meanings of + // parent/child, and of high/low feerate are reversed, and the final concatenation is reversed + // on output. + // + // In the implementation below, the groups are represented by singly-linked lists (pointing + // from the back to the front), which are themselves organized in a singly-linked circular + // list (each group pointing to its predecessor, with a special sentinel group at the front + // that points back to the last group). + // + // Information about transaction t is stored in entries[t + 1], while the sentinel is in + // entries[0]. + + /** Index of the sentinel in the entries array below. */ + static constexpr ClusterIndex SENTINEL{0}; + /** Indicator that a group has no previous transaction. */ + static constexpr ClusterIndex NO_PREV_TX{0}; + + + /** Data structure per transaction entry. */ + struct TxEntry + { + /** The index of the previous transaction in this group; NO_PREV_TX if this is the first + * entry of a group. */ + ClusterIndex prev_tx; + + // The fields below are only used for transactions that are the last one in a group + // (referred to as tail transactions below). + + /** Index of the first transaction in this group, possibly itself. */ + ClusterIndex first_tx; + /** Index of the last transaction in the previous group. The first group (the sentinel) + * points back to the last group here, making it a singly-linked circular list. */ + ClusterIndex prev_group; + /** All transactions in the group. Empty for the sentinel. */ + SetType group; + /** All dependencies of the group (descendants in even passes; ancestors in odd ones). */ + SetType deps; + /** The combined fee/size of transactions in the group. Fee is negated in even passes. */ + FeeFrac feerate; + }; + + // As an example, consider the state corresponding to the linearization [1,0,3,2], with + // groups [1,0,3] and [2], in an odd pass. The linked lists would be: + // + // +-----+ + // 0<-P-- | 0 S | ---\ Legend: + // +-----+ | + // ^ | - digit in box: entries index + // /--------------F---------+ G | (note: one more than tx value) + // v \ | | - S: sentinel group + // +-----+ +-----+ +-----+ | (empty feerate) + // 0<-P-- | 2 | <--P-- | 1 | <--P-- | 4 T | | - T: tail transaction, contains + // +-----+ +-----+ +-----+ | fields beyond prev_tv. + // ^ | - P: prev_tx reference + // G G - F: first_tx reference + // | | - G: prev_group reference + // +-----+ | + // 0<-P-- | 3 T | <--/ + // +-----+ + // ^ | + // \-F-/ + // + // During an even pass, the diagram above would correspond to linearization [2,3,0,1], with + // groups [2] and [3,0,1]. + + std::vector entries(linearization.size() + 1); + + // Perform two passes over the linearization. + for (int pass = 0; pass < 2; ++pass) { + int rev = !(pass & 1); + // Construct a sentinel group, identifying the start of the list. + entries[SENTINEL].prev_group = SENTINEL; + Assume(entries[SENTINEL].feerate.IsEmpty()); + + // Iterate over all elements in the existing linearization. + for (ClusterIndex i = 0; i < linearization.size(); ++i) { + // Even passes are from back to front; odd passes from front to back. + ClusterIndex idx = linearization[rev ? linearization.size() - 1 - i : i]; + // Construct a new group containing just idx. In even passes, the meaning of + // parent/child and high/low feerate are swapped. + ClusterIndex cur_group = idx + 1; + entries[cur_group].group = SetType::Singleton(idx); + entries[cur_group].deps = rev ? depgraph.Descendants(idx): depgraph.Ancestors(idx); + entries[cur_group].feerate = depgraph.FeeRate(idx); + if (rev) entries[cur_group].feerate.fee = -entries[cur_group].feerate.fee; + entries[cur_group].prev_tx = NO_PREV_TX; // No previous transaction in group. + entries[cur_group].first_tx = cur_group; // Transaction itself is first of group. + // Insert the new group at the back of the groups linked list. + entries[cur_group].prev_group = entries[SENTINEL].prev_group; + entries[SENTINEL].prev_group = cur_group; + + // Start merge/swap cycle. + ClusterIndex next_group = SENTINEL; // We inserted at the end, so next group is sentinel. + ClusterIndex prev_group = entries[cur_group].prev_group; + // Continue as long as the current group has higher feerate than the previous one. + while (entries[cur_group].feerate >> entries[prev_group].feerate) { + // prev_group/cur_group/next_group refer to (the last transactions of) 3 + // consecutive entries in groups list. + Assume(cur_group == entries[next_group].prev_group); + Assume(prev_group == entries[cur_group].prev_group); + // The sentinel has empty feerate, which is neither higher or lower than other + // feerates. Thus, the while loop we are in here guarantees that cur_group and + // prev_group are not the sentinel. + Assume(cur_group != SENTINEL); + Assume(prev_group != SENTINEL); + if (entries[cur_group].deps.Overlaps(entries[prev_group].group)) { + // There is a dependency between cur_group and prev_group; merge prev_group + // into cur_group. The group/deps/feerate fields of prev_group remain unchanged + // but become unused. + entries[cur_group].group |= entries[prev_group].group; + entries[cur_group].deps |= entries[prev_group].deps; + entries[cur_group].feerate += entries[prev_group].feerate; + // Make the first of the current group point to the tail of the previous group. + entries[entries[cur_group].first_tx].prev_tx = prev_group; + // The first of the previous group becomes the first of the newly-merged group. + entries[cur_group].first_tx = entries[prev_group].first_tx; + // The previous group becomes whatever group was before the former one. + prev_group = entries[prev_group].prev_group; + entries[cur_group].prev_group = prev_group; + } else { + // There is no dependency between cur_group and prev_group; swap them. + ClusterIndex preprev_group = entries[prev_group].prev_group; + // If PP, P, C, N were the old preprev, prev, cur, next groups, then the new + // layout becomes [PP, C, P, N]. Update prev_groups to reflect that order. + entries[next_group].prev_group = prev_group; + entries[prev_group].prev_group = cur_group; + entries[cur_group].prev_group = preprev_group; + // The current group remains the same, but the groups before/after it have + // changed. + next_group = prev_group; + prev_group = preprev_group; + } + } + } + + // Convert the entries back to linearization (overwriting the existing one). + ClusterIndex cur_group = entries[0].prev_group; + ClusterIndex done = 0; + while (cur_group != SENTINEL) { + ClusterIndex cur_tx = cur_group; + // Traverse the transactions of cur_group (from back to front), and write them in the + // same order during odd passes, and reversed (front to back) in even passes. + if (rev) { + do { + *(linearization.begin() + (done++)) = cur_tx - 1; + cur_tx = entries[cur_tx].prev_tx; + } while (cur_tx != NO_PREV_TX); + } else { + do { + *(linearization.end() - (++done)) = cur_tx - 1; + cur_tx = entries[cur_tx].prev_tx; + } while (cur_tx != NO_PREV_TX); + } + cur_group = entries[cur_group].prev_group; + } + Assume(done == linearization.size()); + } +} + +/** Merge two linearizations for the same cluster into one that is as good as both. + * + * Complexity: O(N^2) where N=depgraph.TxCount(); O(N) if both inputs are identical. + */ +template +std::vector MergeLinearizations(const DepGraph& depgraph, Span lin1, Span lin2) +{ + Assume(lin1.size() == depgraph.TxCount()); + Assume(lin2.size() == depgraph.TxCount()); + + /** Chunkings of what remains of both input linearizations. */ + LinearizationChunking chunking1(depgraph, lin1), chunking2(depgraph, lin2); + /** Output linearization. */ + std::vector ret; + if (depgraph.TxCount() == 0) return ret; + ret.reserve(depgraph.TxCount()); + + while (true) { + // As long as we are not done, both linearizations must have chunks left. + Assume(chunking1.NumChunksLeft() > 0); + Assume(chunking2.NumChunksLeft() > 0); + // Find the set to output by taking the best remaining chunk, and then intersecting it with + // prefixes of remaining chunks of the other linearization. + SetInfo best; + const auto& lin1_firstchunk = chunking1.GetChunk(0); + const auto& lin2_firstchunk = chunking2.GetChunk(0); + if (lin2_firstchunk.feerate >> lin1_firstchunk.feerate) { + best = chunking1.IntersectPrefixes(lin2_firstchunk); + } else { + best = chunking2.IntersectPrefixes(lin1_firstchunk); + } + // Append the result to the output and mark it as done. + depgraph.AppendTopo(ret, best.transactions); + chunking1.MarkDone(best.transactions); + if (chunking1.NumChunksLeft() == 0) break; + chunking2.MarkDone(best.transactions); + } + + Assume(ret.size() == depgraph.TxCount()); + return ret; +} + } // namespace cluster_linearize #endif // BITCOIN_CLUSTER_LINEARIZE_H diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index 031cb045593..2dfdfbb41de 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -294,6 +294,81 @@ FUZZ_TARGET(clusterlin_depgraph_serialization) assert(IsAcyclic(depgraph)); } +FUZZ_TARGET(clusterlin_components) +{ + // Verify the behavior of DepGraphs's FindConnectedComponent and IsConnected functions. + + // Construct a depgraph. + SpanReader reader(buffer); + DepGraph depgraph; + try { + reader >> Using(depgraph); + } catch (const std::ios_base::failure&) {} + + TestBitSet todo = TestBitSet::Fill(depgraph.TxCount()); + while (todo.Any()) { + // Find a connected component inside todo. + auto component = depgraph.FindConnectedComponent(todo); + + // The component must be a subset of todo and non-empty. + assert(component.IsSubsetOf(todo)); + assert(component.Any()); + + // If todo is the entire graph, and the entire graph is connected, then the component must + // be the entire graph. + if (todo == TestBitSet::Fill(depgraph.TxCount())) { + assert((component == todo) == depgraph.IsConnected()); + } + + // If subset is connected, then component must match subset. + assert((component == todo) == depgraph.IsConnected(todo)); + + // The component cannot have any ancestors or descendants outside of component but in todo. + for (auto i : component) { + assert((depgraph.Ancestors(i) & todo).IsSubsetOf(component)); + assert((depgraph.Descendants(i) & todo).IsSubsetOf(component)); + } + + // Starting from any component element, we must be able to reach every element. + for (auto i : component) { + // Start with just i as reachable. + TestBitSet reachable = TestBitSet::Singleton(i); + // Add in-todo descendants and ancestors to reachable until it does not change anymore. + while (true) { + TestBitSet new_reachable = reachable; + for (auto j : new_reachable) { + new_reachable |= depgraph.Ancestors(j) & todo; + new_reachable |= depgraph.Descendants(j) & todo; + } + if (new_reachable == reachable) break; + reachable = new_reachable; + } + // Verify that the result is the entire component. + assert(component == reachable); + } + + // Construct an arbitrary subset of todo. + uint64_t subset_bits{0}; + try { + reader >> VARINT(subset_bits); + } catch (const std::ios_base::failure&) {} + TestBitSet subset; + for (ClusterIndex i = 0; i < depgraph.TxCount(); ++i) { + if (todo[i]) { + if (subset_bits & 1) subset.Set(i); + subset_bits >>= 1; + } + } + // Which must be non-empty. + if (subset.None()) subset = TestBitSet::Singleton(todo.First()); + // Remove it from todo. + todo -= subset; + } + + // No components can be found in an empty subset. + assert(depgraph.FindConnectedComponent(todo).None()); +} + FUZZ_TARGET(clusterlin_chunking) { // Verify the correctness of the ChunkLinearization function. @@ -357,6 +432,7 @@ FUZZ_TARGET(clusterlin_ancestor_finder) assert(best_anc.transactions.Any()); assert(best_anc.transactions.IsSubsetOf(todo)); assert(depgraph.FeeRate(best_anc.transactions) == best_anc.feerate); + assert(depgraph.IsConnected(best_anc.transactions)); // Check that it is topologically valid. for (auto i : best_anc.transactions) { assert((depgraph.Ancestors(i) & todo).IsSubsetOf(best_anc.transactions)); @@ -443,6 +519,9 @@ FUZZ_TARGET(clusterlin_search_finder) // Perform quality checks only if SearchCandidateFinder claims an optimal result. if (iterations_done < max_iterations) { + // Optimal sets are always connected. + assert(depgraph.IsConnected(found.transactions)); + // Compare with SimpleCandidateFinder. auto [simple, simple_iters] = smp_finder.FindCandidateSet(MAX_SIMPLE_ITERATIONS); assert(found.feerate >= simple.feerate); @@ -560,10 +639,10 @@ FUZZ_TARGET(clusterlin_linearization_chunking) } assert(combined == todo); - // Verify the expected properties of LinearizationChunking::Intersect: - auto intersect = chunking.Intersect(subset); + // Verify the expected properties of LinearizationChunking::IntersectPrefixes: + auto intersect = chunking.IntersectPrefixes(subset); // - Intersecting again doesn't change the result. - assert(chunking.Intersect(intersect) == intersect); + assert(chunking.IntersectPrefixes(intersect) == intersect); // - The intersection is topological. TestBitSet intersect_anc; for (auto idx : intersect.transactions) { @@ -687,3 +766,192 @@ FUZZ_TARGET(clusterlin_linearize) } } } + +FUZZ_TARGET(clusterlin_postlinearize) +{ + // Verify expected properties of PostLinearize() on arbitrary linearizations. + + // Retrieve a depgraph from the fuzz input. + SpanReader reader(buffer); + DepGraph depgraph; + try { + reader >> Using(depgraph); + } catch (const std::ios_base::failure&) {} + + // Retrieve a linearization from the fuzz input. + std::vector linearization; + linearization = ReadLinearization(depgraph, reader); + SanityCheck(depgraph, linearization); + + // Produce a post-processed version. + auto post_linearization = linearization; + PostLinearize(depgraph, post_linearization); + SanityCheck(depgraph, post_linearization); + + // Compare diagrams: post-linearization cannot worsen anywhere. + auto chunking = ChunkLinearization(depgraph, linearization); + auto post_chunking = ChunkLinearization(depgraph, post_linearization); + auto cmp = CompareChunks(post_chunking, chunking); + assert(cmp >= 0); + + // Run again, things can keep improving (and never get worse) + auto post_post_linearization = post_linearization; + PostLinearize(depgraph, post_post_linearization); + SanityCheck(depgraph, post_post_linearization); + auto post_post_chunking = ChunkLinearization(depgraph, post_post_linearization); + cmp = CompareChunks(post_post_chunking, post_chunking); + assert(cmp >= 0); + + // The chunks that come out of postlinearizing are always connected. + LinearizationChunking linchunking(depgraph, post_linearization); + while (linchunking.NumChunksLeft()) { + assert(depgraph.IsConnected(linchunking.GetChunk(0).transactions)); + linchunking.MarkDone(linchunking.GetChunk(0).transactions); + } +} + +FUZZ_TARGET(clusterlin_postlinearize_tree) +{ + // Verify expected properties of PostLinearize() on linearizations of graphs that form either + // an upright or reverse tree structure. + + // Construct a direction, RNG seed, and an arbitrary graph from the fuzz input. + SpanReader reader(buffer); + uint64_t rng_seed{0}; + DepGraph depgraph_gen; + uint8_t direction{0}; + try { + reader >> direction >> rng_seed >> Using(depgraph_gen); + } catch (const std::ios_base::failure&) {} + + // Now construct a new graph, copying the nodes, but leaving only the first parent (even + // direction) or the first child (odd direction). + DepGraph depgraph_tree; + for (ClusterIndex i = 0; i < depgraph_gen.TxCount(); ++i) { + depgraph_tree.AddTransaction(depgraph_gen.FeeRate(i)); + } + if (direction & 1) { + for (ClusterIndex i = 0; i < depgraph_gen.TxCount(); ++i) { + auto children = depgraph_gen.Descendants(i) - TestBitSet::Singleton(i); + // Remove descendants that are children of other descendants. + for (auto j : children) { + if (!children[j]) continue; + children -= depgraph_gen.Descendants(j); + children.Set(j); + } + if (children.Any()) depgraph_tree.AddDependency(i, children.First()); + } + } else { + for (ClusterIndex i = 0; i < depgraph_gen.TxCount(); ++i) { + auto parents = depgraph_gen.Ancestors(i) - TestBitSet::Singleton(i); + // Remove ancestors that are parents of other ancestors. + for (auto j : parents) { + if (!parents[j]) continue; + parents -= depgraph_gen.Ancestors(j); + parents.Set(j); + } + if (parents.Any()) depgraph_tree.AddDependency(parents.First(), i); + } + } + + // Retrieve a linearization from the fuzz input. + std::vector linearization; + linearization = ReadLinearization(depgraph_tree, reader); + SanityCheck(depgraph_tree, linearization); + + // Produce a postlinearized version. + auto post_linearization = linearization; + PostLinearize(depgraph_tree, post_linearization); + SanityCheck(depgraph_tree, post_linearization); + + // Compare diagrams. + auto chunking = ChunkLinearization(depgraph_tree, linearization); + auto post_chunking = ChunkLinearization(depgraph_tree, post_linearization); + auto cmp = CompareChunks(post_chunking, chunking); + assert(cmp >= 0); + + // Verify that post-linearizing again does not change the diagram. The result must be identical + // as post_linearization ought to be optimal already with a tree-structured graph. + auto post_post_linearization = post_linearization; + PostLinearize(depgraph_tree, post_linearization); + SanityCheck(depgraph_tree, post_linearization); + auto post_post_chunking = ChunkLinearization(depgraph_tree, post_post_linearization); + auto cmp_post = CompareChunks(post_post_chunking, post_chunking); + assert(cmp_post == 0); + + // Try to find an even better linearization directly. This must not change the diagram for the + // same reason. + auto [opt_linearization, _optimal] = Linearize(depgraph_tree, 100000, rng_seed, post_linearization); + auto opt_chunking = ChunkLinearization(depgraph_tree, opt_linearization); + auto cmp_opt = CompareChunks(opt_chunking, post_chunking); + assert(cmp_opt == 0); +} + +FUZZ_TARGET(clusterlin_postlinearize_moved_leaf) +{ + // Verify that taking an existing linearization, and moving a leaf to the back, potentially + // increasing its fee, and then post-linearizing, results in something as good as the + // original. This guarantees that in an RBF that replaces a transaction with one of the same + // size but higher fee, applying the "remove conflicts, append new transaction, postlinearize" + // process will never worsen linearization quality. + + // Construct an arbitrary graph and a fee from the fuzz input. + SpanReader reader(buffer); + DepGraph depgraph; + int32_t fee_inc{0}; + try { + uint64_t fee_inc_code; + reader >> Using(depgraph) >> VARINT(fee_inc_code); + fee_inc = fee_inc_code & 0x3ffff; + } catch (const std::ios_base::failure&) {} + if (depgraph.TxCount() == 0) return; + + // Retrieve two linearizations from the fuzz input. + auto lin = ReadLinearization(depgraph, reader); + auto lin_leaf = ReadLinearization(depgraph, reader); + + // Construct a linearization identical to lin, but with the tail end of lin_leaf moved to the + // back. + std::vector lin_moved; + for (auto i : lin) { + if (i != lin_leaf.back()) lin_moved.push_back(i); + } + lin_moved.push_back(lin_leaf.back()); + + // Postlinearize lin_moved. + PostLinearize(depgraph, lin_moved); + SanityCheck(depgraph, lin_moved); + + // Compare diagrams (applying the fee delta after computing the old one). + auto old_chunking = ChunkLinearization(depgraph, lin); + depgraph.FeeRate(lin_leaf.back()).fee += fee_inc; + auto new_chunking = ChunkLinearization(depgraph, lin_moved); + auto cmp = CompareChunks(new_chunking, old_chunking); + assert(cmp >= 0); +} + +FUZZ_TARGET(clusterlin_merge) +{ + // Construct an arbitrary graph from the fuzz input. + SpanReader reader(buffer); + DepGraph depgraph; + try { + reader >> Using(depgraph); + } catch (const std::ios_base::failure&) {} + + // Retrieve two linearizations from the fuzz input. + auto lin1 = ReadLinearization(depgraph, reader); + auto lin2 = ReadLinearization(depgraph, reader); + + // Merge the two. + auto lin_merged = MergeLinearizations(depgraph, lin1, lin2); + + // Compute chunkings and compare. + auto chunking1 = ChunkLinearization(depgraph, lin1); + auto chunking2 = ChunkLinearization(depgraph, lin2); + auto chunking_merged = ChunkLinearization(depgraph, lin_merged); + auto cmp1 = CompareChunks(chunking_merged, chunking1); + assert(cmp1 >= 0); + auto cmp2 = CompareChunks(chunking_merged, chunking2); + assert(cmp2 >= 0); +}