From 0e52728a2d6ccafcfecfefbb5a0432a9881d8e0d Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter@wuille.net>
Date: Tue, 30 Jul 2024 16:21:51 -0400
Subject: [PATCH 1/5] clusterlin: rename Intersect -> IntersectPrefixes

This makes it clearer what the function does.
---
 src/cluster_linearize.h             | 8 ++++----
 src/test/fuzz/cluster_linearize.cpp | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h
index 07d28a9aa5..61b76968cf 100644
--- a/src/cluster_linearize.h
+++ b/src/cluster_linearize.h
@@ -333,12 +333,12 @@ public:
      *  of the linearization that has a feerate not below subset's.
      *
      * This is a crucial operation in guaranteeing improvements to linearizations. If subset has
-     * a feerate not below GetChunk(0)'s, then moving Intersect(subset) to the front of (what
-     * remains of) the linearization is guaranteed not to make it worse at any point.
+     * a feerate not below GetChunk(0)'s, then moving IntersectPrefixes(subset) to the front of
+     * (what remains of) the linearization is guaranteed not to make it worse at any point.
      *
      * See https://delvingbitcoin.org/t/introduction-to-cluster-linearization/1032 for background.
      */
-    SetInfo<SetType> Intersect(const SetInfo<SetType>& subset) const noexcept
+    SetInfo<SetType> IntersectPrefixes(const SetInfo<SetType>& subset) const noexcept
     {
         Assume(subset.transactions.IsSubsetOf(m_todo));
         SetInfo<SetType> accumulator;
@@ -719,7 +719,7 @@ std::pair<std::vector<ClusterIndex>, bool> Linearize(const DepGraph<SetType>& de
             // sure we don't pick something that makes us unable to reach further diagram points
             // of the old linearization.
             if (old_chunking.NumChunksLeft() > 0) {
-                best = old_chunking.Intersect(best);
+                best = old_chunking.IntersectPrefixes(best);
             }
         }
 
diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp
index 031cb04559..c97d00dea1 100644
--- a/src/test/fuzz/cluster_linearize.cpp
+++ b/src/test/fuzz/cluster_linearize.cpp
@@ -560,10 +560,10 @@ FUZZ_TARGET(clusterlin_linearization_chunking)
         }
         assert(combined == todo);
 
-        // Verify the expected properties of LinearizationChunking::Intersect:
-        auto intersect = chunking.Intersect(subset);
+        // Verify the expected properties of LinearizationChunking::IntersectPrefixes:
+        auto intersect = chunking.IntersectPrefixes(subset);
         // - Intersecting again doesn't change the result.
-        assert(chunking.Intersect(intersect) == intersect);
+        assert(chunking.IntersectPrefixes(intersect) == intersect);
         // - The intersection is topological.
         TestBitSet intersect_anc;
         for (auto idx : intersect.transactions) {

From 0e2812d2938b933debffba5b873637fa1d348b81 Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter@wuille.net>
Date: Wed, 15 May 2024 08:37:12 -0400
Subject: [PATCH 2/5] clusterlin: add algorithms for connectedness/connected
 components

Add utility functions to DepGraph for finding connected components.
---
 src/cluster_linearize.h             | 44 ++++++++++++++++
 src/test/fuzz/cluster_linearize.cpp | 79 +++++++++++++++++++++++++++++
 2 files changed, 123 insertions(+)

diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h
index 61b76968cf..b581f01da5 100644
--- a/src/cluster_linearize.h
+++ b/src/cluster_linearize.h
@@ -171,6 +171,50 @@ public:
         return ret;
     }
 
+    /** Find some connected component within the subset "todo" of this graph.
+     *
+     * Specifically, this finds the connected component which contains the first transaction of
+     * todo (if any).
+     *
+     * Two transactions are considered connected if they are both in `todo`, and one is an ancestor
+     * of the other in the entire graph (so not just within `todo`), or transitively there is a
+     * path of transactions connecting them. This does mean that if `todo` contains a transaction
+     * and a grandparent, but misses the parent, they will still be part of the same component.
+     *
+     * Complexity: O(ret.Count()).
+     */
+    SetType FindConnectedComponent(const SetType& todo) const noexcept
+    {
+        if (todo.None()) return todo;
+        auto to_add = SetType::Singleton(todo.First());
+        SetType ret;
+        do {
+            SetType old = ret;
+            for (auto add : to_add) {
+                ret |= Descendants(add);
+                ret |= Ancestors(add);
+            }
+            ret &= todo;
+            to_add = ret - old;
+        } while (to_add.Any());
+        return ret;
+    }
+
+    /** Determine if a subset is connected.
+     *
+     * Complexity: O(subset.Count()).
+     */
+    bool IsConnected(const SetType& subset) const noexcept
+    {
+        return FindConnectedComponent(subset) == subset;
+    }
+
+    /** Determine if this entire graph is connected.
+     *
+     * Complexity: O(TxCount()).
+     */
+    bool IsConnected() const noexcept { return IsConnected(SetType::Fill(TxCount())); }
+
     /** Append the entries of select to list in a topologically valid order.
      *
      * Complexity: O(select.Count() * log(select.Count())).
diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp
index c97d00dea1..1d16432c9a 100644
--- a/src/test/fuzz/cluster_linearize.cpp
+++ b/src/test/fuzz/cluster_linearize.cpp
@@ -294,6 +294,81 @@ FUZZ_TARGET(clusterlin_depgraph_serialization)
     assert(IsAcyclic(depgraph));
 }
 
+FUZZ_TARGET(clusterlin_components)
+{
+    // Verify the behavior of DepGraph's FindConnectedComponent and IsConnected functions.
+
+    // Construct a depgraph.
+    SpanReader reader(buffer);
+    DepGraph<TestBitSet> depgraph;
+    try {
+        reader >> Using<DepGraphFormatter>(depgraph);
+    } catch (const std::ios_base::failure&) {}
+
+    TestBitSet todo = TestBitSet::Fill(depgraph.TxCount());
+    while (todo.Any()) {
+        // Find a connected component inside todo.
+        auto component = depgraph.FindConnectedComponent(todo);
+
+        // The component must be a subset of todo and non-empty.
+        assert(component.IsSubsetOf(todo));
+        assert(component.Any());
+
+        // If todo is the entire graph, and the entire graph is connected, then the component must
+        // be the entire graph.
+        if (todo == TestBitSet::Fill(depgraph.TxCount())) {
+            assert((component == todo) == depgraph.IsConnected());
+        }
+
+        // If todo is connected, then component must match todo.
+        assert((component == todo) == depgraph.IsConnected(todo));
+
+        // The component cannot have any ancestors or descendants outside of component but in todo.
+        for (auto i : component) {
+            assert((depgraph.Ancestors(i) & todo).IsSubsetOf(component));
+            assert((depgraph.Descendants(i) & todo).IsSubsetOf(component));
+        }
+
+        // Starting from any component element, we must be able to reach every element.
+        for (auto i : component) {
+            // Start with just i as reachable.
+            TestBitSet reachable = TestBitSet::Singleton(i);
+            // Add in-todo descendants and ancestors to reachable until it does not change anymore.
+            while (true) {
+                TestBitSet new_reachable = reachable;
+                for (auto j : new_reachable) {
+                    new_reachable |= depgraph.Ancestors(j) & todo;
+                    new_reachable |= depgraph.Descendants(j) & todo;
+                }
+                if (new_reachable == reachable) break;
+                reachable = new_reachable;
+            }
+            // Verify that the result is the entire component.
+            assert(component == reachable);
+        }
+
+        // Construct an arbitrary subset of todo.
+        uint64_t subset_bits{0};
+        try {
+            reader >> VARINT(subset_bits);
+        } catch (const std::ios_base::failure&) {}
+        TestBitSet subset;
+        for (ClusterIndex i = 0; i < depgraph.TxCount(); ++i) {
+            if (todo[i]) {
+                if (subset_bits & 1) subset.Set(i);
+                subset_bits >>= 1;
+            }
+        }
+        // Which must be non-empty.
+        if (subset.None()) subset = TestBitSet::Singleton(todo.First());
+        // Remove it from todo.
+        todo -= subset;
+    }
+
+    // No components can be found in an empty subset.
+    assert(depgraph.FindConnectedComponent(todo).None());
+}
+
 FUZZ_TARGET(clusterlin_chunking)
 {
     // Verify the correctness of the ChunkLinearization function.
@@ -357,6 +432,7 @@ FUZZ_TARGET(clusterlin_ancestor_finder)
         assert(best_anc.transactions.Any());
         assert(best_anc.transactions.IsSubsetOf(todo));
         assert(depgraph.FeeRate(best_anc.transactions) == best_anc.feerate);
+        assert(depgraph.IsConnected(best_anc.transactions));
         // Check that it is topologically valid.
         for (auto i : best_anc.transactions) {
             assert((depgraph.Ancestors(i) & todo).IsSubsetOf(best_anc.transactions));
@@ -443,6 +519,9 @@ FUZZ_TARGET(clusterlin_search_finder)
 
         // Perform quality checks only if SearchCandidateFinder claims an optimal result.
         if (iterations_done < max_iterations) {
+            // Optimal sets are always connected.
+            assert(depgraph.IsConnected(found.transactions));
+
             // Compare with SimpleCandidateFinder.
             auto [simple, simple_iters] = smp_finder.FindCandidateSet(MAX_SIMPLE_ITERATIONS);
             assert(found.feerate >= simple.feerate);

From 4f8958d7563ae2d0d359ec1e6885f8cb5e40a5e0 Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter@wuille.net>
Date: Sun, 19 May 2024 08:03:57 -0400
Subject: [PATCH 3/5] clusterlin: add PostLinearize + benchmarks + fuzz tests

---
 src/bench/cluster_linearize.cpp     |  25 ++++
 src/cluster_linearize.h             | 203 ++++++++++++++++++++++++++++
 src/test/fuzz/cluster_linearize.cpp | 163 ++++++++++++++++++++++
 3 files changed, 391 insertions(+)

diff --git a/src/bench/cluster_linearize.cpp b/src/bench/cluster_linearize.cpp
index 9987d376a5..30c7ecef01 100644
--- a/src/bench/cluster_linearize.cpp
+++ b/src/bench/cluster_linearize.cpp
@@ -169,6 +169,17 @@ void BenchLinearizeNoItersWorstCaseLIMO(ClusterIndex ntx, benchmark::Bench& benc
     });
 }
 
+template<typename SetType>
+void BenchPostLinearizeWorstCase(ClusterIndex ntx, benchmark::Bench& bench)
+{
+    DepGraph<SetType> depgraph = MakeWideGraph<SetType>(ntx);
+    std::vector<ClusterIndex> lin(ntx);
+    bench.run([&] {
+        for (ClusterIndex i = 0; i < ntx; ++i) lin[i] = i;
+        PostLinearize(depgraph, lin);
+    });
+}
+
 } // namespace
 
 static void LinearizePerIter16TxWorstCase(benchmark::Bench& bench) { BenchLinearizePerIterWorstCase<BitSet<16>>(16, bench); }
@@ -192,6 +203,13 @@ static void LinearizeNoIters64TxWorstCaseLIMO(benchmark::Bench& bench) { BenchLi
 static void LinearizeNoIters75TxWorstCaseLIMO(benchmark::Bench& bench) { BenchLinearizeNoItersWorstCaseLIMO<BitSet<75>>(75, bench); }
 static void LinearizeNoIters99TxWorstCaseLIMO(benchmark::Bench& bench) { BenchLinearizeNoItersWorstCaseLIMO<BitSet<99>>(99, bench); }
 
+static void PostLinearize16TxWorstCase(benchmark::Bench& bench) { BenchPostLinearizeWorstCase<BitSet<16>>(16, bench); }
+static void PostLinearize32TxWorstCase(benchmark::Bench& bench) { BenchPostLinearizeWorstCase<BitSet<32>>(32, bench); }
+static void PostLinearize48TxWorstCase(benchmark::Bench& bench) { BenchPostLinearizeWorstCase<BitSet<48>>(48, bench); }
+static void PostLinearize64TxWorstCase(benchmark::Bench& bench) { BenchPostLinearizeWorstCase<BitSet<64>>(64, bench); }
+static void PostLinearize75TxWorstCase(benchmark::Bench& bench) { BenchPostLinearizeWorstCase<BitSet<75>>(75, bench); }
+static void PostLinearize99TxWorstCase(benchmark::Bench& bench) { BenchPostLinearizeWorstCase<BitSet<99>>(99, bench); }
+
 BENCHMARK(LinearizePerIter16TxWorstCase, benchmark::PriorityLevel::HIGH);
 BENCHMARK(LinearizePerIter32TxWorstCase, benchmark::PriorityLevel::HIGH);
 BENCHMARK(LinearizePerIter48TxWorstCase, benchmark::PriorityLevel::HIGH);
@@ -212,3 +230,10 @@ BENCHMARK(LinearizeNoIters48TxWorstCaseLIMO, benchmark::PriorityLevel::HIGH);
 BENCHMARK(LinearizeNoIters64TxWorstCaseLIMO, benchmark::PriorityLevel::HIGH);
 BENCHMARK(LinearizeNoIters75TxWorstCaseLIMO, benchmark::PriorityLevel::HIGH);
 BENCHMARK(LinearizeNoIters99TxWorstCaseLIMO, benchmark::PriorityLevel::HIGH);
+
+BENCHMARK(PostLinearize16TxWorstCase, benchmark::PriorityLevel::HIGH);
+BENCHMARK(PostLinearize32TxWorstCase, benchmark::PriorityLevel::HIGH);
+BENCHMARK(PostLinearize48TxWorstCase, benchmark::PriorityLevel::HIGH);
+BENCHMARK(PostLinearize64TxWorstCase, benchmark::PriorityLevel::HIGH);
+BENCHMARK(PostLinearize75TxWorstCase, benchmark::PriorityLevel::HIGH);
+BENCHMARK(PostLinearize99TxWorstCase, benchmark::PriorityLevel::HIGH);
diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h
index b581f01da5..1e02d9fc3b 100644
--- a/src/cluster_linearize.h
+++ b/src/cluster_linearize.h
@@ -122,6 +122,8 @@ public:
     auto TxCount() const noexcept { return entries.size(); }
     /** Get the feerate of a given transaction i. Complexity: O(1). */
     const FeeFrac& FeeRate(ClusterIndex i) const noexcept { return entries[i].feerate; }
+    /** Get the mutable feerate of a given transaction i. Complexity: O(1). */
+    FeeFrac& FeeRate(ClusterIndex i) noexcept { return entries[i].feerate; }
     /** Get the ancestors of a given transaction i. Complexity: O(1). */
     const SetType& Ancestors(ClusterIndex i) const noexcept { return entries[i].ancestors; }
     /** Get the descendants of a given transaction i. Complexity: O(1). */
@@ -782,6 +784,207 @@ std::pair<std::vector<ClusterIndex>, bool> Linearize(const DepGraph<SetType>& de
     return {std::move(linearization), optimal};
 }
 
+/** Improve a given linearization.
+ *
+ * @param[in]     depgraph       Dependency graph of the cluster being linearized.
+ * @param[in,out] linearization  On input, an existing linearization for depgraph. On output, a
+ *                               potentially better linearization for the same graph.
+ *
+ * Postlinearization guarantees:
+ * - The resulting chunks are connected.
+ * - If the input has a tree shape (either all transactions have at most one child, or all
+ *   transactions have at most one parent), the result is optimal.
+ * - Given a linearization L1 and a leaf transaction T in it. Let L2 be L1 with T moved to the end,
+ *   optionally with its fee increased. Let L3 be the postlinearization of L2. L3 will be at least
+ *   as good as L1. This means that replacing transactions with same-size higher-fee transactions
+ *   will not worsen linearizations through a "drop conflicts, append new transactions,
+ *   postlinearize" process.
+ */
+template<typename SetType>
+void PostLinearize(const DepGraph<SetType>& depgraph, Span<ClusterIndex> linearization)
+{
+    // This algorithm performs a number of passes (currently 2); the even ones operate from back to
+    // front, the odd ones from front to back. Each results in an equal-or-better linearization
+    // than the one started from.
+    // - One pass in either direction guarantees that the resulting chunks are connected.
+    // - Each direction corresponds to one shape of tree being linearized optimally (forward passes
+    //   guarantee this for graphs where each transaction has at most one child; backward passes
+    //   guarantee this for graphs where each transaction has at most one parent).
+    // - Starting with a backward pass guarantees the moved-tree property.
+    //
+    // During an odd (forward) pass, the high-level operation is:
+    // - Start with an empty list of groups L=[].
+    // - For every transaction i in the old linearization, from front to back:
+    //   - Append a new group C=[i], containing just i, to the back of L.
+    //   - While L has at least one group before C, and the group P immediately before C has feerate
+    //     lower than C:
+    //     - If C depends on P:
+    //       - Merge P into C, making C the concatenation of P+C, continuing with the combined C.
+    //     - Otherwise:
+    //       - Swap P with C, continuing with the now-moved C.
+    // - The output linearization is the concatenation of the groups in L.
+    //
+    // During even (backward) passes, i iterates from the back to the front of the existing
+    // linearization, and new groups are prepended instead of appended to the list L. To enable
+    // more code reuse, both passes append groups, but during even passes the meanings of
+    // parent/child, and of high/low feerate are reversed, and the final concatenation is reversed
+    // on output.
+    //
+    // In the implementation below, the groups are represented by singly-linked lists (pointing
+    // from the back to the front), which are themselves organized in a singly-linked circular
+    // list (each group pointing to its predecessor, with a special sentinel group at the front
+    // that points back to the last group).
+    //
+    // Information about transaction t is stored in entries[t + 1], while the sentinel is in
+    // entries[0].
+
+    /** Index of the sentinel in the entries array below. */
+    static constexpr ClusterIndex SENTINEL{0};
+    /** Indicator that a group has no previous transaction. */
+    static constexpr ClusterIndex NO_PREV_TX{0};
+
+
+    /** Data structure per transaction entry. */
+    struct TxEntry
+    {
+        /** The index of the previous transaction in this group; NO_PREV_TX if this is the first
+         *  entry of a group. */
+        ClusterIndex prev_tx;
+
+        // The fields below are only used for transactions that are the last one in a group
+        // (referred to as tail transactions below).
+
+        /** Index of the first transaction in this group, possibly itself. */
+        ClusterIndex first_tx;
+        /** Index of the last transaction in the previous group. The first group (the sentinel)
+         *  points back to the last group here, making it a singly-linked circular list. */
+        ClusterIndex prev_group;
+        /** All transactions in the group. Empty for the sentinel. */
+        SetType group;
+        /** All dependencies of the group (descendants in even passes; ancestors in odd ones). */
+        SetType deps;
+        /** The combined fee/size of transactions in the group. Fee is negated in even passes. */
+        FeeFrac feerate;
+    };
+
+    // As an example, consider the state corresponding to the linearization [1,0,3,2], with
+    // groups [1,0,3] and [2], in an odd pass. The linked lists would be:
+    //
+    //                                        +-----+
+    //                                 0<-P-- | 0 S | ---\     Legend:
+    //                                        +-----+    |
+    //                                           ^       |     - digit in box: entries index
+    //             /--------------F---------+    G       |       (note: one more than tx value)
+    //             v                         \   |       |     - S: sentinel group
+    //          +-----+        +-----+        +-----+    |          (empty feerate)
+    //   0<-P-- | 2   | <--P-- | 1   | <--P-- | 4 T |    |     - T: tail transaction, contains
+    //          +-----+        +-----+        +-----+    |          fields beyond prev_tx.
+    //                                           ^       |     - P: prev_tx reference
+    //                                           G       G     - F: first_tx reference
+    //                                           |       |     - G: prev_group reference
+    //                                        +-----+    |
+    //                                 0<-P-- | 3 T | <--/
+    //                                        +-----+
+    //                                         ^   |
+    //                                         \-F-/
+    //
+    // During an even pass, the diagram above would correspond to linearization [2,3,0,1], with
+    // groups [2] and [3,0,1].
+
+    std::vector<TxEntry> entries(linearization.size() + 1);
+
+    // Perform two passes over the linearization.
+    for (int pass = 0; pass < 2; ++pass) {
+        int rev = !(pass & 1);
+        // Construct a sentinel group, identifying the start of the list.
+        entries[SENTINEL].prev_group = SENTINEL;
+        Assume(entries[SENTINEL].feerate.IsEmpty());
+
+        // Iterate over all elements in the existing linearization.
+        for (ClusterIndex i = 0; i < linearization.size(); ++i) {
+            // Even passes are from back to front; odd passes from front to back.
+            ClusterIndex idx = linearization[rev ? linearization.size() - 1 - i : i];
+            // Construct a new group containing just idx. In even passes, the meaning of
+            // parent/child and high/low feerate are swapped.
+            ClusterIndex cur_group = idx + 1;
+            entries[cur_group].group = SetType::Singleton(idx);
+            entries[cur_group].deps = rev ? depgraph.Descendants(idx): depgraph.Ancestors(idx);
+            entries[cur_group].feerate = depgraph.FeeRate(idx);
+            if (rev) entries[cur_group].feerate.fee = -entries[cur_group].feerate.fee;
+            entries[cur_group].prev_tx = NO_PREV_TX; // No previous transaction in group.
+            entries[cur_group].first_tx = cur_group; // Transaction itself is first of group.
+            // Insert the new group at the back of the groups linked list.
+            entries[cur_group].prev_group = entries[SENTINEL].prev_group;
+            entries[SENTINEL].prev_group = cur_group;
+
+            // Start merge/swap cycle.
+            ClusterIndex next_group = SENTINEL; // We inserted at the end, so next group is sentinel.
+            ClusterIndex prev_group = entries[cur_group].prev_group;
+            // Continue as long as the current group has higher feerate than the previous one.
+            while (entries[cur_group].feerate >> entries[prev_group].feerate) {
+                // prev_group/cur_group/next_group refer to (the last transactions of) 3
+                // consecutive entries in groups list.
+                Assume(cur_group == entries[next_group].prev_group);
+                Assume(prev_group == entries[cur_group].prev_group);
+                // The sentinel has empty feerate, which is neither higher nor lower than other
+                // feerates. Thus, the while loop we are in here guarantees that cur_group and
+                // prev_group are not the sentinel.
+                Assume(cur_group != SENTINEL);
+                Assume(prev_group != SENTINEL);
+                if (entries[cur_group].deps.Overlaps(entries[prev_group].group)) {
+                    // There is a dependency between cur_group and prev_group; merge prev_group
+                    // into cur_group. The group/deps/feerate fields of prev_group remain unchanged
+                    // but become unused.
+                    entries[cur_group].group |= entries[prev_group].group;
+                    entries[cur_group].deps |= entries[prev_group].deps;
+                    entries[cur_group].feerate += entries[prev_group].feerate;
+                    // Make the first of the current group point to the tail of the previous group.
+                    entries[entries[cur_group].first_tx].prev_tx = prev_group;
+                    // The first of the previous group becomes the first of the newly-merged group.
+                    entries[cur_group].first_tx = entries[prev_group].first_tx;
+                    // The previous group becomes whatever group was before the former one.
+                    prev_group = entries[prev_group].prev_group;
+                    entries[cur_group].prev_group = prev_group;
+                } else {
+                    // There is no dependency between cur_group and prev_group; swap them.
+                    ClusterIndex preprev_group = entries[prev_group].prev_group;
+                    // If PP, P, C, N were the old preprev, prev, cur, next groups, then the new
+                    // layout becomes [PP, C, P, N]. Update prev_groups to reflect that order.
+                    entries[next_group].prev_group = prev_group;
+                    entries[prev_group].prev_group = cur_group;
+                    entries[cur_group].prev_group = preprev_group;
+                    // The current group remains the same, but the groups before/after it have
+                    // changed.
+                    next_group = prev_group;
+                    prev_group = preprev_group;
+                }
+            }
+        }
+
+        // Convert the entries back to linearization (overwriting the existing one).
+        ClusterIndex cur_group = entries[0].prev_group;
+        ClusterIndex done = 0;
+        while (cur_group != SENTINEL) {
+            ClusterIndex cur_tx = cur_group;
+            // Traverse the transactions of cur_group (from back to front), and write them in the
+            // same order during odd passes, and reversed (front to back) in even passes.
+            if (rev) {
+                do {
+                    *(linearization.begin() + (done++)) = cur_tx - 1;
+                    cur_tx = entries[cur_tx].prev_tx;
+                } while (cur_tx != NO_PREV_TX);
+            } else {
+                do {
+                    *(linearization.end() - (++done)) = cur_tx - 1;
+                    cur_tx = entries[cur_tx].prev_tx;
+                } while (cur_tx != NO_PREV_TX);
+            }
+            cur_group = entries[cur_group].prev_group;
+        }
+        Assume(done == linearization.size());
+    }
+}
+
 } // namespace cluster_linearize
 
 #endif // BITCOIN_CLUSTER_LINEARIZE_H
diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp
index 1d16432c9a..2412db5c1b 100644
--- a/src/test/fuzz/cluster_linearize.cpp
+++ b/src/test/fuzz/cluster_linearize.cpp
@@ -766,3 +766,166 @@ FUZZ_TARGET(clusterlin_linearize)
         }
     }
 }
+
+FUZZ_TARGET(clusterlin_postlinearize)
+{
+    // Verify expected properties of PostLinearize() on arbitrary linearizations.
+
+    // Retrieve a depgraph from the fuzz input.
+    SpanReader reader(buffer);
+    DepGraph<TestBitSet> depgraph;
+    try {
+        reader >> Using<DepGraphFormatter>(depgraph);
+    } catch (const std::ios_base::failure&) {}
+
+    // Retrieve a linearization from the fuzz input.
+    std::vector<ClusterIndex> linearization;
+    linearization = ReadLinearization(depgraph, reader);
+    SanityCheck(depgraph, linearization);
+
+    // Produce a post-processed version.
+    auto post_linearization = linearization;
+    PostLinearize(depgraph, post_linearization);
+    SanityCheck(depgraph, post_linearization);
+
+    // Compare diagrams: post-linearization cannot worsen anywhere.
+    auto chunking = ChunkLinearization(depgraph, linearization);
+    auto post_chunking = ChunkLinearization(depgraph, post_linearization);
+    auto cmp = CompareChunks(post_chunking, chunking);
+    assert(cmp >= 0);
+
+    // Run again, things can keep improving (and never get worse)
+    auto post_post_linearization = post_linearization;
+    PostLinearize(depgraph, post_post_linearization);
+    SanityCheck(depgraph, post_post_linearization);
+    auto post_post_chunking = ChunkLinearization(depgraph, post_post_linearization);
+    cmp = CompareChunks(post_post_chunking, post_chunking);
+    assert(cmp >= 0);
+
+    // The chunks that come out of postlinearizing are always connected.
+    LinearizationChunking linchunking(depgraph, post_linearization);
+    while (linchunking.NumChunksLeft()) {
+        assert(depgraph.IsConnected(linchunking.GetChunk(0).transactions));
+        linchunking.MarkDone(linchunking.GetChunk(0).transactions);
+    }
+}
+
+FUZZ_TARGET(clusterlin_postlinearize_tree)
+{
+    // Verify expected properties of PostLinearize() on linearizations of graphs that form either
+    // an upright or reverse tree structure.
+
+    // Construct a direction, RNG seed, and an arbitrary graph from the fuzz input.
+    SpanReader reader(buffer);
+    uint64_t rng_seed{0};
+    DepGraph<TestBitSet> depgraph_gen;
+    uint8_t direction{0};
+    try {
+        reader >> direction >> rng_seed >> Using<DepGraphFormatter>(depgraph_gen);
+    } catch (const std::ios_base::failure&) {}
+
+    // Now construct a new graph, copying the nodes, but leaving only the first parent (even
+    // direction) or the first child (odd direction).
+    DepGraph<TestBitSet> depgraph_tree;
+    for (ClusterIndex i = 0; i < depgraph_gen.TxCount(); ++i) {
+        depgraph_tree.AddTransaction(depgraph_gen.FeeRate(i));
+    }
+    if (direction & 1) {
+        for (ClusterIndex i = 0; i < depgraph_gen.TxCount(); ++i) {
+            auto children = depgraph_gen.Descendants(i) - TestBitSet::Singleton(i);
+            // Remove descendants that are children of other descendants.
+            for (auto j : children) {
+                if (!children[j]) continue;
+                children -= depgraph_gen.Descendants(j);
+                children.Set(j);
+            }
+            if (children.Any()) depgraph_tree.AddDependency(i, children.First());
+        }
+    } else {
+        for (ClusterIndex i = 0; i < depgraph_gen.TxCount(); ++i) {
+            auto parents = depgraph_gen.Ancestors(i) - TestBitSet::Singleton(i);
+            // Remove ancestors that are parents of other ancestors.
+            for (auto j : parents) {
+                if (!parents[j]) continue;
+                parents -= depgraph_gen.Ancestors(j);
+                parents.Set(j);
+            }
+            if (parents.Any()) depgraph_tree.AddDependency(parents.First(), i);
+        }
+    }
+
+    // Retrieve a linearization from the fuzz input.
+    std::vector<ClusterIndex> linearization;
+    linearization = ReadLinearization(depgraph_tree, reader);
+    SanityCheck(depgraph_tree, linearization);
+
+    // Produce a postlinearized version.
+    auto post_linearization = linearization;
+    PostLinearize(depgraph_tree, post_linearization);
+    SanityCheck(depgraph_tree, post_linearization);
+
+    // Compare diagrams.
+    auto chunking = ChunkLinearization(depgraph_tree, linearization);
+    auto post_chunking = ChunkLinearization(depgraph_tree, post_linearization);
+    auto cmp = CompareChunks(post_chunking, chunking);
+    assert(cmp >= 0);
+
+    // Verify that post-linearizing again does not change the diagram. The result must be identical
+    // as post_linearization ought to be optimal already with a tree-structured graph.
+    auto post_post_linearization = post_linearization;
+    PostLinearize(depgraph_tree, post_post_linearization);
+    SanityCheck(depgraph_tree, post_post_linearization);
+    auto post_post_chunking = ChunkLinearization(depgraph_tree, post_post_linearization);
+    auto cmp_post = CompareChunks(post_post_chunking, post_chunking);
+    assert(cmp_post == 0);
+
+    // Try to find an even better linearization directly. This must not change the diagram for the
+    // same reason.
+    auto [opt_linearization, _optimal] = Linearize(depgraph_tree, 100000, rng_seed, post_linearization);
+    auto opt_chunking = ChunkLinearization(depgraph_tree, opt_linearization);
+    auto cmp_opt = CompareChunks(opt_chunking, post_chunking);
+    assert(cmp_opt == 0);
+}
+
+FUZZ_TARGET(clusterlin_postlinearize_moved_leaf)
+{
+    // Verify that taking an existing linearization, and moving a leaf to the back, potentially
+    // increasing its fee, and then post-linearizing, results in something as good as the
+    // original. This guarantees that in an RBF that replaces a transaction with one of the same
+    // size but higher fee, applying the "remove conflicts, append new transaction, postlinearize"
+    // process will never worsen linearization quality.
+
+    // Construct an arbitrary graph and a fee from the fuzz input.
+    SpanReader reader(buffer);
+    DepGraph<TestBitSet> depgraph;
+    int32_t fee_inc{0};
+    try {
+        uint64_t fee_inc_code;
+        reader >> Using<DepGraphFormatter>(depgraph) >> VARINT(fee_inc_code);
+        fee_inc = fee_inc_code & 0x3ffff;
+    } catch (const std::ios_base::failure&) {}
+    if (depgraph.TxCount() == 0) return;
+
+    // Retrieve two linearizations from the fuzz input.
+    auto lin = ReadLinearization(depgraph, reader);
+    auto lin_leaf = ReadLinearization(depgraph, reader);
+
+    // Construct a linearization identical to lin, but with the tail end of lin_leaf moved to the
+    // back.
+    std::vector<ClusterIndex> lin_moved;
+    for (auto i : lin) {
+        if (i != lin_leaf.back()) lin_moved.push_back(i);
+    }
+    lin_moved.push_back(lin_leaf.back());
+
+    // Postlinearize lin_moved.
+    PostLinearize(depgraph, lin_moved);
+    SanityCheck(depgraph, lin_moved);
+
+    // Compare diagrams (applying the fee delta after computing the old one).
+    auto old_chunking = ChunkLinearization(depgraph, lin);
+    depgraph.FeeRate(lin_leaf.back()).fee += fee_inc;
+    auto new_chunking = ChunkLinearization(depgraph, lin_moved);
+    auto cmp = CompareChunks(new_chunking, old_chunking);
+    assert(cmp >= 0);
+}

From 04d7a04ea426dd0a69b61e3b887867b0277d84d1 Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter@wuille.net>
Date: Tue, 28 May 2024 21:18:52 -0400
Subject: [PATCH 4/5] clusterlin: add MergeLinearizations function + fuzz test
 + benchmark

---
 src/bench/cluster_linearize.cpp     | 35 ++++++++++++++++++++++++
 src/cluster_linearize.h             | 42 +++++++++++++++++++++++++++++
 src/test/fuzz/cluster_linearize.cpp | 26 ++++++++++++++++++
 3 files changed, 103 insertions(+)

diff --git a/src/bench/cluster_linearize.cpp b/src/bench/cluster_linearize.cpp
index 30c7ecef01..269648f4e2 100644
--- a/src/bench/cluster_linearize.cpp
+++ b/src/bench/cluster_linearize.cpp
@@ -180,6 +180,27 @@ void BenchPostLinearizeWorstCase(ClusterIndex ntx, benchmark::Bench& bench)
     });
 }
 
+template<typename SetType>
+void BenchMergeLinearizationsWorstCase(ClusterIndex ntx, benchmark::Bench& bench)
+{
+    DepGraph<SetType> depgraph; // ntx transactions; each one except 0 has a dependency with 0.
+    for (ClusterIndex i = 0; i < ntx; ++i) {
+        depgraph.AddTransaction({i, 1}); // NOTE(review): presumably {fee=i, size=1} -- confirm.
+        if (i) depgraph.AddDependency(0, i);
+    }
+    std::vector<ClusterIndex> lin1; // Filled below with 0, 1, 2, ..., ntx-1.
+    std::vector<ClusterIndex> lin2; // Filled below with 0, ntx-1, ntx-2, ..., 1.
+    lin1.push_back(0);
+    lin2.push_back(0);
+    for (ClusterIndex i = 1; i < ntx; ++i) {
+        lin1.push_back(i);
+        lin2.push_back(ntx - i);
+    }
+    bench.run([&] {
+        MergeLinearizations(depgraph, lin1, lin2); // Only the merge is inside the measured lambda.
+    });
+}
+
 } // namespace
 
 static void LinearizePerIter16TxWorstCase(benchmark::Bench& bench) { BenchLinearizePerIterWorstCase<BitSet<16>>(16, bench); }
@@ -210,6 +231,13 @@ static void PostLinearize64TxWorstCase(benchmark::Bench& bench) { BenchPostLinea
 static void PostLinearize75TxWorstCase(benchmark::Bench& bench) { BenchPostLinearizeWorstCase<BitSet<75>>(75, bench); }
 static void PostLinearize99TxWorstCase(benchmark::Bench& bench) { BenchPostLinearizeWorstCase<BitSet<99>>(99, bench); }
 
+static void MergeLinearizations16TxWorstCase(benchmark::Bench& bench) { BenchMergeLinearizationsWorstCase<BitSet<16>>(16, bench); }
+static void MergeLinearizations32TxWorstCase(benchmark::Bench& bench) { BenchMergeLinearizationsWorstCase<BitSet<32>>(32, bench); }
+static void MergeLinearizations48TxWorstCase(benchmark::Bench& bench) { BenchMergeLinearizationsWorstCase<BitSet<48>>(48, bench); }
+static void MergeLinearizations64TxWorstCase(benchmark::Bench& bench) { BenchMergeLinearizationsWorstCase<BitSet<64>>(64, bench); }
+static void MergeLinearizations75TxWorstCase(benchmark::Bench& bench) { BenchMergeLinearizationsWorstCase<BitSet<75>>(75, bench); }
+static void MergeLinearizations99TxWorstCase(benchmark::Bench& bench) { BenchMergeLinearizationsWorstCase<BitSet<99>>(99, bench); }
+
 BENCHMARK(LinearizePerIter16TxWorstCase, benchmark::PriorityLevel::HIGH);
 BENCHMARK(LinearizePerIter32TxWorstCase, benchmark::PriorityLevel::HIGH);
 BENCHMARK(LinearizePerIter48TxWorstCase, benchmark::PriorityLevel::HIGH);
@@ -237,3 +265,10 @@ BENCHMARK(PostLinearize48TxWorstCase, benchmark::PriorityLevel::HIGH);
 BENCHMARK(PostLinearize64TxWorstCase, benchmark::PriorityLevel::HIGH);
 BENCHMARK(PostLinearize75TxWorstCase, benchmark::PriorityLevel::HIGH);
 BENCHMARK(PostLinearize99TxWorstCase, benchmark::PriorityLevel::HIGH);
+
+BENCHMARK(MergeLinearizations16TxWorstCase, benchmark::PriorityLevel::HIGH);
+BENCHMARK(MergeLinearizations32TxWorstCase, benchmark::PriorityLevel::HIGH);
+BENCHMARK(MergeLinearizations48TxWorstCase, benchmark::PriorityLevel::HIGH);
+BENCHMARK(MergeLinearizations64TxWorstCase, benchmark::PriorityLevel::HIGH);
+BENCHMARK(MergeLinearizations75TxWorstCase, benchmark::PriorityLevel::HIGH);
+BENCHMARK(MergeLinearizations99TxWorstCase, benchmark::PriorityLevel::HIGH);
diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h
index 1e02d9fc3b..ced90c7bd2 100644
--- a/src/cluster_linearize.h
+++ b/src/cluster_linearize.h
@@ -985,6 +985,48 @@ void PostLinearize(const DepGraph<SetType>& depgraph, Span<ClusterIndex> lineari
     }
 }
 
+/** Merge two linearizations for the same cluster into one that is at least as good as both.
+ *
+ * Complexity: O(N^2) where N=depgraph.TxCount(); O(N) if both inputs are identical.
+ */
+template<typename SetType>
+std::vector<ClusterIndex> MergeLinearizations(const DepGraph<SetType>& depgraph, Span<const ClusterIndex> lin1, Span<const ClusterIndex> lin2)
+{
+    Assume(lin1.size() == depgraph.TxCount());
+    Assume(lin2.size() == depgraph.TxCount());
+
+    /** Chunkings of what remains of both input linearizations. */
+    LinearizationChunking chunking1(depgraph, lin1), chunking2(depgraph, lin2);
+    /** Output linearization. */
+    std::vector<ClusterIndex> ret;
+    if (depgraph.TxCount() == 0) return ret; // Nothing to merge; the loop below needs a chunk.
+    ret.reserve(depgraph.TxCount());
+
+    while (true) {
+        // As long as we are not done, both linearizations must have chunks left.
+        Assume(chunking1.NumChunksLeft() > 0);
+        Assume(chunking2.NumChunksLeft() > 0);
+        // Find the set to output by taking the best remaining chunk, and then intersecting it with
+        // prefixes of remaining chunks of the other linearization.
+        SetInfo<SetType> best;
+        const auto& lin1_firstchunk = chunking1.GetChunk(0);
+        const auto& lin2_firstchunk = chunking2.GetChunk(0);
+        // NOTE(review): `>>` below presumably means "strictly higher feerate" -- confirm.
+        if (lin2_firstchunk.feerate >> lin1_firstchunk.feerate) {
+            best = chunking1.IntersectPrefixes(lin2_firstchunk);
+        } else {
+            best = chunking2.IntersectPrefixes(lin1_firstchunk);
+        }
+        // Append the result to the output (in AppendTopo's order) and mark it as done in both
+        // chunkings; chunking2 is not updated once chunking1 reports nothing is left.
+        depgraph.AppendTopo(ret, best.transactions);
+        chunking1.MarkDone(best.transactions);
+        if (chunking1.NumChunksLeft() == 0) break;
+        chunking2.MarkDone(best.transactions);
+    }
+
+    Assume(ret.size() == depgraph.TxCount());
+    return ret;
+}
+
 } // namespace cluster_linearize
 
 #endif // BITCOIN_CLUSTER_LINEARIZE_H
diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp
index 2412db5c1b..2dfdfbb41d 100644
--- a/src/test/fuzz/cluster_linearize.cpp
+++ b/src/test/fuzz/cluster_linearize.cpp
@@ -929,3 +929,29 @@ FUZZ_TARGET(clusterlin_postlinearize_moved_leaf)
     auto cmp = CompareChunks(new_chunking, old_chunking);
     assert(cmp >= 0);
 }
+
+FUZZ_TARGET(clusterlin_merge)
+{
+    // Construct an arbitrary graph from the fuzz input.
+    SpanReader reader(buffer);
+    DepGraph<TestBitSet> depgraph;
+    try {
+        reader >> Using<DepGraphFormatter>(depgraph);
+    } catch (const std::ios_base::failure&) {}
+
+    // Retrieve two linearizations from the fuzz input.
+    auto lin1 = ReadLinearization(depgraph, reader);
+    auto lin2 = ReadLinearization(depgraph, reader);
+
+    // Merge the two.
+    auto lin_merged = MergeLinearizations(depgraph, lin1, lin2);
+    // The merged result must itself pass the linearization sanity checks for this graph.
+    SanityCheck(depgraph, lin_merged);
+
+    // Compute chunkings and compare: the merged one must not be worse than either input.
+    auto chunking1 = ChunkLinearization(depgraph, lin1);
+    auto chunking2 = ChunkLinearization(depgraph, lin2);
+    auto chunking_merged = ChunkLinearization(depgraph, lin_merged);
+    assert(CompareChunks(chunking_merged, chunking1) >= 0);
+    assert(CompareChunks(chunking_merged, chunking2) >= 0);
+}

From bbcee5a0d67db46526ba29a1a4a7c590d303de03 Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter@wuille.net>
Date: Wed, 10 Jul 2024 15:56:17 -0400
Subject: [PATCH 5/5] clusterlin: improve rechunking in LinearizationChunking
 (optimization)

When the transactions being marked done exactly match the first chunk of
what remains of the linearization, we can just remember to skip that
chunk instead of computing a full rechunking.

Further, chop off prefixes of the input linearization that are already done,
so they don't need to be reconsidered for further rechunkings.
---
 src/cluster_linearize.h | 38 +++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h
index ced90c7bd2..607ae681d2 100644
--- a/src/cluster_linearize.h
+++ b/src/cluster_linearize.h
@@ -310,22 +310,30 @@ class LinearizationChunking
     /** The depgraph this linearization is for. */
     const DepGraph<SetType>& m_depgraph;
 
-    /** The linearization we started from. */
+    /** The linearization we started from, possibly with a prefix of done entries stripped. */
     Span<const ClusterIndex> m_linearization;
 
     /** Chunk sets and their feerates, of what remains of the linearization. */
     std::vector<SetInfo<SetType>> m_chunks;
 
+    /** Number of leading m_chunks entries to skip, as their transactions are already done. */
+    ClusterIndex m_chunks_skip{0};
+
     /** Which transactions remain in the linearization. */
     SetType m_todo;
 
-    /** Fill the m_chunks variable. */
+    /** Fill the m_chunks variable, and remove the done prefix of m_linearization. */
     void BuildChunks() noexcept
     {
         // Caller must clear m_chunks.
         Assume(m_chunks.empty());
 
-        // Iterate over the entries in m_linearization. This is effectively the same
+        // Chop off the initial part of m_linearization that is already done.
+        while (!m_linearization.empty() && !m_todo[m_linearization.front()]) {
+            m_linearization = m_linearization.subspan(1);
+        }
+
+        // Iterate over the remaining entries in m_linearization. This is effectively the same
         // algorithm as ChunkLinearization, but supports skipping parts of the linearization and
         // keeps track of the sets themselves instead of just their feerates.
         for (auto idx : m_linearization) {
@@ -355,13 +363,13 @@ public:
     }
 
     /** Determine how many chunks remain in the linearization. */
-    ClusterIndex NumChunksLeft() const noexcept { return m_chunks.size(); }
+    ClusterIndex NumChunksLeft() const noexcept { return m_chunks.size() - m_chunks_skip; }
 
     /** Access a chunk. Chunk 0 is the highest-feerate prefix of what remains. */
     const SetInfo<SetType>& GetChunk(ClusterIndex n) const noexcept
     {
-        Assume(n < m_chunks.size());
-        return m_chunks[n];
+        Assume(n + m_chunks_skip < m_chunks.size());
+        return m_chunks[n + m_chunks_skip]; // Index past the skipped leading chunks.
     }
 
     /** Remove some subset of transactions from the linearization. */
@@ -370,9 +378,21 @@ public:
         Assume(subset.Any());
         Assume(subset.IsSubsetOf(m_todo));
         m_todo -= subset;
-        // Rechunk what remains of m_linearization.
-        m_chunks.clear();
-        BuildChunks();
+        if (GetChunk(0).transactions == subset) {
+            // If the newly done transactions exactly match the first chunk of the remainder of
+            // the linearization, we do not need to rechunk; just remember to skip one
+            // additional chunk.
+            ++m_chunks_skip;
+            // With subset marked done, some prefix of m_linearization will be done now. How long
+            // that prefix is depends on how many done elements were interspersed with subset,
+            // but it is at least subset.Count() entries long, so that many can be dropped here.
+            m_linearization = m_linearization.subspan(subset.Count());
+        } else {
+            // Otherwise rechunk what remains of m_linearization.
+            m_chunks.clear();
+            m_chunks_skip = 0;
+            BuildChunks();
+        }
     }
 
     /** Find the shortest intersection between subset and the prefixes of remaining chunks