diff --git a/src/bench/cluster_linearize.cpp b/src/bench/cluster_linearize.cpp index cb06f3fc28a..1cd36b53939 100644 --- a/src/bench/cluster_linearize.cpp +++ b/src/bench/cluster_linearize.cpp @@ -229,8 +229,8 @@ void BenchLinearizeOptimally(benchmark::Bench& bench, const std::array> Using(depgraph); uint64_t rng_seed = 0; bench.run([&] { - auto res = Linearize(depgraph, /*max_iterations=*/10000000, rng_seed++); - assert(res.second); + auto [_lin, optimal, _cost] = Linearize(depgraph, /*max_iterations=*/10000000, rng_seed++); + assert(optimal); }); }; diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index 217c4700afe..d3a262ff61b 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -1030,19 +1030,20 @@ public: * linearize. * @param[in] old_linearization An existing linearization for the cluster (which must be * topologically valid), or empty. - * @return A pair of: + * @return A tuple of: * - The resulting linearization. It is guaranteed to be at least as * good (in the feerate diagram sense) as old_linearization. * - A boolean indicating whether the result is guaranteed to be * optimal. + * - How many optimization steps were actually performed. * * Complexity: possibly O(N * min(max_iterations + N, sqrt(2^N))) where N=depgraph.TxCount(). */ template -std::pair, bool> Linearize(const DepGraph& depgraph, uint64_t max_iterations, uint64_t rng_seed, std::span old_linearization = {}) noexcept +std::tuple, bool, uint64_t> Linearize(const DepGraph& depgraph, uint64_t max_iterations, uint64_t rng_seed, std::span old_linearization = {}) noexcept { Assume(old_linearization.empty() || old_linearization.size() == depgraph.TxCount()); - if (depgraph.TxCount() == 0) return {{}, true}; + if (depgraph.TxCount() == 0) return {{}, true, 0}; uint64_t iterations_left = max_iterations; std::vector linearization; @@ -1113,7 +1114,7 @@ std::pair, bool> Linearize(const DepGraph& d } } - return {std::move(linearization), optimal}; + return {std::move(linearization), optimal, max_iterations - iterations_left}; } /** Improve a given linearization. diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index fb4bf3a719f..401184b1602 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -906,7 +906,8 @@ FUZZ_TARGET(clusterlin_linearize) // Invoke Linearize(). iter_count &= 0x7ffff; - auto [linearization, optimal] = Linearize(depgraph, iter_count, rng_seed, old_linearization); + auto [linearization, optimal, cost] = Linearize(depgraph, iter_count, rng_seed, old_linearization); + assert(cost <= iter_count); SanityCheck(depgraph, linearization); auto chunking = ChunkLinearization(depgraph, linearization); @@ -1090,7 +1091,7 @@ FUZZ_TARGET(clusterlin_postlinearize_tree) // Try to find an even better linearization directly. This must not change the diagram for the // same reason. 
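As an illustration (not part of the patch) of what the new third tuple element enables: a caller can account for how many iterations a Linearize() call actually consumed and spread a single budget over several clusters. The LinearizeAll helper below is hypothetical; it only assumes the Linearize() signature shown above and the cluster_linearize header/namespace used by the benchmark and fuzz code in this patch.

    // Hypothetical sketch; assumes <cluster_linearize.h> as used elsewhere in this patch.
    #include <cluster_linearize.h>

    #include <algorithm>
    #include <cstdint>
    #include <span>

    using namespace cluster_linearize;

    /** Linearize each cluster in turn, sharing one iteration budget. Returns whether every
     *  cluster was linearized optimally before the budget ran out. */
    template<typename SetType>
    bool LinearizeAll(std::span<DepGraph<SetType>> clusters, uint64_t budget, uint64_t rng_seed)
    {
        bool all_optimal{true};
        for (auto& depgraph : clusters) {
            if (budget == 0) return false;
            // The returned cost reports how many optimization steps were actually performed,
            // which may be far below the maximum passed in.
            auto [lin, optimal, cost] = Linearize(depgraph, /*max_iterations=*/budget, rng_seed++);
            budget -= std::min<uint64_t>(budget, cost);
            all_optimal &= optimal;
        }
        return all_optimal;
    }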
- auto [opt_linearization, _optimal] = Linearize(depgraph_tree, 100000, rng_seed, post_linearization); + auto [opt_linearization, _optimal, _cost] = Linearize(depgraph_tree, 100000, rng_seed, post_linearization); auto opt_chunking = ChunkLinearization(depgraph_tree, opt_linearization); auto cmp_opt = CompareChunks(opt_chunking, post_chunking); assert(cmp_opt == 0); diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index a2c65f2e0a6..fe13dd92cce 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -52,9 +53,17 @@ struct SimTxGraph std::optional oversized; /** The configured maximum number of transactions per cluster. */ DepGraphIndex max_cluster_count; + /** Which transactions have been modified in the graph since creation, either directly or by + * being in a cluster which includes modifications. Only relevant for the staging graph. */ + SetType modified; + /** The configured maximum total size of transactions per cluster. */ + uint64_t max_cluster_size; + /** Whether the corresponding real graph is known to be optimally linearized. */ + bool real_is_optimal{false}; /** Construct a new SimTxGraph with the specified maximum cluster count. */ - explicit SimTxGraph(DepGraphIndex max_cluster) : max_cluster_count(max_cluster) {} + explicit SimTxGraph(DepGraphIndex max_cluster, uint64_t max_size) : + max_cluster_count(max_cluster), max_cluster_size(max_size) {} // Permit copying and moving. SimTxGraph(const SimTxGraph&) noexcept = default; @@ -74,15 +83,33 @@ struct SimTxGraph while (todo.Any()) { auto component = graph.FindConnectedComponent(todo); if (component.Count() > max_cluster_count) oversized = true; + uint64_t component_size{0}; + for (auto i : component) component_size += graph.FeeRate(i).size; + if (component_size > max_cluster_size) oversized = true; todo -= component; } } return *oversized; } + void MakeModified(DepGraphIndex index) + { + modified |= graph.GetConnectedComponent(graph.Positions(), index); + } + /** Determine the number of (non-removed) transactions in the graph. */ DepGraphIndex GetTransactionCount() const { return graph.TxCount(); } + /** Get the sum of all fees/sizes in the graph. */ + FeePerWeight SumAll() const + { + FeePerWeight ret; + for (auto i : graph.Positions()) { + ret += graph.FeeRate(i); + } + return ret; + } + /** Get the position where ref occurs in this simulated graph, or -1 if it does not. */ Pos Find(const TxGraph::Ref* ref) const { @@ -104,10 +131,14 @@ struct SimTxGraph { assert(graph.TxCount() < MAX_TRANSACTIONS); auto simpos = graph.AddTransaction(feerate); + real_is_optimal = false; + MakeModified(simpos); assert(graph.Positions()[simpos]); simmap[simpos] = std::make_shared(); auto ptr = simmap[simpos].get(); simrevmap[ptr] = simpos; + // This may invalidate our cached oversized value. + if (oversized.has_value() && !*oversized) oversized = std::nullopt; return ptr; } @@ -119,6 +150,8 @@ struct SimTxGraph auto chl_pos = Find(child); if (chl_pos == MISSING) return; graph.AddDependencies(SetType::Singleton(par_pos), chl_pos); + MakeModified(par_pos); + real_is_optimal = false; // This may invalidate our cached oversized value. if (oversized.has_value() && !*oversized) oversized = std::nullopt; } @@ -128,6 +161,8 @@ struct SimTxGraph { auto pos = Find(ref); if (pos == MISSING) return; + // No need to invoke MakeModified, because this equally affects main and staging. 
+ real_is_optimal = false; graph.FeeRate(pos).fee = fee; } @@ -136,6 +171,8 @@ struct SimTxGraph { auto pos = Find(ref); if (pos == MISSING) return; + MakeModified(pos); + real_is_optimal = false; graph.RemoveTransactions(SetType::Singleton(pos)); simrevmap.erase(simmap[pos].get()); // Retain the TxGraph::Ref corresponding to this position, so the Ref destruction isn't @@ -160,7 +197,9 @@ struct SimTxGraph auto remove = std::partition(removed.begin(), removed.end(), [&](auto& arg) { return arg.get() != ref; }); removed.erase(remove, removed.end()); } else { + MakeModified(pos); graph.RemoveTransactions(SetType::Singleton(pos)); + real_is_optimal = false; simrevmap.erase(simmap[pos].get()); simmap[pos].reset(); // This may invalidate our cached oversized value. @@ -238,12 +277,34 @@ FUZZ_TARGET(txgraph) // Decide the maximum number of transactions per cluster we will use in this simulation. auto max_count = provider.ConsumeIntegralInRange(1, MAX_CLUSTER_COUNT_LIMIT); + // And the maximum combined size of transactions per cluster. + auto max_size = provider.ConsumeIntegralInRange(1, 0x3fffff * MAX_CLUSTER_COUNT_LIMIT); + // And the number of iterations to consider a cluster acceptably linearized. + auto acceptable_iters = provider.ConsumeIntegralInRange(0, 10000); // Construct a real graph, and a vector of simulated graphs (main, and possibly staging). - auto real = MakeTxGraph(max_count); + auto real = MakeTxGraph(max_count, max_size, acceptable_iters); std::vector sims; sims.reserve(2); - sims.emplace_back(max_count); + sims.emplace_back(max_count, max_size); + + /** Struct encapsulating information about a BlockBuilder that's currently live. */ + struct BlockBuilderData + { + /** BlockBuilder object from real. */ + std::unique_ptr builder; + /** The set of transactions marked as included in *builder. */ + SimTxGraph::SetType included; + /** The set of transactions marked as included or skipped in *builder. */ + SimTxGraph::SetType done; + /** The last chunk feerate returned by *builder. IsEmpty() if none yet. */ + FeePerWeight last_feerate; + + BlockBuilderData(std::unique_ptr builder_in) : builder(std::move(builder_in)) {} + }; + + /** Currently active block builders. */ + std::vector block_builders; /** Function to pick any Ref (for either sim in sims: from sim.simmap or sim.removed, or the * empty Ref). */ @@ -282,9 +343,44 @@ FUZZ_TARGET(txgraph) return &empty_ref; }; + /** Function to construct the correct fee-size diagram a real graph has based on its graph + * order (as reported by GetCluster(), so it works for both main and staging). */ + auto get_diagram_fn = [&](bool main_only) -> std::vector { + int level = main_only ? 0 : sims.size() - 1; + auto& sim = sims[level]; + // For every transaction in the graph, request its cluster, and throw them into a set. + std::set> clusters; + for (auto i : sim.graph.Positions()) { + auto ref = sim.GetRef(i); + clusters.insert(real->GetCluster(*ref, main_only)); + } + // Compute the chunkings of each (deduplicated) cluster. + size_t num_tx{0}; + std::vector chunk_feerates; + for (const auto& cluster : clusters) { + num_tx += cluster.size(); + std::vector linearization; + linearization.reserve(cluster.size()); + for (auto refptr : cluster) linearization.push_back(sim.Find(refptr)); + for (const FeeFrac& chunk_feerate : ChunkLinearization(sim.graph, linearization)) { + chunk_feerates.push_back(chunk_feerate); + } + } + // Verify the number of transactions after deduplicating clusters. 
This implicitly verifies + // that GetCluster on each element of a cluster reports the cluster transactions in the same + // order. + assert(num_tx == sim.GetTransactionCount()); + // Sort by feerate only, since violating topological constraints within same-feerate + // chunks won't affect diagram comparisons. + std::sort(chunk_feerates.begin(), chunk_feerates.end(), std::greater{}); + return chunk_feerates; + }; + LIMITED_WHILE(provider.remaining_bytes() > 0, 200) { // Read a one-byte command. int command = provider.ConsumeIntegral(); + int orig_command = command; + // Treat the lowest bit of a command as a flag (which selects a variant of some of the // operations), and the second-lowest bit as a way of selecting main vs. staging, and leave // the rest of the bits in command. @@ -292,6 +388,11 @@ FUZZ_TARGET(txgraph) bool use_main = command & 2; command >>= 2; + /** Use the bottom 2 bits of command to select an entry in the block_builders vector (if + * any). These use the same bits as alt/use_main, so don't use those in actions below + * where builder_idx is used as well. */ + int builder_idx = block_builders.empty() ? -1 : int((orig_command & 3) % block_builders.size()); + // Provide convenient aliases for the top simulated graph (main, or staging if it exists), // one for the simulated graph selected based on use_main (for operations that can operate // on both graphs), and one that always refers to the main graph. @@ -302,7 +403,7 @@ FUZZ_TARGET(txgraph) // Keep decrementing command for each applicable operation, until one is hit. Multiple // iterations may be necessary. while (true) { - if (top_sim.GetTransactionCount() < SimTxGraph::MAX_TRANSACTIONS && command-- == 0) { + if ((block_builders.empty() || sims.size() > 1) && top_sim.GetTransactionCount() < SimTxGraph::MAX_TRANSACTIONS && command-- == 0) { // AddTransaction. int64_t fee; int32_t size; @@ -314,7 +415,7 @@ FUZZ_TARGET(txgraph) // Otherwise, use smaller range which consume fewer fuzz input bytes, as just // these are likely sufficient to trigger all interesting code paths already. fee = provider.ConsumeIntegral(); - size = provider.ConsumeIntegral() + 1; + size = provider.ConsumeIntegralInRange(1, 0xff); } FeePerWeight feerate{fee, size}; // Create a real TxGraph::Ref. @@ -324,7 +425,7 @@ FUZZ_TARGET(txgraph) // Move it in place. *ref_loc = std::move(ref); break; - } else if (top_sim.GetTransactionCount() + top_sim.removed.size() > 1 && command-- == 0) { + } else if ((block_builders.empty() || sims.size() > 1) && top_sim.GetTransactionCount() + top_sim.removed.size() > 1 && command-- == 0) { // AddDependency. auto par = pick_fn(); auto chl = pick_fn(); @@ -336,9 +437,10 @@ FUZZ_TARGET(txgraph) if (top_sim.graph.Ancestors(pos_par)[pos_chl]) break; } top_sim.AddDependency(par, chl); + top_sim.real_is_optimal = false; real->AddDependency(*par, *chl); break; - } else if (top_sim.removed.size() < 100 && command-- == 0) { + } else if ((block_builders.empty() || sims.size() > 1) && top_sim.removed.size() < 100 && command-- == 0) { // RemoveTransaction. Either all its ancestors or all its descendants are also // removed (if any), to make sure TxGraph's reordering of removals and dependencies // has no effect. @@ -368,7 +470,7 @@ FUZZ_TARGET(txgraph) } sel_sim.removed.pop_back(); break; - } else if (command-- == 0) { + } else if (block_builders.empty() && command-- == 0) { // ~Ref (of any transaction). 
std::vector to_destroy; to_destroy.push_back(pick_fn()); @@ -390,7 +492,7 @@ FUZZ_TARGET(txgraph) } } break; - } else if (command-- == 0) { + } else if (block_builders.empty() && command-- == 0) { // SetTransactionFee. int64_t fee; if (alt) { @@ -444,6 +546,7 @@ FUZZ_TARGET(txgraph) // Just do some quick checks that the reported value is in range. A full // recomputation of expected chunk feerates is done at the end. assert(feerate.size >= main_sim.graph.FeeRate(simpos).size); + assert(feerate.size <= main_sim.SumAll().size); } break; } else if (!sel_sim.IsOversized() && command-- == 0) { @@ -487,13 +590,17 @@ FUZZ_TARGET(txgraph) assert(result.size() <= max_count); // Require the result to be topologically valid and not contain duplicates. auto left = sel_sim.graph.Positions(); + uint64_t total_size{0}; for (auto refptr : result) { auto simpos = sel_sim.Find(refptr); + total_size += sel_sim.graph.FeeRate(simpos).size; assert(simpos != SimTxGraph::MISSING); assert(left[simpos]); left.Reset(simpos); assert(!sel_sim.graph.Ancestors(simpos).Overlaps(left)); } + // Check cluster size limit. + assert(total_size <= max_size); // Require the set to be connected. auto result_set = sel_sim.MakeSet(result); assert(sel_sim.graph.IsConnected(result_set)); @@ -517,9 +624,10 @@ FUZZ_TARGET(txgraph) } else if (sims.size() < 2 && command-- == 0) { // StartStaging. sims.emplace_back(sims.back()); + sims.back().modified = SimTxGraph::SetType{}; real->StartStaging(); break; - } else if (sims.size() > 1 && command-- == 0) { + } else if (block_builders.empty() && sims.size() > 1 && command-- == 0) { // CommitStaging. real->CommitStaging(); sims.erase(sims.begin()); @@ -584,7 +692,135 @@ FUZZ_TARGET(txgraph) break; } else if (command-- == 0) { // DoWork. - real->DoWork(); + uint64_t iters = provider.ConsumeIntegralInRange(0, alt ? 10000 : 255); + if (real->DoWork(iters)) { + for (auto& sim : sims) sim.real_is_optimal = true; + } + break; + } else if (sims.size() == 2 && !sims[0].IsOversized() && !sims[1].IsOversized() && command-- == 0) { + // GetMainStagingDiagrams() + auto [main_diagram, staged_diagram] = real->GetMainStagingDiagrams(); + auto sum_main = std::accumulate(main_diagram.begin(), main_diagram.end(), FeeFrac{}); + auto sum_staged = std::accumulate(staged_diagram.begin(), staged_diagram.end(), FeeFrac{}); + auto diagram_gain = sum_staged - sum_main; + auto real_gain = sims[1].SumAll() - sims[0].SumAll(); + // Just check that the total fee gained/lost and size gained/lost according to the + // diagram matches the difference in these values in the simulated graph. A more + // complete check of the GetMainStagingDiagrams result is performed at the end. + assert(diagram_gain == real_gain); + // Check that the feerates in each diagram are monotonically decreasing. + for (size_t i = 1; i < main_diagram.size(); ++i) { + assert(FeeRateCompare(main_diagram[i], main_diagram[i - 1]) <= 0); + } + for (size_t i = 1; i < staged_diagram.size(); ++i) { + assert(FeeRateCompare(staged_diagram[i], staged_diagram[i - 1]) <= 0); + } + break; + } else if (block_builders.size() < 4 && !main_sim.IsOversized() && command-- == 0) { + // GetBlockBuilder. + block_builders.emplace_back(real->GetBlockBuilder()); + break; + } else if (!block_builders.empty() && command-- == 0) { + // ~BlockBuilder. + block_builders.erase(block_builders.begin() + builder_idx); + break; + } else if (!block_builders.empty() && command-- == 0) { + // BlockBuilder::GetCurrentChunk, followed by Include/Skip. 
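As an illustration (not part of the patch) of how the BlockBuilder interface exercised below is meant to be consumed: chunks are offered in mining order, and the caller either includes a chunk or skips it, where skipping also skips the remainder of that chunk's cluster. The FillBlock helper is hypothetical and assumes only the txgraph.h interface extended by this patch.

    // Hypothetical sketch; assumes the TxGraph::BlockBuilder interface added by this patch.
    #include <txgraph.h>

    #include <cstdint>
    #include <vector>

    /** Greedily fill a block template up to max_weight, taking chunks in mining order and
     *  skipping chunks that do not fit (which also skips the rest of their cluster). */
    std::vector<TxGraph::Ref*> FillBlock(TxGraph& graph, int64_t max_weight)
    {
        std::vector<TxGraph::Ref*> ret;
        auto builder = graph.GetBlockBuilder();
        int64_t weight_left{max_weight};
        // GetCurrentChunk() returns the next chunk in mining order (its transactions and
        // aggregate feerate), or std::nullopt once every chunk was included or skipped.
        while (auto chunk = builder->GetCurrentChunk()) {
            if (chunk->second.size <= weight_left) {
                weight_left -= chunk->second.size;
                ret.insert(ret.end(), chunk->first.begin(), chunk->first.end());
                builder->Include();
            } else {
                builder->Skip();
            }
        }
        return ret;
    }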
+ auto& builder_data = block_builders[builder_idx]; + auto new_included = builder_data.included; + auto new_done = builder_data.done; + auto chunk = builder_data.builder->GetCurrentChunk(); + if (chunk) { + // Chunk feerates must be monotonously decreasing. + if (!builder_data.last_feerate.IsEmpty()) { + assert(!(chunk->second >> builder_data.last_feerate)); + } + builder_data.last_feerate = chunk->second; + // Verify the contents of GetCurrentChunk. + FeePerWeight sum_feerate; + for (TxGraph::Ref* ref : chunk->first) { + // Each transaction in the chunk must exist in the main graph. + auto simpos = main_sim.Find(ref); + assert(simpos != SimTxGraph::MISSING); + // Verify the claimed chunk feerate. + sum_feerate += main_sim.graph.FeeRate(simpos); + // Make sure no transaction is reported twice. + assert(!new_done[simpos]); + new_done.Set(simpos); + // The concatenation of all included transactions must be topologically valid. + new_included.Set(simpos); + assert(main_sim.graph.Ancestors(simpos).IsSubsetOf(new_included)); + } + assert(sum_feerate == chunk->second); + } else { + // When we reach the end, if nothing was skipped, the entire graph should have + // been reported. + if (builder_data.done == builder_data.included) { + assert(builder_data.done.Count() == main_sim.GetTransactionCount()); + } + } + // Possibly invoke GetCurrentChunk() again, which should give the same result. + if ((orig_command % 7) >= 5) { + auto chunk2 = builder_data.builder->GetCurrentChunk(); + assert(chunk == chunk2); + } + // Skip or include. + if ((orig_command % 5) >= 3) { + // Skip. + builder_data.builder->Skip(); + } else { + // Include. + builder_data.builder->Include(); + builder_data.included = new_included; + } + builder_data.done = new_done; + break; + } else if (!main_sim.IsOversized() && command-- == 0) { + // GetWorstMainChunk. + auto [worst_chunk, worst_chunk_feerate] = real->GetWorstMainChunk(); + // Just do some sanity checks here. Consistency with GetBlockBuilder is checked + // below. + if (main_sim.GetTransactionCount() == 0) { + assert(worst_chunk.empty()); + assert(worst_chunk_feerate.IsEmpty()); + } else { + assert(!worst_chunk.empty()); + SimTxGraph::SetType done; + FeePerWeight sum; + for (TxGraph::Ref* ref : worst_chunk) { + // Each transaction in the chunk must exist in the main graph. + auto simpos = main_sim.Find(ref); + assert(simpos != SimTxGraph::MISSING); + sum += main_sim.graph.FeeRate(simpos); + // Make sure the chunk contains no duplicate transactions. + assert(!done[simpos]); + done.Set(simpos); + // All elements are preceded by all their descendants. + assert(main_sim.graph.Descendants(simpos).IsSubsetOf(done)); + } + assert(sum == worst_chunk_feerate); + } + break; + } else if ((block_builders.empty() || sims.size() > 1) && command-- == 0) { + // Trim. + bool was_oversized = top_sim.IsOversized(); + auto removed = real->Trim(); + if (!was_oversized) { + assert(removed.empty()); + break; + } + auto removed_set = top_sim.MakeSet(removed); + // The removed set must contain all its own descendants. + for (auto simpos : removed_set) { + assert(top_sim.graph.Descendants(simpos).IsSubsetOf(removed_set)); + } + // Apply all removals to the simulation, and verify the result is no longer + // oversized. Don't query the real graph for oversizedness; it is compared + // against the simulation anyway later. 
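As an illustration (not part of the patch) of the Trim() call exercised here, a hypothetical helper that assumes only the txgraph.h interface extended by this patch:

    // Hypothetical sketch; assumes the Trim()/IsOversized() interface added by this patch.
    #include <txgraph.h>

    /** If the (top) graph exceeds its cluster count/size limits, evict transactions until it
     *  no longer does. Trim() returns the Refs it removed, each together with its descendants. */
    void EnforceClusterLimits(TxGraph& graph)
    {
        if (!graph.IsOversized()) return;
        auto removed = graph.Trim();
        // The returned Refs no longer refer to transactions in the graph; the caller decides
        // what to do with them (e.g. dropping them from a mempool-like structure).
        (void)removed;
    }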
+ for (auto simpos : removed_set) { + top_sim.RemoveTransaction(top_sim.GetRef(simpos)); + } + assert(!top_sim.IsOversized()); break; } } @@ -624,6 +860,16 @@ FUZZ_TARGET(txgraph) } assert(todo.None()); + // If the real graph claims to be optimal (the last DoWork() call returned true), verify + // that calling Linearize on it does not improve it further. + if (sims[0].real_is_optimal) { + auto real_diagram = ChunkLinearization(sims[0].graph, vec1); + auto [sim_lin, _optimal, _cost] = Linearize(sims[0].graph, 300000, rng.rand64(), vec1); + auto sim_diagram = ChunkLinearization(sims[0].graph, sim_lin); + auto cmp = CompareChunks(real_diagram, sim_diagram); + assert(cmp == 0); + } + // For every transaction in the total ordering, find a random one before it and after it, // and compare their chunk feerates, which must be consistent with the ordering. for (size_t pos = 0; pos < vec1.size(); ++pos) { @@ -639,6 +885,94 @@ FUZZ_TARGET(txgraph) assert(FeeRateCompare(after_feerate, pos_feerate) <= 0); } } + + // The same order should be obtained through a BlockBuilder as implied by CompareMainOrder, + // if nothing is skipped. + auto builder = real->GetBlockBuilder(); + std::vector vec_builder; + std::vector last_chunk; + FeePerWeight last_chunk_feerate; + while (auto chunk = builder->GetCurrentChunk()) { + FeePerWeight sum; + for (TxGraph::Ref* ref : chunk->first) { + // The reported chunk feerate must match the chunk feerate obtained by asking + // it for each of the chunk's transactions individually. + assert(real->GetMainChunkFeerate(*ref) == chunk->second); + // Verify the chunk feerate matches the sum of the reported individual feerates. + sum += real->GetIndividualFeerate(*ref); + // Chunks must contain transactions that exist in the graph. + auto simpos = sims[0].Find(ref); + assert(simpos != SimTxGraph::MISSING); + vec_builder.push_back(simpos); + } + assert(sum == chunk->second); + last_chunk = std::move(chunk->first); + last_chunk_feerate = chunk->second; + builder->Include(); + } + assert(vec_builder == vec1); + + // The last chunk returned by the BlockBuilder must match GetWorstMainChunk, in reverse. + std::reverse(last_chunk.begin(), last_chunk.end()); + auto [worst_chunk, worst_chunk_feerate] = real->GetWorstMainChunk(); + assert(last_chunk == worst_chunk); + assert(last_chunk_feerate == worst_chunk_feerate); + + // Check that the implied ordering gives rise to a combined diagram that matches the + // diagram constructed from the individual cluster linearization chunkings. + auto main_real_diagram = get_diagram_fn(/*main_only=*/true); + auto main_implied_diagram = ChunkLinearization(sims[0].graph, vec1); + assert(CompareChunks(main_real_diagram, main_implied_diagram) == 0); + + if (sims.size() >= 2 && !sims[1].IsOversized()) { + // When the staging graph is not oversized as well, call GetMainStagingDiagrams, and + // fully verify the result. + auto [main_cmp_diagram, stage_cmp_diagram] = real->GetMainStagingDiagrams(); + // Check that the feerates in each diagram are monotonically decreasing. + for (size_t i = 1; i < main_cmp_diagram.size(); ++i) { + assert(FeeRateCompare(main_cmp_diagram[i], main_cmp_diagram[i - 1]) <= 0); + } + for (size_t i = 1; i < stage_cmp_diagram.size(); ++i) { + assert(FeeRateCompare(stage_cmp_diagram[i], stage_cmp_diagram[i - 1]) <= 0); + } + // Treat the diagrams as sets of chunk feerates, and sort them in the same way so that + // std::set_difference can be used on them below. 
The exact ordering does not matter + // here, but it has to be consistent with the one used in main_diagram and + // stage_diagram). + std::sort(main_cmp_diagram.begin(), main_cmp_diagram.end(), std::greater{}); + std::sort(stage_cmp_diagram.begin(), stage_cmp_diagram.end(), std::greater{}); + // Find the chunks that appear in main_diagram but are missing from main_cmp_diagram. + // This is allowed, because GetMainStagingDiagrams omits clusters in main unaffected + // by staging. + std::vector missing_main_cmp; + std::set_difference(main_real_diagram.begin(), main_real_diagram.end(), + main_cmp_diagram.begin(), main_cmp_diagram.end(), + std::inserter(missing_main_cmp, missing_main_cmp.end()), + std::greater{}); + assert(main_cmp_diagram.size() + missing_main_cmp.size() == main_real_diagram.size()); + // Do the same for chunks in stage_diagram missing from stage_cmp_diagram. + auto stage_real_diagram = get_diagram_fn(false); + std::vector missing_stage_cmp; + std::set_difference(stage_real_diagram.begin(), stage_real_diagram.end(), + stage_cmp_diagram.begin(), stage_cmp_diagram.end(), + std::inserter(missing_stage_cmp, missing_stage_cmp.end()), + std::greater{}); + assert(stage_cmp_diagram.size() + missing_stage_cmp.size() == stage_real_diagram.size()); + // The missing chunks must be equal across main & staging (otherwise they couldn't have + // been omitted). + assert(missing_main_cmp == missing_stage_cmp); + + // The missing part must include at least all transactions in staging which have not been + // modified, or been in a cluster together with modified transactions, since they were + // copied from main. Note that due to the reordering of removals w.r.t. dependency + // additions, it is possible that the real implementation found more unaffected things. + FeeFrac missing_real; + for (const auto& feerate : missing_main_cmp) missing_real += feerate; + FeeFrac missing_expected = sims[1].graph.FeeRate(sims[1].graph.Positions() - sims[1].modified); + // Note that missing_real.fee < missing_expected.fee is possible to due the presence of + // negative-fee transactions. + assert(missing_real.size >= missing_expected.size); + } } assert(real->HaveStaging() == (sims.size() > 1)); @@ -680,13 +1014,17 @@ FUZZ_TARGET(txgraph) // linearization). std::vector simlin; SimTxGraph::SetType done; + uint64_t total_size{0}; for (TxGraph::Ref* ptr : cluster) { auto simpos = sim.Find(ptr); assert(sim.graph.Descendants(simpos).IsSubsetOf(component - done)); done.Set(simpos); assert(sim.graph.Ancestors(simpos).IsSubsetOf(done)); simlin.push_back(simpos); + total_size += sim.graph.FeeRate(simpos).size; } + // Check cluster size. + assert(total_size <= max_size); // Construct a chunking object for the simulated graph, using the reported cluster // linearization as ordering, and compare it against the reported chunk feerates. if (sims.size() == 1 || main_only) { @@ -714,6 +1052,8 @@ FUZZ_TARGET(txgraph) // Sanity check again (because invoking inspectors may modify internal unobservable state). real->SanityCheck(); + // Kill the block builders. + block_builders.clear(); // Kill the TxGraph object. real.reset(); // Kill the simulated graphs, with all remaining Refs in it. If any, this verifies that Refs diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 7c74ae50ef3..6249cfa45db 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -34,6 +35,9 @@ using ClusterSetIndex = uint32_t; /** Quality levels for cached cluster linearizations. 
 */
 enum class QualityLevel
 {
+    /** This is a singleton cluster consisting of a transaction that individually exceeds the
+     * cluster size limit. It cannot be merged with anything. */
+    OVERSIZED,
     /** This cluster may have multiple disconnected components, which are all NEEDS_RELINEARIZE. */
     NEEDS_SPLIT,
     /** This cluster may have multiple disconnected components, which are all ACCEPTABLE. */
@@ -49,6 +53,40 @@ enum class QualityLevel
     NONE,
 };
 
+/** Information about a transaction inside TxGraphImpl::Trim. */
+struct TrimTxData
+{
+    /** Chunk feerate for this transaction. */
+    FeePerWeight m_chunk_feerate;
+    /** GraphIndex of the transaction. */
+    TxGraph::GraphIndex m_index;
+    /** Number of unmet dependencies this transaction has. -1 if the transaction is included. */
+    uint32_t m_deps_left;
+    /** Number of dependencies that apply to this transaction as child. */
+    uint32_t m_parent_count;
+    /** Where in deps_by_child those dependencies begin. */
+    uint32_t m_parent_offset;
+    /** Number of dependencies that apply to this transaction as parent. */
+    uint32_t m_children_count;
+    /** Where in deps_by_parent those dependencies begin. */
+    uint32_t m_children_offset;
+    /** Size of the transaction. */
+    uint32_t m_tx_size;
+
+    // As transactions get processed, they get organized into trees which form partitions
+    // representing the would-be clusters up to that point. The root of each tree is a
+    // representative for that partition.
+    // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure.
+
+    /** Pointer to another TrimTxData, towards the root of the tree. If this is a root, m_uf_parent
+     * is equal to this itself. */
+    TrimTxData* m_uf_parent;
+    /** If this is a root, the total number of transactions in the partition. */
+    uint32_t m_uf_count;
+    /** If this is a root, the total size of transactions in the partition. */
+    uint64_t m_uf_size;
+};
+
 /** A grouping of connected transactions inside a TxGraphImpl::ClusterSet. */
 class Cluster
 {
@@ -100,6 +138,9 @@ public:
     {
         return m_quality == QualityLevel::OPTIMAL;
     }
+    /** Whether this cluster is oversized (just due to the size of its transaction(s), not due to
+     * dependencies that are yet to be added). */
+    bool IsOversized() const noexcept { return m_quality == QualityLevel::OVERSIZED; }
     /** Whether this cluster requires splitting. */
     bool NeedsSplitting() const noexcept
     {
@@ -108,6 +149,8 @@
     }
     /** Get the number of transactions in this Cluster. */
     LinearizationIndex GetTxCount() const noexcept { return m_linearization.size(); }
+    /** Get the total size of the transactions in this Cluster. */
+    uint64_t GetTxSize() const noexcept;
     /** Given a DepGraphIndex into this Cluster, find the corresponding GraphIndex. */
     GraphIndex GetClusterEntry(DepGraphIndex index) const noexcept { return m_mapping[index]; }
     /** Only called by Graph::SwapIndexes. */
@@ -138,8 +181,15 @@ public:
     void Merge(TxGraphImpl& graph, Cluster& cluster) noexcept;
     /** Given a span of (parent, child) pairs that all belong to this Cluster, apply them. */
     void ApplyDependencies(TxGraphImpl& graph, std::span<std::pair<GraphIndex, GraphIndex>> to_apply) noexcept;
-    /** Improve the linearization of this Cluster. */
-    void Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept;
+    /** Improve the linearization of this Cluster. Returns how much work was performed and whether
+     * the Cluster is now optimal. */
+    std::pair<uint64_t, bool> Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept;
+    /** For every chunk in the cluster, append its FeeFrac to ret.
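The m_uf_parent/m_uf_count/m_uf_size fields above implement a disjoint-set (union-find) forest, as the comment's Wikipedia reference indicates. As an aside (not part of the patch), a minimal standalone sketch of the find and union operations such fields support; the UFNode/UFFind/UFUnion names are illustrative only and do not appear in the patch.

    // Standalone sketch of the disjoint-set operations the m_uf_* fields support.
    #include <cstdint>
    #include <utility>

    struct UFNode
    {
        UFNode* m_uf_parent;   // Root nodes point to themselves.
        uint32_t m_uf_count;   // Valid for roots: number of transactions in the partition.
        uint64_t m_uf_size;    // Valid for roots: total size of the partition.
    };

    /** Find the representative (root) of the partition containing node, with path halving. */
    UFNode* UFFind(UFNode* node)
    {
        while (node->m_uf_parent != node) {
            node->m_uf_parent = node->m_uf_parent->m_uf_parent;
            node = node->m_uf_parent;
        }
        return node;
    }

    /** Merge the partitions containing a and b; return the new representative. */
    UFNode* UFUnion(UFNode* a, UFNode* b)
    {
        a = UFFind(a);
        b = UFFind(b);
        if (a == b) return a;
        // Attach the smaller tree under the larger one (union by count).
        if (a->m_uf_count < b->m_uf_count) std::swap(a, b);
        b->m_uf_parent = a;
        a->m_uf_count += b->m_uf_count;
        a->m_uf_size += b->m_uf_size;
        return a;
    }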
*/ + void AppendChunkFeerates(std::vector& ret) const noexcept; + /** Add a TrimTxData entry for every transaction in the Cluster to ret. Implicit dependencies + * between consecutive transactions in the linearization are added to deps. Return the + * Cluster's combined transaction size. */ + uint64_t AppendTrimData(std::vector& ret, std::vector>& deps) const noexcept; // Functions that implement the Cluster-specific side of public TxGraph functions. @@ -149,8 +199,10 @@ public: /** Process elements from the front of args that apply to this cluster, and append Refs for the * union of their descendants to output. */ void GetDescendantRefs(const TxGraphImpl& graph, std::span>& args, std::vector& output) noexcept; - /** Get a vector of Refs for all elements of this Cluster, in linearization order. */ - std::vector GetClusterRefs(const TxGraphImpl& graph) noexcept; + /** Populate range with refs for the transactions in this Cluster's linearization, from + * position start_pos until start_pos+range.size()-1, inclusive. Returns whether that + * range includes the last transaction in the linearization. */ + bool GetClusterRefs(TxGraphImpl& graph, std::span range, LinearizationIndex start_pos) noexcept; /** Get the individual transaction feerate of a Cluster element. */ FeePerWeight GetIndividualFeerate(DepGraphIndex idx) noexcept; /** Modify the fee of a Cluster element. */ @@ -188,11 +240,16 @@ public: class TxGraphImpl final : public TxGraph { friend class Cluster; + friend class BlockBuilderImpl; private: /** Internal RNG. */ FastRandomContext m_rng; /** This TxGraphImpl's maximum cluster count limit. */ const DepGraphIndex m_max_cluster_count; + /** This TxGraphImpl's maximum cluster size limit. */ + const uint64_t m_max_cluster_size; + /** The number of linearization improvements steps needed for it to be considered acceptable. */ + const uint64_t m_acceptable_iters; /** Information about one group of Clusters to be merged. */ struct GroupEntry @@ -238,8 +295,11 @@ private: /** Total number of transactions in this graph (sum of all transaction counts in all * Clusters, and for staging also those inherited from the main ClusterSet). */ GraphIndex m_txcount{0}; + /** Total number of individually oversized transactions in the graph. */ + GraphIndex m_txcount_oversized{0}; /** Whether this graph is oversized (if known). This roughly matches - * m_group_data->m_group_oversized, but may be known even if m_group_data is not. */ + * m_group_data->m_group_oversized || (m_txcount_oversized > 0), but may be known even if + * m_group_data is not. */ std::optional m_oversized{false}; ClusterSet() noexcept = default; @@ -252,6 +312,65 @@ private: /** Next sequence number to assign to created Clusters. */ uint64_t m_next_sequence_counter{0}; + /** Information about a chunk in the main graph. */ + struct ChunkData + { + /** The Entry which is the last transaction of the chunk. */ + mutable GraphIndex m_graph_index; + /** How many transactions the chunk contains (-1 = singleton tail of cluster). */ + LinearizationIndex m_chunk_count; + + ChunkData(GraphIndex graph_index, LinearizationIndex chunk_count) noexcept : + m_graph_index{graph_index}, m_chunk_count{chunk_count} {} + }; + + /** Compare two Cluster* by their m_sequence value (while supporting nullptr). */ + static std::strong_ordering CompareClusters(Cluster* a, Cluster* b) noexcept + { + // The nullptr pointer compares before everything else. 
+ if (a == nullptr || b == nullptr) { + return (a != nullptr) <=> (b != nullptr); + } + // If neither pointer is nullptr, compare the Clusters' sequence numbers. + Assume(a == b || a->m_sequence != b->m_sequence); + return a->m_sequence <=> b->m_sequence; + } + + /** Comparator for ChunkData objects in mining order. */ + class ChunkOrder + { + const TxGraphImpl* const m_graph; + public: + explicit ChunkOrder(const TxGraphImpl* graph) : m_graph(graph) {} + + bool operator()(const ChunkData& a, const ChunkData& b) const noexcept + { + const auto& a_entry = m_graph->m_entries[a.m_graph_index]; + const auto& b_entry = m_graph->m_entries[b.m_graph_index]; + // First sort from high feerate to low feerate. + auto cmp_feerate = FeeRateCompare(a_entry.m_main_chunk_feerate, b_entry.m_main_chunk_feerate); + if (cmp_feerate != 0) return cmp_feerate > 0; + // Then sort by increasing Cluster::m_sequence. + Assume(a_entry.m_locator[0].IsPresent()); + Assume(b_entry.m_locator[0].IsPresent()); + auto cmp_sequence = CompareClusters(a_entry.m_locator[0].cluster, b_entry.m_locator[0].cluster); + if (cmp_sequence != 0) return cmp_sequence < 0; + // Finally sort by position within the Cluster. + return a_entry.m_main_lin_index < b_entry.m_main_lin_index; + } + }; + + /** Definition for the mining index type. */ + using ChunkIndex = std::set; + + /** Index of ChunkData objects, indexing the last transaction in each chunk in the main + * graph. */ + ChunkIndex m_main_chunkindex; + /** Number of index-observing objects in existence (BlockBuilderImpls). */ + size_t m_main_chunkindex_observers{0}; + /** Cache of discarded ChunkIndex node handles to re-use, avoiding additional allocation. */ + std::vector m_main_chunkindex_discarded; + /** A Locator that describes whether, where, and in which Cluster an Entry appears. * Every Entry has MAX_LEVELS locators, as it may appear in one Cluster per level. * @@ -308,6 +427,9 @@ private: { /** Pointer to the corresponding Ref object if any, or nullptr if unlinked. */ Ref* m_ref{nullptr}; + /** Iterator to the corresponding ChunkData, if any, and m_main_chunkindex.end() otherwise. + * This is initialized on construction of the Entry, in AddTransaction. */ + ChunkIndex::iterator m_main_chunkindex_iterator; /** Which Cluster and position therein this Entry appears in. ([0] = main, [1] = staged). */ Locator m_locator[MAX_LEVELS]; /** The chunk feerate of this transaction in main (if present in m_locator[0]). */ @@ -322,22 +444,13 @@ private: /** Set of Entries which have no linked Ref anymore. */ std::vector m_unlinked; - /** Compare two Cluster* by their m_sequence value (while supporting nullptr). */ - static std::strong_ordering CompareClusters(Cluster* a, Cluster* b) noexcept - { - // The nullptr pointer compares before everything else. - if (a == nullptr || b == nullptr) { - return (a != nullptr) <=> (b != nullptr); - } - // If neither pointer is nullptr, compare the Clusters' sequence numbers. - Assume(a == b || a->m_sequence != b->m_sequence); - return a->m_sequence <=> b->m_sequence; - } - public: - /** Construct a new TxGraphImpl with the specified maximum cluster count. */ - explicit TxGraphImpl(DepGraphIndex max_cluster_count) noexcept : - m_max_cluster_count(max_cluster_count) + /** Construct a new TxGraphImpl with the specified limits. 
*/ + explicit TxGraphImpl(DepGraphIndex max_cluster_count, uint64_t max_cluster_size, uint64_t acceptable_iters) noexcept : + m_max_cluster_count(max_cluster_count), + m_max_cluster_size(max_cluster_size), + m_acceptable_iters(acceptable_iters), + m_main_chunkindex(ChunkOrder(this)) { Assume(max_cluster_count >= 1); Assume(max_cluster_count <= MAX_CLUSTER_COUNT_LIMIT); @@ -375,9 +488,13 @@ public: ClusterSet& GetClusterSet(int level) noexcept; const ClusterSet& GetClusterSet(int level) const noexcept; /** Make a transaction not exist at a specified level. It must currently exist there. */ - void ClearLocator(int level, GraphIndex index) noexcept; + void ClearLocator(int level, GraphIndex index, bool oversized_tx) noexcept; /** Find which Clusters in main conflict with ones in staging. */ std::vector GetConflicts() const noexcept; + /** Clear an Entry's ChunkData. */ + void ClearChunkData(Entry& entry) noexcept; + /** Give an Entry a ChunkData object. */ + void CreateChunkData(GraphIndex idx, LinearizationIndex chunk_count) noexcept; // Functions for handling Refs. @@ -394,6 +511,7 @@ public: { auto& entry = m_entries[idx]; Assume(entry.m_ref != nullptr); + Assume(m_main_chunkindex_observers == 0 || !entry.m_locator[0].IsPresent()); entry.m_ref = nullptr; // Mark the transaction as to be removed in all levels where it explicitly or implicitly // exists. @@ -447,10 +565,11 @@ public: void Merge(std::span to_merge) noexcept; /** Apply all m_deps_to_add to the relevant Clusters in the specified level. */ void ApplyDependencies(int level) noexcept; - /** Make a specified Cluster have quality ACCEPTABLE or OPTIMAL. */ - void MakeAcceptable(Cluster& cluster) noexcept; - /** Make all Clusters at the specified level have quality ACCEPTABLE or OPTIMAL. */ - void MakeAllAcceptable(int level) noexcept; + /** Make a specified Cluster have quality ACCEPTABLE or OPTIMAL. Return how much work was performed. */ + uint64_t MakeAcceptable(Cluster& cluster) noexcept; + /** Make all Clusters at the specified level have quality ACCEPTABLE or OPTIMAL. Return how much + * was performed. */ + uint64_t MakeAllAcceptable(int level) noexcept; // Implementations for the public TxGraph interface. @@ -459,7 +578,7 @@ public: void AddDependency(const Ref& parent, const Ref& child) noexcept final; void SetTransactionFee(const Ref&, int64_t fee) noexcept final; - void DoWork() noexcept final; + bool DoWork(uint64_t iters) noexcept final; void StartStaging() noexcept final; void CommitStaging() noexcept final; @@ -478,6 +597,11 @@ public: bool IsOversized(bool main_only = false) noexcept final; std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept final; GraphIndex CountDistinctClusters(std::span refs, bool main_only = false) noexcept final; + std::pair, std::vector> GetMainStagingDiagrams() noexcept final; + std::vector Trim() noexcept final; + + std::unique_ptr GetBlockBuilder() noexcept final; + std::pair, FeePerWeight> GetWorstMainChunk() noexcept final; void SanityCheck() const final; }; @@ -498,7 +622,76 @@ const TxGraphImpl::ClusterSet& TxGraphImpl::GetClusterSet(int level) const noexc return *m_staging_clusterset; } -void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept +/** Implementation of the TxGraph::BlockBuilder interface. */ +class BlockBuilderImpl final : public TxGraph::BlockBuilder +{ + /** Which TxGraphImpl this object is doing block building for. It will have its + * m_main_chunkindex_observers incremented as long as this BlockBuilderImpl exists. 
*/ + TxGraphImpl* const m_graph; + /** Clusters which we're not including further transactions from. */ + std::set m_excluded_clusters; + /** Iterator to the current chunk in the chunk index. end() if nothing further remains. */ + TxGraphImpl::ChunkIndex::const_iterator m_cur_iter; + /** Which cluster the current chunk belongs to, so we can exclude further transactions from it + * when that chunk is skipped, or nullptr if we know we're at the end of the current + * cluster. */ + Cluster* m_cur_cluster; + + // Move m_cur_iter / m_cur_cluster to the next acceptable chunk. + void Next() noexcept; + +public: + /** Construct a new BlockBuilderImpl to build blocks for the provided graph. */ + BlockBuilderImpl(TxGraphImpl& graph) noexcept; + + // Implement the public interface. + ~BlockBuilderImpl() final; + std::optional, FeePerWeight>> GetCurrentChunk() noexcept final; + void Include() noexcept final; + void Skip() noexcept final; +}; + +void TxGraphImpl::ClearChunkData(Entry& entry) noexcept +{ + if (entry.m_main_chunkindex_iterator != m_main_chunkindex.end()) { + Assume(m_main_chunkindex_observers == 0); + // If the Entry has a non-empty m_main_chunkindex_iterator, extract it, and move the handle + // to the cache of discarded chunkindex entries. + m_main_chunkindex_discarded.emplace_back(m_main_chunkindex.extract(entry.m_main_chunkindex_iterator)); + entry.m_main_chunkindex_iterator = m_main_chunkindex.end(); + } +} + +void TxGraphImpl::CreateChunkData(GraphIndex idx, LinearizationIndex chunk_count) noexcept +{ + auto& entry = m_entries[idx]; + if (!m_main_chunkindex_discarded.empty()) { + // Reuse an discarded node handle. + auto& node = m_main_chunkindex_discarded.back().value(); + node.m_graph_index = idx; + node.m_chunk_count = chunk_count; + auto insert_result = m_main_chunkindex.insert(std::move(m_main_chunkindex_discarded.back())); + Assume(insert_result.inserted); + entry.m_main_chunkindex_iterator = insert_result.position; + m_main_chunkindex_discarded.pop_back(); + } else { + // Construct a new entry. + auto emplace_result = m_main_chunkindex.emplace(idx, chunk_count); + Assume(emplace_result.second); + entry.m_main_chunkindex_iterator = emplace_result.first; + } +} + +uint64_t Cluster::GetTxSize() const noexcept +{ + uint64_t ret{0}; + for (auto i : m_linearization) { + ret += m_depgraph.FeeRate(i).size; + } + return ret; +} + +void TxGraphImpl::ClearLocator(int level, GraphIndex idx, bool oversized_tx) noexcept { auto& entry = m_entries[idx]; auto& clusterset = GetClusterSet(level); @@ -512,14 +705,17 @@ void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept } // Update the transaction count. --clusterset.m_txcount; + clusterset.m_txcount_oversized -= oversized_tx; // If clearing main, adjust the status of Locators of this transaction in staging, if it exists. if (level == 0 && GetTopLevel() == 1) { if (entry.m_locator[1].IsRemoved()) { entry.m_locator[1].SetMissing(); } else if (!entry.m_locator[1].IsPresent()) { --m_staging_clusterset->m_txcount; + m_staging_clusterset->m_txcount_oversized -= oversized_tx; } } + if (level == 0) ClearChunkData(entry); } void Cluster::Updated(TxGraphImpl& graph) noexcept @@ -527,6 +723,9 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept // Update all the Locators for this Cluster's Entry objects. for (DepGraphIndex idx : m_linearization) { auto& entry = graph.m_entries[m_mapping[idx]]; + // Discard any potential ChunkData prior to modifying the Cluster (as that could + // invalidate its ordering). 
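As an aside (not part of the patch): ChunkData entries are cleared before a Cluster is modified because m_main_chunkindex is a std::set whose comparator (ChunkOrder, above) reads state stored outside the set, so an element's effective key must never change while the element is in the set. A small standalone illustration of the extract-mutate-reinsert pattern, with illustrative names:

    // Standalone sketch of the "remove from the index before mutating its key" pattern
    // that ClearChunkData()/CreateChunkData() implement for m_main_chunkindex.
    #include <cassert>
    #include <set>
    #include <utility>
    #include <vector>

    struct Entry { int feerate; };

    struct ByFeerate
    {
        const std::vector<Entry>* entries;
        bool operator()(int a, int b) const { return (*entries)[a].feerate > (*entries)[b].feerate; }
    };

    int main()
    {
        std::vector<Entry> entries{{3}, {1}, {2}};
        std::set<int, ByFeerate> index(ByFeerate{&entries});
        for (int i = 0; i < 3; ++i) index.insert(i);
        // To change a key, first extract the element; mutating entries[] while the node is
        // still in the set would silently corrupt its ordering.
        auto node = index.extract(1);
        entries[1].feerate = 5;
        index.insert(std::move(node));
        assert(*index.begin() == 1); // Entry 1 now sorts first.
        return 0;
    }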
+ if (m_level == 0) graph.ClearChunkData(entry); entry.m_locator[m_level].SetPresent(this, idx); } // If this is for the main graph (level = 0), and the Cluster's quality is ACCEPTABLE or @@ -535,14 +734,15 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept // ACCEPTABLE, so it is pointless to compute these if we haven't reached that quality level // yet. if (m_level == 0 && IsAcceptable()) { - LinearizationChunking chunking(m_depgraph, m_linearization); + const LinearizationChunking chunking(m_depgraph, m_linearization); LinearizationIndex lin_idx{0}; // Iterate over the chunks. for (unsigned chunk_idx = 0; chunk_idx < chunking.NumChunksLeft(); ++chunk_idx) { auto chunk = chunking.GetChunk(chunk_idx); - Assume(chunk.transactions.Any()); + auto chunk_count = chunk.transactions.Count(); + Assume(chunk_count > 0); // Iterate over the transactions in the linearization, which must match those in chunk. - do { + while (true) { DepGraphIndex idx = m_linearization[lin_idx]; GraphIndex graph_idx = m_mapping[idx]; auto& entry = graph.m_entries[graph_idx]; @@ -550,7 +750,18 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept entry.m_main_chunk_feerate = FeePerWeight::FromFeeFrac(chunk.feerate); Assume(chunk.transactions[idx]); chunk.transactions.Reset(idx); - } while(chunk.transactions.Any()); + if (chunk.transactions.None()) { + // Last transaction in the chunk. + if (chunk_count == 1 && chunk_idx + 1 == chunking.NumChunksLeft()) { + // If this is the final chunk of the cluster, and it contains just a single + // transaction (which will always be true for the very common singleton + // clusters), store the special value -1 as chunk count. + chunk_count = LinearizationIndex(-1); + } + graph.CreateChunkData(graph_idx, chunk_count); + break; + } + } } } } @@ -632,7 +843,8 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove entry.m_main_lin_index = LinearizationIndex(-1); } // - Mark it as missing/removed in the Entry's locator. - graph.ClearLocator(m_level, idx); + bool oversized_tx = uint64_t(m_depgraph.FeeRate(locator.index).size) > graph.m_max_cluster_size; + graph.ClearLocator(m_level, idx, oversized_tx); to_remove = to_remove.subspan(1); } while(!to_remove.empty()); @@ -648,9 +860,14 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove todo.Reset(m_linearization.back()); m_linearization.pop_back(); } - if (todo.None()) { + if (m_linearization.empty()) { + // Empty Cluster, it just needs to be deleted. Any NEEDS_SPLIT_* state will work. + // This will always be hit when the input is QualityLevel::OVERSIZED. + quality = QualityLevel::NEEDS_SPLIT_ACCEPTABLE; + } else if (todo.None()) { // If no further removals remain, and thus all removals were at the end, we may be able // to leave the cluster at a better quality level. + Assume(quality != QualityLevel::OVERSIZED); if (IsAcceptable(/*after_split=*/true)) { quality = QualityLevel::NEEDS_SPLIT_ACCEPTABLE; } else { @@ -671,7 +888,10 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove void Cluster::Clear(TxGraphImpl& graph) noexcept { for (auto i : m_linearization) { - graph.ClearLocator(m_level, m_mapping[i]); + // We do not care about setting oversized_tx accurately here, because this function is only + // applied to main-graph Clusters in CommitStaging, which will overwrite main's + // m_txcount_oversized anyway with the staging graph's value. 
+ graph.ClearLocator(m_level, m_mapping[i], /*oversized_tx=*/false); } m_depgraph = {}; m_linearization.clear(); @@ -692,6 +912,44 @@ void Cluster::MoveToMain(TxGraphImpl& graph) noexcept Updated(graph); } +void Cluster::AppendChunkFeerates(std::vector& ret) const noexcept +{ + auto chunk_feerates = ChunkLinearization(m_depgraph, m_linearization); + ret.reserve(ret.size() + chunk_feerates.size()); + ret.insert(ret.end(), chunk_feerates.begin(), chunk_feerates.end()); +} + +uint64_t Cluster::AppendTrimData(std::vector& ret, std::vector>& deps) const noexcept +{ + LinearizationChunking linchunking(m_depgraph, m_linearization); + LinearizationIndex pos{0}; + uint64_t size{0}; + auto prev_index = GraphIndex(-1); + // Iterate over the chunks of this cluster's linearization. + for (unsigned i = 0; i < linchunking.NumChunksLeft(); ++i) { + const auto& [chunk, chunk_feerate] = linchunking.GetChunk(i); + // Iterate over the transactions of that chunk, in linearization order. + auto chunk_tx_count = chunk.Count(); + for (unsigned j = 0; j < chunk_tx_count; ++j) { + auto cluster_idx = m_linearization[pos]; + // The transaction must appear in the chunk. + Assume(chunk[cluster_idx]); + // Construct a new element in ret. + auto& entry = ret.emplace_back(); + entry.m_chunk_feerate = FeePerWeight::FromFeeFrac(chunk_feerate); + entry.m_index = m_mapping[cluster_idx]; + // If this is not the first transaction of the cluster linearization, it has an + // implicit dependency on its predecessor. + if (pos != 0) deps.emplace_back(prev_index, entry.m_index); + prev_index = entry.m_index; + entry.m_tx_size = m_depgraph.FeeRate(cluster_idx).size; + size += entry.m_tx_size; + ++pos; + } + } + return size; +} + bool Cluster::Split(TxGraphImpl& graph) noexcept { // This function can only be called when the Cluster needs splitting. @@ -718,10 +976,11 @@ bool Cluster::Split(TxGraphImpl& graph) noexcept // Iterate over the connected components of this Cluster's m_depgraph. while (todo.Any()) { auto component = m_depgraph.FindConnectedComponent(todo); + auto split_quality = component.Count() == 1 ? QualityLevel::OPTIMAL : new_quality; if (first && component == todo) { // The existing Cluster is an entire component. Leave it be, but update its quality. Assume(todo == m_depgraph.Positions()); - graph.SetClusterQuality(m_level, m_quality, m_setindex, new_quality); + graph.SetClusterQuality(m_level, m_quality, m_setindex, split_quality); // If this made the quality ACCEPTABLE or OPTIMAL, we need to compute and cache its // chunking. Updated(graph); @@ -736,7 +995,7 @@ bool Cluster::Split(TxGraphImpl& graph) noexcept for (auto i : component) { remap[i] = {new_cluster.get(), DepGraphIndex(-1)}; } - graph.InsertCluster(m_level, std::move(new_cluster), new_quality); + graph.InsertCluster(m_level, std::move(new_cluster), split_quality); todo -= component; } // Redistribute the transactions. @@ -798,7 +1057,11 @@ void Cluster::Merge(TxGraphImpl& graph, Cluster& other) noexcept // Update the transaction's Locator. There is no need to call Updated() to update chunk // feerates, as Updated() will be invoked by Cluster::ApplyDependencies on the resulting // merged Cluster later anyway). - graph.m_entries[idx].m_locator[m_level].SetPresent(this, new_pos); + auto& entry = graph.m_entries[idx]; + // Discard any potential ChunkData prior to modifying the Cluster (as that could + // invalidate its ordering). 
+ if (m_level == 0) graph.ClearChunkData(entry); + entry.m_locator[m_level].SetPresent(this, new_pos); } // Purge the other Cluster, now that everything has been moved. other.m_depgraph = DepGraph{}; @@ -844,6 +1107,9 @@ void Cluster::ApplyDependencies(TxGraphImpl& graph, std::spanm_graph_index = idx; + } // Update the locators for both levels. The rest of the Entry information will not change, // so no need to invoke Cluster::Updated(). for (int level = 0; level < MAX_LEVELS; ++level) { @@ -1036,6 +1306,9 @@ void TxGraphImpl::Compact() noexcept if (!m_staging_clusterset->m_removed.empty()) return; } + // Release memory used by discarded ChunkData index entries. + ClearShrink(m_main_chunkindex_discarded); + // Sort the GraphIndexes that need to be cleaned up. They are sorted in reverse, so the last // ones get processed first. This means earlier-processed GraphIndexes will not cause moving of // later-processed ones during the "swap with end of m_entries" step below (which might @@ -1113,6 +1386,17 @@ void TxGraphImpl::GroupClusters(int level) noexcept * to-be-merged group). */ std::vector, uint64_t>> an_deps; + // Construct an an_clusters entry for every oversized cluster, including ones from levels below, + // as they may be inherited in this one. + for (int level_iter = 0; level_iter <= level; ++level_iter) { + for (auto& cluster : GetClusterSet(level_iter).m_clusters[int(QualityLevel::OVERSIZED)]) { + auto graph_idx = cluster->GetClusterEntry(0); + auto cur_cluster = FindCluster(graph_idx, level); + if (cur_cluster == nullptr) continue; + an_clusters.emplace_back(cur_cluster, cur_cluster->m_sequence); + } + } + // Construct a an_clusters entry for every parent and child in the to-be-applied dependencies, // and an an_deps entry for each dependency to be applied. an_deps.reserve(clusterset.m_deps_to_add.size()); @@ -1129,7 +1413,7 @@ void TxGraphImpl::GroupClusters(int level) noexcept an_deps.emplace_back(std::pair{par, chl}, chl_cluster->m_sequence); } // Sort and deduplicate an_clusters, so we end up with a sorted list of all involved Clusters - // to which dependencies apply. + // to which dependencies apply, or which are oversized. std::sort(an_clusters.begin(), an_clusters.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); an_clusters.erase(std::unique(an_clusters.begin(), an_clusters.end()), an_clusters.end()); // Sort an_deps by applying the same order to the involved child cluster. @@ -1268,10 +1552,12 @@ void TxGraphImpl::GroupClusters(int level) noexcept new_entry.m_deps_offset = clusterset.m_deps_to_add.size(); new_entry.m_deps_count = 0; uint32_t total_count{0}; + uint64_t total_size{0}; // Add all its clusters to it (copying those from an_clusters to m_group_clusters). while (an_clusters_it != an_clusters.end() && an_clusters_it->second == rep) { clusterset.m_group_data->m_group_clusters.push_back(an_clusters_it->first); total_count += an_clusters_it->first->GetTxCount(); + total_size += an_clusters_it->first->GetTxSize(); ++an_clusters_it; ++new_entry.m_cluster_count; } @@ -1282,7 +1568,7 @@ void TxGraphImpl::GroupClusters(int level) noexcept ++new_entry.m_deps_count; } // Detect oversizedness. - if (total_count > m_max_cluster_count) { + if (total_count > m_max_cluster_count || total_size > m_max_cluster_size) { clusterset.m_group_data->m_group_oversized = true; } } @@ -1330,7 +1616,7 @@ void TxGraphImpl::ApplyDependencies(int level) noexcept // Nothing to do if there are no dependencies to be added. 
if (clusterset.m_deps_to_add.empty()) return; // Dependencies cannot be applied if it would result in oversized clusters. - if (clusterset.m_group_data->m_group_oversized) return; + if (clusterset.m_oversized == true) return; // For each group of to-be-merged Clusters. for (const auto& group_entry : clusterset.m_group_data->m_groups) { @@ -1362,15 +1648,15 @@ void TxGraphImpl::ApplyDependencies(int level) noexcept clusterset.m_group_data = GroupData{}; } -void Cluster::Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept +std::pair Cluster::Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept { // We can only relinearize Clusters that do not need splitting. Assume(!NeedsSplitting()); // No work is required for Clusters which are already optimally linearized. - if (IsOptimal()) return; + if (IsOptimal()) return {0, true}; // Invoke the actual linearization algorithm (passing in the existing one). uint64_t rng_seed = graph.m_rng.rand64(); - auto [linearization, optimal] = Linearize(m_depgraph, max_iters, rng_seed, m_linearization); + auto [linearization, optimal, cost] = Linearize(m_depgraph, max_iters, rng_seed, m_linearization); // Postlinearize if the result isn't optimal already. This guarantees (among other things) // that the chunks of the resulting linearization are all connected. if (!optimal) PostLinearize(m_depgraph, linearization); @@ -1381,25 +1667,30 @@ void Cluster::Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept graph.SetClusterQuality(m_level, m_quality, m_setindex, new_quality); // Update the Entry objects. Updated(graph); + return {cost, optimal}; } -void TxGraphImpl::MakeAcceptable(Cluster& cluster) noexcept +uint64_t TxGraphImpl::MakeAcceptable(Cluster& cluster) noexcept { + uint64_t cost{0}; // Relinearize the Cluster if needed. - if (!cluster.NeedsSplitting() && !cluster.IsAcceptable()) { - cluster.Relinearize(*this, 10000); + if (!cluster.NeedsSplitting() && !cluster.IsAcceptable() && !cluster.IsOversized()) { + cost += cluster.Relinearize(*this, m_acceptable_iters).first; } + return cost; } -void TxGraphImpl::MakeAllAcceptable(int level) noexcept +uint64_t TxGraphImpl::MakeAllAcceptable(int level) noexcept { ApplyDependencies(level); auto& clusterset = GetClusterSet(level); - if (clusterset.m_oversized == true) return; + if (clusterset.m_oversized == true) return 0; auto& queue = clusterset.m_clusters[int(QualityLevel::NEEDS_RELINEARIZE)]; + uint64_t cost{0}; while (!queue.empty()) { - MakeAcceptable(*queue.back().get()); + cost += MakeAcceptable(*queue.back().get()); } + return cost; } Cluster::Cluster(uint64_t sequence) noexcept : m_sequence{sequence} {} @@ -1415,23 +1706,33 @@ Cluster::Cluster(uint64_t sequence, TxGraphImpl& graph, const FeePerWeight& feer TxGraph::Ref TxGraphImpl::AddTransaction(const FeePerWeight& feerate) noexcept { + Assume(m_main_chunkindex_observers == 0 || GetTopLevel() != 0); + Assume(feerate.size > 0); // Construct a new Ref. Ref ret; // Construct a new Entry, and link it with the Ref. auto idx = m_entries.size(); m_entries.emplace_back(); auto& entry = m_entries.back(); + entry.m_main_chunkindex_iterator = m_main_chunkindex.end(); entry.m_ref = &ret; GetRefGraph(ret) = this; GetRefIndex(ret) = idx; // Construct a new singleton Cluster (which is necessarily optimally linearized). 
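As an illustration (not part of the patch) of how the budgeted DoWork(iters) interface can be driven, for example from a background task, a hypothetical helper assuming only the txgraph.h interface extended by this patch:

    // Hypothetical sketch; assumes the DoWork(iters) signature introduced by this patch.
    #include <txgraph.h>

    #include <algorithm>
    #include <cstdint>

    /** Spend up to total_budget linearization iterations, in small slices, stopping early
     *  once DoWork() reports that all clusters are optimally linearized. */
    void BackgroundLinearize(TxGraph& graph, uint64_t total_budget, uint64_t slice = 255)
    {
        while (total_budget > 0) {
            uint64_t iters = std::min(slice, total_budget);
            total_budget -= iters;
            if (graph.DoWork(iters)) break; // Everything is optimal; no more work needed.
        }
    }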
+ bool oversized = uint64_t(feerate.size) > m_max_cluster_size; auto cluster = std::make_unique(m_next_sequence_counter++, *this, feerate, idx); auto cluster_ptr = cluster.get(); int level = GetTopLevel(); auto& clusterset = GetClusterSet(level); - InsertCluster(level, std::move(cluster), QualityLevel::OPTIMAL); + InsertCluster(level, std::move(cluster), oversized ? QualityLevel::OVERSIZED : QualityLevel::OPTIMAL); cluster_ptr->Updated(*this); ++clusterset.m_txcount; + // Deal with individually oversized transactions. + if (oversized) { + ++clusterset.m_txcount_oversized; + clusterset.m_oversized = true; + clusterset.m_group_data = std::nullopt; + } // Return the Ref. return ret; } @@ -1442,6 +1743,7 @@ void TxGraphImpl::RemoveTransaction(const Ref& arg) noexcept // having been removed). if (GetRefGraph(arg) == nullptr) return; Assume(GetRefGraph(arg) == this); + Assume(m_main_chunkindex_observers == 0 || GetTopLevel() != 0); // Find the Cluster the transaction is in, and stop if it isn't in any. int level = GetTopLevel(); auto cluster = FindCluster(GetRefIndex(arg), level); @@ -1460,6 +1762,7 @@ void TxGraphImpl::AddDependency(const Ref& parent, const Ref& child) noexcept // removed). if (GetRefGraph(parent) == nullptr || GetRefGraph(child) == nullptr) return; Assume(GetRefGraph(parent) == this && GetRefGraph(child) == this); + Assume(m_main_chunkindex_observers == 0 || GetTopLevel() != 0); // Don't do anything if this is a dependency on self. if (GetRefIndex(parent) == GetRefIndex(child)) return; // Find the Cluster the parent and child transaction are in, and stop if either appears to be @@ -1526,17 +1829,18 @@ void Cluster::GetDescendantRefs(const TxGraphImpl& graph, std::span Cluster::GetClusterRefs(const TxGraphImpl& graph) noexcept +bool Cluster::GetClusterRefs(TxGraphImpl& graph, std::span range, LinearizationIndex start_pos) noexcept { - std::vector ret; - ret.reserve(m_linearization.size()); - // Translate all transactions in the Cluster (in linearization order) to Refs. - for (auto idx : m_linearization) { - const auto& entry = graph.m_entries[m_mapping[idx]]; + // Translate the transactions in the Cluster (in linearization order, starting at start_pos in + // the linearization) to Refs, and fill them in range. + for (auto& ref : range) { + Assume(start_pos < m_linearization.size()); + const auto& entry = graph.m_entries[m_mapping[m_linearization[start_pos++]]]; Assume(entry.m_ref != nullptr); - ret.push_back(entry.m_ref); + ref = entry.m_ref; } - return ret; + // Return whether start_pos has advanced to the end of the Cluster. + return start_pos == m_linearization.size(); } FeePerWeight Cluster::GetIndividualFeerate(DepGraphIndex idx) noexcept @@ -1680,7 +1984,9 @@ std::vector TxGraphImpl::GetCluster(const Ref& arg, bool main_onl if (cluster == nullptr) return {}; // Make sure the Cluster has an acceptable quality level, and then dispatch to it. MakeAcceptable(*cluster); - return cluster->GetClusterRefs(*this); + std::vector ret(cluster->GetTxCount()); + cluster->GetClusterRefs(*this, ret, 0); + return ret; } TxGraph::GraphIndex TxGraphImpl::GetTransactionCount(bool main_only) noexcept @@ -1739,11 +2045,15 @@ bool TxGraphImpl::IsOversized(bool main_only) noexcept // Return cached value if known. return *clusterset.m_oversized; } - // Find which Clusters will need to be merged together, as that is where the oversize - // property is assessed. 
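GetClusterRefs() above was changed from returning a freshly allocated vector to filling a caller-provided std::span starting at a given position, and reporting whether the end of the linearization was reached. A self-contained sketch of that pattern (FillFrom is a made-up name, and plain ints stand in for Refs):

#include <cassert>
#include <cstddef>
#include <span>
#include <vector>

/** Copy elements of source, starting at start_pos, into the caller-provided out span.
 *  Returns whether start_pos advanced to the end of source, mirroring the new
 *  GetClusterRefs() return value. */
bool FillFrom(const std::vector<int>& source, std::span<int> out, std::size_t start_pos)
{
    for (int& slot : out) {
        assert(start_pos < source.size());
        slot = source[start_pos++];
    }
    return start_pos == source.size();
}

int main()
{
    std::vector<int> linearization{10, 20, 30, 40};
    std::vector<int> chunk(2);
    bool at_end = FillFrom(linearization, chunk, 2); // copies {30, 40}
    assert(at_end);
}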
- GroupClusters(level); - Assume(clusterset.m_group_data.has_value()); - clusterset.m_oversized = clusterset.m_group_data->m_group_oversized; + ApplyRemovals(level); + if (clusterset.m_txcount_oversized > 0) { + clusterset.m_oversized = true; + } else { + // Find which Clusters will need to be merged together, as that is where the oversize + // property is assessed. + GroupClusters(level); + } + Assume(clusterset.m_oversized.has_value()); return *clusterset.m_oversized; } @@ -1763,6 +2073,7 @@ void TxGraphImpl::StartStaging() noexcept // Copy statistics, precomputed data, and to-be-applied dependencies (only if oversized) to // the new graph. To-be-applied removals will always be empty at this point. m_staging_clusterset->m_txcount = m_main_clusterset.m_txcount; + m_staging_clusterset->m_txcount_oversized = m_main_clusterset.m_txcount_oversized; m_staging_clusterset->m_deps_to_add = m_main_clusterset.m_deps_to_add; m_staging_clusterset->m_group_data = m_main_clusterset.m_group_data; m_staging_clusterset->m_oversized = m_main_clusterset.m_oversized; @@ -1790,7 +2101,13 @@ void TxGraphImpl::AbortStaging() noexcept if (!m_main_clusterset.m_group_data.has_value()) { // In case m_oversized in main was kept after a Ref destruction while staging exists, we // need to re-evaluate m_oversized now. - m_main_clusterset.m_oversized = std::nullopt; + if (m_main_clusterset.m_to_remove.empty() && m_main_clusterset.m_txcount_oversized > 0) { + // It is possible that a Ref destruction caused a removal in main while staging existed. + // In this case, m_txcount_oversized may be inaccurate. + m_main_clusterset.m_oversized = true; + } else { + m_main_clusterset.m_oversized = std::nullopt; + } } } @@ -1798,6 +2115,7 @@ void TxGraphImpl::CommitStaging() noexcept { // Staging must exist. Assume(m_staging_clusterset.has_value()); + Assume(m_main_chunkindex_observers == 0); // Delete all conflicting Clusters in main, to make place for moving the staging ones // there. All of these have been copied to staging in PullIn(). auto conflicts = GetConflicts(); @@ -1823,6 +2141,7 @@ void TxGraphImpl::CommitStaging() noexcept m_main_clusterset.m_group_data = std::move(m_staging_clusterset->m_group_data); m_main_clusterset.m_oversized = std::move(m_staging_clusterset->m_oversized); m_main_clusterset.m_txcount = std::move(m_staging_clusterset->m_txcount); + m_main_clusterset.m_txcount_oversized = std::move(m_staging_clusterset->m_txcount_oversized); // Delete the old staging graph, after all its information was moved to main. m_staging_clusterset.reset(); Compact(); @@ -1837,7 +2156,9 @@ void Cluster::SetFee(TxGraphImpl& graph, DepGraphIndex idx, int64_t fee) noexcep // Update the fee, remember that relinearization will be necessary, and update the Entries // in the same Cluster. m_depgraph.FeeRate(idx).fee = fee; - if (!NeedsSplitting()) { + if (m_quality == QualityLevel::OVERSIZED) { + // Nothing to do. + } else if (!NeedsSplitting()) { graph.SetClusterQuality(m_level, m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); } else { graph.SetClusterQuality(m_level, m_quality, m_setindex, QualityLevel::NEEDS_SPLIT); @@ -1850,6 +2171,7 @@ void TxGraphImpl::SetTransactionFee(const Ref& ref, int64_t fee) noexcept // Don't do anything if the passed Ref is empty. if (GetRefGraph(ref) == nullptr) return; Assume(GetRefGraph(ref) == this); + Assume(m_main_chunkindex_observers == 0); // Find the entry, its locator, and inform its Cluster about the new feerate, if any. 
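The m_oversized field above behaves as a lazily evaluated cache: mutations reset it when needed, a nonzero count of individually oversized transactions provides a cheap shortcut, and only otherwise is the expensive GroupClusters() computation performed. A simplified stand-in (OversizedCache is illustrative; RecomputeExpensively() is a placeholder for the real group computation):

#include <cstdint>
#include <optional>

class OversizedCache
{
    std::optional<bool> m_oversized;  //!< cached answer, if known
    uint64_t m_txcount_oversized{0};  //!< transactions that are individually too large

    bool RecomputeExpensively() const { return false; } // placeholder for GroupClusters()

public:
    /** Additions cannot make an oversized graph acceptable again, so a cached "true"
     *  stays valid; only a cached "false" needs to be forgotten. */
    void OnMutation() { if (m_oversized.has_value() && !*m_oversized) m_oversized.reset(); }

    void AddIndividuallyOversizedTx() { ++m_txcount_oversized; m_oversized = true; }

    bool IsOversized()
    {
        if (m_oversized.has_value()) return *m_oversized;          // cached
        if (m_txcount_oversized > 0) return *(m_oversized = true); // cheap shortcut
        return *(m_oversized = RecomputeExpensively());            // full recomputation
    }
};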
auto& entry = m_entries[GetRefIndex(ref)]; for (int level = 0; level < MAX_LEVELS; ++level) { @@ -1916,6 +2238,32 @@ TxGraph::GraphIndex TxGraphImpl::CountDistinctClusters(std::span, std::vector> TxGraphImpl::GetMainStagingDiagrams() noexcept +{ + Assume(m_staging_clusterset.has_value()); + MakeAllAcceptable(0); + Assume(m_main_clusterset.m_deps_to_add.empty()); // can only fail if main is oversized + MakeAllAcceptable(1); + Assume(m_staging_clusterset->m_deps_to_add.empty()); // can only fail if staging is oversized + // For all Clusters in main which conflict with Clusters in staging (i.e., all that are removed + // by, or replaced in, staging), gather their chunk feerates. + auto main_clusters = GetConflicts(); + std::vector main_feerates, staging_feerates; + for (Cluster* cluster : main_clusters) { + cluster->AppendChunkFeerates(main_feerates); + } + // Do the same for the Clusters in staging themselves. + for (int quality = 0; quality < int(QualityLevel::NONE); ++quality) { + for (const auto& cluster : m_staging_clusterset->m_clusters[quality]) { + cluster->AppendChunkFeerates(staging_feerates); + } + } + // Sort both by decreasing feerate to obtain diagrams, and return them. + std::sort(main_feerates.begin(), main_feerates.end(), [](auto& a, auto& b) { return a > b; }); + std::sort(staging_feerates.begin(), staging_feerates.end(), [](auto& a, auto& b) { return a > b; }); + return std::make_pair(std::move(main_feerates), std::move(staging_feerates)); +} + void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const { // There must be an m_mapping for each m_depgraph position (including holes). @@ -1926,14 +2274,20 @@ void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const assert(m_linearization.size() <= graph.m_max_cluster_count); // The level must match the level the Cluster occurs in. assert(m_level == level); + // The sum of their sizes cannot exceed m_max_cluster_size, unless it is oversized. + assert(m_quality == QualityLevel::OVERSIZED || GetTxSize() <= graph.m_max_cluster_size); // m_quality and m_setindex are checked in TxGraphImpl::SanityCheck. + // OVERSIZED clusters are singletons. + assert(m_quality != QualityLevel::OVERSIZED || m_linearization.size() == 1); + // Compute the chunking of m_linearization. LinearizationChunking linchunking(m_depgraph, m_linearization); // Verify m_linearization. SetType m_done; LinearizationIndex linindex{0}; + DepGraphIndex chunk_pos{0}; //!< position within the current chunk assert(m_depgraph.IsAcyclic()); for (auto lin_pos : m_linearization) { assert(lin_pos < m_mapping.size()); @@ -1950,8 +2304,21 @@ void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const ++linindex; if (!linchunking.GetChunk(0).transactions[lin_pos]) { linchunking.MarkDone(linchunking.GetChunk(0).transactions); + chunk_pos = 0; } assert(entry.m_main_chunk_feerate == linchunking.GetChunk(0).feerate); + // Verify that an entry in the chunk index exists for every chunk-ending transaction. + ++chunk_pos; + bool is_chunk_end = (chunk_pos == linchunking.GetChunk(0).transactions.Count()); + assert((entry.m_main_chunkindex_iterator != graph.m_main_chunkindex.end()) == is_chunk_end); + if (is_chunk_end) { + auto& chunk_data = *entry.m_main_chunkindex_iterator; + if (m_done == m_depgraph.Positions() && chunk_pos == 1) { + assert(chunk_data.m_chunk_count == LinearizationIndex(-1)); + } else { + assert(chunk_data.m_chunk_count == chunk_pos); + } + } // If this Cluster has an acceptable quality level, its chunks must be connected. 
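GetMainStagingDiagrams() above builds each diagram by collecting per-chunk feerates and sorting them by decreasing feerate. A standalone sketch with a simplified ChunkFeerate type standing in for FeeFrac; the cross-multiplication comparison ignores the overflow concerns that the real FeeFrac handles.

#include <algorithm>
#include <cstdint>
#include <vector>

struct ChunkFeerate
{
    int64_t fee;
    int64_t size; //!< must be > 0
};

/** Strictly higher feerate first: fee_a/size_a > fee_b/size_b, compared by cross-multiplying. */
bool HigherFeerate(const ChunkFeerate& a, const ChunkFeerate& b)
{
    return a.fee * b.size > b.fee * a.size;
}

/** Turn a collection of per-chunk feerates into a diagram by sorting by decreasing feerate. */
std::vector<ChunkFeerate> MakeDiagram(std::vector<ChunkFeerate> chunks)
{
    std::sort(chunks.begin(), chunks.end(), HigherFeerate);
    return chunks;
}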
        assert(m_depgraph.IsConnected(linchunking.GetChunk(0).transactions));
    }
@@ -1970,6 +2337,8 @@ void TxGraphImpl::SanityCheck() const
     std::set expected_removed[MAX_LEVELS];
     /** Which Cluster::m_sequence values have been encountered. */
     std::set sequences;
+    /** Which GraphIndexes ought to occur in m_main_chunkindex, based on m_entries. */
+    std::set expected_chunkindex;
     /** Whether compaction is possible in the current state. */
     bool compact_possible{true};
@@ -1984,6 +2353,11 @@ void TxGraphImpl::SanityCheck() const
             assert(GetRefGraph(*entry.m_ref) == this);
             assert(GetRefIndex(*entry.m_ref) == idx);
         }
+        if (entry.m_main_chunkindex_iterator != m_main_chunkindex.end()) {
+            // Remember which entries we see a chunkindex entry for.
+            assert(entry.m_locator[0].IsPresent());
+            expected_chunkindex.insert(idx);
+        }
         // Verify the Entry m_locators.
         bool was_present{false}, was_removed{false};
         for (int level = 0; level < MAX_LEVELS; ++level) {
@@ -2077,9 +2451,11 @@ void TxGraphImpl::SanityCheck() const
         if (!clusterset.m_to_remove.empty()) compact_possible = false;
         if (!clusterset.m_removed.empty()) compact_possible = false;

-        // If m_group_data exists, its m_group_oversized must match m_oversized.
-        if (clusterset.m_group_data.has_value()) {
-            assert(clusterset.m_oversized == clusterset.m_group_data->m_group_oversized);
+        // If m_group_data exists, and no outstanding removals remain, m_oversized must match
+        // m_group_oversized || (m_txcount_oversized > 0).
+        if (clusterset.m_group_data.has_value() && clusterset.m_to_remove.empty()) {
+            assert(clusterset.m_oversized ==
+                   (clusterset.m_group_data->m_group_oversized || (clusterset.m_txcount_oversized > 0)));
         }

         // For non-top levels, m_oversized must be known (as it cannot change until the level
@@ -2096,13 +2472,422 @@ void TxGraphImpl::SanityCheck() const
     if (compact_possible) {
         assert(actual_unlinked.empty());
     }
+
+    // Finally, check the chunk index.
+    std::set actual_chunkindex;
+    FeeFrac last_chunk_feerate;
+    for (const auto& chunk : m_main_chunkindex) {
+        GraphIndex idx = chunk.m_graph_index;
+        actual_chunkindex.insert(idx);
+        auto chunk_feerate = m_entries[idx].m_main_chunk_feerate;
+        if (!last_chunk_feerate.IsEmpty()) {
+            assert(FeeRateCompare(last_chunk_feerate, chunk_feerate) >= 0);
+        }
+        last_chunk_feerate = chunk_feerate;
+    }
+    assert(actual_chunkindex == expected_chunkindex);
 }

-void TxGraphImpl::DoWork() noexcept
+bool TxGraphImpl::DoWork(uint64_t iters) noexcept
 {
-    for (int level = 0; level <= GetTopLevel(); ++level) {
-        MakeAllAcceptable(level);
+    uint64_t iters_done{0};
+    // Relinearize everything to acceptable level first.
+    for (int level = GetTopLevel(); level >= 0; --level) {
+        if (level == 0 && m_main_chunkindex_observers != 0) continue;
+        ApplyDependencies(level);
+        auto& clusterset = GetClusterSet(level);
+        if (clusterset.m_oversized == true) continue;
+        auto& queue = clusterset.m_clusters[int(QualityLevel::NEEDS_RELINEARIZE)];
+        while (!queue.empty()) {
+            if (iters_done + m_acceptable_iters >= iters) return false;
+            iters_done += MakeAcceptable(*queue.back().get());
+        }
     }
+    // If we have budget for more work left, get things optimal.
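The chunk-index portion of SanityCheck() above amounts to two checks: the index contains exactly the entries that claim to be in it, and walking the index yields non-increasing chunk feerates. A compact stand-alone version of those checks, with a plain integer key standing in for the chunk feerate:

#include <cassert>
#include <cstdint>
#include <limits>
#include <set>
#include <vector>

struct IndexEntry
{
    uint64_t graph_index;      //!< which entry this chunk belongs to
    int64_t chunk_feerate_key; //!< stand-in for the chunk feerate the index is sorted by
};

/** Verify that index contains exactly the expected entries, in non-increasing key order. */
void CheckIndex(const std::vector<IndexEntry>& index, const std::set<uint64_t>& expected)
{
    std::set<uint64_t> actual;
    int64_t last_key = std::numeric_limits<int64_t>::max();
    for (const IndexEntry& entry : index) {
        actual.insert(entry.graph_index);
        assert(entry.chunk_feerate_key <= last_key); // non-increasing order
        last_key = entry.chunk_feerate_key;
    }
    assert(actual == expected); // no missing and no spurious entries
}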
+ for (int level = GetTopLevel(); level >= 0; --level) { + if (level == 0 && m_main_chunkindex_observers != 0) continue; + auto& clusterset = GetClusterSet(level); + if (clusterset.m_oversized == true) continue; + auto& queue = clusterset.m_clusters[int(QualityLevel::ACCEPTABLE)]; + while (!queue.empty()) { + // Randomize the order in which we process, so that if the first cluster somehow needs + // more work than what iters allows, we don't keep spending it on the same one. + auto pos = m_rng.randrange(queue.size()); + auto [cost, optimal] = queue[pos].get()->Relinearize(*this, iters - iters_done); + iters_done += cost; + if (!optimal) return false; + } + } + return true; +} + +void BlockBuilderImpl::Next() noexcept +{ + // Don't do anything if we're already done. + if (m_cur_iter == m_graph->m_main_chunkindex.end()) return; + while (true) { + // Advance the pointer, and stop if we reach the end. + ++m_cur_iter; + m_cur_cluster = nullptr; + if (m_cur_iter == m_graph->m_main_chunkindex.end()) break; + // Find the cluster pointed to by m_cur_iter. + const auto& chunk_data = *m_cur_iter; + const auto& chunk_end_entry = m_graph->m_entries[chunk_data.m_graph_index]; + m_cur_cluster = chunk_end_entry.m_locator[0].cluster; + // If we previously skipped a chunk from this cluster we cannot include more from it. + if (!m_excluded_clusters.contains(m_cur_cluster)) break; + } +} + +std::optional, FeePerWeight>> BlockBuilderImpl::GetCurrentChunk() noexcept +{ + std::optional, FeePerWeight>> ret; + // Populate the return value if we are not done. + if (m_cur_iter != m_graph->m_main_chunkindex.end()) { + ret.emplace(); + const auto& chunk_data = *m_cur_iter; + const auto& chunk_end_entry = m_graph->m_entries[chunk_data.m_graph_index]; + auto cluster = chunk_end_entry.m_locator[0].cluster; + if (chunk_data.m_chunk_count == LinearizationIndex(-1)) { + // Special case in case just a single transaction remains, avoiding the need to + // dispatch to and dereference Cluster. + ret->first.resize(1); + Assume(chunk_end_entry.m_ref != nullptr); + ret->first[0] = chunk_end_entry.m_ref; + m_cur_cluster = nullptr; + } else { + ret->first.resize(chunk_data.m_chunk_count); + auto start_pos = chunk_end_entry.m_main_lin_index + 1 - chunk_data.m_chunk_count; + bool is_end = cluster->GetClusterRefs(*m_graph, ret->first, start_pos); + if (is_end) { + m_cur_cluster = nullptr; + // If the chunk size was 1, then the special case above should have been used. + Assume(chunk_data.m_chunk_count > 1); + } else { + Assume(cluster == m_cur_cluster); + } + } + ret->second = chunk_end_entry.m_main_chunk_feerate; + } + return ret; +} + +BlockBuilderImpl::BlockBuilderImpl(TxGraphImpl& graph) noexcept : m_graph(&graph) +{ + // Make sure all clusters in main are up to date, and acceptable. + m_graph->MakeAllAcceptable(0); + // There cannot remain any inapplicable dependencies (only possible if main is oversized). + Assume(m_graph->m_main_clusterset.m_deps_to_add.empty()); + // Remember that this object is observing the graph's index, so that we can detect concurrent + // modifications. + ++m_graph->m_main_chunkindex_observers; + // Find the first chunk. + m_cur_iter = m_graph->m_main_chunkindex.begin(); + m_cur_cluster = nullptr; + if (m_cur_iter != m_graph->m_main_chunkindex.end()) { + // Find the cluster pointed to by m_cur_iter. 
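BlockBuilderImpl::Next() above advances through the chunk index but must pass over any chunk belonging to a cluster that was previously skipped. A reduced sketch of that traversal; Chunk and NextIncludable are illustrative names, not txgraph types.

#include <cstddef>
#include <cstdint>
#include <unordered_set>
#include <vector>

struct Chunk
{
    uint64_t cluster_id; //!< which cluster this chunk belongs to
};

/** Advance cur to the next chunk whose cluster has not been excluded,
 *  returning chunks.size() when the end is reached. */
std::size_t NextIncludable(const std::vector<Chunk>& chunks, std::size_t cur,
                           const std::unordered_set<uint64_t>& excluded)
{
    while (++cur < chunks.size()) {
        if (!excluded.contains(chunks[cur].cluster_id)) break;
    }
    return cur;
}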
+ const auto& chunk_data = *m_cur_iter; + const auto& chunk_end_entry = m_graph->m_entries[chunk_data.m_graph_index]; + m_cur_cluster = chunk_end_entry.m_locator[0].cluster; + } +} + +BlockBuilderImpl::~BlockBuilderImpl() +{ + Assume(m_graph->m_main_chunkindex_observers > 0); + // Permit modifications to the main graph again after destroying the BlockBuilderImpl. + --m_graph->m_main_chunkindex_observers; +} + +void BlockBuilderImpl::Include() noexcept +{ + // The actual inclusion of the chunk is done by the calling code. All we have to do is switch + // to the next chunk. + Next(); +} + +void BlockBuilderImpl::Skip() noexcept +{ + // When skipping a chunk we need to not include anything more of the cluster, as that could make + // the result topologically invalid. Note that m_cur_cluster == nullptr when this was the last + // chunk of a cluster, so in that case nothing is added here. This may significantly reduce the + // size of m_excluded_clusters, especially when many singleton clusters are ignored. + if (m_cur_cluster != nullptr) m_excluded_clusters.insert(m_cur_cluster); + Next(); +} + +std::unique_ptr TxGraphImpl::GetBlockBuilder() noexcept +{ + return std::make_unique(*this); +} + +std::pair, FeePerWeight> TxGraphImpl::GetWorstMainChunk() noexcept +{ + std::pair, FeePerWeight> ret; + // Make sure all clusters in main are up to date, and acceptable. + MakeAllAcceptable(0); + Assume(m_main_clusterset.m_deps_to_add.empty()); + // If the graph is not empty, populate ret. + if (!m_main_chunkindex.empty()) { + const auto& chunk_data = *m_main_chunkindex.rbegin(); + const auto& chunk_end_entry = m_entries[chunk_data.m_graph_index]; + Cluster* cluster = chunk_end_entry.m_locator[0].cluster; + if (chunk_data.m_chunk_count == LinearizationIndex(-1) || chunk_data.m_chunk_count == 1) { + // Special case for singletons. + ret.first.resize(1); + Assume(chunk_end_entry.m_ref != nullptr); + ret.first[0] = chunk_end_entry.m_ref; + } else { + ret.first.resize(chunk_data.m_chunk_count); + auto start_pos = chunk_end_entry.m_main_lin_index + 1 - chunk_data.m_chunk_count; + cluster->GetClusterRefs(*this, ret.first, start_pos); + std::reverse(ret.first.begin(), ret.first.end()); + } + ret.second = chunk_end_entry.m_main_chunk_feerate; + } + return ret; +} + +std::vector TxGraphImpl::Trim() noexcept +{ + int level = GetTopLevel(); + Assume(m_main_chunkindex_observers == 0 || level != 0); + std::vector ret; + + // Compute the groups of to-be-merged Clusters (which also applies all removals, and splits). + auto& clusterset = GetClusterSet(level); + if (clusterset.m_oversized == false) return ret; + GroupClusters(level); + Assume(clusterset.m_group_data.has_value()); + // Nothing to do if not oversized. + if (!clusterset.m_group_data->m_group_oversized) return ret; + + // In this function, would-be clusters (as precomputed in m_group_data by GroupClusters) are + // trimmed by removing transactions in them such that the resulting clusters satisfy the size + // and count limits. + // + // It works by defining for each would-be cluster a rudimentary linearization: at every point + // the highest-chunk-feerate remaining transaction is picked among those with no unmet + // dependencies. "Dependency" here means either a to-be-added dependency (m_deps_to_add), or + // an implicit dependency added between any two consecutive transaction in their current + // cluster linearization. So it can be seen as a "merge sort" of the chunks of the clusters, + // but respecting the dependencies being added. 
+ // + // This rudimentary linearization is computed lazily, by putting all eligible (no unmet + // dependencies) transactions in a heap, and popping the highest-feerate one from it. Along the + // way, the counts and sizes of the would-be clusters up to that point are tracked (by + // partitioning the involved transactions using a union-find structure). Any transaction whose + // addition would cause a violation is removed, along with all their descendants. + // + // A next invocation of GroupClusters (after applying the removals) will compute the new + // resulting clusters, and none of them will violate the limits. + + /** All dependencies (both to be added ones, and implicit ones between consecutive transactions + * in existing cluster linearizations), sorted by parent. */ + std::vector> deps_by_parent; + /** Same, but sorted by child. */ + std::vector> deps_by_child; + /** Information about all transactions involved in a Cluster group to be trimmed, sorted by + * GraphIndex. */ + std::vector trim_data; + /** Iterators into trim_data, treated as a max heap according to cmp_fn below. */ + std::vector::iterator> trim_heap; + /** The list of representatives of the partitions a given transaction depends on. */ + std::vector current_deps; + + /** Function to define the ordering of trim_heap. */ + static constexpr auto cmp_fn = [](auto a, auto b) noexcept { + // Sort by increasing chunk feerate, and then by decreasing size. + // We do not need to sort by cluster or within clusters, because due to the implicit + // dependency between consecutive linearization elements, no two transactions from the + // same Cluster will ever simultaneously be in the heap. + return a->m_chunk_feerate < b->m_chunk_feerate; + }; + + /** Given a TrimTxData entry, find the representative of the partition it is in. */ + static constexpr auto find_fn = [](TrimTxData* arg) noexcept { + while (arg != arg->m_uf_parent) { + // Replace pointer to parent with pointer to grandparent (path splitting). + // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Finding_set_representatives. + auto par = arg->m_uf_parent; + arg->m_uf_parent = par->m_uf_parent; + arg = par; + } + return arg; + }; + + /** Given two TrimTxData entries, union the partitions they are in, and return the + * representative. */ + static constexpr auto union_fn = [](TrimTxData* arg1, TrimTxData* arg2) noexcept { + // Replace arg1 and arg2 by their representatives. + auto rep1 = find_fn(arg1); + auto rep2 = find_fn(arg2); + // Bail out if both representatives are the same, because that means arg1 and arg2 are in + // the same partition already. + if (rep1 == rep2) return rep1; + // Pick the lower-count root to become a child of the higher-count one. + // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Union_by_size. + if (rep1->m_uf_count < rep2->m_uf_count) std::swap(rep1, rep2); + rep2->m_uf_parent = rep1; + // Add the statistics of arg2 (which is no longer a representative) to those of arg1 (which + // is now the representative for both). + rep1->m_uf_size += rep2->m_uf_size; + rep1->m_uf_count += rep2->m_uf_count; + return rep1; + }; + + /** Get iterator to TrimTxData entry for a given index. */ + auto locate_fn = [&](GraphIndex index) noexcept { + auto it = std::lower_bound(trim_data.begin(), trim_data.end(), index, [](TrimTxData& elem, GraphIndex idx) noexcept { + return elem.m_index < idx; + }); + Assume(it != trim_data.end() && it->m_index == index); + return it; + }; + + // For each group of to-be-merged Clusters. 
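The find_fn/union_fn lambdas above implement a union-find structure with path splitting and union by size, in which each representative also carries the aggregate transaction count and size of its partition, so the limit check before a merge only has to consult representatives. The same structure as a standalone sketch (Node, MakeSingleton, Find and Union are illustrative names):

#include <cstdint>
#include <utility>

struct Node
{
    Node* uf_parent;   //!< parent in the union-find forest (self if representative)
    uint32_t uf_count; //!< transactions in the partition (valid for representatives)
    uint64_t uf_size;  //!< total transaction size of the partition (valid for representatives)
};

/** Turn a node into a singleton partition with the given transaction size. */
void MakeSingleton(Node& n, uint64_t tx_size)
{
    n.uf_parent = &n;
    n.uf_count = 1;
    n.uf_size = tx_size;
}

/** Find the representative of n's partition, applying path splitting along the way. */
Node* Find(Node* n)
{
    while (n != n->uf_parent) {
        Node* parent = n->uf_parent;
        n->uf_parent = parent->uf_parent; // point n at its grandparent
        n = parent;
    }
    return n;
}

/** Merge the partitions of a and b (union by size); the surviving representative
 *  accumulates the partition-wide count and size totals. */
Node* Union(Node* a, Node* b)
{
    Node* rep_a = Find(a);
    Node* rep_b = Find(b);
    if (rep_a == rep_b) return rep_a;
    if (rep_a->uf_count < rep_b->uf_count) std::swap(rep_a, rep_b);
    rep_b->uf_parent = rep_a;
    rep_a->uf_count += rep_b->uf_count;
    rep_a->uf_size += rep_b->uf_size;
    return rep_a;
}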
+ for (const auto& group_data : clusterset.m_group_data->m_groups) { + trim_data.clear(); + trim_heap.clear(); + deps_by_child.clear(); + deps_by_parent.clear(); + + // Gather trim data from all involved Clusters. + auto cluster_span = std::span{clusterset.m_group_data->m_group_clusters} + .subspan(group_data.m_cluster_offset, group_data.m_cluster_count); + uint64_t size{0}; + for (Cluster* cluster : cluster_span) { + size += cluster->AppendTrimData(trim_data, deps_by_child); + } + // If this group of Clusters does not violate any limits, continue to the next group. + if (trim_data.size() <= m_max_cluster_count && size <= m_max_cluster_size) continue; + // Sort the trim data by GraphIndex. In what follows, we will treat this sorted vector as + // a map from GraphIndex to TrimTxData, and its ordering will not change anymore. + std::sort(trim_data.begin(), trim_data.end(), [](auto& a, auto& b) noexcept { return a.m_index < b.m_index; }); + + // Construct deps_by_child. + deps_by_child.insert(deps_by_child.end(), + clusterset.m_deps_to_add.begin() + group_data.m_deps_offset, + clusterset.m_deps_to_add.begin() + group_data.m_deps_offset + group_data.m_deps_count); + std::sort(deps_by_child.begin(), deps_by_child.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); + // Fill m_parents_count and m_parents_offset in trim_data. Because of the sort above, all + // dependencies involving the same child are grouped together, so a single linear scan + // suffices. + auto deps_it = deps_by_child.begin(); + for (auto trim_it = trim_data.begin(); trim_it != trim_data.end(); ++trim_it) { + trim_it->m_parent_offset = deps_it - deps_by_child.begin(); + trim_it->m_deps_left = 0; + while (deps_it != deps_by_child.end() && deps_it->second == trim_it->m_index) { + ++trim_it->m_deps_left; + ++deps_it; + } + trim_it->m_parent_count = trim_it->m_deps_left; + // If this transaction has no unmet dependencies, and is not oversized, add it to the + // heap (just append for now, the heapification happens below). + if (trim_it->m_deps_left == 0 && trim_it->m_tx_size <= m_max_cluster_size) { + // Initialize it as a singleton partition. + trim_it->m_uf_parent = &*trim_it; + trim_it->m_uf_count = 1; + trim_it->m_uf_size = trim_it->m_tx_size; + // Add to heap. + trim_heap.push_back(trim_it); + } + } + Assume(deps_it == deps_by_child.end()); + + // Construct deps_by_parent. + deps_by_parent = deps_by_child; + std::sort(deps_by_parent.begin(), deps_by_parent.end(), [](auto& a, auto& b) noexcept { return a.first < b.first; }); + // Fill m_children_offset and m_children_count in trim_data. Because of the sort above, all + // dependencies involving the same parent are grouped together, so a single linear scan + // suffices. + deps_it = deps_by_parent.begin(); + for (auto& trim_entry : trim_data) { + trim_entry.m_children_count = 0; + trim_entry.m_children_offset = deps_it - deps_by_parent.begin(); + while (deps_it != deps_by_parent.end() && deps_it->first == trim_entry.m_index) { + ++trim_entry.m_children_count; + ++deps_it; + } + } + Assume(deps_it == deps_by_parent.end()); + + // Build a heap of all transactions with 0 unmet dependencies. + std::make_heap(trim_heap.begin(), trim_heap.end(), cmp_fn); + + // Iterate over to-be-included transactions. 
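Filling m_parent_offset/m_parent_count (and the analogous children fields) above relies on sorting the dependency list so that all entries for one node are adjacent, after which a single linear scan assigns each node its contiguous range, a CSR-style adjacency layout. A standalone sketch under the same assumption that every child occurring in the dependency list is present in the node list:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

struct NodeDeps
{
    uint32_t index;            //!< node id; the NodeDeps vector is sorted by this
    std::size_t parent_offset; //!< first entry in deps whose child is this node
    uint32_t parent_count;     //!< number of such entries
};

/** Sort (parent, child) pairs by child, then assign each node the contiguous range of
 *  dependencies that name it as child, in one linear scan. */
void FillParentRanges(std::vector<NodeDeps>& nodes,
                      std::vector<std::pair<uint32_t, uint32_t>>& deps)
{
    std::sort(deps.begin(), deps.end(),
              [](const auto& a, const auto& b) { return a.second < b.second; });
    auto it = deps.begin();
    for (NodeDeps& node : nodes) {
        node.parent_offset = static_cast<std::size_t>(it - deps.begin());
        node.parent_count = 0;
        while (it != deps.end() && it->second == node.index) {
            ++node.parent_count;
            ++it;
        }
    }
}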
It is possible that the heap empties without
+        // ever hitting either cluster limit, in case the implied graph (to be added dependencies
+        // plus implicit dependency between each original transaction and its predecessor in the
+        // linearization it came from) contains cycles. Such cycles will be removed entirely,
+        // because each of the transactions in the cycle permanently has unmet dependencies.
+        // However, this cannot occur in real scenarios where Trim() is called to deal with
+        // reorganizations that would violate cluster limits, as all added dependencies are in the
+        // same direction (from old mempool transactions to new from-block transactions); cycles
+        // require dependencies in both directions to be added.
+        while (!trim_heap.empty()) {
+            // Move the best remaining transaction to the end of trim_heap.
+            std::pop_heap(trim_heap.begin(), trim_heap.end(), cmp_fn);
+            // Pop it, and find its TrimTxData.
+            auto& entry = *trim_heap.back();
+            trim_heap.pop_back();
+
+            // Find the distinct transaction partitions this entry depends on.
+            current_deps.clear();
+            for (auto& [par, chl] : std::span{deps_by_child}.subspan(entry.m_parent_offset, entry.m_parent_count)) {
+                Assume(chl == entry.m_index);
+                current_deps.push_back(find_fn(&*locate_fn(par)));
+            }
+            std::sort(current_deps.begin(), current_deps.end());
+            current_deps.erase(std::unique(current_deps.begin(), current_deps.end()), current_deps.end());
+
+            // Compute resource counts.
+            uint32_t new_count = 1;
+            uint64_t new_size = entry.m_tx_size;
+            for (TrimTxData* ptr : current_deps) {
+                new_count += ptr->m_uf_count;
+                new_size += ptr->m_uf_size;
+            }
+            // Skip the entry if this would violate any limit.
+            if (new_count > m_max_cluster_count || new_size > m_max_cluster_size) break;
+
+            // Union the partitions this transaction and all its dependencies are in together.
+            auto rep = &entry;
+            for (TrimTxData* ptr : current_deps) rep = union_fn(ptr, rep);
+            // Mark the entry as included (so the loop below will not remove the transaction).
+            entry.m_deps_left = uint32_t(-1);
+            // Mark each to-be-added dependency involving this transaction as parent satisfied.
+            for (auto& [par, chl] : std::span{deps_by_parent}.subspan(entry.m_children_offset, entry.m_children_count)) {
+                Assume(par == entry.m_index);
+                auto chl_it = locate_fn(chl);
+                // Reduce the number of unmet dependencies of chl_it, and if that brings the number
+                // to zero, add it to the heap.
+                Assume(chl_it->m_deps_left > 0);
+                if (--chl_it->m_deps_left == 0) {
+                    // Initialize as a singleton partition.
+                    chl_it->m_uf_parent = &*chl_it;
+                    chl_it->m_uf_count = 1;
+                    chl_it->m_uf_size = chl_it->m_tx_size;
+                    // Add it to the heap.
+                    trim_heap.push_back(chl_it);
+                    std::push_heap(trim_heap.begin(), trim_heap.end(), cmp_fn);
+                }
+            }
+        }
+
+        // Remove all the transactions that were not processed above. Because nothing gets
+        // processed until/unless all its dependencies are met, this automatically guarantees
+        // that if a transaction is removed, all its descendants, or would-be descendants, are
+        // removed as well.
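Structurally, the selection loop above is a Kahn-style topological traversal driven by a max-heap: a transaction becomes eligible once its last unmet dependency is satisfied, and among eligible ones the best chunk feerate is taken next. The sketch below keeps only that skeleton and leaves out the partition and limit accounting; TrimNode and SelectInOrder are made-up names, and the key is a plain integer.

#include <algorithm>
#include <cstdint>
#include <vector>

struct TrimNode
{
    int64_t key;                    //!< stand-in for the chunk feerate (higher is better)
    uint32_t deps_left;             //!< number of unmet dependencies
    std::vector<uint32_t> children; //!< indices of nodes that depend on this one
};

/** Process nodes in "best eligible first" order; nodes whose dependencies are never all
 *  met (for example, members of a cycle) are simply never selected. */
std::vector<uint32_t> SelectInOrder(std::vector<TrimNode>& nodes)
{
    auto cmp = [&](uint32_t a, uint32_t b) { return nodes[a].key < nodes[b].key; };
    std::vector<uint32_t> heap, order;
    for (uint32_t i = 0; i < nodes.size(); ++i) {
        if (nodes[i].deps_left == 0) heap.push_back(i);
    }
    std::make_heap(heap.begin(), heap.end(), cmp);
    while (!heap.empty()) {
        std::pop_heap(heap.begin(), heap.end(), cmp);
        uint32_t best = heap.back();
        heap.pop_back();
        order.push_back(best);
        for (uint32_t child : nodes[best].children) {
            if (--nodes[child].deps_left == 0) {
                heap.push_back(child);
                std::push_heap(heap.begin(), heap.end(), cmp);
            }
        }
    }
    return order;
}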
+ for (const auto& trim_entry : trim_data) { + if (trim_entry.m_deps_left != uint32_t(-1)) { + ret.push_back(m_entries[trim_entry.m_index].m_ref); + clusterset.m_to_remove.push_back(trim_entry.m_index); + } + } + } + clusterset.m_group_data.reset(); + clusterset.m_oversized = false; + return ret; } } // namespace @@ -2139,7 +2924,7 @@ TxGraph::Ref::Ref(Ref&& other) noexcept std::swap(m_index, other.m_index); } -std::unique_ptr MakeTxGraph(unsigned max_cluster_count) noexcept +std::unique_ptr MakeTxGraph(unsigned max_cluster_count, uint64_t max_cluster_size, uint64_t acceptable_iters) noexcept { - return std::make_unique(max_cluster_count); + return std::make_unique(max_cluster_count, max_cluster_size, acceptable_iters); } diff --git a/src/txgraph.h b/src/txgraph.h index cdfb2fe6dec..42fb12cfec7 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -3,9 +3,11 @@ // file COPYING or http://www.opensource.org/licenses/mit-license.php. #include -#include #include +#include +#include #include +#include #include @@ -61,10 +63,10 @@ public: /** Virtual destructor, so inheriting is safe. */ virtual ~TxGraph() = default; /** Construct a new transaction with the specified feerate, and return a Ref to it. - * If a staging graph exists, the new transaction is only created there. In all - * further calls, only Refs created by AddTransaction() are allowed to be passed to this - * TxGraph object (or empty Ref objects). Ref objects may outlive the TxGraph they were - * created for. */ + * If a staging graph exists, the new transaction is only created there. In all further + * calls, only Refs created by AddTransaction() are allowed to be passed to this TxGraph + * object (or empty Ref objects). Ref objects may outlive the TxGraph they were created + * for. */ [[nodiscard]] virtual Ref AddTransaction(const FeePerWeight& feerate) noexcept = 0; /** Remove the specified transaction. If a staging graph exists, the removal only happens * there. This is a no-op if the transaction was already removed. @@ -92,9 +94,10 @@ public: virtual void SetTransactionFee(const Ref& arg, int64_t fee) noexcept = 0; /** TxGraph is internally lazy, and will not compute many things until they are needed. - * Calling DoWork will compute everything now, so that future operations are fast. This can be - * invoked while oversized. */ - virtual void DoWork() noexcept = 0; + * Calling DoWork will perform some work now (controlled by iters) so that future operations + * are fast, if there is any. Returns whether all work is done. This can be invoked while + * oversized. */ + virtual bool DoWork(uint64_t iters) noexcept = 0; /** Create a staging graph (which cannot exist already). This acts as if a full copy of * the transaction graph is made, upon which further modifications are made. This copy can @@ -162,6 +165,44 @@ public: * main clusters are counted. Refs that do not exist in the queried graph are ignored. Refs * can not be null. The queried graph must not be oversized. */ virtual GraphIndex CountDistinctClusters(std::span, bool main_only = false) noexcept = 0; + /** For both main and staging (which must both exist and not be oversized), return the combined + * respective feerate diagrams, including chunks from all clusters, but excluding clusters + * that appear identically in both. Use FeeFrac rather than FeePerWeight so CompareChunks is + * usable without type-conversion. 
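The revised DoWork() contract documented above (a bounded amount of work per call, plus a boolean completion result) lends itself to slicing background work, roughly as below. Worker is a stand-in type so the sketch is self-contained; the real call would be made on a TxGraph instance.

#include <cstdint>

/** Minimal stand-in mirroring the DoWork() contract: perform at most iters steps of
 *  background work, and return whether everything is finished. */
struct Worker
{
    uint64_t remaining{20'000};

    bool DoWork(uint64_t iters)
    {
        uint64_t done = iters < remaining ? iters : remaining;
        remaining -= done;
        return remaining == 0;
    }
};

int main()
{
    Worker w;
    // Spread the work over bounded slices; each call does at most 5000 steps, and the
    // loop ends once DoWork() reports completion.
    while (!w.DoWork(/*iters=*/5000)) {
        // in real code: yield to other tasks between slices
    }
}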
*/ + virtual std::pair, std::vector> GetMainStagingDiagrams() noexcept = 0; + /** Remove transactions (including their own descendants) according to a fast but best-effort + * strategy such that the TxGraph's cluster and size limits are respected. Applies to staging + * if it exists, and to main otherwise. Returns the list of all removed transactions in + * unspecified order. This has no effect unless the relevant graph is oversized. */ + virtual std::vector Trim() noexcept = 0; + + /** Interface returned by GetBlockBuilder. */ + class BlockBuilder + { + protected: + /** Make constructor non-public (use TxGraph::GetBlockBuilder()). */ + BlockBuilder() noexcept = default; + public: + /** Support safe inheritance. */ + virtual ~BlockBuilder() = default; + /** Get the chunk that is currently suggested to be included, plus its feerate, if any. */ + virtual std::optional, FeePerWeight>> GetCurrentChunk() noexcept = 0; + /** Mark the current chunk as included, and progress to the next one. */ + virtual void Include() noexcept = 0; + /** Mark the current chunk as skipped, and progress to the next one. Further chunks from + * the same cluster as the current one will not be reported anymore. */ + virtual void Skip() noexcept = 0; + }; + + /** Construct a block builder, drawing chunks in order, from the main graph, which cannot be + * oversized. While the returned object exists, no mutators on the main graph are allowed. + * The BlockBuilder object must not outlive the TxGraph it was created with. */ + virtual std::unique_ptr GetBlockBuilder() noexcept = 0; + /** Get the last chunk in the main graph, i.e., the last chunk that would be returned by a + * BlockBuilder created now, together with its feerate. The chunk is returned in + * reverse-topological order, so every element is preceded by all its descendants. The main + * graph must not be oversized. If the graph is empty, {{}, FeePerWeight{}} is returned. */ + virtual std::pair, FeePerWeight> GetWorstMainChunk() noexcept = 0; /** Perform an internal consistency check on this object. */ virtual void SanityCheck() const = 0; @@ -205,8 +246,10 @@ public: }; }; -/** Construct a new TxGraph with the specified limit on transactions within a cluster. That - * number cannot exceed MAX_CLUSTER_COUNT_LIMIT. */ -std::unique_ptr MakeTxGraph(unsigned max_cluster_count) noexcept; +/** Construct a new TxGraph with the specified limit on transactions within a cluster, and the + * specified limit on the sum of transaction sizes within a cluster. max_cluster_count cannot + * exceed MAX_CLUSTER_COUNT_LIMIT. acceptable_iters controls how many linearization optimization + * steps will be performed before it is considered to be of acceptable quality. */ +std::unique_ptr MakeTxGraph(unsigned max_cluster_count, uint64_t max_cluster_size, uint64_t acceptable_iters) noexcept; #endif // BITCOIN_TXGRAPH_H
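As a usage illustration of the BlockBuilder interface declared above: a caller repeatedly inspects the current chunk, includes it while it fits the remaining block space, and skips it otherwise (which also excludes later chunks of the same cluster). MiniBuilder below is a self-contained stand-in with the same three-method shape, not the real implementation; the real object comes from TxGraph::GetBlockBuilder(), and chunk sizes come from FeePerWeight.

#include <cstddef>
#include <cstdint>
#include <optional>
#include <utility>
#include <vector>

struct ChunkInfo
{
    std::vector<int> txs; //!< stand-in for std::vector<TxGraph::Ref*>
    int64_t size;         //!< stand-in for the chunk's FeePerWeight::size
};

class MiniBuilder
{
    std::vector<ChunkInfo> m_chunks;
    std::size_t m_pos{0};
public:
    explicit MiniBuilder(std::vector<ChunkInfo> chunks) : m_chunks(std::move(chunks)) {}
    std::optional<ChunkInfo> GetCurrentChunk() const
    {
        if (m_pos >= m_chunks.size()) return std::nullopt;
        return m_chunks[m_pos];
    }
    void Include() { ++m_pos; }
    void Skip() { ++m_pos; } // the real Skip() additionally excludes the rest of the chunk's cluster
};

/** Greedily fill a block of at most max_size, chunk by chunk. */
std::vector<int> BuildBlock(MiniBuilder& builder, int64_t max_size)
{
    std::vector<int> block;
    int64_t used{0};
    while (auto chunk = builder.GetCurrentChunk()) {
        if (used + chunk->size <= max_size) {
            block.insert(block.end(), chunk->txs.begin(), chunk->txs.end());
            used += chunk->size;
            builder.Include();
        } else {
            builder.Skip();
        }
    }
    return block;
}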