Merge 6480423d79 into c5e44a0435

Merge bitcoin/bitcoin#32369: test: Use the correct node for doubled keypath test
32d55e28af test: Use the correct node for doubled keypath test (Ava Chow) Pull request description: #29124 had a silent merge conflict with #32350 which resulted in it using the wrong node. Fix the test to use the correct v22 node. ACKs for top commit: maflcko: lgtm ACK 32d55e28af rkrux: ACK 32d55e28af BrandonOdiwuor: Code Review ACK 32d55e28af Tree-SHA512: 1e0231985beb382b16e1d608c874750423d0502388db0c8ad450b22d17f9d96f5e16a6b44948ebda5efc750f62b60d0de8dd20131f449427426a36caf374af92
2025-04-29 14:59:39 -04:00 · 2025-04-29 11:52:23 +02:00 · 2025-04-29 09:59:42 +01:00 · 2025-04-28 14:44:17 -07:00 · 2025-04-22 16:50:38 -04:00 · 2025-04-22 16:50:38 -04:00
7 changed files with 1275 additions and 105 deletions
--- a/src/bench/cluster_linearize.cpp
+++ b/src/bench/cluster_linearize.cpp
@ -229,8 +229,8 @@ void BenchLinearizeOptimally(benchmark::Bench& bench, const std::array<uint8_t,
        reader >> Using<DepGraphFormatter>(depgraph);
        uint64_t rng_seed = 0;
        bench.run([&] {
-            auto res = Linearize(depgraph, /*max_iterations=*/10000000, rng_seed++);
-            assert(res.second);
+            auto [_lin, optimal, _cost] = Linearize(depgraph, /*max_iterations=*/10000000, rng_seed++);
+            assert(optimal);
        });
    };

--- a/src/cluster_linearize.h
+++ b/src/cluster_linearize.h
@ -1030,19 +1030,20 @@ public:
 *                                linearize.
 * @param[in] old_linearization   An existing linearization for the cluster (which must be
 *                                topologically valid), or empty.
- * @return                        A pair of:
+ * @return                        A tuple of:
 *                                - The resulting linearization. It is guaranteed to be at least as
 *                                  good (in the feerate diagram sense) as old_linearization.
 *                                - A boolean indicating whether the result is guaranteed to be
 *                                  optimal.
+ *                                - How many optimization steps were actually performed.
 *
 * Complexity: possibly O(N * min(max_iterations + N, sqrt(2^N))) where N=depgraph.TxCount().
 */
 template<typename SetType>
-std::pair<std::vector<DepGraphIndex>, bool> Linearize(const DepGraph<SetType>& depgraph, uint64_t max_iterations, uint64_t rng_seed, std::span<const DepGraphIndex> old_linearization = {}) noexcept
+std::tuple<std::vector<DepGraphIndex>, bool, uint64_t> Linearize(const DepGraph<SetType>& depgraph, uint64_t max_iterations, uint64_t rng_seed, std::span<const DepGraphIndex> old_linearization = {}) noexcept
 {
    Assume(old_linearization.empty() || old_linearization.size() == depgraph.TxCount());
-    if (depgraph.TxCount() == 0) return {{}, true};
+    if (depgraph.TxCount() == 0) return {{}, true, 0};

    uint64_t iterations_left = max_iterations;
    std::vector<DepGraphIndex> linearization;
@ -1113,7 +1114,7 @@ std::pair<std::vector<DepGraphIndex>, bool> Linearize(const DepGraph<SetType>& d
        }
    }

-    return {std::move(linearization), optimal};
+    return {std::move(linearization), optimal, max_iterations - iterations_left};
 }

 /** Improve a given linearization.
--- a/src/test/fuzz/cluster_linearize.cpp
+++ b/src/test/fuzz/cluster_linearize.cpp
@ -906,7 +906,8 @@ FUZZ_TARGET(clusterlin_linearize)

    // Invoke Linearize().
    iter_count &= 0x7ffff;
-    auto [linearization, optimal] = Linearize(depgraph, iter_count, rng_seed, old_linearization);
+    auto [linearization, optimal, cost] = Linearize(depgraph, iter_count, rng_seed, old_linearization);
+    assert(cost <= iter_count);
    SanityCheck(depgraph, linearization);
    auto chunking = ChunkLinearization(depgraph, linearization);

@ -1090,7 +1091,7 @@ FUZZ_TARGET(clusterlin_postlinearize_tree)

    // Try to find an even better linearization directly. This must not change the diagram for the
    // same reason.
-    auto [opt_linearization, _optimal] = Linearize(depgraph_tree, 100000, rng_seed, post_linearization);
+    auto [opt_linearization, _optimal, _cost] = Linearize(depgraph_tree, 100000, rng_seed, post_linearization);
    auto opt_chunking = ChunkLinearization(depgraph_tree, opt_linearization);
    auto cmp_opt = CompareChunks(opt_chunking, post_chunking);
    assert(cmp_opt == 0);
--- a/src/test/fuzz/txgraph.cpp
+++ b/src/test/fuzz/txgraph.cpp
@ -11,6 +11,7 @@
 #include <util/feefrac.h>

 #include <algorithm>
+#include <iterator>
 #include <map>
 #include <memory>
 #include <set>
@ -52,9 +53,17 @@ struct SimTxGraph
    std::optional<bool> oversized;
    /** The configured maximum number of transactions per cluster. */
    DepGraphIndex max_cluster_count;
+    /** Which transactions have been modified in the graph since creation, either directly or by
+     *  being in a cluster which includes modifications. Only relevant for the staging graph. */
+    SetType modified;
+    /** The configured maximum total size of transactions per cluster. */
+    uint64_t max_cluster_size;
+    /** Whether the corresponding real graph is known to be optimally linearized. */
+    bool real_is_optimal{false};

    /** Construct a new SimTxGraph with the specified maximum cluster count. */
-    explicit SimTxGraph(DepGraphIndex max_cluster) : max_cluster_count(max_cluster) {}
+    explicit SimTxGraph(DepGraphIndex max_cluster, uint64_t max_size) :
+        max_cluster_count(max_cluster), max_cluster_size(max_size) {}

    // Permit copying and moving.
    SimTxGraph(const SimTxGraph&) noexcept = default;
@ -74,15 +83,33 @@ struct SimTxGraph
            while (todo.Any()) {
                auto component = graph.FindConnectedComponent(todo);
                if (component.Count() > max_cluster_count) oversized = true;
+                uint64_t component_size{0};
+                for (auto i : component) component_size += graph.FeeRate(i).size;
+                if (component_size > max_cluster_size) oversized = true;
                todo -= component;
            }
        }
        return *oversized;
    }

+    void MakeModified(DepGraphIndex index)
+    {
+        modified |= graph.GetConnectedComponent(graph.Positions(), index);
+    }
+
    /** Determine the number of (non-removed) transactions in the graph. */
    DepGraphIndex GetTransactionCount() const { return graph.TxCount(); }

+    /** Get the sum of all fees/sizes in the graph. */
+    FeePerWeight SumAll() const
+    {
+        FeePerWeight ret;
+        for (auto i : graph.Positions()) {
+            ret += graph.FeeRate(i);
+        }
+        return ret;
+    }
+
    /** Get the position where ref occurs in this simulated graph, or -1 if it does not. */
    Pos Find(const TxGraph::Ref* ref) const
    {
@ -104,10 +131,14 @@ struct SimTxGraph
    {
        assert(graph.TxCount() < MAX_TRANSACTIONS);
        auto simpos = graph.AddTransaction(feerate);
+        real_is_optimal = false;
+        MakeModified(simpos);
        assert(graph.Positions()[simpos]);
        simmap[simpos] = std::make_shared<TxGraph::Ref>();
        auto ptr = simmap[simpos].get();
        simrevmap[ptr] = simpos;
+        // This may invalidate our cached oversized value.
+        if (oversized.has_value() && !*oversized) oversized = std::nullopt;
        return ptr;
    }

@ -119,6 +150,8 @@ struct SimTxGraph
        auto chl_pos = Find(child);
        if (chl_pos == MISSING) return;
        graph.AddDependencies(SetType::Singleton(par_pos), chl_pos);
+        MakeModified(par_pos);
+        real_is_optimal = false;
        // This may invalidate our cached oversized value.
        if (oversized.has_value() && !*oversized) oversized = std::nullopt;
    }
@ -128,6 +161,8 @@ struct SimTxGraph
    {
        auto pos = Find(ref);
        if (pos == MISSING) return;
+        // No need to invoke MakeModified, because this equally affects main and staging.
+        real_is_optimal = false;
        graph.FeeRate(pos).fee = fee;
    }

@ -136,6 +171,8 @@ struct SimTxGraph
    {
        auto pos = Find(ref);
        if (pos == MISSING) return;
+        MakeModified(pos);
+        real_is_optimal = false;
        graph.RemoveTransactions(SetType::Singleton(pos));
        simrevmap.erase(simmap[pos].get());
        // Retain the TxGraph::Ref corresponding to this position, so the Ref destruction isn't
@ -160,7 +197,9 @@ struct SimTxGraph
            auto remove = std::partition(removed.begin(), removed.end(), [&](auto& arg) { return arg.get() != ref; });
            removed.erase(remove, removed.end());
        } else {
+            MakeModified(pos);
            graph.RemoveTransactions(SetType::Singleton(pos));
+            real_is_optimal = false;
            simrevmap.erase(simmap[pos].get());
            simmap[pos].reset();
            // This may invalidate our cached oversized value.
@ -238,12 +277,34 @@ FUZZ_TARGET(txgraph)

    // Decide the maximum number of transactions per cluster we will use in this simulation.
    auto max_count = provider.ConsumeIntegralInRange<DepGraphIndex>(1, MAX_CLUSTER_COUNT_LIMIT);
+    // And the maximum combined size of transactions per cluster.
+    auto max_size = provider.ConsumeIntegralInRange<uint64_t>(1, 0x3fffff * MAX_CLUSTER_COUNT_LIMIT);
+    // And the number of iterations to consider a cluster acceptably linearized.
+    auto acceptable_iters = provider.ConsumeIntegralInRange<uint64_t>(0, 10000);

    // Construct a real graph, and a vector of simulated graphs (main, and possibly staging).
-    auto real = MakeTxGraph(max_count);
+    auto real = MakeTxGraph(max_count, max_size, acceptable_iters);
    std::vector<SimTxGraph> sims;
    sims.reserve(2);
-    sims.emplace_back(max_count);
+    sims.emplace_back(max_count, max_size);
+
+    /** Struct encapsulating information about a BlockBuilder that's currently live. */
+    struct BlockBuilderData
+    {
+        /** BlockBuilder object from real. */
+        std::unique_ptr<TxGraph::BlockBuilder> builder;
+        /** The set of transactions marked as included in *builder. */
+        SimTxGraph::SetType included;
+        /** The set of transactions marked as included or skipped in *builder. */
+        SimTxGraph::SetType done;
+        /** The last chunk feerate returned by *builder. IsEmpty() if none yet. */
+        FeePerWeight last_feerate;
+
+        BlockBuilderData(std::unique_ptr<TxGraph::BlockBuilder> builder_in) : builder(std::move(builder_in)) {}
+    };
+
+    /** Currently active block builders. */
+    std::vector<BlockBuilderData> block_builders;

    /** Function to pick any Ref (for either sim in sims: from sim.simmap or sim.removed, or the
     *  empty Ref). */
@ -282,9 +343,44 @@ FUZZ_TARGET(txgraph)
        return &empty_ref;
    };

+    /** Function to construct the correct fee-size diagram a real graph has based on its graph
+     *  order (as reported by GetCluster(), so it works for both main and staging). */
+    auto get_diagram_fn = [&](bool main_only) -> std::vector<FeeFrac> {
+        int level = main_only ? 0 : sims.size() - 1;
+        auto& sim = sims[level];
+        // For every transaction in the graph, request its cluster, and throw them into a set.
+        std::set<std::vector<TxGraph::Ref*>> clusters;
+        for (auto i : sim.graph.Positions()) {
+            auto ref = sim.GetRef(i);
+            clusters.insert(real->GetCluster(*ref, main_only));
+        }
+        // Compute the chunkings of each (deduplicated) cluster.
+        size_t num_tx{0};
+        std::vector<FeeFrac> chunk_feerates;
+        for (const auto& cluster : clusters) {
+            num_tx += cluster.size();
+            std::vector<SimTxGraph::Pos> linearization;
+            linearization.reserve(cluster.size());
+            for (auto refptr : cluster) linearization.push_back(sim.Find(refptr));
+            for (const FeeFrac& chunk_feerate : ChunkLinearization(sim.graph, linearization)) {
+                chunk_feerates.push_back(chunk_feerate);
+            }
+        }
+        // Verify the number of transactions after deduplicating clusters. This implicitly verifies
+        // that GetCluster on each element of a cluster reports the cluster transactions in the same
+        // order.
+        assert(num_tx == sim.GetTransactionCount());
+        // Sort by feerate only, since violating topological constraints within same-feerate
+        // chunks won't affect diagram comparisons.
+        std::sort(chunk_feerates.begin(), chunk_feerates.end(), std::greater{});
+        return chunk_feerates;
+    };
+
    LIMITED_WHILE(provider.remaining_bytes() > 0, 200) {
        // Read a one-byte command.
        int command = provider.ConsumeIntegral<uint8_t>();
+        int orig_command = command;
+
        // Treat the lowest bit of a command as a flag (which selects a variant of some of the
        // operations), and the second-lowest bit as a way of selecting main vs. staging, and leave
        // the rest of the bits in command.
@ -292,6 +388,11 @@ FUZZ_TARGET(txgraph)
        bool use_main = command & 2;
        command >>= 2;

+        /** Use the bottom 2 bits of command to select an entry in the block_builders vector (if
+         *  any). These use the same bits as alt/use_main, so don't use those in actions below
+         *  where builder_idx is used as well. */
+        int builder_idx = block_builders.empty() ? -1 : int((orig_command & 3) % block_builders.size());
+
        // Provide convenient aliases for the top simulated graph (main, or staging if it exists),
        // one for the simulated graph selected based on use_main (for operations that can operate
        // on both graphs), and one that always refers to the main graph.
@ -302,7 +403,7 @@ FUZZ_TARGET(txgraph)
        // Keep decrementing command for each applicable operation, until one is hit. Multiple
        // iterations may be necessary.
        while (true) {
-            if (top_sim.GetTransactionCount() < SimTxGraph::MAX_TRANSACTIONS && command-- == 0) {
+            if ((block_builders.empty() || sims.size() > 1) && top_sim.GetTransactionCount() < SimTxGraph::MAX_TRANSACTIONS && command-- == 0) {
                // AddTransaction.
                int64_t fee;
                int32_t size;
@ -314,7 +415,7 @@ FUZZ_TARGET(txgraph)
                    // Otherwise, use smaller range which consume fewer fuzz input bytes, as just
                    // these are likely sufficient to trigger all interesting code paths already.
                    fee = provider.ConsumeIntegral<uint8_t>();
-                    size = provider.ConsumeIntegral<uint8_t>() + 1;
+                    size = provider.ConsumeIntegralInRange<uint32_t>(1, 0xff);
                }
                FeePerWeight feerate{fee, size};
                // Create a real TxGraph::Ref.
@ -324,7 +425,7 @@ FUZZ_TARGET(txgraph)
                // Move it in place.
                *ref_loc = std::move(ref);
                break;
-            } else if (top_sim.GetTransactionCount() + top_sim.removed.size() > 1 && command-- == 0) {
+            } else if ((block_builders.empty() || sims.size() > 1) && top_sim.GetTransactionCount() + top_sim.removed.size() > 1 && command-- == 0) {
                // AddDependency.
                auto par = pick_fn();
                auto chl = pick_fn();
@ -336,9 +437,10 @@ FUZZ_TARGET(txgraph)
                    if (top_sim.graph.Ancestors(pos_par)[pos_chl]) break;
                }
                top_sim.AddDependency(par, chl);
+                top_sim.real_is_optimal = false;
                real->AddDependency(*par, *chl);
                break;
-            } else if (top_sim.removed.size() < 100 && command-- == 0) {
+            } else if ((block_builders.empty() || sims.size() > 1) && top_sim.removed.size() < 100 && command-- == 0) {
                // RemoveTransaction. Either all its ancestors or all its descendants are also
                // removed (if any), to make sure TxGraph's reordering of removals and dependencies
                // has no effect.
@ -368,7 +470,7 @@ FUZZ_TARGET(txgraph)
                }
                sel_sim.removed.pop_back();
                break;
-            } else if (command-- == 0) {
+            } else if (block_builders.empty() && command-- == 0) {
                // ~Ref (of any transaction).
                std::vector<TxGraph::Ref*> to_destroy;
                to_destroy.push_back(pick_fn());
@ -390,7 +492,7 @@ FUZZ_TARGET(txgraph)
                    }
                }
                break;
-            } else if (command-- == 0) {
+            } else if (block_builders.empty() && command-- == 0) {
                // SetTransactionFee.
                int64_t fee;
                if (alt) {
@ -444,6 +546,7 @@ FUZZ_TARGET(txgraph)
                    // Just do some quick checks that the reported value is in range. A full
                    // recomputation of expected chunk feerates is done at the end.
                    assert(feerate.size >= main_sim.graph.FeeRate(simpos).size);
+                    assert(feerate.size <= main_sim.SumAll().size);
                }
                break;
            } else if (!sel_sim.IsOversized() && command-- == 0) {
@ -487,13 +590,17 @@ FUZZ_TARGET(txgraph)
                assert(result.size() <= max_count);
                // Require the result to be topologically valid and not contain duplicates.
                auto left = sel_sim.graph.Positions();
+                uint64_t total_size{0};
                for (auto refptr : result) {
                    auto simpos = sel_sim.Find(refptr);
+                    total_size += sel_sim.graph.FeeRate(simpos).size;
                    assert(simpos != SimTxGraph::MISSING);
                    assert(left[simpos]);
                    left.Reset(simpos);
                    assert(!sel_sim.graph.Ancestors(simpos).Overlaps(left));
                }
+                // Check cluster size limit.
+                assert(total_size <= max_size);
                // Require the set to be connected.
                auto result_set = sel_sim.MakeSet(result);
                assert(sel_sim.graph.IsConnected(result_set));
@ -517,9 +624,10 @@ FUZZ_TARGET(txgraph)
            } else if (sims.size() < 2 && command-- == 0) {
                // StartStaging.
                sims.emplace_back(sims.back());
+                sims.back().modified = SimTxGraph::SetType{};
                real->StartStaging();
                break;
-            } else if (sims.size() > 1 && command-- == 0) {
+            } else if (block_builders.empty() && sims.size() > 1 && command-- == 0) {
                // CommitStaging.
                real->CommitStaging();
                sims.erase(sims.begin());
@ -584,7 +692,135 @@ FUZZ_TARGET(txgraph)
                break;
            } else if (command-- == 0) {
                // DoWork.
-                real->DoWork();
+                uint64_t iters = provider.ConsumeIntegralInRange<uint64_t>(0, alt ? 10000 : 255);
+                if (real->DoWork(iters)) {
+                    for (auto& sim : sims) sim.real_is_optimal = true;
+                }
+                break;
+            } else if (sims.size() == 2 && !sims[0].IsOversized() && !sims[1].IsOversized() && command-- == 0) {
+                // GetMainStagingDiagrams()
+                auto [main_diagram, staged_diagram] = real->GetMainStagingDiagrams();
+                auto sum_main = std::accumulate(main_diagram.begin(), main_diagram.end(), FeeFrac{});
+                auto sum_staged = std::accumulate(staged_diagram.begin(), staged_diagram.end(), FeeFrac{});
+                auto diagram_gain = sum_staged - sum_main;
+                auto real_gain = sims[1].SumAll() - sims[0].SumAll();
+                // Just check that the total fee gained/lost and size gained/lost according to the
+                // diagram matches the difference in these values in the simulated graph. A more
+                // complete check of the GetMainStagingDiagrams result is performed at the end.
+                assert(diagram_gain == real_gain);
+                // Check that the feerates in each diagram are monotonically decreasing.
+                for (size_t i = 1; i < main_diagram.size(); ++i) {
+                    assert(FeeRateCompare(main_diagram[i], main_diagram[i - 1]) <= 0);
+                }
+                for (size_t i = 1; i < staged_diagram.size(); ++i) {
+                    assert(FeeRateCompare(staged_diagram[i], staged_diagram[i - 1]) <= 0);
+                }
+                break;
+            } else if (block_builders.size() < 4 && !main_sim.IsOversized() && command-- == 0) {
+                // GetBlockBuilder.
+                block_builders.emplace_back(real->GetBlockBuilder());
+                break;
+            } else if (!block_builders.empty() && command-- == 0) {
+                // ~BlockBuilder.
+                block_builders.erase(block_builders.begin() + builder_idx);
+                break;
+            } else if (!block_builders.empty() && command-- == 0) {
+                // BlockBuilder::GetCurrentChunk, followed by Include/Skip.
+                auto& builder_data = block_builders[builder_idx];
+                auto new_included = builder_data.included;
+                auto new_done = builder_data.done;
+                auto chunk = builder_data.builder->GetCurrentChunk();
+                if (chunk) {
+                    // Chunk feerates must be monotonically decreasing.
+                    if (!builder_data.last_feerate.IsEmpty()) {
+                        assert(!(chunk->second >> builder_data.last_feerate));
+                    }
+                    builder_data.last_feerate = chunk->second;
+                    // Verify the contents of GetCurrentChunk.
+                    FeePerWeight sum_feerate;
+                    for (TxGraph::Ref* ref : chunk->first) {
+                        // Each transaction in the chunk must exist in the main graph.
+                        auto simpos = main_sim.Find(ref);
+                        assert(simpos != SimTxGraph::MISSING);
+                        // Verify the claimed chunk feerate.
+                        sum_feerate += main_sim.graph.FeeRate(simpos);
+                        // Make sure no transaction is reported twice.
+                        assert(!new_done[simpos]);
+                        new_done.Set(simpos);
+                        // The concatenation of all included transactions must be topologically valid.
+                        new_included.Set(simpos);
+                        assert(main_sim.graph.Ancestors(simpos).IsSubsetOf(new_included));
+                    }
+                    assert(sum_feerate == chunk->second);
+                } else {
+                    // When we reach the end, if nothing was skipped, the entire graph should have
+                    // been reported.
+                    if (builder_data.done == builder_data.included) {
+                        assert(builder_data.done.Count() == main_sim.GetTransactionCount());
+                    }
+                }
+                // Possibly invoke GetCurrentChunk() again, which should give the same result.
+                if ((orig_command % 7) >= 5) {
+                    auto chunk2 = builder_data.builder->GetCurrentChunk();
+                    assert(chunk == chunk2);
+                }
+                // Skip or include.
+                if ((orig_command % 5) >= 3) {
+                    // Skip.
+                    builder_data.builder->Skip();
+                } else {
+                    // Include.
+                    builder_data.builder->Include();
+                    builder_data.included = new_included;
+                }
+                builder_data.done = new_done;
+                break;
+            } else if (!main_sim.IsOversized() && command-- == 0) {
+                // GetWorstMainChunk.
+                auto [worst_chunk, worst_chunk_feerate] = real->GetWorstMainChunk();
+                // Just do some sanity checks here. Consistency with GetBlockBuilder is checked
+                // below.
+                if (main_sim.GetTransactionCount() == 0) {
+                    assert(worst_chunk.empty());
+                    assert(worst_chunk_feerate.IsEmpty());
+                } else {
+                    assert(!worst_chunk.empty());
+                    SimTxGraph::SetType done;
+                    FeePerWeight sum;
+                    for (TxGraph::Ref* ref : worst_chunk) {
+                        // Each transaction in the chunk must exist in the main graph.
+                        auto simpos = main_sim.Find(ref);
+                        assert(simpos != SimTxGraph::MISSING);
+                        sum += main_sim.graph.FeeRate(simpos);
+                        // Make sure the chunk contains no duplicate transactions.
+                        assert(!done[simpos]);
+                        done.Set(simpos);
+                        // All elements are preceded by all their descendants.
+                        assert(main_sim.graph.Descendants(simpos).IsSubsetOf(done));
+                    }
+                    assert(sum == worst_chunk_feerate);
+                }
+                break;
+            } else if ((block_builders.empty() || sims.size() > 1) && command-- == 0) {
+                // Trim.
+                bool was_oversized = top_sim.IsOversized();
+                auto removed = real->Trim();
+                if (!was_oversized) {
+                    assert(removed.empty());
+                    break;
+                }
+                auto removed_set = top_sim.MakeSet(removed);
+                // The removed set must contain all its own descendants.
+                for (auto simpos : removed_set) {
+                    assert(top_sim.graph.Descendants(simpos).IsSubsetOf(removed_set));
+                }
+                // Apply all removals to the simulation, and verify the result is no longer
+                // oversized. Don't query the real graph for oversizedness; it is compared
+                // against the simulation anyway later.
+                for (auto simpos : removed_set) {
+                    top_sim.RemoveTransaction(top_sim.GetRef(simpos));
+                }
+                assert(!top_sim.IsOversized());
                break;
            }
        }
@ -624,6 +860,16 @@ FUZZ_TARGET(txgraph)
        }
        assert(todo.None());

+        // If the real graph claims to be optimal (the last DoWork() call returned true), verify
+        // that calling Linearize on it does not improve it further.
+        if (sims[0].real_is_optimal) {
+            auto real_diagram = ChunkLinearization(sims[0].graph, vec1);
+            auto [sim_lin, _optimal, _cost] = Linearize(sims[0].graph, 300000, rng.rand64(), vec1);
+            auto sim_diagram = ChunkLinearization(sims[0].graph, sim_lin);
+            auto cmp = CompareChunks(real_diagram, sim_diagram);
+            assert(cmp == 0);
+        }
+
        // For every transaction in the total ordering, find a random one before it and after it,
        // and compare their chunk feerates, which must be consistent with the ordering.
        for (size_t pos = 0; pos < vec1.size(); ++pos) {
@ -639,6 +885,94 @@ FUZZ_TARGET(txgraph)
                assert(FeeRateCompare(after_feerate, pos_feerate) <= 0);
            }
        }
+
+        // The same order should be obtained through a BlockBuilder as implied by CompareMainOrder,
+        // if nothing is skipped.
+        auto builder = real->GetBlockBuilder();
+        std::vector<SimTxGraph::Pos> vec_builder;
+        std::vector<TxGraph::Ref*> last_chunk;
+        FeePerWeight last_chunk_feerate;
+        while (auto chunk = builder->GetCurrentChunk()) {
+            FeePerWeight sum;
+            for (TxGraph::Ref* ref : chunk->first) {
+                // The reported chunk feerate must match the chunk feerate obtained by asking
+                // it for each of the chunk's transactions individually.
+                assert(real->GetMainChunkFeerate(*ref) == chunk->second);
+                // Verify the chunk feerate matches the sum of the reported individual feerates.
+                sum += real->GetIndividualFeerate(*ref);
+                // Chunks must contain transactions that exist in the graph.
+                auto simpos = sims[0].Find(ref);
+                assert(simpos != SimTxGraph::MISSING);
+                vec_builder.push_back(simpos);
+            }
+            assert(sum == chunk->second);
+            last_chunk = std::move(chunk->first);
+            last_chunk_feerate = chunk->second;
+            builder->Include();
+        }
+        assert(vec_builder == vec1);
+
+        // The last chunk returned by the BlockBuilder must match GetWorstMainChunk, in reverse.
+        std::reverse(last_chunk.begin(), last_chunk.end());
+        auto [worst_chunk, worst_chunk_feerate] = real->GetWorstMainChunk();
+        assert(last_chunk == worst_chunk);
+        assert(last_chunk_feerate == worst_chunk_feerate);
+
+        // Check that the implied ordering gives rise to a combined diagram that matches the
+        // diagram constructed from the individual cluster linearization chunkings.
+        auto main_real_diagram = get_diagram_fn(/*main_only=*/true);
+        auto main_implied_diagram = ChunkLinearization(sims[0].graph, vec1);
+        assert(CompareChunks(main_real_diagram, main_implied_diagram) == 0);
+
+        if (sims.size() >= 2 && !sims[1].IsOversized()) {
+            // When the staging graph is not oversized as well, call GetMainStagingDiagrams, and
+            // fully verify the result.
+            auto [main_cmp_diagram, stage_cmp_diagram] = real->GetMainStagingDiagrams();
+            // Check that the feerates in each diagram are monotonically decreasing.
+            for (size_t i = 1; i < main_cmp_diagram.size(); ++i) {
+                assert(FeeRateCompare(main_cmp_diagram[i], main_cmp_diagram[i - 1]) <= 0);
+            }
+            for (size_t i = 1; i < stage_cmp_diagram.size(); ++i) {
+                assert(FeeRateCompare(stage_cmp_diagram[i], stage_cmp_diagram[i - 1]) <= 0);
+            }
+            // Treat the diagrams as sets of chunk feerates, and sort them in the same way so that
+            // std::set_difference can be used on them below. The exact ordering does not matter
+            // here, but it has to be consistent with the one used in main_real_diagram and
+            // stage_real_diagram.
+            std::sort(main_cmp_diagram.begin(), main_cmp_diagram.end(), std::greater{});
+            std::sort(stage_cmp_diagram.begin(), stage_cmp_diagram.end(), std::greater{});
+            // Find the chunks that appear in main_real_diagram but are missing from
+            // main_cmp_diagram. This is allowed, because GetMainStagingDiagrams omits clusters in
+            // main unaffected by staging.
+            std::vector<FeeFrac> missing_main_cmp;
+            std::set_difference(main_real_diagram.begin(), main_real_diagram.end(),
+                                main_cmp_diagram.begin(), main_cmp_diagram.end(),
+                                std::inserter(missing_main_cmp, missing_main_cmp.end()),
+                                std::greater{});
+            assert(main_cmp_diagram.size() + missing_main_cmp.size() == main_real_diagram.size());
+            // Do the same for chunks in stage_real_diagram missing from stage_cmp_diagram.
+            auto stage_real_diagram = get_diagram_fn(false);
+            std::vector<FeeFrac> missing_stage_cmp;
+            std::set_difference(stage_real_diagram.begin(), stage_real_diagram.end(),
+                                stage_cmp_diagram.begin(), stage_cmp_diagram.end(),
+                                std::inserter(missing_stage_cmp, missing_stage_cmp.end()),
+                                std::greater{});
+            assert(stage_cmp_diagram.size() + missing_stage_cmp.size() == stage_real_diagram.size());
+            // The missing chunks must be equal across main & staging (otherwise they couldn't have
+            // been omitted).
+            assert(missing_main_cmp == missing_stage_cmp);
+
+            // The missing part must include at least all transactions in staging which have not been
+            // modified, or been in a cluster together with modified transactions, since they were
+            // copied from main. Note that due to the reordering of removals w.r.t. dependency
+            // additions, it is possible that the real implementation found more unaffected things.
+            FeeFrac missing_real;
+            for (const auto& feerate : missing_main_cmp) missing_real += feerate;
+            FeeFrac missing_expected = sims[1].graph.FeeRate(sims[1].graph.Positions() - sims[1].modified);
+            // Note that missing_real.fee < missing_expected.fee is possible due to the presence of
+            // negative-fee transactions.
+            assert(missing_real.size >= missing_expected.size);
+        }
    }

    assert(real->HaveStaging() == (sims.size() > 1));
@ -680,13 +1014,17 @@ FUZZ_TARGET(txgraph)
                    // linearization).
                    std::vector<DepGraphIndex> simlin;
                    SimTxGraph::SetType done;
+                    uint64_t total_size{0};
                    for (TxGraph::Ref* ptr : cluster) {
                        auto simpos = sim.Find(ptr);
                        assert(sim.graph.Descendants(simpos).IsSubsetOf(component - done));
                        done.Set(simpos);
                        assert(sim.graph.Ancestors(simpos).IsSubsetOf(done));
                        simlin.push_back(simpos);
+                        total_size += sim.graph.FeeRate(simpos).size;
                    }
+                    // Check cluster size.
+                    assert(total_size <= max_size);
                    // Construct a chunking object for the simulated graph, using the reported cluster
                    // linearization as ordering, and compare it against the reported chunk feerates.
                    if (sims.size() == 1 || main_only) {
@ -714,6 +1052,8 @@ FUZZ_TARGET(txgraph)
    // Sanity check again (because invoking inspectors may modify internal unobservable state).
    real->SanityCheck();

+    // Kill the block builders.
+    block_builders.clear();
    // Kill the TxGraph object.
    real.reset();
    // Kill the simulated graphs, with all remaining Refs in it. If any, this verifies that Refs
--- a/src/txgraph.cpp
+++ b/src/txgraph.cpp
--- a/src/txgraph.h
+++ b/src/txgraph.h
@ -3,9 +3,11 @@
 // file COPYING or http://www.opensource.org/licenses/mit-license.php.

 #include <compare>
-#include <stdint.h>
 #include <memory>
+#include <optional>
+#include <stdint.h>
 #include <vector>
+#include <utility>

 #include <util/feefrac.h>

@ -61,10 +63,10 @@ public:
    /** Virtual destructor, so inheriting is safe. */
    virtual ~TxGraph() = default;
    /** Construct a new transaction with the specified feerate, and return a Ref to it.
-     *  If a staging graph exists, the new transaction is only created there. In all
-     *  further calls, only Refs created by AddTransaction() are allowed to be passed to this
-     *  TxGraph object (or empty Ref objects). Ref objects may outlive the TxGraph they were
-     *  created for. */
+     *  If a staging graph exists, the new transaction is only created there. In all further
+     *  calls, only Refs created by AddTransaction() are allowed to be passed to this TxGraph
+     *  object (or empty Ref objects). Ref objects may outlive the TxGraph they were created
+     *  for. */
    [[nodiscard]] virtual Ref AddTransaction(const FeePerWeight& feerate) noexcept = 0;
    /** Remove the specified transaction. If a staging graph exists, the removal only happens
     *  there. This is a no-op if the transaction was already removed.
@ -92,9 +94,10 @@ public:
    virtual void SetTransactionFee(const Ref& arg, int64_t fee) noexcept = 0;

    /** TxGraph is internally lazy, and will not compute many things until they are needed.
-     *  Calling DoWork will compute everything now, so that future operations are fast. This can be
-     *  invoked while oversized. */
-    virtual void DoWork() noexcept = 0;
+     *  Calling DoWork will perform some of that work now, if there is any (controlled by iters),
+     *  so that future operations are fast. Returns whether all work is done. This can be invoked
+     *  while oversized. */
+    virtual bool DoWork(uint64_t iters) noexcept = 0;

    /** Create a staging graph (which cannot exist already). This acts as if a full copy of
     *  the transaction graph is made, upon which further modifications are made. This copy can
@ -162,6 +165,44 @@ public:
     *  main clusters are counted. Refs that do not exist in the queried graph are ignored. Refs
     *  can not be null. The queried graph must not be oversized. */
    virtual GraphIndex CountDistinctClusters(std::span<const Ref* const>, bool main_only = false) noexcept = 0;
+    /** For both main and staging (which must both exist and not be oversized), return the combined
+     *  respective feerate diagrams, including chunks from all clusters, but excluding clusters
+     *  that appear identically in both. Use FeeFrac rather than FeePerWeight so CompareChunks is
+     *  usable without type-conversion. */
+    virtual std::pair<std::vector<FeeFrac>, std::vector<FeeFrac>> GetMainStagingDiagrams() noexcept = 0;
+    /** Remove transactions (including their own descendants) according to a fast but best-effort
+     *  strategy such that the TxGraph's cluster and size limits are respected. Applies to staging
+     *  if it exists, and to main otherwise. Returns the list of all removed transactions in
+     *  unspecified order. This has no effect unless the relevant graph is oversized. */
+    virtual std::vector<Ref*> Trim() noexcept = 0;
+
+    /** Interface returned by GetBlockBuilder. */
+    class BlockBuilder
+    {
+    protected:
+        /** Make constructor non-public (use TxGraph::GetBlockBuilder()). */
+        BlockBuilder() noexcept = default;
+    public:
+        /** Support safe inheritance. */
+        virtual ~BlockBuilder() = default;
+        /** Get the chunk that is currently suggested to be included, plus its feerate, if any. */
+        virtual std::optional<std::pair<std::vector<Ref*>, FeePerWeight>> GetCurrentChunk() noexcept = 0;
+        /** Mark the current chunk as included, and progress to the next one. */
+        virtual void Include() noexcept = 0;
+        /** Mark the current chunk as skipped, and progress to the next one. Further chunks from
+         *  the same cluster as the current one will not be reported anymore. */
+        virtual void Skip() noexcept = 0;
+    };
+
+    /** Construct a block builder, drawing chunks in order, from the main graph, which cannot be
+     *  oversized. While the returned object exists, no mutators on the main graph are allowed.
+     *  The BlockBuilder object must not outlive the TxGraph it was created with. */
+    virtual std::unique_ptr<BlockBuilder> GetBlockBuilder() noexcept = 0;
+    /** Get the last chunk in the main graph, i.e., the last chunk that would be returned by a
+     *  BlockBuilder created now, together with its feerate. The chunk is returned in
+     *  reverse-topological order, so every element is preceded by all its descendants. The main
+     *  graph must not be oversized. If the graph is empty, {{}, FeePerWeight{}} is returned. */
+    virtual std::pair<std::vector<Ref*>, FeePerWeight> GetWorstMainChunk() noexcept = 0;

    /** Perform an internal consistency check on this object. */
    virtual void SanityCheck() const = 0;
@ -205,8 +246,10 @@ public:
    };
 };

-/** Construct a new TxGraph with the specified limit on transactions within a cluster. That
- *  number cannot exceed MAX_CLUSTER_COUNT_LIMIT. */
-std::unique_ptr<TxGraph> MakeTxGraph(unsigned max_cluster_count) noexcept;
+/** Construct a new TxGraph with the specified limit on transactions within a cluster, and the
+ *  specified limit on the sum of transaction sizes within a cluster. max_cluster_count cannot
+ *  exceed MAX_CLUSTER_COUNT_LIMIT. acceptable_iters controls how many linearization optimization
+ *  steps will be performed before it is considered to be of acceptable quality. */
+std::unique_ptr<TxGraph> MakeTxGraph(unsigned max_cluster_count, uint64_t max_cluster_size, uint64_t acceptable_iters) noexcept;

 #endif // BITCOIN_TXGRAPH_H
--- a/test/functional/wallet_backwards_compatibility.py
+++ b/test/functional/wallet_backwards_compatibility.py
@ -87,7 +87,7 @@ class BackwardsCompatibilityTest(BitcoinTestFramework):
        # 0.21.x and 22.x would both produce bad derivation paths when topping up an inactive hd chain
        # Make sure that this is being automatically cleaned up by migration
        node_master = self.nodes[1]
-        node_v22 = self.nodes[self.num_nodes - 5]
+        node_v22 = self.nodes[self.num_nodes - 3]
        wallet_name = "bad_deriv_path"
        node_v22.createwallet(wallet_name=wallet_name, descriptors=False)
        bad_deriv_wallet = node_v22.get_wallet_rpc(wallet_name)
Author	SHA1	Message	Date
Pieter Wuille	11deeac27a	Merge `6480423d79` into `c5e44a0435`	2025-04-29 11:52:23 +02:00
merge-script	c5e44a0435	Merge bitcoin/bitcoin#32369 : test: Use the correct node for doubled keypath test Some checks are pending CI / macOS 14 native, arm64, fuzz (push) Waiting to run Details CI / Windows native, VS 2022 (push) Waiting to run Details CI / Windows native, fuzz, VS 2022 (push) Waiting to run Details CI / Linux->Windows cross, no tests (push) Waiting to run Details CI / Windows, test cross-built (push) Blocked by required conditions Details CI / ASan + LSan + UBSan + integer, no depends, USDT (push) Waiting to run Details CI / test each commit (push) Waiting to run Details CI / macOS 14 native, arm64, no depends, sqlite only, gui (push) Waiting to run Details `32d55e28af` test: Use the correct node for doubled keypath test (Ava Chow) Pull request description: #29124 had a silent merge conflict with #32350 which resulted in it using the wrong node. Fix the test to use the correct v22 node. ACKs for top commit: maflcko: lgtm ACK `32d55e28af` rkrux: ACK `32d55e28af` BrandonOdiwuor: Code Review ACK `32d55e28af` Tree-SHA512: 1e0231985beb382b16e1d608c874750423d0502388db0c8ad450b22d17f9d96f5e16a6b44948ebda5efc750f62b60d0de8dd20131f449427426a36caf374af92	2025-04-29 09:59:42 +01:00
Ava Chow	32d55e28af	test: Use the correct node for doubled keypath test	2025-04-28 14:44:17 -07:00
Pieter Wuille	6480423d79	txgraph: add work limit to DoWork(), try optimal (feature) This adds an `iters` parameter to DoWork(), which controls how much work it is allowed to do right now. Additionally, DoWork() won't stop at just getting everything ACCEPTABLE, but if there is work budget left, will also attempt to get every cluster linearized optimally.	2025-04-22 16:50:38 -04:00
Pieter Wuille	79ef423f42	txgraph: make number of acceptable iterations configurable (feature)	2025-04-22 16:50:38 -04:00
Pieter Wuille	b074308e6f	txgraph: track amount of work done in linearization (preparation)	2025-04-22 16:50:38 -04:00
Pieter Wuille	eb5f4db166	txgraph: singleton split-off clusters are optimal (optimization)	2025-04-22 16:50:38 -04:00
Pieter Wuille	fad3630fb6	txgraph: reset quality when merging clusters (bugfix)	2025-04-22 16:50:38 -04:00
Pieter Wuille	3566dca6c9	txgraph: Track multiple potential would-be clusters in Trim (improvement) In a Trim function, for any given would-be group of clusters, a (rudimentary) linearization for the would-be cluster is constructed on the fly by adding eligible transactions to a heap. This continues until the total count or size of the transaction exceeds a configured limit. Any transactions which appear later in this linearization are discarded. However, given that transactions at the end are discarded, it is possible that the would-be cluster splits apart into multiple clusters. And those clusters may well permit far more transactions before their limits are reached. Take this into account by using a union-find structure inside TrimTxData to keep track of the count/size of all would-be clusters that would be formed at any point. This is not an optimization in terms of CPU usage or memory; it just improves the quality of the transactions removed by Trim().	2025-04-22 16:50:38 -04:00
Pieter Wuille	c4c96fb3e3	txgraph: Add ability to trim oversized clusters (feature) During reorganisations, it is possible that dependencies get added which result in clusters that violate limits (count, size), when linking the new from-block transactions to the old from-mempool transactions. Unlike RBF scenarios, we cannot simply reject these policy violations when they are due to received blocks. To accommodate this, add a Trim() function to TxGraph, which removes transactions (including descendants) in order to make all resulting clusters satisfy the limits.	2025-04-22 16:50:38 -04:00
Pieter Wuille	f3ee8d75f2	txgraph: Permit transactions that exceed cluster size limit (feature)	2025-04-22 16:50:38 -04:00
Pieter Wuille	7e869040fe	txgraph: Add ability to configure maximum cluster size/weight (feature) This is integrated with the oversized property: the graph is oversized when any connected component within it contains more than the cluster count limit many transactions, or when their combined size/weight exceeds the cluster size limit. It becomes disallowed to call AddTransaction with a size larger than this limit. In addition, SetTransactionFeeRate becomes SetTransactionFee, so that we do not need to deal with the case that a call to this function might affect the oversizedness.	2025-04-22 16:50:38 -04:00
Pieter Wuille	a0becaaa9c	txgraph: Special-case singletons in chunk index (optimization)	2025-04-22 16:50:38 -04:00
Pieter Wuille	df4578d2aa	txgraph: Skipping end of cluster has no impact (optimization)	2025-04-22 16:50:38 -04:00
Pieter Wuille	e3d3ad9723	txgraph: Reuse discarded chunkindex entries (optimization)	2025-04-22 16:50:38 -04:00
Pieter Wuille	82054fa25f	txgraph: Introduce TxGraph::GetWorstMainChunk (feature) It returns the last chunk that would be suggested for mining by BlockBuilder objects. This is intended for eviction.	2025-04-22 16:50:38 -04:00
Pieter Wuille	38e5a1ba26	txgraph: Introduce BlockBuilder interface (feature) This interface lets one iterate efficiently over the chunks of the main graph in a TxGraph, in the same order as CompareMainOrder. Each chunk can be marked as "included" or "skipped" (and in the latter case, dependent chunks will be skipped).	2025-04-22 16:50:34 -04:00
Pieter Wuille	4b998feb65	txgraph: Generalize GetClusterRefs to support subsections (preparation) This is preparation for a next commit which will need a way to extract Refs for just individual chunks from a cluster.	2025-04-22 16:48:14 -04:00
Pieter Wuille	9c2af16cd7	txgraph: Introduce TxGraphImpl observer tracking (preparation) This is preparation for a next commit which will introduce a class whose objects hold references to internals in TxGraphImpl, which disallows modifications to the graph while such objects exist.	2025-04-22 16:31:22 -04:00
Pieter Wuille	63e44512e2	txgraph: Maintain chunk index (preparation) This is preparation for exposing mining and eviction functionality in TxGraph.	2025-04-22 16:31:22 -04:00
Pieter Wuille	e1cb50a957	txgraph: Add GetMainStagingDiagrams function (feature) This allows determining whether the changes in a staging diagram unambiguously improve the graph, through CompareChunks().	2025-04-22 16:31:22 -04:00