diff --git a/src/Makefile.test.include b/src/Makefile.test.include index 0993a65efff..3d044983699 100644 --- a/src/Makefile.test.include +++ b/src/Makefile.test.include @@ -83,6 +83,7 @@ BITCOIN_TESTS =\ test/bloom_tests.cpp \ test/bswap_tests.cpp \ test/checkqueue_tests.cpp \ + test/cluster_linearize_tests.cpp \ test/coins_tests.cpp \ test/coinstatsindex_tests.cpp \ test/common_url_tests.cpp \ @@ -302,6 +303,7 @@ test_fuzz_fuzz_SOURCES = \ test/fuzz/buffered_file.cpp \ test/fuzz/chain.cpp \ test/fuzz/checkqueue.cpp \ + test/fuzz/cluster_linearize.cpp \ test/fuzz/coins_view.cpp \ test/fuzz/coinscache_sim.cpp \ test/fuzz/connman.cpp \ diff --git a/src/Makefile.test_util.include b/src/Makefile.test_util.include index 960eb078c8a..0c0e849fba3 100644 --- a/src/Makefile.test_util.include +++ b/src/Makefile.test_util.include @@ -10,6 +10,7 @@ EXTRA_LIBRARIES += \ TEST_UTIL_H = \ test/util/blockfilter.h \ test/util/chainstate.h \ + test/util/cluster_linearize.h \ test/util/coins.h \ test/util/index.h \ test/util/json.h \ diff --git a/src/test/cluster_linearize_tests.cpp b/src/test/cluster_linearize_tests.cpp new file mode 100644 index 00000000000..d15e783ea15 --- /dev/null +++ b/src/test/cluster_linearize_tests.cpp @@ -0,0 +1,138 @@ +// Copyright (c) The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include +#include +#include +#include +#include + +#include + +#include + +BOOST_FIXTURE_TEST_SUITE(cluster_linearize_tests, BasicTestingSetup) + +using namespace cluster_linearize; + +namespace { + +template +void TestDepGraphSerialization(const Cluster& cluster, const std::string& hexenc) +{ + DepGraph depgraph(cluster); + + // Run normal sanity and correspondence checks, which includes a round-trip test. + VerifyDepGraphFromCluster(cluster, depgraph); + + // There may be multiple serializations of the same graph, but DepGraphFormatter's serializer + // only produces one of those. Verify that hexenc matches that canonical serialization. + std::vector encoding; + VectorWriter writer(encoding, 0); + writer << Using(depgraph); + BOOST_CHECK_EQUAL(HexStr(encoding), hexenc); + + // Test that deserializing that encoding yields depgraph. This is effectively already implied + // by the round-trip test above (if depgraph is acyclic), but verify it explicitly again here. + SpanReader reader(encoding); + DepGraph depgraph_read; + reader >> Using(depgraph_read); + BOOST_CHECK(depgraph == depgraph_read); +} + +} // namespace + +BOOST_AUTO_TEST_CASE(depgraph_ser_tests) +{ + // Empty cluster. + TestDepGraphSerialization( + {}, + "00" /* end of graph */); + + // Transactions: A(fee=0,size=1). + TestDepGraphSerialization( + {{{0, 1}, {}}}, + "01" /* A size */ + "00" /* A fee */ + "00" /* A insertion position (no skips): A */ + "00" /* end of graph */); + + // Transactions: A(fee=42,size=11), B(fee=-13,size=7), B depends on A. + TestDepGraphSerialization( + {{{42, 11}, {}}, {{-13, 7}, {0}}}, + "0b" /* A size */ + "54" /* A fee */ + "00" /* A insertion position (no skips): A */ + "07" /* B size */ + "19" /* B fee */ + "00" /* B->A dependency (no skips) */ + "00" /* B insertion position (no skips): A,B */ + "00" /* end of graph */); + + // Transactions: A(64,128), B(128,256), C(1,1), C depends on A and B. + TestDepGraphSerialization( + {{{64, 128}, {}}, {{128, 256}, {}}, {{1, 1}, {0, 1}}}, + "8000" /* A size */ + "8000" /* A fee */ + "00" /* A insertion position (no skips): A */ + "8100" /* B size */ + "8100" /* B fee */ + "01" /* B insertion position (skip B->A dependency): A,B */ + "01" /* C size */ + "02" /* C fee */ + "00" /* C->B dependency (no skips) */ + "00" /* C->A dependency (no skips) */ + "00" /* C insertion position (no skips): A,B,C */ + "00" /* end of graph */); + + // Transactions: A(-57,113), B(57,114), C(-58,115), D(58,116). Deps: B->A, C->A, D->C, in order + // [B,A,C,D]. This exercises non-topological ordering (internally serialized as A,B,C,D). + TestDepGraphSerialization( + {{{57, 114}, {1}}, {{-57, 113}, {}}, {{-58, 115}, {1}}, {{58, 116}, {2}}}, + "71" /* A size */ + "71" /* A fee */ + "00" /* A insertion position (no skips): A */ + "72" /* B size */ + "72" /* B fee */ + "00" /* B->A dependency (no skips) */ + "01" /* B insertion position (skip A): B,A */ + "73" /* C size */ + "73" /* C fee */ + "01" /* C->A dependency (skip C->B dependency) */ + "00" /* C insertion position (no skips): B,A,C */ + "74" /* D size */ + "74" /* D fee */ + "00" /* D->C dependency (no skips) */ + "01" /* D insertion position (skip D->B dependency, D->A is implied): B,A,C,D */ + "00" /* end of graph */); + + // Transactions: A(1,2), B(3,1), C(2,1), D(1,3), E(1,1). Deps: C->A, D->A, D->B, E->D. + // In order: [D,A,B,E,C]. Internally serialized in order A,B,C,D,E. + TestDepGraphSerialization( + {{{1, 3}, {1, 2}}, {{1, 2}, {}}, {{3, 1}, {}}, {{1, 1}, {0}}, {{2, 1}, {1}}}, + "02" /* A size */ + "02" /* A fee */ + "00" /* A insertion position (no skips): A */ + "01" /* B size */ + "06" /* B fee */ + "01" /* B insertion position (skip B->A dependency): A,B */ + "01" /* C size */ + "04" /* C fee */ + "01" /* C->A dependency (skip C->B dependency) */ + "00" /* C insertion position (no skips): A,B,C */ + "03" /* D size */ + "02" /* D fee */ + "01" /* D->B dependency (skip D->C dependency) */ + "00" /* D->A dependency (no skips) */ + "03" /* D insertion position (skip C,B,A): D,A,B,C */ + "01" /* E size */ + "02" /* E fee */ + "00" /* E->D dependency (no skips) */ + "02" /* E insertion position (skip E->C dependency, E->B and E->A are implied, + skip insertion C): D,A,B,E,C */ + "00" /* end of graph */ + ); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp new file mode 100644 index 00000000000..fd75fd5b081 --- /dev/null +++ b/src/test/fuzz/cluster_linearize.cpp @@ -0,0 +1,87 @@ +// Copyright (c) The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +FUZZ_TARGET(clusterlin_add_dependency) +{ + // Verify that computing a DepGraph from a cluster, or building it step by step using AddDependency + // have the same effect. + + // Construct a cluster of a certain length, with no dependencies. + FuzzedDataProvider provider(buffer.data(), buffer.size()); + auto num_tx = provider.ConsumeIntegralInRange(2, 32); + Cluster cluster(num_tx, std::pair{FeeFrac{0, 1}, TestBitSet{}}); + // Construct the corresponding DepGraph object (also no dependencies). + DepGraph depgraph(cluster); + SanityCheck(depgraph); + // Read (parent, child) pairs, and add them to the cluster and depgraph. + LIMITED_WHILE(provider.remaining_bytes() > 0, TestBitSet::Size() * TestBitSet::Size()) { + auto parent = provider.ConsumeIntegralInRange(0, num_tx - 1); + auto child = provider.ConsumeIntegralInRange(0, num_tx - 2); + child += (child >= parent); + cluster[child].second.Set(parent); + depgraph.AddDependency(parent, child); + assert(depgraph.Ancestors(child)[parent]); + assert(depgraph.Descendants(parent)[child]); + } + // Sanity check the result. + SanityCheck(depgraph); + // Verify that the resulting DepGraph matches one recomputed from the cluster. + assert(DepGraph(cluster) == depgraph); +} + +FUZZ_TARGET(clusterlin_cluster_serialization) +{ + // Verify that any graph of transactions has its ancestry correctly computed by DepGraph, and + // if it is a DAG, that it can be serialized as a DepGraph in a way that roundtrips. This + // guarantees that any acyclic cluster has a corresponding DepGraph serialization. + + FuzzedDataProvider provider(buffer.data(), buffer.size()); + + // Construct a cluster in a naive way (using a FuzzedDataProvider-based serialization). + Cluster cluster; + auto num_tx = provider.ConsumeIntegralInRange(1, 32); + cluster.resize(num_tx); + for (ClusterIndex i = 0; i < num_tx; ++i) { + cluster[i].first.size = provider.ConsumeIntegralInRange(1, 0x3fffff); + cluster[i].first.fee = provider.ConsumeIntegralInRange(-0x8000000000000, 0x7ffffffffffff); + for (ClusterIndex j = 0; j < num_tx; ++j) { + if (i == j) continue; + if (provider.ConsumeBool()) cluster[i].second.Set(j); + } + } + + // Construct dependency graph, and verify it matches the cluster (which includes a round-trip + // check for the serialization). + DepGraph depgraph(cluster); + VerifyDepGraphFromCluster(cluster, depgraph); +} + +FUZZ_TARGET(clusterlin_depgraph_serialization) +{ + // Verify that any deserialized depgraph is acyclic and roundtrips to an identical depgraph. + + // Construct a graph by deserializing. + SpanReader reader(buffer); + DepGraph depgraph; + try { + reader >> Using(depgraph); + } catch (const std::ios_base::failure&) {} + SanityCheck(depgraph); + + // Verify the graph is a DAG. + assert(IsAcyclic(depgraph)); +} diff --git a/src/test/util/cluster_linearize.h b/src/test/util/cluster_linearize.h new file mode 100644 index 00000000000..771fbd543a7 --- /dev/null +++ b/src/test/util/cluster_linearize.h @@ -0,0 +1,336 @@ +// Copyright (c) The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#ifndef BITCOIN_TEST_UTIL_CLUSTER_LINEARIZE_H +#define BITCOIN_TEST_UTIL_CLUSTER_LINEARIZE_H + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace { + +using namespace cluster_linearize; + +using TestBitSet = BitSet<32>; + +/** Check if a graph is acyclic. */ +template +bool IsAcyclic(const DepGraph& depgraph) noexcept +{ + for (ClusterIndex i = 0; i < depgraph.TxCount(); ++i) { + if ((depgraph.Ancestors(i) & depgraph.Descendants(i)) != SetType::Singleton(i)) { + return false; + } + } + return true; +} + +/** A formatter for a bespoke serialization for acyclic DepGraph objects. + * + * The serialization format outputs information about transactions in a topological order (parents + * before children), together with position information so transactions can be moved back to their + * correct position on deserialization. + * + * - For each transaction t in the DepGraph (in some topological order); + * - The size: VARINT(t.size), which cannot be 0. + * - The fee: VARINT(SignedToUnsigned(t.fee)), see below for SignedToUnsigned. + * - For each direct dependency: + * - VARINT(skip) + * - The position of t in the cluster: VARINT(skip) + * - The end of the graph: VARINT(0) + * + * The list of skip values encodes the dependencies of t, as well as its position in the cluster. + * Each skip value is the number of possibilities that were available, but were not taken. These + * possibilities are, in order: + * - For each previous transaction in the graph, in reverse serialization order, whether it is a + * direct parent of t (but excluding transactions which are already implied to be dependencies + * by parent relations that were serialized before it). + * - The various insertion positions in the cluster, from the very end of the cluster, to the + * front. + * + * Let's say you have a 7-transaction cluster, consisting of transactions F,A,C,B,G,E,D, but + * serialized in order A,B,C,D,E,F,G, because that happens to be a topological ordering. By the + * time G gets serialized, what has been serialized already represents the cluster F,A,C,B,E,D (in + * that order). G has B and E as direct parents, and E depends on C. + * + * In this case, the possibilities are, in order: + * - [ ] the dependency G->F + * - [X] the dependency G->E + * - [ ] the dependency G->D + * - [X] the dependency G->B + * - [ ] the dependency G->A + * - [ ] put G at the end of the cluster + * - [ ] put G before D + * - [X] put G before E + * - [ ] put G before B + * - [ ] put G before C + * - [ ] put G before A + * - [ ] put G before F + * + * The skip values in this case are 1 (G->F), 1 (G->D), 3 (G->A, G at end, G before D). No skip + * after 3 is needed (or permitted), because there can only be one position for G. Also note that + * G->C is not included in the list of possibilities, as it is implied by the included G->E and + * E->C that came before it. On deserialization, if the last skip value was 8 or larger (putting + * G before the beginning of the cluster), it is interpreted as wrapping around back to the end. + * + * + * Rationale: + * - Why VARINTs? They are flexible enough to represent large numbers where needed, but more + * compact for smaller numbers. The serialization format is designed so that simple structures + * involve smaller numbers, so smaller size maps to simpler graphs. + * - Why use SignedToUnsigned? It results in small unsigned values for signed values with small + * absolute value. This way we can encode negative fees in graphs, but still let small negative + * numbers have small encodings. + * - Why are the parents emitted in reverse order compared to the transactions themselves? This + * naturally lets us skip parents-of-parents, as they will be reflected as implied dependencies. + * - Why encode skip values and not a bitmask to convey the list positions? It turns out that the + * most complex graphs (in terms of linearization complexity) are ones with ~1 dependency per + * transaction. The current encoding uses ~1 byte per transaction for dependencies in this case, + * while a bitmask would require ~N/2 bits per transaction. + */ + +struct DepGraphFormatter +{ + /** Convert x>=0 to 2x (even), x<0 to -2x-1 (odd). */ + static uint64_t SignedToUnsigned(int64_t x) noexcept + { + if (x < 0) { + return 2 * uint64_t(-(x + 1)) + 1; + } else { + return 2 * uint64_t(x); + } + } + + /** Convert even x to x/2 (>=0), odd x to -(x/2)-1 (<0). */ + static int64_t UnsignedToSigned(uint64_t x) noexcept + { + if (x & 1) { + return -int64_t(x / 2) - 1; + } else { + return int64_t(x / 2); + } + } + + template + static void Ser(Stream& s, const DepGraph& depgraph) + { + /** Construct a topological order to serialize the transactions in. */ + std::vector topo_order(depgraph.TxCount()); + std::iota(topo_order.begin(), topo_order.end(), ClusterIndex{0}); + std::sort(topo_order.begin(), topo_order.end(), [&](ClusterIndex a, ClusterIndex b) { + auto anc_a = depgraph.Ancestors(a).Count(), anc_b = depgraph.Ancestors(b).Count(); + if (anc_a != anc_b) return anc_a < anc_b; + return a < b; + }); + + /** Which transactions the deserializer already knows when it has deserialized what has + * been serialized here so far, and in what order. */ + std::vector rebuilt_order; + rebuilt_order.reserve(depgraph.TxCount()); + + // Loop over the transactions in topological order. + for (ClusterIndex topo_idx = 0; topo_idx < topo_order.size(); ++topo_idx) { + /** Which depgraph index we are currently writing. */ + ClusterIndex idx = topo_order[topo_idx]; + // Write size, which must be larger than 0. + s << VARINT_MODE(depgraph.FeeRate(idx).size, VarIntMode::NONNEGATIVE_SIGNED); + // Write fee, encoded as an unsigned varint (odd=negative, even=non-negative). + s << VARINT(SignedToUnsigned(depgraph.FeeRate(idx).fee)); + // Write dependency information. + SetType written_parents; + uint64_t diff = 0; //!< How many potential parent/child relations we have skipped over. + for (ClusterIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) { + /** Which depgraph index we are currently considering as parent of idx. */ + ClusterIndex dep_idx = topo_order[topo_idx - 1 - dep_dist]; + // Ignore transactions which are already known to be ancestors. + if (depgraph.Descendants(dep_idx).Overlaps(written_parents)) continue; + if (depgraph.Ancestors(idx)[dep_idx]) { + // When an actual parent is encounted, encode how many non-parents were skipped + // before it. + s << VARINT(diff); + diff = 0; + written_parents.Set(dep_idx); + } else { + // When a non-parent is encountered, increment the skip counter. + ++diff; + } + } + // Write position information. + ClusterIndex insert_distance = 0; + while (insert_distance < rebuilt_order.size()) { + // Loop to find how far from the end in rebuilt_order to insert. + if (idx > *(rebuilt_order.end() - 1 - insert_distance)) break; + ++insert_distance; + } + rebuilt_order.insert(rebuilt_order.end() - insert_distance, idx); + s << VARINT(diff + insert_distance); + } + + // Output a final 0 to denote the end of the graph. + s << uint8_t{0}; + } + + template + void Unser(Stream& s, DepGraph& depgraph) + { + /** The dependency graph which we deserialize into first, with transactions in + * topological serialization order, not original cluster order. */ + DepGraph topo_depgraph; + /** Mapping from cluster order to serialization order, used later to reconstruct the + * cluster order. */ + std::vector reordering; + + // Read transactions in topological order. + try { + while (true) { + // Read size. Size 0 signifies the end of the DepGraph. + int32_t size; + s >> VARINT_MODE(size, VarIntMode::NONNEGATIVE_SIGNED); + size &= 0x3FFFFF; // Enough for size up to 4M. + static_assert(0x3FFFFF >= 4000000); + if (size == 0 || topo_depgraph.TxCount() == SetType::Size()) break; + // Read fee, encoded as an unsigned varint (odd=negative, even=non-negative). + uint64_t coded_fee; + s >> VARINT(coded_fee); + coded_fee &= 0xFFFFFFFFFFFFF; // Enough for fee between -21M...21M BTC. + static_assert(0xFFFFFFFFFFFFF > uint64_t{2} * 21000000 * 100000000); + auto fee = UnsignedToSigned(coded_fee); + // Extend topo_depgraph with the new transaction (at the end). + auto topo_idx = topo_depgraph.AddTransaction({fee, size}); + reordering.push_back(topo_idx); + // Read dependency information. + uint64_t diff = 0; //!< How many potential parents we have to skip. + s >> VARINT(diff); + for (ClusterIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) { + /** Which topo_depgraph index we are currently considering as parent of topo_idx. */ + ClusterIndex dep_topo_idx = topo_idx - 1 - dep_dist; + // Ignore transactions which are already known ancestors of topo_idx. + if (topo_depgraph.Descendants(dep_topo_idx)[topo_idx]) continue; + if (diff == 0) { + // When the skip counter has reached 0, add an actual dependency. + topo_depgraph.AddDependency(dep_topo_idx, topo_idx); + // And read the number of skips after it. + s >> VARINT(diff); + } else { + // Otherwise, dep_topo_idx is not a parent. Decrement and continue. + --diff; + } + } + // If we reach this point, we can interpret the remaining skip value as how far from the + // end of reordering topo_idx should be placed (wrapping around), so move it to its + // correct location. The preliminary reordering.push_back(topo_idx) above was to make + // sure that if a deserialization exception occurs, topo_idx still appears somewhere. + reordering.pop_back(); + reordering.insert(reordering.end() - (diff % (reordering.size() + 1)), topo_idx); + } + } catch (const std::ios_base::failure&) {} + + // Construct the original cluster order depgraph. + depgraph = {}; + // Add transactions to depgraph in the original cluster order. + for (auto topo_idx : reordering) { + depgraph.AddTransaction(topo_depgraph.FeeRate(topo_idx)); + } + // Translate dependencies from topological to cluster order. + for (ClusterIndex idx = 0; idx < reordering.size(); ++idx) { + ClusterIndex topo_idx = reordering[idx]; + for (ClusterIndex dep_idx = 0; dep_idx < reordering.size(); ++dep_idx) { + ClusterIndex dep_topo_idx = reordering[dep_idx]; + if (topo_depgraph.Ancestors(topo_idx)[dep_topo_idx]) { + depgraph.AddDependency(dep_idx, idx); + } + } + } + } +}; + +/** Perform a sanity/consistency check on a DepGraph. */ +template +void SanityCheck(const DepGraph& depgraph) +{ + // Consistency check between ancestors internally. + for (ClusterIndex i = 0; i < depgraph.TxCount(); ++i) { + // Transactions include themselves as ancestors. + assert(depgraph.Ancestors(i)[i]); + // If a is an ancestor of b, then b's ancestors must include all of a's ancestors. + for (auto a : depgraph.Ancestors(i)) { + assert(depgraph.Ancestors(i).IsSupersetOf(depgraph.Ancestors(a))); + } + } + // Consistency check between ancestors and descendants. + for (ClusterIndex i = 0; i < depgraph.TxCount(); ++i) { + for (ClusterIndex j = 0; j < depgraph.TxCount(); ++j) { + assert(depgraph.Ancestors(i)[j] == depgraph.Descendants(j)[i]); + } + } + // If DepGraph is acyclic, serialize + deserialize must roundtrip. + if (IsAcyclic(depgraph)) { + std::vector ser; + VectorWriter writer(ser, 0); + writer << Using(depgraph); + SpanReader reader(ser); + DepGraph decoded_depgraph; + reader >> Using(decoded_depgraph); + assert(depgraph == decoded_depgraph); + assert(reader.empty()); + // It must also deserialize correctly without the terminal 0 byte (as the deserializer + // will upon EOF still return what it read so far). + assert(ser.size() >= 1 && ser.back() == 0); + ser.pop_back(); + reader = SpanReader{ser}; + decoded_depgraph = {}; + reader >> Using(decoded_depgraph); + assert(depgraph == decoded_depgraph); + assert(reader.empty()); + } +} + +/** Verify that a DepGraph corresponds to the information in a cluster. */ +template +void VerifyDepGraphFromCluster(const Cluster& cluster, const DepGraph& depgraph) +{ + // Sanity check the depgraph, which includes a check for correspondence between ancestors and + // descendants, so it suffices to check just ancestors below. + SanityCheck(depgraph); + // Verify transaction count. + assert(cluster.size() == depgraph.TxCount()); + // Verify feerates. + for (ClusterIndex i = 0; i < depgraph.TxCount(); ++i) { + assert(depgraph.FeeRate(i) == cluster[i].first); + } + // Verify ancestors. + for (ClusterIndex i = 0; i < depgraph.TxCount(); ++i) { + // Start with the transaction having itself as ancestor. + auto ancestors = SetType::Singleton(i); + // Add parents of ancestors to the set of ancestors until it stops changing. + while (true) { + const auto old_ancestors = ancestors; + for (auto ancestor : ancestors) { + ancestors |= cluster[ancestor].second; + } + if (old_ancestors == ancestors) break; + } + // Compare against depgraph. + assert(depgraph.Ancestors(i) == ancestors); + // Some additional sanity tests: + // - Every transaction has itself as ancestor. + assert(ancestors[i]); + // - Every transaction has its direct parents as ancestors. + for (auto parent : cluster[i].second) { + assert(ancestors[parent]); + } + } +} + +} // namespace + +#endif // BITCOIN_TEST_UTIL_CLUSTER_LINEARIZE_H