tests: framework for testing DepGraph class

This introduces a bespoke fuzzing-focused serialization format for DepGraphs,
and then tests that this format can represent any graph, roundtrips, and then
uses that to test the correctness of DepGraph itself.

This forms the basis for future fuzz tests that need to work with interesting
graphs.
This commit is contained in:
Pieter Wuille 2024-05-08 20:52:56 -04:00
parent a6e07e769a
commit 58f7e01db4
5 changed files with 564 additions and 0 deletions

View file

@ -83,6 +83,7 @@ BITCOIN_TESTS =\
test/bloom_tests.cpp \
test/bswap_tests.cpp \
test/checkqueue_tests.cpp \
test/cluster_linearize_tests.cpp \
test/coins_tests.cpp \
test/coinstatsindex_tests.cpp \
test/common_url_tests.cpp \
@ -302,6 +303,7 @@ test_fuzz_fuzz_SOURCES = \
test/fuzz/buffered_file.cpp \
test/fuzz/chain.cpp \
test/fuzz/checkqueue.cpp \
test/fuzz/cluster_linearize.cpp \
test/fuzz/coins_view.cpp \
test/fuzz/coinscache_sim.cpp \
test/fuzz/connman.cpp \

View file

@ -10,6 +10,7 @@ EXTRA_LIBRARIES += \
TEST_UTIL_H = \
test/util/blockfilter.h \
test/util/chainstate.h \
test/util/cluster_linearize.h \
test/util/coins.h \
test/util/index.h \
test/util/json.h \

View file

@ -0,0 +1,138 @@
// Copyright (c) The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include <cluster_linearize.h>
#include <test/util/cluster_linearize.h>
#include <test/util/setup_common.h>
#include <util/bitset.h>
#include <util/strencodings.h>
#include <vector>
#include <boost/test/unit_test.hpp>
BOOST_FIXTURE_TEST_SUITE(cluster_linearize_tests, BasicTestingSetup)
using namespace cluster_linearize;
namespace {
template<typename SetType>
void TestDepGraphSerialization(const Cluster<SetType>& cluster, const std::string& hexenc)
{
DepGraph depgraph(cluster);
// Run normal sanity and correspondence checks, which includes a round-trip test.
VerifyDepGraphFromCluster(cluster, depgraph);
// There may be multiple serializations of the same graph, but DepGraphFormatter's serializer
// only produces one of those. Verify that hexenc matches that canonical serialization.
std::vector<unsigned char> encoding;
VectorWriter writer(encoding, 0);
writer << Using<DepGraphFormatter>(depgraph);
BOOST_CHECK_EQUAL(HexStr(encoding), hexenc);
// Test that deserializing that encoding yields depgraph. This is effectively already implied
// by the round-trip test above (if depgraph is acyclic), but verify it explicitly again here.
SpanReader reader(encoding);
DepGraph<SetType> depgraph_read;
reader >> Using<DepGraphFormatter>(depgraph_read);
BOOST_CHECK(depgraph == depgraph_read);
}
} // namespace
BOOST_AUTO_TEST_CASE(depgraph_ser_tests)
{
// Empty cluster.
TestDepGraphSerialization<TestBitSet>(
{},
"00" /* end of graph */);
// Transactions: A(fee=0,size=1).
TestDepGraphSerialization<TestBitSet>(
{{{0, 1}, {}}},
"01" /* A size */
"00" /* A fee */
"00" /* A insertion position (no skips): A */
"00" /* end of graph */);
// Transactions: A(fee=42,size=11), B(fee=-13,size=7), B depends on A.
TestDepGraphSerialization<TestBitSet>(
{{{42, 11}, {}}, {{-13, 7}, {0}}},
"0b" /* A size */
"54" /* A fee */
"00" /* A insertion position (no skips): A */
"07" /* B size */
"19" /* B fee */
"00" /* B->A dependency (no skips) */
"00" /* B insertion position (no skips): A,B */
"00" /* end of graph */);
// Transactions: A(64,128), B(128,256), C(1,1), C depends on A and B.
TestDepGraphSerialization<TestBitSet>(
{{{64, 128}, {}}, {{128, 256}, {}}, {{1, 1}, {0, 1}}},
"8000" /* A size */
"8000" /* A fee */
"00" /* A insertion position (no skips): A */
"8100" /* B size */
"8100" /* B fee */
"01" /* B insertion position (skip B->A dependency): A,B */
"01" /* C size */
"02" /* C fee */
"00" /* C->B dependency (no skips) */
"00" /* C->A dependency (no skips) */
"00" /* C insertion position (no skips): A,B,C */
"00" /* end of graph */);
// Transactions: A(-57,113), B(57,114), C(-58,115), D(58,116). Deps: B->A, C->A, D->C, in order
// [B,A,C,D]. This exercises non-topological ordering (internally serialized as A,B,C,D).
TestDepGraphSerialization<TestBitSet>(
{{{57, 114}, {1}}, {{-57, 113}, {}}, {{-58, 115}, {1}}, {{58, 116}, {2}}},
"71" /* A size */
"71" /* A fee */
"00" /* A insertion position (no skips): A */
"72" /* B size */
"72" /* B fee */
"00" /* B->A dependency (no skips) */
"01" /* B insertion position (skip A): B,A */
"73" /* C size */
"73" /* C fee */
"01" /* C->A dependency (skip C->B dependency) */
"00" /* C insertion position (no skips): B,A,C */
"74" /* D size */
"74" /* D fee */
"00" /* D->C dependency (no skips) */
"01" /* D insertion position (skip D->B dependency, D->A is implied): B,A,C,D */
"00" /* end of graph */);
// Transactions: A(1,2), B(3,1), C(2,1), D(1,3), E(1,1). Deps: C->A, D->A, D->B, E->D.
// In order: [D,A,B,E,C]. Internally serialized in order A,B,C,D,E.
TestDepGraphSerialization<TestBitSet>(
{{{1, 3}, {1, 2}}, {{1, 2}, {}}, {{3, 1}, {}}, {{1, 1}, {0}}, {{2, 1}, {1}}},
"02" /* A size */
"02" /* A fee */
"00" /* A insertion position (no skips): A */
"01" /* B size */
"06" /* B fee */
"01" /* B insertion position (skip B->A dependency): A,B */
"01" /* C size */
"04" /* C fee */
"01" /* C->A dependency (skip C->B dependency) */
"00" /* C insertion position (no skips): A,B,C */
"03" /* D size */
"02" /* D fee */
"01" /* D->B dependency (skip D->C dependency) */
"00" /* D->A dependency (no skips) */
"03" /* D insertion position (skip C,B,A): D,A,B,C */
"01" /* E size */
"02" /* E fee */
"00" /* E->D dependency (no skips) */
"02" /* E insertion position (skip E->C dependency, E->B and E->A are implied,
skip insertion C): D,A,B,E,C */
"00" /* end of graph */
);
}
BOOST_AUTO_TEST_SUITE_END()

View file

@ -0,0 +1,87 @@
// Copyright (c) The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include <cluster_linearize.h>
#include <serialize.h>
#include <streams.h>
#include <test/fuzz/fuzz.h>
#include <test/fuzz/FuzzedDataProvider.h>
#include <test/util/cluster_linearize.h>
#include <util/bitset.h>
#include <util/feefrac.h>
#include <stdint.h>
#include <vector>
#include <utility>
FUZZ_TARGET(clusterlin_add_dependency)
{
// Verify that computing a DepGraph from a cluster, or building it step by step using AddDependency
// have the same effect.
// Construct a cluster of a certain length, with no dependencies.
FuzzedDataProvider provider(buffer.data(), buffer.size());
auto num_tx = provider.ConsumeIntegralInRange<ClusterIndex>(2, 32);
Cluster<TestBitSet> cluster(num_tx, std::pair{FeeFrac{0, 1}, TestBitSet{}});
// Construct the corresponding DepGraph object (also no dependencies).
DepGraph depgraph(cluster);
SanityCheck(depgraph);
// Read (parent, child) pairs, and add them to the cluster and depgraph.
LIMITED_WHILE(provider.remaining_bytes() > 0, TestBitSet::Size() * TestBitSet::Size()) {
auto parent = provider.ConsumeIntegralInRange<ClusterIndex>(0, num_tx - 1);
auto child = provider.ConsumeIntegralInRange<ClusterIndex>(0, num_tx - 2);
child += (child >= parent);
cluster[child].second.Set(parent);
depgraph.AddDependency(parent, child);
assert(depgraph.Ancestors(child)[parent]);
assert(depgraph.Descendants(parent)[child]);
}
// Sanity check the result.
SanityCheck(depgraph);
// Verify that the resulting DepGraph matches one recomputed from the cluster.
assert(DepGraph(cluster) == depgraph);
}
FUZZ_TARGET(clusterlin_cluster_serialization)
{
// Verify that any graph of transactions has its ancestry correctly computed by DepGraph, and
// if it is a DAG, that it can be serialized as a DepGraph in a way that roundtrips. This
// guarantees that any acyclic cluster has a corresponding DepGraph serialization.
FuzzedDataProvider provider(buffer.data(), buffer.size());
// Construct a cluster in a naive way (using a FuzzedDataProvider-based serialization).
Cluster<TestBitSet> cluster;
auto num_tx = provider.ConsumeIntegralInRange<ClusterIndex>(1, 32);
cluster.resize(num_tx);
for (ClusterIndex i = 0; i < num_tx; ++i) {
cluster[i].first.size = provider.ConsumeIntegralInRange<int32_t>(1, 0x3fffff);
cluster[i].first.fee = provider.ConsumeIntegralInRange<int64_t>(-0x8000000000000, 0x7ffffffffffff);
for (ClusterIndex j = 0; j < num_tx; ++j) {
if (i == j) continue;
if (provider.ConsumeBool()) cluster[i].second.Set(j);
}
}
// Construct dependency graph, and verify it matches the cluster (which includes a round-trip
// check for the serialization).
DepGraph depgraph(cluster);
VerifyDepGraphFromCluster(cluster, depgraph);
}
FUZZ_TARGET(clusterlin_depgraph_serialization)
{
// Verify that any deserialized depgraph is acyclic and roundtrips to an identical depgraph.
// Construct a graph by deserializing.
SpanReader reader(buffer);
DepGraph<TestBitSet> depgraph;
try {
reader >> Using<DepGraphFormatter>(depgraph);
} catch (const std::ios_base::failure&) {}
SanityCheck(depgraph);
// Verify the graph is a DAG.
assert(IsAcyclic(depgraph));
}

View file

@ -0,0 +1,336 @@
// Copyright (c) The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#ifndef BITCOIN_TEST_UTIL_CLUSTER_LINEARIZE_H
#define BITCOIN_TEST_UTIL_CLUSTER_LINEARIZE_H
#include <cluster_linearize.h>
#include <serialize.h>
#include <streams.h>
#include <util/bitset.h>
#include <util/feefrac.h>
#include <stdint.h>
#include <numeric>
#include <vector>
#include <utility>
namespace {
using namespace cluster_linearize;
using TestBitSet = BitSet<32>;
/** Check if a graph is acyclic. */
template<typename SetType>
bool IsAcyclic(const DepGraph<SetType>& depgraph) noexcept
{
for (ClusterIndex i = 0; i < depgraph.TxCount(); ++i) {
if ((depgraph.Ancestors(i) & depgraph.Descendants(i)) != SetType::Singleton(i)) {
return false;
}
}
return true;
}
/** A formatter for a bespoke serialization for acyclic DepGraph objects.
*
* The serialization format outputs information about transactions in a topological order (parents
* before children), together with position information so transactions can be moved back to their
* correct position on deserialization.
*
* - For each transaction t in the DepGraph (in some topological order);
* - The size: VARINT(t.size), which cannot be 0.
* - The fee: VARINT(SignedToUnsigned(t.fee)), see below for SignedToUnsigned.
* - For each direct dependency:
* - VARINT(skip)
* - The position of t in the cluster: VARINT(skip)
* - The end of the graph: VARINT(0)
*
* The list of skip values encodes the dependencies of t, as well as its position in the cluster.
* Each skip value is the number of possibilities that were available, but were not taken. These
* possibilities are, in order:
* - For each previous transaction in the graph, in reverse serialization order, whether it is a
* direct parent of t (but excluding transactions which are already implied to be dependencies
* by parent relations that were serialized before it).
* - The various insertion positions in the cluster, from the very end of the cluster, to the
* front.
*
* Let's say you have a 7-transaction cluster, consisting of transactions F,A,C,B,G,E,D, but
* serialized in order A,B,C,D,E,F,G, because that happens to be a topological ordering. By the
* time G gets serialized, what has been serialized already represents the cluster F,A,C,B,E,D (in
* that order). G has B and E as direct parents, and E depends on C.
*
* In this case, the possibilities are, in order:
* - [ ] the dependency G->F
* - [X] the dependency G->E
* - [ ] the dependency G->D
* - [X] the dependency G->B
* - [ ] the dependency G->A
* - [ ] put G at the end of the cluster
* - [ ] put G before D
* - [X] put G before E
* - [ ] put G before B
* - [ ] put G before C
* - [ ] put G before A
* - [ ] put G before F
*
* The skip values in this case are 1 (G->F), 1 (G->D), 3 (G->A, G at end, G before D). No skip
* after 3 is needed (or permitted), because there can only be one position for G. Also note that
* G->C is not included in the list of possibilities, as it is implied by the included G->E and
* E->C that came before it. On deserialization, if the last skip value was 8 or larger (putting
* G before the beginning of the cluster), it is interpreted as wrapping around back to the end.
*
*
* Rationale:
* - Why VARINTs? They are flexible enough to represent large numbers where needed, but more
* compact for smaller numbers. The serialization format is designed so that simple structures
* involve smaller numbers, so smaller size maps to simpler graphs.
* - Why use SignedToUnsigned? It results in small unsigned values for signed values with small
* absolute value. This way we can encode negative fees in graphs, but still let small negative
* numbers have small encodings.
* - Why are the parents emitted in reverse order compared to the transactions themselves? This
* naturally lets us skip parents-of-parents, as they will be reflected as implied dependencies.
* - Why encode skip values and not a bitmask to convey the list positions? It turns out that the
* most complex graphs (in terms of linearization complexity) are ones with ~1 dependency per
* transaction. The current encoding uses ~1 byte per transaction for dependencies in this case,
* while a bitmask would require ~N/2 bits per transaction.
*/
struct DepGraphFormatter
{
/** Convert x>=0 to 2x (even), x<0 to -2x-1 (odd). */
static uint64_t SignedToUnsigned(int64_t x) noexcept
{
if (x < 0) {
return 2 * uint64_t(-(x + 1)) + 1;
} else {
return 2 * uint64_t(x);
}
}
/** Convert even x to x/2 (>=0), odd x to -(x/2)-1 (<0). */
static int64_t UnsignedToSigned(uint64_t x) noexcept
{
if (x & 1) {
return -int64_t(x / 2) - 1;
} else {
return int64_t(x / 2);
}
}
template <typename Stream, typename SetType>
static void Ser(Stream& s, const DepGraph<SetType>& depgraph)
{
/** Construct a topological order to serialize the transactions in. */
std::vector<ClusterIndex> topo_order(depgraph.TxCount());
std::iota(topo_order.begin(), topo_order.end(), ClusterIndex{0});
std::sort(topo_order.begin(), topo_order.end(), [&](ClusterIndex a, ClusterIndex b) {
auto anc_a = depgraph.Ancestors(a).Count(), anc_b = depgraph.Ancestors(b).Count();
if (anc_a != anc_b) return anc_a < anc_b;
return a < b;
});
/** Which transactions the deserializer already knows when it has deserialized what has
* been serialized here so far, and in what order. */
std::vector<ClusterIndex> rebuilt_order;
rebuilt_order.reserve(depgraph.TxCount());
// Loop over the transactions in topological order.
for (ClusterIndex topo_idx = 0; topo_idx < topo_order.size(); ++topo_idx) {
/** Which depgraph index we are currently writing. */
ClusterIndex idx = topo_order[topo_idx];
// Write size, which must be larger than 0.
s << VARINT_MODE(depgraph.FeeRate(idx).size, VarIntMode::NONNEGATIVE_SIGNED);
// Write fee, encoded as an unsigned varint (odd=negative, even=non-negative).
s << VARINT(SignedToUnsigned(depgraph.FeeRate(idx).fee));
// Write dependency information.
SetType written_parents;
uint64_t diff = 0; //!< How many potential parent/child relations we have skipped over.
for (ClusterIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) {
/** Which depgraph index we are currently considering as parent of idx. */
ClusterIndex dep_idx = topo_order[topo_idx - 1 - dep_dist];
// Ignore transactions which are already known to be ancestors.
if (depgraph.Descendants(dep_idx).Overlaps(written_parents)) continue;
if (depgraph.Ancestors(idx)[dep_idx]) {
// When an actual parent is encounted, encode how many non-parents were skipped
// before it.
s << VARINT(diff);
diff = 0;
written_parents.Set(dep_idx);
} else {
// When a non-parent is encountered, increment the skip counter.
++diff;
}
}
// Write position information.
ClusterIndex insert_distance = 0;
while (insert_distance < rebuilt_order.size()) {
// Loop to find how far from the end in rebuilt_order to insert.
if (idx > *(rebuilt_order.end() - 1 - insert_distance)) break;
++insert_distance;
}
rebuilt_order.insert(rebuilt_order.end() - insert_distance, idx);
s << VARINT(diff + insert_distance);
}
// Output a final 0 to denote the end of the graph.
s << uint8_t{0};
}
template <typename Stream, typename SetType>
void Unser(Stream& s, DepGraph<SetType>& depgraph)
{
/** The dependency graph which we deserialize into first, with transactions in
* topological serialization order, not original cluster order. */
DepGraph<SetType> topo_depgraph;
/** Mapping from cluster order to serialization order, used later to reconstruct the
* cluster order. */
std::vector<ClusterIndex> reordering;
// Read transactions in topological order.
try {
while (true) {
// Read size. Size 0 signifies the end of the DepGraph.
int32_t size;
s >> VARINT_MODE(size, VarIntMode::NONNEGATIVE_SIGNED);
size &= 0x3FFFFF; // Enough for size up to 4M.
static_assert(0x3FFFFF >= 4000000);
if (size == 0 || topo_depgraph.TxCount() == SetType::Size()) break;
// Read fee, encoded as an unsigned varint (odd=negative, even=non-negative).
uint64_t coded_fee;
s >> VARINT(coded_fee);
coded_fee &= 0xFFFFFFFFFFFFF; // Enough for fee between -21M...21M BTC.
static_assert(0xFFFFFFFFFFFFF > uint64_t{2} * 21000000 * 100000000);
auto fee = UnsignedToSigned(coded_fee);
// Extend topo_depgraph with the new transaction (at the end).
auto topo_idx = topo_depgraph.AddTransaction({fee, size});
reordering.push_back(topo_idx);
// Read dependency information.
uint64_t diff = 0; //!< How many potential parents we have to skip.
s >> VARINT(diff);
for (ClusterIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) {
/** Which topo_depgraph index we are currently considering as parent of topo_idx. */
ClusterIndex dep_topo_idx = topo_idx - 1 - dep_dist;
// Ignore transactions which are already known ancestors of topo_idx.
if (topo_depgraph.Descendants(dep_topo_idx)[topo_idx]) continue;
if (diff == 0) {
// When the skip counter has reached 0, add an actual dependency.
topo_depgraph.AddDependency(dep_topo_idx, topo_idx);
// And read the number of skips after it.
s >> VARINT(diff);
} else {
// Otherwise, dep_topo_idx is not a parent. Decrement and continue.
--diff;
}
}
// If we reach this point, we can interpret the remaining skip value as how far from the
// end of reordering topo_idx should be placed (wrapping around), so move it to its
// correct location. The preliminary reordering.push_back(topo_idx) above was to make
// sure that if a deserialization exception occurs, topo_idx still appears somewhere.
reordering.pop_back();
reordering.insert(reordering.end() - (diff % (reordering.size() + 1)), topo_idx);
}
} catch (const std::ios_base::failure&) {}
// Construct the original cluster order depgraph.
depgraph = {};
// Add transactions to depgraph in the original cluster order.
for (auto topo_idx : reordering) {
depgraph.AddTransaction(topo_depgraph.FeeRate(topo_idx));
}
// Translate dependencies from topological to cluster order.
for (ClusterIndex idx = 0; idx < reordering.size(); ++idx) {
ClusterIndex topo_idx = reordering[idx];
for (ClusterIndex dep_idx = 0; dep_idx < reordering.size(); ++dep_idx) {
ClusterIndex dep_topo_idx = reordering[dep_idx];
if (topo_depgraph.Ancestors(topo_idx)[dep_topo_idx]) {
depgraph.AddDependency(dep_idx, idx);
}
}
}
}
};
/** Perform a sanity/consistency check on a DepGraph. */
template<typename SetType>
void SanityCheck(const DepGraph<SetType>& depgraph)
{
// Consistency check between ancestors internally.
for (ClusterIndex i = 0; i < depgraph.TxCount(); ++i) {
// Transactions include themselves as ancestors.
assert(depgraph.Ancestors(i)[i]);
// If a is an ancestor of b, then b's ancestors must include all of a's ancestors.
for (auto a : depgraph.Ancestors(i)) {
assert(depgraph.Ancestors(i).IsSupersetOf(depgraph.Ancestors(a)));
}
}
// Consistency check between ancestors and descendants.
for (ClusterIndex i = 0; i < depgraph.TxCount(); ++i) {
for (ClusterIndex j = 0; j < depgraph.TxCount(); ++j) {
assert(depgraph.Ancestors(i)[j] == depgraph.Descendants(j)[i]);
}
}
// If DepGraph is acyclic, serialize + deserialize must roundtrip.
if (IsAcyclic(depgraph)) {
std::vector<unsigned char> ser;
VectorWriter writer(ser, 0);
writer << Using<DepGraphFormatter>(depgraph);
SpanReader reader(ser);
DepGraph<TestBitSet> decoded_depgraph;
reader >> Using<DepGraphFormatter>(decoded_depgraph);
assert(depgraph == decoded_depgraph);
assert(reader.empty());
// It must also deserialize correctly without the terminal 0 byte (as the deserializer
// will upon EOF still return what it read so far).
assert(ser.size() >= 1 && ser.back() == 0);
ser.pop_back();
reader = SpanReader{ser};
decoded_depgraph = {};
reader >> Using<DepGraphFormatter>(decoded_depgraph);
assert(depgraph == decoded_depgraph);
assert(reader.empty());
}
}
/** Verify that a DepGraph corresponds to the information in a cluster. */
template<typename SetType>
void VerifyDepGraphFromCluster(const Cluster<SetType>& cluster, const DepGraph<SetType>& depgraph)
{
// Sanity check the depgraph, which includes a check for correspondence between ancestors and
// descendants, so it suffices to check just ancestors below.
SanityCheck(depgraph);
// Verify transaction count.
assert(cluster.size() == depgraph.TxCount());
// Verify feerates.
for (ClusterIndex i = 0; i < depgraph.TxCount(); ++i) {
assert(depgraph.FeeRate(i) == cluster[i].first);
}
// Verify ancestors.
for (ClusterIndex i = 0; i < depgraph.TxCount(); ++i) {
// Start with the transaction having itself as ancestor.
auto ancestors = SetType::Singleton(i);
// Add parents of ancestors to the set of ancestors until it stops changing.
while (true) {
const auto old_ancestors = ancestors;
for (auto ancestor : ancestors) {
ancestors |= cluster[ancestor].second;
}
if (old_ancestors == ancestors) break;
}
// Compare against depgraph.
assert(depgraph.Ancestors(i) == ancestors);
// Some additional sanity tests:
// - Every transaction has itself as ancestor.
assert(ancestors[i]);
// - Every transaction has its direct parents as ancestors.
for (auto parent : cluster[i].second) {
assert(ancestors[parent]);
}
}
}
} // namespace
#endif // BITCOIN_TEST_UTIL_CLUSTER_LINEARIZE_H