Use smaller keys and values

Key was 36 bytes (txid: 32 bytes, output index: 4 bytes) and is now 8 bytes: the siphash of the spent outpoint, keyed with a random key that is created when the index is created (to avoid collision attacks).
Value was 32 bytes (txid: 32 bytes), and is now a list of tx positions (9 bytes unless there are collisions which should be extremely rare).
This commit is contained in:
sstone 2024-09-23 16:14:26 +02:00
parent e41b0f550f
commit 00ea901253
6 changed files with 160 additions and 71 deletions

View file

@ -3,9 +3,13 @@
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include <common/args.h>
#include <crypto/siphash.h>
#include <index/disktxpos.h>
#include <index/txospenderindex.h>
#include <logging.h>
#include <node/blockstorage.h>
#include <random.h>
#include <uint256.h>
#include <validation.h>
// LevelDB key prefix. We only have one key for now but it will make it easier to add others if needed.
@ -13,61 +17,99 @@ constexpr uint8_t DB_TXOSPENDERINDEX{'s'};
std::unique_ptr<TxoSpenderIndex> g_txospenderindex;
/** Access to the txo spender index database (indexes/txospenderindex/) */
class TxoSpenderIndex::DB : public BaseIndex::DB
{
public:
explicit DB(size_t n_cache_size, bool f_memory = false, bool f_wipe = false);
bool WriteSpenderInfos(const std::vector<std::pair<COutPoint, uint256>>& items);
bool EraseSpenderInfos(const std::vector<COutPoint>& items);
};
TxoSpenderIndex::DB::DB(size_t n_cache_size, bool f_memory, bool f_wipe)
: BaseIndex::DB(gArgs.GetDataDirNet() / "indexes" / "txospenderindex", n_cache_size, f_memory, f_wipe)
{
}
TxoSpenderIndex::TxoSpenderIndex(std::unique_ptr<interfaces::Chain> chain, size_t n_cache_size, bool f_memory, bool f_wipe)
: BaseIndex(std::move(chain), "txospenderindex")
, m_db(std::make_unique<TxoSpenderIndex::DB>(n_cache_size, f_memory, f_wipe))
{
fs::path path{gArgs.GetDataDirNet() / "indexes" / "txospenderindex"};
fs::create_directories(path);
m_db = std::make_unique<TxoSpenderIndex::DB>(path / "db", n_cache_size, f_memory, f_wipe);
if (!m_db->Read("siphash_key", m_siphash_key)) {
FastRandomContext rng(false);
m_siphash_key = {rng.rand64(), rng.rand64()};
assert(m_db->Write("siphash_key", m_siphash_key));
}
}
TxoSpenderIndex::~TxoSpenderIndex() = default;
bool TxoSpenderIndex::DB::WriteSpenderInfos(const std::vector<std::pair<COutPoint, uint256>>& items)
uint64_t TxoSpenderIndex::CreateKey(const COutPoint& vout) const
{
CDBBatch batch(*this);
for (const auto& [outpoint, hash] : items) {
batch.Write(std::pair{DB_TXOSPENDERINDEX, outpoint}, hash);
}
return WriteBatch(batch);
return SipHashUint256Extra(m_siphash_key.first, m_siphash_key.second, vout.hash.ToUint256(), vout.n);
}
bool TxoSpenderIndex::DB::EraseSpenderInfos(const std::vector<COutPoint>& items)
bool TxoSpenderIndex::WriteSpenderInfos(const std::vector<std::pair<COutPoint, CDiskTxPos>>& items)
{
CDBBatch batch(*this);
for (const auto& outpoint : items) {
batch.Erase(std::pair{DB_TXOSPENDERINDEX, outpoint});
CDBBatch batch(*m_db);
for (const auto& [outpoint, pos] : items) {
std::vector<CDiskTxPos> positions;
std::pair<uint8_t, uint64_t> key{DB_TXOSPENDERINDEX, CreateKey(outpoint)};
if (m_db->Exists(key)) {
if (!m_db->Read(key, positions)) {
LogError("Cannot read current state; tx spender index may be corrupted\n");
}
}
if (std::find(positions.begin(), positions.end(), pos) == positions.end()) {
positions.push_back(pos);
batch.Write(key, positions);
}
}
return WriteBatch(batch);
return m_db->WriteBatch(batch);
}
/**
 * Remove the index entries for a list of spent outpoints.
 *
 * Each outpoint is mapped to its DB key with CreateKey(). The value stored under a key is a
 * list of transaction positions, which holds more than one element only when several
 * outpoints hash to the same key.
 *
 * - If the entry cannot be read, a warning is logged and the outpoint is skipped.
 * - If the list has at most one element, the whole key is erased.
 * - Otherwise the candidate transactions are read from disk until one that spends the
 *   outpoint is found; only that position is removed and the shortened list is written back.
 *   If no candidate spends the outpoint, a warning is logged and the entry is left untouched.
 *
 * All modifications are queued in a single batch that is written at the end.
 * NOTE(review): entries are read through m_db while updates are only queued in the batch, so
 * two outpoints of the same call that share a key presumably both see the state from before
 * this call - confirm against the CDBBatch/CDBWrapper semantics.
 *
 * @param[in] items  Outpoints whose spender entries must be removed.
 * @return the result of writing the batch to the database.
 */
bool TxoSpenderIndex::EraseSpenderInfos(const std::vector<COutPoint>& items)
{
    CDBBatch batch(*m_db);
    for (const auto& outpoint : items) {
        std::vector<CDiskTxPos> positions;
        std::pair<uint8_t, uint64_t> key{DB_TXOSPENDERINDEX, CreateKey(outpoint)};
        if (!m_db->Read(key, positions)) {
            LogWarning("Could not read expected entry\n");
            continue;
        }
        if (positions.size() <= 1) {
            // no collision: the key only refers to the spender of this outpoint
            batch.Erase(key);
            continue;
        }
        // there are collisions: find the position of the tx that spends the outpoint we want to erase
        // this is expensive but extremely uncommon
        auto match = positions.end();
        for (auto it = positions.begin(); it != positions.end() && match == positions.end(); ++it) {
            CTransactionRef tx;
            if (!ReadTransaction(*it, tx)) continue;
            for (const auto& input : tx->vin) {
                if (input.prevout == outpoint) {
                    // stop at the first match: the outer loop condition ends the scan as well,
                    // so no further transactions are read from disk
                    match = it;
                    break;
                }
            }
        }
        if (match == positions.end()) {
            LogWarning("Could not find the spending transaction among colliding entries; entry left untouched\n");
            continue;
        }
        // remove it from the list and keep the other (colliding) positions
        positions.erase(match);
        batch.Write(key, positions);
    }
    return m_db->WriteBatch(batch);
}
bool TxoSpenderIndex::CustomAppend(const interfaces::BlockInfo& block)
{
std::vector<std::pair<COutPoint, uint256>> items;
std::vector<std::pair<COutPoint, CDiskTxPos>> items;
items.reserve(block.data->vtx.size());
CDiskTxPos pos({block.file_number, block.data_pos}, GetSizeOfCompactSize(block.data->vtx.size()));
for (const auto& tx : block.data->vtx) {
if (tx->IsCoinBase()) {
continue;
}
for (const auto& input : tx->vin) {
items.emplace_back(input.prevout, tx->GetHash());
if (!tx->IsCoinBase()) {
for (const auto& input : tx->vin) {
items.emplace_back(input.prevout, pos);
}
}
pos.nTxOffset += ::GetSerializeSize(TX_WITH_WITNESS(*tx));
}
return m_db->WriteSpenderInfos(items);
return WriteSpenderInfos(items);
}
bool TxoSpenderIndex::CustomRewind(const interfaces::BlockRef& current_tip, const interfaces::BlockRef& new_tip)
@ -92,7 +134,7 @@ bool TxoSpenderIndex::CustomRewind(const interfaces::BlockRef& current_tip, cons
items.emplace_back(input.prevout);
}
}
if (!m_db->EraseSpenderInfos(items)) {
if (!EraseSpenderInfos(items)) {
LogError("Failed to erase indexed data for disconnected block %s from disk\n", iter_tip->GetBlockHash().ToString());
return false;
}
@ -103,13 +145,43 @@ bool TxoSpenderIndex::CustomRewind(const interfaces::BlockRef& current_tip, cons
return true;
}
std::optional<Txid> TxoSpenderIndex::FindSpender(const COutPoint& txo) const
bool TxoSpenderIndex::ReadTransaction(const CDiskTxPos& tx_pos, CTransactionRef& tx) const
{
uint256 tx_hash_out;
if (m_db->Read(std::pair{DB_TXOSPENDERINDEX, txo}, tx_hash_out)) {
return Txid::FromUint256(tx_hash_out);
AutoFile file{m_chainstate->m_blockman.OpenBlockFile(tx_pos, true)};
if (file.IsNull()) {
return false;
}
return std::nullopt;
CBlockHeader header;
try {
file >> header;
file.seek(tx_pos.nTxOffset, SEEK_CUR);
file >> TX_WITH_WITNESS(tx);
return true;
} catch (const std::exception& e) {
LogError("Deserialize or I/O error - %s\n", e.what());
return false;
}
}
/**
 * Look up the transaction that spends a given outpoint.
 *
 * The DB entry stored under the key derived from the outpoint (see CreateKey()) is a list of
 * candidate transaction positions; it holds several candidates when different outpoints map
 * to the same key. Each candidate is loaded with ReadTransaction() and its inputs are checked
 * against the requested outpoint. Candidates that cannot be read are skipped.
 *
 * @param[in] txo  The outpoint to look up.
 * @return the first candidate transaction with an input spending txo, or nullptr if the
 *         index has no entry for it or no candidate spends it.
 */
CTransactionRef TxoSpenderIndex::FindSpender(const COutPoint& txo) const
{
    std::vector<CDiskTxPos> candidates;
    if (m_db->Read(std::pair{DB_TXOSPENDERINDEX, CreateKey(txo)}, candidates)) {
        // the key is only a hash of the outpoint, so every candidate must be verified
        for (const auto& candidate : candidates) {
            CTransactionRef spender;
            if (!ReadTransaction(candidate, spender)) {
                continue;
            }
            for (const auto& txin : spender->vin) {
                if (txin.prevout == txo) {
                    return spender;
                }
            }
        }
    }
    // no entry in the db, or none of the candidates spends our outpoint
    return nullptr;
}
BaseIndex::DB& TxoSpenderIndex::GetDB() const { return *m_db; }

View file

@ -6,6 +6,7 @@
#define BITCOIN_INDEX_TXOSPENDERINDEX_H
#include <index/base.h>
#include <index/disktxpos.h>
static constexpr bool DEFAULT_TXOSPENDERINDEX{false};
@ -16,13 +17,14 @@ static constexpr bool DEFAULT_TXOSPENDERINDEX{false};
*/
class TxoSpenderIndex final : public BaseIndex
{
protected:
class DB;
private:
const std::unique_ptr<DB> m_db;
std::unique_ptr<BaseIndex::DB> m_db;
std::pair<uint64_t, uint64_t> m_siphash_key;
uint64_t CreateKey(const COutPoint& vout) const;
bool AllowPrune() const override { return true; }
bool WriteSpenderInfos(const std::vector<std::pair<COutPoint, CDiskTxPos>>& items);
bool EraseSpenderInfos(const std::vector<COutPoint>& items);
bool ReadTransaction(const CDiskTxPos& pos, CTransactionRef& tx) const;
protected:
bool CustomAppend(const interfaces::BlockInfo& block) override;
@ -37,7 +39,7 @@ public:
// Destroys unique_ptr to an incomplete type.
virtual ~TxoSpenderIndex() override;
std::optional<Txid> FindSpender(const COutPoint& txo) const;
CTransactionRef FindSpender(const COutPoint& txo) const;
};
/// The global txo spender index. May be null.

View file

@ -267,6 +267,8 @@ static const CRPCConvertParam vRPCConvertParams[] =
{ "getmempooldescendants", 1, "verbose" },
{ "gettxspendingprevout", 0, "outputs" },
{ "gettxspendingprevout", 1, "options" },
{ "gettxspendingprevout", 1, "mempool_only" },
{ "gettxspendingprevout", 1, "return_spending_tx" },
{ "bumpfee", 1, "options" },
{ "bumpfee", 1, "conf_target"},
{ "bumpfee", 1, "fee_rate"},

View file

@ -598,9 +598,10 @@ static RPCHelpMan gettxspendingprevout()
},
},
},
{"options", RPCArg::Type::OBJ, RPCArg::Optional::OMITTED, "",
{"options", RPCArg::Type::OBJ_NAMED_PARAMS, RPCArg::Optional::OMITTED, "",
{
{"mempool_only", RPCArg::Type::BOOL, RPCArg::DefaultHint{"true if txospenderindex unavailable, otherwise false"}, "If false and empool lacks a relevant spend, use txospenderindex (throws an exception if not available)."},
{"return_spending_tx", RPCArg::Type::BOOL, RPCArg::DefaultHint{"false"}, "If true, return the full spending tx."},
},
},
},
@ -612,6 +613,7 @@ static RPCHelpMan gettxspendingprevout()
{RPCResult::Type::STR_HEX, "txid", "the transaction id of the checked output"},
{RPCResult::Type::NUM, "vout", "the vout value of the checked output"},
{RPCResult::Type::STR_HEX, "spendingtxid", /*optional=*/true, "the transaction id of the mempool transaction spending this output (omitted if unspent)"},
{RPCResult::Type::STR_HEX, "spendingtx", /*optional=*/true, "the transaction spending this output (only if return_spending_tx is set, omitted if unspent)"},
{RPCResult::Type::ARR, "warnings", /* optional */ true, "If spendingtxid isn't found in the mempool, and the mempool_only option isn't set explicitly, this will advise of issues using the txospenderindex.",
{
{RPCResult::Type::STR, "", ""},
@ -631,16 +633,21 @@ static RPCHelpMan gettxspendingprevout()
}
std::optional<bool> mempool_only;
std::optional<bool> return_spending_tx;
if (!request.params[1].isNull()) {
const UniValue& options = request.params[1];
RPCTypeCheckObj(options,
{
{"mempool_only", UniValueType(UniValue::VBOOL)},
{"return_spending_tx", UniValueType(UniValue::VBOOL)},
},
/*fAllowNull=*/true, /*fStrict=*/true);
if (options.exists("mempool_only")) {
mempool_only = options["mempool_only"].get_bool();
}
if (options.exists("return_spending_tx")) {
return_spending_tx = options["return_spending_tx"].get_bool();
}
}
std::vector<COutPoint> prevouts;
@ -679,12 +686,18 @@ static RPCHelpMan gettxspendingprevout()
const CTransaction* spendingTx = mempool.GetConflictTx(prevout);
if (spendingTx != nullptr) {
o.pushKV("spendingtxid", spendingTx->GetHash().ToString());
if (return_spending_tx) {
o.pushKV("spendingtx", EncodeHexTx(*spendingTx));
}
} else if (mempool_only.value_or(false)) {
// do nothing, caller has selected to only query the mempool
} else if (g_txospenderindex) {
// no spending tx in mempool, query txospender index
if (auto spending_txid{g_txospenderindex->FindSpender(prevout)}) {
o.pushKV("spendingtxid", spending_txid->GetHex());
if (auto spending_tx{g_txospenderindex->FindSpender(prevout)}) {
o.pushKV("spendingtxid", spending_tx->GetHash().GetHex());
if (return_spending_tx) {
o.pushKV("spendingtx", EncodeHexTx(*spending_tx));
}
if (!f_txospenderindex_ready) {
// warn if index is not ready as the spending tx that we found may be stale (it may be reorged out)
UniValue warnings(UniValue::VARR);

View file

@ -47,7 +47,7 @@ BOOST_FIXTURE_TEST_CASE(txospenderindex_initial_sync, TestChain100Setup)
// Transaction should not be found in the index before it is started.
for (const auto& outpoint : spent) {
BOOST_CHECK(!txospenderindex.FindSpender(outpoint).has_value());
BOOST_CHECK(!txospenderindex.FindSpender(outpoint));
}
// BlockUntilSyncedToCurrentChain should return false before txospenderindex is started.
@ -63,7 +63,7 @@ BOOST_FIXTURE_TEST_CASE(txospenderindex_initial_sync, TestChain100Setup)
UninterruptibleSleep(std::chrono::milliseconds{100});
}
for (size_t i = 0; i < spent.size(); i++) {
BOOST_CHECK_EQUAL(txospenderindex.FindSpender(spent[i]).value(), spender[i].GetHash());
BOOST_CHECK_EQUAL(txospenderindex.FindSpender(spent[i])->GetHash(), spender[i].GetHash());
}
// It is not safe to stop and destroy the index until it finishes handling

View file

@ -109,11 +109,11 @@ class RPCMempoolInfoTest(BitcoinTestFramework):
self.generate(self.wallet, 1)
# spending transactions are found in the index of nodes 0 and 1 but not node 2
result = self.nodes[0].gettxspendingprevout([ {'txid' : confirmed_utxo['txid'], 'vout' : 0}, {'txid' : txidA, 'vout' : 1} ])
assert_equal(result, [ {'txid' : confirmed_utxo['txid'], 'vout' : 0, 'spendingtxid' : txidA}, {'txid' : txidA, 'vout' : 1, 'spendingtxid' : txidC} ])
result = self.nodes[1].gettxspendingprevout([ {'txid' : confirmed_utxo['txid'], 'vout' : 0}, {'txid' : txidA, 'vout' : 1} ])
assert_equal(result, [ {'txid' : confirmed_utxo['txid'], 'vout' : 0, 'spendingtxid' : txidA}, {'txid' : txidA, 'vout' : 1, 'spendingtxid' : txidC} ])
result = self.nodes[2].gettxspendingprevout([ {'txid' : confirmed_utxo['txid'], 'vout' : 0}, {'txid' : txidA, 'vout' : 1} ])
result = self.nodes[0].gettxspendingprevout([ {'txid' : confirmed_utxo['txid'], 'vout' : 0}, {'txid' : txidA, 'vout' : 1} ], return_spending_tx=True)
assert_equal(result, [ {'txid' : confirmed_utxo['txid'], 'vout' : 0, 'spendingtxid' : txidA, 'spendingtx' : txA['hex']}, {'txid' : txidA, 'vout' : 1, 'spendingtxid' : txidC, 'spendingtx' : txC['hex']} ])
result = self.nodes[1].gettxspendingprevout([ {'txid' : confirmed_utxo['txid'], 'vout' : 0}, {'txid' : txidA, 'vout' : 1} ], return_spending_tx=True)
assert_equal(result, [ {'txid' : confirmed_utxo['txid'], 'vout' : 0, 'spendingtxid' : txidA, 'spendingtx' : txA['hex']}, {'txid' : txidA, 'vout' : 1, 'spendingtxid' : txidC, 'spendingtx' : txC['hex']} ])
result = self.nodes[2].gettxspendingprevout([ {'txid' : confirmed_utxo['txid'], 'vout' : 0}, {'txid' : txidA, 'vout' : 1} ], return_spending_tx=True)
assert_equal(result, [ {'txid' : confirmed_utxo['txid'], 'vout' : 0, 'warnings': ['txospenderindex is unavailable.']}, {'txid' : txidA, 'vout' : 1, 'warnings': ['txospenderindex is unavailable.']} ])
@ -123,8 +123,8 @@ class RPCMempoolInfoTest(BitcoinTestFramework):
self.generate(self.wallet, 1)
# tx1 is confirmed, and indexed in txospenderindex as spending our utxo
assert not tx1["txid"] in self.nodes[0].getrawmempool()
result = self.nodes[0].gettxspendingprevout([ {'txid' : confirmed_utxo['txid'], 'vout' : 0} ])
assert_equal(result, [ {'txid' : confirmed_utxo['txid'], 'vout' : 0, 'spendingtxid' : tx1["txid"]} ])
result = self.nodes[0].gettxspendingprevout([ {'txid' : confirmed_utxo['txid'], 'vout' : 0} ], return_spending_tx=True)
assert_equal(result, [ {'txid' : confirmed_utxo['txid'], 'vout' : 0, 'spendingtxid' : tx1["txid"], 'spendingtx' : tx1['hex']} ])
# replace tx1 with tx2
self.nodes[0].invalidateblock(self.nodes[0].getbestblockhash())
self.nodes[1].invalidateblock(self.nodes[1].getbestblockhash())
@ -135,13 +135,13 @@ class RPCMempoolInfoTest(BitcoinTestFramework):
assert tx2["txid"] in self.nodes[0].getrawmempool()
# check that we find tx2 when we look in the mempool for a tx spending our output
result = self.nodes[0].gettxspendingprevout([ {'txid' : confirmed_utxo['txid'], 'vout' : 0} ])
assert_equal(result, [ {'txid' : confirmed_utxo['txid'], 'vout' : 0, 'spendingtxid' : tx2["txid"]} ])
result = self.nodes[0].gettxspendingprevout([ {'txid' : confirmed_utxo['txid'], 'vout' : 0} ], return_spending_tx=True)
assert_equal(result, [ {'txid' : confirmed_utxo['txid'], 'vout' : 0, 'spendingtxid' : tx2["txid"], 'spendingtx' : tx2['hex']} ])
# check that our txospenderindex has been updated
self.generate(self.wallet, 1)
result = self.nodes[0].gettxspendingprevout([ {'txid' : confirmed_utxo['txid'], 'vout' : 0} ])
assert_equal(result, [ {'txid' : confirmed_utxo['txid'], 'vout' : 0, 'spendingtxid' : tx2["txid"]} ])
result = self.nodes[0].gettxspendingprevout([ {'txid' : confirmed_utxo['txid'], 'vout' : 0} ], return_spending_tx=True)
assert_equal(result, [ {'txid' : confirmed_utxo['txid'], 'vout' : 0, 'spendingtxid' : tx2["txid"], 'spendingtx' : tx2['hex']} ])
self.log.info("Check that our txospenderindex is updated when a reorg cancels a spending transaction")
confirmed_utxo = self.wallet.get_utxo(mark_as_spent = False)
@ -150,10 +150,10 @@ class RPCMempoolInfoTest(BitcoinTestFramework):
# tx1 spends our utxo, tx2 spends tx1
self.generate(self.wallet, 1)
# tx1 and tx2 are confirmed, and indexed in txospenderindex
result = self.nodes[0].gettxspendingprevout([ {'txid' : confirmed_utxo['txid'], 'vout' : 0} ])
assert_equal(result, [ {'txid' : confirmed_utxo['txid'], 'vout' : 0, 'spendingtxid' : tx1["txid"]} ])
result = self.nodes[0].gettxspendingprevout([ {'txid' : tx1['txid'], 'vout' : 0} ])
assert_equal(result, [ {'txid' : tx1['txid'], 'vout' : 0, 'spendingtxid' : tx2["txid"]} ])
result = self.nodes[0].gettxspendingprevout([ {'txid' : confirmed_utxo['txid'], 'vout' : 0} ], return_spending_tx=True)
assert_equal(result, [ {'txid' : confirmed_utxo['txid'], 'vout' : 0, 'spendingtxid' : tx1["txid"], 'spendingtx' : tx1['hex']} ])
result = self.nodes[0].gettxspendingprevout([ {'txid' : tx1['txid'], 'vout' : 0} ], return_spending_tx=True)
assert_equal(result, [ {'txid' : tx1['txid'], 'vout' : 0, 'spendingtxid' : tx2["txid"], 'spendingtx' : tx2['hex']} ])
# replace tx1 with tx3
blockhash= self.nodes[0].getbestblockhash()
self.nodes[0].invalidateblock(blockhash)
@ -164,18 +164,18 @@ class RPCMempoolInfoTest(BitcoinTestFramework):
assert not tx1["txid"] in self.nodes[0].getrawmempool()
assert not tx2["txid"] in self.nodes[0].getrawmempool()
# tx2 is not in the mempool anymore, but still in txospender index which has not been rewound yet
result = self.nodes[0].gettxspendingprevout([ {'txid' : tx1['txid'], 'vout' : 0} ])
assert_equal(result, [ {'txid' : tx1['txid'], 'vout' : 0, 'spendingtxid' : tx2["txid"]} ])
result = self.nodes[0].gettxspendingprevout([ {'txid' : tx1['txid'], 'vout' : 0} ], return_spending_tx=True)
assert_equal(result, [ {'txid' : tx1['txid'], 'vout' : 0, 'spendingtxid' : tx2["txid"], 'spendingtx' : tx2['hex']} ])
txinfo = self.nodes[0].getrawtransaction(tx2["txid"], verbose = True, blockhash = blockhash)
assert_equal(txinfo["confirmations"], 0)
assert_equal(txinfo["in_active_chain"], False)
self.generate(self.wallet, 1)
# we check that the spending tx for tx1 is now tx3
result = self.nodes[0].gettxspendingprevout([ {'txid' : confirmed_utxo['txid'], 'vout' : 0} ])
assert_equal(result, [ {'txid' : confirmed_utxo['txid'], 'vout' : 0, 'spendingtxid' : tx3["txid"]} ])
result = self.nodes[0].gettxspendingprevout([ {'txid' : confirmed_utxo['txid'], 'vout' : 0} ], return_spending_tx=True)
assert_equal(result, [ {'txid' : confirmed_utxo['txid'], 'vout' : 0, 'spendingtxid' : tx3["txid"], 'spendingtx' : tx3['hex']} ])
# we check that there is no more spending tx for tx1
result = self.nodes[0].gettxspendingprevout([ {'txid' : tx1['txid'], 'vout' : 0} ])
result = self.nodes[0].gettxspendingprevout([ {'txid' : tx1['txid'], 'vout' : 0} ], return_spending_tx=True)
assert_equal(result, [ {'txid' : tx1['txid'], 'vout' : 0} ])