Compare commits

...

6 commits

Author SHA1 Message Date
Sebastian Falbesoner
41ef6b9de8
Merge 59df8480be into 433412fd84 2025-01-07 01:10:29 +01:00
Sebastian Falbesoner
59df8480be contrib: add script dump_to_sqlite.sh for direct SQLite3 UTXO dump 2024-12-28 02:57:13 +01:00
Sebastian Falbesoner
15e917c930 test: add test for utxo-to-sqlite conversion using named pipe 2024-12-28 02:57:09 +01:00
Sebastian Falbesoner
baa2f174a6 rpc: support writing UTXO set dump (dumptxoutset) to a named pipe
This allows external tooling (e.g. converters) to consume the output
directly, rather than having to write the dump to disk first and then
read it from there again.
2024-12-28 02:48:52 +01:00
Sebastian Falbesoner
4080b66cbe test: add test for utxo-to-sqlite conversion script 2024-12-28 02:38:57 +01:00
Sebastian Falbesoner
ec99ed7380 contrib: add tool to convert compact-serialized UTXO set to SQLite database 2024-12-28 02:38:57 +01:00
6 changed files with 375 additions and 3 deletions

View file

@ -43,3 +43,15 @@ Command Line Tools
### [Completions](/contrib/completions) ###
Shell completions for bash and fish.
UTXO Set Tools
--------------
### [UTXO-to-SQLite](/contrib/utxo-tools/utxo_to_sqlite.py) ###
This script converts a compact-serialized UTXO set (as generated by Bitcoin Core with `dumptxoutset`)
to a SQLite3 database. For more details, such as the name and schema of the created table, refer to the
module docstring at the top of the script, which is also included in the command's `--help` output.
### [Dump-to-SQLite](/contrib/utxo-tools/dump_to_sqlite.sh) ###
This script creates a UTXO set dump in SQLite3 format on the fly from a running bitcoind instance,
i.e. the intermediate step of storing the compact-serialized UTXO set on disk is skipped.

View file

@ -0,0 +1,32 @@
#!/usr/bin/env bash
# Copyright (c) 2024-present The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
export LC_ALL=C
set -e

# Expect exactly two arguments: the bitcoin-cli command and the output database file.
if [ $# -ne 2 ]; then
    echo "Usage: $0 <bitcoin-cli-path> <output-file>"
    exit 1
fi

BITCOIN_CLI=$1
OUTPUT_FILE=$2
# The converter script is looked up next to this script.
UTXO_TO_SQLITE=$(dirname "$0")/utxo_to_sqlite.py

# create named pipe in unique temporary folder
TEMPPATH=$(mktemp -d)
# Remove the temporary folder on every exit path; with `set -e` a failing
# command would otherwise abort the script before any cleanup is reached.
trap 'rm -rf "$TEMPPATH"' EXIT
FIFOPATH=$TEMPPATH/utxos.fifo
mkfifo "$FIFOPATH"

# start dumping UTXO set to the pipe in background
# ($BITCOIN_CLI stays unquoted as before, so a value such as
# "bitcoin-cli -regtest" is word-split into the command and its options)
$BITCOIN_CLI dumptxoutset "$FIFOPATH" latest &
BITCOIN_CLI_PID=$!

# start UTXO to SQLite conversion tool, reading from pipe
# (quoted so that a script location containing spaces still works)
"$UTXO_TO_SQLITE" "$FIFOPATH" "$OUTPUT_FILE"

# wait for the RPC call to return; the EXIT trap removes the temporary folder
wait $BITCOIN_CLI_PID

View file

@ -0,0 +1,195 @@
#!/usr/bin/env python3
# Copyright (c) 2024-present The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
"""Tool to convert a compact-serialized UTXO set to a SQLite3 database.
The input UTXO set can be generated by Bitcoin Core with the `dumptxoutset` RPC:
$ bitcoin-cli dumptxoutset ~/utxos.dat
The created database contains a table `utxos` with the following schema:
(txid TEXT, vout INT, value INT, coinbase INT, height INT, scriptpubkey TEXT)
"""
import argparse
import os
import sqlite3
import sys
import time
# Marker expected in the first five bytes of a dump, and the only dump
# format version this tool accepts.
UTXO_DUMP_MAGIC = b'utxo' + b'\xff'
UTXO_DUMP_VERSION = 2

# Display names for the four-byte network magic stored in the dump header.
NET_MAGIC_BYTES = {
    bytes.fromhex("f9beb4d9"): "Mainnet",
    bytes.fromhex("0a03cf40"): "Signet",
    bytes.fromhex("0b110907"): "Testnet3",
    bytes.fromhex("1c163f28"): "Testnet4",
    bytes.fromhex("fabfb5da"): "Regtest",
}
def read_varint(f):
    """Equivalent of `ReadVarInt()` (see serialization module).

    Reads one byte at a time from the binary stream `f`. The low seven bits
    of each byte are appended to the result; a set high bit means another
    byte follows (and adds one to the accumulated value before shifting).

    Returns the decoded non-negative integer.
    Raises EOFError if the stream ends before the final byte of the varint.
    """
    n = 0
    while True:
        chunk = f.read(1)
        if not chunk:
            # An empty read means EOF; indexing it would only give a bare IndexError.
            raise EOFError("unexpected end of input while reading varint")
        dat = chunk[0]
        n = (n << 7) | (dat & 0x7f)
        if dat & 0x80:
            n += 1
        else:
            return n
def read_compactsize(f):
    """Equivalent of `ReadCompactSize()` (see serialization module).

    The first byte is either the value itself (0-252) or a marker selecting
    a little-endian payload of 2 (253), 4 (254) or 8 (255) bytes.

    Returns the decoded non-negative integer.
    Raises EOFError if the stream `f` ends before the value is complete.
    """
    marker = f.read(1)
    if not marker:
        raise EOFError("unexpected end of input while reading compact size")
    n = marker[0]
    width = {253: 2, 254: 4, 255: 8}.get(n)
    if width is None:
        return n
    payload = f.read(width)
    if len(payload) != width:
        # A short read would otherwise be decoded silently into a wrong value.
        raise EOFError("unexpected end of input while reading compact size")
    return int.from_bytes(payload, "little")
def decompress_amount(x):
    """Equivalent of `DecompressAmount()` (see compressor module).

    Zero maps to zero. Otherwise `x - 1` is split into a decimal exponent
    (its last base-10 digit) and a mantissa part, and the result is the
    mantissa multiplied by ten to that exponent.
    """
    if x == 0:
        return 0
    rest, exponent = divmod(x - 1, 10)
    if exponent < 9:
        # the last non-zero digit (1-9) is stored separately from the leading digits
        rest, digit = divmod(rest, 9)
        mantissa = rest * 10 + digit + 1
    else:
        mantissa = rest + 1
    return mantissa * 10 ** exponent
def decompress_script(f):
    """Equivalent of `DecompressScript()` (see compressor module).

    Reads a varint from the binary stream `f`. Values 0-5 select one of the
    compressed script templates; any other value is the raw script length
    plus six, and that many bytes are returned unchanged.
    """
    code = read_varint(f)
    if code == 0:  # P2PKH
        return b"\x76\xa9\x14" + f.read(20) + b"\x88\xac"
    if code == 1:  # P2SH
        return b"\xa9\x14" + f.read(20) + b"\x87"
    if code in (2, 3):  # P2PK (compressed): the code doubles as the pubkey prefix byte
        return bytes([33, code]) + f.read(32) + b"\xac"
    if code in (4, 5):  # P2PK (uncompressed): stored compressed with prefix code - 2
        full_pubkey = decompress_pubkey(bytes([code - 2]) + f.read(32))
        return b"\x41" + full_pubkey + b"\xac"
    # others (bare multisig, segwit etc.)
    size = code - 6
    assert size <= 10000, f"too long script with size {size}"
    return f.read(size)
def decompress_pubkey(compressed_pubkey):
    """Expand a 33-byte compressed secp256k1 pubkey to its 65-byte form.

    The y coordinate is recovered as y = sqrt(x^3 + 7) % p and negated if its
    parity does not match the prefix byte (2 = even, 3 = odd)
    (see functions `secp256k1_eckey_pubkey_parse` and `secp256k1_ge_set_xo_var`).
    """
    field_prime = 2**256 - 2**32 - 977  # secp256k1 field size
    assert len(compressed_pubkey) == 33 and compressed_pubkey[0] in (2, 3)
    prefix = compressed_pubkey[0]
    x = int.from_bytes(compressed_pubkey[1:], 'big')
    rhs = (pow(x, 3, field_prime) + 7) % field_prime
    # since p % 4 == 3, a square root (if one exists) is rhs^((p+1)/4) mod p
    y = pow(rhs, (field_prime + 1) // 4, field_prime)
    assert (y * y) % field_prime == rhs, f"pubkey is not on curve ({compressed_pubkey.hex()})"
    if (y % 2 == 1) != (prefix == 3):  # pick the root with the requested parity
        y = field_prime - y
    return b"\x04" + x.to_bytes(32, 'big') + y.to_bytes(32, 'big')
def main():
    """Convert a compact-serialized UTXO set dump into a SQLite3 database.

    Command-line arguments are `infile` (the dump to read), `outfile` (the
    database to create, which must not exist yet) and the optional
    `-v`/`--verbose` flag that prints every coin.

    The process exits with status 1 if the input is missing, the output
    already exists, the dump header is not recognized, or data is left over
    after the announced number of coins has been read.
    """
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('infile', help='filename of compact-serialized UTXO set (input)')
    parser.add_argument('outfile', help='filename of created SQLite3 database (output)')
    parser.add_argument('-v', '--verbose', action='store_true', help='show details about each UTXO')
    args = parser.parse_args()

    if not os.path.exists(args.infile):
        print(f"Error: provided input file '{args.infile}' doesn't exist.")
        sys.exit(1)

    if os.path.exists(args.outfile):
        print(f"Error: provided output file '{args.outfile}' already exists.")
        sys.exit(1)

    # The context manager closes the input on every exit path, including sys.exit().
    with open(args.infile, 'rb') as f:
        # read metadata (magic bytes, version, network magic, block hash, UTXO count)
        magic_bytes = f.read(5)
        version = int.from_bytes(f.read(2), 'little')
        network_magic = f.read(4)
        block_hash = f.read(32)
        num_utxos = int.from_bytes(f.read(8), 'little')
        if magic_bytes != UTXO_DUMP_MAGIC:
            print(f"Error: provided input file '{args.infile}' is not an UTXO dump.")
            sys.exit(1)
        if version != UTXO_DUMP_VERSION:
            print(f"Error: provided input file '{args.infile}' has unknown UTXO dump version {version} "
                  f"(only version {UTXO_DUMP_VERSION} supported)")
            sys.exit(1)
        network_string = NET_MAGIC_BYTES.get(network_magic, f"unknown network ({network_magic.hex()})")
        print(f"UTXO Snapshot for {network_string} at block hash "
              f"{block_hash[::-1].hex()[:32]}..., contains {num_utxos} coins")

        # Create the database only after the header has been validated, so a
        # rejected input does not leave an empty output file behind.
        con = sqlite3.connect(args.outfile)
        try:
            con.execute("CREATE TABLE utxos(txid TEXT, vout INT, value INT, coinbase INT, height INT, scriptpubkey TEXT)")

            start_time = time.time()
            write_batch = []
            coins_per_hash_left = 0
            prevout_hash = None
            max_height = 0

            for coin_idx in range(1, num_utxos+1):
                # read key (COutPoint)
                if coins_per_hash_left == 0:  # read next prevout hash
                    prevout_hash = f.read(32)[::-1].hex()
                    coins_per_hash_left = read_compactsize(f)
                prevout_index = read_compactsize(f)
                # read value (Coin): the varint packs height (upper bits) and coinbase flag (lowest bit)
                code = read_varint(f)
                height = code >> 1
                is_coinbase = code & 1
                amount = decompress_amount(read_varint(f))
                scriptpubkey = decompress_script(f).hex()
                write_batch.append((prevout_hash, prevout_index, amount, is_coinbase, height, scriptpubkey))
                if height > max_height:
                    max_height = height
                coins_per_hash_left -= 1

                if args.verbose:
                    print(f"Coin {coin_idx}/{num_utxos}:")
                    print(f"    prevout = {prevout_hash}:{prevout_index}")
                    print(f"    amount = {amount}, height = {height}, coinbase = {is_coinbase}")
                    print(f"    scriptPubKey = {scriptpubkey}\n")

                if coin_idx % (16*1024) == 0 or coin_idx == num_utxos:
                    # write utxo batch to database
                    con.executemany("INSERT INTO utxos VALUES(?, ?, ?, ?, ?, ?)", write_batch)
                    con.commit()
                    write_batch.clear()

                if coin_idx % (1024*1024) == 0:
                    elapsed = time.time() - start_time
                    print(f"{coin_idx} coins converted [{coin_idx/num_utxos*100:.2f}%], " +
                          f"{elapsed:.3f}s passed since start")
        finally:
            # close the connection even if parsing or an insert raised
            con.close()

        print(f"TOTAL: {num_utxos} coins written to {args.outfile}, snapshot height is {max_height}.")
        if f.read(1) != b'':  # EOF should be reached by now
            print(f"WARNING: input file {args.infile} has not reached EOF yet!")
            sys.exit(1)


if __name__ == '__main__':
    main()

View file

@ -3005,9 +3005,10 @@ static RPCHelpMan dumptxoutset()
const fs::path path = fsbridge::AbsPathJoin(args.GetDataDirNet(), fs::u8path(request.params[0].get_str()));
// Write to a temporary path and then move into `path` on completion
// to avoid confusion due to an interruption.
const fs::path temppath = fsbridge::AbsPathJoin(args.GetDataDirNet(), fs::u8path(request.params[0].get_str() + ".incomplete"));
const fs::path temppath = fs::is_fifo(path) ? path : // If a named pipe is passed, write directly to it
fsbridge::AbsPathJoin(args.GetDataDirNet(), fs::u8path(request.params[0].get_str() + ".incomplete"));
if (fs::exists(path)) {
if (fs::exists(path) && !fs::is_fifo(path)) {
throw JSONRPCError(
RPC_INVALID_PARAMETER,
path.utf8string() + " already exists. If you are sure this is what you want, "
@ -3084,7 +3085,7 @@ static RPCHelpMan dumptxoutset()
}
UniValue result = WriteUTXOSnapshot(*chainstate, cursor.get(), &stats, tip, afile, path, temppath, node.rpc_interruption_point);
fs::rename(temppath, path);
if (!fs::is_fifo(path)) fs::rename(temppath, path);
result.pushKV("path", path.utf8string());
return result;

View file

@ -289,6 +289,7 @@ BASE_SCRIPTS = [
'mempool_package_onemore.py',
'mempool_package_limits.py',
'mempool_package_rbf.py',
'tool_utxo_to_sqlite.py',
'feature_versionbits_warning.py',
'feature_blocksxor.py',
'rpc_preciousblock.py',

View file

@ -0,0 +1,131 @@
#!/usr/bin/env python3
# Copyright (c) 2024-present The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
"""Test utxo-to-sqlite conversion tool"""
import os
try:
import sqlite3
except ImportError:
pass
import platform
import subprocess
import sys
from test_framework.key import ECKey
from test_framework.messages import (
COutPoint,
CTxOut,
)
from test_framework.crypto.muhash import MuHash3072
from test_framework.script import (
CScript,
CScriptOp,
)
from test_framework.script_util import (
PAY_TO_ANCHOR,
key_to_p2pk_script,
key_to_p2pkh_script,
key_to_p2wpkh_script,
keys_to_multisig_script,
output_key_to_p2tr_script,
script_to_p2sh_script,
script_to_p2wsh_script,
)
from test_framework.test_framework import BitcoinTestFramework
from test_framework.util import (
assert_equal,
)
from test_framework.wallet import MiniWallet
def calculate_muhash_from_sqlite_utxos(filename):
    """Return the MuHash over all rows of the `utxos` table in SQLite database `filename`.

    Each row is serialized as outpoint, 4-byte little-endian
    `height * 2 + coinbase` and txout before it is inserted into the hash.
    The digest is returned byte-reversed as a hex string.
    """
    con = sqlite3.connect(filename)
    muhash = MuHash3072()
    rows = con.execute("SELECT * FROM utxos")
    for txid_hex, vout, value, coinbase, height, spk_hex in rows:
        # serialize UTXO for MuHash (see function `TxOutSer` in the coinstats module)
        outpoint = COutPoint(int(txid_hex, 16), vout)
        code = (height * 2 + coinbase).to_bytes(4, 'little')
        txout = CTxOut(value, bytes.fromhex(spk_hex))
        muhash.insert(outpoint.serialize() + code + txout.serialize())
    con.close()
    return muhash.digest()[::-1].hex()
class UtxoToSqliteTest(BitcoinTestFramework):
    """Functional test for the utxo_to_sqlite.py conversion script.

    Creates UTXOs with a variety of output script types, converts a
    `dumptxoutset` dump to SQLite and checks that the MuHash computed from the
    database matches the one reported by `gettxoutsetinfo`. On non-Windows
    platforms the conversion is repeated through a named pipe.
    """

    def set_test_params(self):
        self.num_nodes = 1
        # we want to create some UTXOs with non-standard output scripts
        self.extra_args = [['-acceptnonstdtxn=1']]

    def skip_test_if_missing_module(self):
        self.skip_if_no_py_sqlite3()

    def run_test(self):
        node = self.nodes[0]
        wallet = MiniWallet(node)
        key = ECKey()

        self.log.info('Create UTXOs with various output script types')
        for i in range(1, 10+1):
            key.generate(compressed=False)
            uncompressed_pubkey = key.get_pubkey().get_bytes()
            key.generate(compressed=True)
            pubkey = key.get_pubkey().get_bytes()

            # add output scripts for compressed script type 0 (P2PKH), type 1 (P2SH),
            # types 2-3 (P2PK compressed), types 4-5 (P2PK uncompressed) and
            # for uncompressed scripts (bare multisig, segwit, etc.)
            output_scripts = (
                key_to_p2pkh_script(pubkey),
                script_to_p2sh_script(key_to_p2pkh_script(pubkey)),
                key_to_p2pk_script(pubkey),
                key_to_p2pk_script(uncompressed_pubkey),

                keys_to_multisig_script([pubkey]*i),
                keys_to_multisig_script([uncompressed_pubkey]*i),
                key_to_p2wpkh_script(pubkey),
                script_to_p2wsh_script(key_to_p2pkh_script(pubkey)),
                output_key_to_p2tr_script(pubkey[1:]),
                PAY_TO_ANCHOR,
                CScript([CScriptOp.encode_op_n(i)]*(1000*i)),  # large script (up to 10000 bytes)
            )

            # create outputs and mine them in a block
            for output_script in output_scripts:
                wallet.send_to(from_node=node, scriptPubKey=output_script, amount=i, fee=20000)
            self.generate(wallet, 1)

        self.log.info('Dump UTXO set via `dumptxoutset` RPC')
        input_filename = os.path.join(self.options.tmpdir, "utxos.dat")
        node.dumptxoutset(input_filename, "latest")

        self.log.info('Convert UTXO set from compact-serialized format to sqlite format')
        output_filename = os.path.join(self.options.tmpdir, "utxos.sqlite")
        base_dir = self.config["environment"]["SRCDIR"]
        utxo_to_sqlite_path = os.path.join(base_dir, "contrib", "utxo-tools", "utxo_to_sqlite.py")
        subprocess.run([sys.executable, utxo_to_sqlite_path, input_filename, output_filename],
                       check=True, stderr=subprocess.STDOUT)

        self.log.info('Verify that both UTXO sets match by comparing their MuHash')
        muhash_sqlite = calculate_muhash_from_sqlite_utxos(output_filename)
        muhash_compact_serialized = node.gettxoutsetinfo('muhash')['muhash']
        assert_equal(muhash_sqlite, muhash_compact_serialized)

        if platform.system() != "Windows":  # FIFOs are not available on Windows
            self.log.info('Convert UTXO set directly (without intermediate dump) via named pipe')
            fifo_filename = os.path.join(self.options.tmpdir, "utxos.fifo")
            os.mkfifo(fifo_filename)
            output_direct_filename = os.path.join(self.options.tmpdir, "utxos_direct.sqlite")
            p = subprocess.Popen([sys.executable, utxo_to_sqlite_path, fifo_filename, output_direct_filename],
                                 stderr=subprocess.STDOUT)
            try:
                node.dumptxoutset(fifo_filename, "latest")
                # the converter must finish successfully, mirroring `check=True` above
                assert_equal(p.wait(timeout=10), 0)
            finally:
                # don't leave the converter running if the RPC call or the wait failed
                if p.poll() is None:
                    p.kill()
                    p.wait()
            muhash_direct_sqlite = calculate_muhash_from_sqlite_utxos(output_direct_filename)
            assert_equal(muhash_sqlite, muhash_direct_sqlite)
            os.remove(fifo_filename)


if __name__ == "__main__":
    UtxoToSqliteTest(__file__).main()