mirror of
https://github.com/bitcoin/bitcoin.git
synced 2025-04-29 14:59:39 -04:00
Allocate P2WSH/P2TR/P2PK scripts on stack
The current `prevector` size of 28 bytes (chosen to fill the `sizeof(CScript)` aligned size) was introduced in 2015 (https://github.com/bitcoin/bitcoin/pull/6914), before SegWit and Taproot. However, the increasingly common `P2WSH` and `P2TR` scripts are both 34 bytes, and are forced to use heap (re)allocation rather than efficient inline storage.

The core trade-off of this change is to eliminate heap allocations for common 34-36 byte scripts at the cost of increasing the base memory footprint of all `CScript` objects by 8 bytes (while still respecting peak memory usage defined by `-dbcache`).

Increasing the `prevector` size allows these scripts to be stored on the stack, avoiding heap allocations, reducing potential memory fragmentation, and improving performance during cache flushes. Massif analysis confirms a lower stable memory usage after flushing, suggesting the elimination of heap allocations outweighs the larger base size for common workloads.

Due to memory alignment, increasing the `prevector` size to 36 bytes doesn't change the overall `sizeof(CScript)` compared to an increase to 34 bytes, allowing us to include `P2PK` scripts as well at no additional memory cost.

Performance benchmarks for AssumeUTXO load and flush show:
- Small dbcache (450MB): ~1% performance penalty due to more frequent flushes
- Large dbcache (4500MB+): ~6-7% performance improvement due to fewer heap allocations

Full IBD and reindex-chainstate with larger `dbcache` values also show an overall ~3% speedup.

Co-authored-by: Ava Chow <github@achow101.com>
Co-authored-by: Andrew Toth <andrewstoth@gmail.com>
This commit is contained in:
parent
b5dc42874d
commit
b6b4235c14
5 changed files with 39 additions and 41 deletions
|
@ -16,7 +16,7 @@
|
||||||
|
|
||||||
static const size_t BATCHES = 101;
|
static const size_t BATCHES = 101;
|
||||||
static const size_t BATCH_SIZE = 30;
|
static const size_t BATCH_SIZE = 30;
|
||||||
static const int PREVECTOR_SIZE = 28;
|
static const int PREVECTOR_SIZE = 36;
|
||||||
static const unsigned int QUEUE_BATCH_SIZE = 128;
|
static const unsigned int QUEUE_BATCH_SIZE = 128;
|
||||||
|
|
||||||
// This Benchmark tests the CheckQueue with a slightly realistic workload,
|
// This Benchmark tests the CheckQueue with a slightly realistic workload,
|
||||||
|
|
|
@ -27,22 +27,22 @@ template <typename T>
|
||||||
static void PrevectorDestructor(benchmark::Bench& bench)
|
static void PrevectorDestructor(benchmark::Bench& bench)
|
||||||
{
|
{
|
||||||
bench.batch(2).run([&] {
|
bench.batch(2).run([&] {
|
||||||
prevector<28, T> t0;
|
prevector<36, T> t0;
|
||||||
prevector<28, T> t1;
|
prevector<36, T> t1;
|
||||||
t0.resize(28);
|
t0.resize(36);
|
||||||
t1.resize(29);
|
t1.resize(37);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static void PrevectorClear(benchmark::Bench& bench)
|
static void PrevectorClear(benchmark::Bench& bench)
|
||||||
{
|
{
|
||||||
prevector<28, T> t0;
|
prevector<36, T> t0;
|
||||||
prevector<28, T> t1;
|
prevector<36, T> t1;
|
||||||
bench.batch(2).run([&] {
|
bench.batch(2).run([&] {
|
||||||
t0.resize(28);
|
t0.resize(36);
|
||||||
t0.clear();
|
t0.clear();
|
||||||
t1.resize(29);
|
t1.resize(37);
|
||||||
t1.clear();
|
t1.clear();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -50,12 +50,12 @@ static void PrevectorClear(benchmark::Bench& bench)
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static void PrevectorResize(benchmark::Bench& bench)
|
static void PrevectorResize(benchmark::Bench& bench)
|
||||||
{
|
{
|
||||||
prevector<28, T> t0;
|
prevector<36, T> t0;
|
||||||
prevector<28, T> t1;
|
prevector<36, T> t1;
|
||||||
bench.batch(4).run([&] {
|
bench.batch(4).run([&] {
|
||||||
t0.resize(28);
|
t0.resize(36);
|
||||||
t0.resize(0);
|
t0.resize(0);
|
||||||
t1.resize(29);
|
t1.resize(37);
|
||||||
t1.resize(0);
|
t1.resize(0);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -64,8 +64,8 @@ template <typename T>
|
||||||
static void PrevectorDeserialize(benchmark::Bench& bench)
|
static void PrevectorDeserialize(benchmark::Bench& bench)
|
||||||
{
|
{
|
||||||
DataStream s0{};
|
DataStream s0{};
|
||||||
prevector<28, T> t0;
|
prevector<36, T> t0;
|
||||||
t0.resize(28);
|
t0.resize(36);
|
||||||
for (auto x = 0; x < 900; ++x) {
|
for (auto x = 0; x < 900; ++x) {
|
||||||
s0 << t0;
|
s0 << t0;
|
||||||
}
|
}
|
||||||
|
@ -74,7 +74,7 @@ static void PrevectorDeserialize(benchmark::Bench& bench)
|
||||||
s0 << t0;
|
s0 << t0;
|
||||||
}
|
}
|
||||||
bench.batch(1000).run([&] {
|
bench.batch(1000).run([&] {
|
||||||
prevector<28, T> t1;
|
prevector<36, T> t1;
|
||||||
for (auto x = 0; x < 1000; ++x) {
|
for (auto x = 0; x < 1000; ++x) {
|
||||||
s0 >> t1;
|
s0 >> t1;
|
||||||
}
|
}
|
||||||
|
@ -86,7 +86,7 @@ template <typename T>
|
||||||
static void PrevectorFillVectorDirect(benchmark::Bench& bench)
|
static void PrevectorFillVectorDirect(benchmark::Bench& bench)
|
||||||
{
|
{
|
||||||
bench.run([&] {
|
bench.run([&] {
|
||||||
std::vector<prevector<28, T>> vec;
|
std::vector<prevector<36, T>> vec;
|
||||||
vec.reserve(260);
|
vec.reserve(260);
|
||||||
for (size_t i = 0; i < 260; ++i) {
|
for (size_t i = 0; i < 260; ++i) {
|
||||||
vec.emplace_back();
|
vec.emplace_back();
|
||||||
|
@ -99,11 +99,11 @@ template <typename T>
|
||||||
static void PrevectorFillVectorIndirect(benchmark::Bench& bench)
|
static void PrevectorFillVectorIndirect(benchmark::Bench& bench)
|
||||||
{
|
{
|
||||||
bench.run([&] {
|
bench.run([&] {
|
||||||
std::vector<prevector<28, T>> vec;
|
std::vector<prevector<36, T>> vec;
|
||||||
vec.reserve(260);
|
vec.reserve(260);
|
||||||
for (size_t i = 0; i < 260; ++i) {
|
for (size_t i = 0; i < 260; ++i) {
|
||||||
// force allocation
|
// force allocation
|
||||||
vec.emplace_back(29, T{});
|
vec.emplace_back(37, T{});
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
@ -406,7 +406,7 @@ private:
|
||||||
* Tests in October 2015 showed use of this reduced dbcache memory usage by 23%
|
* Tests in October 2015 showed use of this reduced dbcache memory usage by 23%
|
||||||
* and made an initial sync 13% faster.
|
* and made an initial sync 13% faster.
|
||||||
*/
|
*/
|
||||||
typedef prevector<28, unsigned char> CScriptBase;
|
typedef prevector<36, unsigned char> CScriptBase;
|
||||||
|
|
||||||
bool GetScriptOp(CScriptBase::const_iterator& pc, CScriptBase::const_iterator end, opcodetype& opcodeRet, std::vector<unsigned char>* pvchRet);
|
bool GetScriptOp(CScriptBase::const_iterator& pc, CScriptBase::const_iterator end, opcodetype& opcodeRet, std::vector<unsigned char>* pvchRet);
|
||||||
|
|
||||||
|
|
|
@ -1131,10 +1131,10 @@ BOOST_AUTO_TEST_CASE(script_CHECKMULTISIG23)
|
||||||
|
|
||||||
BOOST_AUTO_TEST_CASE(script_size_and_capacity_test)
|
BOOST_AUTO_TEST_CASE(script_size_and_capacity_test)
|
||||||
{
|
{
|
||||||
BOOST_CHECK_EQUAL(sizeof(prevector<28, unsigned char>), 32);
|
BOOST_CHECK_EQUAL(sizeof(prevector<34, uint8_t>), sizeof(prevector<36, uint8_t>));
|
||||||
BOOST_CHECK_EQUAL(sizeof(CScriptBase), 32);
|
BOOST_CHECK_EQUAL(sizeof(CScriptBase), 40);
|
||||||
BOOST_CHECK_EQUAL(sizeof(CScript), 32);
|
BOOST_CHECK_EQUAL(sizeof(CScript), 40);
|
||||||
BOOST_CHECK_EQUAL(sizeof(CTxOut), 40);
|
BOOST_CHECK_EQUAL(sizeof(CTxOut), 48);
|
||||||
|
|
||||||
CKey dummyKey;
|
CKey dummyKey;
|
||||||
dummyKey.MakeNewKey(true);
|
dummyKey.MakeNewKey(true);
|
||||||
|
@ -1146,7 +1146,7 @@ BOOST_AUTO_TEST_CASE(script_size_and_capacity_test)
|
||||||
const auto scriptSmallOpReturn{CScript() << OP_RETURN << std::vector<uint8_t>(10, 0xaa)};
|
const auto scriptSmallOpReturn{CScript() << OP_RETURN << std::vector<uint8_t>(10, 0xaa)};
|
||||||
BOOST_CHECK_EQUAL(Solver(scriptSmallOpReturn, dummyVSolutions), TxoutType::NULL_DATA);
|
BOOST_CHECK_EQUAL(Solver(scriptSmallOpReturn, dummyVSolutions), TxoutType::NULL_DATA);
|
||||||
BOOST_CHECK_EQUAL(scriptSmallOpReturn.size(), 12);
|
BOOST_CHECK_EQUAL(scriptSmallOpReturn.size(), 12);
|
||||||
BOOST_CHECK_EQUAL(scriptSmallOpReturn.capacity(), 28);
|
BOOST_CHECK_EQUAL(scriptSmallOpReturn.capacity(), 36);
|
||||||
BOOST_CHECK_EQUAL(scriptSmallOpReturn.allocated_memory(), 0);
|
BOOST_CHECK_EQUAL(scriptSmallOpReturn.allocated_memory(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1155,7 +1155,7 @@ BOOST_AUTO_TEST_CASE(script_size_and_capacity_test)
|
||||||
const auto scriptP2WPKH{GetScriptForDestination(WitnessV0KeyHash{PKHash{CKeyID{CPubKey{dummyKey.GetPubKey()}.GetID()}}})};
|
const auto scriptP2WPKH{GetScriptForDestination(WitnessV0KeyHash{PKHash{CKeyID{CPubKey{dummyKey.GetPubKey()}.GetID()}}})};
|
||||||
BOOST_CHECK_EQUAL(Solver(scriptP2WPKH, dummyVSolutions), TxoutType::WITNESS_V0_KEYHASH);
|
BOOST_CHECK_EQUAL(Solver(scriptP2WPKH, dummyVSolutions), TxoutType::WITNESS_V0_KEYHASH);
|
||||||
BOOST_CHECK_EQUAL(scriptP2WPKH.size(), 22);
|
BOOST_CHECK_EQUAL(scriptP2WPKH.size(), 22);
|
||||||
BOOST_CHECK_EQUAL(scriptP2WPKH.capacity(), 28);
|
BOOST_CHECK_EQUAL(scriptP2WPKH.capacity(), 36);
|
||||||
BOOST_CHECK_EQUAL(scriptP2WPKH.allocated_memory(), 0);
|
BOOST_CHECK_EQUAL(scriptP2WPKH.allocated_memory(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1164,7 +1164,7 @@ BOOST_AUTO_TEST_CASE(script_size_and_capacity_test)
|
||||||
const auto scriptP2SH{GetScriptForDestination(ScriptHash{CScript{} << OP_TRUE})};
|
const auto scriptP2SH{GetScriptForDestination(ScriptHash{CScript{} << OP_TRUE})};
|
||||||
BOOST_CHECK(scriptP2SH.IsPayToScriptHash());
|
BOOST_CHECK(scriptP2SH.IsPayToScriptHash());
|
||||||
BOOST_CHECK_EQUAL(scriptP2SH.size(), 23);
|
BOOST_CHECK_EQUAL(scriptP2SH.size(), 23);
|
||||||
BOOST_CHECK_EQUAL(scriptP2SH.capacity(), 28);
|
BOOST_CHECK_EQUAL(scriptP2SH.capacity(), 36);
|
||||||
BOOST_CHECK_EQUAL(scriptP2SH.allocated_memory(), 0);
|
BOOST_CHECK_EQUAL(scriptP2SH.allocated_memory(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1173,35 +1173,35 @@ BOOST_AUTO_TEST_CASE(script_size_and_capacity_test)
|
||||||
const auto scriptP2PKH{GetScriptForDestination(PKHash{CKeyID{CPubKey{dummyKey.GetPubKey()}.GetID()}})};
|
const auto scriptP2PKH{GetScriptForDestination(PKHash{CKeyID{CPubKey{dummyKey.GetPubKey()}.GetID()}})};
|
||||||
BOOST_CHECK_EQUAL(Solver(scriptP2PKH, dummyVSolutions), TxoutType::PUBKEYHASH);
|
BOOST_CHECK_EQUAL(Solver(scriptP2PKH, dummyVSolutions), TxoutType::PUBKEYHASH);
|
||||||
BOOST_CHECK_EQUAL(scriptP2PKH.size(), 25);
|
BOOST_CHECK_EQUAL(scriptP2PKH.size(), 25);
|
||||||
BOOST_CHECK_EQUAL(scriptP2PKH.capacity(), 28);
|
BOOST_CHECK_EQUAL(scriptP2PKH.capacity(), 36);
|
||||||
BOOST_CHECK_EQUAL(scriptP2PKH.allocated_memory(), 0);
|
BOOST_CHECK_EQUAL(scriptP2PKH.allocated_memory(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// P2WSH is heap allocated
|
// P2WSH is stack allocated
|
||||||
{
|
{
|
||||||
const auto scriptP2WSH{GetScriptForDestination(WitnessV0ScriptHash{CScript{} << OP_TRUE})};
|
const auto scriptP2WSH{GetScriptForDestination(WitnessV0ScriptHash{CScript{} << OP_TRUE})};
|
||||||
BOOST_CHECK(scriptP2WSH.IsPayToWitnessScriptHash());
|
BOOST_CHECK(scriptP2WSH.IsPayToWitnessScriptHash());
|
||||||
BOOST_CHECK_EQUAL(scriptP2WSH.size(), 34);
|
BOOST_CHECK_EQUAL(scriptP2WSH.size(), 34);
|
||||||
BOOST_CHECK_EQUAL(scriptP2WSH.capacity(), 34);
|
BOOST_CHECK_EQUAL(scriptP2WSH.capacity(), 36);
|
||||||
BOOST_CHECK_EQUAL(scriptP2WSH.allocated_memory(), 34);
|
BOOST_CHECK_EQUAL(scriptP2WSH.allocated_memory(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// P2TR is heap allocated
|
// P2TR is stack allocated
|
||||||
{
|
{
|
||||||
const auto scriptTaproot{GetScriptForDestination(WitnessV1Taproot{XOnlyPubKey{CPubKey{dummyKey.GetPubKey()}}})};
|
const auto scriptTaproot{GetScriptForDestination(WitnessV1Taproot{XOnlyPubKey{CPubKey{dummyKey.GetPubKey()}}})};
|
||||||
BOOST_CHECK_EQUAL(Solver(scriptTaproot, dummyVSolutions), TxoutType::WITNESS_V1_TAPROOT);
|
BOOST_CHECK_EQUAL(Solver(scriptTaproot, dummyVSolutions), TxoutType::WITNESS_V1_TAPROOT);
|
||||||
BOOST_CHECK_EQUAL(scriptTaproot.size(), 34);
|
BOOST_CHECK_EQUAL(scriptTaproot.size(), 34);
|
||||||
BOOST_CHECK_EQUAL(scriptTaproot.capacity(), 34);
|
BOOST_CHECK_EQUAL(scriptTaproot.capacity(), 36);
|
||||||
BOOST_CHECK_EQUAL(scriptTaproot.allocated_memory(), 34);
|
BOOST_CHECK_EQUAL(scriptTaproot.allocated_memory(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// P2PK is heap allocated
|
// P2PK is stack allocated
|
||||||
{
|
{
|
||||||
const auto scriptPubKey{GetScriptForRawPubKey(CPubKey{dummyKey.GetPubKey()})};
|
const auto scriptPubKey{GetScriptForRawPubKey(CPubKey{dummyKey.GetPubKey()})};
|
||||||
BOOST_CHECK_EQUAL(Solver(scriptPubKey, dummyVSolutions), TxoutType::PUBKEY);
|
BOOST_CHECK_EQUAL(Solver(scriptPubKey, dummyVSolutions), TxoutType::PUBKEY);
|
||||||
BOOST_CHECK_EQUAL(scriptPubKey.size(), 35);
|
BOOST_CHECK_EQUAL(scriptPubKey.size(), 35);
|
||||||
BOOST_CHECK_EQUAL(scriptPubKey.capacity(), 35);
|
BOOST_CHECK_EQUAL(scriptPubKey.capacity(), 36);
|
||||||
BOOST_CHECK_EQUAL(scriptPubKey.allocated_memory(), 35);
|
BOOST_CHECK_EQUAL(scriptPubKey.allocated_memory(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// MULTISIG is always heap allocated
|
// MULTISIG is always heap allocated
|
||||||
|
|
|
@ -26,10 +26,8 @@ BOOST_AUTO_TEST_CASE(getcoinscachesizestate)
|
||||||
LOCK(::cs_main);
|
LOCK(::cs_main);
|
||||||
auto& view = chainstate.CoinsTip();
|
auto& view = chainstate.CoinsTip();
|
||||||
|
|
||||||
// The number of bytes consumed by coin's heap data, i.e. CScript
|
// The number of bytes consumed by coin's heap data, i.e. CScript (prevector<36, unsigned char>)
|
||||||
// (prevector<28, unsigned char>) when assigned 56 bytes of data per above.
|
// when assigned 56 bytes of data per above. See also: Coin::DynamicMemoryUsage().
|
||||||
//
|
|
||||||
// See also: Coin::DynamicMemoryUsage().
|
|
||||||
constexpr unsigned int COIN_SIZE = is_64_bit ? 80 : 64;
|
constexpr unsigned int COIN_SIZE = is_64_bit ? 80 : 64;
|
||||||
|
|
||||||
auto print_view_mem_usage = [](CCoinsViewCache& view) {
|
auto print_view_mem_usage = [](CCoinsViewCache& view) {
|
||||||
|
|
Loading…
Add table
Reference in a new issue