// Copyright (c) 2011-2019 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#include <bench/bench.h>
#include <policy/policy.h>
#include <test/util/setup_common.h>
#include <txmempool.h>
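// Add a transaction to the mempool at the given fee; the remaining
// CTxMemPoolEntry fields are filled with fixed placeholder values.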
static void AddTx(const CTransactionRef& tx, const CAmount& nFee, CTxMemPool& pool) EXCLUSIVE_LOCKS_REQUIRED(cs_main, pool.cs)
{
    int64_t nTime = 0;
    unsigned int nHeight = 1;
    bool spendsCoinbase = false;
    unsigned int sigOpCost = 4;
    LockPoints lp;
    pool.addUnchecked(CTxMemPoolEntry(
        tx, nFee, nTime, nHeight,
        spendsCoinbase, sigOpCost, lp));
}

// Right now this is only testing eviction performance in an extremely small
// mempool. Code needs to be written to generate a much wider variety of
// unique transactions for a more meaningful performance measurement.
static void MempoolEviction(benchmark::Bench& bench)
{
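    // Minimal regtest chain setup. The extra args disable the debug log file and
    // debug output, presumably to keep logging overhead out of the measurement.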
    TestingSetup test_setup{
        CBaseChainParams::REGTEST,
        /* extra_args */ {
            "-nodebuglogfile",
            "-nodebug",
        },
    };
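    // Build seven small transactions. tx3 spends tx2, tx5 and tx6 each spend one
    // of tx4's outputs, and tx7 spends tx5 and tx6, so the pool contains short
    // ancestor/descendant chains rather than only independent entries.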
    CMutableTransaction tx1 = CMutableTransaction();
    tx1.vin.resize(1);
    tx1.vin[0].scriptSig = CScript() << OP_1;
    tx1.vin[0].scriptWitness.stack.push_back({1});
    tx1.vout.resize(1);
    tx1.vout[0].scriptPubKey = CScript() << OP_1 << OP_EQUAL;
    tx1.vout[0].nValue = 10 * COIN;

    CMutableTransaction tx2 = CMutableTransaction();
    tx2.vin.resize(1);
    tx2.vin[0].scriptSig = CScript() << OP_2;
    tx2.vin[0].scriptWitness.stack.push_back({2});
    tx2.vout.resize(1);
    tx2.vout[0].scriptPubKey = CScript() << OP_2 << OP_EQUAL;
    tx2.vout[0].nValue = 10 * COIN;

    CMutableTransaction tx3 = CMutableTransaction();
    tx3.vin.resize(1);
    tx3.vin[0].prevout = COutPoint(tx2.GetHash(), 0);
    tx3.vin[0].scriptSig = CScript() << OP_2;
    tx3.vin[0].scriptWitness.stack.push_back({3});
    tx3.vout.resize(1);
    tx3.vout[0].scriptPubKey = CScript() << OP_3 << OP_EQUAL;
    tx3.vout[0].nValue = 10 * COIN;

    CMutableTransaction tx4 = CMutableTransaction();
    tx4.vin.resize(2);
    tx4.vin[0].prevout.SetNull();
    tx4.vin[0].scriptSig = CScript() << OP_4;
    tx4.vin[0].scriptWitness.stack.push_back({4});
    tx4.vin[1].prevout.SetNull();
    tx4.vin[1].scriptSig = CScript() << OP_4;
    tx4.vin[1].scriptWitness.stack.push_back({4});
    tx4.vout.resize(2);
    tx4.vout[0].scriptPubKey = CScript() << OP_4 << OP_EQUAL;
    tx4.vout[0].nValue = 10 * COIN;
    tx4.vout[1].scriptPubKey = CScript() << OP_4 << OP_EQUAL;
    tx4.vout[1].nValue = 10 * COIN;

    CMutableTransaction tx5 = CMutableTransaction();
    tx5.vin.resize(2);
    tx5.vin[0].prevout = COutPoint(tx4.GetHash(), 0);
    tx5.vin[0].scriptSig = CScript() << OP_4;
    tx5.vin[0].scriptWitness.stack.push_back({4});
    tx5.vin[1].prevout.SetNull();
    tx5.vin[1].scriptSig = CScript() << OP_5;
    tx5.vin[1].scriptWitness.stack.push_back({5});
    tx5.vout.resize(2);
    tx5.vout[0].scriptPubKey = CScript() << OP_5 << OP_EQUAL;
    tx5.vout[0].nValue = 10 * COIN;
    tx5.vout[1].scriptPubKey = CScript() << OP_5 << OP_EQUAL;
    tx5.vout[1].nValue = 10 * COIN;

    CMutableTransaction tx6 = CMutableTransaction();
    tx6.vin.resize(2);
    tx6.vin[0].prevout = COutPoint(tx4.GetHash(), 1);
    tx6.vin[0].scriptSig = CScript() << OP_4;
    tx6.vin[0].scriptWitness.stack.push_back({4});
    tx6.vin[1].prevout.SetNull();
    tx6.vin[1].scriptSig = CScript() << OP_6;
    tx6.vin[1].scriptWitness.stack.push_back({6});
    tx6.vout.resize(2);
    tx6.vout[0].scriptPubKey = CScript() << OP_6 << OP_EQUAL;
    tx6.vout[0].nValue = 10 * COIN;
    tx6.vout[1].scriptPubKey = CScript() << OP_6 << OP_EQUAL;
    tx6.vout[1].nValue = 10 * COIN;

    CMutableTransaction tx7 = CMutableTransaction();
    tx7.vin.resize(2);
    tx7.vin[0].prevout = COutPoint(tx5.GetHash(), 0);
    tx7.vin[0].scriptSig = CScript() << OP_5;
    tx7.vin[0].scriptWitness.stack.push_back({5});
    tx7.vin[1].prevout = COutPoint(tx6.GetHash(), 0);
    tx7.vin[1].scriptSig = CScript() << OP_6;
    tx7.vin[1].scriptWitness.stack.push_back({6});
    tx7.vout.resize(2);
    tx7.vout[0].scriptPubKey = CScript() << OP_7 << OP_EQUAL;
    tx7.vout[0].nValue = 10 * COIN;
    tx7.vout[1].scriptPubKey = CScript() << OP_7 << OP_EQUAL;
    tx7.vout[1].nValue = 10 * COIN;

    CTxMemPool pool;
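    // AddTx requires cs_main and the mempool's own lock, so both are taken once
    // here rather than inside the timed loop.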
    LOCK2(cs_main, pool.cs);
    // Create transaction references outside the "hot loop"
    const CTransactionRef tx1_r{MakeTransactionRef(tx1)};
    const CTransactionRef tx2_r{MakeTransactionRef(tx2)};
    const CTransactionRef tx3_r{MakeTransactionRef(tx3)};
    const CTransactionRef tx4_r{MakeTransactionRef(tx4)};
    const CTransactionRef tx5_r{MakeTransactionRef(tx5)};
    const CTransactionRef tx6_r{MakeTransactionRef(tx6)};
    const CTransactionRef tx7_r{MakeTransactionRef(tx7)};
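    // Each iteration adds the seven transactions and then trims the pool twice:
    // first to 3/4 of its current dynamic memory usage, then to the virtual size
    // of tx1, a limit small enough that effectively everything is evicted again.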
    bench.run([&]() NO_THREAD_SAFETY_ANALYSIS {
        AddTx(tx1_r, 10000LL, pool);
        AddTx(tx2_r, 5000LL, pool);
        AddTx(tx3_r, 20000LL, pool);
        AddTx(tx4_r, 7000LL, pool);
        AddTx(tx5_r, 1000LL, pool);
        AddTx(tx6_r, 1100LL, pool);
        AddTx(tx7_r, 9000LL, pool);
        pool.TrimToSize(pool.DynamicMemoryUsage() * 3 / 4);
        pool.TrimToSize(GetVirtualTransactionSize(*tx1_r));
    });
}
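// To run only this benchmark: e.g. ./bench_bitcoin -filter=MempoolEviction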
BENCHMARK(MempoolEviction);