From 9d3c943d550fbc679e236a43c5301a3901c51458 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Tue, 19 Nov 2024 16:11:56 +0100 Subject: [PATCH 1/6] bench: measure block (size)serialization speed Measure both full block serialization and size computation via `SizeComputer`. `SizeComputer` returns the exact final size of the serialized content without writing any bytes. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/bin/bench_bitcoin -filter='SizeComputerBlock|SerializeBlock' --min-time=10000 > C compiler ............................ AppleClang 16.0.0.16000026 | ns/block | block/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 195,610.62 | 5,112.20 | 0.3% | 11.00 | `SerializeBlock` | 12,061.83 | 82,906.19 | 0.1% | 11.01 | `SizeComputerBlock` > C++ compiler .......................... GNU 13.3.0 | ns/block | block/s | err% | ins/block | cyc/block | IPC | bra/block | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 867,857.55 | 1,152.26 | 0.0% | 8,015,883.90 | 3,116,099.08 | 2.572 | 1,517,035.87 | 0.5% | 10.81 | `SerializeBlock` | 30,928.27 | 32,332.88 | 0.0% | 221,683.03 | 111,055.84 | 1.996 | 53,037.03 | 0.8% | 11.03 | `SizeComputerBlock` --- src/bench/checkblock.cpp | 33 +++++++++++++++++++++++++++++---- src/streams.h | 1 + 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/bench/checkblock.cpp b/src/bench/checkblock.cpp index 9558d64f199..ba26457b31a 100644 --- a/src/bench/checkblock.cpp +++ b/src/bench/checkblock.cpp @@ -21,11 +21,34 @@ #include #include +static void SizeComputerBlock(benchmark::Bench& bench) { + CBlock block; + DataStream(benchmark::data::block413567) >> TX_WITH_WITNESS(block); + + bench.unit("block").run([&] { + SizeComputer size_computer; + size_computer << 
TX_WITH_WITNESS(block); + assert(size_computer.size() == benchmark::data::block413567.size()); + }); +} + +static void SerializeBlock(benchmark::Bench& bench) { + CBlock block; + DataStream(benchmark::data::block413567) >> TX_WITH_WITNESS(block); + + // Create output stream and verify first serialization matches input + bench.unit("block").run([&] { + DataStream output_stream(benchmark::data::block413567.size()); + output_stream << TX_WITH_WITNESS(block); + assert(output_stream.size() == benchmark::data::block413567.size()); + }); +} + // These are the two major time-sinks which happen after we have fully received // a block off the wire, but before we can relay the block on to peers using // compact block relay. -static void DeserializeBlockTest(benchmark::Bench& bench) +static void DeserializeBlock(benchmark::Bench& bench) { DataStream stream(benchmark::data::block413567); std::byte a{0}; @@ -39,7 +62,7 @@ static void DeserializeBlockTest(benchmark::Bench& bench) }); } -static void DeserializeAndCheckBlockTest(benchmark::Bench& bench) +static void DeserializeAndCheckBlock(benchmark::Bench& bench) { DataStream stream(benchmark::data::block413567); std::byte a{0}; @@ -60,5 +83,7 @@ static void DeserializeAndCheckBlockTest(benchmark::Bench& bench) }); } -BENCHMARK(DeserializeBlockTest, benchmark::PriorityLevel::HIGH); -BENCHMARK(DeserializeAndCheckBlockTest, benchmark::PriorityLevel::HIGH); +BENCHMARK(SizeComputerBlock, benchmark::PriorityLevel::HIGH); +BENCHMARK(SerializeBlock, benchmark::PriorityLevel::HIGH); +BENCHMARK(DeserializeBlock, benchmark::PriorityLevel::HIGH); +BENCHMARK(DeserializeAndCheckBlock, benchmark::PriorityLevel::HIGH); diff --git a/src/streams.h b/src/streams.h index 1ebcff3671f..59538b2af0e 100644 --- a/src/streams.h +++ b/src/streams.h @@ -162,6 +162,7 @@ public: typedef vector_type::reverse_iterator reverse_iterator; explicit DataStream() = default; + explicit DataStream(size_type n) { reserve(n); } explicit DataStream(std::span sp) : 
DataStream{std::as_bytes(sp)} {} explicit DataStream(std::span sp) : vch(sp.data(), sp.data() + sp.size()) {} From ffc615edebe87d20d5a60549fe442fe46afce6d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Thu, 13 Feb 2025 22:49:51 +0100 Subject: [PATCH 2/6] cleanup: remove unused `ser_writedata16be` and `ser_readdata16be` --- src/serialize.h | 11 ----------- src/test/fuzz/integer.cpp | 4 ---- 2 files changed, 15 deletions(-) diff --git a/src/serialize.h b/src/serialize.h index 98851056bdb..6807a2be229 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -60,11 +60,6 @@ template inline void ser_writedata16(Stream &s, uint16_t obj) obj = htole16_internal(obj); s.write(std::as_bytes(std::span{&obj, 1})); } -template inline void ser_writedata16be(Stream &s, uint16_t obj) -{ - obj = htobe16_internal(obj); - s.write(std::as_bytes(std::span{&obj, 1})); -} template inline void ser_writedata32(Stream &s, uint32_t obj) { obj = htole32_internal(obj); @@ -92,12 +87,6 @@ template inline uint16_t ser_readdata16(Stream &s) s.read(std::as_writable_bytes(std::span{&obj, 1})); return le16toh_internal(obj); } -template inline uint16_t ser_readdata16be(Stream &s) -{ - uint16_t obj; - s.read(std::as_writable_bytes(std::span{&obj, 1})); - return be16toh_internal(obj); -} template inline uint32_t ser_readdata32(Stream &s) { uint32_t obj; diff --git a/src/test/fuzz/integer.cpp b/src/test/fuzz/integer.cpp index a6729155d1f..1215f532b74 100644 --- a/src/test/fuzz/integer.cpp +++ b/src/test/fuzz/integer.cpp @@ -236,10 +236,6 @@ FUZZ_TARGET(integer, .init = initialize_integer) const uint16_t deserialized_u16 = ser_readdata16(stream); assert(u16 == deserialized_u16 && stream.empty()); - ser_writedata16be(stream, u16); - const uint16_t deserialized_u16be = ser_readdata16be(stream); - assert(u16 == deserialized_u16be && stream.empty()); - ser_writedata8(stream, u8); const uint8_t deserialized_u8 = ser_readdata8(stream); assert(u8 == deserialized_u8 && stream.empty()); From 
e00ce6ed3c81408b2c837f8e4dde7a9c362947ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Fri, 17 Jan 2025 14:29:33 +0100 Subject: [PATCH 3/6] refactor: reduce template bloat in primitive serialization Merge multiple template overloads into a single implementation that dispatches with `if constexpr`, to reduce template bloat (i.e. related functionality is grouped into a single function template, and `if constexpr` keeps only the matching branch in each instantiation, so it can still be optimized). This unifies related overloads that were previously connected only by their similar signatures, and enables `SizeComputer` optimizations later --- src/serialize.h | 53 +++++++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/src/serialize.h b/src/serialize.h index 6807a2be229..48ddd4d565a 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -241,38 +241,47 @@ const Out& AsBase(const In& x) template concept CharNotInt8 = std::same_as && !std::same_as; +template +concept ByteOrIntegral = std::is_same_v || + (std::is_integral_v && !std::is_same_v); + template void Serialize(Stream&, V) = delete; // char serialization forbidden. 
Use uint8_t or int8_t -template void Serialize(Stream& s, std::byte a) { ser_writedata8(s, uint8_t(a)); } -template inline void Serialize(Stream& s, int8_t a ) { ser_writedata8(s, a); } -template inline void Serialize(Stream& s, uint8_t a ) { ser_writedata8(s, a); } -template inline void Serialize(Stream& s, int16_t a ) { ser_writedata16(s, a); } -template inline void Serialize(Stream& s, uint16_t a) { ser_writedata16(s, a); } -template inline void Serialize(Stream& s, int32_t a ) { ser_writedata32(s, a); } -template inline void Serialize(Stream& s, uint32_t a) { ser_writedata32(s, a); } -template inline void Serialize(Stream& s, int64_t a ) { ser_writedata64(s, a); } -template inline void Serialize(Stream& s, uint64_t a) { ser_writedata64(s, a); } +template void Serialize(Stream& s, T a) +{ + if constexpr (sizeof(T) == 1) { + ser_writedata8(s, static_cast(a)); // (u)int8_t or std::byte or bool + } else if constexpr (sizeof(T) == 2) { + ser_writedata16(s, static_cast(a)); // (u)int16_t + } else if constexpr (sizeof(T) == 4) { + ser_writedata32(s, static_cast(a)); // (u)int32_t + } else { + static_assert(sizeof(T) == 8); + ser_writedata64(s, static_cast(a)); // (u)int64_t + } +} template void Serialize(Stream& s, const B (&a)[N]) { s.write(MakeByteSpan(a)); } template void Serialize(Stream& s, const std::array& a) { s.write(MakeByteSpan(a)); } template void Serialize(Stream& s, std::span span) { s.write(std::as_bytes(span)); } template void Serialize(Stream& s, std::span span) { s.write(std::as_bytes(span)); } template void Unserialize(Stream&, V) = delete; // char serialization forbidden. 
Use uint8_t or int8_t -template void Unserialize(Stream& s, std::byte& a) { a = std::byte{ser_readdata8(s)}; } -template inline void Unserialize(Stream& s, int8_t& a ) { a = ser_readdata8(s); } -template inline void Unserialize(Stream& s, uint8_t& a ) { a = ser_readdata8(s); } -template inline void Unserialize(Stream& s, int16_t& a ) { a = ser_readdata16(s); } -template inline void Unserialize(Stream& s, uint16_t& a) { a = ser_readdata16(s); } -template inline void Unserialize(Stream& s, int32_t& a ) { a = ser_readdata32(s); } -template inline void Unserialize(Stream& s, uint32_t& a) { a = ser_readdata32(s); } -template inline void Unserialize(Stream& s, int64_t& a ) { a = ser_readdata64(s); } -template inline void Unserialize(Stream& s, uint64_t& a) { a = ser_readdata64(s); } +template void Unserialize(Stream& s, T& a) +{ + if constexpr (sizeof(T) == 1) { + a = static_cast(ser_readdata8(s)); // (u)int8_t or std::byte or bool + } else if constexpr (sizeof(T) == 2) { + a = static_cast(ser_readdata16(s)); // (u)int16_t + } else if constexpr (sizeof(T) == 4) { + a = static_cast(ser_readdata32(s)); // (u)int32_t + } else { + static_assert(sizeof(T) == 8); + a = static_cast(ser_readdata64(s)); // (u)int64_t + } +} template void Unserialize(Stream& s, B (&a)[N]) { s.read(MakeWritableByteSpan(a)); } template void Unserialize(Stream& s, std::array& a) { s.read(MakeWritableByteSpan(a)); } template void Unserialize(Stream& s, std::span span) { s.read(std::as_writable_bytes(span)); } template void Unserialize(Stream& s, std::span span) { s.read(std::as_writable_bytes(span)); } - -template inline void Serialize(Stream& s, bool a) { uint8_t f = a; ser_writedata8(s, f); } -template inline void Unserialize(Stream& s, bool& a) { uint8_t f = ser_readdata8(s); a = f; } // clang-format on @@ -478,7 +487,7 @@ public: * serialization, and Unser(stream, object&) for deserialization. 
Serialization routines (inside * READWRITE, or directly with << and >> operators), can then use Using(object). * - * This works by constructing a Wrapper-wrapped version of object, where T is + * This works by constructing a Wrapper-wrapped version of object, where T is * const during serialization, and non-const during deserialization, which maintains const * correctness. */ From cbdb759d0e3f85235f0e0409d847ace7d3f30c4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Thu, 20 Mar 2025 10:28:52 +0100 Subject: [PATCH 4/6] refactor: add explicit static extent to spans --- src/serialize.h | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/serialize.h b/src/serialize.h index 48ddd4d565a..c1c145fa4b5 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -53,56 +53,56 @@ constexpr deserialize_type deserialize {}; */ template inline void ser_writedata8(Stream &s, uint8_t obj) { - s.write(std::as_bytes(std::span{&obj, 1})); + s.write(std::as_bytes(std::span{&obj, 1})); } template inline void ser_writedata16(Stream &s, uint16_t obj) { obj = htole16_internal(obj); - s.write(std::as_bytes(std::span{&obj, 1})); + s.write(std::as_bytes(std::span{&obj, 1})); } template inline void ser_writedata32(Stream &s, uint32_t obj) { obj = htole32_internal(obj); - s.write(std::as_bytes(std::span{&obj, 1})); + s.write(std::as_bytes(std::span{&obj, 1})); } template inline void ser_writedata32be(Stream &s, uint32_t obj) { obj = htobe32_internal(obj); - s.write(std::as_bytes(std::span{&obj, 1})); + s.write(std::as_bytes(std::span{&obj, 1})); } template inline void ser_writedata64(Stream &s, uint64_t obj) { obj = htole64_internal(obj); - s.write(std::as_bytes(std::span{&obj, 1})); + s.write(std::as_bytes(std::span{&obj, 1})); } template inline uint8_t ser_readdata8(Stream &s) { uint8_t obj; - s.read(std::as_writable_bytes(std::span{&obj, 1})); + s.read(std::as_writable_bytes(std::span{&obj, 1})); return obj; } template inline 
uint16_t ser_readdata16(Stream &s) { uint16_t obj; - s.read(std::as_writable_bytes(std::span{&obj, 1})); + s.read(std::as_writable_bytes(std::span{&obj, 1})); return le16toh_internal(obj); } template inline uint32_t ser_readdata32(Stream &s) { uint32_t obj; - s.read(std::as_writable_bytes(std::span{&obj, 1})); + s.read(std::as_writable_bytes(std::span{&obj, 1})); return le32toh_internal(obj); } template inline uint32_t ser_readdata32be(Stream &s) { uint32_t obj; - s.read(std::as_writable_bytes(std::span{&obj, 1})); + s.read(std::as_writable_bytes(std::span{&obj, 1})); return be32toh_internal(obj); } template inline uint64_t ser_readdata64(Stream &s) { uint64_t obj; - s.read(std::as_writable_bytes(std::span{&obj, 1})); + s.read(std::as_writable_bytes(std::span{&obj, 1})); return le64toh_internal(obj); } @@ -281,7 +281,6 @@ template void Unserialize(Stream& s, T& a) template void Unserialize(Stream& s, B (&a)[N]) { s.read(MakeWritableByteSpan(a)); } template void Unserialize(Stream& s, std::array& a) { s.read(MakeWritableByteSpan(a)); } template void Unserialize(Stream& s, std::span span) { s.read(std::as_writable_bytes(span)); } -template void Unserialize(Stream& s, std::span span) { s.read(std::as_writable_bytes(span)); } // clang-format on @@ -534,10 +533,10 @@ struct CustomUintFormatter if (v < 0 || v > MAX) throw std::ios_base::failure("CustomUintFormatter value out of range"); if (BigEndian) { uint64_t raw = htobe64_internal(v); - s.write(std::as_bytes(std::span{&raw, 1}).last(Bytes)); + s.write(std::as_bytes(std::span{&raw, 1}).template last()); } else { uint64_t raw = htole64_internal(v); - s.write(std::as_bytes(std::span{&raw, 1}).first(Bytes)); + s.write(std::as_bytes(std::span{&raw, 1}).template first()); } } @@ -547,10 +546,10 @@ struct CustomUintFormatter static_assert(std::numeric_limits::max() >= MAX && std::numeric_limits::min() <= 0, "Assigned type too small"); uint64_t raw = 0; if (BigEndian) { - s.read(std::as_writable_bytes(std::span{&raw, 
1}).last(Bytes)); + s.read(std::as_writable_bytes(std::span{&raw, 1}).last()); v = static_cast(be64toh_internal(raw)); } else { - s.read(std::as_writable_bytes(std::span{&raw, 1}).first(Bytes)); + s.read(std::as_writable_bytes(std::span{&raw, 1}).first()); v = static_cast(le64toh_internal(raw)); } } From f40e3487d502f06f895000728de4a22d2ad7cbf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Fri, 14 Feb 2025 13:54:57 +0100 Subject: [PATCH 5/6] optimization: merge `SizeComputer` specializations and add new overloads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Endianness doesn’t affect final size, so skip it in `SizeComputer`. Fold existing overloads into one implementation, short‑circuiting logic when only the serialized size is needed. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='SizeComputerBlock|SerializeBlock' --min-time=10000 > C compiler ............................ AppleClang 16.0.0.16000026 | ns/block | block/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 191,652.29 | 5,217.78 | 0.4% | 10.96 | `SerializeBlock` | 10,323.55 | 96,865.92 | 0.2% | 11.01 | `SizeComputerBlock` > C++ compiler .......................... 
GNU 13.3.0 | ns/block | block/s | err% | ins/block | cyc/block | IPC | bra/block | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 614,847.32 | 1,626.42 | 0.0% | 8,015,883.64 | 2,207,628.07 | 3.631 | 1,517,035.62 | 0.5% | 10.56 | `SerializeBlock` | 26,020.31 | 38,431.52 | 0.0% | 159,390.03 | 93,438.33 | 1.706 | 42,131.03 | 0.9% | 11.00 | `SizeComputerBlock` --- src/serialize.h | 115 ++++++++++++++++++++++++++++++------------------ 1 file changed, 73 insertions(+), 42 deletions(-) diff --git a/src/serialize.h b/src/serialize.h index c1c145fa4b5..611263ac086 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -48,6 +48,16 @@ static const unsigned int MAX_VECTOR_ALLOCATE = 5000000; struct deserialize_type {}; constexpr deserialize_type deserialize {}; +class SizeComputer; + +//! Check if type contains a stream by seeing if it has a GetStream() method. +template +concept ContainsStream = requires(T t) { t.GetStream(); }; + +template +concept ContainsSizeComputer = ContainsStream && + std::is_same_v().GetStream())>, SizeComputer>; + /* * Lowest-level serialization and conversion. */ @@ -107,8 +117,6 @@ template inline uint64_t ser_readdata64(Stream &s) } -class SizeComputer; - /** * Convert any argument to a reference to X, maintaining constness. * @@ -248,7 +256,9 @@ concept ByteOrIntegral = std::is_same_v || template void Serialize(Stream&, V) = delete; // char serialization forbidden. 
Use uint8_t or int8_t template void Serialize(Stream& s, T a) { - if constexpr (sizeof(T) == 1) { + if constexpr (ContainsSizeComputer) { + s.GetStream().seek(sizeof(T)); + } else if constexpr (sizeof(T) == 1) { ser_writedata8(s, static_cast(a)); // (u)int8_t or std::byte or bool } else if constexpr (sizeof(T) == 2) { ser_writedata16(s, static_cast(a)); // (u)int16_t @@ -259,10 +269,38 @@ template void Serialize(Stream& s, T a) ser_writedata64(s, static_cast(a)); // (u)int64_t } } -template void Serialize(Stream& s, const B (&a)[N]) { s.write(MakeByteSpan(a)); } -template void Serialize(Stream& s, const std::array& a) { s.write(MakeByteSpan(a)); } -template void Serialize(Stream& s, std::span span) { s.write(std::as_bytes(span)); } -template void Serialize(Stream& s, std::span span) { s.write(std::as_bytes(span)); } +template void Serialize(Stream& s, const B (&a)[N]) +{ + if constexpr (ContainsSizeComputer) { + s.GetStream().seek(N); + } else { + s.write(MakeByteSpan(a)); + } +} +template void Serialize(Stream& s, const std::array& a) +{ + if constexpr (ContainsSizeComputer) { + s.GetStream().seek(N); + } else { + s.write(MakeByteSpan(a)); + } +} +template void Serialize(Stream& s, std::span span) +{ + if constexpr (ContainsSizeComputer) { + s.GetStream().seek(N); + } else { + s.write(std::as_bytes(span)); + } +} +template void Serialize(Stream& s, std::span span) +{ + if constexpr (ContainsSizeComputer) { + s.GetStream().seek(span.size()); + } else { + s.write(std::as_bytes(span)); + } +} template void Unserialize(Stream&, V) = delete; // char serialization forbidden. 
Use uint8_t or int8_t template void Unserialize(Stream& s, T& a) @@ -299,12 +337,14 @@ constexpr inline unsigned int GetSizeOfCompactSize(uint64_t nSize) else return sizeof(unsigned char) + sizeof(uint64_t); } -inline void WriteCompactSize(SizeComputer& os, uint64_t nSize); - template void WriteCompactSize(Stream& os, uint64_t nSize) { - if (nSize < 253) + if constexpr (ContainsSizeComputer) + { + os.GetStream().seek(GetSizeOfCompactSize(nSize)); + } + else if (nSize < 253) { ser_writedata8(os, nSize); } @@ -411,7 +451,7 @@ struct CheckVarIntMode { }; template -inline unsigned int GetSizeOfVarInt(I n) +constexpr unsigned int GetSizeOfVarInt(I n) { CheckVarIntMode(); int nRet = 0; @@ -424,25 +464,26 @@ inline unsigned int GetSizeOfVarInt(I n) return nRet; } -template -inline void WriteVarInt(SizeComputer& os, I n); - template void WriteVarInt(Stream& os, I n) { - CheckVarIntMode(); - unsigned char tmp[(sizeof(n)*8+6)/7]; - int len=0; - while(true) { - tmp[len] = (n & 0x7F) | (len ? 0x80 : 0x00); - if (n <= 0x7F) - break; - n = (n >> 7) - 1; - len++; + if constexpr (ContainsSizeComputer) { + os.GetStream().seek(GetSizeOfVarInt(n)); + } else { + CheckVarIntMode(); + unsigned char tmp[(sizeof(n)*8+6)/7]; + int len=0; + while(true) { + tmp[len] = (n & 0x7F) | (len ? 
0x80 : 0x00); + if (n <= 0x7F) + break; + n = (n >> 7) - 1; + len++; + } + do { + ser_writedata8(os, tmp[len]); + } while(len--); } - do { - ser_writedata8(os, tmp[len]); - } while(len--); } template @@ -531,7 +572,9 @@ struct CustomUintFormatter template void Ser(Stream& s, I v) { if (v < 0 || v > MAX) throw std::ios_base::failure("CustomUintFormatter value out of range"); - if (BigEndian) { + if constexpr (ContainsSizeComputer) { + s.GetStream().seek(Bytes); + } else if (BigEndian) { uint64_t raw = htobe64_internal(v); s.write(std::as_bytes(std::span{&raw, 1}).template last()); } else { @@ -1062,6 +1105,9 @@ protected: public: SizeComputer() = default; + SizeComputer& GetStream() { return *this; } + const SizeComputer& GetStream() const { return *this; }; + void write(std::span src) { this->nSize += src.size(); @@ -1085,27 +1131,12 @@ public: } }; -template -inline void WriteVarInt(SizeComputer &s, I n) -{ - s.seek(GetSizeOfVarInt(n)); -} - -inline void WriteCompactSize(SizeComputer &s, uint64_t nSize) -{ - s.seek(GetSizeOfCompactSize(nSize)); -} - template size_t GetSerializeSize(const T& t) { return (SizeComputer() << t).size(); } -//! Check if type contains a stream by seeing if has a GetStream() method. -template -concept ContainsStream = requires(T t) { t.GetStream(); }; - /** Wrapper that overrides the GetParams() function of a stream. */ template class ParamsStream From 073e28b1e9e494389597e2ceb761ca1c842abe3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sun, 9 Mar 2025 21:33:36 +0100 Subject: [PATCH 6/6] optimization: add single byte writes Single byte writes are used very often (used for every (u)int8_t or std::byte or bool and for every VarInt's first byte which is also needed for every (pre)Vector). It makes sense to avoid the generalized serialization infrastructure that isn't needed: * AutoFile write doesn't need to allocate a 4k buffer for a single byte now; * `VectorWriter` and `DataStream` avoid memcpy/insert calls. 
> cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/bin/bench_bitcoin -filter='SizeComputerBlock|SerializeBlock' --min-time=10000 > C compiler ............................ AppleClang 16.0.0.16000026 | ns/block | block/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 174,569.19 | 5,728.39 | 0.6% | 10.89 | `SerializeBlock` | 10,241.16 | 97,645.21 | 0.0% | 11.00 | `SizeComputerBlock` > C++ compiler .......................... GNU 13.3.0 | ns/block | block/s | err% | ins/block | cyc/block | IPC | bra/block | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 615,000.56 | 1,626.01 | 0.0% | 8,015,883.64 | 2,208,340.88 | 3.630 | 1,517,035.62 | 0.5% | 10.56 | `SerializeBlock` | 25,676.76 | 38,945.72 | 0.0% | 159,390.03 | 92,202.10 | 1.729 | 42,131.03 | 0.9% | 11.00 | `SizeComputerBlock` --- src/bench/checkblock.cpp | 4 ++-- src/bench/rpc_blockchain.cpp | 2 +- src/crypto/sha256.cpp | 15 +++++++++++++++ src/crypto/sha256.h | 1 + src/hash.h | 24 +++++++++++++++++++++++- src/serialize.h | 6 ++++++ src/streams.cpp | 13 +++++++++++++ src/streams.h | 17 +++++++++++++++++ src/test/crypto_tests.cpp | 2 +- src/test/fuzz/autofile.cpp | 4 ++-- src/test/streams_tests.cpp | 6 +++--- 11 files changed, 84 insertions(+), 10 deletions(-) diff --git a/src/bench/checkblock.cpp b/src/bench/checkblock.cpp index ba26457b31a..2d9eac12339 100644 --- a/src/bench/checkblock.cpp +++ b/src/bench/checkblock.cpp @@ -52,7 +52,7 @@ static void DeserializeBlock(benchmark::Bench& bench) { DataStream stream(benchmark::data::block413567); std::byte a{0}; - stream.write({&a, 1}); // Prevent compaction + stream.write(std::span{&a, 1}); // Prevent compaction bench.unit("block").run([&] { CBlock block; @@ -66,7 +66,7 @@ static void DeserializeAndCheckBlock(benchmark::Bench& 
bench) { DataStream stream(benchmark::data::block413567); std::byte a{0}; - stream.write({&a, 1}); // Prevent compaction + stream.write(std::span{&a, 1}); // Prevent compaction ArgsManager bench_args; const auto chainParams = CreateChainParams(bench_args, ChainType::MAIN); diff --git a/src/bench/rpc_blockchain.cpp b/src/bench/rpc_blockchain.cpp index df951a14e49..a5d28b4a60b 100644 --- a/src/bench/rpc_blockchain.cpp +++ b/src/bench/rpc_blockchain.cpp @@ -33,7 +33,7 @@ struct TestBlockAndIndex { { DataStream stream{benchmark::data::block413567}; std::byte a{0}; - stream.write({&a, 1}); // Prevent compaction + stream.write(std::span{&a, 1}); // Prevent compaction stream >> TX_WITH_WITNESS(block); diff --git a/src/crypto/sha256.cpp b/src/crypto/sha256.cpp index 09c5d3123e8..6cf77849aac 100644 --- a/src/crypto/sha256.cpp +++ b/src/crypto/sha256.cpp @@ -723,6 +723,21 @@ CSHA256& CSHA256::Write(const unsigned char* data, size_t len) } return *this; } +CSHA256& CSHA256::Write(unsigned char data) +{ + size_t bufsize = bytes % 64; + + // Add the single byte to the buffer + buf[bufsize] = data; + bytes += 1; + + if (bufsize == 63) { + // Process the buffer if full + Transform(s, buf, 1); + } + + return *this; +} void CSHA256::Finalize(unsigned char hash[OUTPUT_SIZE]) { diff --git a/src/crypto/sha256.h b/src/crypto/sha256.h index b1348631d32..16aae54aa23 100644 --- a/src/crypto/sha256.h +++ b/src/crypto/sha256.h @@ -22,6 +22,7 @@ public: CSHA256(); CSHA256& Write(const unsigned char* data, size_t len); + CSHA256& Write(unsigned char data); void Finalize(unsigned char hash[OUTPUT_SIZE]); CSHA256& Reset(); }; diff --git a/src/hash.h b/src/hash.h index 34486af64a1..da3b1ab1145 100644 --- a/src/hash.h +++ b/src/hash.h @@ -38,6 +38,10 @@ public: sha.Write(input.data(), input.size()); return *this; } + CHash256& Write(std::span input) { + sha.Write(input[0]); + return *this; + } CHash256& Reset() { sha.Reset(); @@ -63,6 +67,10 @@ public: sha.Write(input.data(), input.size()); 
return *this; } + CHash160& Write(std::span input) { + sha.Write(input[0]); + return *this; + } CHash160& Reset() { sha.Reset(); @@ -107,6 +115,10 @@ public: { ctx.Write(UCharCast(src.data()), src.size()); } + void write(std::span src) + { + ctx.Write(*UCharCast(&src[0])); + } /** Compute the double-SHA256 hash of all data written to this object. * @@ -160,13 +172,18 @@ public: m_source.read(dst); this->write(dst); } + void read(std::span dst) + { + m_source.read(dst); + this->write(std::span{dst}); + } void ignore(size_t num_bytes) { std::byte data[1024]; while (num_bytes > 0) { size_t now = std::min(num_bytes, 1024); - read({data, now}); + read(std::span{data, now}); num_bytes -= now; } } @@ -194,6 +211,11 @@ public: m_source.write(src); HashWriter::write(src); } + void write(std::span src) + { + m_source.write(src); + HashWriter::write(src); + } template HashedSourceWriter& operator<<(const T& obj) diff --git a/src/serialize.h b/src/serialize.h index 611263ac086..5ec8b24df80 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -1112,6 +1112,10 @@ public: { this->nSize += src.size(); } + void write(std::span) + { + this->nSize += 1; + } /** Pretend _nSize bytes are written, without specifying them. 
*/ void seek(size_t _nSize) @@ -1161,7 +1165,9 @@ public: template ParamsStream& operator<<(const U& obj) { ::Serialize(*this, obj); return *this; } template ParamsStream& operator>>(U&& obj) { ::Unserialize(*this, obj); return *this; } void write(std::span src) { GetStream().write(src); } + void write(std::span src) { GetStream().write(src); } void read(std::span dst) { GetStream().read(dst); } + void read(std::span dst) { GetStream().read(dst); } void ignore(size_t num) { GetStream().ignore(num); } bool eof() const { return GetStream().eof(); } size_t size() const { return GetStream().size(); } diff --git a/src/streams.cpp b/src/streams.cpp index 19c2b474452..b060e8f4ad8 100644 --- a/src/streams.cpp +++ b/src/streams.cpp @@ -64,6 +64,13 @@ void AutoFile::read(std::span dst) } } +void AutoFile::read(std::span dst) +{ + if (detail_fread(dst) != 1) { + throw std::ios_base::failure(feof() ? "AutoFile::read: end of file" : "AutoFile::read: fread failed"); + } +} + void AutoFile::ignore(size_t nSize) { if (!m_file) throw std::ios_base::failure("AutoFile::ignore: file handle is nullptr"); @@ -97,6 +104,12 @@ void AutoFile::write(std::span src) } } +void AutoFile::write(std::span src) +{ + std::byte temp_byte = src[0]; + write_buffer(std::span(&temp_byte, 1)); +} + void AutoFile::write_buffer(std::span src) { if (!m_file) throw std::ios_base::failure("AutoFile::write_buffer: file handle is nullptr"); diff --git a/src/streams.h b/src/streams.h index 59538b2af0e..c11bbd4234d 100644 --- a/src/streams.h +++ b/src/streams.h @@ -83,6 +83,17 @@ public: } nPos += src.size(); } + void write(std::span src) + { + assert(nPos <= vchData.size()); + const auto byte{*UCharCast(&src[0])}; + if (nPos < vchData.size()) { + vchData[nPos] = byte; + } else { + vchData.push_back(byte); + } + nPos += 1; + } template VectorWriter& operator<<(const T& obj) { @@ -254,6 +265,10 @@ public: // Write to the end of the buffer vch.insert(vch.end(), src.begin(), src.end()); } + void write(std::span src) 
+ { + vch.push_back(src[0]); + } template DataStream& operator<<(const T& obj) @@ -453,8 +468,10 @@ public: // Stream subset // void read(std::span dst); + void read(std::span dst); void ignore(size_t nSize); void write(std::span src); + void write(std::span src); template AutoFile& operator<<(const T& obj) diff --git a/src/test/crypto_tests.cpp b/src/test/crypto_tests.cpp index 5588d4cdbc6..0aab9ef0e77 100644 --- a/src/test/crypto_tests.cpp +++ b/src/test/crypto_tests.cpp @@ -1079,7 +1079,7 @@ BOOST_AUTO_TEST_CASE(sha256d64) in[j] = m_rng.randbits(8); } for (int j = 0; j < i; ++j) { - CHash256().Write({in + 64 * j, 64}).Finalize({out1 + 32 * j, 32}); + CHash256().Write(std::span{in + 64 * j, 64}).Finalize({out1 + 32 * j, 32}); } SHA256D64(out2, in, i); BOOST_CHECK(memcmp(out1, out2, 32 * i) == 0); diff --git a/src/test/fuzz/autofile.cpp b/src/test/fuzz/autofile.cpp index 81761c7bf96..f89b617479d 100644 --- a/src/test/fuzz/autofile.cpp +++ b/src/test/fuzz/autofile.cpp @@ -29,14 +29,14 @@ FUZZ_TARGET(autofile) [&] { std::array arr{}; try { - auto_file.read({arr.data(), fuzzed_data_provider.ConsumeIntegralInRange(0, 4096)}); + auto_file.read(std::span{arr.data(), fuzzed_data_provider.ConsumeIntegralInRange(0, 4096)}); } catch (const std::ios_base::failure&) { } }, [&] { const std::array arr{}; try { - auto_file.write({arr.data(), fuzzed_data_provider.ConsumeIntegralInRange(0, 4096)}); + auto_file.write(std::span{arr.data(), fuzzed_data_provider.ConsumeIntegralInRange(0, 4096)}); } catch (const std::ios_base::failure&) { } }, diff --git a/src/test/streams_tests.cpp b/src/test/streams_tests.cpp index c7b5cd353e0..4ec007f8820 100644 --- a/src/test/streams_tests.cpp +++ b/src/test/streams_tests.cpp @@ -26,9 +26,9 @@ BOOST_AUTO_TEST_CASE(xor_file) { // Check errors for missing file AutoFile xor_file{raw_file("rb"), xor_pat}; - BOOST_CHECK_EXCEPTION(xor_file << std::byte{}, std::ios_base::failure, HasReason{"AutoFile::write: file handle is nullpt"}); - 
BOOST_CHECK_EXCEPTION(xor_file >> std::byte{}, std::ios_base::failure, HasReason{"AutoFile::read: file handle is nullpt"}); - BOOST_CHECK_EXCEPTION(xor_file.ignore(1), std::ios_base::failure, HasReason{"AutoFile::ignore: file handle is nullpt"}); + BOOST_CHECK_EXCEPTION(xor_file << std::byte{}, std::ios_base::failure, HasReason{"file handle is nullpt"}); + BOOST_CHECK_EXCEPTION(xor_file >> std::byte{}, std::ios_base::failure, HasReason{"file handle is nullpt"}); + BOOST_CHECK_EXCEPTION(xor_file.ignore(1), std::ios_base::failure, HasReason{"file handle is nullpt"}); } { #ifdef __MINGW64__