From df516ee5e3686bf8360daecab5be708dc880a8ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C5=91rinc?= <pap.lorinc@gmail.com>
Date: Fri, 6 Dec 2024 16:18:03 +0100
Subject: [PATCH] bench: Make Xor benchmark more representative

To make the benchmarks representative, I've collected the write-vector sizes during IBD for every invocation of `util::Xor` up to 860k blocks, and used them as the basis for the micro-benchmarks, so that the random data has a similar size distribution (taking the 1000 most frequent sizes, and making sure the very big ones are also covered).

Even though we already have serialization tests, `AutoFileXor` was added to benchmark serializing 1 MB via the provided key_bytes.
It was used to measure the effect of disabling obfuscation.

>  cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release \
&& cmake --build build -j$(nproc) \
&& build/src/bench/bench_bitcoin -filter='XorHistogram|AutoFileXor' -min-time=10000

C++ compiler .......................... AppleClang 16.0.0.16000026

|             ns/byte |              byte/s |    err% |     total | benchmark
|--------------------:|--------------------:|--------:|----------:|:----------
|                1.07 |      937,527,289.88 |    0.4% |     10.24 | `AutoFileXor`
|                0.87 |    1,149,859,017.49 |    0.3% |     10.80 | `XorHistogram`

C++ compiler .......................... GNU 13.2.0

|             ns/byte |              byte/s |    err% |        ins/byte |        cyc/byte |    IPC |       bra/byte |   miss% |     total | benchmark
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
|                1.87 |      535,253,389.72 |    0.0% |            9.20 |            3.45 |  2.669 |           1.03 |    0.1% |     11.02 | `AutoFileXor`
|                1.70 |      587,844,715.57 |    0.0% |            9.35 |            5.41 |  1.729 |           1.05 |    1.7% |     10.95 | `XorHistogram`
---
 src/bench/xor.cpp | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/src/bench/xor.cpp b/src/bench/xor.cpp
index fc9dc5d1721..1c9c5189baa 100644
--- a/src/bench/xor.cpp
+++ b/src/bench/xor.cpp
@@ -7,17 +7,24 @@
 #include <span.h>
 #include <streams.h>
 
+#include <cmath>
 #include <cstddef>
+#include <map>
 #include <vector>
 
 static void Xor(benchmark::Bench& bench)
 {
-    FastRandomContext frc{/*fDeterministic=*/true};
-    auto data{frc.randbytes<std::byte>(1024)};
-    auto key{frc.randbytes<std::byte>(31)};
+    FastRandomContext rng{/*fDeterministic=*/true}; // deterministic mode requested, presumably for reproducible input
+    auto test_data{rng.randbytes<std::byte>(1 << 20)}; // 1 << 20 bytes (1 MiB) of input, up from 1024
 
-    bench.batch(data.size()).unit("byte").run([&] {
-        util::Xor(data, key);
+    std::vector key_bytes{rng.randbytes<std::byte>(8)}; // 8-byte key, down from 31 bytes
+    uint64_t key; // NOTE(review): filled below but never read in this hunk -- confirm it is needed
+    std::memcpy(&key, key_bytes.data(), 8); // NOTE(review): no <cstring> include is visible in this hunk -- confirm
+
+    size_t offset{0}; // post-incremented on every benchmark iteration
+    bench.batch(test_data.size()).unit("byte").run([&] { // batch = buffer size, so results are reported per byte
+        util::Xor(test_data, key_bytes, offset++); // third argument is presumably the starting key offset -- TODO confirm
+        ankerl::nanobench::doNotOptimizeAway(test_data); // presumably keeps the XOR result from being optimized away
     });
 }