2020-12-31 05:48:25 -03:00
|
|
|
// Copyright (c) 2018-2020 The Bitcoin Core developers
|
2018-01-23 21:25:21 -03:00
|
|
|
// Distributed under the MIT software license, see the accompanying
|
|
|
|
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
|
|
|
|
2018-08-30 02:15:50 -03:00
|
|
|
#include <mutex>
|
|
|
|
#include <sstream>
|
2019-12-06 17:47:55 -03:00
|
|
|
#include <set>
|
2018-08-30 02:15:50 -03:00
|
|
|
|
2018-01-23 21:25:21 -03:00
|
|
|
#include <blockfilter.h>
|
2018-08-24 18:48:23 -03:00
|
|
|
#include <crypto/siphash.h>
|
2018-01-23 21:25:21 -03:00
|
|
|
#include <hash.h>
|
2018-01-23 22:25:30 -03:00
|
|
|
#include <primitives/transaction.h>
|
|
|
|
#include <script/script.h>
|
2018-01-23 21:25:21 -03:00
|
|
|
#include <streams.h>
|
2020-02-21 12:57:02 -03:00
|
|
|
#include <util/golombrice.h>
|
2018-01-23 21:25:21 -03:00
|
|
|
|
|
|
|
/// SerType used to serialize parameters in GCS filter encoding.
|
|
|
|
static constexpr int GCS_SER_TYPE = SER_NETWORK;
|
|
|
|
|
|
|
|
/// Protocol version used to serialize parameters in GCS filter encoding.
|
|
|
|
static constexpr int GCS_SER_VERSION = 0;
|
|
|
|
|
2018-08-27 19:42:35 -03:00
|
|
|
static const std::map<BlockFilterType, std::string> g_filter_types = {
|
|
|
|
{BlockFilterType::BASIC, "basic"},
|
|
|
|
};
|
|
|
|
|
2018-01-23 21:25:21 -03:00
|
|
|
// Reduce x, assumed uniformly distributed over [0, 2^64), to a value that is
// uniformly distributed over [0, n). The result is the upper 64 bits of the
// full 128-bit product x * n.
//
// See: https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
static uint64_t MapIntoRange(uint64_t x, uint64_t n)
{
#ifdef __SIZEOF_INT128__
    // A native 128-bit integer is available: multiply and keep the top half.
    using uint128 = unsigned __int128;
    const uint128 product = static_cast<uint128>(x) * static_cast<uint128>(n);
    return static_cast<uint64_t>(product >> 64);
#else
    // No 128-bit type: split each operand into 32-bit halves and multiply
    // piece-wise so that no intermediate result overflows 64 bits.
    //
    // See: https://stackoverflow.com/a/26855440
    constexpr uint64_t LOW_MASK = 0xFFFFFFFF;

    const uint64_t x_high = x >> 32;
    const uint64_t x_low = x & LOW_MASK;
    const uint64_t n_high = n >> 32;
    const uint64_t n_low = n & LOW_MASK;

    const uint64_t high_high = x_high * n_high;
    const uint64_t high_low = x_high * n_low;
    const uint64_t low_high = x_low * n_high;
    const uint64_t low_low = x_low * n_low;

    // Sum of the three terms that contribute to bits 32..63 of the product;
    // its own upper bits are the carry into the top 64 bits.
    const uint64_t middle = (low_low >> 32) + (low_high & LOW_MASK) + (high_low & LOW_MASK);
    return high_high + (low_high >> 32) + (high_low >> 32) + (middle >> 32);
#endif
}
|
|
|
|
|
|
|
|
uint64_t GCSFilter::HashToRange(const Element& element) const
|
|
|
|
{
|
2018-08-27 19:04:43 -03:00
|
|
|
uint64_t hash = CSipHasher(m_params.m_siphash_k0, m_params.m_siphash_k1)
|
2018-01-23 21:25:21 -03:00
|
|
|
.Write(element.data(), element.size())
|
|
|
|
.Finalize();
|
|
|
|
return MapIntoRange(hash, m_F);
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<uint64_t> GCSFilter::BuildHashedSet(const ElementSet& elements) const
|
|
|
|
{
|
|
|
|
std::vector<uint64_t> hashed_elements;
|
|
|
|
hashed_elements.reserve(elements.size());
|
|
|
|
for (const Element& element : elements) {
|
|
|
|
hashed_elements.push_back(HashToRange(element));
|
|
|
|
}
|
|
|
|
std::sort(hashed_elements.begin(), hashed_elements.end());
|
|
|
|
return hashed_elements;
|
|
|
|
}
|
|
|
|
|
2018-08-27 19:04:43 -03:00
|
|
|
GCSFilter::GCSFilter(const Params& params)
|
|
|
|
: m_params(params), m_N(0), m_F(0), m_encoded{0}
|
2018-01-23 21:25:21 -03:00
|
|
|
{}
|
|
|
|
|
2018-08-27 19:04:43 -03:00
|
|
|
/// Construct a filter from its already-encoded form, validating the encoding.
///
/// Throws std::ios_base::failure if the declared element count does not fit
/// in 32 bits or if data remains after decoding exactly N entries.
GCSFilter::GCSFilter(const Params& params, std::vector<unsigned char> encoded_filter)
    : m_params(params), m_encoded(std::move(encoded_filter))
{
    SpanReader reader{GCS_SER_TYPE, GCS_SER_VERSION, m_encoded};

    // The encoding starts with the element count as a compact size.
    const uint64_t declared_count = ReadCompactSize(reader);
    m_N = static_cast<uint32_t>(declared_count);
    if (declared_count != m_N) {
        throw std::ios_base::failure("N must be <2^32");
    }
    m_F = static_cast<uint64_t>(m_N) * static_cast<uint64_t>(m_params.m_M);

    // Decode (and discard) exactly N entries to check that the encoded filter
    // holds the right amount of data. Too little data is expected to surface
    // as a std::ios_base::failure from the reader; too much is rejected below.
    BitStreamReader<SpanReader> bits{reader};
    for (uint64_t remaining = m_N; remaining > 0; --remaining) {
        GolombRiceDecode(bits, m_params.m_P);
    }

    if (!reader.empty()) {
        throw std::ios_base::failure("encoded_filter contains excess data");
    }
}
|
|
|
|
|
2018-08-27 19:04:43 -03:00
|
|
|
/// Build and encode a filter from a set of elements.
///
/// Writes the element count as a compact size into m_encoded, followed by the
/// Golomb-Rice coded differences between consecutive sorted element hashes.
/// Throws std::invalid_argument if the set has 2^32 or more elements.
GCSFilter::GCSFilter(const Params& params, const ElementSet& elements)
    : m_params(params)
{
    const size_t element_count = elements.size();
    m_N = static_cast<uint32_t>(element_count);
    if (element_count != m_N) {
        throw std::invalid_argument("N must be <2^32");
    }
    m_F = static_cast<uint64_t>(m_N) * static_cast<uint64_t>(m_params.m_M);

    CVectorWriter writer(GCS_SER_TYPE, GCS_SER_VERSION, m_encoded, 0);
    WriteCompactSize(writer, m_N);

    // An empty set is encoded as the element count alone.
    if (!elements.empty()) {
        BitStreamWriter<CVectorWriter> bits(writer);

        // The hashes arrive sorted, so each difference is non-negative.
        uint64_t previous = 0;
        for (const uint64_t current : BuildHashedSet(elements)) {
            GolombRiceEncode(bits, m_params.m_P, current - previous);
            previous = current;
        }

        bits.Flush();
    }
}
|
2018-01-23 21:33:26 -03:00
|
|
|
|
|
|
|
bool GCSFilter::MatchInternal(const uint64_t* element_hashes, size_t size) const
|
|
|
|
{
|
Remove unused (and broken) functionality in SpanReader
This removes the ability to set an offset in the SpanReader constructor,
as the current code is broken. All call sites use pos=0, so it is actually
unused. If future call sites need it, SpanReader{a, b, c, d} is equivalent
to SpanReader{a, b, c.subspan(d)}.
It also removes the ability to deserialize from SpanReader directly from
the constructor. This too is unused, and can be more idiomatically
simulated using (SpanReader{a, b, c} >> x >> y >> z) instead of
SpanReader{a, b, c, x, y, z}.
2021-12-06 17:45:38 -03:00
|
|
|
SpanReader stream{GCS_SER_TYPE, GCS_SER_VERSION, m_encoded};
|
2018-01-23 21:33:26 -03:00
|
|
|
|
|
|
|
// Seek forward by size of N
|
|
|
|
uint64_t N = ReadCompactSize(stream);
|
|
|
|
assert(N == m_N);
|
|
|
|
|
2021-12-01 16:40:25 -03:00
|
|
|
BitStreamReader<SpanReader> bitreader{stream};
|
2018-01-23 21:33:26 -03:00
|
|
|
|
|
|
|
uint64_t value = 0;
|
|
|
|
size_t hashes_index = 0;
|
|
|
|
for (uint32_t i = 0; i < m_N; ++i) {
|
2018-08-27 19:04:43 -03:00
|
|
|
uint64_t delta = GolombRiceDecode(bitreader, m_params.m_P);
|
2018-01-23 21:33:26 -03:00
|
|
|
value += delta;
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
if (hashes_index == size) {
|
|
|
|
return false;
|
|
|
|
} else if (element_hashes[hashes_index] == value) {
|
|
|
|
return true;
|
|
|
|
} else if (element_hashes[hashes_index] > value) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
hashes_index++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool GCSFilter::Match(const Element& element) const
|
|
|
|
{
|
|
|
|
uint64_t query = HashToRange(element);
|
|
|
|
return MatchInternal(&query, 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool GCSFilter::MatchAny(const ElementSet& elements) const
|
|
|
|
{
|
|
|
|
const std::vector<uint64_t> queries = BuildHashedSet(elements);
|
|
|
|
return MatchInternal(queries.data(), queries.size());
|
|
|
|
}
|
2018-01-23 22:25:30 -03:00
|
|
|
|
2018-08-27 19:42:35 -03:00
|
|
|
/// Return the textual name of a filter type, or a reference to an empty
/// string if the type has no entry in g_filter_types.
const std::string& BlockFilterTypeName(BlockFilterType filter_type)
{
    static const std::string no_name;

    const auto found = g_filter_types.find(filter_type);
    if (found == g_filter_types.end()) {
        return no_name;
    }
    return found->second;
}
|
|
|
|
|
|
|
|
bool BlockFilterTypeByName(const std::string& name, BlockFilterType& filter_type) {
|
|
|
|
for (const auto& entry : g_filter_types) {
|
|
|
|
if (entry.second == name) {
|
|
|
|
filter_type = entry.first;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2019-12-06 17:47:55 -03:00
|
|
|
const std::set<BlockFilterType>& AllBlockFilterTypes()
|
2018-08-30 02:15:50 -03:00
|
|
|
{
|
2019-12-06 17:47:55 -03:00
|
|
|
static std::set<BlockFilterType> types;
|
2018-08-30 02:15:50 -03:00
|
|
|
|
|
|
|
static std::once_flag flag;
|
|
|
|
std::call_once(flag, []() {
|
|
|
|
for (auto entry : g_filter_types) {
|
2019-12-06 17:47:55 -03:00
|
|
|
types.insert(entry.first);
|
2018-08-30 02:15:50 -03:00
|
|
|
}
|
|
|
|
});
|
|
|
|
|
|
|
|
return types;
|
|
|
|
}
|
|
|
|
|
|
|
|
const std::string& ListBlockFilterTypes()
|
|
|
|
{
|
|
|
|
static std::string type_list;
|
|
|
|
|
|
|
|
static std::once_flag flag;
|
|
|
|
std::call_once(flag, []() {
|
|
|
|
std::stringstream ret;
|
|
|
|
bool first = true;
|
|
|
|
for (auto entry : g_filter_types) {
|
|
|
|
if (!first) ret << ", ";
|
|
|
|
ret << entry.second;
|
|
|
|
first = false;
|
|
|
|
}
|
|
|
|
type_list = ret.str();
|
|
|
|
});
|
|
|
|
|
|
|
|
return type_list;
|
|
|
|
}
|
|
|
|
|
2018-01-23 22:25:30 -03:00
|
|
|
/// Collect the elements of a basic filter for a block: the scriptPubKey of
/// every transaction output (skipping empty scripts and scripts that start
/// with OP_RETURN) and of every previous output recorded in the block's undo
/// data (skipping empty scripts).
static GCSFilter::ElementSet BasicFilterElements(const CBlock& block,
                                                 const CBlockUndo& block_undo)
{
    GCSFilter::ElementSet elements;

    // Scripts created by this block.
    for (const CTransactionRef& tx : block.vtx) {
        for (const CTxOut& txout : tx->vout) {
            const CScript& spk = txout.scriptPubKey;
            const bool include = !spk.empty() && spk[0] != OP_RETURN;
            if (include) {
                elements.emplace(spk.begin(), spk.end());
            }
        }
    }

    // Scripts of the previous outputs recorded in the undo data.
    for (const CTxUndo& tx_undo : block_undo.vtxundo) {
        for (const Coin& prevout : tx_undo.vprevout) {
            const CScript& spk = prevout.out.scriptPubKey;
            if (!spk.empty()) {
                elements.emplace(spk.begin(), spk.end());
            }
        }
    }

    return elements;
}
|
|
|
|
|
2018-08-27 19:08:31 -03:00
|
|
|
/// Reconstruct a block filter from its type, the block hash and an
/// already-encoded filter.
///
/// Throws std::invalid_argument if BuildParams does not recognise the type.
BlockFilter::BlockFilter(BlockFilterType filter_type, const uint256& block_hash,
                         std::vector<unsigned char> filter)
    : m_filter_type(filter_type), m_block_hash(block_hash)
{
    GCSFilter::Params gcs_params;
    const bool known_type = BuildParams(gcs_params);
    if (!known_type) {
        throw std::invalid_argument("unknown filter_type");
    }
    m_filter = GCSFilter(gcs_params, std::move(filter));
}
|
|
|
|
|
2018-01-23 22:25:30 -03:00
|
|
|
/// Build a block filter of the given type from a block and its undo data.
/// The block hash is taken from block.GetHash().
///
/// Throws std::invalid_argument if BuildParams does not recognise the type.
BlockFilter::BlockFilter(BlockFilterType filter_type, const CBlock& block, const CBlockUndo& block_undo)
    : m_filter_type(filter_type), m_block_hash(block.GetHash())
{
    GCSFilter::Params gcs_params;
    const bool known_type = BuildParams(gcs_params);
    if (!known_type) {
        throw std::invalid_argument("unknown filter_type");
    }
    m_filter = GCSFilter(gcs_params, BasicFilterElements(block, block_undo));
}
|
|
|
|
|
|
|
|
/// Fill in the GCS parameters for this filter's type.
///
/// For BASIC filters the SipHash keys are the first two 64-bit words of the
/// block hash and P/M are the BASIC_FILTER_* constants. Returns true if the
/// type was recognised; otherwise returns false and leaves params untouched.
bool BlockFilter::BuildParams(GCSFilter::Params& params) const
{
    switch (m_filter_type) {
    case BlockFilterType::INVALID:
        break;
    case BlockFilterType::BASIC:
        params.m_siphash_k0 = m_block_hash.GetUint64(0);
        params.m_siphash_k1 = m_block_hash.GetUint64(1);
        params.m_P = BASIC_FILTER_P;
        params.m_M = BASIC_FILTER_M;
        return true;
    }

    // INVALID, or a value outside the enumerators handled above.
    return false;
}
|
2018-01-23 22:32:46 -03:00
|
|
|
|
|
|
|
/// Return the CHash256 digest of the encoded filter bytes.
uint256 BlockFilter::GetHash() const
{
    uint256 digest;
    CHash256().Write(GetEncodedFilter()).Finalize(digest);
    return digest;
}
|
|
|
|
|
|
|
|
/// Compute the filter header: the CHash256 digest of this filter's hash
/// followed by the previous header.
uint256 BlockFilter::ComputeHeader(const uint256& prev_header) const
{
    const uint256 filter_hash = GetHash();

    CHash256 hasher;
    hasher.Write(filter_hash);
    hasher.Write(prev_header);

    uint256 header;
    hasher.Finalize(header);
    return header;
}
|