Merge pull request #1795 from TheBlueMatt/bloom

Bloom filters
This commit is contained in:
Gavin Andresen 2013-01-17 10:04:08 -08:00
commit 91f70a75da
21 changed files with 1315 additions and 62 deletions

View file

@ -155,6 +155,7 @@ HEADERS += src/qt/bitcoingui.h \
src/script.h \ src/script.h \
src/init.h \ src/init.h \
src/irc.h \ src/irc.h \
src/bloom.h \
src/mruset.h \ src/mruset.h \
src/json/json_spirit_writer_template.h \ src/json/json_spirit_writer_template.h \
src/json/json_spirit_writer.h \ src/json/json_spirit_writer.h \
@ -215,6 +216,7 @@ SOURCES += src/qt/bitcoin.cpp src/qt/bitcoingui.cpp \
src/version.cpp \ src/version.cpp \
src/sync.cpp \ src/sync.cpp \
src/util.cpp \ src/util.cpp \
src/hash.cpp \
src/netbase.cpp \ src/netbase.cpp \
src/key.cpp \ src/key.cpp \
src/script.cpp \ src/script.cpp \
@ -222,6 +224,7 @@ SOURCES += src/qt/bitcoin.cpp src/qt/bitcoingui.cpp \
src/init.cpp \ src/init.cpp \
src/net.cpp \ src/net.cpp \
src/irc.cpp \ src/irc.cpp \
src/bloom.cpp \
src/checkpoints.cpp \ src/checkpoints.cpp \
src/addrman.cpp \ src/addrman.cpp \
src/db.cpp \ src/db.cpp \

156
src/bloom.cpp Normal file
View file

@ -0,0 +1,156 @@
// Copyright (c) 2012 The Bitcoin developers
// Distributed under the MIT/X11 software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include <math.h>
#include <stdlib.h>
#include "bloom.h"
#include "main.h"
#include "script.h"
#define LN2SQUARED 0.4804530139182014246671025263266649717305529515945455
#define LN2 0.6931471805599453094172321214581765680755001343602552
using namespace std;
static const unsigned char bit_mask[8] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80};
// Builds a filter sized for nElements entries at false-positive rate nFPRate,
// clamping both the byte size and the hash-function count to the protocol limits.
// nTweakIn is mixed into every hash seed; nFlagsIn is stored verbatim in nFlags.
// The nHashFuncs initializer reads vData.size(), which is valid because vData is
// declared (and therefore initialized) before nHashFuncs in the class.
// NOTE(review): nElements == 0 makes the integer division in the nHashFuncs
// initializer divide by zero - callers presumably always pass nElements >= 1; confirm.
CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweakIn, unsigned char nFlagsIn) :
// The ideal size for a bloom filter with a given number of elements and false positive rate is:
// - nElements * log(fp rate) / ln(2)^2
// We ignore filter parameters which will create a bloom filter larger than the protocol limits
// (the size is computed in bits, capped at MAX_BLOOM_FILTER_SIZE * 8, then rounded down to whole bytes)
vData(min((unsigned int)(-1 / LN2SQUARED * nElements * log(nFPRate)), MAX_BLOOM_FILTER_SIZE * 8) / 8),
// The ideal number of hash functions is filter size * ln(2) / number of elements
// Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits
// See http://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas
// (note the bits-per-element quotient is an integer division performed before scaling by LN2)
nHashFuncs(min((unsigned int)(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS)),
nTweak(nTweakIn),
nFlags(nFlagsIn)
{
}
inline unsigned int CBloomFilter::Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const
{
// 0xFBA4C795 chosen as it guarantees a reasonable bit difference between nHashNum values.
return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash) % (vData.size() * 8);
}
// Adds vKey to the filter by setting one bit per hash function.
// A filter that holds no data bytes (for example one deserialized from a peer's
// message with an empty vData) is left untouched: Hash() reduces modulo the
// number of bits in vData, which would otherwise be a division by zero
// (CVE-2013-5700).
void CBloomFilter::insert(const vector<unsigned char>& vKey)
{
    if (vData.empty())
        return;

    for (unsigned int i = 0; i < nHashFuncs; i++)
    {
        const unsigned int nIndex = Hash(i, vKey);
        // Sets bit nIndex of vData
        vData[nIndex >> 3] |= bit_mask[7 & nIndex];
    }
}
void CBloomFilter::insert(const COutPoint& outpoint)
{
CDataStream stream(SER_NETWORK, PROTOCOL_VERSION);
stream << outpoint;
vector<unsigned char> data(stream.begin(), stream.end());
insert(data);
}
// Adds a 256-bit hash to the filter, keyed by the bytes in [hash.begin(), hash.end()).
void CBloomFilter::insert(const uint256& hash)
{
    insert(vector<unsigned char>(hash.begin(), hash.end()));
}
// Returns true if every bit selected by the hash functions for vKey is set,
// i.e. vKey may have been inserted (false positives are possible).
// A filter that holds no data bytes cannot exclude anything and is treated as
// matching every key; this also avoids the division by zero Hash() would
// perform when reducing modulo a bit count of zero (CVE-2013-5700).
bool CBloomFilter::contains(const vector<unsigned char>& vKey) const
{
    if (vData.empty())
        return true;

    for (unsigned int i = 0; i < nHashFuncs; i++)
    {
        const unsigned int nIndex = Hash(i, vKey);
        // Checks bit nIndex of vData
        if (!(vData[nIndex >> 3] & bit_mask[7 & nIndex]))
            return false;
    }
    return true;
}
bool CBloomFilter::contains(const COutPoint& outpoint) const
{
CDataStream stream(SER_NETWORK, PROTOCOL_VERSION);
stream << outpoint;
vector<unsigned char> data(stream.begin(), stream.end());
return contains(data);
}
// Tests whether a 256-bit hash, keyed by the bytes in [hash.begin(), hash.end()), matches the filter.
bool CBloomFilter::contains(const uint256& hash) const
{
    return contains(vector<unsigned char>(hash.begin(), hash.end()));
}
// Reports whether both the byte size of the filter and its hash-function count
// stay within MAX_BLOOM_FILTER_SIZE and MAX_HASH_FUNCS respectively.
bool CBloomFilter::IsWithinSizeConstraints() const
{
    if (vData.size() > MAX_BLOOM_FILTER_SIZE)
        return false;
    return nHashFuncs <= MAX_HASH_FUNCS;
}
// Decides whether tx (whose hash is passed in as hash) matches this filter.
// A transaction matches when the filter contains:
//  - the transaction hash itself,
//  - any data element pushed by any output script,
//  - any outpoint the transaction spends, or
//  - any data element pushed by any input script.
// Side effect: when an output script matches, the corresponding outpoint may be
// inserted into the filter, depending on the update mode stored in nFlags.
// Returns true on a match, false otherwise.
bool CBloomFilter::IsRelevantAndUpdate(const CTransaction& tx, const uint256& hash)
{
bool fFound = false;
// Match if the filter contains the hash of tx
// for finding tx when they appear in a block
if (contains(hash))
fFound = true;
// Outputs are always scanned in full (no early return) so that every matching
// output gets a chance to be added to the filter.
for (unsigned int i = 0; i < tx.vout.size(); i++)
{
const CTxOut& txout = tx.vout[i];
// Match if the filter contains any arbitrary script data element in any scriptPubKey in tx
// If this matches, also add the specific output that was matched.
// This means clients don't have to update the filter themselves when a new relevant tx
// is discovered in order to find spending transactions, which avoids round-tripping and race conditions.
CScript::const_iterator pc = txout.scriptPubKey.begin();
vector<unsigned char> data;
while (pc < txout.scriptPubKey.end())
{
opcodetype opcode;
// Stop scanning this script if the next operation cannot be parsed
if (!txout.scriptPubKey.GetOp(pc, opcode, data))
break;
if (data.size() != 0 && contains(data))
{
fFound = true;
// BLOOM_UPDATE_ALL: remember every matched output
if ((nFlags & BLOOM_UPDATE_MASK) == BLOOM_UPDATE_ALL)
insert(COutPoint(hash, i));
// BLOOM_UPDATE_P2PUBKEY_ONLY: remember the output only for pay-to-pubkey / multisig scripts
else if ((nFlags & BLOOM_UPDATE_MASK) == BLOOM_UPDATE_P2PUBKEY_ONLY)
{
txnouttype type;
vector<vector<unsigned char> > vSolutions;
if (Solver(txout.scriptPubKey, type, vSolutions) &&
(type == TX_PUBKEY || type == TX_MULTISIG))
insert(COutPoint(hash, i));
}
// One matching element is enough for this output; move on to the next one
break;
}
}
}
// A hash or output match already decides the result; inputs need not be examined
if (fFound)
return true;
BOOST_FOREACH(const CTxIn& txin, tx.vin)
{
// Match if the filter contains an outpoint tx spends
if (contains(txin.prevout))
return true;
// Match if the filter contains any arbitrary script data element in any scriptSig in tx
CScript::const_iterator pc = txin.scriptSig.begin();
vector<unsigned char> data;
while (pc < txin.scriptSig.end())
{
opcodetype opcode;
// Stop scanning this script if the next operation cannot be parsed
if (!txin.scriptSig.GetOp(pc, opcode, data))
break;
if (data.size() != 0 && contains(data))
return true;
}
}
return false;
}

88
src/bloom.h Normal file
View file

@ -0,0 +1,88 @@
// Copyright (c) 2012 The Bitcoin developers
// Distributed under the MIT/X11 software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#ifndef BITCOIN_BLOOM_H
#define BITCOIN_BLOOM_H
#include <vector>
#include "uint256.h"
#include "serialize.h"
class COutPoint;
class CTransaction;
// 20,000 items with fp rate < 0.1% or 10,000 items and <0.0001%
static const unsigned int MAX_BLOOM_FILTER_SIZE = 36000; // bytes
static const unsigned int MAX_HASH_FUNCS = 50;
// First two bits of nFlags control how much IsRelevantAndUpdate actually updates
// The remaining bits are reserved
enum bloomflags
{
// Never add matched outputs to the filter
BLOOM_UPDATE_NONE = 0,
// Add the outpoint of every matched output to the filter
BLOOM_UPDATE_ALL = 1,
// Only adds outpoints to the filter if the output is a pay-to-pubkey/pay-to-multisig script
BLOOM_UPDATE_P2PUBKEY_ONLY = 2,
// Mask selecting the update-mode bits of nFlags; not itself a valid mode
BLOOM_UPDATE_MASK = 3,
};
/**
 * BloomFilter is a probabilistic filter which SPV clients provide
 * so that we can filter the transactions we send them.
 *
 * This allows for significantly more efficient transaction and block downloads.
 *
 * Because bloom filters are probabilistic, an SPV node can increase the false-
 * positive rate, making us send them transactions which aren't actually theirs,
 * allowing clients to trade more bandwidth for more privacy by obfuscating which
 * keys are owned by them.
 */
class CBloomFilter
{
private:
    // Filter bits, packed eight per byte
    std::vector<unsigned char> vData;
    // Number of hash functions applied to every key
    unsigned int nHashFuncs;
    // Value added to the seed of every hash function
    unsigned int nTweak;
    // Update mode (see bloomflags); the upper bits are reserved
    unsigned char nFlags;

    // Maps vDataToHash to a bit position in vData for hash function number nHashNum
    unsigned int Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const;

public:
    // Creates a new bloom filter which will provide the given fp rate when filled with the given number of elements
    // Note that if the given parameters will result in a filter outside the bounds of the protocol limits,
    // the filter created will be as close to the given parameters as possible within the protocol limits.
    // This will apply if nFPRate is very low or nElements is unreasonably high.
    // nTweak is a constant which is added to the seed value passed to the hash function
    // It should generally always be a random value (and is largely only exposed for unit testing)
    // nFlags should be one of the BLOOM_UPDATE_* enums (not _MASK)
    CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweak, unsigned char nFlagsIn);

    // Creates a filter with no data bits, intended only as a deserialization target.
    // All scalar members are zero-initialized so that a default-constructed filter
    // never exposes indeterminate values (e.g. if it is copied, serialized or
    // size-checked before anything has been deserialized into it).
    CBloomFilter() : nHashFuncs(0), nTweak(0), nFlags(0) {}

    IMPLEMENT_SERIALIZE
    (
        READWRITE(vData);
        READWRITE(nHashFuncs);
        READWRITE(nTweak);
        READWRITE(nFlags);
    )

    void insert(const std::vector<unsigned char>& vKey);
    void insert(const COutPoint& outpoint);
    void insert(const uint256& hash);

    bool contains(const std::vector<unsigned char>& vKey) const;
    bool contains(const COutPoint& outpoint) const;
    bool contains(const uint256& hash) const;

    // True if the size is <= MAX_BLOOM_FILTER_SIZE and the number of hash functions is <= MAX_HASH_FUNCS
    // (catch a filter which was just deserialized which was too big)
    bool IsWithinSizeConstraints() const;

    // Also adds any outputs which match the filter to the filter (to match their spending txes)
    bool IsRelevantAndUpdate(const CTransaction& tx, const uint256& hash);
};
#endif /* BITCOIN_BLOOM_H */

58
src/hash.cpp Normal file
View file

@ -0,0 +1,58 @@
#include "hash.h"
// Rotates x left by r bits.
// Both shift counts are reduced modulo 32 so that r == 0 (or any multiple of 32)
// never produces an undefined full-width shift.
inline uint32_t ROTL32 ( uint32_t x, int8_t r )
{
    const unsigned int nShift = static_cast<unsigned int>(r) & 31u;
    return (x << nShift) | (x >> ((32u - nShift) & 31u));
}

// Computes the 32-bit MurmurHash3 (x86_32 variant) of vDataToHash with seed nHashSeed.
// See http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
//
// Input bytes are combined into 32-bit words explicitly in little-endian order,
// so the result does not depend on host endianness or on the alignment of the
// vector's storage, and an empty vector is never indexed.
unsigned int MurmurHash3(unsigned int nHashSeed, const std::vector<unsigned char>& vDataToHash)
{
    uint32_t h1 = nHashSeed;
    const uint32_t c1 = 0xcc9e2d51;
    const uint32_t c2 = 0x1b873593;

    const size_t nLen = vDataToHash.size();
    const size_t nBlocks = nLen / 4;

    //----------
    // body: one mixing round per complete 4-byte block
    for (size_t i = 0; i < nBlocks; i++)
    {
        const size_t nOffset = i * 4;
        uint32_t k1 = (uint32_t)vDataToHash[nOffset]
                    | ((uint32_t)vDataToHash[nOffset + 1] << 8)
                    | ((uint32_t)vDataToHash[nOffset + 2] << 16)
                    | ((uint32_t)vDataToHash[nOffset + 3] << 24);

        k1 *= c1;
        k1 = ROTL32(k1,15);
        k1 *= c2;

        h1 ^= k1;
        h1 = ROTL32(h1,13);
        h1 = h1*5+0xe6546b64;
    }

    //----------
    // tail: the remaining 0-3 bytes; each case deliberately falls through to the next
    const size_t nTail = nBlocks * 4;
    uint32_t k1 = 0;
    switch (nLen & 3)
    {
    case 3: k1 ^= (uint32_t)vDataToHash[nTail + 2] << 16; // fall through
    case 2: k1 ^= (uint32_t)vDataToHash[nTail + 1] << 8;  // fall through
    case 1: k1 ^= (uint32_t)vDataToHash[nTail];
            k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
    }

    //----------
    // finalization: mix in the length, then force all bits to avalanche
    h1 ^= (uint32_t)nLen;
    h1 ^= h1 >> 16;
    h1 *= 0x85ebca6b;
    h1 ^= h1 >> 13;
    h1 *= 0xc2b2ae35;
    h1 ^= h1 >> 16;

    return h1;
}

View file

@ -10,6 +10,7 @@
#include <openssl/sha.h> #include <openssl/sha.h>
#include <openssl/ripemd.h> #include <openssl/ripemd.h>
#include <vector>
template<typename T1> template<typename T1>
inline uint256 Hash(const T1 pbegin, const T1 pend) inline uint256 Hash(const T1 pbegin, const T1 pend)
@ -113,4 +114,6 @@ inline uint160 Hash160(const std::vector<unsigned char>& vch)
return hash2; return hash2;
} }
unsigned int MurmurHash3(unsigned int nHashSeed, const std::vector<unsigned char>& vDataToHash);
#endif #endif

View file

@ -2239,6 +2239,160 @@ bool ProcessBlock(CNode* pfrom, CBlock* pblock, CDiskBlockPos *dbp)
// Builds the filtered view of block: copies its header, runs every transaction
// through filter (which may be updated as a side effect), records the matching
// (index, txid) pairs in vMatchedTxn and encodes the result as a partial merkle tree.
CMerkleBlock::CMerkleBlock(const CBlock& block, CBloomFilter& filter)
{
    header = block.GetBlockHeader();

    const size_t nTxCount = block.vtx.size();
    vector<uint256> vTxids;
    vector<bool> vIsMatch;
    vTxids.reserve(nTxCount);
    vIsMatch.reserve(nTxCount);

    for (unsigned int nPos = 0; nPos < nTxCount; ++nPos)
    {
        uint256 txid = block.vtx[nPos].GetHash();
        const bool fRelevant = filter.IsRelevantAndUpdate(block.vtx[nPos], txid);
        if (fRelevant)
            vMatchedTxn.push_back(make_pair(nPos, txid));
        vIsMatch.push_back(fRelevant);
        vTxids.push_back(txid);
    }

    txn = CPartialMerkleTree(vTxids, vIsMatch);
}
// Returns the hash of the merkle tree node at (height, pos), computed from the full txid list.
uint256 CPartialMerkleTree::CalcHash(int height, unsigned int pos, const std::vector<uint256> &vTxid) {
    // at height 0 the node hashes are the txids themselves
    if (height == 0)
        return vTxid[pos];

    const int nChildHeight = height - 1;
    const unsigned int nLeftPos = pos * 2;
    const unsigned int nRightPos = nLeftPos + 1;

    uint256 hashLeft = CalcHash(nChildHeight, nLeftPos, vTxid);
    // the right child only exists if it is not beyond the end of its level;
    // otherwise the left hash is paired with a copy of itself
    uint256 hashRight = (nRightPos < CalcTreeWidth(nChildHeight))
                      ? CalcHash(nChildHeight, nRightPos, vTxid)
                      : hashLeft;

    // combine subhashes
    return Hash(BEGIN(hashLeft), END(hashLeft), BEGIN(hashRight), END(hashRight));
}
// Depth-first encoder: appends one flag bit for the node at (height, pos) and either
// stores its hash (leaf, or subtree without matches) or recurses into its children.
void CPartialMerkleTree::TraverseAndBuild(int height, unsigned int pos, const std::vector<uint256> &vTxid, const std::vector<bool> &vMatch) {
    // range of leaf positions covered by this node (clipped to the real transaction count below)
    const unsigned int nFirstLeaf = pos << height;
    const unsigned int nEndLeaf = (pos+1) << height;

    // does this node cover at least one matched txid?
    bool fCoversMatch = false;
    for (unsigned int nLeaf = nFirstLeaf; nLeaf < nEndLeaf && nLeaf < nTransactions; ++nLeaf) {
        if (vMatch[nLeaf])
            fCoversMatch = true;
    }
    vBits.push_back(fCoversMatch);

    // leaf level, or nothing interesting below: store the hash and stop
    if (height == 0 || !fCoversMatch) {
        vHash.push_back(CalcHash(height, pos, vTxid));
        return;
    }

    // otherwise store no hash here and descend into the subtrees
    const int nChildHeight = height - 1;
    TraverseAndBuild(nChildHeight, pos*2, vTxid, vMatch);
    if (pos*2+1 < CalcTreeWidth(nChildHeight))
        TraverseAndBuild(nChildHeight, pos*2+1, vTxid, vMatch);
}
// Depth-first decoder: consumes flag bits and hashes in the order TraverseAndBuild
// produced them, appends matched txids to vMatch and returns the hash of the node at
// (height, pos). nBitsUsed / nHashUsed track how much of vBits / vHash has been consumed.
// On malformed input fBad is set; the returned hash is then meaningless.
uint256 CPartialMerkleTree::TraverseAndExtract(int height, unsigned int pos, unsigned int &nBitsUsed, unsigned int &nHashUsed, std::vector<uint256> &vMatch) {
    if (nBitsUsed >= vBits.size()) {
        // overflowed the bits array - failure
        fBad = true;
        return 0;
    }
    bool fParentOfMatch = vBits[nBitsUsed++];
    if (height==0 || !fParentOfMatch) {
        // if at height 0, or nothing interesting below, use stored hash and do not descend
        if (nHashUsed >= vHash.size()) {
            // overflowed the hash array - failure
            fBad = true;
            return 0;
        }
        const uint256 &hash = vHash[nHashUsed++];
        if (height==0 && fParentOfMatch) // in case of height 0, we have a matched txid
            vMatch.push_back(hash);
        return hash;
    } else {
        // otherwise, descend into the subtrees to extract matched txids and hashes
        uint256 left = TraverseAndExtract(height-1, pos*2, nBitsUsed, nHashUsed, vMatch), right;
        if (pos*2+1 < CalcTreeWidth(height-1)) {
            right = TraverseAndExtract(height-1, pos*2+1, nBitsUsed, nHashUsed, vMatch);
            if (right == left) {
                // Two distinct subtrees cover distinct transactions, so their hashes must
                // differ. An explicit right branch equal to the left one would hash to the
                // same parent as the implicit "copy left" case below, letting a peer present
                // a forged tree shape for a valid merkle root - reject it.
                fBad = true;
            }
        } else {
            // no right child at this level: the left hash is paired with itself
            right = left;
        }
        // and combine them before returning
        return Hash(BEGIN(left), END(left), BEGIN(right), END(right));
    }
}
// Encodes the subset of vTxid selected by vMatch as a partial merkle tree.
CPartialMerkleTree::CPartialMerkleTree(const std::vector<uint256> &vTxid, const std::vector<bool> &vMatch) : nTransactions(vTxid.size()), fBad(false) {
    // start from empty bit and hash lists
    vBits.clear();
    vHash.clear();

    // the tree height is the first level whose width shrinks to a single node
    int nTreeHeight = 0;
    for (; CalcTreeWidth(nTreeHeight) > 1; ++nTreeHeight) {}

    // encode, starting at the root
    TraverseAndBuild(nTreeHeight, 0, vTxid, vMatch);
}
// An empty tree is flagged bad from the start; deserialization resets the flag.
CPartialMerkleTree::CPartialMerkleTree()
    : nTransactions(0),
      fBad(true)
{
}
// Decodes the partial tree: fills vMatch with the matched txids and returns the
// reconstructed merkle root, or 0 if the encoded data is inconsistent.
uint256 CPartialMerkleTree::ExtractMatches(std::vector<uint256> &vMatch) {
    vMatch.clear();

    // Sanity checks on the raw fields before any traversal:
    //  - an empty set will not work
    //  - 60 is the lower bound for the size of a serialized CTransaction
    //  - there can never be more hashes provided than one for every txid
    //  - there must be at least one bit per node in the partial tree, and at least one node per hash
    const bool fNoTransactions = (nTransactions == 0);
    const bool fTooManyTransactions = (nTransactions > MAX_BLOCK_SIZE / 60);
    const bool fTooManyHashes = (vHash.size() > nTransactions);
    const bool fTooFewBits = (vBits.size() < vHash.size());
    if (fNoTransactions || fTooManyTransactions || fTooManyHashes || fTooFewBits)
        return 0;

    // the tree height is the first level whose width shrinks to a single node
    int nTreeHeight = 0;
    for (; CalcTreeWidth(nTreeHeight) > 1; ++nTreeHeight) {}

    // decode, starting at the root
    unsigned int nBitsUsed = 0;
    unsigned int nHashUsed = 0;
    uint256 hashMerkleRoot = TraverseAndExtract(nTreeHeight, 0, nBitsUsed, nHashUsed, vMatch);

    // all bits must have been consumed (except for the padding caused by
    // serializing them as a byte sequence), and so must all hashes
    const bool fAllBitsUsed = ((nBitsUsed+7)/8 == (vBits.size()+7)/8);
    const bool fAllHashesUsed = (nHashUsed == vHash.size());
    if (fBad || !fAllBitsUsed || !fAllHashesUsed)
        return 0;

    return hashMerkleRoot;
}
bool CheckDiskSpace(uint64 nAdditionalBytes) bool CheckDiskSpace(uint64 nAdditionalBytes)
{ {
uint64 nFreeBytesAvailable = filesystem::space(GetDataDir()).available; uint64 nFreeBytesAvailable = filesystem::space(GetDataDir()).available;
@ -2815,6 +2969,10 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv)
vRecv >> pfrom->strSubVer; vRecv >> pfrom->strSubVer;
if (!vRecv.empty()) if (!vRecv.empty())
vRecv >> pfrom->nStartingHeight; vRecv >> pfrom->nStartingHeight;
if (!vRecv.empty())
vRecv >> pfrom->fRelayTxes; // set to true after we get the first filter* message
else
pfrom->fRelayTxes = true;
if (pfrom->fInbound && addrMe.IsRoutable()) if (pfrom->fInbound && addrMe.IsRoutable())
{ {
@ -3045,7 +3203,7 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv)
if (fDebugNet || (vInv.size() == 1)) if (fDebugNet || (vInv.size() == 1))
printf("received getdata for: %s\n", inv.ToString().c_str()); printf("received getdata for: %s\n", inv.ToString().c_str());
if (inv.type == MSG_BLOCK) if (inv.type == MSG_BLOCK || inv.type == MSG_FILTERED_BLOCK)
{ {
// Send block from disk // Send block from disk
map<uint256, CBlockIndex*>::iterator mi = mapBlockIndex.find(inv.hash); map<uint256, CBlockIndex*>::iterator mi = mapBlockIndex.find(inv.hash);
@ -3053,7 +3211,29 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv)
{ {
CBlock block; CBlock block;
block.ReadFromDisk((*mi).second); block.ReadFromDisk((*mi).second);
if (inv.type == MSG_BLOCK)
pfrom->PushMessage("block", block); pfrom->PushMessage("block", block);
else // MSG_FILTERED_BLOCK)
{
LOCK(pfrom->cs_filter);
if (pfrom->pfilter)
{
CMerkleBlock merkleBlock(block, *pfrom->pfilter);
// CMerkleBlock just contains hashes, so also push any transactions in the block the client did not see
// This avoids hurting performance by pointlessly requiring a round-trip
// Note that there is currently no way for a node to request any single transaction we didn't send here -
// they must either disconnect and retry or request the full block.
// Thus, the protocol spec allows us to provide duplicate txn here;
// however, we MUST always provide at least what the remote peer needs
typedef std::pair<unsigned int, uint256> PairType;
BOOST_FOREACH(PairType& pair, merkleBlock.vMatchedTxn)
if (!pfrom->setInventoryKnown.count(CInv(MSG_TX, pair.second)))
pfrom->PushMessage("tx", block.vtx[pair.first]);
pfrom->PushMessage("merkleblock", merkleBlock);
}
// else
// no response
}
// Trigger them to send a getblocks request for the next batch of inventory // Trigger them to send a getblocks request for the next batch of inventory
if (inv.hash == pfrom->hashContinue) if (inv.hash == pfrom->hashContinue)
@ -3184,7 +3364,7 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv)
if (tx.AcceptToMemoryPool(true, &fMissingInputs)) if (tx.AcceptToMemoryPool(true, &fMissingInputs))
{ {
SyncWithWallets(inv.hash, tx, NULL, true); SyncWithWallets(inv.hash, tx, NULL, true);
RelayMessage(inv, vMsg); RelayTransaction(tx, inv.hash, vMsg);
mapAlreadyAskedFor.erase(inv); mapAlreadyAskedFor.erase(inv);
vWorkQueue.push_back(inv.hash); vWorkQueue.push_back(inv.hash);
vEraseQueue.push_back(inv.hash); vEraseQueue.push_back(inv.hash);
@ -3207,7 +3387,7 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv)
{ {
printf(" accepted orphan tx %s\n", inv.hash.ToString().substr(0,10).c_str()); printf(" accepted orphan tx %s\n", inv.hash.ToString().substr(0,10).c_str());
SyncWithWallets(inv.hash, tx, NULL, true); SyncWithWallets(inv.hash, tx, NULL, true);
RelayMessage(inv, vMsg); RelayTransaction(tx, inv.hash, vMsg);
mapAlreadyAskedFor.erase(inv); mapAlreadyAskedFor.erase(inv);
vWorkQueue.push_back(inv.hash); vWorkQueue.push_back(inv.hash);
vEraseQueue.push_back(inv.hash); vEraseQueue.push_back(inv.hash);
@ -3266,12 +3446,15 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv)
else if (strCommand == "mempool") else if (strCommand == "mempool")
{ {
std::vector<uint256> vtxid; std::vector<uint256> vtxid;
LOCK2(mempool.cs, pfrom->cs_filter);
mempool.queryHashes(vtxid); mempool.queryHashes(vtxid);
vector<CInv> vInv; vector<CInv> vInv;
for (unsigned int i = 0; i < vtxid.size(); i++) { BOOST_FOREACH(uint256& hash, vtxid) {
CInv inv(MSG_TX, vtxid[i]); CInv inv(MSG_TX, hash);
if ((pfrom->pfilter && pfrom->pfilter->IsRelevantAndUpdate(mempool.lookup(hash), hash)) ||
(!pfrom->pfilter))
vInv.push_back(inv); vInv.push_back(inv);
if (i == (MAX_INV_SZ - 1)) if (vInv.size() == MAX_INV_SZ)
break; break;
} }
if (vInv.size() > 0) if (vInv.size() > 0)
@ -3332,6 +3515,53 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv)
} }
else if (strCommand == "filterload")
{
CBloomFilter filter;
vRecv >> filter;
if (!filter.IsWithinSizeConstraints())
// There is no excuse for sending a too-large filter
pfrom->Misbehaving(100);
else
{
LOCK(pfrom->cs_filter);
delete pfrom->pfilter;
pfrom->pfilter = new CBloomFilter(filter);
}
pfrom->fRelayTxes = true;
}
else if (strCommand == "filteradd")
{
vector<unsigned char> vData;
vRecv >> vData;
// Nodes must NEVER send a data item > 520 bytes (the max size for a script data object,
// and thus, the maximum size any matched object can have) in a filteradd message
if (vData.size() > 520)
{
pfrom->Misbehaving(100);
} else {
LOCK(pfrom->cs_filter);
if (pfrom->pfilter)
pfrom->pfilter->insert(vData);
else
pfrom->Misbehaving(100);
}
}
else if (strCommand == "filterclear")
{
LOCK(pfrom->cs_filter);
delete pfrom->pfilter;
pfrom->pfilter = NULL;
pfrom->fRelayTxes = true;
}
else else
{ {
// Ignore unknown commands for extensibility // Ignore unknown commands for extensibility

View file

@ -1110,11 +1110,101 @@ public:
/** Data structure that represents a partial merkle tree.
*
* It represents a subset of the txids of a known block, in a way that
* allows recovery of the list of txids and the merkle root, in an
* authenticated way.
*
* The encoding works as follows: we traverse the tree in depth-first order,
* storing a bit for each traversed node, signifying whether the node is the
* parent of at least one matched leaf txid (or a matched txid itself). In
* case we are at the leaf level, or this bit is 0, its merkle node hash is
* stored, and its children are not explored further. Otherwise, no hash is
* stored, but we recurse into both (or the only) child branch. During
* decoding, the same depth-first traversal is performed, consuming bits and
* hashes as they were written during encoding.
*
* The serialization is fixed and provides a hard guarantee about the
* encoded size:
*
* SIZE <= 10 + ceil(32.25*N)
*
* Where N represents the number of leaf nodes of the partial tree. N itself
* is bounded by:
*
* N <= total_transactions
* N <= 1 + matched_transactions*tree_height
*
* The serialization format:
* - uint32 total_transactions (4 bytes)
* - varint number of hashes (1-3 bytes)
* - uint256[] hashes in depth-first order (<= 32*N bytes)
* - varint number of bytes of flag bits (1-3 bytes)
* - byte[] flag bits, packed per 8 in a byte, least significant bit first (<= 2*N-1 bits)
* The size constraints follow from this.
*/
class CPartialMerkleTree
{
protected:
// the total number of transactions in the block
unsigned int nTransactions;
// node-is-parent-of-matched-txid bits
std::vector<bool> vBits;
// txids and internal hashes
std::vector<uint256> vHash;
// flag set when encountering invalid data
bool fBad;
// helper function to efficiently calculate the number of nodes at given height in the merkle tree
// (nTransactions divided by 2^height, rounded up)
unsigned int CalcTreeWidth(int height) {
return (nTransactions+(1 << height)-1) >> height;
}
// calculate the hash of a node in the merkle tree (at leaf level: the txids themselves)
uint256 CalcHash(int height, unsigned int pos, const std::vector<uint256> &vTxid);
// recursive function that traverses tree nodes, storing the data as bits and hashes
void TraverseAndBuild(int height, unsigned int pos, const std::vector<uint256> &vTxid, const std::vector<bool> &vMatch);
// recursive function that traverses tree nodes, consuming the bits and hashes produced by TraverseAndBuild.
// it returns the hash of the respective node.
uint256 TraverseAndExtract(int height, unsigned int pos, unsigned int &nBitsUsed, unsigned int &nHashUsed, std::vector<uint256> &vMatch);
public:
// serialization implementation
// vBits is not serialized directly: it is packed into / unpacked from a byte vector,
// eight flags per byte, least significant bit first.
IMPLEMENT_SERIALIZE(
READWRITE(nTransactions);
READWRITE(vHash);
std::vector<unsigned char> vBytes;
if (fRead) {
// reading: unpack every bit of the received bytes (including any padding bits in the last byte)
READWRITE(vBytes);
// presumably `this` is const inside the serialization macro, hence the const_cast - confirm against serialize.h
CPartialMerkleTree &us = *(const_cast<CPartialMerkleTree*>(this));
us.vBits.resize(vBytes.size() * 8);
for (unsigned int p = 0; p < us.vBits.size(); p++)
us.vBits[p] = (vBytes[p / 8] & (1 << (p % 8))) != 0;
// a freshly deserialized tree is considered good until a traversal finds a problem
us.fBad = false;
} else {
// writing (or sizing): pack the flag bits into ceil(bits/8) bytes
vBytes.resize((vBits.size()+7)/8);
for (unsigned int p = 0; p < vBits.size(); p++)
vBytes[p / 8] |= vBits[p] << (p % 8);
READWRITE(vBytes);
}
)
// Construct a partial merkle tree from a list of transaction ids, and a mask that selects a subset of them
CPartialMerkleTree(const std::vector<uint256> &vTxid, const std::vector<bool> &vMatch);
// Construct an empty tree (marked bad), for use as a deserialization target
CPartialMerkleTree();
// extract the matching txids represented by this partial merkle tree.
// returns the merkle root, or 0 in case of failure
uint256 ExtractMatches(std::vector<uint256> &vMatch);
};
/** Nodes collect new transactions into a block, hash them into a hash tree, /** Nodes collect new transactions into a block, hash them into a hash tree,
@ -1218,6 +1308,18 @@ public:
nDoS = 0; nDoS = 0;
} }
// Returns a standalone CBlockHeader holding copies of this block's six header fields.
CBlockHeader GetBlockHeader() const
{
    CBlockHeader header;
    header.nVersion       = nVersion;
    header.hashPrevBlock  = hashPrevBlock;
    header.hashMerkleRoot = hashMerkleRoot;
    header.nTime          = nTime;
    header.nBits          = nBits;
    header.nNonce         = nNonce;
    return header;
}
uint256 BuildMerkleTree() const uint256 BuildMerkleTree() const
{ {
vMerkleTree.clear(); vMerkleTree.clear();
@ -2027,4 +2129,36 @@ struct CBlockTemplate
std::vector<int64_t> vTxSigOps; std::vector<int64_t> vTxSigOps;
}; };
/** Used to relay blocks as header + partial merkle tree
* to filtered nodes.
*
* Only the header and the partial merkle tree are serialized; the list of
* matched transactions is kept alongside for local use.
*/
class CMerkleBlock
{
public:
// Public only for unit testing
// header of the block this object was built from
CBlockHeader header;
// partial merkle tree covering the transactions that matched the filter
CPartialMerkleTree txn;
public:
// Public only for unit testing and relay testing
// (not relayed)
// (index within the block, txid) of every transaction that matched the filter
std::vector<std::pair<unsigned int, uint256> > vMatchedTxn;
// Create from a CBlock, filtering transactions according to filter
// Note that this will call IsRelevantAndUpdate on the filter for each transaction,
// thus the filter will likely be modified.
CMerkleBlock(const CBlock& block, CBloomFilter& filter);
IMPLEMENT_SERIALIZE
(
READWRITE(header);
READWRITE(txn);
)
};
#endif #endif

View file

@ -83,6 +83,8 @@ OBJS= \
obj/wallet.o \ obj/wallet.o \
obj/walletdb.o \ obj/walletdb.o \
obj/noui.o \ obj/noui.o \
obj/hash.o \
obj/bloom.o \
obj/leveldb.o \ obj/leveldb.o \
obj/txdb.o obj/txdb.o

View file

@ -78,6 +78,8 @@ OBJS= \
obj/util.o \ obj/util.o \
obj/wallet.o \ obj/wallet.o \
obj/walletdb.o \ obj/walletdb.o \
obj/hash.o \
obj/bloom.o \
obj/noui.o \ obj/noui.o \
obj/leveldb.o \ obj/leveldb.o \
obj/txdb.o obj/txdb.o

View file

@ -96,6 +96,8 @@ OBJS= \
obj/util.o \ obj/util.o \
obj/wallet.o \ obj/wallet.o \
obj/walletdb.o \ obj/walletdb.o \
obj/hash.o \
obj/bloom.o \
obj/noui.o \ obj/noui.o \
obj/leveldb.o \ obj/leveldb.o \
obj/txdb.o obj/txdb.o

View file

@ -127,6 +127,8 @@ OBJS= \
obj/util.o \ obj/util.o \
obj/wallet.o \ obj/wallet.o \
obj/walletdb.o \ obj/walletdb.o \
obj/hash.o \
obj/bloom.o \
obj/noui.o \ obj/noui.o \
obj/leveldb.o \ obj/leveldb.o \
obj/txdb.o obj/txdb.o

View file

@ -9,6 +9,7 @@
#include "init.h" #include "init.h"
#include "addrman.h" #include "addrman.h"
#include "ui_interface.h" #include "ui_interface.h"
#include "script.h"
#ifdef WIN32 #ifdef WIN32
#include <string.h> #include <string.h>
@ -1996,3 +1997,48 @@ public:
} }
} }
instance_of_cnetcleanup; instance_of_cnetcleanup;
void RelayTransaction(const CTransaction& tx, const uint256& hash)
{
CDataStream ss(SER_NETWORK, PROTOCOL_VERSION);
ss.reserve(10000);
ss << tx;
RelayTransaction(tx, hash, ss);
}
// Caches the serialized transaction in mapRelay for 15 minutes and announces its
// inventory to every connected node that accepts tx relay and, if the node has a
// bloom filter loaded, whose filter matches the transaction.
void RelayTransaction(const CTransaction& tx, const uint256& hash, const CDataStream& ss)
{
    CInv inv(MSG_TX, hash);

    {
        LOCK(cs_mapRelay);

        // Expire old relay messages
        for (; !vRelayExpiration.empty() && vRelayExpiration.front().first < GetTime(); vRelayExpiration.pop_front())
            mapRelay.erase(vRelayExpiration.front().second);

        // Save original serialized message so newer versions are preserved
        mapRelay.insert(std::make_pair(inv, ss));
        vRelayExpiration.push_back(std::make_pair(GetTime() + 15 * 60, inv));
    }

    LOCK(cs_vNodes);
    BOOST_FOREACH(CNode* pnode, vNodes)
    {
        if (pnode->fRelayTxes)
        {
            LOCK(pnode->cs_filter);
            // No filter means "send everything"; otherwise the filter decides
            // (and may be updated as a side effect of the check)
            if (!pnode->pfilter || pnode->pfilter->IsRelevantAndUpdate(tx, hash))
                pnode->PushInventory(inv);
        }
    }
}

View file

@ -19,6 +19,7 @@
#include "protocol.h" #include "protocol.h"
#include "addrman.h" #include "addrman.h"
#include "hash.h" #include "hash.h"
#include "bloom.h"
class CNode; class CNode;
class CBlockIndex; class CBlockIndex;
@ -151,7 +152,14 @@ public:
bool fNetworkNode; bool fNetworkNode;
bool fSuccessfullyConnected; bool fSuccessfullyConnected;
bool fDisconnect; bool fDisconnect;
// We use fRelayTxes for two purposes -
// a) it allows us to not relay tx invs before receiving the peer's version message
// b) the peer may tell us in their version message that we should not relay tx invs
// until they have initialized their bloom filter.
bool fRelayTxes;
CSemaphoreGrant grantOutbound; CSemaphoreGrant grantOutbound;
CCriticalSection cs_filter;
CBloomFilter* pfilter;
protected: protected:
int nRefCount; int nRefCount;
@ -208,7 +216,9 @@ public:
nStartingHeight = -1; nStartingHeight = -1;
fGetAddr = false; fGetAddr = false;
nMisbehavior = 0; nMisbehavior = 0;
fRelayTxes = false;
setInventoryKnown.max_size(SendBufferSize() / 1000); setInventoryKnown.max_size(SendBufferSize() / 1000);
pfilter = NULL;
// Be shy and don't send version until we hear // Be shy and don't send version until we hear
if (!fInbound) if (!fInbound)
@ -222,6 +232,8 @@ public:
closesocket(hSocket); closesocket(hSocket);
hSocket = INVALID_SOCKET; hSocket = INVALID_SOCKET;
} }
if (pfilter)
delete pfilter;
} }
private: private:
@ -556,51 +568,8 @@ public:
class CTransaction;
void RelayTransaction(const CTransaction& tx, const uint256& hash);
void RelayTransaction(const CTransaction& tx, const uint256& hash, const CDataStream& ss);
inline void RelayInventory(const CInv& inv)
{
// Put on lists to offer to the other nodes
{
LOCK(cs_vNodes);
BOOST_FOREACH(CNode* pnode, vNodes)
pnode->PushInventory(inv);
}
}
template<typename T>
void RelayMessage(const CInv& inv, const T& a)
{
CDataStream ss(SER_NETWORK, PROTOCOL_VERSION);
ss.reserve(10000);
ss << a;
RelayMessage(inv, ss);
}
template<>
inline void RelayMessage<>(const CInv& inv, const CDataStream& ss)
{
{
LOCK(cs_mapRelay);
// Expire old relay messages
while (!vRelayExpiration.empty() && vRelayExpiration.front().first < GetTime())
{
mapRelay.erase(vRelayExpiration.front().second);
vRelayExpiration.pop_front();
}
// Save original serialized message so newer versions are preserved
mapRelay.insert(std::make_pair(inv, ss));
vRelayExpiration.push_back(std::make_pair(GetTime() + 15 * 60, inv));
}
RelayInventory(inv);
}
#endif #endif

View file

@ -17,6 +17,7 @@ static const char* ppszTypeName[] =
"ERROR", "ERROR",
"tx", "tx",
"block", "block",
"filtered block"
}; };
CMessageHeader::CMessageHeader() CMessageHeader::CMessageHeader()

View file

@ -138,6 +138,9 @@ enum
{ {
MSG_TX = 1, MSG_TX = 1,
MSG_BLOCK, MSG_BLOCK,
// Nodes may always request a MSG_FILTERED_BLOCK in a getdata, however,
// MSG_FILTERED_BLOCK should not appear in any invs except as a part of getdata.
MSG_FILTERED_BLOCK,
}; };
#endif // __INCLUDED_PROTOCOL_H__ #endif // __INCLUDED_PROTOCOL_H__

View file

@ -558,7 +558,7 @@ Value sendrawtransaction(const Array& params, bool fHelp)
} else { } else {
SyncWithWallets(hashTx, tx, NULL, true); SyncWithWallets(hashTx, tx, NULL, true);
} }
RelayMessage(CInv(MSG_TX, hashTx), tx); RelayTransaction(tx, hashTx);
return hashTx.GetHex(); return hashTx.GetHex();
} }

447
src/test/bloom_tests.cpp Normal file

File diff suppressed because one or more lines are too long

98
src/test/pmt_tests.cpp Normal file
View file

@ -0,0 +1,98 @@
#include <boost/test/unit_test.hpp>
#include "uint256.h"
#include "main.h"
using namespace std;
// Test-only subclass of CPartialMerkleTree that can corrupt its own hashes.
class CPartialMerkleTreeTester : public CPartialMerkleTree
{
public:
    // Pick one entry of vHash at random and invert a single randomly chosen
    // bit (0..255) of it - this should break the authentication.
    void Damage() {
        const unsigned int nVictim = rand() % vHash.size();
        const int nBit = rand() % 256;
        vHash[nVictim] ^= ((uint256)1 << nBit);
    }
};
BOOST_AUTO_TEST_SUITE(pmt_tests)
// Round-trip test for CPartialMerkleTree. For several block sizes and several
// random subsets of matched transactions it builds a partial merkle tree,
// serializes it, checks the serialized size bound, deserializes it, and checks
// that the extracted merkle root and matched txids agree with the originals.
// It then checks that flipping a random bit in a stored hash changes the root.
BOOST_AUTO_TEST_CASE(pmt_test1)
{
    static const unsigned int nTxCounts[] = {1, 4, 7, 17, 56, 100, 127, 256, 312, 513, 1000, 4095};
    // Take the number of cases from the table itself so that adding or
    // removing an entry cannot leave the loop bound out of sync.
    static const unsigned int nCases = sizeof(nTxCounts) / sizeof(nTxCounts[0]);

    for (unsigned int i = 0; i < nCases; i++) {
        unsigned int nTx = nTxCounts[i];

        // build a block with some dummy transactions
        CBlock block;
        for (unsigned int j=0; j<nTx; j++) {
            CTransaction tx;
            tx.nLockTime = rand(); // actual transaction data doesn't matter; just make the nLockTime's unique
            block.vtx.push_back(tx);
        }

        // calculate actual merkle root and height
        uint256 merkleRoot1 = block.BuildMerkleTree();
        std::vector<uint256> vTxid(nTx, 0);
        for (unsigned int j=0; j<nTx; j++)
            vTxid[j] = block.vtx[j].GetHash();
        int nHeight = 1, nTx_ = nTx;
        while (nTx_ > 1) {
            nTx_ = (nTx_+1)/2;
            nHeight++;
        }

        // check with random subsets with inclusion chances 1, 1/2, 1/4, ..., 1/128
        for (int att = 1; att < 15; att++) {
            // build random subset of txid's
            std::vector<bool> vMatch(nTx, false);
            std::vector<uint256> vMatchTxid1;
            for (unsigned int j=0; j<nTx; j++) {
                bool fInclude = (rand() & ((1 << (att/2)) - 1)) == 0;
                vMatch[j] = fInclude;
                if (fInclude)
                    vMatchTxid1.push_back(vTxid[j]);
            }

            // build the partial merkle tree
            CPartialMerkleTree pmt1(vTxid, vMatch);

            // serialize
            CDataStream ss(SER_NETWORK, PROTOCOL_VERSION);
            ss << pmt1;

            // verify CPartialMerkleTree's size guarantees
            // (named nMaxHashes so it no longer shadows the outer loop index)
            unsigned int nMaxHashes = std::min<unsigned int>(nTx, 1 + vMatchTxid1.size()*nHeight);
            BOOST_CHECK(ss.size() <= 10 + (258*nMaxHashes+7)/8);

            // deserialize into a tester copy
            CPartialMerkleTreeTester pmt2;
            ss >> pmt2;

            // extract merkle root and matched txids from copy
            std::vector<uint256> vMatchTxid2;
            uint256 merkleRoot2 = pmt2.ExtractMatches(vMatchTxid2);

            // check that it has the same merkle root as the original, and a valid one
            BOOST_CHECK(merkleRoot1 == merkleRoot2);
            BOOST_CHECK(merkleRoot2 != 0);

            // check that it contains the matched transactions (in the same order!)
            BOOST_CHECK(vMatchTxid1 == vMatchTxid2);

            // check that random bit flips break the authentication
            for (int j=0; j<4; j++) {
                CPartialMerkleTreeTester pmt3(pmt2);
                pmt3.Damage();
                std::vector<uint256> vMatchTxid3;
                uint256 merkleRoot3 = pmt3.ExtractMatches(vMatchTxid3);
                BOOST_CHECK(merkleRoot3 != merkleRoot1);
            }
        }
    }
}
BOOST_AUTO_TEST_SUITE_END()

View file

@ -344,7 +344,17 @@ public:
return (unsigned char*)&pn[WIDTH]; return (unsigned char*)&pn[WIDTH];
} }
unsigned int size() const unsigned char* begin() const
{
return (unsigned char*)&pn[0];
}
const unsigned char* end() const
{
return (unsigned char*)&pn[WIDTH];
}
unsigned int size() const
{ {
return sizeof(pn); return sizeof(pn);
} }

View file

@ -25,7 +25,7 @@ extern const std::string CLIENT_DATE;
// network protocol versioning // network protocol versioning
// //
static const int PROTOCOL_VERSION = 60002; static const int PROTOCOL_VERSION = 70001;
// earlier versions not supported as of Feb 2012, and are disconnected // earlier versions not supported as of Feb 2012, and are disconnected
static const int MIN_PROTO_VERSION = 209; static const int MIN_PROTO_VERSION = 209;

View file

@ -826,17 +826,16 @@ void CWalletTx::RelayWalletTransaction()
{ {
BOOST_FOREACH(const CMerkleTx& tx, vtxPrev) BOOST_FOREACH(const CMerkleTx& tx, vtxPrev)
{ {
if (!tx.IsCoinBase()) { if (!tx.IsCoinBase())
if (tx.GetDepthInMainChain() == 0) if (tx.GetDepthInMainChain() == 0)
RelayMessage(CInv(MSG_TX, tx.GetHash()), (CTransaction)tx); RelayTransaction((CTransaction)tx, tx.GetHash());
}
} }
if (!IsCoinBase()) if (!IsCoinBase())
{ {
if (GetDepthInMainChain() == 0) { if (GetDepthInMainChain() == 0) {
uint256 hash = GetHash(); uint256 hash = GetHash();
printf("Relaying wtx %s\n", hash.ToString().substr(0,10).c_str()); printf("Relaying wtx %s\n", hash.ToString().substr(0,10).c_str());
RelayMessage(CInv(MSG_TX, hash), (CTransaction)*this); RelayTransaction((CTransaction)*this, hash);
} }
} }
} }