contrib: Use asmap for ASN lookup in makeseeds

Add an argument `-a` to provide an asmap file to do the IP to ASN
lookups.

This speeds up the script greatly, and makes the output deterministic.
Also removes the dependency on `dns.lookup`.

I've annotated the output with ASxxxx comments to provide a way to
verify the functionality.

For now I've added instructions in README.md to download and use the
`demo.map` from the asmap repository. When we have some other mechanism
for distributing asmap files we could switch to that.

This continues #24824. I've removed all the fallbacks and extra
complexity, as everyone will be using the same instructions anyway.

Co-authored-by: Pieter Wuille <pieter.wuille@gmail.com>
Co-authored-by: James O'Beirne <james.obeirne@pm.me>
Co-authored-by: russeree <reese.russell@ymail.com>
This commit is contained in:
laanwj 2022-04-15 17:14:40 +02:00
parent bd6c5e4108
commit b54180303d
4 changed files with 120 additions and 48 deletions

View file

@ -1 +1,2 @@
seeds_main.txt
asmap-filled.dat

View file

@ -8,21 +8,11 @@ and remove old versions as necessary (at a minimum when GetDesirableServiceFlags
changes its default return value, as those are the services which seeds are added
to addrman with).
The seeds compiled into the release are created from sipa's DNS seed data, like this:
The seeds compiled into the release are created from sipa's DNS seed and AS map
data. Run the following commands from the `/contrib/seeds` directory:
curl https://bitcoin.sipa.be/seeds.txt.gz | gzip -dc > seeds_main.txt
python3 makeseeds.py < seeds_main.txt > nodes_main.txt
curl https://bitcoin.sipa.be/asmap-filled.dat > asmap-filled.dat
python3 makeseeds.py -a asmap-filled.dat < seeds_main.txt > nodes_main.txt
cat nodes_main_manual.txt >> nodes_main.txt
python3 generate-seeds.py . > ../../src/chainparamsseeds.h
## Dependencies
Ubuntu, Debian:
sudo apt-get install python3-dnspython
and/or for other operating systems:
pip install dnspython
See https://dnspython.readthedocs.io/en/latest/installation.html for more information.

90
contrib/seeds/asmap.py Normal file
View file

@ -0,0 +1,90 @@
#!/usr/bin/env python3
# Copyright (c) 2013-2020 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
import ipaddress
def DecodeBytes(byts):
    '''
    Convert a byte array to a bit array.

    Every byte contributes eight entries to the returned list, emitted
    least-significant bit first, in the order the bytes appear in `byts`.
    '''
    bits = []
    for byt in byts:
        for shift in range(8):
            bits.append((byt >> shift) & 1)
    return bits
def DecodeBits(stream, bitpos, minval, bit_sizes):
    '''
    Decode one variable-length integer from `stream`, starting at `bitpos`.

    `bit_sizes` lists the mantissa width of each size class, in order. For
    every class except the last, a single selector bit is read first:
      - 1 means "value is larger than this class": 2**bit_size is added to
        the running value and decoding moves on to the next class;
      - 0 means "value is in this class": `bit_size` mantissa bits follow.
    The last class has no selector bit; its mantissa is read directly.
    Mantissa bits are read most-significant first and added to the value.

    Returns a tuple `(value, new_bitpos)` where `value` starts from `minval`.

    Raises AssertionError if `bit_sizes` is empty. Reading past the end of
    `stream` propagates the sequence's own error (IndexError for a list).
    '''
    val = minval
    last = len(bit_sizes) - 1
    for idx, bit_size in enumerate(bit_sizes):
        if idx < last:
            # Selector bit: set means skip this size class entirely.
            selector = stream[bitpos]
            bitpos += 1
            if selector:
                val += 1 << bit_size
                continue
        # Either the selector was clear or this is the final class:
        # read the mantissa, most-significant bit first.
        for shift in range(bit_size - 1, -1, -1):
            val += stream[bitpos] << shift
            bitpos += 1
        return (val, bitpos)
    # Only reachable when bit_sizes is empty. Raised explicitly (rather than
    # via `assert`) so the check survives `python -O`.
    raise AssertionError("bit_sizes must contain at least one size class")
def DecodeType(stream, bitpos):
    '''
    Decode an instruction opcode from `stream` at `bitpos`.

    Uses size classes [0, 0, 1] with a minimum of 0, so the decoded opcode
    is in the range 0..3. Returns `(opcode, new_bitpos)`.
    '''
    opcode_sizes = [0, 0, 1]
    return DecodeBits(stream, bitpos, minval=0, bit_sizes=opcode_sizes)
def DecodeASN(stream, bitpos):
    '''
    Decode an AS number from `stream` at `bitpos`.

    Uses size classes of 15 through 24 mantissa bits with a minimum value
    of 1. Returns `(asn, new_bitpos)`.
    '''
    asn_sizes = list(range(15, 25))
    return DecodeBits(stream, bitpos, minval=1, bit_sizes=asn_sizes)
def DecodeMatch(stream, bitpos):
    '''
    Decode a match pattern from `stream` at `bitpos`.

    Uses size classes of 1 through 8 mantissa bits with a minimum value
    of 2. Returns `(match, new_bitpos)`.
    '''
    match_sizes = list(range(1, 9))
    return DecodeBits(stream, bitpos, minval=2, bit_sizes=match_sizes)
def DecodeJump(stream, bitpos):
    '''
    Decode a jump offset from `stream` at `bitpos`.

    Uses size classes of 5 through 30 mantissa bits with a minimum value
    of 17. Returns `(jump, new_bitpos)`.
    '''
    jump_sizes = list(range(5, 31))
    return DecodeBits(stream, bitpos, minval=17, bit_sizes=jump_sizes)
def Interpret(asmap, num, bits):
    '''
    Run the asmap bit program `asmap` against the integer `num`.

    `num` is consumed one bit at a time starting from bit position
    `bits - 1` and moving towards bit 0. The program is a sequence of
    instructions, each introduced by an opcode:
      0: decode an ASN and return it;
      1: decode a jump offset, consume one bit of `num`, and skip ahead by
         that offset if the consumed bit was set;
      2: decode a match pattern (a leading 1 followed by the bits to
         compare), and compare it bit by bit against `num`; on the first
         mismatch return the current default, otherwise consume the bits;
      3: decode an ASN and remember it as the current default.

    Returns the resulting ASN, or None if a match fails before any default
    was set.
    '''
    cursor = 0
    fallback = None
    while True:
        assert cursor < len(asmap)
        opcode, cursor = DecodeType(asmap, cursor)
        if opcode == 0:
            asn, cursor = DecodeASN(asmap, cursor)
            return asn
        if opcode == 1:
            offset, cursor = DecodeJump(asmap, cursor)
            bits -= 1
            if (num >> bits) & 1:
                cursor += offset
            continue
        if opcode == 2:
            pattern, cursor = DecodeMatch(asmap, cursor)
            # The top set bit of the pattern only marks its length.
            width = pattern.bit_length() - 1
            for shift in range(width - 1, -1, -1):
                if ((num >> (bits - 1)) & 1) != ((pattern >> shift) & 1):
                    return fallback
                bits -= 1
            continue
        if opcode == 3:
            fallback, cursor = DecodeASN(asmap, cursor)
            continue
        assert False
def decode_ip(ip: str) -> int:
    '''
    Convert a textual IPv4 or IPv6 address to a 128-bit integer.

    IPv6 addresses map to their plain integer value. IPv4 addresses are
    offset by 0xffff00000000, i.e. placed at ::ffff:a.b.c.d.
    Raises ValueError (from `ipaddress.ip_address`) if `ip` is not a valid
    address.
    '''
    parsed = ipaddress.ip_address(ip)
    value = int(parsed)
    if isinstance(parsed, ipaddress.IPv4Address):
        return value + 0xffff00000000
    if isinstance(parsed, ipaddress.IPv6Address):
        return value
class ASMap:
    '''
    IP-to-ASN lookup backed by an asmap file held in memory as a bit list.
    '''

    def __init__(self, filename):
        '''
        Read the binary file `filename` and store its contents, expanded to
        a list of bits, in `self.asmap`.
        '''
        with open(filename, "rb") as handle:
            raw = handle.read()
        self.asmap = DecodeBytes(raw)

    def lookup_asn(self, ip):
        '''
        Return the ASN (an integer) for the textual address `ip`, or None
        when the map has no entry for it. The address is interpreted as a
        128-bit value.
        '''
        address = decode_ip(ip)
        return Interpret(self.asmap, address, 128)

View file

@ -6,12 +6,14 @@
# Generate seeds.txt from Pieter's DNS seeder
#
import argparse
import re
import sys
import dns.resolver
import collections
from typing import List, Dict, Union
from asmap import ASMap
NSEEDS=512
MAX_SEEDS_PER_ASN = {
@ -123,34 +125,8 @@ def filtermultiport(ips: List[Dict]) -> List[Dict]:
hist[ip['sortkey']].append(ip)
return [value[0] for (key,value) in list(hist.items()) if len(value)==1]
def lookup_asn(net: str, ip: str) -> Union[int, None]:
    """ Look up the asn for an `ip` address by querying cymru.com
    on network `net` (e.g. ipv4 or ipv6).

    For 'ipv4' the dotted octets are reversed and queried under
    `.origin.asn.cymru.com`; for any other `net` the address is treated as
    IPv6 and the first 16 nibbles (the leading /64) are reversed and queried
    under `.origin6.asn.cymru.com`.

    Returns an integer ASN or None if it could not be found. Any failure
    (invalid address, DNS error, unexpected answer format) is reported on
    stderr and results in None.
    """
    # Local import so this block does not depend on a file-level import.
    import ipaddress
    try:
        if net == 'ipv4':
            labels = ip.split('.')
            zone = '.origin'
        else:  # http://www.team-cymru.com/IP-ASN-mapping.html
            # Expand the address first: splitting the textual form on ':'
            # mis-handles '::' compression inside the first four groups.
            # 2001:4860:b002:23::68 -> 2001 4860 b002 0023
            groups = ipaddress.IPv6Address(ip).exploded.split(':')[:4]
            # One DNS label per nibble: 2.0.0.1.4.8.6.0.b.0.0.2.0.0.2.3
            labels = list(''.join(groups))
            zone = '.origin6'
        qname = '.'.join(reversed(labels)) + zone + '.asn.cymru.com'
        answers = [x.to_text() for x in dns.resolver.resolve(qname, 'TXT').response.answer]
        # The ASN is the first space-separated field inside the quoted TXT payload.
        return int(answers[0].split('\"')[1].split(' ')[0])
    except Exception as e:
        sys.stderr.write(f'ERR: Could not resolve ASN for "{ip}": {e}\n')
        return None
# Based on Greg Maxwell's seed_filter.py
def filterbyasn(ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Dict]:
def filterbyasn(asmap: ASMap, ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Dict]:
""" Prunes `ips` by
(a) trimming ips to have at most `max_per_net` ips from each net (e.g. ipv4, ipv6); and
(b) trimming ips to have at most `max_per_asn` ips from each asn in each net.
@ -173,13 +149,14 @@ def filterbyasn(ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Di
# do not add this ip as we already have too many
# ips from this network
continue
asn = lookup_asn(ip['net'], ip['ip'])
if asn is None or asn_count[asn] == max_per_asn[ip['net']]:
asn = asmap.lookup_asn(ip['ip'])
if asn is None or asn_count[ip['net'], asn] == max_per_asn[ip['net']]:
# do not add this ip as we already have too many
# ips from this ASN on this network
continue
asn_count[asn] += 1
asn_count[ip['net'], asn] += 1
net_count[ip['net']] += 1
ip['asn'] = asn
result.append(ip)
# Add back Onions (up to max_per_net)
@ -195,7 +172,18 @@ def ip_stats(ips: List[Dict]) -> str:
return f"{hist['ipv4']:6d} {hist['ipv6']:6d} {hist['onion']:6d}"
def parse_args():
    """ Parse the command line (`sys.argv`) and return the argparse namespace.

    The only option is `-a` / `--asmap`, which is mandatory and available
    on the result as `.asmap`. argparse exits the process if it is missing.
    """
    parser = argparse.ArgumentParser(
        description='Generate a list of bitcoin node seed ip addresses.',
    )
    parser.add_argument(
        "-a",
        "--asmap",
        required=True,
        help='the location of the asmap asn database file (required)',
    )
    return parser.parse_args()
def main():
args = parse_args()
print(f'Loading asmap database "{args.asmap}"', end='', file=sys.stderr, flush=True)
asmap = ASMap(args.asmap)
print('Done.', file=sys.stderr)
lines = sys.stdin.readlines()
ips = [parseline(line) for line in lines]
@ -230,15 +218,18 @@ def main():
ips = filtermultiport(ips)
print(f'{ip_stats(ips):s} Filter out hosts with multiple bitcoin ports', file=sys.stderr)
# Look up ASNs and limit results, both per ASN and globally.
ips = filterbyasn(ips, MAX_SEEDS_PER_ASN, NSEEDS)
ips = filterbyasn(asmap, ips, MAX_SEEDS_PER_ASN, NSEEDS)
print(f'{ip_stats(ips):s} Look up ASNs and limit results per ASN and per net', file=sys.stderr)
# Sort the results by IP address (for deterministic output).
ips.sort(key=lambda x: (x['net'], x['sortkey']))
for ip in ips:
if ip['net'] == 'ipv6':
print('[%s]:%i' % (ip['ip'], ip['port']))
print(f"[{ip['ip']}]:{ip['port']}", end="")
else:
print('%s:%i' % (ip['ip'], ip['port']))
print(f"{ip['ip']}:{ip['port']}", end="")
if 'asn' in ip:
print(f" # AS{ip['asn']}", end="")
print()
if __name__ == '__main__':
main()