#!/usr/bin/env python3 # # linearize-data.py: Construct a linear, no-fork version of the chain. # # Copyright (c) 2013-2021 The Bitcoin Core developers # Distributed under the MIT software license, see the accompanying # file COPYING or http://www.opensource.org/licenses/mit-license.php. # import struct import re import os import os.path import sys import hashlib import datetime import time import glob from collections import namedtuple settings = {} def hex_switchEndian(s): """ Switches the endianness of a hex string (in pairs of hex chars) """ pairList = [s[i:i+2].encode() for i in range(0, len(s), 2)] return b''.join(pairList[::-1]).decode() def calc_hash_str(blk_hdr): blk_hdr_hash = hashlib.sha256(hashlib.sha256(blk_hdr).digest()).digest() return blk_hdr_hash[::-1].hex() def get_blk_dt(blk_hdr): members = struct.unpack(" self.maxOutSz): self.outF.close() if self.setFileTime: os.utime(self.outFname, (int(time.time()), self.highTS)) self.outF = None self.outFname = None self.outFn = self.outFn + 1 self.outsz = 0 (blkDate, blkTS) = get_blk_dt(blk_hdr) if self.timestampSplit and (blkDate > self.lastDate): print("New month " + blkDate.strftime("%Y-%m") + " @ " + self.hash_str) self.lastDate = blkDate if self.outF: self.outF.close() if self.setFileTime: os.utime(self.outFname, (int(time.time()), self.highTS)) self.outF = None self.outFname = None self.outFn = self.outFn + 1 self.outsz = 0 if not self.outF: if self.fileOutput: self.outFname = self.settings['output_file'] else: self.outFname = os.path.join(self.settings['output'], "blk%05d.dat" % self.outFn) print("Output file " + self.outFname) self.outF = open(self.outFname, "wb") self.outF.write(inhdr) self.outF.write(blk_hdr) self.outF.write(rawblock) self.outsz = self.outsz + len(inhdr) + len(blk_hdr) + len(rawblock) self.blkCountOut = self.blkCountOut + 1 if blkTS > self.highTS: self.highTS = blkTS if (self.blkCountOut % 1000) == 0: print('%i blocks scanned, %i blocks written (of %i, %.1f%% complete)' % (self.blkCountIn, self.blkCountOut, len(self.blkindex), 100.0 * self.blkCountOut / len(self.blkindex))) def inFileName(self, fn): return os.path.join(self.settings['input'], "blk%05d.dat" % fn) def fetchBlock(self, extent): '''Fetch block contents from disk given extents''' with open(self.inFileName(extent.fn), "rb") as f: f.seek(extent.offset) return f.read(extent.size) def copyOneBlock(self): '''Find the next block to be written in the input, and copy it to the output.''' extent = self.blockExtents.pop(self.blkCountOut) if self.blkCountOut in self.outOfOrderData: # If the data is cached, use it from memory and remove from the cache rawblock = self.outOfOrderData.pop(self.blkCountOut) self.outOfOrderSize -= len(rawblock) else: # Otherwise look up data on disk rawblock = self.fetchBlock(extent) self.writeBlock(extent.inhdr, extent.blkhdr, rawblock) def run(self): while self.blkCountOut < len(self.blkindex): if not self.inF: fname = self.inFileName(self.inFn) print("Input file " + fname) try: self.inF = open(fname, "rb") except IOError: print("Premature end of block data") return inhdr = self.inF.read(8) if (not inhdr or (inhdr[0] == "\0")): self.inF.close() self.inF = None self.inFn = self.inFn + 1 continue inMagic = inhdr[:4] if (inMagic != self.settings['netmagic']): # Seek backwards 7 bytes (skipping the first byte in the previous search) # and continue searching from the new position if the magic bytes are not # found. self.inF.seek(-7, os.SEEK_CUR) continue inLenLE = inhdr[4:] su = struct.unpack("