## Bencode decoder
##
## Resources:
## * https://en.m.wikipedia.org/wiki/Bencode
## * https://archive.is/yCwj4
import std/[streams, strutils, tables]
from strformat import fmt

type
  BencodeSyntaxError* = object of ValueError ## Input is not valid Bencode.
  BencodeInt* = int64
  BencodeByteStr* = string
  BencodeDict* = Table[string, BencodeObject]
  BencodeObjectKind* = enum
    bokNum  ## integer, e.g. `i1234e`
    bokBseq ## byte string, e.g. `4:spam`
    bokList ## list, e.g. `li1e4:spame`
    bokDict ## dictionary, e.g. `d3:foo3:bare`
  BencodeObject* = object
    ## A decoded Bencode value; `kind` selects which field is populated.
    case kind*: BencodeObjectKind
    of bokNum: num*: BencodeInt
    of bokBseq: bseq*: BencodeByteStr
    of bokList: list*: seq[BencodeObject]
    of bokDict: dict*: BencodeDict

# Forward declaration: lists and dictionaries recurse into the entry point.
proc decodeBencode*(bs: Stream): BencodeObject

proc expectToken(bs: Stream, expected: char) =
  ## Consumes one character from `bs` and raises `BencodeSyntaxError`
  ## unless it equals `expected`.
  let c = bs.readChar()
  if c != expected:
    # `readChar` yields '\0' once the stream is exhausted.
    let msg =
      if c != '\0': fmt"expected `{expected}`, got `{c}`"
      else: "unexpected EOB"
    raise newException(BencodeSyntaxError, msg)

proc readNumber(bs: Stream): BencodeInt =
  ## Reads an optionally negative base-10 integer and leaves the stream
  ## positioned on the first character after its last digit.
  ##
  ## Raises `BencodeSyntaxError` when no digit is present, when the number
  ## has leading zeros, when it is `-0`, or when it does not fit in
  ## `BencodeInt`.
  let negative = bs.peekChar() == '-'
  if negative:
    discard bs.readChar()

  let first = bs.peekChar()
  if not isDigit(first):
    let msg =
      if first != '\0': fmt"expected digit, got `{first}`"
      else: "unexpected EOB"
    raise newException(BencodeSyntaxError, msg)

  # Accumulate as a negative value: the negative range of a two's
  # complement integer is one larger than the positive range, so this is
  # the only way to represent `low(BencodeInt)` without overflowing.
  var digitCount = 0
  while isDigit(bs.peekChar()):
    let digit = BencodeInt(ord(bs.readChar()) - ord('0'))
    if result < (low(BencodeInt) + digit) div 10:
      raise newException(BencodeSyntaxError, "integer out of range")
    result = result * 10 - digit
    inc digitCount

  if first == '0' and digitCount > 1:
    raise newException(BencodeSyntaxError, "leading zeros are not allowed")

  if negative:
    if result == 0:
      raise newException(BencodeSyntaxError, "negative zero is not allowed")
  else:
    if result == low(BencodeInt):
      raise newException(BencodeSyntaxError, "integer out of range")
    result = -result

proc parseBencodeInteger(bs: Stream): BencodeInt =
  ## Parses an integer such as `i1234e` or `i-727e`.
  expectToken(bs, 'i')
  result = readNumber(bs)
  expectToken(bs, 'e')

proc parseByteSequence(bs: Stream): BencodeByteStr =
  ## Parses a length-prefixed byte string such as `4:spam`; `0:` is the
  ## empty string.
  ##
  ## Raises `BencodeSyntaxError` when the length is missing or negative, or
  ## when the stream ends before the announced number of bytes was read.
  let first = bs.peekChar()
  if not isDigit(first):
    # Also rejects a leading '-', i.e. a negative length.
    let msg =
      if first != '\0': fmt"expected byte string length, got `{first}`"
      else: "unexpected EOB"
    raise newException(BencodeSyntaxError, msg)

  var remaining = readNumber(bs)
  expectToken(bs, ':')
  while remaining > 0:
    # The payload may legitimately contain '\0', so end of input has to be
    # detected with `atEnd` rather than by inspecting the character read.
    if bs.atEnd:
      raise newException(BencodeSyntaxError, "unexpected EOB")
    result.add(bs.readChar())
    dec remaining

proc parseList(bs: Stream): seq[BencodeObject] =
  ## Parses a list such as `li1234e4:spame`.
  expectToken(bs, 'l')
  while bs.peekChar() != 'e':
    result.add(decodeBencode(bs))
  expectToken(bs, 'e')

proc parseDict(bs: Stream): BencodeDict =
  ## Parses a dictionary such as `d3:foo3:bare`. Keys must be byte strings;
  ## when a key occurs more than once the last value wins.
  expectToken(bs, 'd')
  while bs.peekChar() != 'e':
    let key = parseByteSequence(bs)
    result[key] = decodeBencode(bs)
  expectToken(bs, 'e')

proc decodeBencode*(bs: Stream): BencodeObject =
  ## Decodes exactly one Bencode value from `bs` and returns it. Anything
  ## following that value is left unread in the stream.
  ##
  ## Raises `BencodeSyntaxError` when the input is malformed or truncated.
  let c = bs.peekChar()
  case c
  of '0'..'9':
    result = BencodeObject(kind: bokBseq, bseq: parseByteSequence(bs))
  of 'i':
    result = BencodeObject(kind: bokNum, num: parseBencodeInteger(bs))
  of 'l':
    result = BencodeObject(kind: bokList, list: parseList(bs))
  of 'd':
    result = BencodeObject(kind: bokDict, dict: parseDict(bs))
  of '\0':
    raise newException(BencodeSyntaxError, "Unexpected EOB")
  else:
    raise newException(BencodeSyntaxError, fmt"Unknown token `{c}`")

when isMainModule:
  # Small demo, executed only when this file is run directly.
  # Alternative input:
  #   "li1234e4:spam6:i1234ed3:foo3:bar4:fuzzli1337e4:abcdeee"
  let bs = newStringStream("li1234el4:spamli1337e4:eggseee")
  try:
    echo decodeBencode(bs).repr()
  finally:
    bs.close()