116 lines
2.9 KiB
Nim
116 lines
2.9 KiB
Nim
## Bencode decoder
##
## Resources:
## - https://en.m.wikipedia.org/wiki/Bencode
## - https://archive.is/yCwj4
|
|
import std/[streams, strutils, tables]
|
|
from strformat import fmt
|
|
|
|
type
  BencodeSyntaxError* = object of ValueError ## \
    ## Raised when the input stream does not contain well-formed bencode.

  BencodeInt* = int64 ## Decoded bencode integer (`i<digits>e`).

  BencodeByteStr* = string ## Decoded bencode byte string (`<len>:<bytes>`).

  BencodeDict* = Table[string, BencodeObject] ## \
    ## Decoded bencode dictionary, keyed by the decoded byte-string key.

  BencodeObjectKind* = enum ## Discriminator for `BencodeObject`.
    bokNum  ## integer value, stored in `num`
    bokBseq ## byte string value, stored in `bseq`
    bokList ## list value, stored in `list`
    bokDict ## dictionary value, stored in `dict`

  BencodeObject* = object
    ## A single decoded bencode value. The type and all fields are exported
    ## so that modules importing `decodeBencode` can name and inspect the
    ## result. Only the field matching `kind` may be accessed.
    case kind*: BencodeObjectKind
    of bokNum: num*: BencodeInt
    of bokBseq: bseq*: BencodeByteStr
    of bokList: list*: seq[BencodeObject]
    of bokDict: dict*: BencodeDict
|
|
|
|
|
|
# Forward declaration: the list and dict parsers below call `decodeBencode`
# recursively before its body is defined.
proc decodeBencode*(bs: Stream): BencodeObject

# One-character lookahead slot. `readNumber` stores the character that ended
# the number here and `expectToken` consumes it before reading the stream.
# `'\0'` means the slot is empty.
var putbackdChar = '\0'
|
|
proc expectToken(bs: Stream, expected: char) =
  ## Consumes the next character and checks that it equals `expected`.
  ##
  ## The character is taken from `putbackdChar` when that slot is non-empty
  ## (and the slot is cleared); otherwise it is read from `bs`.
  ## Raises `BencodeSyntaxError` on a mismatch; a `'\0'` character is
  ## reported as end of input.
  let c =
    if putbackdChar == '\0':
      bs.readChar()
    else:
      let pending = putbackdChar
      putbackdChar = '\0'
      pending

  if c == expected:
    return

  if c == '\0':
    raise newException(BencodeSyntaxError, "unexpected EOB")
  raise newException(BencodeSyntaxError, fmt"expected `{expected}`, got `{c}`")
|
|
|
|
# Reads an optionally negative base-10 number from the stream.
proc readNumber(bs: Stream): BencodeInt =
  ## Reads an optional `-` followed by one or more decimal digits from `bs`
  ## and returns their value.
  ##
  ## The first character after the number is consumed from the stream and
  ## stored in `putbackdChar` so that the following `expectToken` call sees
  ## it.
  ##
  ## Raises `BencodeSyntaxError` when no digit is present (e.g. `ie`, `i-e`)
  ## or when the value does not fit into `BencodeInt`.
  var
    c = bs.readChar()
    literal = ""

  if c == '-':
    literal.add(c)
    c = bs.readChar()

  while isDigit(c):
    literal.add(c)
    c = bs.readChar()

  # Validate before touching `putbackdChar`, so a failed parse never leaves
  # a stale character behind in the shared lookahead slot.
  if literal.len == 0 or literal == "-":
    let msg =
      if c != '\0': fmt"expected digit, got `{c}`"
      else: "unexpected EOB"
    raise newException(BencodeSyntaxError, msg)

  # `parseBiggestInt` covers the full int64 range, including `low(int64)`,
  # and signals overflow with `ValueError` rather than a Defect.
  try:
    result = parseBiggestInt(literal)
  except ValueError:
    raise newException(BencodeSyntaxError,
                       fmt"number out of range: `{literal}`")

  putbackdChar = c
|
|
|
|
# Integer syntax examples: i1234e i-727e
proc parseBencodeInteger(bs: Stream): BencodeInt =
  ## Parses an `i<number>e` token from `bs` and returns the number.
  ## Syntax errors are raised by `expectToken`.
  bs.expectToken('i')
  let value = bs.readNumber()
  bs.expectToken('e')
  return value
|
|
|
|
# 4:spam
proc parseByteSequence(bs: Stream): BencodeByteStr =
  ## Parses a `<length>:<bytes>` token from `bs` and returns the raw bytes.
  ##
  ## Bytes are copied verbatim, including values >= 0x80 and NUL.
  ##
  ## Raises `BencodeSyntaxError` when the length is negative or when the
  ## stream ends before `length` bytes have been read.
  let byteLen = readNumber(bs)
  # Must directly follow `readNumber`: it consumes the lookahead character
  # that `readNumber` left in `putbackdChar`.
  expectToken(bs, ':')

  if byteLen < 0:
    raise newException(BencodeSyntaxError,
                       fmt"negative byte string length `{byteLen}`")

  for _ in 0 ..< byteLen:
    # `readChar` returns '\0' at end of stream, which is also a valid
    # payload byte, so end of input is detected explicitly.
    if bs.atEnd():
      raise newException(BencodeSyntaxError, "unexpected EOB")
    # `readChar` keeps the raw byte; going through `int8` would make bytes
    # >= 0x80 negative and fail the conversion to `char`.
    result.add(bs.readChar())
|
|
|
|
proc parseList(bs: Stream): seq[BencodeObject] =
  ## Parses an `l<items>e` token from `bs` and returns the decoded items in
  ## order. Each item is decoded by `decodeBencode`.
  bs.expectToken('l')
  while true:
    # The list ends at the first `e` found where an item would start.
    if bs.peekChar() == 'e':
      break
    result.add bs.decodeBencode()
  bs.expectToken('e')
|
|
|
|
proc parseDict(bs: Stream): BencodeDict =
  ## Parses a `d<key><value>...e` token from `bs` into a table.
  ## Keys are read as byte strings and values via `decodeBencode`; a repeated
  ## key overwrites the earlier entry.
  bs.expectToken('d')
  while true:
    # The dictionary ends at the first `e` found where a key would start.
    if bs.peekChar() == 'e':
      break
    # The key must be read before its value, matching the stream order.
    let entryKey = bs.parseByteSequence()
    let entryVal = bs.decodeBencode()
    result[entryKey] = entryVal
  bs.expectToken('e')
|
|
|
|
proc decodeBencode*(bs: Stream): BencodeObject =
  ## Decodes the next bencode value from `bs`.
  ##
  ## The first character (peeked, not consumed) selects the value type:
  ## a digit starts a byte string, `i` an integer, `l` a list and `d` a
  ## dictionary.
  ##
  ## Raises `BencodeSyntaxError` on end of input or on any other leading
  ## character; the sub-parsers raise it for malformed tokens.
  let c = bs.peekChar()
  case c
  of '0'..'9':
    # A leading `0` is allowed: `0:` encodes the empty byte string.
    result = BencodeObject(kind: bokBseq, bseq: parseByteSequence(bs))
  of 'i':
    result = BencodeObject(kind: bokNum, num: parseBencodeInteger(bs))
  of 'l':
    result = BencodeObject(kind: bokList, list: parseList(bs))
  of 'd':
    result = BencodeObject(kind: bokDict, dict: parseDict(bs))
  of '\0':
    # `peekChar` yields '\0' when nothing more can be read.
    raise newException(BencodeSyntaxError, "Unexpected EOB")
  else:
    raise newException(BencodeSyntaxError, fmt"Unknown token `{c}`")
|
|
|
|
#var bs = newStringStream("li1234e4:spam6:i1234ed3:foo3:bar4:fuzzli1337e4:abcdeee")
# Demo: decode a sample nested list and print its structure.
let bs = newStringStream("li1234el4:spamli1337e4:eggseee")
echo repr(decodeBencode(bs))
close(bs)
|