scratch/parsers/bencode.nim
2024-12-26 14:02:40 -03:00

116 lines
2.9 KiB
Nim

## Bencode decoder
## Resources:
## https://en.m.wikipedia.org/wiki/Bencode
## https://archive.is/yCwj4
import std/[streams, strutils, tables]
from strformat import fmt
type
  BencodeSyntaxError* = object of ValueError
    ## Raised when the input is not well-formed bencode.
  BencodeInt* = int64
    ## Value type of a bencoded integer (`i...e`).
  BencodeByteStr* = string
    ## Value type of a bencoded byte string (`<len>:<bytes>`).
  BencodeDict* = Table[string, BencodeObject]
    ## Value type of a bencoded dictionary: byte-string key -> element.
  BencodeObjectKind* = enum
    ## Discriminator for the four bencode element kinds.
    bokNum   ## integer
    bokBseq  ## byte string
    bokList  ## list
    bokDict  ## dictionary
  BencodeObject* = object
    ## One decoded bencode element. The types and fields are exported so
    ## that modules importing `decodeBencode` can inspect its result.
    case kind*: BencodeObjectKind
    of bokNum: num*: BencodeInt
    of bokBseq: bseq*: BencodeByteStr
    of bokList: list*: seq[BencodeObject]
    of bokDict: dict*: BencodeDict
# Forward declaration: the list and dictionary parsers below call
# decodeBencode recursively before its body is defined.
proc decodeBencode*(bs: Stream): BencodeObject
# One-character push-back slot; '\0' means no character is pending.
# expectToken consumes a pending character before reading from the stream.
var putbackdChar: char
proc expectToken(bs: Stream, expected: char) =
  ## Consumes one character and checks that it equals `expected`.
  ##
  ## A character pending in `putbackdChar` (any value other than '\0') is
  ## taken first and the slot is cleared; otherwise the character is read
  ## from `bs`. Raises `BencodeSyntaxError` on a mismatch; a '\0' character
  ## is reported as "unexpected EOB".
  let c =
    if putbackdChar == '\0':
      bs.readChar()
    else:
      let pending = putbackdChar
      putbackdChar = '\0'
      pending
  if c == expected:
    return
  let msg =
    if c == '\0': "unexpected EOB"
    else: fmt"expected `{expected}`, got `{c}`"
  raise newException(BencodeSyntaxError, msg)
proc readNumber(bs: Stream): BencodeInt =
  ## Reads an optionally '-'-prefixed base-10 number from `bs` and returns
  ## its value. The first character after the digits is left unread in the
  ## stream (it is only peeked), so the caller can check the terminator.
  ##
  ## Raises `BencodeSyntaxError` when no digit is present or when the value
  ## does not fit in `BencodeInt`. The magnitude is accumulated as a
  ## positive number, so `low(BencodeInt)` itself is reported as out of
  ## range.
  let negative = bs.peekChar() == '-'
  if negative:
    discard bs.readChar()
  let first = bs.peekChar()
  if not isDigit(first):
    let msg =
      if first != '\0': fmt"expected digit, got `{first}`"
      else: "unexpected EOB"
    raise newException(BencodeSyntaxError, msg)
  while isDigit(bs.peekChar()):
    let digit = BencodeInt(ord(bs.readChar()) - ord('0'))
    # Check before multiplying so that over-long digit runs become a
    # catchable syntax error instead of an OverflowDefect.
    if result > (high(BencodeInt) - digit) div 10:
      raise newException(BencodeSyntaxError, "integer out of range")
    result = result * 10 + digit
  if negative:
    result = -result
proc parseBencodeInteger(bs: Stream): BencodeInt =
  ## Parses one integer element: 'i', a base-10 number, then 'e'.
  ## Examples: `i1234e`, `i-727e`.
  bs.expectToken('i')
  let value = bs.readNumber()
  bs.expectToken('e')
  return value
proc parseByteSequence(bs: Stream): BencodeByteStr =
  ## Parses one byte string: a base-10 length, ':', then exactly that many
  ## raw bytes. Example: `4:spam`.
  ##
  ## Raises `BencodeSyntaxError` when the length is negative or the stream
  ## ends before all bytes have been read.
  var remaining = readNumber(bs)
  expectToken(bs, ':')
  if remaining < 0:
    raise newException(BencodeSyntaxError, "negative byte string length")
  while remaining > 0:
    # readChar returns '\0' at end of stream, which is also a legal payload
    # byte, so end of input has to be detected explicitly.
    if bs.atEnd():
      raise newException(BencodeSyntaxError, "unexpected EOB")
    # Read the byte as a char directly: going through a signed int8 and
    # converting to char fails the range check for bytes >= 0x80.
    result.add(bs.readChar())
    dec remaining
proc parseList(bs: Stream): seq[BencodeObject] =
  ## Parses one list element: 'l', zero or more elements, then 'e'.
  ## Each item is decoded by `decodeBencode`.
  bs.expectToken('l')
  var items: seq[BencodeObject]
  while true:
    if bs.peekChar() == 'e':
      break
    items.add(bs.decodeBencode())
  bs.expectToken('e')
  return items
proc parseDict(bs: Stream): BencodeDict =
  ## Parses one dictionary element: 'd', zero or more key/value pairs, then
  ## 'e'. Keys are byte strings; values are decoded by `decodeBencode`.
  ## A key that occurs more than once keeps the value stored last.
  bs.expectToken('d')
  while true:
    if bs.peekChar() == 'e':
      break
    # The key is read first, then its value, matching the wire order.
    let key = bs.parseByteSequence()
    let entry = bs.decodeBencode()
    result[key] = entry
  bs.expectToken('e')
proc decodeBencode*(bs: Stream): BencodeObject =
  ## Decodes the next bencoded element from `bs`.
  ##
  ## The element kind is chosen from the first character, which is peeked
  ## and left in the stream for the specific parser: a digit starts a byte
  ## string, 'i' an integer, 'l' a list and 'd' a dictionary.
  ##
  ## Raises `BencodeSyntaxError` for any other character; a '\0' character
  ## is reported as "Unexpected EOB".
  let c = bs.peekChar()
  case c
  of '0'..'9':
    # A digit begins the length prefix of a byte string. `0:` (the empty
    # byte string) is valid bencode and is accepted here, just as it is
    # for dictionary keys.
    result = BencodeObject(kind: bokBseq, bseq: parseByteSequence(bs))
  of 'i':
    result = BencodeObject(kind: bokNum, num: parseBencodeInteger(bs))
  of 'l':
    result = BencodeObject(kind: bokList, list: parseList(bs))
  of 'd':
    result = BencodeObject(kind: bokDict, dict: parseDict(bs))
  of '\0':
    raise newException(BencodeSyntaxError, "Unexpected EOB")
  else:
    raise newException(BencodeSyntaxError, fmt"Unknown token `{c}`")
when isMainModule:
  # Demo: decode a sample document and print its structure. Guarded so that
  # importing this module does not run the demo or write to stdout.
  # Alternative sample input:
  #var bs = newStringStream("li1234e4:spam6:i1234ed3:foo3:bar4:fuzzli1337e4:abcdeee")
  let bs = newStringStream("li1234el4:spamli1337e4:eggseee")
  try:
    echo decodeBencode(bs).repr()
  finally:
    # Close the stream even when decoding raises.
    bs.close()