#ifndef _lex_h_ #define _lex_h_ #include #include "pre.h" #include "location.h" #include "state.h" #include "libs/stb_ds.h" enum LexTokenId { T_INVALID = 0, /* Unary and binary operators */ T_PLUS, T_MINUS, T_STAR, T_BAR, T_LESSTHAN, T_GREATTHAN, T_LOGNOT, T_LOGAND, T_LOGOR, T_LOGICEQUAL, T_NOTEQUAL, T_HASH, /* Others */ T_EQUAL, T_EXCLAMATION, T_LPAREN, T_RPAREN, T_COMMA, T_COLON, T_SEMICOLON, T_LBRACKET, T_RBRACKET, T_LBRACE, T_RBRACE, /* Atoms */ T_IDENT, T_STRING, T_NUMBER, T_DECNUMBER, /* Keywords */ T_CONST, T_ELSE, T_END, T_ELIF, T_IF, T_LET, T_PROC, T_RETURN, T_VAR, T_DISCARD, T_WHILE, T_STRUCT, T_USE, T_BREAK, T_NEXT, /* Control */ T_EOF, T_ERROR, T_TOKEN_COUNT, /* does not represent an actual token */ }; /* Table mapping a `LexTokenId` to a string name of the token */ extern const char *TokenIdStr[]; typedef struct { enum LexTokenId id; union { Str ident, str, keyword; /* XXX: Defer number parsing until it is actually needed? * So we can move number parsing out of the lexer. */ /* Integer literal, it's the parser problem to tell * whether the literal is negative or not. */ u64 inumber; /* Floating point literal */ double floatn; }; isize len; /* Size in bytes of this token */ Location loc; /* Start position of this token in the file or stream */ } LexToken; typedef HashMapStr(i8) IdentsBucket; typedef struct { FILE *input_fp; /* Lexing buffer. This is actually split into two buffers, providing * a double-buffering scheme */ u8 *buf; /* Actual length of each buffer (fread may read less than LEX_BUFFER_SIZE) */ isize buflen, buflen2; u8 *lbegin; /* marks the begin of the current lexeme */ u8 *fwd; /* this pointer is the scanner */ Vec(LexToken) backlist; /* stack of backed up tokens */ int tabsize; bool eof; Location cur_loc; Compiler *cm; IdentsBucket *idents; } LexState; LexToken lex_scan(LexState *ls); void lex_backup(LexState *ls, LexToken token); bool lex_match(LexState *ls, LexToken *t, enum LexTokenId exp_tok); LexState * lex_new(Compiler *cm, FILE *input_fp, Str file_name, usize tabsize); void lex_destroy(LexState *l); #endif