commit d5c2167efcb398aeaed3304f9138e7b9693f0dd1 Author: tocariimaa Date: Mon Feb 3 20:09:55 2025 -0300 initial commit diff --git a/README.md b/README.md new file mode 100644 index 0000000..33140dc --- /dev/null +++ b/README.md @@ -0,0 +1,14 @@ +# pila +Concatenative stack-based language. (written in C99) [not turing complete yet] + +## Goal +- Keeping it small and portable +- No floating point +- A little of syntactic sugar (so strings literals look like you would expect, for example) + +## TODO +- Work in removing hard dependency on libc +- Native code generation? +- Implement more of Pila in itself +- Proper error handling +- Case insensitiveness diff --git a/pila.c b/pila.c new file mode 100644 index 0000000..f793aba --- /dev/null +++ b/pila.c @@ -0,0 +1,643 @@ +#include +#include +#include + +#include "pre.h" + +#define DATA_STACK_LEN 4096 +#define RETURN_STACK_LEN 4096 +#define REPL_LINE_BUF_LEN 4096 + +#define BOX_INTN(n) (Value){.kind = VAL_INT, .intn = n} +#define BOX_STR(n) (Value){.kind = VAL_STR, .str = n} + +typedef struct UserWord UserWord; +typedef struct Word Word; +typedef struct DictionaryEntry DictionaryEntry; +typedef struct RpnState RpnState; +typedef void (*NativeWord)(RpnState *); + +typedef struct { + char *s; + isize len; +} Str; + +typedef enum { + VAL_NIL, + VAL_INT, + VAL_STR, +} ValueKind; + +typedef struct { + ValueKind kind; + union { + u64 intn; + Str str; + }; +} Value; + +typedef enum { + WORD_NATIVE, + WORD_USER, +} WordKind; + +struct UserWord { + Word **words; + isize len; +}; + +struct Word { + Str name; + WordKind kind; + union { + NativeWord nat; + UserWord uword; + }; +}; + +struct RpnState { + Value data_stack[DATA_STACK_LEN]; + Word *rts[RETURN_STACK_LEN]; + + isize sp, rp; + bool err; + DictionaryEntry *dict; + + bool compiling; + char *rsp; /* reader start pointer */ + char *rep; /* " end " */ + UserWord cur_comp; + isize cur_comp_cap; + Str cur_comp_name; +}; + +struct DictionaryEntry { + Word *word; + DictionaryEntry *next; +}; + +static const Value Nil = (Value){.kind = VAL_NIL}; +static const Str StrEmpty = (Str){.s = "", .len = 0}; + +void +signal_error(RpnState *st, const char *err); +void +signal_errorf(RpnState *st, const char *err, ...); +Word +*search_word(RpnState *st, Str word_name); +Str +read_word_name(char **pstart, char *pend); +bool +read_number(Str ns, u64 *res); +void +skip_whitespace(char **pstart, char *pend); +void +add_word(RpnState *st, Word *w); +void +eval_word(RpnState *st, Word *w); +void +eval(RpnState *st, Str src); + +bool +ascii_isdigit(char c) +{ + return '0' <= c && c <= '9'; +} + +bool +ascii_isspace(char c) +{ + return c == ' ' || ('\t' <= c && c <= '\r'); +} + +bool +ascii_iswordname(char c) +{ + return ('A' <= c && c <= 'Z') + || ('a' <= c && c <= 'z') + || (('!' <= c && c <= '\'') || ('*' <= c && c <= '@')) + || ascii_isdigit(c); +} + +bool +Str_equal(Str s1, Str s2) +{ + /* because passing nil to mem* is UB even if size == 0... */ + return (s1.len == s2.len) && (s1.len == 0 || memcmp(s1.s, s2.s, s1.len) == 0); +} + +Str +Str_new(const char *data, isize len) +{ + Assert(len >= 0); + Str s; + s.s = calloc(len + 1, sizeof(*s.s)); + s.len = len; + if (data != nil) { + memcpy(s.s, data, len); + s.s[len + 1] = '\0'; /* ensure */ + } + return s; +} + +void +signal_errorf(RpnState *st, const char *err, ...) +{ + va_list args; + va_start(args, err); + char buf[2048]; + vsnprintf(buf, sizeof(buf), err, args); + fputs(buf, stderr); + va_end(args); + st->err = true; +} + +void +signal_error(RpnState *st, const char *err) +{ + fputs("error: ", stderr); + fputs(err, stderr); + fputc('\n', stderr); + st->err = true; +} + +void +push_val(RpnState *st, Value val) +{ + if (st->sp + 1 > DATA_STACK_LEN) { + signal_error(st, "stack overflow"); + return; + } + st->data_stack[st->sp++] = val; +} + +Value +top_val(RpnState *st) +{ + if (st->sp == 0) { + signal_error(st, "stack underflow"); + return Nil; + } + return st->data_stack[st->sp - 1]; +} + +Value +pop_val(RpnState *st) +{ + Value val = top_val(st); + if (val.kind == VAL_NIL) + return Nil; + --st->sp; + return val; +} + +void +add_nat(RpnState *st) +{ + Value y = pop_val(st); + Value x = pop_val(st); + push_val(st, BOX_INTN(x.intn + y.intn)); +} + +void +sub_nat(RpnState *st) +{ + Value y = pop_val(st); + Value x = pop_val(st); + push_val(st, BOX_INTN(x.intn - y.intn)); +} + +void +mul_nat(RpnState *st) +{ + Value y = pop_val(st); + Value x = pop_val(st); + push_val(st, BOX_INTN(x.intn * y.intn)); +} + +void +div_nat(RpnState *st) +{ + Value y = pop_val(st); + Value x = pop_val(st); + if (y.intn == 0 || x.intn == 0) { + signal_error(st, "division by zero"); + return; + } + push_val(st, BOX_INTN(x.intn / y.intn)); +} + +void +lt_nat(RpnState *st) +{ + Value rhs = pop_val(st); + Value lhs = pop_val(st); + push_val(st, BOX_INTN(lhs.intn < rhs.intn)); +} + +void +eq_nat(RpnState *st) +{ + Value rhs = pop_val(st); + Value lhs = pop_val(st); + push_val(st, BOX_INTN(lhs.intn == rhs.intn)); +} + +void +not_nat(RpnState *st) +{ + Value lhs = pop_val(st); + push_val(st, BOX_INTN(!lhs.intn)); +} + +void +dup_nat(RpnState *st) +{ + push_val(st, top_val(st)); +} + +void +swap_nat(RpnState *st) +{ + if (st->sp < 2) + return; + + Value top = st->data_stack[st->sp - 1]; + st->data_stack[st->sp - 1] = st->data_stack[st->sp - 2]; + st->data_stack[st->sp - 2] = top; +} + +void +drop_nat(RpnState *st) +{ + pop_val(st); +} + +void +bye_nat(RpnState *st) +{ + (void)st; + exit(0); +} + +void +print_value(RpnState *st, Value val, bool reader_fmt) +{ + (void)st; + switch (val.kind) { + case VAL_NIL: + fputs("nil", stdout); + break; + case VAL_INT: + printf("%lu", val.intn); + break; + case VAL_STR: + printf(reader_fmt ? "\"%.*s\"": "%.*s", (int)val.str.len, val.str.s); + break; + } +} + +void +stack_contents_nat(RpnState *st) +{ + if (st->sp == 0) { + puts(""); + } else { + printf("<%lu> ", st->sp); + for (isize i = 0; i < st->sp; ++i) { + Value val = st->data_stack[i]; + print_value(st, val, true); + putchar(' '); + } + putchar('\n'); + } +} + +void +print_nat(RpnState *st) +{ + Value top = pop_val(st); + print_value(st, top, false); + putchar('\n'); +} + +void +getch_nat(RpnState *st) +{ + char c = getchar(); + push_val(st, BOX_INTN((u64)c)); +} + +void +emit_nat(RpnState *st) +{ + Value c = pop_val(st); + printf("%c", (char)c.intn); +} + +void +compile_start(RpnState *st, Str wn) +{ + st->compiling = true; + st->cur_comp_name = wn; + st->cur_comp.words = calloc(16, sizeof(Word)); + st->cur_comp.len = 0; + st->cur_comp_cap = 16; +} + +void +compile_start_nat(RpnState *st) +{ + Str wn; + skip_whitespace(&st->rsp, st->rep); + wn = read_word_name(&st->rsp, st->rep); + wn = Str_new(wn.s, wn.len); + + compile_start(st, wn); +} + +void +compile_anon_start_nat(RpnState *st) +{ + compile_start(st, StrEmpty); +} + +void +compile_end_nat(RpnState *st) +{ + Word *w = malloc(sizeof(*w)); + + st->compiling = false; + w->name = st->cur_comp_name; + w->kind = WORD_USER; + w->uword = st->cur_comp; + add_word(st, w); +} + +void +tick_nat(RpnState *st) +{ + Str wn; + skip_whitespace(&st->rsp, st->rep); + wn = read_word_name(&st->rsp, st->rep); + wn = Str_new(wn.s, wn.len); + Word *w = search_word(st, wn); + if (w == nil) { + push_val(st, BOX_INTN(0)); + return; + } + push_val(st, BOX_INTN((u64)w)); +} + +void +exec_nat(RpnState *st) +{ + Value waddr_w = pop_val(st); + if (waddr_w.kind == VAL_NIL) { + return; + } + eval_word(st, (Word *)waddr_w.intn); /* :DDDD */ +} + +void +eval_nat(RpnState *st) +{ + Value pstr = pop_val(st); + if (pstr.kind == VAL_NIL || pstr.kind != VAL_STR) { + signal_error(st, "cannot eval non string Value"); + return; + } + eval(st, pstr.str); +} + +void +to_number_nat(RpnState *st) +{ + Value ns = pop_val(st); + if (ns.kind == VAL_NIL || ns.kind != VAL_STR) { + signal_error(st, "cannot parse non string value"); + return; + } + u64 n; + if (!read_number(ns.str, &n)) { + signal_error(st, "invalid number"); + return; + } + push_val(st, BOX_INTN(n)); +} + +void +add_word(RpnState *st, Word *w) +{ + if (!Str_empty(w->name)) { + DictionaryEntry *de = malloc(sizeof(*de)); + de->word = w; + de->next = st->dict != nil ? st->dict : nil; + st->dict = de; + } else { + push_val(st, BOX_INTN((u64)w)); + } +} + +Word * +search_word(RpnState *st, Str word_name) +{ + DictionaryEntry *de = st->dict; + while (de != nil) { + if (Str_equal(de->word->name, word_name)) + return de->word; + de = de->next; + } + return nil; +} + +bool +read_number(Str ns, u64 *res) +{ + u64 n = 0; + char *p = ns.s; + char *pend = ns.s + ns.len; + + while (p < pend && ascii_isdigit(*p)) { + u64 cv = *p - '0'; + n = n * 10 + cv; + ++p; + } + + if (p < pend) + return false; + + *res = n; + return true; +} + +Str +read_word_name(char **pstart, char *pend) +{ + char *p = *pstart; + Str wn = { .s = p }; + while (p < pend && ascii_iswordname(*p)) + ++p; + wn.len = p - *pstart; + *pstart = p; + return wn; +} + +void +skip_whitespace(char **pstart, char *pend) +{ + char *p = *pstart; + while (p < pend && ascii_isspace(*p)) + ++p; + *pstart = p; +} + +void +read_comment(char **pstart, char *pend) +{ + char *p = *pstart; + switch (*p) { + case '\\': + while (p < pend && *p++ != '\n'); + break; + case '(': + while (p < pend && *p++ != ')'); + break; + default: + break; + } + *pstart = p; +} + +Str +read_string_lit(char **pstart, char *pend) +{ + isize i = 0; + char buf[4096] = {0}; + char *p = *pstart; + + while (p < pend && *p != '"') + buf[i++] = *p++; + ++p; /* move past end " */ + *pstart = p; + return Str_new(buf, i); +} + +void +eval_word(RpnState *st, Word *w) +{ + /* Here, as an implementation detail, the "return stack" behaves + * more like a queue. */ + st->rts[st->rp++] = w; + + while (st->rp > 0) { + Word *tw = st->rts[--st->rp]; + switch (tw->kind) { + case WORD_NATIVE: + tw->nat(st); + break; + case WORD_USER: + for (isize i = tw->uword.len - 1; i >= 0; --i) + st->rts[st->rp++] = tw->uword.words[i]; + break; + } + } +} + +void +eval(RpnState *st, Str src) +{ + st->rsp = src.s; + st->rep = src.s + src.len; + + while (st->rsp < st->rep) { + if (*st->rsp == '"') { + ++st->rsp; + Str s = read_string_lit(&st->rsp, st->rep); + push_val(st, BOX_STR(s)); + } else if (ascii_iswordname(*st->rsp)) { + Str wn = read_word_name(&st->rsp, st->rep); + Word *w = search_word(st, wn); + if (w == nil) { + u64 val; + if (read_number(wn, &val)) { + push_val(st, BOX_INTN(val)); + continue; + } else { + signal_errorf(st, "no such word: '%.*s'\n", (int)wn.len, wn.s); + continue; + } + } + + if (!st->compiling) + eval_word(st, w); + else { + if (w->kind == WORD_NATIVE && w->nat == compile_end_nat) { + w->nat(st); + } else { + st->cur_comp.words[st->cur_comp.len++] = w; + } + } + } else if (*st->rsp == '\\' || *st->rsp == '(') { + read_comment(&st->rsp, st->rep); + } else { + ++st->rsp; + } + } +} + +void +repl(RpnState *st) +{ + char buf[REPL_LINE_BUF_LEN] = {0}; + for (;;) { + fprintf(stdout, "%s> ", st->err ? "err" : "ok"); + fflush(stdout); + if (fgets(buf, sizeof(buf), stdin) == nil) { + break; + } + Str line = Str_from_c(buf); + if (line.len == 0) + continue; + eval(st, line); + } +} + +int +main(int argc, char **argv) +{ + RpnState st = {0}; + Word natws[] = { + {.name = Sl("+"), .kind = WORD_NATIVE, .nat = add_nat }, + {.name = Sl("-"), .kind = WORD_NATIVE, .nat = sub_nat }, + {.name = Sl("*"), .kind = WORD_NATIVE, .nat = mul_nat }, + {.name = Sl("/"), .kind = WORD_NATIVE, .nat = div_nat }, + {.name = Sl("<"), .kind = WORD_NATIVE, .nat = lt_nat }, + {.name = Sl("="), .kind = WORD_NATIVE, .nat = eq_nat }, + {.name = Sl("!"), .kind = WORD_NATIVE, .nat = not_nat }, + {.name = Sl(":"), .kind = WORD_NATIVE, .nat = compile_start_nat }, + {.name = Sl(":?"), .kind = WORD_NATIVE, .nat = compile_anon_start_nat }, + {.name = Sl(";"), .kind = WORD_NATIVE, .nat = compile_end_nat }, + {.name = Sl("'"), .kind = WORD_NATIVE, .nat = tick_nat }, + {.name = Sl(">#"), .kind = WORD_NATIVE, .nat = to_number_nat }, + {.name = Sl("exec"), .kind = WORD_NATIVE, .nat = exec_nat}, + {.name = Sl("eval"), .kind = WORD_NATIVE, .nat = eval_nat}, + {.name = Sl("dup"), .kind = WORD_NATIVE, .nat = dup_nat }, + {.name = Sl("drop"), .kind = WORD_NATIVE, .nat = drop_nat }, + {.name = Sl("swap"), .kind = WORD_NATIVE, .nat = swap_nat }, + {.name = Sl("bye"), .kind = WORD_NATIVE, .nat = bye_nat }, + {.name = Sl("s?"), .kind = WORD_NATIVE, .nat = stack_contents_nat }, + {.name = Sl("emit"), .kind = WORD_NATIVE, .nat = emit_nat }, + {.name = Sl("."), .kind = WORD_NATIVE, .nat = print_nat }, + {.name = Sl(","), .kind = WORD_NATIVE, .nat = getch_nat }, + }; + for (isize i = 0; i < countof(natws); ++i) + add_word(&st, &natws[i]); + + if (argc > 1) { + eval(&st, Str_from_c(argv[1])); + } + repl(&st); + + return 0; +} diff --git a/pre.h b/pre.h new file mode 100644 index 0000000..753287a --- /dev/null +++ b/pre.h @@ -0,0 +1,29 @@ +#include +#include +#include + +#define nil ((void *)0) +#define false ((bool)0) +#define true ((bool)1) +/* Length of an array */ +#define countof(arr) (isize)(sizeof(arr) / sizeof(*(arr))) +/* Length of string literal */ +#define lengthof(s) (countof(s) - 1) +#define Sl(s) ((Str){ s, (isize)lengthof(s) }) +#define Str_from_c(s) ((Str){ s, (isize)(s != nil ? strlen(s) : 0) }) +#define Str_empty(str) ((str).s == nil || (str).len == 0) + +#ifndef NDEBUG +# ifdef __GNUC__ +# define Assert(pred) if (!(pred)) {__builtin_trap();} +# else +# define Assert(pred) if (!(pred)) {abort();} +# endif +#else +# define Assert(pred) +#endif + +typedef uint8_t u8; +typedef uint64_t u64; +typedef ptrdiff_t isize; +typedef u8 bool;