Initial commit

2025-01-12 18:20:42 -03:00 · 2025-01-12 18:20:42 -03:00 · bba597f7bf
commit bba597f7bf
28 changed files with 6029 additions and 0 deletions
--- a/38
+++ b/38
@ -0,0 +1,38 @@
+# Build script for the compiler (`rutilec`) and the `ast2dot` tool.
+# This makefile should work for both GNU and BSD Make I think...
+# NOTE(review): the `%.o: %.c` rule at the bottom uses pattern-rule syntax;
+# confirm that the BSD Make in use accepts it.
+
+SRCDIR = ./compiler
+# Every .c file below $(SRCDIR), collected by running `find` through the shell.
+COMPILER_SRCS != find $(SRCDIR) -type f -name '*.c'
+# Object (.o) and dependency (.d) file names derived from the source list.
+COMPILER_OBJS := $(COMPILER_SRCS:.c=.o)
+COMPILER_DEPS := $(COMPILER_OBJS:.o=.d)
+
+# Sanitizer flags; they are part of both CFLAGS and LDFLAGS below.
+ASAN = -fsanitize=address,undefined
+# Development defaults: warnings on, no optimisation, debug info, sanitizers.
+CFLAGS := -Wall -Wextra -Wstrict-prototypes -Wold-style-definition -Wvla -Wwrite-strings \
+		 -Wnull-dereference -pipe -O0 -ggdb3 -std=c11 $(ASAN)
+LDFLAGS := $(ASAN)
+
+# Default target: build the compiler and the AST visualisation tool.
+all: rutilec ast2dot
+
+# Link every compiler object into the `rutilec` executable.
+rutilec: $(COMPILER_OBJS)
+	$(CC) $(LDFLAGS) $^ -o $@$(EXE)
+
+# Compile tools/ast2dot.c and link it together with the compiler objects.
+ast2dot: tools/ast2dot.c $(COMPILER_OBJS)
+	$(CC) $(CFLAGS) $(LDFLAGS) $(COMPILER_OBJS) $< -o $@$(EXE)
+
+# Remove objects, dependency files and both executables.
+clean:
+	rm -f $(COMPILER_OBJS) $(COMPILER_DEPS) ./rutilec$(EXE) ./ast2dot$(EXE)
+
+# Print the effective build configuration.
+options:
+	@echo "Build options:"
+	@echo "CC = $(CC)"
+	@echo "CFLAGS = $(CFLAGS)"
+	@echo "LDFLAGS = $(LDFLAGS)"
+	@echo "ASan flags = $(ASAN)"
+	@echo "SRCS = $(COMPILER_SRCS)"
+	@echo "OBJS = $(COMPILER_OBJS)"
+
+.PHONY: all clean options
+
+# Pull in the generated .d files; the leading `-` ignores missing ones.
+-include $(COMPILER_DEPS)
+
+# Compile one source file; -MMD -MP ask the compiler to also write its .d file.
+# Objects also depend on the Makefile, so flag changes trigger a rebuild.
+%.o: %.c Makefile
+	$(CC) $(CFLAGS) -MMD -MP -c -o $@ $<
--- a/README.md
+++ b/README.md
@ -0,0 +1,39 @@
+# Rutile
+Yet another compiled programming language.
+Very unstable and in early development.
+
+```
+proc main*(): cint
+    puts("Hello, world!")
+    return 0
+end
+```
+
+## Building
+### Build time dependencies
+- C11 C compiler (tested on GCC, Clang and TCC)
+- Libc
+- BSD or GNU Make
+- POSIX shell
+- `find` command
+
+### Build time dependencies (single header libraries)
+These are contained in `compiler/libs`.
+- [stb_ds.h](https://github.com/nothings/stb)
+- [optparse](https://github.com/skeeto/optparse)
+
+### Procedure
+Note that the default `CFLAGS` and `LDFLAGS` are meant for development builds.
+```sh
+# debug build (uses default flags):
+make -j$(nproc) rutilec
+# for release:
+make -j$(nproc) CFLAGS='-O2 -march=native -DNDEBUG' LDFLAGS='' ASAN=''
+```
+
+## License
+GNU GPLv3 for the compiler and BSD 3-Clause for the standard library.
+
+## Acknowledgements
+- Christopher Wellons, for his public domain libraries.
+- Sean Barrett's `stb_ds.h`.
--- a/compiler/ast.h
+++ b/compiler/ast.h
@ -0,0 +1,151 @@
+#ifndef _ast_h_
+#define _ast_h_
+
+#include "pre.h"
+#include "datatype.h"
+#include "symbol.h"
+#include "location.h"
+
+/* Node-kind classification helpers. `nk` is an `enum AstType` value.
+ * The argument is parenthesised so any expression can be passed, but it is
+ * evaluated several times: do not pass an expression with side effects. */
+#define ast_node_is_atom(nk) \
+	((nk) == AST_IDENT || (nk) == AST_NUMBER || (nk) == AST_STRLIT || (nk) == AST_PROCCALL)
+/* An atom also counts as a unary expression. */
+#define ast_node_is_unary(nk) \
+	((nk) == AST_UNARY || ast_node_is_atom(nk))
+/* Any binary expression, unary expression or atom. */
+#define ast_node_is_expr(nk) \
+	((nk) == AST_BINEXPR || ast_node_is_unary(nk))
+
+/* Discriminator stored in `Ast.type`; it selects which member of the
+ * anonymous union in `struct Ast` is meaningful. Where a comment names a
+ * member, it is the union member that goes with that node type. */
+enum AstType
+{
+	AST_INVALID, /* For use as a placeholder until the actual type is decided */
+	AST_NUMBER, /* numeric literal; member: number */
+	AST_IDENT, /* identifier; member: ident */
+	AST_STRLIT, /* string literal; member: strlit */
+	AST_PROCDEF, /* procedure definition; member: proc */
+	AST_PROCCALL, /* procedure call; member: call */
+	AST_PROCCALL_ARGS, /* NOTE(review): presumably a call's argument list; its member is not documented here */
+	AST_VARDECL, /* variable declaration; member: var */
+	AST_VARASSIGN, /* variable assignment; member: varassgn */
+	AST_IF, /* if statement/expression; member: ifse */
+	AST_RETURN, /* return statement; member: ret */
+	AST_BREAK, /* break statement */
+	AST_LOOP, /* loop; member: loop */
+	AST_STMTS, /* statement list; member: stmts */
+	AST_EXPRS, /* expression list; member: exprs */
+	AST_BINEXPR, /* binary expression; member: bin */
+	AST_UNARY, /* unary expression; member: unary */
+	AST_ATTRIBUTE, /* attribute-decorated node; member: attribute */
+	AST_DISCARD, /* discard statement; member: discard */
+};
+
+typedef struct Ast Ast;
+
+/* Payload of an AST_BINEXPR node: a binary expression with two operands. */
+typedef struct {
+	Str op; /* the operator */
+	Ast *left, *right; /* operand subtrees */
+	DataType *type; /* filled in by sema */
+} AstBinop;
+
+/* Payload of an AST_UNARY node: a unary operator applied to one operand. */
+typedef struct {
+	Str op; /* the operator */
+	Ast *atom; /* the operand */
+	DataType *type; /* filled in by sema */
+} AstUnary;
+
+/* One `name: type` pair; `AstProc.args` is a vector of these, one per
+ * procedure parameter. */
+typedef struct {
+	Str ident; /* parameter name */
+	Str dtype; /* name of the parameter's data type */
+	/* Symbol kind for this parameter, `SymVar` would represent a mutable
+	 * parameter and `SymLet` an immutable one. */
+	enum SymbolKind kind;
+	Location loc, dtype_loc; /* source locations of the pair and of its type */
+} AstIdentTypePair;
+
+/* Payload of an AST_PROCDEF node: a procedure definition. */
+typedef struct {
+	Str name; /* procedure name */
+	bool ispublic; /* non-public procedures are emitted as `static` by the C backend */
+	Ast *body; /* procedure body; may be nil (the C backend checks for it) */
+	Vec(AstIdentTypePair) args; /* parameters, in declaration order */
+	Ast *rettype; /* return type as written in the source */
+
+	DataType *type; /* procedure type; the C backend reads `type->proc.rettype` */
+} AstProc;
+
+/* Payload of an AST_PROCCALL node: a call to the procedure `name`. */
+typedef struct {
+	Str name; /* name of the called procedure */
+	Ast *args; /* list node holding the arguments; nil when there are none */
+} AstProcCall;
+
+/* Payload of an AST_VARDECL node: a variable declaration. */
+typedef struct {
+	Str name; /* name of the declared variable */
+	/* Data type, nil if no type was explicitly stated, meaning that
+	 * type deduction must be made from the expression, also implying that
+	 * if this field is nil, `expr` MUSTN'T be nil. */
+	Ast *datatype;
+	Ast *expr; /* if the declaration assigns a value */
+	enum SymbolKind kind; /* whether is a let, var or const... */
+	DataType *type; /* filled in by sema */
+} AstVarDecl;
+
+/* Payload of an AST_VARASSIGN node: `name = expr`. */
+typedef struct {
+	Str name; /* variable being assigned to */
+	Ast *expr; /* value expression */
+} AstVarAssign;
+
+/* Payload of an AST_NUMBER node: an integer literal. */
+typedef struct {
+	u64 n; /* value of the literal */
+	DataType *type; /* filled in by the sema */
+} AstNumber;
+
+/* One `elif` branch of an if statement; stored in `AstIf.elifs`. */
+typedef struct {
+	Ast *cond; /* branch condition */
+	Ast *body; /* statements run when `cond` holds */
+} AstElif;
+
+/* Payload of an AST_IF node. */
+typedef struct {
+	Ast *cond; /* condition of the leading `if` */
+	Ast *true_body; /* taken when `cond` holds */
+	Ast *false_body; /* `else` branch; nil when there is none */
+	Vec(AstElif) elifs; /* `elif` branches */
+} AstIf;
+
+/* Payload of an AST_LOOP node.
+ * Abstract representation of a loop, providing a pre and post condition.
+ * `while` loops are modelled as a loop with a precondition only.
+ * For infinite loops both `precond` and `postcond` are nil. */
+typedef struct {
+	Ast *precond, *postcond, *body;
+} AstLoop;
+
+/* Payload of an AST_ATTRIBUTE node: a node decorated with attributes. */
+typedef struct {
+	/* Attributes for now can only be identifiers */
+	Vec(Str) attrs;
+	Ast *node; /* The decorated node */
+} AstAttribute;
+
+/* Payload of an AST_DISCARD node. */
+typedef struct {
+	Ast *expr; /* the node the discard applies to */
+} AstDiscard;
+
+/* A node of the abstract syntax tree. `type` tells which member of the
+ * anonymous union below is valid for this node. */
+struct Ast {
+	enum AstType type;
+	union {
+		AstBinop bin; /* binary expression */
+		AstUnary unary; /* unary operator */
+		AstNumber number; /* number (this is an atom) */
+		Str ident; /* identifier (this is an atom too) */
+		AstProc proc; /* procedure definition */
+		AstProcCall call; /* procedure call */
+		AstVarDecl var; /* variable declaration */
+		AstVarAssign varassgn; /* variable assignment */
+		Ast *ret; /* return statement, this points to its expression (if any) */
+		AstIf ifse; /* if statement/expression */
+		AstLoop loop; /* loop, see AstLoop */
+		Vec(Ast *) stmts; /* statement list */
+		Vec(Ast *) exprs; /* expression list */
+		Str strlit; /* String literal */
+		AstAttribute attribute; /* attribute-decorated node */
+		AstDiscard discard; /* discard statement */
+	};
+	Location loc; /* location in the source code of this node */
+};
+/* Compile-time guard against the node growing past 512 bytes. */
+_Static_assert(sizeof(Ast) <= 512, "AST node got too bloated");
+
+#endif
--- a/compiler/cgBackends.h
+++ b/compiler/cgBackends.h
@ -0,0 +1,10 @@
+#ifndef _cgbackends_
+#define _cgbackends_
+
+/* Code generation backends selectable through `codegen_new`. */
+enum CodegenBackends
+{
+	CgBackendC, /* emits C source text (see cgC) */
+	CgBackendLibGccJit, /* libgccjit backend */
+};
+
+#endif
--- a/compiler/cgC.c
+++ b/compiler/cgC.c
@ -0,0 +1,382 @@
+#include <stdio.h>
+
+#include "pre.h"
+#include "codegen.h"
+#include "cgC.h"
+#include "ast.h"
+#include "libs/stb_ds.h"
+
+/* Write a statement terminator followed by a newline to the stream `out`. */
+#define EMIT_SEMICOLON_NL(out) fputs(";\n", out)
+/* Write a closing brace followed by a newline to the stream `out`. */
+#define EMIT_RB_NL(out) fputs("}\n", out)
+
+static void
+emit_expr(CodegenC *cgc, const Ast *expr);
+static void
+emit_expr_list(CodegenC *cgc, const Vec(Ast *) exprs, bool sep);
+static void
+emit_node(CodegenC *cgc, const Ast *node);
+
+/* Writes `cgc->indent` tab characters to the output stream.
+ * A zero or negative level writes nothing. */
+static void
+indent(CodegenC *cgc)
+{
+	FILE *out = cgc->cgctx->out;
+	int remaining = cgc->indent;
+
+	while (remaining > 0) {
+		fputc('\t', out);
+		--remaining;
+	}
+}
+
+/* Looks `str` up in the string table of the compilation unit and returns
+ * its ID. A string seen for the first time is stored under the next free
+ * ID, and the counter is then advanced. */
+static i64
+intern_strlit(CodegenC *cgc, const Str *str)
+{
+	CodegenCtx *ctx = cgc->cgctx;
+	i64 id = shget(ctx->strings, str->s);
+
+	/* -1 is the table's default value, i.e. the string is not known yet */
+	if (id == -1) {
+		id = ctx->strlit_no++;
+		shput(ctx->strings, str->s, id);
+	}
+	return id;
+}
+
+/* Emits `comment` wrapped in C block-comment delimiters, followed by a
+ * newline when `nl_after` is true.
+ * Nothing is written after the comment otherwise (the previous version
+ * wrote a NUL byte into the generated source in that case). */
+static void
+emit_comment(CodegenC *cgc, Str comment, bool nl_after)
+{
+	FILE *out = cgc->cgctx->out;
+
+	fprintf(out, "/* %s */", comment.s);
+	if (nl_after)
+		fputc('\n', out);
+}
+
+/* Emits an `#include` line for `path`: quoted when `local` is true,
+ * in angle brackets otherwise. */
+static void
+emit_include(CodegenC *cgc, Str path, bool local)
+{
+	const char open = local ? '"' : '<';
+	const char close = local ? '"' : '>';
+
+	fprintf(cgc->cgctx->out, "#include %c%s%c\n", open, path.s, close);
+}
+
+/* Returns the C spelling of a void or basic data type, chosen by its size
+ * in bytes. Returns nil for any other kind and for basic types whose size
+ * has no mapping. `cgc` is currently not used. */
+static const char *
+basic_datatype_to_c(CodegenC *cgc, const DataType *dt)
+{
+	if (dt->kind == DtkVoid)
+		return "void";
+	if (dt->kind != DtkBasic)
+		return nil;
+
+	switch (dt->size) {
+	case 0: return "void";
+	case 1: return "uint8_t";
+	case 2: return "uint16_t";
+	case 4: return "uint32_t";
+	case 8: return "uint64_t";
+	}
+	return nil;
+}
+
+/* Emits the C spelling of `dt`.
+ * Basic and void types go through `basic_datatype_to_c`; structs are
+ * written as `struct <name>`. Kinds without a C spelling yet, and basic
+ * types of an unmapped size, emit nothing. */
+static void
+emit_datatype(CodegenC *cgc, const DataType *dt)
+{
+	switch (dt->kind) {
+	case DtkBasic:
+	case DtkVoid: {
+		const char *ctype = basic_datatype_to_c(cgc, dt);
+		/* the helper returns nil for unmapped sizes and
+		 * fputs(NULL, ...) is undefined behaviour */
+		if (ctype != nil)
+			fputs(ctype, cgc->cgctx->out);
+		break;
+	}
+	case DtkStruct:
+		fprintf(cgc->cgctx->out, "struct %s", dt->name.s);
+		break;
+	default:
+		/* NOTE(review): the remaining kinds are not emitted yet */
+		break;
+	}
+}
+
+/* Emits `attr` wrapped in the compiler attribute syntax
+ * `__attribute((...))`. */
+static void
+emit_c_attribute(CodegenC *cgc, Str attr)
+{
+	FILE *out = cgc->cgctx->out;
+
+	fprintf(out, "__attribute((%s))", attr.s);
+}
+
+/* Emits a C struct declaration for the compound type `dt`, adding the
+ * `packed` attribute when the type is marked as packed.
+ * NOTE(review): only the field types are written, one per line; no member
+ * names are emitted because the fields are stored as bare types. */
+static void
+emit_structdecl(CodegenC *cgc, const DataType *dt)
+{
+	FILE *out = cgc->cgctx->out;
+
+	/* fputs does not interpret format directives, so the struct name
+	 * has to go through fprintf */
+	fprintf(out, "struct %s {\n", dt->name.s);
+	for (isize i = 0; i < arrlen(dt->compound.fields); ++i) {
+		emit_datatype(cgc, dt->compound.fields[i]);
+		EMIT_SEMICOLON_NL(out);
+	}
+	fputc('}', out);
+	if (dt->compound.packed)
+		emit_c_attribute(cgc, Sl("packed"));
+	EMIT_SEMICOLON_NL(out);
+}
+
+/* Emits a C variable declaration: optional `const` (for SymConst), the
+ * type, the name and, when present, `=` followed by the initialiser. */
+static void
+emit_vardecl(CodegenC *cgc, const AstVarDecl *decl)
+{
+	FILE *out = cgc->cgctx->out;
+	const bool is_const = decl->kind == SymConst;
+	const bool has_init = decl->expr != nil;
+
+	if (is_const)
+		fputs("const ", out);
+	emit_datatype(cgc, decl->type);
+	fprintf(out, " %s", decl->name.s);
+
+	if (has_init) {
+		fputc('=', out);
+		emit_expr(cgc, decl->expr);
+	}
+	EMIT_SEMICOLON_NL(out);
+}
+
+/* Emits the assignment statement `<name> = <expr>;`. */
+static void
+emit_varassign(CodegenC *cgc, const AstVarAssign *assign)
+{
+	FILE *out = cgc->cgctx->out;
+
+	fprintf(out, "%s = ", assign->name.s);
+	emit_expr(cgc, assign->expr);
+	EMIT_SEMICOLON_NL(out);
+}
+
+/* Emits a C function definition for `proc`.
+ * Non-public procedures become `static`. An empty parameter list is
+ * written as `void`. The body, when present, is emitted between braces. */
+static void
+emit_proc(CodegenC *cgc, const AstProc *proc)
+{
+	FILE *out = cgc->cgctx->out;
+	const isize nargs = arrlen(proc->args);
+
+	if (!proc->ispublic)
+		fputs("static ", out);
+	emit_datatype(cgc, proc->type->proc.rettype);
+	fprintf(out, " %s(", proc->name.s);
+
+	if (nargs == 0)
+		fputs("void", out);
+	for (isize i = 0; i < nargs; ++i) {
+		const AstIdentTypePair *param = &proc->args[i];
+		if (i > 0)
+			fputc(',', out);
+		/* TODO: emit the parameter's own data type (`dtype`);
+		 * every parameter is declared as uint64_t for now */
+		fputs("uint64_t ", out);
+		fputs((char *)param->ident.s, out);
+	}
+	fputs(")\n{\n", out);
+
+	if (proc->body != nil)
+		emit_node(cgc, proc->body);
+	EMIT_RB_NL(out);
+}
+
+/* Emits a call expression `<name>(<args>)` with comma separated
+ * arguments. No statement terminator is written. */
+static void
+emit_proccall(CodegenC *cgc, const AstProcCall *call)
+{
+	FILE *out = cgc->cgctx->out;
+	const Ast *arglist = call->args;
+
+	fprintf(out, "%s(", call->name.s);
+	if (arglist != nil) {
+		/* the argument list is read through the `stmts` union member */
+		emit_expr_list(cgc, (const Vec(Ast *))arglist->stmts, true);
+	}
+	fputs(")", out);
+}
+
+/* Emits an if statement: the leading `if`, one `else if` per entry of
+ * `ift->elifs` (these were previously dropped from the output), and the
+ * `else` branch when `false_body` is set. */
+static void
+emit_if(CodegenC *cgc, const AstIf *ift)
+{
+	FILE *out = cgc->cgctx->out;
+
+	fputs("if (", out);
+	emit_expr(cgc, ift->cond);
+	fputs("){\n", out);
+	emit_node(cgc, ift->true_body);
+	fputc('}', out);
+
+	/* arrlen() yields 0 for a nil vector, so no elifs means no iterations */
+	const isize nelifs = arrlen(ift->elifs);
+	for (isize i = 0; i < nelifs; ++i) {
+		fputs("else if (", out);
+		emit_expr(cgc, ift->elifs[i].cond);
+		fputs("){\n", out);
+		emit_node(cgc, ift->elifs[i].body);
+		fputc('}', out);
+	}
+
+	if (ift->false_body != nil) {
+		fputs("else{\n", out);
+		emit_node(cgc, ift->false_body);
+		fputc('}', out);
+	}
+	fputc('\n', out);
+}
+
+/* Emits a C `while` loop from the loop's precondition and body. */
+static void
+emit_whileLoop(CodegenC *cgc, const AstLoop *whl)
+{
+	FILE *out = cgc->cgctx->out;
+
+	fputs("while (", out);
+	emit_expr(cgc, whl->precond);
+	fputs("){\n", out);
+
+	emit_node(cgc, whl->body);
+	EMIT_RB_NL(out);
+}
+
+/* Emits a loop.
+ * - precondition set: a `while` loop;
+ * - only postcondition set: not implemented in this backend yet;
+ * - neither set: an infinite loop, written as `for (;;)`. The previous
+ *   version emitted nothing here, silently dropping the loop body. */
+static void
+emit_loop(CodegenC *cgc, const AstLoop *loop)
+{
+	if (loop->precond != nil) {
+		emit_whileLoop(cgc, loop);
+	} else if (loop->postcond != nil) {
+		unreachable();
+	} else {
+		fputs("for (;;){\n", cgc->cgctx->out);
+		if (loop->body != nil)
+			emit_node(cgc, loop->body);
+		fputs("}\n", cgc->cgctx->out);
+	}
+}
+
+/* Emits `return <expr>;`. A nil `ret_expr` emits no expression, because
+ * `emit_expr` ignores nil. */
+static void
+emit_return(CodegenC *cgc, const Ast *ret_expr)
+{
+	FILE *out = cgc->cgctx->out;
+
+	fputs("return ", out);
+	emit_expr(cgc, ret_expr);
+	EMIT_SEMICOLON_NL(out);
+}
+
+/* Emits a `break;` statement. The second parameter is ignored. */
+static void
+emit_break(CodegenC *cgc, const Ast *unused)
+{
+	FILE *out = cgc->cgctx->out;
+
+	(void)unused;
+	fputs("break;\n", out);
+}
+
+/* Emits the node a discard statement applies to; the discard itself
+ * produces no output of its own. */
+static void
+emit_discard(CodegenC *cgc, const Ast *expr)
+{
+	const Ast *discarded = expr;
+
+	emit_node(cgc, discarded);
+}
+
+/* Emits an integer literal in decimal.
+ * The value is converted to `unsigned long long` and printed with `%llu`;
+ * `%lu` only matches a 64-bit value where `unsigned long` is 64 bits wide. */
+static void
+emit_expr_number(CodegenC *cgc, const AstNumber *num)
+{
+	fprintf(cgc->cgctx->out, "%llu", (unsigned long long)num->n);
+}
+
+/* Emits a string literal between double quotes.
+ * An empty literal is represented with a nil `s` pointer (the lexer frees
+ * the buffer of an empty string), so the contents are only written when
+ * the pointer is set; `""` is emitted otherwise. */
+static void
+emit_expr_strlit(CodegenC *cgc, const Str *strlit)
+{
+	FILE *out = cgc->cgctx->out;
+
+	fputc('"', out);
+	if (strlit->s != nil)
+		fputs((const char *)strlit->s, out);
+	fputc('"', out);
+}
+
+/* Emits an identifier verbatim. */
+static void
+emit_expr_ident(CodegenC *cgc, const Str *ident)
+{
+	const char *name = (char *)ident->s;
+
+	fputs(name, cgc->cgctx->out);
+}
+
+/* Emits a unary expression as `(<op><operand>)`.
+ * The operator used to be dropped, so `-x` was emitted as `x`.
+ * NOTE(review): assumes `op` holds the operator's source spelling
+ * ("-", "!", "not") and that `Str_equal` is visible in this file through
+ * pre.h; confirm against the parser. A node without an operator is
+ * emitted as the bare operand, which is the previous behaviour. */
+static void
+emit_expr_unary(CodegenC *cgc, const AstUnary *unary)
+{
+	FILE *out = cgc->cgctx->out;
+
+	if (unary->op.s == nil || unary->op.len <= 0) {
+		emit_expr(cgc, unary->atom);
+		return;
+	}
+
+	fputc('(', out);
+	if (Str_equal(unary->op, Sl("not")))
+		fputc('!', out); /* C spelling of logical negation */
+	else
+		fwrite(unary->op.s, 1, (size_t)unary->op.len, out);
+	emit_expr(cgc, unary->atom);
+	fputc(')', out);
+}
+
+/* Writes the C spelling of the binary operator `op`.
+ * `and` / `or` become `&&` / `||`; any other operator is written as is.
+ * A missing operator falls back to `+`, the only operator this backend
+ * used to emit.
+ * NOTE(review): assumes `op` holds the operator's source spelling and that
+ * `Str_equal` is visible in this file through pre.h; confirm against the
+ * parser. */
+static void
+emit_binop_operator(CodegenC *cgc, Str op)
+{
+	FILE *out = cgc->cgctx->out;
+
+	if (op.s == nil || op.len <= 0)
+		fputc('+', out);
+	else if (Str_equal(op, Sl("and")))
+		fputs("&&", out);
+	else if (Str_equal(op, Sl("or")))
+		fputs("||", out);
+	else
+		fwrite(op.s, 1, (size_t)op.len, out);
+}
+
+/* Emits a binary expression as `(<left><op><right>)`.
+ * Previously every binary expression was emitted with `+`, whatever its
+ * operator was. */
+static void
+emit_expr_binop(CodegenC *cgc, const AstBinop *expr)
+{
+	/* guard binops with parenthesis, even if they are redundant */
+	fputc('(', cgc->cgctx->out);
+	emit_expr(cgc, expr->left);
+	emit_binop_operator(cgc, expr->op);
+	emit_expr(cgc, expr->right);
+	fputc(')', cgc->cgctx->out);
+}
+
+/* Emits an expression node by dispatching on its type. A nil expression
+ * emits nothing; a node type that is not an expression is flagged with
+ * unreachable(). */
+static void
+emit_expr(CodegenC *cgc, const Ast *expr)
+{
+	if (expr == nil)
+		return;
+
+	switch (expr->type) {
+	case AST_NUMBER:
+		emit_expr_number(cgc, &expr->number);
+		return;
+	case AST_STRLIT:
+		emit_expr_strlit(cgc, &expr->strlit);
+		return;
+	case AST_IDENT:
+		emit_expr_ident(cgc, &expr->ident);
+		return;
+	case AST_PROCCALL:
+		emit_proccall(cgc, &expr->call);
+		return;
+	case AST_UNARY:
+		emit_expr_unary(cgc, &expr->unary);
+		return;
+	case AST_BINEXPR:
+		emit_expr_binop(cgc, &expr->bin);
+		return;
+	default:
+		break;
+	}
+	unreachable();
+}
+
+/* Emits every expression of `exprs` in order. When `sep` is true a comma
+ * is written between consecutive expressions (never after the last). */
+static void
+emit_expr_list(CodegenC *cgc, const Vec(Ast *) exprs, bool sep)
+{
+	const isize count = arrlen(exprs);
+
+	for (isize idx = 0; idx < count; ++idx) {
+		if (sep && idx > 0)
+			fputc(',', cgc->cgctx->out);
+		emit_expr(cgc, exprs[idx]);
+	}
+}
+
+/* Emits every statement of `stmts` in order. */
+static void
+emit_stmt_list(CodegenC *cgc, Vec(Ast *) stmts)
+{
+	isize idx = 0;
+
+	while (idx < arrlen(stmts)) {
+		emit_node(cgc, stmts[idx]);
+		++idx;
+	}
+}
+
+/* Emits a statement-level node by dispatching on its type.
+ * - A nil node emits nothing (mirrors `emit_expr`).
+ * - An expression used as a statement is terminated with `;`, like a
+ *   procedure call; the terminator used to be missing.
+ * - List-internal and placeholder node types are flagged with
+ *   unreachable(). */
+static void
+emit_node(CodegenC *cgc, const Ast *node)
+{
+	if (node == nil)
+		return;
+
+	switch (node->type) {
+	case AST_STMTS:
+		emit_stmt_list(cgc, node->stmts);
+		break;
+	case AST_PROCDEF:
+		emit_proc(cgc, &node->proc);
+		break;
+	case AST_PROCCALL:
+		emit_proccall(cgc, &node->call);
+		EMIT_SEMICOLON_NL(cgc->cgctx->out);
+		break;
+	case AST_IF:
+		emit_if(cgc, &node->ifse);
+		break;
+	case AST_LOOP:
+		emit_loop(cgc, &node->loop);
+		break;
+	case AST_RETURN:
+		emit_return(cgc, node->ret);
+		break;
+	case AST_BREAK:
+		emit_break(cgc, nil);
+		break;
+	case AST_DISCARD:
+		emit_discard(cgc, node->discard.expr);
+		break;
+	case AST_VARDECL:
+		emit_vardecl(cgc, &node->var);
+		break;
+	case AST_VARASSIGN:
+		emit_varassign(cgc, &node->varassgn);
+		break;
+	case AST_BINEXPR:
+	case AST_UNARY:
+	case AST_NUMBER:
+	case AST_STRLIT:
+	case AST_IDENT:
+		/* expression statement */
+		emit_expr(cgc, node);
+		EMIT_SEMICOLON_NL(cgc->cgctx->out);
+		break;
+	case AST_ATTRIBUTE:
+		/* NOTE(review): attribute nodes produce no output, as before;
+		 * decide whether the decorated node should be emitted here */
+		break;
+	case AST_PROCCALL_ARGS:
+	case AST_EXPRS:
+	case AST_INVALID:
+		unreachable();
+	}
+}
+
+/* Entry point of the C backend: writes the translation of `program` to
+ * stdout. The output starts with a comment naming the source file and an
+ * include of <stdint.h>, followed by the emitted program. */
+void
+cgC(CodegenC *cgc, const Ast *program)
+{
+	CodegenCtx *ctx = cgc->cgctx;
+	char banner[255] = {0};
+
+	ctx->out = stdout;
+
+	snprintf(banner, sizeof(banner),
+		"generated C IR from %s", ctx->cctx->current_filename.s
+	);
+	emit_comment(cgc, Str_from_c(banner), true);
+
+	emit_include(cgc, Sl("stdint.h"), false);
+	fputc('\n', ctx->out);
+
+	emit_node(cgc, program);
+}
--- a/compiler/cgC.h
+++ b/compiler/cgC.h
@ -0,0 +1,15 @@
+#ifndef _cgC_h_
+#define _cgC_h_
+
+#include "codegen.h"
+#include "ast.h"
+
+/* State of the C backend. */
+typedef struct {
+	CodegenCtx *cgctx; /* shared code generation context (output stream, string table...) */
+	int indent; /* indentation level: number of tabs written by `indent` in cgC.c */
+} CodegenC;
+
+/* Emits C source code for `program` using the backend state `cgc`. */
+void
+cgC(CodegenC *cgc, const Ast *program);
+
+#endif
--- a/compiler/codegen.c
+++ b/compiler/codegen.c
@ -0,0 +1,101 @@
+#define _POSIX_C_SOURCE 200809L
+#include <unistd.h>
+#include <spawn.h>
+#include <sys/wait.h>
+
+#include "codegen.h"
+#include "cgC.h"
+#include "messages.h"
+#include "libs/stb_ds.h"
+
+/* (Std)In --> process --> (Std)Out
+ *
+ * Spawns the program at `path` with arguments `argv` through posix_spawn
+ * and stores the child's process ID in `*pid`.
+ * - If `in` is not nil, a pipe is created whose read end becomes the
+ *   child's standard input; `*in` receives a stream for writing to it.
+ * - If `out` is not nil, a pipe is created whose write end becomes the
+ *   child's standard output; `*out` receives a stream for reading from it.
+ * Every failure (pipe, posix_spawn, fdopen) is reported through `fatal`.
+ */
+void
+spawn_with_iofp(const char *path, char *const *argv,
+					 pid_t *pid, FILE **in, FILE **out)
+{
+	int irp[2], asmp[2];
+	posix_spawn_file_actions_t fileacts;
+	posix_spawn_file_actions_init(&fileacts);
+
+	if (in != nil) {
+		/* the "in" pipe */
+		if (pipe(irp) < 0)
+			fatal(nil, nil, "could not open pipe");
+		/* child: close the write end, read end becomes stdin */
+		posix_spawn_file_actions_addclose(&fileacts, irp[1]);
+		posix_spawn_file_actions_adddup2(&fileacts, irp[0], STDIN_FILENO);
+	}
+	if (out != nil) {
+		/* the "out" pipe */
+		if (pipe(asmp) < 0)
+			fatal(nil, nil, "could not open pipe");
+		/* child: close the read end, write end becomes stdout */
+		posix_spawn_file_actions_addclose(&fileacts, asmp[0]);
+		posix_spawn_file_actions_adddup2(&fileacts, asmp[1], STDOUT_FILENO);
+	}
+
+	if (posix_spawn(pid, path, &fileacts, nil, argv, nil) != 0)
+		fatal(nil, nil, "posix_spawn failed");
+
+	posix_spawn_file_actions_destroy(&fileacts);
+
+	/* parent: close the ends handed to the child, wrap the others */
+	if (in != nil) {
+		close(irp[0]);
+		if ((*in = fdopen(irp[1], "wb")) == nil)
+			fatal(nil, nil, "fdopen fail");
+	}
+	if (out != nil) {
+		close(asmp[1]);
+		if ((*out = fdopen(asmp[0], "rb")) == nil)
+			fatal(nil, nil, "fdopen fail");
+	}
+}
+
+/* Waits for the child process `pid` to terminate and reports, through
+ * `fatal`, a failed wait, an abnormal termination or a non-zero exit
+ * status.
+ * The status is only inspected after a successful waitpid (it used to be
+ * read even when the call failed), and the messages no longer name a
+ * specific program, since any child may be waited for here. */
+void
+process_wait(pid_t pid)
+{
+	int pstat = 0;
+
+	if (waitpid(pid, &pstat, 0) < 0) {
+		fatal(nil, nil, "could not wait for child process");
+		return;
+	}
+	if (!WIFEXITED(pstat)) {
+		fatal(nil, nil, "child process crashed");
+		return;
+	}
+	/* did not crash, read return status */
+	const int exitc = WEXITSTATUS(pstat);
+	if (exitc != 0)
+		fatal(nil, nil, "child process exited with non-zero status %d", exitc);
+}
+
+
+/* Allocates a code generation context for the compiler `cm` and the given
+ * `backend`. Both child process IDs start as -1 ("no process") and the
+ * string table reports -1 for strings that are not interned yet.
+ * An allocation failure is reported through `fatal`. Release the context
+ * with `codegen_destroy`. */
+CodegenCtx *
+codegen_new(Compiler *cm, enum CodegenBackends backend)
+{
+	CodegenCtx *ctx = calloc(1, sizeof(*ctx));
+	if (ctx == nil) {
+		fatal(nil, nil, "out of memory");
+		return nil;
+	}
+	ctx->ext_pid = ctx->ld_pid = -1;
+	ctx->backend = backend;
+	ctx->cctx = cm;
+
+	sh_new_arena(ctx->strings);
+	shdefault(ctx->strings, -1);
+	return ctx;
+}
+
+/* Waits for the child processes recorded in the context (if any), then
+ * frees the string table and the context itself. */
+void
+codegen_destroy(CodegenCtx *cgctx)
+{
+	const pid_t ext = cgctx->ext_pid;
+	if (ext != -1)
+		process_wait(ext);
+
+	const pid_t ld = cgctx->ld_pid;
+	if (ld != -1)
+		process_wait(ld);
+
+	shfree(cgctx->strings);
+	free(cgctx);
+}
+
+/* Generates code for `program` with the backend selected in `cgctx`.
+ * Only the C backend is implemented; selecting libgccjit is reported
+ * through `fatal`. */
+void
+codegen(CodegenCtx *cgctx, Ast *program)
+{
+	if (cgctx->backend == CgBackendC) {
+		CodegenC c_backend = { .cgctx = cgctx, .indent = 2 };
+		cgC(&c_backend, program);
+	} else if (cgctx->backend == CgBackendLibGccJit) {
+		fatal(nil, nil, "libgccjit backend not implemented yet");
+	}
+}
--- a/compiler/codegen.h
+++ b/compiler/codegen.h
@ -0,0 +1,37 @@
+#ifndef _codegen_h_
+#define _codegen_h_
+#include <stdio.h> /* FILE */
+#include <sys/types.h> /* for pid_t */
+
+#include "pre.h"
+#include "ast.h"
+#include "state.h"
+#include "cgBackends.h"
+
+/* Backend independent code generation state, created by `codegen_new`
+ * and released by `codegen_destroy`. */
+typedef struct {
+	FILE *out; /* File where the generated code is written (the C backend sets it to stdout) */
+	FILE *asm_out; /* NOTE(review): not used by the code visible here */
+
+	i64 strlit_no; /* next free string literal ID */
+	i64 internal_label; /* NOTE(review): presumably a counter for generated labels; unused in the code visible here */
+	/* Hash map acting as a set, which contains all strings in a compilation
+	 * unit. Strings get interned on this hash map to remove duplicates.
+	 */
+	HashMapStr(i64) *strings;
+	pid_t ext_pid, ld_pid; /* child processes to wait for on destroy; -1 means none */
+	enum CodegenBackends backend; /* backend selected at creation */
+	Compiler *cctx; /* owning compiler context */
+} CodegenCtx;
+
+/* Spawns `path` with `argv`; optional pipes are connected to the child's
+ * standard input (`in`) and standard output (`out`). */
+void
+spawn_with_iofp(const char *path, char *const *argv, pid_t *pid, FILE **in, FILE **out);
+/* Waits for `pid` and reports abnormal termination or a non-zero status. */
+void
+process_wait(pid_t pid);
+/* Creates a code generation context for the given backend. */
+CodegenCtx *
+codegen_new(Compiler *cm, enum CodegenBackends backend);
+/* Waits for pending child processes and frees the context. */
+void
+codegen_destroy(CodegenCtx *cgctx);
+/* Generates code for `program` with the context's backend. */
+void
+codegen(CodegenCtx *cgctx, Ast *program);
+
+#endif
--- a/compiler/datatype.h
+++ b/compiler/datatype.h
@ -0,0 +1,53 @@
+#ifndef _datatype_h_
+#define _datatype_h_
+#include "pre.h"
+
+/* Category of a `DataType`; stored in `DataType.kind`. */
+enum DataTypeKind
+{
+	DtkInvalid = 0, /* zero value, so a zero-initialised DataType is invalid */
+	DtkVoid, /* emitted as `void` by the C backend */
+	DtkBasic, /* emitted by the C backend as a fixed-width unsigned integer chosen by size */
+	DtkStruct, /* struct; uses `DataType.compound` */
+	DtkUnion, /* union; uses `DataType.compound` */
+	DtkProc, /* procedure; uses `DataType.proc` */
+	DtkArray, /* array; uses `DataType.array` */
+	DtkBool, /* boolean */
+};
+
+typedef struct DataType DataType;
+
+/* Description of a struct or union type. */
+typedef struct {
+	bool packed; /* the C backend adds the `packed` attribute when set */
+	Vec(DataType *) fields; /* field types, in order */
+} DataTypeCompound;
+
+/* A data type. `kind` selects which member of the anonymous union, if
+ * any, is meaningful. */
+struct DataType
+{
+	enum DataTypeKind kind;
+	u16 size; /* size in bytes of the data type */
+	bool builtin; /* if this type is defined in compilerland */
+	bool sign; /* if the type is numerical and has a sign or not */
+	Str name; /* type name; the C backend uses it as the struct tag */
+
+	union {
+		DataTypeCompound compound; /* Represents either a struct or union type */
+		/* Procedure type */
+		struct {
+			DataType *rettype; /* return type */
+			Vec(DataType *) argtypes; /* parameter types, in order */
+			bool public;
+			bool extern_lnk; /* external linkage */
+			bool c_varargs; /* C-style varargs (for FFI) */
+		} proc;
+		/* Array type */
+		struct {
+			DataType *base; /* element type */
+			isize len; /* number of elements */
+		} array;
+	};
+};
+
+/* Result of a type check. */
+typedef struct {
+	bool ok; /* whether the type checking succeeded */
+	Str msg; /* message describing the type error */
+} DataTypeCheck;
+
+#endif
--- a/compiler/lex.c
+++ b/compiler/lex.c
@ -0,0 +1,581 @@
+#include <stdio.h> /* feof, ferror, fread, FILE, EOF */
+#include <stdlib.h> /* malloc calloc free */
+#include <string.h> /* memset */
+
+#include "lex.h"
+#include "messages.h"
+#include "pre.h"
+#include "libs/stb_ds.h"
+
+/* Total size of the lexing buffer; it is used as two halves. */
+#define LEX_BUFFER_SIZE 8192
+/* Parenthesised so the macro stays a single operand in any expression
+ * (e.g. `x / LEX_HALF_BUFFER_SIZE`). */
+#define LEX_HALF_BUFFER_SIZE (LEX_BUFFER_SIZE / 2)
+/* Byte stored right after the valid data of each half. */
+#define LEX_BUFFER_SENTINEL '\0'
+
+/* Size of the identifier scratch buffer, terminator included. */
+#define MAX_IDENT_SIZE 1024u
+/* Initial and maximum size in bytes of a string literal buffer. */
+#define STRING_LITERAL_BASE_SIZE 255
+#define STRING_LITERAL_MAX_SIZE 4096
+
+/* True when the scanner points at a sentinel byte. */
+#define at_buffer_end(ls) (*(ls)->fwd == LEX_BUFFER_SENTINEL)
+/* Character classes for identifiers. The argument is parenthesised, but it
+ * is evaluated several times: do not pass an expression with side effects. */
+#define ascii_isident(c) ((c) == '_' || (c) == '?' || (c) == '!' || ascii_isalnum(c))
+#define ascii_isident_start(c) ((c) == '_' || ascii_isalpha(c))
+
+/* Report a recoverable error at the lexer's current location. */
+#define lex_error(ls, ...) do {							\
+		error((ls)->cm, &(ls)->cur_loc, __VA_ARGS__);	\
+	} while(0)
+
+/* Report a fatal error at the lexer's current location. */
+#define lex_fatal(ls, ...) do {							\
+		fatal((ls)->cm, &(ls)->cur_loc, __VA_ARGS__);	\
+	} while(0)
+
+/* Result of reading one byte: `ok` is false when no character could be
+ * read, otherwise `val` holds the byte. */
+typedef Optional(u8) MaybeChr;
+
+/* Printable spelling of every token, indexed by `enum LexTokenId`.
+ * Every ID needs an entry: `lex_match` prints entries with `%s`, so a
+ * missing one (as `T_LOGNOT` used to be) would pass a null pointer to
+ * the formatter. */
+const char *TokenIdStr[T_TOKEN_COUNT] = {
+	[T_INVALID] = "(invalid token)",
+	[T_PLUS] = "+",
+	[T_MINUS] = "-",
+	[T_STAR] = "*",
+	[T_BAR] = "/",
+	[T_EXCLAMATION] = "!",
+	[T_LPAREN] = "(",
+	[T_RPAREN] = ")",
+	[T_COMMA] = ",",
+	[T_LESSTHAN] = "<",
+	[T_GREATTHAN] = ">",
+	[T_LOGNOT] = "not",
+	[T_LOGAND] = "and",
+	[T_LOGOR] = "or",
+	[T_EQUAL] = "=",
+	[T_LOGICEQUAL] = "==",
+	[T_NOTEQUAL] = "!=",
+	[T_HASH] = "#",
+	[T_COLON] = ":",
+	[T_SEMICOLON] = ";",
+	[T_LBRACKET] = "[",
+	[T_RBRACKET] = "]",
+	[T_LBRACE] = "{",
+	[T_RBRACE] = "}",
+	[T_IDENT] = "(identifier)",
+	[T_STRING] = "(string literal)",
+	[T_NUMBER] = "(number)",
+	[T_DECNUMBER] = "(decimal number)",
+	[T_CONST] = "const",
+	[T_DISCARD] = "discard",
+	[T_ELIF] = "elif",
+	[T_ELSE] = "else",
+	[T_END] = "end",
+	[T_IF] = "if",
+	[T_LET] = "let",
+	[T_PROC] = "proc",
+	[T_RETURN] = "return",
+	[T_VAR] = "var",
+	[T_WHILE] = "while",
+	[T_STRUCT] = "struct",
+	[T_USE] = "use",
+	[T_BREAK] = "break",
+	[T_NEXT] = "next",
+	[T_EOF] = "(EOF)",
+	[T_ERROR] = "(error)",
+};
+
+/* ASCII-only character class helpers; these compare against character
+ * ranges directly. */
+
+/* True for the decimal digits '0' to '9'. */
+static bool
+ascii_isdigit(u32 c)
+{
+	if (c < '0')
+		return false;
+	return c <= '9';
+}
+
+/* True for the ASCII letters 'a' to 'z' and 'A' to 'Z'. */
+static bool
+ascii_isalpha(u32 c)
+{
+	const bool lower = c >= 'a' && c <= 'z';
+	const bool upper = c >= 'A' && c <= 'Z';
+
+	return lower || upper;
+}
+
+/* True for the space character and for the control characters from
+ * '\t' to '\r' (tab, newline, vertical tab, form feed, carriage return). */
+static bool
+ascii_isspace(u32 c)
+{
+	switch (c) {
+	case ' ':
+	case '\t':
+	case '\n':
+	case '\v':
+	case '\f':
+	case '\r':
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* True for ASCII letters and decimal digits. */
+static bool
+ascii_isalnum(u32 c)
+{
+	if (ascii_isalpha(c))
+		return true;
+	return ascii_isdigit(c);
+}
+
+/* Advances the current source location past `chr`: a newline starts the
+ * next line at column 1, any other character (tabs included) advances the
+ * column by one. */
+static void
+update_line_count(LexState *ls, u8 chr)
+{
+	if (chr == '\n') {
+		ls->cur_loc.column = 1;
+		++ls->cur_loc.line;
+	} else {
+		++ls->cur_loc.column;
+	}
+}
+
+/* Returns the byte under the scanner without consuming it. */
+static u8
+peek(LexState *ls)
+{
+	const u8 *cursor = ls->fwd;
+
+	return cursor[0];
+}
+
+/* Moves the scanner back by `n` bytes. The line counter is decremented
+ * when the scanner lands on a newline, and the column counter is always
+ * decremented by one (which is not quite right when landing on a
+ * newline). */
+static void
+backup(LexState *ls, int n)
+{
+	u8 *target = ls->fwd - n;
+
+	ls->fwd = target;
+	if (*target == '\n')
+		--ls->cur_loc.line;
+	--ls->cur_loc.column;
+}
+
+/* Reads up to `n` bytes from the input file into `buf`, stores the number
+ * of bytes read in `*ar` and writes the sentinel right after them (so
+ * `buf` must have room for `n + 1` bytes).
+ * Returns false when no data is left. That includes a read that returns
+ * zero bytes: the end-of-file flag is only raised by the read that hits
+ * the end, so an input whose size is an exact multiple of `n` used to be
+ * reported as one more, empty, successful read. `buf` and `*ar` are left
+ * untouched in that case. A read error is fatal. */
+static bool
+read_buf(LexState *ls, u8 *buf, isize n, isize *ar)
+{
+	if (feof(ls->input_fp))
+		return false;
+
+	const usize rb = fread(buf, sizeof(*buf), (usize)n, ls->input_fp);
+	if (ferror(ls->input_fp)) {
+		fatal(ls->cm, nil, "could not read input file\n");
+	}
+	if (rb == 0)
+		return false;
+
+	*ar = (isize)rb;
+	buf[rb] = LEX_BUFFER_SENTINEL;
+	return true;
+}
+
+/* Called when the scanner sits on a sentinel byte. If the scanner is at
+ * the end of the valid data of one half of the buffer, the other half is
+ * filled from the input file and the scanner is moved to its start.
+ * Returns false when that read reports that no more data is available.
+ * In every case that returns true, `lbegin` is reset to the scanner
+ * position; this includes a sentinel byte found anywhere else, for which
+ * nothing is read. */
+static bool
+reload_buffers(LexState *ls)
+{
+	//if ((ls->fwd != ls->buf + ls->buflen) || (ls->fwd != ls->buf + ls->buflen2))
+	//	lex_fatal(ls, "invalid nil byte in middle of source file");
+
+	/* one past the valid data of each half */
+	const u8 *end_of_buf1 = ls->buf + ls->buflen;
+	const u8 *end_of_buf2 = ls->buf + LEX_HALF_BUFFER_SIZE + ls->buflen2;
+
+	if (ls->fwd == end_of_buf1) { /* end of first buffer */
+		u8 *buf2 = ls->buf + LEX_HALF_BUFFER_SIZE;
+		if (!read_buf(ls, buf2, LEX_HALF_BUFFER_SIZE, &ls->buflen2))
+			return false; /* reached EOF, no more data */
+		ls->fwd = buf2;
+	} else if (ls->fwd == end_of_buf2) { /* end of second buffer */
+		u8 *buf1 = ls->buf;
+		if (!read_buf(ls, buf1, LEX_HALF_BUFFER_SIZE, &ls->buflen))
+			return false; /* reached EOF, no more data */
+		ls->fwd = buf1;
+	}
+	/* reset pointers back to the beginning of the buffer */
+	ls->lbegin = ls->fwd;
+	return true;
+}
+
+/* Consumes and returns the next input byte, refilling the buffer when the
+ * scanner reaches a sentinel. Returns an empty value at end of input.
+ * The byte is read again after a refill, so the line/column bookkeeping
+ * sees the character that is actually returned (it used to be updated
+ * with the sentinel, miscounting a newline that starts a fresh buffer). */
+static MaybeChr
+read_chr(LexState *ls)
+{
+	u8 chr = peek(ls);
+	if (chr == LEX_BUFFER_SENTINEL) { /* maybe end of buffer */
+		if (!reload_buffers(ls))
+			return None(MaybeChr);
+		chr = peek(ls);
+	}
+	update_line_count(ls, chr);
+	++ls->fwd;
+	return Some(MaybeChr, chr);
+}
+
+/* Consumes whitespace and returns the first character that is not
+ * whitespace (already consumed), or an empty value at end of input.
+ * `lbegin` is advanced once per skipped character, so it ends up on the
+ * returned character while `fwd` points right after it:
+ * [ abc =    2*9  -  1 ]
+ *     lbegin-^^-fwd
+ */
+static MaybeChr
+skip_whitespace(LexState *ls)
+{
+	MaybeChr cur = read_chr(ls);
+
+	while (cur.ok && ascii_isspace(cur.val)) {
+		++ls->lbegin;
+		cur = read_chr(ls);
+	}
+	if (!cur.ok)
+		return None(MaybeChr);
+	return cur;
+}
+
+/* Builds a token with the T_ERROR ID and every other field zeroed. */
+static LexToken
+make_error(void)
+{
+	LexToken err = { .id = T_ERROR };
+
+	return err;
+}
+
+/* Returns the key stored in the identifier table for `ident`, inserting
+ * the identifier first when it is not there yet. */
+static u8 *
+intern_identifier(LexState *ls, u8 *ident)
+{
+	IdentsBucket *known = shgetp_null(ls->idents, ident);
+
+	if (known != nil)
+		return (u8 *)known->key;
+
+	shput(ls->idents, ident, 0);
+	return (u8 *)shgets(ls->idents, ident).key;
+}
+
+/* Scans an identifier starting at `lbegin`:
+ *
+ * *──┬(ident)┬──*
+ *    ╰───<───╯
+ *
+ * Returns a T_IDENT token whose name is interned, or an error token when
+ * the identifier does not fit the scratch buffer.
+ */
+static LexToken
+identifier(LexState *ls)
+{
+	/* scratch buffer; per the original note its contents get copied to
+	 * the hash table arena */
+	u8 scratch[MAX_IDENT_SIZE];
+	usize n = 0;
+	MaybeChr cur = { *ls->lbegin, true };
+
+	for (;;) {
+		if (!cur.ok || !ascii_isident(cur.val))
+			break;
+		if (n + 1 == MAX_IDENT_SIZE) {
+			lex_error(ls, "identifier is too long (max: %u)\n", MAX_IDENT_SIZE);
+			return make_error();
+		}
+		scratch[n] = cur.val;
+		++n;
+		cur = read_chr(ls);
+	}
+	scratch[n] = '\0';
+
+	/* the character that ended the identifier belongs to the next token */
+	if (cur.ok)
+		backup(ls, 1);
+
+	LexToken tok = {
+		.id = T_IDENT,
+		.ident = {intern_identifier(ls, scratch), n},
+		.len = n,
+	};
+	return tok;
+}
+
+/* Scans a string literal; the opening quote has already been consumed.
+ * Returns a T_STRING token owning a NUL terminated heap buffer, or a nil
+ * buffer with length 0 for the empty literal. An unterminated literal
+ * (end of input or newline before the closing quote) or one that exceeds
+ * STRING_LITERAL_MAX_SIZE yields an error token.
+ *
+ * Fixes over the previous version:
+ * - the buffer always keeps a spare byte for the terminator (a literal of
+ *   exactly the buffer's capacity wrote one byte past the allocation);
+ * - the buffer is released on every error path;
+ * - allocation failures are reported;
+ * - the first character is checked for end of input/newline like the rest.
+ */
+static LexToken
+string_literal(LexState *ls)
+{
+	isize cap = STRING_LITERAL_BASE_SIZE;
+	isize len = 0;
+	u8 *buf = malloc(cap);
+	if (buf == nil) {
+		lex_fatal(ls, "out of memory");
+		return make_error();
+	}
+
+	MaybeChr chr = read_chr(ls);
+	for (;;) {
+		if (!chr.ok || chr.val == '\n') {
+			lex_error(ls, "unterminated string literal");
+			goto err;
+		}
+		if (chr.val == '"')
+			break;
+		if (len + 1 == STRING_LITERAL_MAX_SIZE) {
+			lex_error(ls, "string literal length exceeds maximum of %d bytes", STRING_LITERAL_MAX_SIZE);
+			goto err;
+		}
+		/* room for this character plus the terminator */
+		if (len + 2 > cap) {
+			u8 *grown = realloc(buf, cap * 2);
+			if (grown == nil) {
+				lex_fatal(ls, "out of memory");
+				goto err;
+			}
+			buf = grown;
+			cap *= 2;
+		}
+		buf[len++] = chr.val;
+		chr = read_chr(ls);
+	}
+
+	if (len > 0) {
+		buf[len] = '\0';
+	} else { /* empty literal */
+		free(buf);
+		buf = nil;
+	}
+
+	return (LexToken) {
+		.id = T_STRING,
+		.str = {buf, len},
+		.len = len,
+	};
+err:
+	free(buf);
+	return make_error();
+}
+
+/*	Identifies a numeric literal that may have a prefix:
+ *
+ *	('0')─┬──────────────────────┬─*
+ *	      ├('b')╭──┬(digit)┬─────╯
+ *	      ├('o')┤  ╰───<───╯
+ *	      ╰('x')╯
+ * Indirectly based on a BSD (?) implementation.
+ *
+ * The first digit is read from `lbegin`; the scanner is already past it.
+ * Returns a T_NUMBER token carrying the value, or an error token.
+ *
+ * Fixes over the previous version:
+ * - a plain `0` followed by another token no longer swallows the first
+ *   character of that token (e.g. the `)` in `f(0)`);
+ * - a base prefix without any digit after it is an error;
+ * - hexadecimal literals accept upper case digits;
+ * - the digit lookup no longer matches the table's terminating NUL.
+ */
+static LexToken
+number_literal(LexState *ls)
+{
+	LexToken t = { .id = T_NUMBER };
+	u64 number = 0;
+	u8 base = 10;
+	isize ndigits = 0;
+
+	MaybeChr chr = { *ls->lbegin, true };
+
+	if (chr.val == '0') {
+		chr = read_chr(ls); /* character following the 0 */
+		if (!chr.ok) { /* EOF edge case */
+			return t; /* 0 */
+		}
+		switch (chr.val) {
+		case 'b':
+			base = 2;
+			break;
+		case 'o':
+			base = 8;
+			break;
+		case 'x':
+			base = 16;
+			break;
+		default:
+			if (ascii_isdigit(chr.val)) {
+				lex_error(ls, "use '0o' for an octal literal");
+				return make_error();
+			}
+			/* start of another token: hand the character back */
+			backup(ls, 1);
+			return t; /* 0 */
+		}
+		chr = read_chr(ls);
+		if (!chr.ok) {
+			lex_error(ls, "expected a digit after the base prefix");
+			return make_error();
+		}
+	}
+
+	const u64 mmax = U64_MAX / base;
+	static const u8 digits[] = "0123456789abcdef";
+
+	while (chr.ok) {
+		u8 c = chr.val;
+		/* only hexadecimal literals fold case, so a decimal literal
+		 * followed by upper case letters is lexed as before */
+		if (base == 16 && c >= 'A' && c <= 'F')
+			c = (u8)(c - 'A' + 'a');
+
+		/* search the 16 digit characters only, not the NUL */
+		const u8 *digitp = memchr(digits, c, sizeof(digits) - 1);
+		if (digitp == nil)
+			break;
+
+		const u8 digit = (u8)(digitp - digits);
+		if (digit >= base) {
+			lex_error(ls, "invalid literal");
+			return make_error();
+		}
+		/* overflow for scaling by the base */
+		if (number > mmax)
+			goto overflow;
+		number *= base;
+		/* overflow for adding the digit */
+		if (U64_MAX - digit < number)
+			goto overflow;
+
+		number += digit;
+		++ndigits;
+		chr = read_chr(ls);
+	}
+	/* the character that ended the literal belongs to the next token */
+	if (chr.ok)
+		backup(ls, 1);
+
+	if (ndigits == 0) {
+		lex_error(ls, "expected a digit after the base prefix");
+		return make_error();
+	}
+
+	t.inumber = number;
+	return t;
+overflow:
+	lex_error(ls, "integer literal is too big (2^64 max)");
+	return make_error();
+}
+
+/* Turns an identifier token into the matching keyword token, or returns
+ * the token unchanged when the identifier is not a keyword.
+ * The switch dispatches on the first character and the rest of the
+ * identifier is compared against the rest of each candidate keyword.
+ * The keyword token keeps the identifier's full length; it used to get
+ * the length of the compared remainder, which is one byte short. */
+static LexToken
+keyword(LexToken *t)
+{
+#define kwcmp(ident, kw, tid) \
+	{if (Str_equal(ident, kw)) return (LexToken){ .id = tid, .len = t->len };}
+
+	Str ident = t->ident;
+	/* drop the first character: it is consumed by the switch below */
+	--ident.len;
+	switch (*ident.s++) {
+	case 'a':
+		kwcmp(ident, Sl("nd"), T_LOGAND);
+		break;
+	case 'b':
+		kwcmp(ident, Sl("reak"), T_BREAK);
+		break;
+	case 'c':
+		kwcmp(ident, Sl("onst"), T_CONST);
+		break;
+	case 'd':
+		kwcmp(ident, Sl("iscard"), T_DISCARD);
+		break;
+	case 'e':
+		kwcmp(ident, Sl("nd"), T_END);
+		kwcmp(ident, Sl("lse"), T_ELSE);
+		kwcmp(ident, Sl("lif"), T_ELIF);
+		break;
+	case 'i':
+		kwcmp(ident, Sl("f"), T_IF);
+		break;
+	case 'l':
+		kwcmp(ident, Sl("et"), T_LET);
+		break;
+	case 'n':
+		kwcmp(ident, Sl("ot"), T_LOGNOT);
+		kwcmp(ident, Sl("ext"), T_NEXT);
+		break;
+	case 'o':
+		kwcmp(ident, Sl("r"), T_LOGOR);
+		break;
+	case 'p':
+		kwcmp(ident, Sl("roc"), T_PROC);
+		break;
+	case 'r':
+		kwcmp(ident, Sl("eturn"), T_RETURN);
+		break;
+	case 's':
+		kwcmp(ident, Sl("truct"), T_STRUCT);
+		break;
+	case 'u':
+		kwcmp(ident, Sl("se"), T_USE);
+		break;
+	case 'v':
+		kwcmp(ident, Sl("ar"), T_VAR);
+		break;
+	case 'w':
+		kwcmp(ident, Sl("hile"), T_WHILE);
+		break;
+	}
+	return *t;
+#undef kwcmp
+}
+
+/* Returns the next token.
+ * Tokens pushed with `lex_backup` are returned first (most recent one
+ * first). Otherwise whitespace is skipped and a token is scanned from the
+ * input; T_EOF is returned, and `ls->eof` set, at end of input. Unknown
+ * characters are reported and produce an error token.
+ *
+ * The second character of `!=` and `==` is consumed through `read_chr`,
+ * so the column counter advances for it as well. Digits are listed one by
+ * one instead of using a case range, which is a compiler extension. */
+LexToken
+lex_scan(LexState *ls)
+{
+	if (arrlen(ls->backlist) > 0) {
+		return arrpop(ls->backlist);
+	}
+	/* lexeme start pointer */
+	ls->lbegin = ls->fwd;
+
+	LexToken token = {0};
+	MaybeChr c = skip_whitespace(ls);
+	if (!c.ok) {
+		token.id = T_EOF;
+		ls->eof = true;
+		return token;
+	}
+
+#define TOKEN(chr, t) case chr: token.id = t; break;
+	switch (c.val) {
+	case '!':
+		if (peek(ls) == '=') {
+			token.id = T_NOTEQUAL;
+			(void)read_chr(ls);
+		} else {
+			token.id = T_EXCLAMATION;
+		}
+		break;
+	TOKEN('+', T_PLUS)
+	TOKEN('-', T_MINUS)
+	TOKEN('*', T_STAR)
+	TOKEN('/', T_BAR)
+	TOKEN('(', T_LPAREN)
+	TOKEN(')', T_RPAREN)
+	TOKEN(',', T_COMMA)
+	TOKEN('<', T_LESSTHAN)
+	TOKEN('>', T_GREATTHAN)
+	TOKEN('#', T_HASH)
+	TOKEN(':', T_COLON)
+	TOKEN(';', T_SEMICOLON)
+	TOKEN('[', T_LBRACKET)
+	TOKEN(']', T_RBRACKET)
+	TOKEN('{', T_LBRACE)
+	TOKEN('}', T_RBRACE)
+	case '=':
+		if (peek(ls) == '=') {
+			token.id = T_LOGICEQUAL;
+			(void)read_chr(ls);
+		} else {
+			token.id = T_EQUAL;
+		}
+		break;
+	case '"':
+		return string_literal(ls);
+	case '0': case '1': case '2': case '3': case '4':
+	case '5': case '6': case '7': case '8': case '9':
+		return number_literal(ls);
+	default: {
+		const u8 uc = c.val;
+		if (ascii_isident_start(uc)) {
+			LexToken ident_or_keyword = identifier(ls);
+			if (ident_or_keyword.id != T_IDENT)
+				return make_error();
+			return keyword(&ident_or_keyword);
+		}
+
+		if (uc > 0x7f) /* DEL, the last ASCII character */
+			lex_error(ls, "unicode tokens aren't allowed yet");
+		else
+			lex_error(ls, "unknown token '%c' (\\x%02x)", uc, uc);
+		return make_error();
+	}
+	}
+	return token;
+#undef TOKEN
+}
+
+/* Pushes `token` onto the backlist, a stack of tokens that `lex_scan`
+ * returns before reading new input; this gives unlimited look-ahead at
+ * the cost of memory.
+ * The current column is moved back by the token's length. When that would
+ * go below column 1, the column is left alone and the line counter is
+ * decremented instead (unless already on line 1).
+ */
+void
+lex_backup(LexState *ls, LexToken token)
+{
+	const i64 rewound = ls->cur_loc.column - token.len;
+
+	arrput(ls->backlist, token);
+
+	if (rewound >= 1) {
+		ls->cur_loc.column = rewound;
+		return;
+	}
+	if (ls->cur_loc.line > 1)
+		--ls->cur_loc.line;
+}
+
+/* Checks if `t` token type is equal to `exp_tok`. This does not eat any token. */
+bool
+lex_match(LexState *ls, LexToken *token, enum LexTokenId exp_tok)
+{
+	if (token->id != exp_tok) {
+		lex_error(ls, "expected '%s' but got '%s' instead\n",
+				TokenIdStr[exp_tok], TokenIdStr[token->id]);
+		return false;
+	}
+	return true;
+}
+
+/* Creates a lexing context that reads from `input_fp`; `file_name` is
+ * recorded as the source of every location. Release the context with
+ * `lex_destroy`.
+ * Allocation failures are reported through `fatal`. The location starts
+ * at line 1, column 1, matching the column a new line starts at after a
+ * newline (the column used to start at 0 on the first line only). */
+LexState *
+lex_new(Compiler *cm, FILE *input_fp, Str file_name, usize tabsize)
+{
+	LexState *ls = calloc(1, sizeof(*ls));
+	if (ls == nil) {
+		fatal(cm, nil, "out of memory");
+		return nil;
+	}
+	/* one extra byte for the sentinel that follows a full second half */
+	ls->buf = calloc(LEX_BUFFER_SIZE + 1, sizeof(*ls->buf));
+	if (ls->buf == nil) {
+		free(ls);
+		fatal(cm, nil, "out of memory");
+		return nil;
+	}
+	ls->lbegin = ls->fwd = ls->buf;
+	ls->tabsize = tabsize;
+	ls->input_fp = input_fp;
+	ls->cur_loc.line = 1;
+	ls->cur_loc.column = 1;
+	ls->cur_loc.source = file_name;
+	ls->cm = cm;
+	/* We use a hash table with string keys as a set containing all identifiers
+	 * in a compilation unit, to avoid duplicate allocations.
+	 */
+	sh_new_arena(ls->idents);
+	/* We provide our own buffering scheme */
+	setbuf(input_fp, nil);
+	/* Initial fill of first buffer.
+	 * Any file error gets caught in the function, only thing that can happen
+	 * here is that the file is actually empty, so instant EOF.
+	 */
+	read_buf(ls, ls->buf, LEX_HALF_BUFFER_SIZE, &ls->buflen);
+	return ls;
+}
+
+/* Releases a lexing context: the lexing buffer, the backlist, the
+ * identifier table (and with it the identifier arena) and the context
+ * itself.
+ */
+void
+lex_destroy(LexState *ls)
+{
+	free(ls->buf);
+	arrfree(ls->backlist);
+	shfree(ls->idents);
+
+	free(ls);
+}
--- a/compiler/lex.h
+++ b/compiler/lex.h
@ -0,0 +1,93 @@
+#ifndef _lex_h_
+#define _lex_h_
+#include <stdio.h>
+
+#include "pre.h"
+#include "location.h"
+#include "state.h"
+#include "libs/stb_ds.h"
+
+/* Identifiers for every kind of token. parse.c classifies tokens with range
+ * checks over these values (token_is_binop: T_PLUS..T_NOTEQUAL, token_is_atom:
+ * T_IDENT..T_DECNUMBER), so the relative order of the enumerators matters.
+ */
+enum LexTokenId {
+	T_INVALID = 0,
+	/* Unary and binary operators */
+	T_PLUS, T_MINUS, T_STAR, T_BAR,
+	T_LESSTHAN, T_GREATTHAN, T_LOGNOT, T_LOGAND, T_LOGOR, T_LOGICEQUAL, T_NOTEQUAL,
+	T_HASH,
+	/* Others */
+	T_EQUAL, T_EXCLAMATION, T_LPAREN, T_RPAREN, T_COMMA,
+	T_COLON, T_SEMICOLON, T_LBRACKET, T_RBRACKET, T_LBRACE, T_RBRACE,
+	/* Atoms */
+	T_IDENT, T_STRING, T_NUMBER, T_DECNUMBER,
+	/* Keywords */
+	T_CONST,
+	T_ELSE,
+	T_END,
+	T_ELIF,
+	T_IF,
+	T_LET,
+	T_PROC,
+	T_RETURN,
+	T_VAR,
+	T_DISCARD,
+	T_WHILE,
+	T_STRUCT,
+	T_USE,
+	T_BREAK,
+	T_NEXT,
+	/* Control */
+	T_EOF, T_ERROR,
+	T_TOKEN_COUNT, /* does not represent an actual token */
+};
+
+/* Table mapping a `LexTokenId` to a string name of the token */
+extern const char *TokenIdStr[];
+
+/* A scanned token: its kind, an optional payload and its length. */
+typedef struct {
+	enum LexTokenId id;
+	/* Payload; which member is meaningful depends on `id` (the parser reads
+	 * `ident` for T_IDENT, `str` for T_STRING and `inumber` for T_NUMBER). */
+	union {
+		Str ident, str, keyword;
+		/* XXX: Defer number parsing until it is actually needed?
+		 * So we can move number parsing out of the lexer. */
+		/* Integer literal, it's the parser problem to tell
+		 * whether the literal is negative or not.
+		 */
+		u64 inumber;
+		/* Floating point literal */
+		double floatn;
+	};
+	isize len; /* Size in bytes of this token */
+} LexToken;
+
+typedef HashMapStr(i8) IdentsBucket;
+
+/* Lexer context. Created by lex_new() and released by lex_destroy(). */
+typedef struct {
+	FILE *input_fp; /* stream the source text is read from */
+	/* Lexing buffer. This is actually split into two buffers, providing
+	 * a double-buffering scheme */
+	u8 *buf;
+	/* Actual length of each buffer (fread may read less than LEX_BUFFER_SIZE) */
+	isize buflen, buflen2;
+
+	u8 *lbegin; /* marks the begin of the current lexeme */
+	u8 *fwd; /* this pointer is the scanner */
+   	Vec(LexToken) backlist; /* stack of backed up tokens */
+
+	int tabsize; /* value handed to lex_new() */
+	bool eof; /* presumably set once the input is exhausted -- TODO confirm in lex_scan */
+	Location cur_loc; /* tracked source location; rewound by lex_backup() */
+	Compiler *cm; /* compiler state handed to lex_new() */
+	IdentsBucket *idents; /* set of identifiers seen, stored in a string arena */
+} LexState;
+
+LexToken
+lex_scan(LexState *ls);
+void
+lex_backup(LexState *ls, LexToken token);
+bool
+lex_match(LexState *ls, LexToken *t, enum LexTokenId exp_tok);
+LexState *
+lex_new(Compiler *cm, FILE *input_fp, Str file_name, usize tabsize);
+void
+lex_destroy(LexState *l);
+
+#endif
--- a/compiler/libs/optparse.h
+++ b/compiler/libs/optparse.h
@ -0,0 +1,403 @@
+/* Optparse --- portable, reentrant, embeddable, getopt-like option parser
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * To get the implementation, define OPTPARSE_IMPLEMENTATION.
+ * Optionally define OPTPARSE_API to control the API's visibility
+ * and/or linkage (static, __attribute__, __declspec).
+ *
+ * The POSIX getopt() option parser has three fatal flaws. These flaws
+ * are solved by Optparse.
+ *
+ * 1) Parser state is stored entirely in global variables, some of
+ * which are static and inaccessible. This means only one thread can
+ * use getopt(). It also means it's not possible to recursively parse
+ * nested sub-arguments while in the middle of argument parsing.
+ * Optparse fixes this by storing all state on a local struct.
+ *
+ * 2) The POSIX standard provides no way to properly reset the parser.
+ * This means for portable code that getopt() is only good for one
+ * run, over one argv with one option string. It also means subcommand
+ * options cannot be processed with getopt(). Most implementations
+ * provide a method to reset the parser, but it's not portable.
+ * Optparse provides an optparse_arg() function for stepping over
+ * subcommands and continuing parsing of options with another option
+ * string. The Optparse struct itself can be passed around to
+ * subcommand handlers for additional subcommand option parsing. A
+ * full reset can be achieved by with an additional optparse_init().
+ *
+ * 3) Error messages are printed to stderr. This can be disabled with
+ * opterr, but the messages themselves are still inaccessible.
+ * Optparse solves this by writing an error message in its errmsg
+ * field. The downside to Optparse is that this error message will
+ * always be in English rather than the current locale.
+ *
+ * Optparse should be familiar with anyone accustomed to getopt(), and
+ * it could be a nearly drop-in replacement. The option string is the
+ * same and the fields have the same names as the getopt() global
+ * variables (optarg, optind, optopt).
+ *
+ * Optparse also supports GNU-style long options with optparse_long().
+ * The interface is slightly different and simpler than getopt_long().
+ *
+ * By default, argv is permuted as it is parsed, moving non-option
+ * arguments to the end. This can be disabled by setting the `permute`
+ * field to 0 after initialization.
+ */
+#ifndef OPTPARSE_H
+#define OPTPARSE_H
+
+#ifndef OPTPARSE_API
+#  define OPTPARSE_API
+#endif
+
+/* Parser state, set up by optparse_init() and updated by the parsing
+ * functions as they walk argv. */
+struct optparse {
+    char **argv;     /* argument vector being parsed; reordered when permuting */
+    int permute;     /* non-zero: move non-option arguments after the options */
+    int optind;      /* index into argv of the next argument to examine */
+    int optopt;      /* option character most recently examined */
+    char *optarg;    /* argument of the current option, or 0 when there is none */
+    char errmsg[64]; /* message written by optparse_error(); empty when no error */
+    int subopt;      /* offset of the next character inside a group such as "-abc" */
+};
+
+/* Whether an option takes an argument. The numeric values are significant:
+ * optparse_from_long() emits one ':' per unit of this value, and
+ * optparse_argtype() returns the number of ':' found after an option. */
+enum optparse_argtype {
+    OPTPARSE_NONE,
+    OPTPARSE_REQUIRED,
+    OPTPARSE_OPTIONAL
+};
+
+/* Description of one long option. An entry whose `longname` and `shortname`
+ * are both zero terminates an array of these. */
+struct optparse_long {
+    const char *longname;          /* name without the leading "--", or 0 */
+    int shortname;                 /* short option character; also the value returned */
+    enum optparse_argtype argtype; /* whether the option takes an argument */
+};
+
+/**
+ * Initializes the parser state.
+ */
+OPTPARSE_API
+void optparse_init(struct optparse *options, char **argv);
+
+/**
+ * Read the next option in the argv array.
+ * @param optstring a getopt()-formatted option string.
+ * @return the next option character, -1 for done, or '?' for error
+ *
+ * Just like getopt(), a character followed by no colons means no
+ * argument. One colon means the option has a required argument. Two
+ * colons means the option takes an optional argument.
+ */
+OPTPARSE_API
+int optparse(struct optparse *options, const char *optstring);
+
+/**
+ * Handles GNU-style long options in addition to getopt() options.
+ * This works a lot like GNU's getopt_long(). The last option in
+ * longopts must be all zeros, marking the end of the array. The
+ * longindex argument may be NULL.
+ */
+OPTPARSE_API
+int optparse_long(struct optparse *options,
+                  const struct optparse_long *longopts,
+                  int *longindex);
+
+/**
+ * Used for stepping over non-option arguments.
+ * @return the next non-option argument, or NULL for no more arguments
+ *
+ * Argument parsing can continue with optparse() after using this
+ * function. That would be used to parse the options for the
+ * subcommand returned by optparse_arg(). This function allows you to
+ * ignore the value of optind.
+ */
+OPTPARSE_API
+char *optparse_arg(struct optparse *options);
+
+/* Implementation */
+#ifdef OPTPARSE_IMPLEMENTATION
+
+#define OPTPARSE_MSG_INVALID "invalid option"
+#define OPTPARSE_MSG_MISSING "option requires an argument"
+#define OPTPARSE_MSG_TOOMANY "option takes no arguments"
+
+/* Stores "<msg> -- '<data>'" in options->errmsg and returns '?'.
+ * Only `data` is truncated to fit; `msg` is copied unbounded, so it must be
+ * short.
+ */
+static int
+optparse_error(struct optparse *options, const char *msg, const char *data)
+{
+    static const char sep[] = " -- '";
+    char *out = options->errmsg;
+    unsigned used = 0;
+    unsigned i;
+    for (i = 0; msg[i] != '\0'; i++)
+        out[used++] = msg[i];
+    for (i = 0; sep[i] != '\0'; i++)
+        out[used++] = sep[i];
+    /* leave room for the closing quote and the terminator */
+    for (i = 0; used < sizeof(options->errmsg) - 2 && data[i] != '\0'; i++)
+        out[used++] = data[i];
+    out[used++] = '\'';
+    out[used] = '\0';
+    return '?';
+}
+
+OPTPARSE_API
+void
+optparse_init(struct optparse *options, char **argv)
+{
+    options->errmsg[0] = '\0';
+    options->optarg = 0;
+    options->subopt = 0;
+    options->permute = 1;
+    options->argv = argv;
+    /* start after argv[0] when there is one */
+    options->optind = (argv[0] != 0) ? 1 : 0;
+}
+
+/* Non-zero when `arg` is exactly "--". */
+static int
+optparse_is_dashdash(const char *arg)
+{
+    if (arg == 0)
+        return 0;
+    if (arg[0] != '-' || arg[1] != '-')
+        return 0;
+    return arg[2] == '\0';
+}
+
+/* Non-zero when `arg` looks like "-x...": a dash followed by something other
+ * than a second dash. */
+static int
+optparse_is_shortopt(const char *arg)
+{
+    if (arg == 0 || arg[0] != '-')
+        return 0;
+    return !(arg[1] == '-' || arg[1] == '\0');
+}
+
+/* Non-zero when `arg` starts with "--" and has at least one more character. */
+static int
+optparse_is_longopt(const char *arg)
+{
+    if (arg == 0 || arg[0] != '-' || arg[1] != '-')
+        return 0;
+    return arg[2] != '\0';
+}
+
+/* Moves argv[index] to position optind - 1, shifting the entries in between
+ * down by one slot. */
+static void
+optparse_permute(struct optparse *options, int index)
+{
+    char **argv = options->argv;
+    const int last = options->optind - 1;
+    char *moved = argv[index];
+    int pos = index;
+    while (pos < last) {
+        argv[pos] = argv[pos + 1];
+        pos++;
+    }
+    argv[last] = moved;
+}
+
+/* Looks up `c` in `optstring`. Returns -1 when `c` is ':' or is not listed;
+ * otherwise the argument type given by the colons that follow it. */
+static int
+optparse_argtype(const char *optstring, char c)
+{
+    const char *hit;
+    if (c == ':')
+        return -1;
+    hit = optstring;
+    while (*hit != '\0' && *hit != c)
+        hit++;
+    if (*hit == '\0')
+        return -1;
+    if (hit[1] != ':')
+        return OPTPARSE_NONE;
+    return hit[2] == ':' ? OPTPARSE_OPTIONAL : OPTPARSE_REQUIRED;
+}
+
+/* Parses the next short option of argv according to `optstring`.
+ * Returns the option character, -1 when option parsing is finished, or '?'
+ * (with options->errmsg filled in) on an unknown option or missing argument.
+ */
+OPTPARSE_API
+int
+optparse(struct optparse *options, const char *optstring)
+{
+    int type;
+    char *next;
+    char *option = options->argv[options->optind];
+    options->errmsg[0] = '\0';
+    options->optopt = 0;
+    options->optarg = 0;
+    if (option == 0) {
+        return -1;
+    } else if (optparse_is_dashdash(option)) {
+        options->optind++; /* consume "--" */
+        return -1;
+    } else if (!optparse_is_shortopt(option)) {
+        if (options->permute) {
+            /* Skip the non-option, parse what follows, then move the
+             * non-option behind whatever the recursive call consumed. */
+            int index = options->optind++;
+            int r = optparse(options, optstring);
+            optparse_permute(options, index);
+            options->optind--;
+            return r;
+        } else {
+            return -1;
+        }
+    }
+    /* Step past the dash and any characters of this group already handled */
+    option += options->subopt + 1;
+    options->optopt = option[0];
+    type = optparse_argtype(optstring, option[0]);
+    next = options->argv[options->optind + 1];
+    switch (type) {
+    case -1: {
+        /* not present in optstring */
+        char str[2] = {0, 0};
+        str[0] = option[0];
+        options->optind++;
+        return optparse_error(options, OPTPARSE_MSG_INVALID, str);
+    }
+    case OPTPARSE_NONE:
+        if (option[1]) {
+            /* more options in the same group: stay on this argv entry */
+            options->subopt++;
+        } else {
+            options->subopt = 0;
+            options->optind++;
+        }
+        return option[0];
+    case OPTPARSE_REQUIRED:
+        options->subopt = 0;
+        options->optind++;
+        if (option[1]) {
+            /* argument attached to the option, as in "-ovalue" */
+            options->optarg = option + 1;
+        } else if (next != 0) {
+            /* argument is the following argv entry */
+            options->optarg = next;
+            options->optind++;
+        } else {
+            char str[2] = {0, 0};
+            str[0] = option[0];
+            options->optarg = 0;
+            return optparse_error(options, OPTPARSE_MSG_MISSING, str);
+        }
+        return option[0];
+    case OPTPARSE_OPTIONAL:
+        /* an optional argument is only recognized when attached */
+        options->subopt = 0;
+        options->optind++;
+        if (option[1])
+            options->optarg = option + 1;
+        else
+            options->optarg = 0;
+        return option[0];
+    }
+    return 0;
+}
+
+OPTPARSE_API
+char *
+optparse_arg(struct optparse *options)
+{
+    char *current = options->argv[options->optind];
+    options->subopt = 0;
+    if (current == 0)
+        return 0;
+    options->optind++;
+    return current;
+}
+
+/* Non-zero when entry `i` is the all-zero terminator of `longopts`. */
+static int
+optparse_longopts_end(const struct optparse_long *longopts, int i)
+{
+    const struct optparse_long *entry = &longopts[i];
+    return entry->longname == 0 && entry->shortname == 0;
+}
+
+/* Builds a getopt-style option string in `optstring` from the short names in
+ * `longopts`. Entries with no short name, or one of 127 and above, are
+ * skipped. */
+static void
+optparse_from_long(const struct optparse_long *longopts, char *optstring)
+{
+    char *out = optstring;
+    int idx = 0;
+    while (!optparse_longopts_end(longopts, idx)) {
+        const int shortname = longopts[idx].shortname;
+        if (shortname && shortname < 127) {
+            int colons = (int)longopts[idx].argtype;
+            *out++ = (char)shortname;
+            while (colons-- > 0)
+                *out++ = ':';
+        }
+        idx++;
+    }
+    *out = '\0';
+}
+
+/* Compares `option` with `longname`, treating a "=" in `option` as its end.
+ * Returns non-zero on an exact match of the name part; 0 when `longname` is
+ * null. */
+static int
+optparse_longopts_match(const char *longname, const char *option)
+{
+    const char *opt = option;
+    const char *name = longname;
+    if (longname == 0)
+        return 0;
+    while (*opt != '\0' && *name != '\0' && *opt != '=') {
+        if (*opt != *name)
+            return 0;
+        opt++;
+        name++;
+    }
+    if (*name != '\0')
+        return 0;
+    return *opt == '\0' || *opt == '=';
+}
+
+/* Returns a pointer to the text following the first "=" in `option`, or null
+ * when there is no "=". */
+static char *
+optparse_longopts_arg(char *option)
+{
+    char *cursor = option;
+    while (*cursor != '\0') {
+        if (*cursor == '=')
+            return cursor + 1;
+        cursor++;
+    }
+    return 0;
+}
+
+/* Handles a short option on behalf of optparse_long(): derives an option
+ * string from `longopts`, runs optparse() and, when `longindex` is given,
+ * reports the index of the matching entry (-1 when there is none). */
+static int
+optparse_long_fallback(struct optparse *options,
+                       const struct optparse_long *longopts,
+                       int *longindex)
+{
+    char shortopts[96 * 3 + 1]; /* 96 ASCII printable characters */
+    int rc;
+    int idx;
+    optparse_from_long(longopts, shortopts);
+    rc = optparse(options, shortopts);
+    if (longindex == 0)
+        return rc;
+    *longindex = -1;
+    if (rc == -1)
+        return rc;
+    /* no early exit: when several entries share a short name, the last wins */
+    for (idx = 0; !optparse_longopts_end(longopts, idx); idx++) {
+        if (longopts[idx].shortname == options->optopt)
+            *longindex = idx;
+    }
+    return rc;
+}
+
+/* Parses the next option of argv, accepting both "--name[=value]" long
+ * options and short options described by `longopts`.
+ * Returns the entry's `shortname`, -1 when option parsing is finished, or '?'
+ * (with options->errmsg filled in) on error. When `longindex` is non-null it
+ * receives the index of the matched entry.
+ */
+OPTPARSE_API
+int
+optparse_long(struct optparse *options,
+              const struct optparse_long *longopts,
+              int *longindex)
+{
+    int i;
+    char *option = options->argv[options->optind];
+    if (option == 0) {
+        return -1;
+    } else if (optparse_is_dashdash(option)) {
+        options->optind++; /* consume "--" */
+        return -1;
+    } else if (optparse_is_shortopt(option)) {
+        return optparse_long_fallback(options, longopts, longindex);
+    } else if (!optparse_is_longopt(option)) {
+        if (options->permute) {
+            /* Skip the non-option, parse what follows, then move the
+             * non-option behind whatever the recursive call consumed. */
+            int index = options->optind++;
+            int r = optparse_long(options, longopts, longindex);
+            optparse_permute(options, index);
+            options->optind--;
+            return r;
+        } else {
+            return -1;
+        }
+    }
+
+    /* Parse as long option. */
+    options->errmsg[0] = '\0';
+    options->optopt = 0;
+    options->optarg = 0;
+    option += 2; /* skip "--" */
+    options->optind++;
+    for (i = 0; !optparse_longopts_end(longopts, i); i++) {
+        const char *name = longopts[i].longname;
+        if (optparse_longopts_match(name, option)) {
+            char *arg;
+            if (longindex)
+                *longindex = i;
+            options->optopt = longopts[i].shortname;
+            arg = optparse_longopts_arg(option);
+            if (longopts[i].argtype == OPTPARSE_NONE && arg != 0) {
+                /* "--flag=value" given for an option that takes no argument */
+                return optparse_error(options, OPTPARSE_MSG_TOOMANY, name);
+            } if (arg != 0) {
+                options->optarg = arg;
+            } else if (longopts[i].argtype == OPTPARSE_REQUIRED) {
+                /* no "=value": the argument is the following argv entry */
+                options->optarg = options->argv[options->optind];
+                if (options->optarg == 0)
+                    return optparse_error(options, OPTPARSE_MSG_MISSING, name);
+                else
+                    options->optind++;
+            }
+            return options->optopt;
+        }
+    }
+    return optparse_error(options, OPTPARSE_MSG_INVALID, option);
+}
+
+#endif /* OPTPARSE_IMPLEMENTATION */
+#endif /* OPTPARSE_H */
--- a/compiler/libs/optparse_impl.c
+++ b/compiler/libs/optparse_impl.c
@ -0,0 +1,3 @@
+/* This file holds the implementation of the optparse library functionality */
+#define OPTPARSE_IMPLEMENTATION
+#include "optparse.h"
--- a/compiler/libs/stb_ds.h
+++ b/compiler/libs/stb_ds.h
--- a/compiler/libs/stb_impl.c
+++ b/compiler/libs/stb_impl.c
@ -0,0 +1,3 @@
+/* This file holds the implementation of stb library functionality */
+#define STB_DS_IMPLEMENTATION
+#include "stb_ds.h"
--- a/compiler/location.h
+++ b/compiler/location.h
@ -0,0 +1,11 @@
+#ifndef _location_h_
+#define _location_h_
+
+#include "pre.h"
+
+typedef struct {
+	Str source;
+	i64 line, column;
+} Location;
+
+#endif
--- a/compiler/messages.c
+++ b/compiler/messages.c
@ -0,0 +1,58 @@
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+
+#include "messages.h"
+#include "location.h"
+
+/* SGR (Select Graphic Rendition) sequence */
+#define ANSI_C(c) "\x1b["c"m"
+/* 8bit palette color */
+#define ANSI_8C(c) "\x1b[38;5;"c"m"
+#define ANSI_8CB(c) "\x1b[48;5;"c"m"
+/* True color */
+#define ANSI_RC(r,g,b) "\x1b[38;2;"r";"g";"b"m"
+#define ANSI_RCB(r,g,b) "\x1b[48;2;"r";"g";"b"m"
+#define ANSI_RESET "\x1b[0m"
+#define ANSI_BOLD "\x1b[1m"
+#define ANSI_IF(cond, seq) (cond ? (seq) : "")
+
+/* Generates the definition of a diagnostic function called `name`.
+ * The generated function forwards its printf-style arguments to
+ * print_diagnostic() with severity `diagtype` and then executes the statement
+ * `after`, which may be empty.
+ */
+#define make_diag_func(name, diagtype, after) \
+	void name(Compiler *cm, const Location *loc, const char *s, ...) \
+	{ \
+		va_list ap; \
+		va_start(ap, s); \
+		print_diagnostic(cm, loc, diagtype, s, ap); \
+		va_end(ap); \
+		after; \
+	}
+
+/* Formats a diagnostic and writes it to stderr as
+ * "(source:line:column) kind: message", or "kind: message" when `loc` is nil.
+ * `cm` may be nil; then the output is not colored and no error is counted.
+ * For `diag_error` the compiler's error counter is incremented while it is
+ * below `opts.max_errors`.
+ * `msg` and `args` follow the vprintf conventions; the formatted message is
+ * truncated to the size of the internal buffer.
+ */
+void
+print_diagnostic(Compiler *cm, const Location *loc, DiagType dt, const char *msg, va_list args)
+{
+	static const char *const ds[] = {"fatal", "error", "warning", "note"};
+	static const char *const dsc[] = {
+		ANSI_C("1;90"), ANSI_C("1;31"), ANSI_C("1;35"), ANSI_C("1;34")
+	};
+
+	char fmsg[4096] = {0};
+	char dmsg[32] = {0};
+	const bool color = cm != nil ? cm->opts.color : false;
+
+	/* `cm` is optional, so only touch the counter when it exists */
+	if (cm != nil && dt == diag_error && cm->error_count < cm->opts.max_errors)
+		++cm->error_count;
+
+	vsnprintf(fmsg, sizeof(fmsg), msg, args);
+	snprintf(dmsg, sizeof(dmsg), "%s%s:%s", ANSI_IF(color, dsc[dt]), ds[dt], ANSI_IF(color, ANSI_RESET));
+
+	if (loc != nil) {
+		/* i64 is not `long` on every platform: widen explicitly so the
+		 * arguments always match the conversion specifiers */
+		fprintf(stderr, "(%s:%lld:%lld) %s %s\n",
+			(const char *)loc->source.s, (long long)loc->line,
+			(long long)loc->column, dmsg, fmsg);
+	} else {
+		fprintf(stderr, "%s %s\n", dmsg, fmsg);
+	}
+}
+
+make_diag_func(fatal, diag_fatal, exit(EXIT_FAILURE))
+make_diag_func(error, diag_error, )
+make_diag_func(warning, diag_warning, )
+make_diag_func(note, diag_note, )
--- a/compiler/messages.h
+++ b/compiler/messages.h
@ -0,0 +1,30 @@
+#ifndef _messages_h_
+#define _messages_h_
+
+#include <stdarg.h>
+#include "state.h"
+#include "location.h"
+
+#ifdef __GNUC__
+#	define fmtattr(archt, fmtsi, ftchk) __attribute((format(archt, fmtsi, ftchk)))
+#else
+#	define fmtattr(a, b, c)
+#endif
+
+/* Severity of a diagnostic. print_diagnostic() uses the value as an index into
+ * its label and color tables, so the order here must match those tables. */
+typedef enum
+{
+	diag_fatal = 0,
+	diag_error,
+	diag_warning,
+	diag_note,
+} DiagType;
+
+void
+fatal(Compiler *cm, const Location *loc, const char *s, ...) fmtattr(printf, 3, 4);
+void
+error(Compiler *cm, const Location *loc, const char *s, ...) fmtattr(printf, 3, 4);
+void
+warning(Compiler *cm, const Location *loc, const char *s, ...) fmtattr(printf, 3, 4);
+void
+note(Compiler *cm, const Location *loc, const char *s, ...) fmtattr(printf, 3, 4);
+#endif
--- a/compiler/parse.c
+++ b/compiler/parse.c
@ -0,0 +1,665 @@
+/* Recursive descent parser + Pratt parser (for expressions)
+ * TODO:
+ *  - DRY code that handle list of tokens, I have like three almost identical functions for that.
+ *  - Use an arena for the AST nodes. Nuke all of them with a single call
+ *    when we no longer need the AST.
+ */
+#include <stdlib.h>
+
+#include "ast.h"
+#include "pre.h"
+#include "parse.h"
+#include "lex.h"
+#include "state.h"
+#include "messages.h"
+#include "libs/stb_ds.h"
+
+#define MAX_STMTS_IN_BLOCK 2000
+#define MAX_PROC_ARG_COUNT 127
+#define EXPR_INIT_PREC 1
+
+/* Consume a token and match it */
+#define next_match(lexer, tokt) \
+	do { LexToken t = lex_scan(lexer); lex_match(lexer, &t, tokt); } while (0)
+
+/* Scans a token (mutating `t`), and if its id matches `ttype`,
+ * it executes the code block. Otherwise, the scanned token
+ * gets put back (so a next call to `lex_scan` can pick it up).
+ * The expansion ends in a dangling `else`, so it must always be followed by a
+ * statement or block. Every argument is parenthesized so that expressions can
+ * be passed safely.
+ */
+#define matchopt(t, ttype, ps) \
+	if (((t) = lex_scan((ps)->lexer)).id != (ttype)) { \
+		lex_backup((ps)->lexer, (t)); \
+	} else
+
+/* Token classification by enumerator range; see `enum LexTokenId`.
+ * Arguments are parenthesized so compound expressions classify correctly.
+ * Note that the T_PLUS..T_NOTEQUAL range also contains T_LOGNOT. */
+#define token_is_binop(t) ((t) >= T_PLUS && (t) <= T_NOTEQUAL)
+#define token_is_atom(t) ((t) >= T_IDENT && (t) <= T_DECNUMBER)
+#define token_is_unary(t) ((t) == T_MINUS || (t) == T_LOGNOT)
+#define token_is_expr_start(t) (token_is_unary(t) || token_is_atom(t))
+#define parse_error(ctx, ...) \
+	do { error((ctx)->cm, &((ctx)->lexer->cur_loc), __VA_ARGS__); (ctx)->ok = false; } while (0)
+
+typedef Optional(AstIdentTypePair) OptAstIdentTypePair;
+typedef struct {
+	int pred;
+	bool left_assoc; /* false if right assoc... */
+} OperatorPrec;
+
+/* Operator table specifying the precedence and associativeness
+ * of each operator, used by the expression parser.
+ * The precedence goes from lower to higher.
+ * The table is indexed by `enum LexTokenId`. Tokens without an entry here are
+ * zero-initialized, i.e. they get precedence 0, which is below
+ * EXPR_INIT_PREC and therefore never accepted as an operator by `expr`.
+ */
+const OperatorPrec OperatorTable[] = {
+	[T_LOGOR] = {1, true},
+	[T_LOGAND] = {2, true},
+	[T_LESSTHAN] = {3, true},
+	[T_GREATTHAN] = {3, true},
+	[T_LOGICEQUAL] = {3, true},
+	[T_NOTEQUAL] = {3, true},
+	[T_PLUS] = {4, true},
+	[T_MINUS] = {4, true},
+	[T_STAR] = {5, true},
+	[T_BAR] = {5, true},
+};
+
+static Ast *
+expr(ParserState *ps, int minprec);
+static Ast *
+expr_comma_list(ParserState *ps);
+static Ast *
+stmt(ParserState *ps, LexToken token);
+static Ast *
+stmt_list_until(ParserState *ps, bool putback, const enum LexTokenId *end_markers, isize len);
+
+
+/* Allocates a zero-initialized AST node of kind `type` located at `loc`.
+ * Running out of memory is reported through fatal(), which exits, so callers
+ * never receive nil.
+ */
+static Ast *
+make_tree(enum AstType type, Location loc)
+{
+	Ast *tree = calloc(1, sizeof(*tree));
+	if (tree == nil) {
+		fatal(nil, &loc, "out of memory while allocating an AST node");
+		return nil; /* not reached: fatal() exits */
+	}
+	tree->type = type;
+	tree->loc = loc;
+	return tree;
+}
+
+/* Builds an AST_BINEXPR node for operator `op` applied to `lhs` and `rhs`.
+ * The operator is stored by its printable name. */
+static Ast *
+make_binop(enum LexTokenId op, Location loc, Ast *lhs, Ast *rhs)
+{
+	Ast *node = make_tree(AST_BINEXPR, loc);
+
+	node->bin.left = lhs;
+	node->bin.right = rhs;
+	node->bin.op = Str_from_c(TokenIdStr[op]);
+	return node;
+}
+
+/* Builds an AST_IDENT node naming `ident`, located at `loc`. */
+static Ast *
+make_ident_node(Str ident, Location loc)
+{
+	Ast *node = make_tree(AST_IDENT, loc);
+
+	node->ident = ident;
+	return node;
+}
+
+/* Parses `ident ':' ['var'] type`.
+ * Returns the parsed pair, or an empty optional when the name or the type is
+ * not an identifier. The offending token stays consumed in both cases.
+ */
+static OptAstIdentTypePair
+ident_type_pair(ParserState *ps)
+{
+	AstIdentTypePair itp = { .loc = ps->lexer->cur_loc };
+	/* ident */
+	LexToken token = lex_scan(ps->lexer);
+	if (!lex_match(ps->lexer, &token, T_IDENT)) {
+		/* `token.ident` is only meaningful for T_IDENT; using it for any
+		 * other token would read the wrong member of the payload union */
+		ps->ok = false;
+		return None(OptAstIdentTypePair);
+	}
+	itp.ident = token.ident;
+	/* type */
+	next_match(ps->lexer, T_COLON);
+	/* optional qualifier */
+	token = lex_scan(ps->lexer);
+	if (token.id == T_VAR) {
+		itp.kind = SymVar;
+	} else {
+		itp.kind = SymLet;
+		lex_backup(ps->lexer, token);
+	}
+	itp.dtype_loc = ps->lexer->cur_loc;
+	token = lex_scan(ps->lexer);
+	if (token.id != T_IDENT) {
+		parse_error(ps, "expected a type, got %s instead",  TokenIdStr[token.id]);
+		return None(OptAstIdentTypePair);
+	}
+	itp.dtype = token.ident;
+	return Some(OptAstIdentTypePair, itp);
+}
+
+/* Parses a comma separated list of `ident: type` pairs; a trailing comma is
+ * accepted. The token that ends the list is handed back to the lexer.
+ * Returns the vector of arguments (owned by the caller), or nil on a parse
+ * error or when MAX_PROC_ARG_COUNT is exceeded. Nothing is leaked on failure.
+ */
+static Vec(AstIdentTypePair)
+proc_arglist(ParserState *ps)
+{
+	Vec(AstIdentTypePair) args = nil;
+	LexToken next;
+
+	for (;;) {
+		OptAstIdentTypePair oitp = ident_type_pair(ps);
+		if (!oitp.ok) {
+			arrfree(args);
+			return nil;
+		}
+		if (arrlen(args) + 1 > MAX_PROC_ARG_COUNT) {
+			parse_error(ps, "more than %d (implementation limit) proc arguments", MAX_PROC_ARG_COUNT);
+			arrfree(args);
+			return nil;
+		}
+
+		arrput(args, oitp.val);
+		next = lex_scan(ps->lexer);
+		/* do we have a comma? if not, we reached the end of the list */
+		if (next.id != T_COMMA)
+			break;
+		/* check if we have another argument next to this comma, we do this
+		 * to allow a trailing comma
+		 */
+		next = lex_scan(ps->lexer);
+		if (next.id != T_IDENT)
+			break;
+		lex_backup(ps->lexer, next);
+	}
+	trace("token in arglist out: %s\n", TokenIdStr[next.id]);
+	lex_backup(ps->lexer, next);
+
+	if (arrlen(args) == 0) {
+		arrfree(args);
+		return nil;
+	}
+	return args;
+}
+
+/* Parses a procedure definition; the `proc` keyword has already been consumed
+ * by the caller. Grammar: ident ['*'] '(' [arglist] ')' [':' type] stmt* 'end'.
+ * Returns an AST_PROCDEF node.
+ * NOTE(review): the results of the lex_match calls are ignored, so parsing
+ * continues after a mismatch has been reported.
+ */
+static Ast *
+proc_decl(ParserState *ps)
+{
+	LexToken proc_name = lex_scan(ps->lexer);
+	lex_match(ps->lexer, &proc_name, T_IDENT);
+
+	Ast *proc = make_tree(AST_PROCDEF, ps->lexer->cur_loc);
+	proc->proc.name = proc_name.ident;
+	trace("proc name: %s\n", proc->proc.name.s);
+
+	/* a '*' after the name marks the procedure as public */
+	LexToken token = lex_scan(ps->lexer);
+	if (token.id == T_STAR) {
+		proc->proc.ispublic = true;
+		token = lex_scan(ps->lexer);
+	}
+
+	lex_match(ps->lexer, &token, T_LPAREN);
+	token = lex_scan(ps->lexer);
+	if (token.id != T_RPAREN) {
+		/* not an empty list: hand the token back and parse the arguments */
+		lex_backup(ps->lexer, token);
+		proc->proc.args = proc_arglist(ps);
+		token = lex_scan(ps->lexer);
+	}
+	lex_match(ps->lexer, &token, T_RPAREN);
+
+	/* return type */
+	token = lex_scan(ps->lexer);
+	if (token.id == T_COLON) {
+		token = lex_scan(ps->lexer);
+		lex_match(ps->lexer, &token, T_IDENT);
+		proc->proc.rettype = make_ident_node(token.ident, ps->lexer->cur_loc);
+	} else {
+		lex_backup(ps->lexer, token);
+	}
+	/* body */
+	proc->proc.body = stmt_list_until(ps, false, (enum LexTokenId[]){T_END}, 1);
+	return proc;
+}
+
+/* Parses the remainder of a call to `ident`: '(' [expr {',' expr}] ')'.
+ * `ate_lp` tells whether the caller already consumed the opening parenthesis.
+ * Returns an AST_PROCCALL node.
+ */
+static Ast *
+function_call(ParserState *ps, Str ident, bool ate_lp)
+{
+	Ast *node = make_tree(AST_PROCCALL, ps->lexer->cur_loc);
+	node->call = (AstProcCall){ .name = ident };
+
+	if (!ate_lp)
+		next_match(ps->lexer, T_LPAREN);
+
+	/* Peek one token: the argument list is optional */
+	const LexToken peek = lex_scan(ps->lexer);
+	lex_backup(ps->lexer, peek);
+	if (token_is_expr_start(peek.id))
+		node->call.args = expr_comma_list(ps);
+
+	next_match(ps->lexer, T_RPAREN);
+	trace("function call to: %s\n", ident.s);
+
+	return node;
+}
+
+/* Parses the right-hand side of `ident = expr` (the '=' is already consumed)
+ * and returns an AST_VARASSIGN node located at `loc`. */
+static Ast *
+variable_assign(ParserState *ps, Str ident, Location loc)
+{
+	Ast *node = make_tree(AST_VARASSIGN, loc);
+
+	node->varassgn.name = ident;
+	node->varassgn.expr = expr(ps, EXPR_INIT_PREC);
+	return node;
+}
+
+/* A statement that starts with an identifier is an assignment when the next
+ * token is '=', and a procedure call otherwise. */
+static Ast *
+funccall_or_assignment(ParserState *ps, Str ident)
+{
+	const LexToken peek = lex_scan(ps->lexer);
+
+	if (peek.id == T_EQUAL)
+		return variable_assign(ps, ident, ps->lexer->cur_loc);
+	lex_backup(ps->lexer, peek);
+	return function_call(ps, ident, false);
+}
+
+/* Parses a variable declaration; the `let`/`var`/`const` keyword (given in
+ * `decl_kind`) has already been consumed.
+ * Grammar: ident [':' type] ['=' expr]; either the type or the expression
+ * must be present.
+ * Returns an AST_VARDECL node, or nil after reporting an error. The
+ * declaration node is released on the error paths.
+ */
+static Ast *
+variable_decl(ParserState *ps, enum LexTokenId decl_kind)
+{
+	static const enum SymbolKind Token2SemaVarKind[] = {
+		[T_LET] = SymLet,
+		[T_VAR] = SymVar,
+		[T_CONST] = SymConst,
+	};
+	Assert(decl_kind == T_LET || decl_kind == T_VAR || decl_kind == T_CONST);
+
+	LexToken token = lex_scan(ps->lexer);
+	lex_match(ps->lexer, &token, T_IDENT);
+
+	Ast *decl = make_tree(AST_VARDECL, ps->lexer->cur_loc);
+	decl->var = (AstVarDecl) {
+		.name = token.ident,
+		.kind = Token2SemaVarKind[decl_kind],
+	};
+
+	/* type */
+	matchopt(token, T_COLON, ps) {
+		token = lex_scan(ps->lexer);
+		if (token.id != T_IDENT) {
+			parse_error(ps, "expected a type, got %s instead",  TokenIdStr[token.id]);
+			free(decl);
+			return nil;
+		}
+		decl->var.datatype = make_ident_node(token.ident, ps->lexer->cur_loc);
+	}
+
+	/* assignment expression */
+	matchopt(token, T_EQUAL, ps) {
+		trace("assignment of decl here\n");
+		decl->var.expr = expr(ps, EXPR_INIT_PREC);
+	}
+	trace(
+		"var decl %s %s: %s\n",
+		TokenIdStr[decl_kind],
+		decl->var.name.s,
+		decl->var.datatype != nil ? (char *)decl->var.datatype->ident.s : "(no type)"
+	);
+	/* if there's no type there must be an expr */
+	/* TODO: move to semantic analysis phase? */
+	if (decl->var.datatype == nil && decl->var.expr == nil) {
+		parse_error(
+			ps,
+			"'%s' declaration must have an assignment expression if no type is specified, "
+			"but neither a type nor expression was supplied",
+			TokenIdStr[decl_kind]
+		);
+		/* both children are nil here, so this frees everything */
+		free(decl);
+		return nil;
+	}
+	return decl;
+}
+
+/* Parses what follows the `return` keyword: an optional expression.
+ * Returns an AST_RETURN node whose `ret` stays nil when no expression
+ * follows. */
+static Ast *
+return_stmt(ParserState *ps)
+{
+	Ast *node = make_tree(AST_RETURN, ps->lexer->cur_loc);
+
+	/* Peek one token to decide whether a value is being returned */
+	const LexToken peek = lex_scan(ps->lexer);
+	lex_backup(ps->lexer, peek);
+	if (token_is_expr_start(peek.id))
+		node->ret = expr(ps, EXPR_INIT_PREC);
+	return node;
+}
+
+/* `break` carries no operands: just produce the AST_BREAK node. */
+static Ast *
+break_stmt(ParserState *ps)
+{
+	const Location here = ps->lexer->cur_loc;
+
+	return make_tree(AST_BREAK, here);
+}
+
+/* Parses the expression following `discard` and wraps it in an AST_DISCARD
+ * node. */
+static Ast *
+discard_stmt(ParserState *ps)
+{
+	Ast *node = make_tree(AST_DISCARD, ps->lexer->cur_loc);
+
+	node->discard.expr = expr(ps, EXPR_INIT_PREC);
+	return node;
+}
+
+/* Parses an attribute after its leading '#'. Only the empty form "[]" is
+ * recognized: an opening bracket directly followed by a closing one.
+ * Returns an AST_ATTRIBUTE node. */
+static Ast *
+parse_attribute(ParserState *ps)
+{
+	Ast *node = make_tree(AST_ATTRIBUTE, ps->lexer->cur_loc);
+
+	next_match(ps->lexer, T_LBRACKET);
+	next_match(ps->lexer, T_RBRACKET);
+	return node;
+}
+
+/* A declaration "decorated" with an attribute.
+ * Parses the attribute and then the procedure or variable declaration it
+ * applies to. Returns the attribute node wrapping the declaration, or nil
+ * (after reporting an error and releasing the attribute) when anything else
+ * follows it.
+ */
+static Ast *
+decorated_decl(ParserState *ps)
+{
+	Ast *attr = parse_attribute(ps);
+	LexToken next = lex_scan(ps->lexer);
+	switch (next.id) {
+	case T_PROC:
+		attr->attribute.node = proc_decl(ps);
+		break;
+	case T_CONST:
+	case T_LET:
+	case T_VAR:
+		attr->attribute.node = variable_decl(ps, next.id);
+		break;
+	default:
+		parse_error(ps, "node of kind '%s' cannot be attributed", TokenIdStr[next.id]);
+		free(attr);
+		return nil;
+	}
+	return attr;
+}
+
+/* Parses an `if` construct; the `if` keyword has already been consumed.
+ * Grammar: expr stmt* {'elif' expr stmt*} ['else' stmt*] 'end'.
+ * Returns an AST_IF node, or nil after reporting an error.
+ * NOTE(review): the error paths return nil without releasing `tree`.
+ */
+static Ast *
+if_stmt_expr(ParserState *ps)
+{
+	const enum LexTokenId if_block_ends[] = {T_ELSE, T_ELIF, T_END};
+	Ast *tree = make_tree(AST_IF, ps->lexer->cur_loc);
+	/* parse `if` */
+	tree->ifse.cond = expr(ps, EXPR_INIT_PREC);
+	tree->ifse.true_body = stmt_list_until(ps, true, if_block_ends, countof(if_block_ends));
+	tree->ifse.false_body = nil;
+
+	/* the body parser put the terminating token back; fetch it again */
+	LexToken next = lex_scan(ps->lexer);
+	/* NOTE(review): only `cond` and `body` are assigned before this is copied
+	 * into the vector; any other member of AstElif would be indeterminate */
+	AstElif elif_tree;
+	/* parse `elif`s and else */
+	for (;;) {
+		switch (next.id) {
+		case T_END: /* only has true branch */
+			return tree;
+		case T_ELSE:
+			/* once we see an `else` block, we assume the end of the `if` block,
+			 * enforcing that `else` must be the last. */
+			trace("we got else\n");
+			tree->ifse.false_body = stmt_list_until(ps, true, (enum LexTokenId[]){T_ELIF, T_END}, 2);
+			next = lex_scan(ps->lexer);
+			if (next.id == T_ELIF) {
+				parse_error(ps, "'elif' branch after 'else' branch not allowed");
+				lex_backup(ps->lexer, next);
+				return nil;
+			}
+			return tree;
+		case T_ELIF:
+			trace("we got elif\n");
+			elif_tree.cond = expr(ps, EXPR_INIT_PREC);
+			elif_tree.body = stmt_list_until(ps, true, if_block_ends, countof(if_block_ends));
+			next = lex_scan(ps->lexer);
+			arrput(tree->ifse.elifs, elif_tree);
+			/* no more `elif` blocks neither an `else` block next */
+			if (next.id == T_END)
+				return tree;
+			Assert(next.id == T_ELSE || next.id == T_ELIF);
+			break;
+		default: /* shouldn't happen */
+			lex_backup(ps->lexer, next);
+			parse_error(ps, "huh?: %s", TokenIdStr[next.id]);
+			return nil;
+		}
+	}
+	return tree;
+}
+
+/* Parses a `while` loop after its keyword: a condition expression followed by
+ * statements up to `end`. Returns an AST_LOOP node. */
+static Ast *
+while_stmt(ParserState *ps)
+{
+	const enum LexTokenId loop_end[] = {T_END};
+	Ast *node = make_tree(AST_LOOP, ps->lexer->cur_loc);
+
+	node->loop.precond = expr(ps, EXPR_INIT_PREC);
+	node->loop.body = stmt_list_until(ps, false, loop_end, 1);
+	return node;
+}
+
+/* Parses an atom: an integer literal, a string literal, an identifier or a
+ * procedure call. Returns the matching node, or nil after reporting an error
+ * for any other token. The node location is the lexer position before the
+ * atom's first token is scanned.
+ */
+static Ast *
+atom(ParserState *ps)
+{
+	const Location start = ps->lexer->cur_loc;
+	LexToken t = lex_scan(ps->lexer);
+	LexToken next;
+	Ast *tree;
+
+	switch (t.id) {
+	case T_NUMBER:
+		tree = make_tree(AST_NUMBER, start);
+		tree->number.n = t.inumber;
+		/* u64 is not `unsigned long` on every platform: widen explicitly */
+		trace("number in atom: %llu\n", (unsigned long long)t.inumber);
+		return tree;
+	case T_STRING:
+		tree = make_tree(AST_STRLIT, start);
+		tree->strlit = t.str;
+		return tree;
+	case T_IDENT:
+		next = lex_scan(ps->lexer);
+		/* It is a plain symbol or a function call? */
+		if (next.id == T_LPAREN)
+			return function_call(ps, t.ident, true);
+		lex_backup(ps->lexer, next);
+		tree = make_tree(AST_IDENT, start);
+		tree->ident = t.ident;
+		return tree;
+	default:
+		parse_error(ps, "expected a number, identifier or expression, not '%s'", TokenIdStr[t.id]);
+		break;
+	}
+	return nil;
+}
+
+/* Parses an atom optionally preceded by one unary operator ('-' or logical
+ * not). Returns an AST_UNARY node in the first case, the bare atom
+ * otherwise. */
+static Ast *
+unary(ParserState *ps)
+{
+	const LexToken lead = lex_scan(ps->lexer);
+
+	if (!token_is_unary(lead.id)) {
+		lex_backup(ps->lexer, lead);
+		return atom(ps);
+	}
+
+	Ast *node = make_tree(AST_UNARY, ps->lexer->cur_loc);
+	node->unary.op = Str_from_c(TokenIdStr[lead.id]);
+	node->unary.atom = atom(ps);
+	return node;
+}
+
+/* Parse a binary expression or an atom. This implements the Pratt parser algorithm.
+ * Only operators whose precedence is at least `minprec` are consumed; the first
+ * token that does not continue the expression is handed back to the lexer.
+ * See also:
+ * 	- https://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing
+ * 	- https://www.oilshell.org/blog/2016/11/01.html
+ * 	XXX: Mutate to the shunting yard variation? Since it uses an explicit stack instead of the call
+ * 	stack, guard against deeply nested expressions.
+ */
+static Ast *
+expr(ParserState *ps, int minprec)
+{
+	Ast *tree = unary(ps);
+	for (;;) {
+		LexToken t = lex_scan(ps->lexer);
+		/* The range test must come first: it keeps the table index in
+		 * bounds. Keywords such as `end` lie outside the operator range. */
+		if (!token_is_binop(t.id) || OperatorTable[t.id].pred < minprec) {
+			lex_backup(ps->lexer, t);
+			break;
+		}
+		const OperatorPrec op = OperatorTable[t.id];
+		const int next_prec = op.left_assoc ? op.pred + 1 : op.pred;
+		/* Read the location before parsing the right operand. Doing both in
+		 * one argument list leaves their order unspecified, so the recorded
+		 * location would depend on the compiler. */
+		const Location op_loc = ps->lexer->cur_loc;
+		Ast *rhs = expr(ps, next_prec);
+		tree = make_binop(t.id, op_loc, tree, rhs);
+	}
+	return tree;
+}
+
+/* Stub: ignores both arguments and always returns an empty (nil) vector.
+ * NOTE(review): presumably the planned shared helper for separated lists
+ * mentioned in the TODO at the top of this file -- confirm. */
+static Vec(Ast *)
+sep_list(ParserState *ps, Ast *(*prod_fn)(Compiler *, void *))
+{
+	(void)ps, (void)prod_fn;
+	Vec(Ast *) prod = nil;
+	return prod;
+}
+
+/* Parses one or more expressions separated by commas; a trailing comma is
+ * accepted. The token that ends the list is handed back to the lexer.
+ * Returns an AST_EXPRS node holding the expressions.
+ */
+static Ast *
+expr_comma_list(ParserState *ps)
+{
+	Ast *list = make_tree(AST_EXPRS, ps->lexer->cur_loc);
+	Vec(Ast *) items = nil;
+	LexToken tok;
+	bool more = true;
+
+	while (more) {
+		arrput(items, expr(ps, EXPR_INIT_PREC));
+		tok = lex_scan(ps->lexer);
+		trace("commalist tok: %s\n", TokenIdStr[tok.id]);
+		more = false;
+		/* without a comma the list is over */
+		if (tok.id == T_COMMA) {
+			/* a comma only continues the list when an expression follows
+			 * it; otherwise it was a trailing comma */
+			tok = lex_scan(ps->lexer);
+			if (token_is_expr_start(tok.id)) {
+				lex_backup(ps->lexer, tok);
+				more = true;
+			}
+		}
+	}
+	lex_backup(ps->lexer, tok);
+
+	if (arrlen(items) == 0) {
+		free(list);
+		arrfree(items);
+		return nil;
+	}
+	list->exprs = items;
+	return list;
+}
+
+/* Tells whether `c` is one of the first `len` entries of `toks`. */
+static bool
+token_id_in_list(enum LexTokenId c, const enum LexTokenId *toks, isize len)
+{
+	isize pos = 0;
+
+	while (pos < len) {
+		if (toks[pos] == c)
+			return true;
+		++pos;
+	}
+	return false;
+}
+
+/* Parses statements until a token whose type is one of the `len` entries of
+ * `end_markers` is found. That terminating token is consumed unless `putback`
+ * is set, in which case it is handed back to the lexer. A semicolon after a
+ * statement is skipped. Reaching EOF first is reported as an error and ends
+ * the list.
+ * Returns an AST_STMTS node, or `nil` if the statement list is empty or holds
+ * more than MAX_STMTS_IN_BLOCK statements. On the latter error the list node
+ * and vector are released (the statements parsed so far are not). */
+static Ast *
+stmt_list_until(ParserState *ps, bool putback, const enum LexTokenId *end_markers, isize len)
+{
+	LexToken token = lex_scan(ps->lexer);
+	Vec(Ast *) stmts = nil;
+	Ast *body = make_tree(AST_STMTS, ps->lexer->cur_loc);
+
+	/* stmt* */
+	while (!token_id_in_list(token.id, end_markers, len)) {
+		/* Checked before parsing a statement, so that a block cut short by
+		 * EOF yields one diagnostic and no nil statement */
+		if (token.id == T_EOF) {
+			parse_error(ps, "unexpected EOF, expected a statement or `end`");
+			break;
+		}
+		trace("stmt list token: %s\n", TokenIdStr[token.id]);
+		if (arrlen(stmts) + 1 > MAX_STMTS_IN_BLOCK) {
+			parse_error(ps, "more than %d (implementation limit) statements in block", MAX_STMTS_IN_BLOCK);
+			free(body);
+			arrfree(stmts);
+			return nil;
+		}
+		arrput(stmts, stmt(ps, token));
+
+		token = lex_scan(ps->lexer);
+		if (token.id == T_SEMICOLON)
+			token = lex_scan(ps->lexer);
+	}
+	trace("token before end next_match: %s\n", TokenIdStr[token.id]);
+	if (putback)
+		lex_backup(ps->lexer, token);
+	/* empty list, just return nil instead of wasting space on a 0-length
+	 * vector */
+	if (arrlen(stmts) == 0) {
+		free(body);
+		arrfree(stmts);
+		return nil;
+	}
+	body->stmts = stmts;
+	return body;
+}
+
+/* Parses one statement whose first token, `token`, was already scanned by the
+ * caller, dispatching on the token type.
+ * Returns the statement node, or nil after reporting an error for a keyword
+ * that cannot start a statement.
+ * NOTE(review): unlike the other error branches, the default branch
+ * terminates the process with exit(1) instead of returning nil.
+ */
+static Ast *
+stmt(ParserState *ps, LexToken token)
+{
+	switch (token.id) {
+	case T_IDENT:
+		return funccall_or_assignment(ps, token.ident);
+	case T_CONST:
+	case T_LET:
+	case T_VAR:
+		return variable_decl(ps, token.id);
+	case T_PROC:
+		return proc_decl(ps);
+	case T_HASH:
+		return decorated_decl(ps);
+	case T_RETURN:
+		return return_stmt(ps);
+	case T_BREAK:
+		return break_stmt(ps);
+	case T_DISCARD:
+		return discard_stmt(ps);
+	case T_IF:
+		return if_stmt_expr(ps);
+	case T_ELIF:
+		parse_error(ps, "stray 'elif'");
+		return nil;
+	case T_WHILE:
+		return while_stmt(ps);
+	case T_ELSE:
+		parse_error(ps, "'else' with no accompanying 'if'");
+		return nil;
+	case T_END:
+		parse_error(ps, "stray 'end' keyword");
+		return nil;
+	case T_EOF:
+		parse_error(ps, "unexpected EOF while parsing a statement");
+		return nil;
+	default:
+		parse_error(ps, "invalid statement '%s'", TokenIdStr[token.id]);
+		exit(1);
+	}
+	return nil;
+}
+
+/* Parses top-level statements until the lexer reports EOF and returns them in
+ * an AST_STMTS node. */
+static Ast *
+stmt_list(ParserState *ps)
+{
+	Ast *program = make_tree(AST_STMTS, ps->lexer->cur_loc);
+	LexToken tok = lex_scan(ps->lexer);
+
+	while (tok.id != T_EOF) {
+		arrput(program->stmts, stmt(ps, tok));
+		tok = lex_scan(ps->lexer);
+	}
+	return program;
+}
+
+/* Creates a parser that reads tokens from `ls` and reports through `cm`.
+ * Neither pointer is owned by the parser. Returns nil when memory cannot be
+ * allocated. Release the result with `parse_destroy`.
+ */
+ParserState *
+parse_new(Compiler *cm, LexState *ls)
+{
+	ParserState *ps = calloc(1, sizeof(*ps));
+	if (ps == nil)
+		return nil;
+	ps->cm = cm;
+	ps->lexer = ls;
+	ps->ok = true;
+	return ps;
+}
+
+/* Frees the parser state only; the lexer and compiler it points to are left
+ * untouched. */
+void
+parse_destroy(ParserState *ps)
+{
+	free(ps);
+}
+
+/* Parses the whole input of the parser's lexer and returns the resulting
+ * AST_STMTS node. `ps->ok` is cleared by any parse_error raised on the way. */
+Ast *
+parse(ParserState *ps)
+{
+	return stmt_list(ps);
+}
--- a/compiler/parse.h
+++ b/compiler/parse.h
@ -0,0 +1,21 @@
+#ifndef _parse_h_
+#define _parse_h_
+
+#include "ast.h"
+#include "state.h"
+#include "lex.h"
+
+/* State shared by all parsing routines. */
+typedef struct {
+	Compiler *cm; /* owning compiler instance */
+	LexState *lexer; /* token source */
+	bool ok; /* initialized to true by `parse_new` */
+} ParserState;
+
+/* Allocates a parser state bound to `cm` and `ls`. */
+ParserState *
+parse_new(Compiler *cm, LexState *ls);
+/* Frees a state created by `parse_new`. */
+void
+parse_destroy(ParserState *ps);
+/* Parses the whole input and returns the root of the AST. */
+Ast *
+parse(ParserState *ps);
+
+#endif
--- a/compiler/pre.h
+++ b/compiler/pre.h
@ -0,0 +1,158 @@
+#ifndef _pre_h_
+#define _pre_h_
+/* Prelude file, containing some useful macros and types. */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+
+/* Short aliases for the fixed-width integer and floating point types. */
+typedef int8_t i8;
+typedef uint8_t u8;
+typedef int16_t i16;
+typedef uint16_t u16;
+typedef int32_t i32;
+typedef uint32_t u32;
+typedef int64_t i64;
+typedef uint64_t u64;
+typedef float f32;
+typedef double f64;
+
+/* Pointer-sized and size-related aliases. */
+typedef uintptr_t uptr;
+typedef ptrdiff_t isize;
+typedef size_t usize;
+typedef _Bool bool;
+
+/* Boolean and null constants used throughout the compiler. */
+#define true ((bool)1)
+#define false ((bool)0)
+#define nil ((void *)0)
+/* Largest value representable in a `u64` (all bits set). */
+#define U64_MAX ((u64)-1)
+
+/* Anonymous pointer + length pair over elements of type T. */
+#define Slice(T) 	\
+	struct {		\
+		T *s;		\
+		isize len;	\
+	}
+
+/* Byte string; `len` does not count the trailing null terminator. */
+typedef Slice(u8) Str;
+/* Value plus validity flag; build with `Some` / `None`. */
+#define Optional(T) struct {T val; bool ok;}
+#define Some(T, v) (T){v, true}
+#define None(T) (T){.ok = false}
+/* Meant for use with stb_ds */
+#define Vec(T) T *
+#define HashMap(K, V) struct { K key; V value; }
+#define HashMapStr(V) struct { char *key; V value; }
+
+/* Length of an array */
+#define countof(arr) (isize)(sizeof(arr) / sizeof(*(arr)))
+/* Length of string literal */
+#define lengthof(s) (countof(s) - 1)
+
+/* Builds an anonymous slice over elements [lo, hi) of `arr`. */
+#define ViewMem(T, arr, lo, hi) ((Slice(T)){.s = (arr) + (lo), .len = (hi) - (lo)})
+/* Narrows slice `sl` in place to [lo, hi) relative to its current start and
+ * yields it. `lo` is evaluated twice. */
+#define View(sl, lo, hi) ((sl).s += (lo), (sl).len = (hi) - (lo), (sl))
+/* Iterates over an stb_ds vector, copying each element in turn into a fresh
+ * variable `val` scoped to the loop. The element is loaded only after the
+ * bounds check, so nil and empty vectors are safe and every element is
+ * visited exactly once. `arr` is evaluated on every iteration. */
+#define foreach(val, arr) \
+	for (__typeof__(*(arr)) *__p = (arr), (val); \
+	     __p < (arr) + arrlen(arr) && ((val) = *__p, 1); \
+	     ++__p)
+/* Index of the element currently held by a `foreach` loop. `val` is a copy,
+ * so the index is derived from the loop cursor, not from `val`'s address. */
+#define foreach_getindex(val, arr) ((void)sizeof(val), (isize)(__p - (arr)))
+
+/* Useful integer operations good to have.
+ * These are macros: arguments may be evaluated more than once, so avoid
+ * passing expressions with side effects. */
+#define max(x, y) ((x) > (y) ? (x) : (y))
+#define min(x, y) ((x) < (y) ? (x) : (y))
+#define clamp(x, lo, hi) max(lo, min(x, hi))
+
+/* Mask with only bit `pos` set. The shifted constant is an `int`. */
+#define BitPos(pos) (1 << (pos))
+
+/* Compiler-specific trap and unreachable hints, with abort() as a fallback. */
+#if defined(__GNUC__) || defined(__clang__)
+#	define debugtrap() __builtin_trap()
+#	define unreachable() __builtin_unreachable()
+#else /* not optimal... */
+#	define debugtrap() abort()
+#	define unreachable() abort()
+#endif
+
+/* Debug-only helpers; both expand to nothing when NDEBUG is defined.
+ * NOTE(review): `Assert` expands to a bare `if` statement, so using it as the
+ * unbraced body of an `if` that has an `else` would capture that `else`.
+ * NOTE(review): `trace` uses fprintf/stderr, but this header does not include
+ * <stdio.h>; the including file has to provide it. */
+#ifndef NDEBUG
+#	if defined (__GNUC__) || defined(__clang__)
+#		define Assert(pred) if (!(pred)) { __builtin_trap(); }
+#	else
+#		define Assert(pred) if (!(pred)) { *(volatile int *)0 = 0; }
+#	endif
+#	define trace(...) do { 								\
+		fprintf(stderr, "%s:%-5i", __FILE__, __LINE__);	\
+		fprintf(stderr, __VA_ARGS__);					\
+	} while (0)
+#else
+#	define Assert(pred)
+#	define trace(...)
+#endif
+
+/* Creates a `Str` from a string literal */
+#define Sl(s) ((Str){ (u8 *)(s), (isize)lengthof(s) })
+/* Creates a `Str` from a buffer of size `len` */
+#define Sb(s, len) ((Str){ (u8 *)(s), (isize)(len) })
+/* Creates a `Str` from a C string; a nil pointer yields length 0.
+ * `s` is evaluated more than once. */
+#define Str_from_c(s) ((Str){ (u8 *)(s), (isize)((s) != nil ? strlen(s) : 0) })
+/* True when the string has length 0. */
+#define Str_empty(s) ((s).len == 0)
+/* Yields `s` unless it is empty, in which case yields `sor`. */
+#define Str_default(s, sor) (!Str_empty(s) ? (s) : (sor))
+
+/* Declared by hand so this header does not have to pull in <stdio.h>. The
+ * size parameter must be `size_t` to stay compatible with the standard
+ * prototype on targets where `size_t` is not `unsigned long`. */
+int
+vsnprintf(char *, size_t, const char *, va_list);
+
+/* "Converts" a `Str` into a C string. Since `Str` are meant to be
+ * null-terminated already, no conversion is made, but ensures that the
+ * null terminator is present.
+ * Returns nil for an empty or nil string. `len` does not count the
+ * terminator (see `Sl`, `Str_from_c`, `Str_new`), so it lives at `s.s[len]`. */
+static inline char *
+Str_to_c(Str s)
+{
+	if (s.len == 0 || s.s == nil)
+		return nil;
+	Assert(s.s[s.len] == '\0');
+	return (char *)s.s;
+}
+
+/* Byte-wise equality of two strings. Strings of different length are never
+ * equal; two empty strings are equal regardless of their pointers. */
+static inline bool
+Str_equal(Str s1, Str s2)
+{
+	if (s1.len != s2.len)
+		return false;
+	/* passing nil to memcmp is UB even for size 0, so settle empties here */
+	if (s1.len == 0)
+		return true;
+	return memcmp(s1.s, s2.s, s1.len) == 0;
+}
+
+/* Heap allocates a new `Str` of size `len`, with contents from `data` if it is
+ * not `nil`. The buffer holds `len + 1` zeroed bytes, so the result is always
+ * null-terminated at `s.s[len]`. Returns an empty `Str` (nil pointer, length
+ * 0) if the allocation fails. */
+static inline Str
+Str_new(const u8 *data, isize len)
+{
+	Assert(len >= 0);
+	Str s;
+	s.s = calloc((usize)len + 1, sizeof(*s.s));
+	if (s.s == nil) {
+		s.len = 0;
+		return s;
+	}
+	s.len = len;
+	if (data != nil && len > 0)
+		memcpy(s.s, data, (usize)len);
+	s.s[len] = '\0'; /* last byte of the allocation; already zero, kept explicit */
+	return s;
+}
+
+/* printf-style formatting into a freshly heap-allocated `Str` sized exactly
+ * for the result. Returns a zeroed `Str` if the length cannot be computed. */
+static inline Str
+Strafmt(const char *fmt, ...)
+{
+	va_list ap, ap_measure;
+
+	va_start(ap, fmt);
+	/* First pass on a copy of the argument list: measure only. */
+	va_copy(ap_measure, ap);
+	const int needed = vsnprintf(nil, 0, fmt, ap_measure);
+	va_end(ap_measure);
+
+	if (needed < 0) {
+		va_end(ap);
+		return (Str){0};
+	}
+
+	/* `Str_new` reserves the extra byte for the terminator. */
+	Str out = Str_new(nil, needed);
+	vsnprintf((char *)out.s, out.len + 1, fmt, ap);
+	va_end(ap);
+	return out;
+}
+
+#endif
--- a/compiler/rutilec.c
+++ b/compiler/rutilec.c
@ -0,0 +1,200 @@
+#define _POSIX_C_SOURCE 200809L
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "pre.h"
+#include "lex.h"
+#include "parse.h"
+#include "sema.h"
+#include "state.h"
+#include "codegen.h"
+#include "messages.h"
+
+#include "libs/optparse.h"
+#include "libs/stb_ds.h"
+
+/* Build-time configuration; each value can be overridden from the compiler
+ * command line. */
+#ifndef GIT_HASH
+#	define GIT_HASH "<no hash>"
+#endif
+#ifndef BUG_REPORT_URL
+#	define BUG_REPORT_URL "https://codeberg.org/tocariimaa/rutile"
+#endif
+/* Extension of produced executables as a `Str`; empty means no extension. */
+#ifndef TARGET_EXE_EXT /* without prefix dot! */
+#	define TARGET_EXE_EXT Sl("")
+#endif
+
+/* Text printed by the -h option, after the usage line. */
+static const char *HelpMessage =
+	"Summary of common options:\n"
+	"  -c\tCompile only. Don't link, output an object file instead.\n"
+	"  -d\tDefine a constant with the specified value.\n"
+	"  -h\tPrint this help message.\n"
+	"  -o\tSet output file name of the executable/object file.\n"
+	"  -v\tPrint the version of this compiler, plus other relevant information.\n"
+	"  -S\tEmit intermediate code.\n"
+	"  -R\tSet the code generation mode, 'release' for an optimized build,\n"
+	"  \t'debug' for a debug build.\n";
+
+/* Creates the output binary file name, changing the extension to the current platform
+ * executable file extension, or it simply removes the original extension if the platform
+ * has no binary extension, (i.e UNIX-likes OSes).
+ *
+ * `exe_ext` is given without the leading dot. The result is heap allocated.
+ * Reports a fatal error when the source name has no extension and the
+ * platform has none either, since input and output would then collide. */
+static Str
+make_binary_filename(Compiler *cm, Str src_filename, const Str exe_ext)
+{
+	const bool exe_has_ext = exe_ext.len > 0;
+	const size_t ss = src_filename.len;
+	Assert(ss != 0);
+	/* room for: source name + '.' + extension + null terminator */
+	char *buf = malloc(ss + 2 + (exe_has_ext ? exe_ext.len : 0));
+	if (buf == nil)
+		fatal(cm, nil, "out of memory");
+	memcpy(buf, src_filename.s, ss);
+	buf[ss] = '\0';
+
+	char *p = buf + (ss - 1);
+	while (p != buf && *p != '.') /* Search for the first '.' backwards */
+		--p;
+	/* No extension in filename, edge case really */
+	if (p == buf) {
+		if (!exe_has_ext)
+			fatal(cm, nil, "output file name required in this case (host OS binary format lacks extension)");
+		/* append extension then */
+		p = buf + ss;
+		*p = '.';
+	}
+	if (exe_has_ext) {
+		memcpy(++p, exe_ext.s, exe_ext.len);
+		/* step over the extension just written, whatever its length */
+		p += exe_ext.len;
+	}
+	*p = '\0';
+	return Str_from_c(buf);
+}
+
+/* Maps a backend name from the command line to its enum value: "c" or "C"
+ * selects the C backend, "gcc" selects libgccjit. Any other name is reported
+ * through `fatal`. */
+static enum CodegenBackends
+backend_from_str(Compiler *cm, Str s)
+{
+	const bool is_c = s.len == 1 && (s.s[0] == 'c' || s.s[0] == 'C');
+	if (is_c)
+		return CgBackendC;
+	if (Str_equal(s, Sl("gcc")))
+		return CgBackendLibGccJit;
+
+	fatal(cm, nil, "unknown backend '%s'", s.s);
+	unreachable();
+}
+
+/* Parses the command line into `cm->opts` and returns the first positional
+ * argument (the source file name). -h and -v print their text and exit with
+ * status 0; an invalid option or a missing input file is reported through
+ * `fatal`. Color output is enabled only when stderr is a terminal and the
+ * NO_COLOR environment variable is unset or empty. */
+static Str
+cli_boilerplate(char **argv, Compiler *cm)
+{
+	const struct optparse_long longopts[] = {
+		{"backend", 'b', OPTPARSE_REQUIRED},
+		{"compile-only", 'c', OPTPARSE_NONE},
+		{"define", 'd', OPTPARSE_REQUIRED},
+		{"max-errors", 'E', OPTPARSE_REQUIRED},
+		{"release", 'R', OPTPARSE_REQUIRED},
+		{"emit-ir", 'S', OPTPARSE_OPTIONAL},
+		{"exe", 'o', OPTPARSE_REQUIRED},
+		{"version", 'v', OPTPARSE_NONE},
+		{"help", 'h', OPTPARSE_NONE},
+		{0},
+	};
+
+	const char *no_fun_env = getenv("NO_COLOR");
+	cm->opts.color = isatty(STDERR_FILENO) && !(no_fun_env != nil && *no_fun_env != '\0');
+
+	struct optparse opts;
+	optparse_init(&opts, argv);
+
+	/* `optparse_long` returns an int; keep the full width so the result is
+	 * never narrowed before being compared. */
+	int opt;
+	while ((opt = optparse_long(&opts, longopts, nil)) != -1) {
+		switch (opt) {
+		case 'b':
+			cm->opts.backend = backend_from_str(cm, Str_from_c(opts.optarg));
+			break;
+		case 'c':
+			cm->opts.compile_only = true;
+			break;
+		case 'd':
+			trace("define: %s\n", opts.optarg);
+			arrput(cm->opts.defines, Str_from_c(opts.optarg));
+			break;
+		case 'h':
+			printf("Usage: %s [options...] files...\n\n%s\n", *argv, HelpMessage);
+			exit(0);
+		case 'E':
+			cm->opts.max_errors = atoi(opts.optarg); /* XXX: atoi LOL */
+			break;
+		case 'R':
+			trace("release: %s\n", opts.optarg);
+			cm->opts.release_mode = Str_from_c(opts.optarg);
+			break;
+		case 'S':
+			/* accepted but currently ignored */
+			break;
+		case 'o':
+			cm->opts.exe_out = Str_from_c(opts.optarg);
+			break;
+		case 'v':
+			printf("Rutile compiler v0.0.1\n");
+			printf("git commit: %s\nReport bugs here: %s\n", GIT_HASH, BUG_REPORT_URL);
+			exit(0);
+		case '?':
+			fatal(cm, nil, "%s: %s", *argv, opts.errmsg);
+			break;
+		}
+	}
+
+	const char *src_filename = optparse_arg(&opts);
+	if (src_filename == nil)
+		fatal(cm, nil, "no input files specified");
+	return Str_from_c(src_filename);
+}
+
+/* Compiler driver: parses the command line, opens the source ("-" means
+ * stdin), then runs lexing/parsing, semantic analysis and code generation.
+ * Returns EXIT_SUCCESS only when every stage succeeded, so build tools and
+ * scripts can detect a failed compilation. */
+int
+main(int argc, char **argv)
+{
+	(void)argc;
+	Compiler cm = {
+		.opts = {
+			.backend = CgBackendC,
+			.max_errors = 20,
+		}
+	};
+	int status = EXIT_FAILURE;
+
+	Str src_filename = cli_boilerplate(argv, &cm);
+	FILE *src_in = nil;
+
+	if (src_filename.s[0] == '-' && src_filename.s[1] == '\0') {
+		src_in = stdin;
+		src_filename = Sl("<stdin>");
+	} else {
+		if ((src_in = fopen((char *)src_filename.s, "rb")) == nil) {
+			fatal(&cm, nil, "can't open: %s", src_filename.s);
+		}
+	}
+
+	cm.current_filename = src_filename;
+	if (cm.opts.exe_out.len == 0)
+		cm.opts.exe_out = make_binary_filename(&cm, src_filename, TARGET_EXE_EXT);
+
+	if (Str_equal(cm.opts.exe_out, cm.current_filename)) {
+		fatal(&cm, nil, "input source file and output file are the same");
+	}
+
+	/* Compiler pipeline */
+	LexState *ls = lex_new(&cm, src_in, src_filename, 4);
+	ParserState *ps = parse_new(&cm, ls);
+	SemaCtx *ss = sema_new(&cm);
+	CodegenCtx *cgctx = nil;
+
+	Ast *program = parse(ps);
+	if (!ps->ok)
+		goto cleanup;
+	sema(ss, program);
+	if (!ss->ok)
+		goto cleanup;
+
+	cgctx = codegen_new(&cm, cm.opts.backend);
+	codegen(cgctx, program);
+	codegen_destroy(cgctx);
+	status = EXIT_SUCCESS;
+cleanup:
+	sema_destroy(ss);
+	parse_destroy(ps);
+	lex_destroy(ls);
+	fclose(src_in);
+
+	return status;
+}
--- a/compiler/sema.c
+++ b/compiler/sema.c
@ -0,0 +1,980 @@
+/* Semantic analyzer and type checker */
+#include <stdlib.h>
+#include <string.h>
+
+#include "pre.h"
+#include "sema.h"
+#include "datatype.h"
+#include "location.h"
+#include "symbol.h"
+#include "ast.h"
+#include "state.h"
+#include "messages.h"
+#include "libs/stb_ds.h"
+
+#define sema_error(ctx, loc, ...) do { 		\
+		error((ctx)->cm, loc, __VA_ARGS__);	\
+		(ctx)->ok = false;					\
+	} while (0)
+#define sema_warning(ctx, loc, ...) warning((ctx)->cm, loc, __VA_ARGS__)
+#define sema_note(ctx, loc, ...) note((ctx)->cm, loc, __VA_ARGS__)
+#define sema_is_stmt_terminal(s) (s->type == AST_RETURN || s->type == AST_BREAK)
+#define sym_insert(syms, k, v) shput(syms, k, v)
+
+/* Bit flags describing the syntactic context currently being analyzed. They
+ * are stored in the `flags` field of a context frame and copied into every
+ * frame pushed on top of it.
+ * NOTE(review): the values come from `BitPos`, which shifts a plain `int`, so
+ * bit positions 31 and above would need a wider constant despite the
+ * "64 bits" remark below. */
+enum SemaCtxFlags /* 64 bits */
+{
+	SctxInsideProc = BitPos(0),
+	SctxInsideLoop = BitPos(1),
+	SctxInsideIf = BitPos(2),
+	SctxInTopLevel = BitPos(3),
+	SctxInExpr = BitPos(4),
+	SctxInDiscard = BitPos(5),
+	SctxInStmtBlock = BitPos(6),
+};
+
+/* One entry of a scope's symbol table. */
+typedef struct {
+	enum SymbolKind kind;
+ 	/* The data type associated with the symbol. */
+	DataType *dtype;
+	bool used; /* set once the symbol is referenced; drives "unused" warnings */
+	bool procparm; /* if its a proc parameter */
+	Location loc; /* where the symbol was declared */
+} Symbol;
+
+/* stb_ds string hash map entry: identifier -> Symbol. */
+typedef HashMapStr(Symbol) SymbolEntry;
+
+/* A lexical scope; scopes form a singly linked stack through `prev`. */
+struct Scope
+{
+	Scope *prev; /* Previous scope in the stack */
+	SymbolEntry *symbols; /* All the symbols in this scope */
+};
+
+/* Success flag wrapper.
+ * NOTE(review): not referenced in the visible part of this file — confirm it
+ * is still needed. */
+typedef struct {
+	bool ok;
+} SemaStatus;
+
+/* Default value returned by symbol table lookups that find nothing. */
+static const Symbol InvalidSymbol = {.kind = SymInvalid};
+/* Sentinel type returned when a type cannot be resolved; it is compared by
+ * pointer identity. */
+static const DataType *InvalidDataType = &(DataType){.kind = DtkInvalid};
+
+/* Forward declarations for functions that are used before their definition. */
+static DataTypeCheck
+datatype_struct_cmp(SemaCtx *sctx, DataType *s1, DataType *s2);
+static DataTypeCheck
+datatype_cmp(SemaCtx *sctx, DataType *dt1, DataType *dt2);
+static DataType *
+sema_expr(SemaCtx *sctx, Ast *expr, Location loc);
+static void
+sema_expr_list(SemaCtx *sctx, Vec(Ast *) exprs, Location loc);
+static void
+sema_node(SemaCtx *sctx, Ast *node);
+static void
+sema_stmts(SemaCtx *sctx, Vec(Ast *) stmts);
+static DataType *
+resolve_datatype(SemaCtx *sctx, const Str ident, Location loc);
+
+/* Allocates an empty scope chained to `prev`. Lookups of missing names in
+ * its symbol table yield `InvalidSymbol`. */
+static Scope *
+make_scope(Scope *prev)
+{
+	Scope *scope = malloc(sizeof *scope);
+	scope->symbols = nil;
+	scope->prev = prev;
+	sh_new_arena(scope->symbols);
+	shdefault(scope->symbols, InvalidSymbol);
+	return scope;
+}
+
+/* Allocates a zeroed context frame for compiler `cm`, linked to `prev`. */
+static SemaCtx *
+make_semactx(Compiler *cm, SemaCtx *prev)
+{
+	SemaCtx *frame = calloc(1, sizeof *frame);
+	frame->prev = prev;
+	frame->cm = cm;
+	return frame;
+}
+
+/* Allocates a zeroed data type descriptor and fills in its kind, size,
+ * builtin flag and signedness. */
+static DataType *
+make_data_type(enum DataTypeKind kind, u16 size, bool builtin, bool sign)
+{
+	DataType *type = calloc(1, sizeof *type);
+	type->sign = sign;
+	type->builtin = builtin;
+	type->size = size;
+	type->kind = kind;
+	return type;
+}
+
+/* Allocates a proc data type with the given return type and argument type
+ * vector. The vector is stored as-is, not copied. */
+static DataType *
+make_proc_type(bool builtin, DataType *rettype, Vec(DataType *) argtypes)
+{
+	DataType *ptype = calloc(1, sizeof *ptype);
+	ptype->builtin = builtin;
+	ptype->kind = DtkProc;
+	ptype->proc.argtypes = argtypes;
+	ptype->proc.rettype = rettype;
+	return ptype;
+}
+
+/* Resolves the declared type of every identifier/type pair, in order, and
+ * returns the resulting vector (nil when `idents` is nil or empty).
+ * Unresolvable types are reported by `resolve_datatype` and show up as the
+ * invalid type sentinel. A plain index loop is used so that each parameter
+ * is resolved exactly once. */
+static Vec(DataType *)
+make_type_list_from_idents(SemaCtx *sctx, Vec(AstIdentTypePair) idents)
+{
+	Vec(DataType *) dts = nil;
+	const isize count = arrlen(idents);
+	for (isize i = 0; i < count; ++i)
+		arrput(dts, resolve_datatype(sctx, idents[i].dtype, idents[i].dtype_loc));
+	return dts;
+}
+
+/* Copies the `len` type pointers in `a` into a new stb_ds vector. Returns
+ * nil when `len` is not positive. */
+static Vec(DataType *)
+make_proc_args(DataType *a[], isize len)
+{
+	Vec(DataType *) args = nil;
+	if (len <= 0)
+		return args;
+	arrsetlen(args, len);
+	/* memcpy counts bytes, so scale the element count by the element size */
+	memcpy(args, a, (usize)len * sizeof(*args));
+	return args;
+}
+
+/* Pushes a new context frame on top of `*sctx` and makes it current. The new
+ * frame starts with the parent's flags, scopes and `ok` status.
+ * XXX: could rather only push flags...
+ */
+static void
+push_semactx(SemaCtx **sctx) 
+{
+	SemaCtx *parent = *sctx;
+	SemaCtx *frame = make_semactx(parent->cm, parent);
+
+	frame->ok = parent->ok;
+	frame->flags = parent->flags;
+	frame->top_scope = parent->top_scope;
+	frame->current_scope = parent->current_scope;
+	*sctx = frame;
+}
+
+/* Pops and frees the current context frame, handing its `ok` status down to
+ * the parent, which becomes current again. Popping the bottom frame is a
+ * compiler bug. */
+static void
+pop_semactx(SemaCtx **sctx)
+{
+	SemaCtx *top = *sctx;
+	SemaCtx *parent = top->prev;
+
+	compiler_assert(top->cm, parent != nil);
+	parent->ok = top->ok;
+	free(top);
+	*sctx = parent;
+}
+
+/* Opens a fresh scope nested inside the current one and makes it current. */
+static void
+enter_scope(SemaCtx *sctx) 
+{
+	Scope *inner = make_scope(sctx->current_scope);
+	sctx->current_scope = inner;
+}
+
+/* Leaves the current scope, making its parent current again. The scope that
+ * was left is not freed. Leaving the outermost scope is a compiler bug. */
+static void
+exit_scope(SemaCtx *sctx)
+{
+	Scope *outer = sctx->current_scope->prev;
+	compiler_assert(sctx->cm, outer != nil);
+	sctx->current_scope = outer;
+}
+
+/* Looks `name` up in `scope` only, ignoring enclosing scopes. Returns nil
+ * when the table answers with an invalid-kind symbol, i.e. the name is
+ * unknown. */
+Symbol *
+sym_search_oncurrent(Scope *scope, const Str name)
+{
+	Symbol *found = &shget(scope->symbols, name.s);
+	return found->kind == SymInvalid ? nil : found;
+}
+
+/* Looks `name` up starting at `scope` and walking outwards through the
+ * enclosing scopes; the innermost match wins. Returns nil if no scope
+ * declares it. */
+Symbol *
+sym_search(Scope *scope, const Str name)
+{
+	for (Scope *sp = scope; sp != nil; sp = sp->prev) {
+		Symbol *hit = sym_search_oncurrent(sp, name);
+		if (hit != nil)
+			return hit;
+	}
+	return nil;
+}
+
+/* Warns about every var-like binding of the current scope (proc parameters
+ * included) that was never marked as used. */
+static void
+sema_check_unused_vars(SemaCtx *sctx)
+{
+	const SymbolEntry *entries = sctx->current_scope->symbols;
+	const isize count = shlen(entries);
+
+	for (isize i = 0; i < count; ++i) {
+		const Symbol sym = entries[i].value;
+		if (sym.used || !symbol_is_var_binding(sym.kind))
+			continue;
+
+		const char *what = sym.procparm ? "proc parameter" : "variable";
+		sema_warning(sctx, &sym.loc, "unused %s '%s'", what, entries[i].key);
+	}
+}
+
+/* Placeholder for dead statement detection; currently does nothing and
+ * ignores both arguments. */
+static void
+sema_check_dead_stmts(SemaCtx *sctx, Vec(Ast *) stmts)
+{
+	(void)sctx, (void)stmts;
+	/* those who forsake the CFG are doomed to implement it badly without even
+	 * noticing... */
+}
+
+static void
+sema_match_proc_type(SemaCtx *sctx, Symbol *fsym, Str fident)
+{
+	if (fsym->dtype->kind != DtkProc) {
+		sema_error(
+			sctx, nil,
+			"cannot call '%s' because has non-proc type '%s'",
+			fident.s, "uh"
+		);
+		return;
+	}
+}
+
+/* Checks a proc call: the callee must be declared and have a proc type, the
+ * number of arguments must match the declaration, each argument expression
+ * is analyzed, and a non-void result may only be dropped inside `discard`.
+ * Returns the type of the call expression, i.e. the callee's return type,
+ * or nil after reporting an error.
+ * TODO: argument types are not yet checked against the parameter types. */
+static DataType *
+sema_proccall(SemaCtx *sctx, const AstProcCall *call, Location loc)
+{
+	Symbol *fsym = sym_search(sctx->current_scope, call->name);
+	if (fsym == nil) {
+		sema_error(sctx, &loc, "call to undeclared proc '%s'", call->name.s);
+		return nil;
+	}
+
+	fsym->used = true;
+	sema_match_proc_type(sctx, fsym, call->name);
+	/* The mismatch was reported above; the `proc` fields are meaningless for
+	 * any other kind of type, so stop here. */
+	if (fsym->dtype->kind != DtkProc)
+		return nil;
+
+	/* check call arguments */
+	const isize proc_arglen = arrlen(fsym->dtype->proc.argtypes);
+	if (call->args != nil) {
+		compiler_assert(sctx->cm, call->args->type == AST_EXPRS);
+		const isize call_arglen = arrlen(call->args->exprs);
+
+		if (call_arglen != proc_arglen) {
+			const char *at_most = call_arglen > proc_arglen ? "s at most" : "";
+			/* counts are cast to match the `%li` conversions */
+			sema_error(
+				sctx, &loc,
+				"argument length mismatch: given %li arguments to '%s' but it expects %li argument%s",
+				(long)call_arglen, call->name.s, (long)proc_arglen, at_most
+			);
+			return nil;
+		}
+		sema_expr_list(sctx, call->args->exprs, loc); /* now sema-check the args */
+	} else if (proc_arglen != 0) {
+		sema_error(sctx, &loc, "'%s' proc takes %li argument(s), but none given",
+			  call->name.s, (long)proc_arglen);
+		return nil;
+	}
+
+	if (fsym->dtype->proc.rettype != sctx->builtintypes.void_t
+		&& (~sctx->flags & SctxInDiscard)
+		&& (~sctx->flags & SctxInExpr)) {
+		sema_error(sctx, &loc, "result of function call with non-void type ignored");
+		sema_note(sctx, &loc, "use 'discard' if this was intentional");
+		return nil;
+	}
+
+	return fsym->dtype->proc.rettype;
+}
+
+/************ Semantic and type checking of expressions ************/
+/* Type checking for expressions is done inside-out */
+
+/* Types a number literal. For now every literal gets the builtin `u64` type
+ * (type rule axiom); the type is also stored in the node. */
+static DataType *
+sema_expr_number(SemaCtx *sctx, AstNumber *num)
+{
+	Symbol *u64_sym = sym_search_oncurrent(sctx->top_scope, Sl("u64"));
+	num->type = u64_sym->dtype;
+	return num->type;
+}
+
+/* Types a string literal: always the builtin `string` type (type rule
+ * axiom). The literal's contents are not inspected. */
+static DataType *
+sema_expr_strlit(SemaCtx *sctx, const Str *strlit)
+{
+	(void)strlit;
+	Symbol *string_sym = sym_search_oncurrent(sctx->top_scope, Sl("string"));
+	return string_sym->dtype;
+}
+
+/* Resolves an identifier used inside an expression and marks its symbol as
+ * used. Returns nil after reporting an error if the identifier is undeclared
+ * or names a data type. */
+static Symbol *
+sema_expr_ident(SemaCtx *sctx, const Str ident)
+{
+	Symbol *sym = sym_search(sctx->current_scope, ident);
+
+	if (sym == nil) {
+		sema_error(sctx, nil, "undeclared identifier '%s'", ident.s);
+		return nil;
+	} else if (sym->kind == SymType) {
+		sema_error(sctx, nil, "data type '%s' used as identifier in expression", ident.s);
+		return nil;
+	}
+
+	sym->used = true;
+	return sym;
+}
+
+/* Analyzes a unary expression. The operator itself is not checked yet; the
+ * result is simply the type of the operand.
+ * TODO: reject operators applied to string literals, and negation of
+ * unsigned number literals. */
+static DataType *
+sema_expr_unary(SemaCtx *sctx, AstUnary *unary, Location loc)
+{
+	Ast *operand = unary->atom;
+	compiler_assert(sctx->cm, ast_node_is_expr(operand->type));
+	return sema_expr(sctx, operand, loc);
+}
+
+/* Checks a binary expression: the operator must be declared in the top scope
+ * as a two-argument proc, both operands are analyzed, and their types must be
+ * compatible. Returns the type of the left operand, the invalid type
+ * sentinel if an operand had an unresolvable type, or nil after reporting an
+ * error. All diagnostics carry `loc`. */
+static DataType *
+sema_binop(SemaCtx *sctx, const AstBinop *expr, Location loc)
+{
+	Symbol *opsym = sym_search_oncurrent(sctx->top_scope, expr->op);
+	if (opsym == nil) {
+		sema_error(sctx, &loc, "no operator '%s'", expr->op.s);
+		return nil;
+	}
+	if (arrlen(opsym->dtype->proc.argtypes) != 2) {
+		sema_error(sctx, &loc, "no binary operator for '%s'", expr->op.s);
+		return nil;
+	}
+
+	DataType *ldt = sema_expr(sctx, expr->left, loc);
+	DataType *rdt = sema_expr(sctx, expr->right, loc);
+	/* Skip typechecking if either ldt or rdt have `InvalidDataType` and propagate
+	 * it up the call stack. */
+	if (ldt == InvalidDataType || rdt == InvalidDataType)
+		return (DataType *)InvalidDataType;
+	/* A nil type means the operand was already diagnosed; comparing it would
+	 * only add an empty "type error" on top of the real message. */
+	if (ldt == nil || rdt == nil)
+		return nil;
+
+	DataTypeCheck tchk = datatype_cmp(sctx, ldt, rdt);
+	if (!tchk.ok) {
+		sema_error(sctx, &loc, "type error: %s", tchk.msg.s);
+		return nil;
+	}
+	return ldt;
+}
+
+/* Analyzes an expression node inside its own context frame (flagged as
+ * "in expression") and returns its data type. The result is nil if a
+ * sub-check reported an error, or the invalid type sentinel when an
+ * identifier could not be resolved, so that callers skip follow-up type
+ * errors. */
+static DataType *
+sema_expr(SemaCtx *sctx, Ast *expr, Location loc)
+{
+	compiler_assert(sctx->cm, ast_node_is_expr(expr->type));
+	push_semactx(&sctx);
+	sctx->flags |= SctxInExpr;
+
+	DataType *dt = nil;
+	switch (expr->type) {
+	case AST_BINEXPR:
+		dt = sema_binop(sctx, &expr->bin, loc);
+		break;
+	case AST_UNARY:
+		dt = sema_expr_unary(sctx, &expr->unary, loc);
+		break;
+	case AST_NUMBER:
+		dt = sema_expr_number(sctx, &expr->number);
+		break;
+	case AST_STRLIT:
+		dt = sema_expr_strlit(sctx, &expr->strlit);
+		break;
+	case AST_IDENT: {
+		/* the lookup returns nil (after reporting) for unknown names */
+		Symbol *sym = sema_expr_ident(sctx, expr->ident);
+		dt = sym != nil ? sym->dtype : (DataType *)InvalidDataType;
+		break;
+	}
+	case AST_PROCCALL:
+		dt = sema_proccall(sctx, &expr->call, expr->loc);
+		break;
+	default:
+		unreachable();
+	}
+
+	pop_semactx(&sctx);
+	return dt;
+}
+
+/* Analyzes every expression of `exprs` in order; a nil or empty vector is a
+ * no-op. A plain index loop is used so that each element is analyzed exactly
+ * once. */
+static void
+sema_expr_list(SemaCtx *sctx, Vec(Ast *) exprs, Location loc)
+{
+	const isize count = arrlen(exprs);
+	for (isize i = 0; i < count; ++i)
+		sema_expr(sctx, exprs[i], loc);
+}
+
+/************ Type checking ************/
+
+/* Structurally compare two compound data types (both structs or both
+ * unions): same packing, same number of fields, and pairwise compatible
+ * field types. Returns the first failing field check, or a check with `ok`
+ * set. */
+static DataTypeCheck
+datatype_struct_cmp(SemaCtx *sctx, DataType *s1, DataType *s2)
+{
+	/* `datatype_cmp` sends unions here as well as structs */
+	compiler_assert(
+		sctx->cm,
+		(s1->kind == DtkStruct || s1->kind == DtkUnion) && s1->kind == s2->kind
+	);
+	const DataTypeCompound *s1s = &s1->compound;
+	const DataTypeCompound *s2s = &s2->compound;
+
+	if (s1s->packed != s2s->packed)
+		return (DataTypeCheck){false, Sl("")};
+	const isize nfields = arrlen(s1s->fields);
+	if (nfields != arrlen(s2s->fields))
+		return (DataTypeCheck){false, Sl("")};
+	for (isize i = 0; i < nfields; ++i) {
+		DataTypeCheck tchk = datatype_cmp(sctx, s1s->fields[i], s2s->fields[i]);
+		if (!tchk.ok)
+			return tchk;
+	}
+	return (DataTypeCheck){.ok = true};
+}
+
+/* Two array types match when they have the same length and their element
+ * types match. */
+static DataTypeCheck
+datatype_array_cmp(SemaCtx *sctx, DataType *a1, DataType *a2)
+{
+	if (a1->array.len != a2->array.len)
+		return (DataTypeCheck){false, Sl("")};
+	return datatype_cmp(sctx, a1->array.base, a2->array.base);
+}
+
+/* Two proc types match when visibility, linkage, C varargs use and argument
+ * count agree, and the return type and every argument type match pairwise.
+ * Returns the first failing check. */
+static DataTypeCheck
+datatype_proc_cmp(SemaCtx *sctx, DataType *pc1, DataType *pc2)
+{
+	const bool same_shape =
+		pc1->proc.public == pc2->proc.public
+		&& pc1->proc.extern_lnk == pc2->proc.extern_lnk
+		&& pc1->proc.c_varargs == pc2->proc.c_varargs
+		&& arrlen(pc1->proc.argtypes) == arrlen(pc2->proc.argtypes);
+	if (!same_shape)
+		return (DataTypeCheck){false, Sl("")};
+
+	DataTypeCheck res = datatype_cmp(sctx, pc1->proc.rettype, pc2->proc.rettype);
+	if (!res.ok)
+		return res;
+
+	for (isize i = 0; i < arrlen(pc1->proc.argtypes); ++i) {
+		res = datatype_cmp(sctx, pc1->proc.argtypes[i], pc2->proc.argtypes[i]);
+		if (!res.ok)
+			return res;
+	}
+	return res;
+}
+
+/* Compares two basic (integer) types: `dt1` must not be wider than `dt2` and
+ * both must have the same signedness. Messages are static strings, like in
+ * the other comparison helpers, so callers never have to free them. */
+static DataTypeCheck
+datatype_basic_cmp(SemaCtx *sctx, DataType *dt1, DataType *dt2)
+{
+	(void)sctx;
+	if (dt1->size > dt2->size) /* dt1 has to fit into dt2 */
+		return (DataTypeCheck){false, Sl("")};
+	if (dt1->sign != dt2->sign)
+		return (DataTypeCheck){false, Sl("integers with different sign")};
+	return (DataTypeCheck){.ok = true};
+}
+
+/* Compares two data types. The result has `ok` set when they are the same
+ * object or structurally compatible; otherwise `msg` describes the mismatch
+ * (possibly as an empty string). `msg.s` is never nil on failure, so it can
+ * be printed with `%s`. A nil operand always fails. */
+static DataTypeCheck
+datatype_cmp(SemaCtx *sctx, DataType *dt1, DataType *dt2)
+{
+	if (dt1 == nil || dt2 == nil)
+		return (DataTypeCheck){false, Sl("")};
+	/* TODO: return more information in case of a mismatch... */
+	if (dt1 == dt2) /* shallow */
+		return (DataTypeCheck){.ok = true};
+	if (dt1->kind != dt2->kind)
+		return (DataTypeCheck){false, Sl("mismatched types")};
+
+	switch (dt1->kind) {
+	case DtkBasic:
+		return datatype_basic_cmp(sctx, dt1, dt2);
+	case DtkStruct:
+	case DtkUnion:
+		return datatype_struct_cmp(sctx, dt1, dt2);
+	case DtkProc:
+		return datatype_proc_cmp(sctx, dt1, dt2);
+	case DtkArray:
+		return datatype_array_cmp(sctx, dt1, dt2);
+	case DtkBool:
+	case DtkVoid:
+		return (DataTypeCheck){.ok = true};
+	}
+	/* any other kind (e.g. the invalid type) never matches */
+	return (DataTypeCheck){false, Sl("mismatched types")};
+}
+
+/* Returns the data type of an expression node: the type stored in the node
+ * for binary, unary and number expressions, the builtin `string` type for
+ * string literals, and a symbol lookup for identifiers and calls (a call
+ * yields the callee's return type). The looked-up symbols must exist. */
+static DataType *
+expr_get_datatype(SemaCtx *sctx, Ast *expr)
+{
+	compiler_assert(sctx->cm, ast_node_is_expr(expr->type));
+
+	DataType *dt = nil;
+	switch (expr->type) {
+	case AST_BINEXPR:
+		dt = expr->bin.type;
+		break;
+	case AST_UNARY:
+		dt = expr->unary.type;
+		break;
+	case AST_NUMBER:
+		dt = expr->number.type;
+		break;
+	case AST_STRLIT:
+		dt = sym_search_oncurrent(sctx->top_scope, Sl("string"))->dtype;
+		break;
+	/* XXX: for these two we could attach the type in the ast... */
+	case AST_IDENT:
+		dt = sym_search(sctx->current_scope, expr->ident)->dtype;
+		break;
+	case AST_PROCCALL:
+		dt = sym_search(sctx->current_scope, expr->call.name)->dtype->proc.rettype;
+		break;
+	default:
+		unreachable();
+	}
+	return dt;
+}
+
+/* Search for the type in the symbol table, asserting that is a data type. */
+static DataType *
+resolve_datatype(SemaCtx *sctx, const Str ident, Location loc)
+{
+	Symbol *dtsym = sym_search(sctx->current_scope, ident);
+	if (dtsym == nil) {
+		sema_error(sctx, &loc, "no such type '%s'", ident.s);
+		return (DataType *)InvalidDataType;
+	}
+	if (dtsym->kind != SymType) {
+		sema_error(sctx, &loc, "'%s' is not a type but a %s", ident.s, SymbolKindStr[dtsym->kind]);
+		return (DataType *)InvalidDataType;
+	}
+	return dtsym->dtype;
+}
+
+/* Analyzes a proc declaration or definition.
+ *
+ * Rejects names already declared in any visible scope, requires `main` to be
+ * public, resolves the return type (none means `void`) and the parameter
+ * types, and registers the proc in the current scope. If the proc has a
+ * body, it is analyzed in a new context frame and scope into which the
+ * parameters are injected; unused bindings of that scope are reported
+ * afterwards. */
+static void
+sema_procdef(SemaCtx *sctx, AstProc *proc, Location loc)
+{
+	Symbol *sym_prev;
+	if ((sym_prev = sym_search(sctx->current_scope, proc->name)) != nil) {
+		/* point at the offending declaration, then at the earlier one */
+		sema_error(
+			sctx, &loc,
+			"'%s' was already declared as a %s",
+			proc->name.s, SymbolKindStr[sym_prev->kind]
+		);
+		sema_note(sctx, &sym_prev->loc, "'%s' previously declared here", proc->name.s);
+		return;
+	}
+
+	if (Str_equal(proc->name, Sl("main"))) {
+		sctx->main_defined = true;
+		if (!proc->ispublic) {
+			sema_error(sctx, &loc, "'main' has to be declared as a public proc");
+		}
+	}
+
+	const Ast *rettype_node = proc->rettype;
+	DataType *proc_rettype = nil;
+	if (rettype_node != nil) {
+		compiler_assert(sctx->cm, rettype_node->type == AST_IDENT);
+		proc_rettype = resolve_datatype(sctx, proc->rettype->ident, rettype_node->loc);
+		if (proc_rettype == InvalidDataType)
+			return;
+	} else {
+		/* return type node is nil, we infer that as a `void` type */
+		proc_rettype = sctx->builtintypes.void_t;
+	}
+
+	Vec(DataType *) procargs = make_type_list_from_idents(sctx, proc->args);
+	DataType *procdtype = make_proc_type(false, proc_rettype, procargs);
+	procdtype->proc.public = proc->ispublic;
+	Symbol proc_sym = {
+		.kind = SymProc,
+		.dtype = procdtype,
+		.loc = loc
+	};
+
+	/* registered before the body is analyzed, so the proc can call itself */
+	sym_insert(sctx->current_scope->symbols, proc->name.s, proc_sym);
+	proc->type = procdtype;
+
+	/* proc has no body at all */
+	if (proc->body == nil)
+		return;
+
+	/* analyze the body */
+	compiler_assert(sctx->cm, proc->body->type == AST_STMTS);
+	push_semactx(&sctx);
+	enter_scope(sctx);
+
+	compiler_assert(sctx->cm, arrlen(proc->args) == arrlen(procargs));
+	/* Inject proc parameters into the proc body top scope */
+	for (isize i = 0; i < arrlen(proc->args); ++i) {
+		DataType *argdtype = procargs[i];
+		enum SymbolKind argsymkind = proc->args[i].kind;
+
+		compiler_assert(sctx->cm, argdtype != nil);
+		compiler_assert(sctx->cm, argsymkind == SymLet || argsymkind == SymVar);
+
+		Symbol argsym = {
+			.kind = argsymkind,
+			.dtype = argdtype,
+			.procparm = true,
+			.loc = proc->args[i].loc
+		};
+		sym_insert(sctx->current_scope->symbols, proc->args[i].ident.s, argsym);
+	}
+	sctx->flags |= SctxInsideProc;
+	sema_stmts(sctx, proc->body->stmts);
+	/* must run while the body scope is still the current one */
+	sema_check_unused_vars(sctx);
+	exit_scope(sctx);
+	pop_semactx(&sctx);
+}
+
+/* Checks a `return` statement: it must appear inside a proc, and its
+ * expression, if there is one, is analyzed. */
+static void
+sema_return(SemaCtx *sctx, Ast *ret_expr, Location loc)
+{
+	const bool inside_proc = (sctx->flags & SctxInsideProc) != 0;
+	if (!inside_proc) {
+		sema_error(sctx, &loc, "'return' outside of proc");
+	}
+	if (ret_expr == nil)
+		return;
+	sema_expr(sctx, ret_expr, loc);
+}
+
+/* Checks a `break` statement: it is only valid inside a loop. The second
+ * parameter is ignored. */
+static void
+sema_break(SemaCtx *sctx, Ast *unused, Location loc)
+{
+	(void)unused;
+	const bool inside_loop = (sctx->flags & SctxInsideLoop) != 0;
+	if (inside_loop)
+		return;
+	sema_error(sctx, &loc, "'break' used outside of a loop");
+}
+
+/* Analyzes the expression of a `discard` statement. The discard flag is set
+ * only while the expression is analyzed, which allows a call's non-void
+ * result to be dropped. */
+static void
+sema_discard(SemaCtx *sctx, Ast *expr, Location loc)
+{
+	sctx->flags |= SctxInDiscard;
+	sema_expr(sctx, expr, loc);
+	sctx->flags &= ~SctxInDiscard;
+}
+
+/* Analyzes the node an attribute is attached to; the attribute itself is
+ * not inspected here. */
+static void
+sema_attribute(SemaCtx *sctx, AstAttribute *attr)
+{
+	sema_node(sctx, attr->node);
+}
+
+/* Analyzes a `let`/`var`/`const` declaration: checks for a conflicting
+ * earlier declaration, analyzes the initializer (warning when there is
+ * none), resolves the declared type and inserts the binding into the current
+ * scope. A declaration without an explicit type is an error for now.
+ * NOTE(review): an earlier declaration of the *same* kind is silently
+ * replaced, because the conflict check only fires when the kinds differ —
+ * confirm this is intended. */
+static void
+sema_var_decl(SemaCtx *sctx, AstVarDecl *decl, Location loc)
+{
+	compiler_assert(sctx->cm, symbol_is_var_binding(decl->kind));
+
+	const Symbol *symp = sym_search(sctx->current_scope, decl->name);
+	if (symp != nil && symp->kind != decl->kind) {
+		switch (symp->kind) {
+		case SymLet:
+			sema_error(sctx, &symp->loc, "'%s' was already declared as 'let'", decl->name.s);
+			return;
+		case SymVar:
+			sema_error(sctx, &symp->loc, "'%s' was already declared as 'var'", decl->name.s);
+			return;
+		case SymConst:
+			sema_error(
+				sctx, &symp->loc,
+				"declaration of '%s' shadows previously declared constant with the same name",
+				decl->name.s
+			);
+			return;
+		case SymType:
+			sema_error(sctx, &symp->loc, "'%s' was already declared as a type", decl->name.s);
+			return;
+		default:
+			break;
+		}
+		sema_note(sctx, &symp->loc, "'%s' was declared in this line", decl->name.s);
+	}
+
+	Ast *dexpr = decl->expr;
+	if (dexpr != nil) {
+		sema_expr(sctx, dexpr, loc); /* check the assignment expression */
+	} else {
+		sema_warning(sctx, &loc, "variable is uninitialized");
+	}
+
+	if (decl->datatype == nil) {
+		sema_error(sctx, &loc, "we don't do type inference yet sorry");
+		return;
+	}
+
+	compiler_assert(sctx->cm, decl->datatype->type == AST_IDENT);
+	DataType *dtype = resolve_datatype(sctx, decl->datatype->ident, decl->datatype->loc);
+	/* Note that we ignore whether `resolve_datatype` return an invalid type,
+	 * since we still want to insert the variable into the symbol table,
+	 * otherwise we would have spurious "undeclared identifier" errors. */
+	decl->type = dtype;
+
+	Symbol sym = {
+		.kind = decl->kind,
+		.dtype = dtype,
+		.loc = loc,
+	};
+	/* Insert the variable to the symbol table */
+	sym_insert(sctx->current_scope->symbols, decl->name.s, sym);
+}
+
+/* Checks an assignment: the right-hand side is analyzed, and the target must
+ * be a declared, mutable (`var`) binding. The target is looked up once, so a
+ * bad target produces a single diagnostic located at the assignment. An
+ * assignment counts as a use of the target.
+ * TODO: the assigned value's type is not yet checked against the target. */
+static void
+sema_var_assign(SemaCtx *sctx, AstVarAssign *assign, Location loc)
+{
+	Symbol *decl = sym_search(sctx->current_scope, assign->name);
+	if (decl != nil && decl->kind != SymType)
+		decl->used = true;
+
+	/* the value is analyzed even when the target turns out to be invalid */
+	sema_expr(sctx, assign->expr, loc);
+
+	if (decl == nil) {
+		sema_error(sctx, &loc, "assign to undeclared variable '%s'", assign->name.s);
+		return;
+	}
+	if (!symbol_is_var_binding(decl->kind)) {
+		sema_error(
+			sctx, &loc,
+			"assign to non-variable symbol ('%s' is a '%s')",
+			assign->name.s, SymbolKindStr[decl->kind]
+		);
+		return;
+	}
+	if (decl->kind != SymVar) {
+		sema_error(
+			sctx, &loc,
+			"assign to immutable symbol ('%s' was declared as '%s')",
+			assign->name.s, SymbolKindStr[decl->kind]
+		);
+		return;
+	}
+}
+
+/* Analyzes an `if`: the condition, the true branch and the false branch
+ * first, then the condition and body of every `elif`, in order. */
+static void
+sema_ifstmtexpr(SemaCtx *sctx, AstIf *ift, Location loc)
+{
+	sema_expr(sctx, ift->cond, loc);
+	sema_node(sctx, ift->true_body);
+	sema_node(sctx, ift->false_body);
+
+	const isize nelifs = arrlen(ift->elifs);
+	for (isize i = 0; i < nelifs; ++i) {
+		sema_expr(sctx, ift->elifs[i].cond, loc);
+		sema_node(sctx, ift->elifs[i].body);
+	}
+}
+
+/* Analyzes a loop: whichever of the pre- and post-condition are present are
+ * analyzed in the enclosing context, then the body is analyzed in a new
+ * frame flagged as "inside loop" so that `break` is accepted there. */
+static void
+sema_loop(SemaCtx *sctx, AstLoop *loop, Location loc)
+{
+	Ast *conds[] = {loop->precond, loop->postcond};
+	for (isize i = 0; i < countof(conds); ++i) {
+		if (conds[i] != nil)
+			sema_expr(sctx, conds[i], loc);
+	}
+
+	push_semactx(&sctx);
+	sctx->flags |= SctxInsideLoop;
+	sema_node(sctx, loop->body);
+	pop_semactx(&sctx);
+}
+
+/* Analyzes the statements of a list in order, without opening a scope (the
+ * caller decides about that). Warns about code following a `return` or
+ * `break`, naming the keyword that makes it dead. Nil entries are skipped. */
+static void
+sema_stmts(SemaCtx *sctx, Vec(Ast *) stmts)
+{
+	const isize stmts_len = arrlen(stmts);
+	for (isize i = 0; i < stmts_len; ++i) {
+		Ast *node = stmts[i];
+		if (node == nil)
+			continue;
+		sema_node(sctx, node);
+
+		const bool has_next = i + 1 != stmts_len && stmts[i + 1] != nil;
+		if (sema_is_stmt_terminal(node) && has_next) {
+			const char *keyword = node->type == AST_RETURN ? "return" : "break";
+			sema_warning(sctx, &stmts[i + 1]->loc, "dead code after '%s'", keyword);
+		}
+	}
+}
+
+/*
+ * Analyze a block of statements (AST_STMTS) inside its own scope: a scope is
+ * entered, every statement is analyzed, the scope is exited and finally the
+ * unused binding check runs.
+ */
+static void
+sema_stmt_block(SemaCtx *sctx, Vec(Ast *) stmts)
+{
+	enter_scope(sctx);
+	sema_stmts(sctx, stmts);
+	exit_scope(sctx);
+	/* check for unused bindings declared in this scope */
+	/* NOTE(review): this runs after exit_scope(); if sema_check_unused_vars()
+	 * inspects sctx->current_scope it would see the enclosing scope rather
+	 * than the block's one -- confirm against both helpers. */
+	sema_check_unused_vars(sctx);
+}
+
+/*
+ * Dispatch the analysis of a single AST node according to its type.
+ * A nil node is accepted and ignored. Node types that must never reach this
+ * point on their own (AST_INVALID, AST_EXPRS, AST_PROCCALL_ARGS) hit
+ * unreachable().
+ *
+ * The switch intentionally has no 'default' label, every node type is
+ * listed explicitly.
+ */
+static void
+sema_node(SemaCtx *sctx, Ast *node)
+{
+	if (node == nil)
+		return;
+
+	Location loc = node->loc;
+
+	switch (node->type) {
+	/* control flow and blocks */
+	case AST_IF:
+		sema_ifstmtexpr(sctx, &node->ifse, loc);
+		return;
+	case AST_LOOP:
+		sema_loop(sctx, &node->loop, loc);
+		return;
+	case AST_STMTS:
+		sema_stmt_block(sctx, node->stmts);
+		return;
+	case AST_RETURN:
+		sema_return(sctx, node->ret, loc);
+		return;
+	case AST_BREAK:
+		sema_break(sctx, nil, loc);
+		return;
+
+	/* procedures */
+	case AST_PROCDEF:
+		sema_procdef(sctx, &node->proc, loc);
+		return;
+	case AST_PROCCALL:
+		sema_proccall(sctx, &node->call, loc);
+		return;
+
+	/* bindings */
+	case AST_VARDECL:
+		sema_var_decl(sctx, &node->var, loc);
+		return;
+	case AST_VARASSIGN:
+		sema_var_assign(sctx, &node->varassgn, loc);
+		return;
+
+	case AST_DISCARD:
+		sema_discard(sctx, node->discard.expr, loc);
+		return;
+	case AST_ATTRIBUTE:
+		sema_attribute(sctx, &node->attribute);
+		return;
+
+	/* plain expressions share a single entry point */
+	case AST_BINEXPR:
+	case AST_UNARY:
+	case AST_NUMBER:
+	case AST_STRLIT:
+	case AST_IDENT:
+		sema_expr(sctx, node, loc);
+		return;
+
+	/* never valid as standalone nodes */
+	case AST_INVALID:
+	case AST_EXPRS:
+	case AST_PROCCALL_ARGS:
+		unreachable();
+	}
+}
+
+/*
+ * Populate the current scope of `sctx` with the builtin symbols:
+ *   - types: void, u64, i64, cint, string, bool
+ *   - procs: "+", "-" (u64, u64) -> u64 and "==" (u64, u64) -> bool
+ *   - "puts": (string) -> void, flagged as externally linked
+ * and record the u64 and void types in sctx->builtintypes.
+ *
+ * Symbols are inserted in the order listed above (types, operators, puts).
+ * Types that are referenced more than once are kept in named locals instead
+ * of being fetched back from the table by position, so that reordering the
+ * table cannot silently bind the operators to the wrong type.
+ */
+static void
+sema_make_builtin_types(SemaCtx *sctx)
+{
+	typedef struct {
+		const char *name;
+		Symbol sym;
+	} NameSym;
+
+	DataType *void_type = make_data_type(DtkVoid, 0, true, false);
+	DataType *str_type = make_data_type(DtkStruct, 0, false, false);
+	DataType *u64_type = make_data_type(DtkBasic, 8, true, false);
+	DataType *bool_type = make_data_type(DtkBool, 1, true, false);
+
+	DataType *puts_proto = make_data_type(DtkProc, 0, false, false);
+	puts_proto->proc.rettype = void_type;
+	puts_proto->proc.argtypes = make_proc_args((DataType *[]){str_type}, 1);
+	puts_proto->proc.extern_lnk = true;
+
+	NameSym builtin_basic_types[] = {
+		{"void", {.kind = SymType, .dtype = void_type}},
+		{"u64", {.kind = SymType, .dtype = u64_type}},
+		{"i64", {.kind = SymType, .dtype = make_data_type(DtkBasic, 8, true, true)}},
+		/* cint takes its size from the host compiler's int */
+		{"cint", {.kind = SymType, .dtype = make_data_type(DtkBasic, sizeof(int), true, true)}},
+		{"string", {.kind = SymType, .dtype = str_type}},
+		{"bool", {.kind = SymType, .dtype = bool_type}},
+	};
+
+	/* builtin binary operators, for now only defined over u64 operands */
+	NameSym builtin_procs[] = {
+		{
+			"+",
+			{
+				.kind = SymProc,
+				.dtype = make_proc_type(
+					true,
+					u64_type,
+					make_proc_args((DataType *[]){u64_type, u64_type}, 2)
+				)
+			}
+		},
+		{
+			"-",
+			{
+				.kind = SymProc,
+				.dtype = make_proc_type(
+					true,
+					u64_type,
+					make_proc_args((DataType *[]){u64_type, u64_type}, 2)
+				)
+			}
+		},
+		{
+			"==",
+			{
+				.kind = SymProc,
+				.dtype = make_proc_type(
+					true,
+					bool_type,
+					make_proc_args((DataType *[]){u64_type, u64_type}, 2)
+				)
+			}
+		},
+	};
+
+	for (isize i = 0; i < countof(builtin_basic_types); ++i) {
+		sym_insert(sctx->current_scope->symbols,
+			  builtin_basic_types[i].name, builtin_basic_types[i].sym);
+	}
+	for (isize i = 0; i < countof(builtin_procs); ++i) {
+		sym_insert(sctx->current_scope->symbols,
+			  builtin_procs[i].name, builtin_procs[i].sym);
+	}
+
+	sctx->builtintypes.tyu64 = u64_type;
+	sctx->builtintypes.void_t = void_type;
+
+	Symbol puts_sym = {.kind = SymProc, .dtype = puts_proto};
+	sym_insert(sctx->current_scope->symbols, "puts", puts_sym);
+}
+
+/*
+ * Create the top-level semantic analysis context for compiler `cm`.
+ *
+ * The context gets a fresh root scope (used as both the current and the top
+ * scope), the builtin symbols are registered in it and the context starts in
+ * the "ok" state.
+ */
+SemaCtx *
+sema_new(Compiler *cm)
+{
+	SemaCtx *ctx = make_semactx(cm, nil);
+	Scope *root_scope = make_scope(nil);
+
+	ctx->current_scope = root_scope;
+	ctx->top_scope = root_scope;
+	sema_make_builtin_types(ctx);
+	ctx->ok = true;
+
+	return ctx;
+}
+
+/*
+ * Release a context created by sema_new(). Only the context structure itself
+ * is freed here.
+ *
+ * NOTE(review): the scopes, symbols and data types reachable from the context
+ * are not released by this function -- confirm whether they are owned
+ * elsewhere or intentionally left to process exit.
+ */
+void
+sema_destroy(SemaCtx *sctx)
+{
+	free(sctx);
+}
+
+/*
+ * Run the semantic analysis over a whole program.
+ *
+ * `program` must be an AST_STMTS node; each of its statements is analyzed
+ * directly in the current scope of `sctx` (no new scope is opened). After
+ * that:
+ *   - unless the compiler runs in compile-only mode, a missing 'main' is
+ *     reported as an error;
+ *   - a warning is emitted for every proc in the current scope that is not
+ *     builtin, not public, not externally linked and was never used.
+ */
+void
+sema(SemaCtx *sctx, Ast *program)
+{
+	compiler_assert(sctx->cm, program->type == AST_STMTS);
+
+	/* Analyze toplevel */
+	/* XXX: DRY it */
+	const isize stmt_count = arrlen(program->stmts);
+	for (isize i = 0; i < stmt_count; ++i)
+		sema_node(sctx, program->stmts[i]);
+
+	const bool needs_main = !sctx->cm->opts.compile_only;
+	if (needs_main && !sctx->main_defined)
+		sema_error(sctx, nil, "missing 'main' entrypoint proc");
+
+	/* check unused local procedures */
+	const SymbolEntry *entries = sctx->current_scope->symbols;
+	const isize entry_count = shlen(entries);
+	for (isize i = 0; i < entry_count; ++i) {
+		const Symbol *sym = &entries[i].value;
+
+		if (sym->kind != SymProc)
+			continue;
+
+		const DataType *dtype = sym->dtype;
+		if (dtype->builtin || dtype->proc.public || dtype->proc.extern_lnk || sym->used)
+			continue;
+
+		sema_warning(
+			sctx, &sym->loc,
+			"defined proc '%s' is never called in this module", entries[i].key
+		);
+	}
+}
--- a/compiler/sema.h
+++ b/compiler/sema.h
@ -0,0 +1,32 @@
+#ifndef _sema_h_
+#define _sema_h_
+
+#include "ast.h"
+#include "state.h"
+
+/* Scope is only handled through pointers in this header */
+typedef struct Scope Scope;
+typedef struct SemaCtx SemaCtx;
+
+/* State carried around by the semantic analysis pass */
+struct SemaCtx
+{
+	SemaCtx *prev; /* presumably the enclosing context (see push/pop_semactx) -- TODO confirm */
+	Scope *current_scope; /* scope symbols are inserted into and searched from */
+	Scope *top_scope; /* the root scope created by sema_new() */
+	Compiler *cm;
+	u64 flags; /* Bit field storing context flags */
+	/* builtin types recorded when the builtin symbols are registered */
+	struct {
+		DataType *tyu64;
+		DataType *void_t;
+	} builtintypes;
+	bool ok; /* initialized to true by sema_new(); presumably cleared when an error is reported -- TODO confirm */
+	bool main_defined; /* sema() reports a missing 'main' when this is false */
+};
+
+/* Create the top-level analysis context, with a root scope holding the
+ * builtin types and procs. */
+SemaCtx *
+sema_new(Compiler *cm);
+/* Free a context returned by sema_new() (the context structure itself). */
+void
+sema_destroy(SemaCtx *sctx);
+/* Analyze `program`, which must be an AST_STMTS node. Also reports a missing
+ * 'main' (unless compiling only) and warns about procs that are never used. */
+void
+sema(SemaCtx *sctx, Ast *program);
+
+#endif
--- a/compiler/state.c
+++ b/compiler/state.c
@ -0,0 +1,13 @@
+#include <stdio.h>
+#include "pre.h"
+#include "state.h"
+
+/*
+ * Failure path of compiler_assert(): print the failed predicate and a bug
+ * report hint to stderr, flush it and then invoke debugtrap().
+ *
+ * `cm` is accepted for the caller's convenience but is not used yet.
+ * `pred_s` is the source text of the predicate that failed.
+ */
+void
+compiler_assert_impl(Compiler *cm, const char *pred_s)
+{
+	static const char report_hint[] =
+		"This is a compiler bug, please report! (run with -v for bug reporting instructions)\n\n";
+
+	(void)cm;
+
+	fprintf(stderr, "Bug check fail: `%s`\n", pred_s);
+	fputs(report_hint, stderr);
+	/* make sure the message is out before trapping */
+	fflush(stderr);
+	debugtrap();
+}
--- a/compiler/state.h
+++ b/compiler/state.h
@ -0,0 +1,30 @@
+#ifndef _state_h_
+#define _state_h_
+
+#include "pre.h"
+#include "cgBackends.h"
+
+/* Assert meant to catch compiler bugs. The difference with a normal assert is that
+ * this one stays on release builds. Better to crash than to deal with some weird bug
+ * seeping through codegen.
+ */
+#define compiler_assert(cm, pred) if (!(pred)) {compiler_assert_impl(cm, #pred);}
+
+/* Global compiler state: user selected options plus running counters */
+typedef struct {
+	struct {
+		bool color; /* colored diagnostics */
+		bool compile_only; /* when set, sema() does not require a 'main' proc */
+		Str exe_out; /* presumably the output executable path -- TODO confirm */
+		Str release_mode;
+		Vec(Str) defines;
+		enum CodegenBackends backend;
+		isize max_errors; /* presumably the error count limit -- TODO confirm */
+	} opts;
+	Str current_filename;
+	isize error_count;
+} Compiler;
+
+/* Failure handler behind compiler_assert(); `pred_s` is the text of the
+ * predicate that did not hold. Not meant to be called directly. */
+void
+compiler_assert_impl(Compiler *cm, const char *pred_s);
+
+#endif
--- a/compiler/symbol.c
+++ b/compiler/symbol.c
@ -0,0 +1,10 @@
+#include "symbol.h"
+
+/* Human readable name of every SymbolKind, indexed by the enum value.
+ * These strings are interpolated into diagnostics. */
+const char *SymbolKindStr[] = {
+	[SymInvalid] = "",
+	[SymLet] = "let",
+	[SymVar] = "var",
+	[SymConst] = "const",
+	[SymProc] = "proc",
+	[SymType] = "type definition",
+};
--- a/compiler/symbol.h
+++ b/compiler/symbol.h
@ -0,0 +1,17 @@
+#ifndef _symbol_h_
+#define _symbol_h_
+
+/* True for the binding kinds (let, var, const). Relies on SymLet..SymConst
+ * being contiguous in enum SymbolKind. Evaluates `sk` twice, so do not pass
+ * an expression with side effects. */
+#define symbol_is_var_binding(sk) ((sk) >= SymLet && (sk) <= SymConst)
+
+/* What a symbol table entry refers to. The order matters:
+ * symbol_is_var_binding() expects SymLet..SymConst to stay contiguous, and
+ * SymbolKindStr is indexed by these values. */
+enum SymbolKind
+{
+	SymInvalid,
+	SymLet,
+	SymVar,
+	SymConst,
+	SymProc,
+	SymType, /* the symbol names a data type */
+};
+/* Display names for each kind, defined in symbol.c */
+extern const char *SymbolKindStr[];
+
+#endif