commit bba597f7bfa7550feafbf5f34e7a63aada24d5e7
Author: tocariimaa <tocariimaa@noreply.codeberg.org>
Date:   Sun Jan 12 18:20:42 2025 -0300

    Initial commit

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..46d2175
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,38 @@
+# This makefile should work for both GNU and BSD Make I think...
+# NOTE(review): `$^` and the `%.o: %.c` pattern rule further down are GNU
+# Make syntax; confirm they behave as intended with the BSD make you target.
+
+# Every .c file found under SRCDIR is part of the compiler. Object (.o) and
+# dependency (.d) files are placed next to their sources.
+SRCDIR = ./compiler
+COMPILER_SRCS != find $(SRCDIR) -type f -name '*.c'
+COMPILER_OBJS := $(COMPILER_SRCS:.c=.o)
+COMPILER_DEPS := $(COMPILER_OBJS:.o=.d)
+
+# Development defaults: warnings on, no optimisation, debug info and the
+# sanitizers in ASAN, which are passed to both the compile and link steps.
+ASAN = -fsanitize=address,undefined
+CFLAGS := -Wall -Wextra -Wstrict-prototypes -Wold-style-definition -Wvla -Wwrite-strings \
+		 -Wnull-dereference -pipe -O0 -ggdb3 -std=c11 $(ASAN)
+LDFLAGS := $(ASAN)
+
+all: rutilec ast2dot
+
+# $(EXE) is an optional suffix appended to the produced binaries; it is not
+# assigned anywhere in this file.
+rutilec: $(COMPILER_OBJS)
+	$(CC) $(LDFLAGS) $^ -o $@$(EXE)
+
+# ast2dot is built from its single source file plus all compiler objects.
+ast2dot: tools/ast2dot.c $(COMPILER_OBJS)
+	$(CC) $(CFLAGS) $(LDFLAGS) $(COMPILER_OBJS) $< -o $@$(EXE)
+
+# Removes objects, dependency files and both binaries.
+clean:
+	rm -f $(COMPILER_OBJS) $(COMPILER_DEPS) ./rutilec$(EXE) ./ast2dot$(EXE)
+
+# Prints the effective build settings.
+options:
+	@echo "Build options:"
+	@echo "CC = $(CC)"
+	@echo "CFLAGS = $(CFLAGS)"
+	@echo "LDFLAGS = $(LDFLAGS)"
+	@echo "ASan flags = $(ASAN)"
+	@echo "SRCS = $(COMPILER_SRCS)"
+	@echo "OBJS = $(COMPILER_OBJS)"
+
+.PHONY: all clean options
+
+# Pull in the compiler-generated header dependencies; the leading `-` keeps
+# make quiet when the .d files do not exist yet (first build).
+-include $(COMPILER_DEPS)
+
+# -MMD -MP writes a .d file next to each object. Listing Makefile as a
+# prerequisite rebuilds every object when this file changes.
+%.o: %.c Makefile
+	$(CC) $(CFLAGS) -MMD -MP -c -o $@ $<
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..604deba
--- /dev/null
+++ b/README.md
@@ -0,0 +1,39 @@
+# Rutile
+Yet another compiled programming language.
+Very unstable and in early development.
+
+```
+proc main*(): cint
+    puts("Hello, world!")
+    return 0
+end
+```
+
+## Building
+### Build time dependencies
+- C11 C compiler (tested on GCC, Clang and TCC)
+- Libc
+- BSD or GNU Make
+- POSIX shell
+- `find` command
+
+### Build time dependencies (single header libraries)
+These are contained in `compiler/libs`.
+- [stb_ds.h](https://github.com/nothings/stb)
+- [optparse](https://github.com/skeeto/optparse)
+
+### Procedure
+Note that the default `CFLAGS` and `LDFLAGS` are meant for development builds.
+```sh
+# debug build (uses default flags):
+make -j$(nproc) rutilec
+# for release:
+make -j$(nproc) CFLAGS='-O2 -march=native -DNDEBUG' LDFLAGS='' ASAN=''
+```
+
+## License
+GNU GPLv3 for the compiler and BSD 3-Clause for the standard library.
+
+## Acknowledgements
+- Christopher Wellons, for his public domain libraries.
+- Sean Barrett's `stb_ds.h`.
diff --git a/compiler/ast.h b/compiler/ast.h
new file mode 100644
index 0000000..cdf8d3a
--- /dev/null
+++ b/compiler/ast.h
@@ -0,0 +1,151 @@
+#ifndef _ast_h_
+#define _ast_h_
+
+#include "pre.h"
+#include "datatype.h"
+#include "symbol.h"
+#include "location.h"
+
+/* Node-kind classification helpers. `nk` is an `enum AstType` value.
+ * The argument is parenthesised so that any expression can be passed, but it
+ * is still expanded more than once: pass an expression without side effects. */
+#define ast_node_is_atom(nk) \
+	((nk) == AST_IDENT || (nk) == AST_NUMBER || (nk) == AST_STRLIT || (nk) == AST_PROCCALL)
+#define ast_node_is_unary(nk) \
+	((nk) == AST_UNARY || ast_node_is_atom(nk))
+#define ast_node_is_expr(nk) \
+	((nk) == AST_BINEXPR || ast_node_is_unary(nk))
+
+/* Discriminator stored in `Ast.type`. Where present, the trailing comment on
+ * an enumerator names the `struct Ast` union member carrying its payload. */
+enum AstType
+{
+	AST_INVALID, /* For use as a placeholder until the actual type is decided */
+	AST_NUMBER, /* number */
+	AST_IDENT, /* ident */
+	AST_STRLIT, /* strlit */
+	AST_PROCDEF, /* proc */
+	AST_PROCCALL, /* call */
+	AST_PROCCALL_ARGS, /* */
+	AST_VARDECL, /* var */
+	AST_VARASSIGN, /* varassgn */
+	AST_IF, /* ifse */
+	AST_RETURN, /* ret */
+	AST_BREAK,
+	AST_LOOP, /* loop */
+	AST_STMTS, /* stmts */
+	AST_EXPRS, /* exprs */
+	AST_BINEXPR, /* bin */
+	AST_UNARY, /* unary */
+	AST_ATTRIBUTE, /* attribute */
+	AST_DISCARD,
+};
+
+typedef struct Ast Ast;
+
+/* Payload of an AST_BINEXPR node: an operator and its two operands. */
+typedef struct {
+	Str op; /* the operator */
+	Ast *left, *right; /* operand sub-expressions */
+	DataType *type; /* filled in by sema */
+} AstBinop;
+
+/* Payload of an AST_UNARY node: an operator applied to a single operand. */
+typedef struct {
+	Str op; /* the operator */
+	Ast *atom; /* the operand */
+	DataType *type; /* filled in by sema */
+} AstUnary;
+
+/* An identifier together with the name of its data type; used for the
+ * parameters of a procedure (`AstProc.args`). */
+typedef struct {
+	Str ident;
+	Str dtype;
+	/* Symbol kind for this parameter, `SymVar` would represent a mutable
+	 * parameter and `SymLet` an immutable one. */
+	enum SymbolKind kind;
+	Location loc, dtype_loc; /* source locations of `ident` and `dtype` */
+} AstIdentTypePair;
+
+/* Payload of an AST_PROCDEF node (a procedure definition). */
+typedef struct {
+	Str name;
+	bool ispublic; /* the C backend emits non-public procedures as `static` */
+	Ast *body; /* may be nil; the C backend then emits an empty body */
+	Vec(AstIdentTypePair) args; /* parameters, in declaration order */
+	Ast *rettype;
+
+	DataType *type; /* the C backend reads the return type from `type->proc.rettype` */
+} AstProc;
+
+/* Payload of an AST_PROCCALL node (a procedure call). */
+typedef struct {
+	Str name; /* name of the called procedure */
+	Ast *args; /* node holding the argument list; nil when there are no arguments */
+} AstProcCall;
+
+/* Payload of an AST_VARDECL node (a variable declaration). */
+typedef struct {
+	Str name;
+	/* Data type, nil if no type was explicitly stated, meaning that
+	 * type deduction must be made from the expression, also implying that
+	 * if this field is nil, `expr` MUSTN'T be nil. */
+	Ast *datatype;
+	Ast *expr; /* if the declaration assigns a value */
+	enum SymbolKind kind; /* whether is a let, var or const... */
+	DataType *type; /* filled in by sema */
+} AstVarDecl;
+
+/* Payload of an AST_VARASSIGN node: assignment of `expr` to variable `name`. */
+typedef struct {
+	Str name;
+	Ast *expr;
+} AstVarAssign;
+
+/* Payload of an AST_NUMBER node (an integer literal). */
+typedef struct {
+	u64 n; /* the literal's value */
+	DataType *type; /* filled in by the sema */
+} AstNumber;
+
+/* One `elif` branch of an `AstIf`: a condition and the body it guards. */
+typedef struct {
+	Ast *cond;
+	Ast *body;
+} AstElif;
+
+/* Payload of an AST_IF node. */
+typedef struct {
+	Ast *cond;
+	Ast *true_body; /* taken when `cond` holds */
+	Ast *false_body; /* the `else` body; nil when there is none */
+	Vec(AstElif) elifs; /* the `elif` branches */
+} AstIf;
+
+/* Abstract representation of a loop, providing a pre and post condition.
+ * `while` loops are modelled as a loop with a precondition only.
+ * For infinite loops both `precond` and `postcond` are nil.
+ * Payload of an AST_LOOP node. */
+typedef struct {
+	Ast *precond, *postcond, *body;
+} AstLoop;
+
+/* Payload of an AST_ATTRIBUTE node: a list of attributes attached to
+ * another node. */
+typedef struct {
+	/* Attributes for now can only be identifiers */
+	Vec(Str) attrs;
+	Ast *node; /* The decorated node */
+} AstAttribute;
+
+/* Payload of an AST_DISCARD node; `expr` is the operand of `discard`. */
+typedef struct {
+	Ast *expr;
+} AstDiscard;
+
+/* A node of the syntax tree: a tag (`type`), the payload selected by that
+ * tag (anonymous union) and the source location of the node.
+ * `stmts` and `exprs` have the same type and share storage. */
+struct Ast {
+	enum AstType type;
+	union {
+		AstBinop bin; /* binary expression */
+		AstUnary unary; /* unary operator */
+		AstNumber number; /* number (this is an atom) */
+		Str ident; /* identifier (this is an atom too) */
+		AstProc proc; /* procedure definition */
+		AstProcCall call; /* procedure call */
+		AstVarDecl var; /* variable declaration */
+		AstVarAssign varassgn;
+		Ast *ret; /* return statement, this points to its expression (if any) */
+		AstIf ifse; /* if statement/expression */
+		AstLoop loop;
+		Vec(Ast *) stmts;
+		Vec(Ast *) exprs;
+		Str strlit; /* String literal */
+		AstAttribute attribute;
+		AstDiscard discard;
+	};
+	Location loc; /* location in the source code of this node */
+};
+/* Compile-time guard: fails the build once a node grows beyond 512 bytes. */
+_Static_assert(sizeof(Ast) <= 512, "AST node got too bloated");
+
+#endif
diff --git a/compiler/cgBackends.h b/compiler/cgBackends.h
new file mode 100644
index 0000000..d7206af
--- /dev/null
+++ b/compiler/cgBackends.h
@@ -0,0 +1,10 @@
+#ifndef _cgbackends_
+#define _cgbackends_
+
+/* Selects which backend `codegen()` dispatches to. */
+enum CodegenBackends
+{
+	CgBackendC, /* emits C source (see cgC.c) */
+	CgBackendLibGccJit, /* libgccjit backend */
+};
+
+#endif
diff --git a/compiler/cgC.c b/compiler/cgC.c
new file mode 100644
index 0000000..aac892c
--- /dev/null
+++ b/compiler/cgC.c
@@ -0,0 +1,382 @@
+#include <stdio.h>
+
+#include "pre.h"
+#include "codegen.h"
+#include "cgC.h"
+#include "ast.h"
+#include "libs/stb_ds.h"
+
+/* Statement terminator and block terminator of the generated C code, each
+ * followed by a newline. `out` is a `FILE *`. */
+#define EMIT_SEMICOLON_NL(out) fputs(";\n", out)
+#define EMIT_RB_NL(out) fputs("}\n", out)
+
+static void
+emit_expr(CodegenC *cgc, const Ast *expr);
+static void
+emit_expr_list(CodegenC *cgc, const Vec(Ast *) exprs, bool sep);
+static void
+emit_node(CodegenC *cgc, const Ast *node);
+
+/* Writes `cgc->indent` tab characters to the output. A zero or negative
+ * indentation level writes nothing. */
+static void
+indent(CodegenC *cgc)
+{
+	FILE *out = cgc->cgctx->out;
+	int remaining = cgc->indent;
+
+	while (remaining-- > 0)
+		fputc('\t', out);
+}
+
+/* Looks `str` up in the string table of the compilation unit and returns its
+ * ID. A string seen for the first time is added to the table and receives
+ * the next free ID (`strlit_no`, which is then incremented). */
+static i64
+intern_strlit(CodegenC *cgc, const Str *str)
+{
+	CodegenCtx *ctx = cgc->cgctx;
+	i64 id = shget(ctx->strings, str->s);
+
+	if (id == -1) { /* -1 is the table's "not found" default */
+		id = ctx->strlit_no++;
+		shput(ctx->strings, str->s, id);
+	}
+	return id;
+}
+
+/* Writes `comment` to the output as a C block comment.
+ * cgc:      code generator state; text goes to `cgc->cgctx->out`.
+ * comment:  comment text; `comment.s` is printed with `%s`, so it has to be
+ *           NUL-terminated.
+ * nl_after: when true, a newline follows the closing delimiter. */
+static void
+emit_comment(CodegenC *cgc, Str comment, bool nl_after)
+{
+	/* The optional suffix is a string: printing '\0' through `%c` would
+	 * write a stray NUL byte into the generated source. */
+	fprintf(cgc->cgctx->out, "/* %s */%s", comment.s, nl_after ? "\n" : "");
+}
+
+/* Writes an `#include` directive for `path`: quoted when `local` is true,
+ * in angle brackets otherwise. */
+static void
+emit_include(CodegenC *cgc, Str path, bool local)
+{
+	const char open_delim = local ? '"' : '<';
+	const char close_delim = local ? '"' : '>';
+
+	fprintf(cgc->cgctx->out, "#include %c%s%c\n", open_delim, path.s, close_delim);
+}
+
+/* Maps a basic or void data type to the name of the matching C type.
+ * Integer types are selected by `dt->size` (in bytes) and `dt->sign`:
+ * signed types map to `intN_t`, unsigned ones to `uintN_t`.
+ * Returns nil when no C spelling is known for `dt`. */
+static const char *
+basic_datatype_to_c(CodegenC *cgc, const DataType *dt)
+{
+	(void)cgc; /* not needed here */
+
+	switch (dt->kind) {
+	case DtkVoid:
+		return "void";
+	case DtkBasic:
+		switch (dt->size) {
+		case 0: return "void";
+		case 1: return dt->sign ? "int8_t" : "uint8_t";
+		case 2: return dt->sign ? "int16_t" : "uint16_t";
+		case 4: return dt->sign ? "int32_t" : "uint32_t";
+		case 8: return dt->sign ? "int64_t" : "uint64_t";
+		}
+		break;
+	default: /* not a basic type */
+		break;
+	}
+	return nil;
+}
+
+/* Writes the C spelling of data type `dt`: the fixed-width name for basic
+ * and void types, `struct <name>` for struct types. Other kinds are not
+ * supported yet and produce no output. */
+static void
+emit_datatype(CodegenC *cgc, const DataType *dt)
+{
+	FILE *out = cgc->cgctx->out;
+
+	switch (dt->kind) {
+	case DtkBasic:
+	case DtkVoid: {
+		const char *cname = basic_datatype_to_c(cgc, dt);
+		/* A basic type of unexpected size has no C name; never hand a
+		 * nil pointer to fputs. */
+		if (cname == nil) {
+			unreachable();
+			return;
+		}
+		fputs(cname, out);
+		break;
+	}
+	case DtkStruct:
+		fprintf(out, "struct %s", dt->name.s);
+		break;
+	default: /* TODO: unions, procedures, arrays and bool */
+		break;
+	}
+}
+
+/* Writes `attr` wrapped in the compiler attribute syntax
+ * `__attribute((...))`. */
+static void
+emit_c_attribute(CodegenC *cgc, Str attr)
+{
+	FILE *out = cgc->cgctx->out;
+
+	fprintf(out, "__attribute((%s))", attr.s);
+}
+
+/* Writes the C declaration of struct type `dt`, adding the `packed`
+ * attribute when `dt->compound.packed` is set.
+ * NOTE(review): only the type of each field is written; no field name is
+ * emitted because none is available from the field's `DataType`. */
+static void
+emit_structdecl(CodegenC *cgc, const DataType *dt)
+{
+	FILE *out = cgc->cgctx->out;
+
+	/* fprintf, not fputs: the header line contains a conversion. */
+	fprintf(out, "struct %s {\n", dt->name.s);
+	for (isize i = 0; i < arrlen(dt->compound.fields); ++i) {
+		emit_datatype(cgc, dt->compound.fields[i]);
+		EMIT_SEMICOLON_NL(out);
+	}
+	fputc('}', out);
+	if (dt->compound.packed)
+		emit_c_attribute(cgc, Sl("packed"));
+	EMIT_SEMICOLON_NL(out);
+}
+
+/* Writes a C variable declaration for `decl`: an optional `const`, the data
+ * type, the name and, when present, the initialiser expression. */
+static void
+emit_vardecl(CodegenC *cgc, const AstVarDecl *decl)
+{
+	FILE *out = cgc->cgctx->out;
+	const bool is_const = decl->kind == SymConst;
+
+	if (is_const)
+		fputs("const ", out);
+	emit_datatype(cgc, decl->type);
+	fprintf(out, " %s", decl->name.s);
+
+	if (decl->expr == nil) { /* declaration without a value */
+		EMIT_SEMICOLON_NL(out);
+		return;
+	}
+	fputc('=', out);
+	emit_expr(cgc, decl->expr);
+	EMIT_SEMICOLON_NL(out);
+}
+
+/* Writes the assignment statement `<name> = <expr>;`. */
+static void
+emit_varassign(CodegenC *cgc, const AstVarAssign *assign)
+{
+	FILE *out = cgc->cgctx->out;
+
+	fprintf(out, "%s = ", assign->name.s);
+	emit_expr(cgc, assign->expr);
+	EMIT_SEMICOLON_NL(out);
+}
+
+/* Writes the C definition of procedure `proc`: `static` linkage unless the
+ * procedure is public, the return type taken from `proc->type`, the
+ * parameter list (`void` when empty) and the body wrapped in braces. */
+static void
+emit_proc(CodegenC *cgc, const AstProc *proc)
+{
+	FILE *out = cgc->cgctx->out;
+	const isize nparams = arrlen(proc->args);
+
+	if (!proc->ispublic)
+		fputs("static ", out);
+	emit_datatype(cgc, proc->type->proc.rettype);
+	fprintf(out, " %s(", proc->name.s);
+
+	if (nparams == 0)
+		fputs("void", out);
+	for (isize i = 0; i < nparams; ++i) {
+		const AstIdentTypePair *param = &proc->args[i];
+		/* TODO: every parameter is emitted as uint64_t for now; the
+		 * declared type name (`param->dtype`) is not consulted yet. */
+		fputs("uint64_t ", out);
+		fputs((char *)param->ident.s, out);
+		if (i + 1 < nparams)
+			fputc(',', out);
+	}
+	fputs(")\n{\n", out);
+	if (proc->body != nil)
+		emit_node(cgc, proc->body);
+	EMIT_RB_NL(out);
+}
+
+/* Writes the call expression `<name>(<args>)` without a trailing semicolon;
+ * arguments are separated by commas. */
+static void
+emit_proccall(CodegenC *cgc, const AstProcCall *call)
+{
+	FILE *out = cgc->cgctx->out;
+	const Ast *args = call->args;
+
+	fprintf(out, "%s(", call->name.s);
+	/* The argument list is read through the `stmts` member, which shares
+	 * its storage and type with `exprs`. */
+	if (args != nil)
+		emit_expr_list(cgc, (const Vec(Ast *))args->stmts, true);
+	fputc(')', out);
+}
+
+/* Writes an `if` statement for `ift`: the main branch, one `else if` per
+ * entry of `ift->elifs` (in order) and the `else` branch when
+ * `ift->false_body` is set. */
+static void
+emit_if(CodegenC *cgc, const AstIf *ift)
+{
+	FILE *out = cgc->cgctx->out;
+
+	fputs("if (", out);
+	emit_expr(cgc, ift->cond);
+	fputs("){\n", out);
+	if (ift->true_body != nil)
+		emit_node(cgc, ift->true_body);
+	fputc('}', out);
+
+	/* `elif` branches used to be dropped from the output. */
+	for (isize i = 0; i < arrlen(ift->elifs); ++i) {
+		const AstElif *branch = &ift->elifs[i];
+		fputs("else if (", out);
+		emit_expr(cgc, branch->cond);
+		fputs("){\n", out);
+		if (branch->body != nil)
+			emit_node(cgc, branch->body);
+		fputc('}', out);
+	}
+
+	if (ift->false_body != nil) {
+		fputs("else", out);
+		fputs("{\n", out);
+		emit_node(cgc, ift->false_body);
+		fputc('}', out);
+	}
+	fputc('\n', out);
+}
+
+/* Writes a C `while` loop whose condition is `whl->precond` and whose body
+ * is `whl->body`. */
+static void
+emit_whileLoop(CodegenC *cgc, const AstLoop *whl)
+{
+	FILE *out = cgc->cgctx->out;
+
+	fputs("while (", out);
+	emit_expr(cgc, whl->precond);
+	fputs("){\n", out);
+	emit_node(cgc, whl->body);
+	EMIT_RB_NL(out);
+}
+
+/* Writes the C loop for `loop`:
+ *  - a precondition yields a `while` loop;
+ *  - a postcondition only is not implemented yet;
+ *  - neither condition (an infinite loop) yields `for (;;)`. Such loops
+ *    used to produce no output at all, dropping their body. */
+static void
+emit_loop(CodegenC *cgc, const AstLoop *loop)
+{
+	FILE *out = cgc->cgctx->out;
+
+	if (loop->precond != nil) {
+		emit_whileLoop(cgc, loop);
+		return;
+	}
+	if (loop->postcond != nil) {
+		unreachable(); /* post-condition loops: not implemented */
+		return;
+	}
+
+	fputs("for (;;){\n", out);
+	if (loop->body != nil)
+		emit_node(cgc, loop->body);
+	EMIT_RB_NL(out);
+}
+
+/* Writes a `return` statement. `ret_expr` may be nil, in which case no
+ * value is written after the keyword. */
+static void
+emit_return(CodegenC *cgc, const Ast *ret_expr)
+{
+	FILE *out = cgc->cgctx->out;
+
+	fputs("return ", out);
+	emit_expr(cgc, ret_expr); /* writes nothing for nil */
+	EMIT_SEMICOLON_NL(out);
+}
+
+/* Writes a `break` statement; the second parameter is ignored. */
+static void
+emit_break(CodegenC *cgc, const Ast *unused)
+{
+	FILE *out = cgc->cgctx->out;
+
+	(void)unused;
+	fputs("break", out);
+	EMIT_SEMICOLON_NL(out);
+}
+
+/* Writes the operand of a `discard` as a C statement.
+ * Expression operands are written as an expression statement terminated by
+ * `;` (a missing terminator produced invalid C for anything but a call).
+ * Any other node kind is handed to emit_node unchanged; nil writes nothing. */
+static void
+emit_discard(CodegenC *cgc, const Ast *expr)
+{
+	if (expr == nil)
+		return;
+	if (ast_node_is_expr(expr->type)) {
+		emit_expr(cgc, expr);
+		EMIT_SEMICOLON_NL(cgc->cgctx->out);
+		return;
+	}
+	emit_node(cgc, expr);
+}
+
+/* Writes the integer literal `num->n` in decimal. */
+static void
+emit_expr_number(CodegenC *cgc, const AstNumber *num)
+{
+	/* `%lu` only matches a 64-bit value where `unsigned long` is 64 bits
+	 * wide; convert explicitly so the format is correct everywhere. */
+	fprintf(cgc->cgctx->out, "%llu", (unsigned long long)num->n);
+}
+
+/* Writes `strlit` as a double-quoted C string literal. The text is written
+ * verbatim (no escaping is applied). */
+static void
+emit_expr_strlit(CodegenC *cgc, const Str *strlit)
+{
+	FILE *out = cgc->cgctx->out;
+
+	/* The lexer represents an empty literal with a nil pointer; passing
+	 * nil to `%s` is undefined behaviour. */
+	if (strlit->s == nil) {
+		fputs("\"\"", out);
+		return;
+	}
+	fprintf(out, "\"%s\"", strlit->s);
+}
+
+/* Writes the identifier `ident` verbatim. */
+static void
+emit_expr_ident(CodegenC *cgc, const Str *ident)
+{
+	FILE *out = cgc->cgctx->out;
+	const char *name = (char *)ident->s;
+
+	fputs(name, out);
+}
+
+/* Writes a unary expression as `(<op><operand>)`. The operator used to be
+ * dropped, so e.g. a negation was emitted as its bare operand.
+ * The keyword operator `not` is written as C's `!`; any other operator text
+ * is written verbatim. When no operator text is recorded on the node, only
+ * the operand is written (the previous behaviour).
+ * NOTE(review): assumes `unary->op` holds the operator's source spelling;
+ * confirm against the parser. */
+static void
+emit_expr_unary(CodegenC *cgc, const AstUnary *unary)
+{
+	FILE *out = cgc->cgctx->out;
+	const Str op = unary->op;
+
+	if (op.s == nil || op.len <= 0) {
+		emit_expr(cgc, unary->atom);
+		return;
+	}
+
+	fputc('(', out);
+	if (op.len == 3 && op.s[0] == 'n' && op.s[1] == 'o' && op.s[2] == 't')
+		fputc('!', out);
+	else
+		fprintf(out, "%.*s", (int)op.len, (const char *)op.s);
+	emit_expr(cgc, unary->atom);
+	fputc(')', out);
+}
+
+/* Writes a binary expression as `(<left><op><right>)`.
+ * The operator used to be hard-coded to `+` regardless of `expr->op`.
+ * The keyword operators `and` / `or` are written as `&&` / `||`; any other
+ * operator text is written verbatim. When no operator text is recorded on
+ * the node, `+` is written as before.
+ * NOTE(review): assumes `expr->op` holds the operator's source spelling;
+ * confirm against the parser. */
+static void
+emit_expr_binop(CodegenC *cgc, const AstBinop *expr)
+{
+	FILE *out = cgc->cgctx->out;
+	const Str op = expr->op;
+
+	/* guard binops with parenthesis, even if they are redundant */
+	fputc('(', out);
+	emit_expr(cgc, expr->left);
+	if (op.s == nil || op.len <= 0)
+		fputc('+', out);
+	else if (op.len == 3 && op.s[0] == 'a' && op.s[1] == 'n' && op.s[2] == 'd')
+		fputs("&&", out);
+	else if (op.len == 2 && op.s[0] == 'o' && op.s[1] == 'r')
+		fputs("||", out);
+	else
+		fprintf(out, "%.*s", (int)op.len, (const char *)op.s);
+	emit_expr(cgc, expr->right);
+	fputc(')', out);
+}
+
+/* Writes expression node `expr` by dispatching on its kind. A nil node
+ * writes nothing; a node that is not an expression is a programming error. */
+static void
+emit_expr(CodegenC *cgc, const Ast *expr)
+{
+	if (expr == nil)
+		return;
+
+	switch (expr->type) {
+	case AST_NUMBER:   emit_expr_number(cgc, &expr->number); break;
+	case AST_STRLIT:   emit_expr_strlit(cgc, &expr->strlit); break;
+	case AST_IDENT:    emit_expr_ident(cgc, &expr->ident);   break;
+	case AST_PROCCALL: emit_proccall(cgc, &expr->call);      break;
+	case AST_UNARY:    emit_expr_unary(cgc, &expr->unary);   break;
+	case AST_BINEXPR:  emit_expr_binop(cgc, &expr->bin);     break;
+	default:
+		unreachable();
+	}
+}
+
+/* Writes every expression of `exprs` in order. With `sep` set, a comma is
+ * written between consecutive expressions (never after the last one). */
+static void
+emit_expr_list(CodegenC *cgc, const Vec(Ast *) exprs, bool sep)
+{
+	const isize count = arrlen(exprs);
+
+	for (isize idx = 0; idx < count; ++idx) {
+		if (sep && idx > 0)
+			fputc(',', cgc->cgctx->out);
+		emit_expr(cgc, exprs[idx]);
+	}
+}
+
+/* Writes every statement of `stmts` in order. */
+static void
+emit_stmt_list(CodegenC *cgc, Vec(Ast *) stmts)
+{
+	isize idx = 0;
+
+	while (idx < arrlen(stmts)) {
+		emit_node(cgc, stmts[idx]);
+		++idx;
+	}
+}
+
+/* Writes node `node` in statement position by dispatching on its kind.
+ *  - A nil node writes nothing.
+ *  - A bare expression becomes an expression statement and is terminated by
+ *    `;` (the terminator was missing, which produced invalid C).
+ *  - An attribute node writes the node it decorates; the attributes
+ *    themselves are not translated yet. Such nodes used to be dropped.
+ *  - Argument lists, expression lists and invalid nodes can never appear in
+ *    statement position. */
+static void
+emit_node(CodegenC *cgc, const Ast *node)
+{
+	if (node == nil)
+		return;
+
+	switch (node->type) {
+	case AST_STMTS:
+		emit_stmt_list(cgc, node->stmts);
+		break;
+	case AST_PROCDEF:
+		emit_proc(cgc, &node->proc);
+		break;
+	case AST_PROCCALL:
+		emit_proccall(cgc, &node->call);
+		EMIT_SEMICOLON_NL(cgc->cgctx->out);
+		break;
+	case AST_IF:
+		emit_if(cgc, &node->ifse);
+		break;
+	case AST_LOOP:
+		emit_loop(cgc, &node->loop);
+		break;
+	case AST_RETURN:
+		emit_return(cgc, node->ret);
+		break;
+	case AST_BREAK:
+		emit_break(cgc, nil);
+		break;
+	case AST_DISCARD:
+		emit_discard(cgc, node->discard.expr);
+		break;
+	case AST_VARDECL:
+		emit_vardecl(cgc, &node->var);
+		break;
+	case AST_VARASSIGN:
+		emit_varassign(cgc, &node->varassgn);
+		break;
+	case AST_ATTRIBUTE:
+		/* TODO: translate `node->attribute.attrs` */
+		emit_node(cgc, node->attribute.node);
+		break;
+	case AST_BINEXPR:
+	case AST_UNARY:
+	case AST_NUMBER:
+	case AST_STRLIT:
+	case AST_IDENT:
+		emit_expr(cgc, node);
+		EMIT_SEMICOLON_NL(cgc->cgctx->out);
+		break;
+	case AST_PROCCALL_ARGS:
+	case AST_EXPRS:
+	case AST_INVALID:
+		unreachable();
+	}
+}
+
+/* Entry point of the C backend: writes the C translation of `program` to
+ * standard output. The output starts with a comment naming the source file
+ * and an include of <stdint.h>. */
+void
+cgC(CodegenC *cgc, const Ast *program)
+{
+	char note[255] = {0};
+	FILE *out = stdout;
+
+	cgc->cgctx->out = out;
+
+	snprintf(note, sizeof(note), "generated C IR from %s",
+		cgc->cgctx->cctx->current_filename.s);
+	emit_comment(cgc, Str_from_c(note), true);
+
+	emit_include(cgc, Sl("stdint.h"), false);
+	fputc('\n', out);
+	emit_node(cgc, program);
+}
diff --git a/compiler/cgC.h b/compiler/cgC.h
new file mode 100644
index 0000000..9473734
--- /dev/null
+++ b/compiler/cgC.h
@@ -0,0 +1,15 @@
+#ifndef _cgC_h_
+#define _cgC_h_
+
+#include "codegen.h"
+#include "ast.h"
+
+/* State of the C backend. */
+typedef struct {
+	CodegenCtx *cgctx; /* shared code generation context (output stream, string table, ...) */
+	int indent; /* indentation level: number of tabs written by indent() */
+} CodegenC;
+
+void
+cgC(CodegenC *cgc, const Ast *program);
+
+#endif
diff --git a/compiler/codegen.c b/compiler/codegen.c
new file mode 100644
index 0000000..00606e3
--- /dev/null
+++ b/compiler/codegen.c
@@ -0,0 +1,101 @@
+#define _POSIX_C_SOURCE 200809L
+#include <unistd.h>
+#include <spawn.h>
+#include <sys/wait.h>
+
+#include "codegen.h"
+#include "cgC.h"
+#include "messages.h"
+#include "libs/stb_ds.h"
+
+/* (Std)In --> process --> (Std)Out
+ *
+ * Spawns the program at `path` with arguments `argv` and optionally connects
+ * its standard input and output to the caller through pipes.
+ *
+ * path: path of the executable (posix_spawn does no PATH lookup).
+ * argv: nil-terminated argument vector of the new process.
+ * pid:  receives the process ID of the child.
+ * in:   when not nil, receives a stream open for writing that feeds the
+ *       child's standard input.
+ * out:  when not nil, receives a stream open for reading that yields the
+ *       child's standard output.
+ *
+ * Every failure is reported through fatal(). */
+void
+spawn_with_iofp(const char *path, char *const *argv,
+					 pid_t *pid, FILE **in, FILE **out)
+{
+	/* The child inherits our environment. A nil `envp` is not a portable
+	 * way to ask for that, so pass `environ` explicitly. */
+	extern char **environ;
+
+	int irp[2], asmp[2];
+	posix_spawn_file_actions_t fileacts;
+	if (posix_spawn_file_actions_init(&fileacts) != 0)
+		fatal(nil, nil, "could not initialize spawn file actions");
+
+	if (in != nil) {
+		/* the "in" pipe: the child reads irp[0], we write irp[1] */
+		if (pipe(irp) < 0)
+			fatal(nil, nil, "could not open pipe");
+		posix_spawn_file_actions_addclose(&fileacts, irp[1]);
+		posix_spawn_file_actions_adddup2(&fileacts, irp[0], STDIN_FILENO);
+	}
+	if (out != nil) {
+		/* the "out" pipe: the child writes asmp[1], we read asmp[0] */
+		if (pipe(asmp) < 0)
+			fatal(nil, nil, "could not open pipe");
+		posix_spawn_file_actions_addclose(&fileacts, asmp[0]);
+		posix_spawn_file_actions_adddup2(&fileacts, asmp[1], STDOUT_FILENO);
+	}
+
+	if (posix_spawn(pid, path, &fileacts, nil, argv, environ) != 0)
+		fatal(nil, nil, "posix_spawn failed");
+
+	posix_spawn_file_actions_destroy(&fileacts);
+
+	/* Close the pipe ends that belong to the child and wrap ours. */
+	if (in != nil) {
+		close(irp[0]);
+		if ((*in = fdopen(irp[1], "wb")) == nil)
+			fatal(nil, nil, "fdopen fail");
+	}
+	if (out != nil) {
+		close(asmp[1]);
+		if ((*out = fdopen(asmp[0], "rb")) == nil)
+			fatal(nil, nil, "fdopen fail");
+	}
+}
+
+void
+process_wait(pid_t pid)
+{
+	int pstat;
+	waitpid(pid, &pstat, 0);
+	if (!WIFEXITED(pstat))
+		fatal(nil, nil, "qbe crashed");
+	/* did not crash, read return status */
+	int exitc;
+	if ((exitc = WEXITSTATUS(pstat)) != 0)
+		fatal(nil, nil, "qbe exited with non-zero status %d", exitc);
+}
+
+
+/* Allocates and initialises a code generation context for compiler state
+ * `cm` and backend `backend`. No child process is attached yet (both PIDs
+ * are -1) and the string table reports -1 for strings it does not contain.
+ * Release the context with codegen_destroy(). */
+CodegenCtx *
+codegen_new(Compiler *cm, enum CodegenBackends backend)
+{
+	CodegenCtx *ctx = calloc(1, sizeof(*ctx));
+	if (ctx == nil)
+		fatal(nil, nil, "out of memory");
+
+	ctx->ext_pid = ctx->ld_pid = -1;
+	ctx->backend = backend;
+	ctx->cctx = cm;
+
+	sh_new_arena(ctx->strings);
+	shdefault(ctx->strings, -1);
+	return ctx;
+}
+
+/* Releases a context created by codegen_new(). Child processes that are
+ * still attached to the context are waited for first. Passing nil is a
+ * no-op, mirroring free(). */
+void
+codegen_destroy(CodegenCtx *cgctx)
+{
+	if (cgctx == nil)
+		return;
+	if (cgctx->ext_pid != -1)
+		process_wait(cgctx->ext_pid);
+	if (cgctx->ld_pid != -1)
+		process_wait(cgctx->ld_pid);
+	shfree(cgctx->strings);
+	free(cgctx);
+}
+
+/* Generates code for `program` with the backend selected in `cgctx`. */
+void
+codegen(CodegenCtx *cgctx, Ast *program)
+{
+	if (cgctx->backend == CgBackendC) {
+		CodegenC cgc = {.cgctx = cgctx, .indent = 2};
+		cgC(&cgc, program);
+	} else if (cgctx->backend == CgBackendLibGccJit) {
+		fatal(nil, nil, "libgccjit backend not implemented yet");
+	}
+}
diff --git a/compiler/codegen.h b/compiler/codegen.h
new file mode 100644
index 0000000..f7ae184
--- /dev/null
+++ b/compiler/codegen.h
@@ -0,0 +1,37 @@
+#ifndef _codegen_h_
+#define _codegen_h_
+#include <stdio.h> /* FILE */
+#include <sys/types.h> /* for pid_t */
+
+#include "pre.h"
+#include "ast.h"
+#include "state.h"
+#include "cgBackends.h"
+
+/* Backend-independent code generation state. */
+typedef struct {
+	FILE *out; /* Stream the generated code is written to (the C backend sets it to stdout) */
+	FILE *asm_out; /* NOTE(review): not referenced by the C backend; purpose to be confirmed */
+
+	i64 strlit_no; /* ID handed to the next newly interned string literal */
+	i64 internal_label;
+	/* Hash map acting as a set, which contains all strings in a compilation
+	 * unit. Strings get interned on this hash map to remove duplicates.
+	 */
+	HashMapStr(i64) *strings;
+	pid_t ext_pid, ld_pid; /* child processes waited for on destroy; -1 when unused */
+	enum CodegenBackends backend; /* backend codegen() dispatches to */
+	Compiler *cctx; /* the owning compiler state */
+} CodegenCtx;
+
+void
+spawn_with_iofp(const char *path, char *const *argv, pid_t *pid, FILE **in, FILE **out);
+void
+process_wait(pid_t pid);
+CodegenCtx *
+codegen_new(Compiler *cm, enum CodegenBackends backend);
+void
+codegen_destroy(CodegenCtx *cgctx);
+void
+codegen(CodegenCtx *cgctx, Ast *program);
+
+#endif
diff --git a/compiler/datatype.h b/compiler/datatype.h
new file mode 100644
index 0000000..94f369d
--- /dev/null
+++ b/compiler/datatype.h
@@ -0,0 +1,53 @@
+#ifndef _datatype_h_
+#define _datatype_h_
+#include "pre.h"
+
+/* The kind of a `DataType`; selects which part of its union is meaningful. */
+enum DataTypeKind
+{
+	DtkInvalid = 0, /* the zero value, so a zeroed DataType is invalid */
+	DtkVoid,
+	DtkBasic,
+	DtkStruct,
+	DtkUnion,
+	DtkProc,
+	DtkArray,
+	DtkBool,
+};
+
+typedef struct DataType DataType;
+
+/* Description of a compound (struct or union) type. */
+typedef struct {
+	bool packed; /* the C backend adds the `packed` attribute when set */
+	Vec(DataType *) fields; /* types of the fields, in order */
+} DataTypeCompound;
+
+/* A data type of the language. The fields before the union apply to every
+ * kind; the union holds the kind-specific part. */
+struct DataType
+{
+	enum DataTypeKind kind;
+	u16 size; /* size in bytes of the data type */
+	bool builtin; /* if this type is defined in compilerland */
+	bool sign; /* if the type is numerical and has a sign or not */
+	Str name;
+
+	union {
+		DataTypeCompound compound; /* Represents either a struct or union type */
+		/* Procedure type */
+		struct {
+			DataType *rettype;
+			Vec(DataType *) argtypes;
+			bool public;
+			bool extern_lnk; /* external linkage */
+			bool c_varargs; /* C-style varargs (for FFI) */
+		} proc;
+		/* Array type: `len` elements of type `base` */
+		struct {
+			DataType *base;
+			isize len;
+		} array;
+	};
+};
+
+/* Result of a type check. */
+typedef struct {
+	bool ok; /* whether the type checking succeeded */
+	Str msg; /* message describing the type error */
+} DataTypeCheck;
+
+#endif
diff --git a/compiler/lex.c b/compiler/lex.c
new file mode 100644
index 0000000..30fc41a
--- /dev/null
+++ b/compiler/lex.c
@@ -0,0 +1,581 @@
+#include <stdio.h> /* feof, ferror, fread, FILE, EOF */
+#include <stdlib.h> /* malloc calloc free */
+#include <string.h> /* memset */
+
+#include "lex.h"
+#include "messages.h"
+#include "pre.h"
+#include "libs/stb_ds.h"
+
+/* The lexing buffer is split into two halves that are refilled in turn.
+ * A sentinel byte marks the end of the data in each half. */
+#define LEX_BUFFER_SIZE 8192
+/* Parenthesised so the macro stays correct inside larger expressions. */
+#define LEX_HALF_BUFFER_SIZE (LEX_BUFFER_SIZE / 2)
+#define LEX_BUFFER_SENTINEL '\0'
+
+/* Size of the identifier scratch buffer, including the terminating NUL. */
+#define MAX_IDENT_SIZE 1024u
+/* Initial and maximum size of the buffer holding a string literal. */
+#define STRING_LITERAL_BASE_SIZE 255
+#define STRING_LITERAL_MAX_SIZE 4096
+
+/* Character classification helpers. The argument is parenthesised but is
+ * still expanded more than once: pass an expression without side effects. */
+#define at_buffer_end(ls) (*(ls)->fwd == LEX_BUFFER_SENTINEL)
+#define ascii_isident(c) ((c) == '_' || (c) == '?' || (c) == '!' || ascii_isalnum(c))
+#define ascii_isident_start(c) ((c) == '_' || ascii_isalpha(c))
+
+#define lex_error(ls, ...) do {							\
+		error((ls)->cm, &(ls)->cur_loc, __VA_ARGS__);	\
+	} while(0)
+
+#define lex_fatal(ls, ...) do {							\
+		fatal((ls)->cm, &(ls)->cur_loc, __VA_ARGS__);	\
+	} while(0)
+
+/* An optional input byte: `ok` tells whether `val` holds a byte that was
+ * read; it is false once the input is exhausted. */
+typedef Optional(u8) MaybeChr;
+
+/* Printable name of every token, indexed by `enum LexTokenId`. Used for
+ * diagnostics (see lex_match), so every token ID needs an entry: a missing
+ * one leaves a NULL pointer in the table that would be handed to `%s`. */
+const char *TokenIdStr[T_TOKEN_COUNT] = {
+	[T_INVALID] = "(invalid token)",
+	[T_PLUS] = "+",
+	[T_MINUS] = "-",
+	[T_STAR] = "*",
+	[T_BAR] = "/",
+	[T_EXCLAMATION] = "!",
+	[T_LPAREN] = "(",
+	[T_RPAREN] = ")",
+	[T_COMMA] = ",",
+	[T_LESSTHAN] = "<",
+	[T_GREATTHAN] = ">",
+	[T_LOGNOT] = "not", /* was missing */
+	[T_LOGAND] = "and",
+	[T_LOGOR] = "or",
+	[T_EQUAL] = "=",
+	[T_LOGICEQUAL] = "==",
+	[T_NOTEQUAL] = "!=",
+	[T_HASH] = "#",
+	[T_COLON] = ":",
+	[T_SEMICOLON] = ";",
+	[T_LBRACKET] = "[",
+	[T_RBRACKET] = "]",
+	[T_LBRACE] = "{",
+	[T_RBRACE] = "}",
+	[T_IDENT] = "(identifier)",
+	[T_STRING] = "(string literal)",
+	[T_NUMBER] = "(number)",
+	[T_DECNUMBER] = "(decimal number)",
+	[T_CONST] = "const",
+	[T_DISCARD] = "discard",
+	[T_ELIF] = "elif",
+	[T_ELSE] = "else",
+	[T_END] = "end",
+	[T_IF] = "if",
+	[T_LET] = "let",
+	[T_PROC] = "proc",
+	[T_RETURN] = "return",
+	[T_VAR] = "var",
+	[T_WHILE] = "while",
+	[T_STRUCT] = "struct",
+	[T_USE] = "use",
+	[T_BREAK] = "break",
+	[T_NEXT] = "next",
+	[T_EOF] = "(EOF)",
+	[T_ERROR] = "(error)",
+};
+
+/* ASCII-only character classification, independent of the C locale. */
+
+/* True when `c` is one of the decimal digits '0'..'9'. */
+static bool
+ascii_isdigit(u32 c)
+{
+	if (c < '0')
+		return false;
+	return c <= '9';
+}
+
+/* True when `c` is an ASCII letter, lower or upper case. */
+static bool
+ascii_isalpha(u32 c)
+{
+	const bool lower = c >= 'a' && c <= 'z';
+	const bool upper = c >= 'A' && c <= 'Z';
+
+	return lower || upper;
+}
+
+/* True when `c` is a space or one of the control characters from '\t' to
+ * '\r' (tab, line feed, vertical tab, form feed, carriage return). */
+static bool
+ascii_isspace(u32 c)
+{
+	switch (c) {
+	case ' ':
+	case '\t':
+	case '\n':
+	case '\v':
+	case '\f':
+	case '\r':
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* True when `c` is an ASCII letter or a decimal digit. */
+static bool
+ascii_isalnum(u32 c)
+{
+	if (ascii_isalpha(c))
+		return true;
+	return ascii_isdigit(c);
+}
+
+/* Advances the current source location past `chr`: a line feed starts a new
+ * line at column 1, every other byte (tabs included) advances one column. */
+static void
+update_line_count(LexState *ls, u8 chr)
+{
+	if (chr == '\n') {
+		++ls->cur_loc.line;
+		ls->cur_loc.column = 1;
+	} else {
+		++ls->cur_loc.column;
+	}
+}
+
+/* Returns the byte under the scanner without consuming it. */
+static u8
+peek(LexState *ls)
+{
+	return ls->fwd[0];
+}
+
+/* Moves the scanner back by `n` bytes. The line number is decremented when
+ * the scanner lands on a line feed and the column is always decremented by
+ * one, so the column is only approximate in that case. */
+static void
+backup(LexState *ls, int n)
+{
+	u8 *pos = ls->fwd - n;
+
+	ls->fwd = pos;
+	if (*pos == '\n')
+		ls->cur_loc.line -= 1;
+	ls->cur_loc.column -= 1;
+}
+
+/* Reads up to `n` bytes from the input file into `buf` and terminates the
+ * data with the buffer sentinel.
+ * buf: destination; needs room for `n` bytes plus the sentinel.
+ * ar:  receives the number of bytes read (untouched when false is returned).
+ * Returns false when the input is exhausted, true when data was read.
+ * A read error is fatal. */
+static bool
+read_buf(LexState *ls, u8 *buf, isize n, isize *ar)
+{
+	if (feof(ls->input_fp))
+		return false;
+	const isize rb = fread(buf, sizeof(*buf), n, ls->input_fp);
+	if (ferror(ls->input_fp)) {
+		fatal(ls->cm, nil, "could not read input file\n");
+	}
+	/* The end-of-file indicator is only set by a read that hits EOF, so
+	 * an input whose size is a multiple of the buffer size ends with a
+	 * zero-byte read. Report it as "no more data" instead of handing the
+	 * caller an empty buffer whose sentinel would be lexed as input. */
+	if (rb == 0)
+		return false;
+	*ar = rb;
+	buf[rb] = LEX_BUFFER_SENTINEL;
+	return true;
+}
+
+/* Called when the scanner sits on a sentinel byte. When the scanner is at
+ * the end of the data of one half of the buffer, the other half is refilled
+ * from the input and the scanner moves to its start. Returns false when
+ * there is no more input.
+ * When the scanner is at neither end (a NUL byte inside the source text) no
+ * data is read and true is returned. In every case that returns true the
+ * lexeme start is reset to the scanner position. */
+static bool
+reload_buffers(LexState *ls)
+{
+	//if ((ls->fwd != ls->buf + ls->buflen) || (ls->fwd != ls->buf + ls->buflen2))
+	//	lex_fatal(ls, "invalid nil byte in middle of source file");
+
+	const u8 *end_of_buf1 = ls->buf + ls->buflen;
+	const u8 *end_of_buf2 = ls->buf + LEX_HALF_BUFFER_SIZE + ls->buflen2;
+
+	if (ls->fwd == end_of_buf1) { /* end of first buffer */
+		u8 *buf2 = ls->buf + LEX_HALF_BUFFER_SIZE;
+		if (!read_buf(ls, buf2, LEX_HALF_BUFFER_SIZE, &ls->buflen2))
+			return false; /* reached EOF, no more data */
+		ls->fwd = buf2;
+	} else if (ls->fwd == end_of_buf2) { /* end of second buffer */
+		u8 *buf1 = ls->buf;
+		if (!read_buf(ls, buf1, LEX_HALF_BUFFER_SIZE, &ls->buflen))
+			return false; /* reached EOF, no more data */
+		ls->fwd = buf1;
+	}
+	/* reset pointers back to the beginning of the buffer */
+	ls->lbegin = ls->fwd;
+	return true;
+}
+
+/* Consumes and returns the next input byte, refilling the buffer when the
+ * scanner reaches a sentinel. Returns an empty value once the input is
+ * exhausted. The current source location is advanced past the byte. */
+static MaybeChr
+read_chr(LexState *ls)
+{
+	u8 chr = peek(ls);
+	if (chr == LEX_BUFFER_SENTINEL) { /* maybe end of buffer */
+		if (!reload_buffers(ls))
+			return None(MaybeChr);
+		/* The scanner may have moved to freshly loaded data: look at
+		 * the byte that is actually returned, otherwise a line feed at
+		 * the start of a buffer half would not be counted. */
+		chr = peek(ls);
+	}
+	update_line_count(ls, chr);
+	++ls->fwd;
+	return Some(MaybeChr, chr);
+}
+
+/* Consumes whitespace and returns the first byte that is not whitespace
+ * (that byte is consumed too). The lexeme start is advanced once per skipped
+ * byte. Returns an empty value when the input ends first.
+ *
+ * [ abc =    2*9  -  1 ]
+ *        ^-fwd, lbegin           before
+ * [ abc =    2*9  -  1 ]
+ *     lbegin-^^-fwd              after
+ */
+static MaybeChr
+skip_whitespace(LexState *ls)
+{
+	MaybeChr c = read_chr(ls);
+
+	while (c.ok && ascii_isspace(c.val)) {
+		++ls->lbegin;
+		c = read_chr(ls);
+	}
+	if (!c.ok)
+		return None(MaybeChr);
+	return c;
+}
+
+/* Returns a token whose ID is T_ERROR; every other field is zero. */
+static LexToken
+make_error(void)
+{
+	LexToken err = {0};
+
+	err.id = T_ERROR;
+	return err;
+}
+
+/* Returns the interned copy of the NUL-terminated identifier `ident`,
+ * adding it to the identifier table on first sight. The returned pointer is
+ * the key stored in the table, not `ident` itself. */
+static u8 *
+intern_identifier(LexState *ls, u8 *ident)
+{
+	IdentsBucket *entry = shgetp_null(ls->idents, ident);
+
+	if (entry != nil)
+		return (u8 *)entry->key;
+
+	shput(ls->idents, ident, 0);
+	return (u8 *)shgets(ls->idents, ident).key;
+}
+
+/* Scans an identifier that starts at the lexeme start, whose first byte has
+ * already been consumed. The identifier is interned, so the returned token
+ * refers to the table's copy rather than to the local buffer.
+ * Returns an error token when the identifier does not fit the buffer. */
+static LexToken
+identifier(LexState *ls)
+{
+	u8 ident_buf[MAX_IDENT_SIZE];
+	usize len = 0;
+	MaybeChr chr = { *ls->lbegin, true };
+
+	for (; chr.ok && ascii_isident(chr.val); chr = read_chr(ls)) {
+		if (len + 1 == MAX_IDENT_SIZE) {
+			lex_error(ls, "identifier is too long (max: %u)\n", MAX_IDENT_SIZE);
+			return make_error();
+		}
+		ident_buf[len++] = chr.val;
+	}
+	ident_buf[len] = '\0';
+
+	/* The byte that ended the identifier belongs to the next token. */
+	if (chr.ok)
+		backup(ls, 1);
+
+	return (LexToken) {
+		.id = T_IDENT,
+		.ident = {intern_identifier(ls, ident_buf), len},
+		.len = len,
+	};
+}
+
+/* Scans a string literal whose opening quote has already been consumed.
+ *
+ * Returns a T_STRING token owning a heap-allocated, NUL-terminated copy of
+ * the text between the quotes; an empty literal is returned with a nil
+ * pointer and length 0. Returns an error token when the literal is not
+ * closed before the end of the line or input, or when it is too long.
+ *
+ * Fixes over the previous version: every byte (the first one included) is
+ * checked for end of input and line feed, the buffer always keeps room for
+ * the terminating NUL, allocation failures are detected and the buffer is
+ * released on the error path. */
+static LexToken
+string_literal(LexState *ls)
+{
+	isize str_buf_len = STRING_LITERAL_BASE_SIZE;
+	u8 *str_buf = malloc(str_buf_len);
+	isize i = 0;
+
+	if (str_buf == nil) {
+		lex_fatal(ls, "out of memory");
+		return make_error();
+	}
+
+	for (;;) {
+		const MaybeChr chr = read_chr(ls);
+		if (!chr.ok || chr.val == '\n') {
+			lex_error(ls, "unterminated string literal");
+			goto err;
+		}
+		if (chr.val == '"')
+			break;
+		if (i + 1 == STRING_LITERAL_MAX_SIZE) {
+			lex_error(ls, "string literal length exceeds maximum of %d bytes", STRING_LITERAL_MAX_SIZE);
+			goto err;
+		}
+		/* grow while there is no room for this byte plus the NUL */
+		if (i + 1 >= str_buf_len) {
+			u8 *grown = realloc(str_buf, str_buf_len * 2);
+			if (grown == nil) {
+				lex_fatal(ls, "out of memory");
+				goto err;
+			}
+			str_buf = grown;
+			str_buf_len *= 2;
+		}
+		str_buf[i++] = chr.val;
+	}
+
+	if (i > 0) {
+		str_buf[i] = '\0';
+	} else { /* empty literal */
+		free(str_buf); /* we wasted our time */
+		str_buf = nil;
+	}
+
+	return (LexToken) {
+		.id = T_STRING,
+		.str = {str_buf, i},
+		.len = i,
+	};
+err:
+	free(str_buf);
+	return make_error();
+}
+
+/*	Identifies a numeric literal that may have a prefix:
+ *
+ *	('0')─┬──────────────────────┬─*
+ *	      ├('b')╭──┬(digit)┬─────╯
+ *	      ├('o')┤  ╰───<───╯
+ *	      ╰('x')╯
+ * Indirectly based on a BSD (?) implementation.
+ *
+ * The first byte of the literal has already been consumed and is read back
+ * from the lexeme start. Returns a T_NUMBER token holding the value, or an
+ * error token for a malformed or too large literal.
+ *
+ * Fixes over the previous version:
+ *  - the byte following a lone `0` is given back to the input instead of
+ *    being swallowed (e.g. the `)` of `f(0)`);
+ *  - a base prefix must be followed by at least one digit;
+ *  - a NUL byte no longer matches the terminator of the digit table;
+ *  - hexadecimal literals accept upper case digits. */
+static LexToken
+number_literal(LexState *ls)
+{
+	static const u8 digits[] = "0123456789abcdef";
+	LexToken t = { .id = T_NUMBER };
+	u64 number = 0;
+	u8 base = 10;
+	bool prefixed = false;
+	isize ndigits = 0;
+
+	MaybeChr chr = { *ls->lbegin, true };
+
+	if (chr.val == '0') {
+		chr = read_chr(ls); /* skip 0 prefix */
+		if (!chr.ok) { /* EOF edge case */
+			return t; /* 0 */
+		}
+		switch (chr.val) {
+		case 'b':
+			base = 2;
+			break;
+		case 'o':
+			base = 8;
+			break;
+		case 'x':
+			base = 16;
+			break;
+		default:
+			if (ascii_isdigit(chr.val)) {
+				lex_error(ls, "use '0o' for an octal literal");
+				return make_error();
+			}
+			/* start of another token: give the byte back */
+			backup(ls, 1);
+			return t; /* 0 */
+		}
+		prefixed = true;
+		chr = read_chr(ls);
+		if (!chr.ok) {
+			lex_error(ls, "expected a digit after the base prefix");
+			return make_error();
+		}
+	}
+
+	const u64 mmax = U64_MAX / base;
+
+	while (chr.ok) {
+		u8 c = chr.val;
+		if (base == 16 && c >= 'A' && c <= 'F')
+			c += 'a' - 'A';
+		/* search the 16 digits only, not the terminating NUL */
+		const u8 *digitp = memchr(digits, c, sizeof(digits) - 1);
+		if (digitp == nil)
+			break;
+
+		u8 digit = digitp - digits;
+		if (digit >= base) {
+			lex_error(ls, "invalid literal");
+			return make_error();
+		}
+		if (number > mmax)
+			goto overflow;
+		number *= base;
+		/* overflow for adding the digit */
+		if (U64_MAX - digit < number)
+			goto overflow;
+
+		number += digit;
+		++ndigits;
+		chr = read_chr(ls);
+	}
+	/* ate 1 extra character, give it back */
+	if (chr.ok)
+		backup(ls, 1);
+
+	if (prefixed && ndigits == 0) {
+		lex_error(ls, "expected a digit after the base prefix");
+		return make_error();
+	}
+
+	t.inumber = number;
+	return t;
+overflow:
+	lex_error(ls, "integer literal is too big (2^64 max)");
+	return make_error();
+}
+
+/* Turns the identifier token `t` into a keyword token when its text is a
+ * keyword; returns `*t` unchanged otherwise.
+ * The first byte selects a group of candidates and the remaining bytes are
+ * compared against each candidate's tail. `t` must hold a non-empty
+ * identifier. */
+static LexToken
+keyword(LexToken *t)
+{
+/* `kw` is the keyword without its first byte, so the token length is one
+ * byte more than `kw.len` (the length used to be reported one short). */
+#define kwcmp(ident, kw, tid) \
+	{if (Str_equal(ident, kw)) return (LexToken){ .id = tid, .len = kw.len + 1 };}
+
+	Str ident = t->ident;
+	--ident.len;
+	switch (*ident.s++) {
+	case 'a':
+		kwcmp(ident, Sl("nd"), T_LOGAND);
+		break;
+	case 'b':
+		kwcmp(ident, Sl("reak"), T_BREAK);
+		break;
+	case 'c':
+		kwcmp(ident, Sl("onst"), T_CONST);
+		break;
+	case 'd':
+		kwcmp(ident, Sl("iscard"), T_DISCARD);
+		break;
+	case 'e':
+		kwcmp(ident, Sl("nd"), T_END);
+		kwcmp(ident, Sl("lse"), T_ELSE);
+		kwcmp(ident, Sl("lif"), T_ELIF);
+		break;
+	case 'i':
+		kwcmp(ident, Sl("f"), T_IF);
+		break;
+	case 'l':
+		kwcmp(ident, Sl("et"), T_LET);
+		break;
+	case 'n':
+		kwcmp(ident, Sl("ot"), T_LOGNOT);
+		kwcmp(ident, Sl("ext"), T_NEXT);
+		break;
+	case 'o':
+		kwcmp(ident, Sl("r"), T_LOGOR);
+		break;
+	case 'p':
+		kwcmp(ident, Sl("roc"), T_PROC);
+		break;
+	case 'r':
+		kwcmp(ident, Sl("eturn"), T_RETURN);
+		break;
+	case 's':
+		kwcmp(ident, Sl("truct"), T_STRUCT);
+		break;
+	case 'v':
+		kwcmp(ident, Sl("ar"), T_VAR);
+		break;
+	case 'w':
+		kwcmp(ident, Sl("hile"), T_WHILE);
+		break;
+	case 'u':
+		kwcmp(ident, Sl("se"), T_USE);
+		break;
+	}
+	return *t;
+#undef kwcmp
+}
+
+/* Consumes the next input byte when it equals `expected` and returns true.
+ * Otherwise the byte (if any) is given back and false is returned. Going
+ * through read_chr keeps the source location up to date and also works when
+ * the two bytes of an operator sit in different buffer halves. */
+static bool
+accept_chr(LexState *ls, u8 expected)
+{
+	const MaybeChr next = read_chr(ls);
+	if (!next.ok)
+		return false;
+	if (next.val == expected)
+		return true;
+	backup(ls, 1);
+	return false;
+}
+
+/* Returns the next token of the input.
+ * Tokens pushed back with lex_backup are returned first, most recent first.
+ * At the end of the input a T_EOF token is returned and `ls->eof` is set;
+ * input that cannot be tokenised is reported and yields a T_ERROR token.
+ * Punctuation tokens report their length in bytes in `len`. */
+LexToken
+lex_scan(LexState *ls)
+{
+	if (arrlen(ls->backlist) > 0) {
+		return arrpop(ls->backlist);
+	}
+	/* lexeme start pointer */
+	ls->lbegin = ls->fwd;
+
+	LexToken token = {0};
+	MaybeChr c = skip_whitespace(ls);
+	if (!c.ok) {
+		token.id = T_EOF;
+		ls->eof = true;
+		return token;
+	}
+
+#define TOKEN(chr, t) case chr: token.id = t; break;
+	token.len = 1; /* single byte unless a case says otherwise */
+	switch (c.val) {
+	case '!':
+		if (accept_chr(ls, '=')) {
+			token.id = T_NOTEQUAL;
+			token.len = 2;
+		} else {
+			token.id = T_EXCLAMATION;
+		}
+		break;
+	TOKEN('+', T_PLUS)
+	TOKEN('-', T_MINUS)
+	TOKEN('*', T_STAR)
+	TOKEN('/', T_BAR)
+	TOKEN('(', T_LPAREN)
+	TOKEN(')', T_RPAREN)
+	TOKEN(',', T_COMMA)
+	TOKEN('<', T_LESSTHAN)
+	TOKEN('>', T_GREATTHAN)
+	TOKEN('#', T_HASH)
+	TOKEN(':', T_COLON)
+	TOKEN(';', T_SEMICOLON)
+	TOKEN('[', T_LBRACKET)
+	TOKEN(']', T_RBRACKET)
+	TOKEN('{', T_LBRACE)
+	TOKEN('}', T_RBRACE)
+	case '=':
+		if (accept_chr(ls, '=')) {
+			token.id = T_LOGICEQUAL;
+			token.len = 2;
+		} else {
+			token.id = T_EQUAL;
+		}
+		break;
+	case '"':
+		return string_literal(ls);
+	default: {
+		const u8 uc = c.val;
+		/* plain test instead of a GNU case range ('0' ... '9') */
+		if (ascii_isdigit(uc))
+			return number_literal(ls);
+		if (ascii_isident_start(uc)) {
+			LexToken ident_or_keyword = identifier(ls);
+			if (ident_or_keyword.id != T_IDENT)
+				return make_error();
+			return keyword(&ident_or_keyword);
+		}
+
+		if (uc > 0x7f) /* DEL, the last ASCII character */
+			lex_error(ls, "unicode tokens aren't allowed yet");
+		else
+			lex_error(ls, "unknown token '%c' (\\x%02x)", uc, uc);
+		return make_error();
+	}
+	}
+	return token;
+#undef TOKEN
+}
+
+/* Put a token into the backlist. The next call to `lex_scan` will return this
+ * token. The backlist is a stack of tokens, so technically you can have unlimited
+ * look-ahead at the cost of memory.
+ *
+ * The current source location is moved back by the token's length. When that
+ * would leave the line (column below 1) the line number is decremented
+ * instead, never below 1, and the column is left unchanged.
+ */
+void
+lex_backup(LexState *ls, LexToken token)
+{
+	arrput(ls->backlist, token);
+	i64 col = ls->cur_loc.column - token.len;
+	if (col < 1) {
+		if (ls->cur_loc.line > 1)
+			--ls->cur_loc.line;
+	} else {
+		ls->cur_loc.column = col;
+	}
+}
+
+/* Checks whether the type of `token` is `exp_tok`. No token is consumed.
+ * On a mismatch an error naming both token types is reported and false is
+ * returned. */
+bool
+lex_match(LexState *ls, LexToken *token, enum LexTokenId exp_tok)
+{
+	const bool matches = token->id == exp_tok;
+
+	if (matches)
+		return true;
+
+	lex_error(ls, "expected '%s' but got '%s' instead\n",
+			TokenIdStr[exp_tok], TokenIdStr[token->id]);
+	return false;
+}
+
+/* Creates a lexing context reading from `input_fp`.
+ * cm:        compiler state used for diagnostics.
+ * input_fp:  input stream; its stdio buffering is switched off because the
+ *            lexer buffers on its own.
+ * file_name: name recorded as the source of every location.
+ * tabsize:   stored in the context.
+ * The first half of the buffer is filled before returning. Running out of
+ * memory is a fatal error. Release the context with lex_destroy(). */
+LexState *
+lex_new(Compiler *cm, FILE *input_fp, Str file_name, usize tabsize)
+{
+	LexState *ls = calloc(1, sizeof(*ls));
+	if (ls == nil)
+		fatal(cm, nil, "out of memory");
+	/* one extra byte for the sentinel that follows a full second half */
+	ls->buf = calloc(LEX_BUFFER_SIZE + 1, sizeof(*ls->buf));
+	if (ls->buf == nil)
+		fatal(cm, nil, "out of memory");
+	ls->lbegin = ls->fwd = ls->buf;
+	ls->tabsize = (int)tabsize;
+	ls->input_fp = input_fp;
+	ls->cur_loc.line = 1;
+	ls->cur_loc.source = file_name;
+	ls->cm = cm;
+	/* We use a hash table with string keys as a set containing all identifiers
+	 * in a compilation unit, to avoid duplicate allocations.
+	 */
+	sh_new_arena(ls->idents);
+	/* We provide our own buffering scheme */
+	setbuf(input_fp, nil);
+	/* Initial fill of first buffer.
+	 * Any file error gets caught in the function, only thing that can happen
+	 * here is that the file is actually empty, so instant EOF.
+	 */
+	read_buf(ls, ls->buf, LEX_HALF_BUFFER_SIZE, &ls->buflen);
+	return ls;
+}
+
+/* Releases a lexing context together with everything it owns: the input
+ * buffer, the stack of backed up tokens and the identifier table. Interned
+ * identifiers are deallocated with the table. */
+void
+lex_destroy(LexState *ls)
+{
+	free(ls->buf);
+	arrfree(ls->backlist);
+	shfree(ls->idents);
+	free(ls);
+}
diff --git a/compiler/lex.h b/compiler/lex.h
new file mode 100644
index 0000000..6d46a5f
--- /dev/null
+++ b/compiler/lex.h
@@ -0,0 +1,93 @@
+#ifndef _lex_h_
+#define _lex_h_
+#include <stdio.h>
+
+#include "pre.h"
+#include "location.h"
+#include "state.h"
+#include "libs/stb_ds.h"
+
+enum LexTokenId { /* Kinds of tokens; the values are also used as indices into TokenIdStr */
+	T_INVALID = 0, /* the id a zero-initialized token has */
+	/* Unary and binary operators */
+	T_PLUS, T_MINUS, T_STAR, T_BAR,
+	T_LESSTHAN, T_GREATTHAN, T_LOGNOT, T_LOGAND, T_LOGOR, T_LOGICEQUAL, T_NOTEQUAL,
+	T_HASH,
+	/* Others */
+	T_EQUAL, T_EXCLAMATION, T_LPAREN, T_RPAREN, T_COMMA,
+	T_COLON, T_SEMICOLON, T_LBRACKET, T_RBRACKET, T_LBRACE, T_RBRACE,
+	/* Atoms */
+	T_IDENT, T_STRING, T_NUMBER, T_DECNUMBER,
+	/* Keywords */
+	T_CONST,
+	T_ELSE,
+	T_END,
+	T_ELIF,
+	T_IF,
+	T_LET,
+	T_PROC,
+	T_RETURN,
+	T_VAR,
+	T_DISCARD,
+	T_WHILE,
+	T_STRUCT,
+	T_USE,
+	T_BREAK,
+	T_NEXT,
+	/* Control */
+	T_EOF, T_ERROR,
+	T_TOKEN_COUNT, /* number of token ids above; does not represent an actual token */
+};
+
+/* Table mapping a `LexTokenId` to a string name of the token; index it with
+ * the token id, e.g. `TokenIdStr[token.id]`. Defined outside this header. */
+extern const char *TokenIdStr[];
+
+typedef struct { /* One token: its kind, an optional payload and its length */
+	enum LexTokenId id; /* kind of token */
+	union { /* payload; NOTE(review): presumably `id` selects the meaningful member -- confirm in lex.c */
+		Str ident, str, keyword;
+		/* XXX: Defer number parsing until it is actually needed?
+		 * So we can move number parsing out of the lexer. */
+		/* Integer literal, it's the parser problem to tell
+		 * whether the literal is negative or not.
+		 */
+		u64 inumber;
+		/* Floating point literal */
+		double floatn;
+	};
+	isize len; /* Size in bytes of this token; lex_backup rewinds the column by this amount */
+} LexToken;
+
+typedef HashMapStr(i8) IdentsBucket; /* hash map with i8 values; lex_new uses it as the set of identifiers */
+
+typedef struct { /* Complete state of one lexing context; created by lex_new */
+	FILE *input_fp; /* input stream; lex_new turns off its stdio buffering */
+	/* Lexing buffer. This is actually split into two buffers, providing
+	 * a double-buffering scheme */
+	u8 *buf;
+	/* Actual length of each buffer (fread may read less than LEX_BUFFER_SIZE) */
+	isize buflen, buflen2;
+
+	u8 *lbegin; /* marks the begin of the current lexeme */
+	u8 *fwd; /* this pointer is the scanner */
+   	Vec(LexToken) backlist; /* stack of backed up tokens */
+
+	int tabsize; /* tab size given to lex_new */
+	bool eof; /* NOTE(review): presumably set once the input is exhausted -- confirm in lex.c */
+	Location cur_loc; /* current location; lex_new starts the line at 1 */
+	Compiler *cm; /* compiler context given to lex_new */
+	IdentsBucket *idents; /* identifier set, created with sh_new_arena in lex_new */
+} LexState;
+
+LexToken
+lex_scan(LexState *ls);
+void
+lex_backup(LexState *ls, LexToken token);
+bool
+lex_match(LexState *ls, LexToken *t, enum LexTokenId exp_tok);
+LexState *
+lex_new(Compiler *cm, FILE *input_fp, Str file_name, usize tabsize);
+void
+lex_destroy(LexState *l);
+
+#endif
diff --git a/compiler/libs/optparse.h b/compiler/libs/optparse.h
new file mode 100644
index 0000000..9b7c7f8
--- /dev/null
+++ b/compiler/libs/optparse.h
@@ -0,0 +1,403 @@
+/* Optparse --- portable, reentrant, embeddable, getopt-like option parser
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * To get the implementation, define OPTPARSE_IMPLEMENTATION.
+ * Optionally define OPTPARSE_API to control the API's visibility
+ * and/or linkage (static, __attribute__, __declspec).
+ *
+ * The POSIX getopt() option parser has three fatal flaws. These flaws
+ * are solved by Optparse.
+ *
+ * 1) Parser state is stored entirely in global variables, some of
+ * which are static and inaccessible. This means only one thread can
+ * use getopt(). It also means it's not possible to recursively parse
+ * nested sub-arguments while in the middle of argument parsing.
+ * Optparse fixes this by storing all state on a local struct.
+ *
+ * 2) The POSIX standard provides no way to properly reset the parser.
+ * This means for portable code that getopt() is only good for one
+ * run, over one argv with one option string. It also means subcommand
+ * options cannot be processed with getopt(). Most implementations
+ * provide a method to reset the parser, but it's not portable.
+ * Optparse provides an optparse_arg() function for stepping over
+ * subcommands and continuing parsing of options with another option
+ * string. The Optparse struct itself can be passed around to
+ * subcommand handlers for additional subcommand option parsing. A
+ * full reset can be achieved with an additional optparse_init().
+ *
+ * 3) Error messages are printed to stderr. This can be disabled with
+ * opterr, but the messages themselves are still inaccessible.
+ * Optparse solves this by writing an error message in its errmsg
+ * field. The downside to Optparse is that this error message will
+ * always be in English rather than the current locale.
+ *
+ * Optparse should be familiar to anyone accustomed to getopt(), and
+ * it could be a nearly drop-in replacement. The option string is the
+ * same and the fields have the same names as the getopt() global
+ * variables (optarg, optind, optopt).
+ *
+ * Optparse also supports GNU-style long options with optparse_long().
+ * The interface is slightly different and simpler than getopt_long().
+ *
+ * By default, argv is permuted as it is parsed, moving non-option
+ * arguments to the end. This can be disabled by setting the `permute`
+ * field to 0 after initialization.
+ */
+#ifndef OPTPARSE_H
+#define OPTPARSE_H
+
+#ifndef OPTPARSE_API
+#  define OPTPARSE_API
+#endif
+
+struct optparse { /* complete parser state; set up with optparse_init() */
+    char **argv; /* argument vector being parsed; reordered in place when permute is set */
+    int permute; /* nonzero (the default) moves non-option arguments behind the options */
+    int optind; /* index into argv of the next argument to examine */
+    int optopt; /* option character (or long option shortname) examined last */
+    char *optarg; /* argument of the last option, or NULL if it had none */
+    char errmsg[64]; /* description of the last '?' result; reset to "" when an option is parsed */
+    int subopt; /* offset of the next flag inside a grouped short-option argument */
+};
+
+enum optparse_argtype { /* values equal the number of ':' following the option in an optstring */
+    OPTPARSE_NONE, /* option takes no argument */
+    OPTPARSE_REQUIRED, /* option must be given an argument */
+    OPTPARSE_OPTIONAL /* option may be given an argument */
+};
+
+struct optparse_long { /* one entry of the table passed to optparse_long() */
+    const char *longname; /* name matched after the leading "--"; may be NULL */
+    int shortname; /* value returned when this entry matches; nonzero values below 127 are also accepted as a short option */
+    enum optparse_argtype argtype; /* whether the option takes an argument */
+};
+
+/**
+ * Initializes the parser state.
+ * @param options the parser state to (re)initialize
+ * @param argv NULL-terminated argument vector; argv[0] is skipped when present
+ *
+ * Permutation of non-option arguments is enabled by default.
+ */
+OPTPARSE_API
+void optparse_init(struct optparse *options, char **argv);
+
+/**
+ * Read the next option in the argv array.
+ * @param options parser state set up by optparse_init()
+ * @param optstring a getopt()-formatted option string.
+ * @return the next option character, -1 for done, or '?' for error
+ *
+ * Just like getopt(), a character followed by no colons means no
+ * argument. One colon means the option has a required argument. Two
+ * colons means the option takes an optional argument.
+ *
+ * On '?' the errmsg field of the parser state describes the problem.
+ */
+OPTPARSE_API
+int optparse(struct optparse *options, const char *optstring);
+
+/**
+ * Handles GNU-style long options in addition to getopt() options.
+ * This works a lot like GNU's getopt_long(). The last option in
+ * longopts must be all zeros, marking the end of the array. The
+ * longindex argument may be NULL.
+ * @return the shortname of the matched entry, -1 for done, or '?' for error
+ *
+ * When longindex is not NULL and a long option matches, it receives the
+ * index of the matching entry in longopts.
+ */
+OPTPARSE_API
+int optparse_long(struct optparse *options,
+                  const struct optparse_long *longopts,
+                  int *longindex);
+
+/**
+ * Used for stepping over non-option arguments.
+ * @return the next non-option argument, or NULL for no more arguments
+ *
+ * Argument parsing can continue with optparse() after using this
+ * function. That would be used to parse the options for the
+ * subcommand returned by optparse_arg(). This function allows you to
+ * ignore the value of optind.
+ *
+ * The position inside a grouped short-option argument is reset as well.
+ */
+OPTPARSE_API
+char *optparse_arg(struct optparse *options);
+
+/* Implementation */
+#ifdef OPTPARSE_IMPLEMENTATION
+
+#define OPTPARSE_MSG_INVALID "invalid option"
+#define OPTPARSE_MSG_MISSING "option requires an argument"
+#define OPTPARSE_MSG_TOOMANY "option takes no arguments"
+
+/* Writes "<msg> -- '<data>'" into options->errmsg and returns '?'.
+ * Only `data` is truncated to fit; `msg` is expected to be one of the short
+ * OPTPARSE_MSG_* strings above.
+ */
+static int
+optparse_error(struct optparse *options, const char *msg, const char *data)
+{
+    static const char sep[] = " -- '";
+    char *out = options->errmsg;
+    unsigned used = 0;
+    unsigned i;
+
+    for (i = 0; msg[i]; i++)
+        out[used++] = msg[i];
+    for (i = 0; sep[i]; i++)
+        out[used++] = sep[i];
+    /* Keep two bytes free for the closing quote and the terminator. */
+    for (i = 0; used < sizeof(options->errmsg) - 2 && data[i]; i++)
+        out[used++] = data[i];
+    out[used++] = '\'';
+    out[used] = '\0';
+    return '?';
+}
+
+/* Resets `options` so that parsing of `argv` starts from the beginning. */
+OPTPARSE_API
+void
+optparse_init(struct optparse *options, char **argv)
+{
+    options->errmsg[0] = '\0';
+    options->optarg = 0;
+    options->subopt = 0;
+    options->permute = 1;
+    options->argv = argv;
+    /* Start after the program name, unless argv is empty. */
+    options->optind = argv[0] ? 1 : 0;
+}
+
+/* Returns 1 if `arg` is exactly "--", otherwise 0 (also for NULL). */
+static int
+optparse_is_dashdash(const char *arg)
+{
+    if (arg == 0)
+        return 0;
+    return arg[0] == '-' && arg[1] == '-' && arg[2] == '\0';
+}
+
+/* Returns 1 if `arg` looks like a short option group: a '-' followed by at
+ * least one character that is not another '-'. Returns 0 otherwise. */
+static int
+optparse_is_shortopt(const char *arg)
+{
+    if (arg == 0 || arg[0] != '-')
+        return 0;
+    return arg[1] != '\0' && arg[1] != '-';
+}
+
+/* Returns 1 if `arg` starts with "--" and has at least one more character,
+ * otherwise 0 (also for NULL). */
+static int
+optparse_is_longopt(const char *arg)
+{
+    if (arg == 0 || arg[0] != '-' || arg[1] != '-')
+        return 0;
+    return arg[2] != '\0';
+}
+
+/* Moves argv[index] to position optind - 1, shifting the arguments in
+ * between one slot towards the front. */
+static void
+optparse_permute(struct optparse *options, int index)
+{
+    char **argv = options->argv;
+    char *moved = argv[index];
+    int last = options->optind - 1;
+    int pos = index;
+
+    while (pos < last) {
+        argv[pos] = argv[pos + 1];
+        pos++;
+    }
+    argv[last] = moved;
+}
+
+/* Looks up option character `c` in the getopt-style `optstring`.
+ * Returns -1 if `c` is ':' or does not occur in `optstring`; otherwise
+ * OPTPARSE_NONE plus the number of colons (at most two) that follow it. */
+static int
+optparse_argtype(const char *optstring, char c)
+{
+    const char *hit = optstring;
+
+    if (c == ':')
+        return -1;
+    while (*hit && *hit != c)
+        hit++;
+    if (*hit == '\0')
+        return -1;
+    if (hit[1] != ':')
+        return OPTPARSE_NONE;
+    return OPTPARSE_NONE + (hit[2] == ':' ? 2 : 1);
+}
+
+OPTPARSE_API
+int
+optparse(struct optparse *options, const char *optstring)
+{
+    int type;
+    char *next;
+    char *option = options->argv[options->optind]; /* argument examined by this call */
+    options->errmsg[0] = '\0';
+    options->optopt = 0;
+    options->optarg = 0;
+    if (option == 0) { /* end of argv */
+        return -1;
+    } else if (optparse_is_dashdash(option)) {
+        options->optind++; /* consume "--" */
+        return -1;
+    } else if (!optparse_is_shortopt(option)) { /* non-option argument */
+        if (options->permute) {
+            int index = options->optind++; /* remember the non-option's slot and step past it */
+            int r = optparse(options, optstring); /* parse what follows the non-option first */
+            optparse_permute(options, index); /* move the non-option to the slot just below optind */
+            options->optind--; /* optind now refers to the moved non-option */
+            return r;
+        } else {
+            return -1; /* without permutation the first non-option ends parsing */
+        }
+    }
+    option += options->subopt + 1; /* skip '-' and the grouped flags already handled */
+    options->optopt = option[0];
+    type = optparse_argtype(optstring, option[0]);
+    next = options->argv[options->optind + 1]; /* candidate for a separately given argument */
+    switch (type) {
+    case -1: { /* character is ':' or not listed in optstring */
+        char str[2] = {0, 0};
+        str[0] = option[0];
+        options->optind++;
+        return optparse_error(options, OPTPARSE_MSG_INVALID, str);
+    }
+    case OPTPARSE_NONE:
+        if (option[1]) {
+            options->subopt++; /* more flags follow in this argument; stay on it */
+        } else {
+            options->subopt = 0;
+            options->optind++;
+        }
+        return option[0];
+    case OPTPARSE_REQUIRED:
+        options->subopt = 0;
+        options->optind++;
+        if (option[1]) {
+            options->optarg = option + 1; /* argument attached to the flag, e.g. -ovalue */
+        } else if (next != 0) {
+            options->optarg = next; /* argument is the following argv element */
+            options->optind++;
+        } else {
+            char str[2] = {0, 0};
+            str[0] = option[0];
+            options->optarg = 0;
+            return optparse_error(options, OPTPARSE_MSG_MISSING, str);
+        }
+        return option[0];
+    case OPTPARSE_OPTIONAL: /* only an attached argument is taken; the next argv element is never consumed */
+        options->subopt = 0;
+        options->optind++;
+        if (option[1])
+            options->optarg = option + 1;
+        else
+            options->optarg = 0;
+        return option[0];
+    }
+    return 0; /* reached only when type matches none of the cases above */
+}
+
+/* Returns the argument at optind and steps past it, or returns NULL without
+ * advancing when argv is exhausted. The position inside a grouped
+ * short-option argument is reset in both cases. */
+OPTPARSE_API
+char *
+optparse_arg(struct optparse *options)
+{
+    char *arg = options->argv[options->optind];
+
+    options->subopt = 0;
+    if (arg == 0)
+        return 0;
+    options->optind++;
+    return arg;
+}
+
+/* Returns nonzero if entry `i` is the terminator of `longopts`, i.e. it has
+ * neither a long name nor a short name. */
+static int
+optparse_longopts_end(const struct optparse_long *longopts, int i)
+{
+    const struct optparse_long *entry = &longopts[i];
+
+    return entry->longname == 0 && entry->shortname == 0;
+}
+
+/* Builds a getopt-style option string in `optstring` from `longopts`: every
+ * entry with a nonzero shortname below 127 contributes that character
+ * followed by as many ':' as its argtype value. The caller provides the
+ * buffer. */
+static void
+optparse_from_long(const struct optparse_long *longopts, char *optstring)
+{
+    const struct optparse_long *entry;
+    char *out = optstring;
+
+    for (entry = longopts; !optparse_longopts_end(entry, 0); entry++) {
+        int colons = (int)entry->argtype;
+        if (entry->shortname == 0 || entry->shortname >= 127)
+            continue;
+        *out++ = (char)entry->shortname;
+        while (colons-- > 0)
+            *out++ = ':';
+    }
+    *out = '\0';
+}
+
+/* Returns 1 if `option` is exactly `longname`, optionally followed by
+ * "=..."; returns 0 otherwise, including when `longname` is NULL. Unlike
+ * strcmp(), this copes with an "=value" suffix on the option. */
+static int
+optparse_longopts_match(const char *longname, const char *option)
+{
+    if (longname == 0)
+        return 0;
+    while (*option && *longname && *option != '=') {
+        if (*option != *longname)
+            return 0;
+        option++;
+        longname++;
+    }
+    if (*longname != '\0')
+        return 0;
+    return *option == '\0' || *option == '=';
+}
+
+/* Returns a pointer to the text following the first '=' in `option`, or
+ * NULL if `option` contains no '='. */
+static char *
+optparse_longopts_arg(char *option)
+{
+    char *p = option;
+
+    while (*p != '\0') {
+        if (*p == '=')
+            return p + 1;
+        p++;
+    }
+    return 0;
+}
+
+/* Parses a short option on behalf of optparse_long(): builds an option
+ * string from `longopts`, runs optparse() with it and, if `longindex` is not
+ * NULL, stores the index of the last entry whose shortname equals the parsed
+ * option (or -1 when there is none or parsing is done). */
+static int
+optparse_long_fallback(struct optparse *options,
+                       const struct optparse_long *longopts,
+                       int *longindex)
+{
+    char optstring[96 * 3 + 1]; /* 96 ASCII printable characters */
+    int result;
+    int i;
+
+    optparse_from_long(longopts, optstring);
+    result = optparse(options, optstring);
+    if (longindex == 0)
+        return result;
+    *longindex = -1;
+    if (result == -1)
+        return result;
+    i = 0;
+    while (!optparse_longopts_end(longopts, i)) {
+        if (longopts[i].shortname == options->optopt)
+            *longindex = i;
+        i++;
+    }
+    return result;
+}
+
+OPTPARSE_API
+int
+optparse_long(struct optparse *options,
+              const struct optparse_long *longopts,
+              int *longindex)
+{
+    int i;
+    char *option = options->argv[options->optind]; /* argument examined by this call */
+    if (option == 0) { /* end of argv */
+        return -1;
+    } else if (optparse_is_dashdash(option)) {
+        options->optind++; /* consume "--" */
+        return -1;
+    } else if (optparse_is_shortopt(option)) { /* "-x" forms are handled via optparse() */
+        return optparse_long_fallback(options, longopts, longindex);
+    } else if (!optparse_is_longopt(option)) { /* non-option argument */
+        if (options->permute) {
+            int index = options->optind++; /* remember the non-option's slot and step past it */
+            int r = optparse_long(options, longopts, longindex); /* parse what follows first */
+            optparse_permute(options, index); /* move the non-option to the slot just below optind */
+            options->optind--; /* optind now refers to the moved non-option */
+            return r;
+        } else {
+            return -1; /* without permutation the first non-option ends parsing */
+        }
+    }
+
+    /* Parse as long option. */
+    options->errmsg[0] = '\0';
+    options->optopt = 0;
+    options->optarg = 0;
+    option += 2; /* skip "--" */
+    options->optind++;
+    for (i = 0; !optparse_longopts_end(longopts, i); i++) {
+        const char *name = longopts[i].longname;
+        if (optparse_longopts_match(name, option)) { /* first matching entry wins */
+            char *arg;
+            if (longindex)
+                *longindex = i;
+            options->optopt = longopts[i].shortname;
+            arg = optparse_longopts_arg(option); /* text after '=', or NULL */
+            if (longopts[i].argtype == OPTPARSE_NONE && arg != 0) {
+                return optparse_error(options, OPTPARSE_MSG_TOOMANY, name);
+            } if (arg != 0) { /* "--name=value" form */
+                options->optarg = arg;
+            } else if (longopts[i].argtype == OPTPARSE_REQUIRED) {
+                options->optarg = options->argv[options->optind]; /* "--name value" form */
+                if (options->optarg == 0)
+                    return optparse_error(options, OPTPARSE_MSG_MISSING, name);
+                else
+                    options->optind++;
+            }
+            return options->optopt;
+        }
+    }
+    return optparse_error(options, OPTPARSE_MSG_INVALID, option); /* no entry matched */
+}
+
+#endif /* OPTPARSE_IMPLEMENTATION */
+#endif /* OPTPARSE_H */
diff --git a/compiler/libs/optparse_impl.c b/compiler/libs/optparse_impl.c
new file mode 100644
index 0000000..e41bf8b
--- /dev/null
+++ b/compiler/libs/optparse_impl.c
@@ -0,0 +1,3 @@
+/* This file holds the implementation of the optparse library functionality */
+#define OPTPARSE_IMPLEMENTATION
+#include "optparse.h"
diff --git a/compiler/libs/stb_ds.h b/compiler/libs/stb_ds.h
new file mode 100644
index 0000000..e84c82d
--- /dev/null
+++ b/compiler/libs/stb_ds.h
@@ -0,0 +1,1895 @@
+/* stb_ds.h - v0.67 - public domain data structures - Sean Barrett 2019
+
+   This is a single-header-file library that provides easy-to-use
+   dynamic arrays and hash tables for C (also works in C++).
+
+   For a gentle introduction:
+      http://nothings.org/stb_ds
+
+   To use this library, do this in *one* C or C++ file:
+      #define STB_DS_IMPLEMENTATION
+      #include "stb_ds.h"
+
+TABLE OF CONTENTS
+
+  Table of Contents
+  Compile-time options
+  License
+  Documentation
+  Notes
+  Notes - Dynamic arrays
+  Notes - Hash maps
+  Credits
+
+COMPILE-TIME OPTIONS
+
+  #define STBDS_NO_SHORT_NAMES
+
+     This flag needs to be set globally.
+
+     By default stb_ds exposes shorter function names that are not qualified
+     with the "stbds_" prefix. If these names conflict with the names in your
+     code, define this flag.
+
+  #define STBDS_SIPHASH_2_4
+
+     This flag only needs to be set in the file containing #define STB_DS_IMPLEMENTATION.
+
+     By default stb_ds.h hashes using a weaker variant of SipHash and a custom hash for
+     4- and 8-byte keys. On 64-bit platforms, you can define the above flag to force
+     stb_ds.h to use specification-compliant SipHash-2-4 for all keys. Doing so makes
+     hash table insertion about 20% slower on 4- and 8-byte keys, 5% slower on
+     64-byte keys, and 10% slower on 256-byte keys on my test computer.
+
+  #define STBDS_REALLOC(context,ptr,size) better_realloc
+  #define STBDS_FREE(context,ptr)         better_free
+
+     These defines only need to be set in the file containing #define STB_DS_IMPLEMENTATION.
+
+     By default stb_ds uses stdlib realloc() and free() for memory management. You can
+     substitute your own functions instead by defining these symbols. You must either
+     define both, or neither. Note that at the moment, 'context' will always be NULL.
+     @TODO add an array/hash initialization function that takes a memory context pointer.
+
+  #define STBDS_UNIT_TESTS
+
+     Defines a function stbds_unit_tests() that checks the functioning of the data structures.
+
+  Note that on older versions of gcc (e.g. 5.x.x) you may need to build with '-std=c++0x'
+     (or equivalently '-std=c++11') when using anonymous structures as seen on the web
+     page or in STBDS_UNIT_TESTS.
+
+LICENSE
+
+  Placed in the public domain and also MIT licensed.
+  See end of file for detailed license information.
+
+DOCUMENTATION
+
+  Dynamic Arrays
+
+    Non-function interface:
+
+      Declare an empty dynamic array of type T
+        T* foo = NULL;
+
+      Access the i'th item of a dynamic array 'foo' of type T, T* foo:
+        foo[i]
+
+    Functions (actually macros)
+
+      arrfree:
+        void arrfree(T*);
+          Frees the array.
+
+      arrlen:
+        ptrdiff_t arrlen(T*);
+          Returns the number of elements in the array.
+
+      arrlenu:
+        size_t arrlenu(T*);
+          Returns the number of elements in the array as an unsigned type.
+
+      arrpop:
+        T arrpop(T* a)
+          Removes the final element of the array and returns it.
+
+      arrput:
+        T arrput(T* a, T b);
+          Appends the item b to the end of array a. Returns b.
+
+      arrins:
+        T arrins(T* a, int p, T b);
+          Inserts the item b into the middle of array a, into a[p],
+          moving the rest of the array over. Returns b.
+
+      arrinsn:
+        void arrinsn(T* a, int p, int n);
+          Inserts n uninitialized items into array a starting at a[p],
+          moving the rest of the array over.
+
+      arraddnptr:
+        T* arraddnptr(T* a, int n)
+          Appends n uninitialized items onto array at the end.
+          Returns a pointer to the first uninitialized item added.
+
+      arraddnindex:
+        size_t arraddnindex(T* a, int n)
+          Appends n uninitialized items onto array at the end.
+          Returns the index of the first uninitialized item added.
+
+      arrdel:
+        void arrdel(T* a, int p);
+          Deletes the element at a[p], moving the rest of the array over.
+
+      arrdeln:
+        void arrdeln(T* a, int p, int n);
+          Deletes n elements starting at a[p], moving the rest of the array over.
+
+      arrdelswap:
+        void arrdelswap(T* a, int p);
+          Deletes the element at a[p], replacing it with the element from
+          the end of the array. O(1) performance.
+
+      arrsetlen:
+        void arrsetlen(T* a, int n);
+          Changes the length of the array to n. Allocates uninitialized
+          slots at the end if necessary.
+
+      arrsetcap:
+        size_t arrsetcap(T* a, int n);
+          Sets the length of allocated storage to at least n. It will not
+          change the length of the array.
+
+      arrcap:
+        size_t arrcap(T* a);
+          Returns the number of total elements the array can contain without
+          needing to be reallocated.
+
+  Hash maps & String hash maps
+
+    Given T is a structure type: struct { TK key; TV value; }. Note that some
+    functions do not require TV value and can have other fields. For string
+    hash maps, TK must be 'char *'.
+
+    Special interface:
+
+      stbds_rand_seed:
+        void stbds_rand_seed(size_t seed);
+          For security against adversarially chosen data, you should seed the
+          library with a strong random number. Or at least seed it with time().
+
+      stbds_hash_string:
+        size_t stbds_hash_string(char *str, size_t seed);
+          Returns a hash value for a string.
+
+      stbds_hash_bytes:
+        size_t stbds_hash_bytes(void *p, size_t len, size_t seed);
+          These functions hash an arbitrary number of bytes. The function
+          uses a custom hash for 4- and 8-byte data, and a weakened version
+          of SipHash for everything else. On 64-bit platforms you can get
+          specification-compliant SipHash-2-4 on all data by defining
+          STBDS_SIPHASH_2_4, at a significant cost in speed.
+
+    Non-function interface:
+
+      Declare an empty hash map of type T
+        T* foo = NULL;
+
+      Access the i'th entry in a hash table T* foo:
+        foo[i]
+
+    Function interface (actually macros):
+
+      hmfree
+      shfree
+        void hmfree(T*);
+        void shfree(T*);
+          Frees the hashmap and sets the pointer to NULL.
+
+      hmlen
+      shlen
+        ptrdiff_t hmlen(T*)
+        ptrdiff_t shlen(T*)
+          Returns the number of elements in the hashmap.
+
+      hmlenu
+      shlenu
+        size_t hmlenu(T*)
+        size_t shlenu(T*)
+          Returns the number of elements in the hashmap.
+
+      hmgeti
+      shgeti
+      hmgeti_ts
+        ptrdiff_t hmgeti(T*, TK key)
+        ptrdiff_t shgeti(T*, char* key)
+        ptrdiff_t hmgeti_ts(T*, TK key, ptrdiff_t tempvar)
+          Returns the index in the hashmap which has the key 'key', or -1
+          if the key is not present.
+
+      hmget
+      hmget_ts
+      shget
+        TV hmget(T*, TK key)
+        TV shget(T*, char* key)
+        TV hmget_ts(T*, TK key, ptrdiff_t tempvar)
+          Returns the value corresponding to 'key' in the hashmap.
+          The structure must have a 'value' field
+
+      hmgets
+      shgets
+        T hmgets(T*, TK key)
+        T shgets(T*, char* key)
+          Returns the structure corresponding to 'key' in the hashmap.
+
+      hmgetp
+      shgetp
+      hmgetp_ts
+      hmgetp_null
+      shgetp_null
+        T* hmgetp(T*, TK key)
+        T* shgetp(T*, char* key)
+        T* hmgetp_ts(T*, TK key, ptrdiff_t tempvar)
+        T* hmgetp_null(T*, TK key)
+        T* shgetp_null(T*, char *key)
+          Returns a pointer to the structure corresponding to 'key' in
+          the hashmap. Functions ending in "_null" return NULL if the key
+          is not present in the hashmap; the others return a pointer to a
+          structure holding the default value (but not the searched-for key).
+
+      hmdefault
+      shdefault
+        TV hmdefault(T*, TV value)
+        TV shdefault(T*, TV value)
+          Sets the default value for the hashmap, the value which will be
+          returned by hmget/shget if the key is not present.
+
+      hmdefaults
+      shdefaults
+        TV hmdefaults(T*, T item)
+        TV shdefaults(T*, T item)
+          Sets the default struct for the hashmap, the contents which will be
+          returned by hmgets/shgets if the key is not present.
+
+      hmput
+      shput
+        TV hmput(T*, TK key, TV value)
+        TV shput(T*, char* key, TV value)
+          Inserts a <key,value> pair into the hashmap. If the key is already
+          present in the hashmap, updates its value.
+
+      hmputs
+      shputs
+        T hmputs(T*, T item)
+        T shputs(T*, T item)
+          Inserts a struct with T.key into the hashmap. If the struct is already
+          present in the hashmap, updates it.
+
+      hmdel
+      shdel
+        int hmdel(T*, TK key)
+        int shdel(T*, char* key)
+          If 'key' is in the hashmap, deletes its entry and returns 1.
+          Otherwise returns 0.
+
+    Function interface (actually macros) for strings only:
+
+      sh_new_strdup
+        void sh_new_strdup(T*);
+          Overwrites the existing pointer with a newly allocated
+          string hashmap which will automatically allocate and free
+          each string key using realloc/free
+
+      sh_new_arena
+        void sh_new_arena(T*);
+          Overwrites the existing pointer with a newly allocated
+          string hashmap which will automatically allocate each string
+          key to a string arena. Every string key ever used by this
+          hash table remains in the arena until the arena is freed.
+          Additionally, any key which is deleted and reinserted will
+          be allocated multiple times in the string arena.
+
+NOTES
+
+  * These data structures are realloc'd when they grow, and the macro
+    "functions" write to the provided pointer. This means: (a) the pointer
+    must be an lvalue, and (b) the pointer to the data structure is not
+    stable, and you must maintain it the same as you would a realloc'd
+    pointer. For example, if you pass a pointer to a dynamic array to a
+    function which updates it, the function must return back the new
+    pointer to the caller. This is the price of trying to do this in C.
+
+  * The following are the only functions that are thread-safe on a single data
+    structure, i.e. can be run in multiple threads simultaneously on the same
+    data structure
+        hmlen        shlen
+        hmlenu       shlenu
+        hmget_ts     shget_ts
+        hmgeti_ts    shgeti_ts
+        hmgets_ts    shgets_ts
+
+  * You iterate over the contents of a dynamic array and a hashmap in exactly
+    the same way, using arrlen/hmlen/shlen:
+
+      for (i=0; i < arrlen(foo); ++i)
+         ... foo[i] ...
+
+  * All operations except arrins/arrdel are O(1) amortized, but individual
+    operations can be slow, so these data structures may not be suitable
+    for real time use. Dynamic arrays double in capacity as needed, so
+    elements are copied an average of once. Hash tables double/halve
+    their size as needed, with appropriate hysteresis to maintain O(1)
+    performance.
+
+NOTES - DYNAMIC ARRAY
+
+  * If you know how long a dynamic array is going to be in advance, you can avoid
+    extra memory allocations by using arrsetlen to allocate it to that length in
+    advance and use foo[n] while filling it out, or arrsetcap to allocate the memory
+    for that length and use arrput/arrpush as normal.
+
+  * Unlike some other versions of the dynamic array, this version should
+    be safe to use with strict-aliasing optimizations.
+
+NOTES - HASH MAP
+
+  * For compilers other than GCC and clang (e.g. Visual Studio), for hmput/hmget/hmdel
+    and variants, the key must be an lvalue (so the macro can take the address of it).
+    Extensions are used that eliminate this requirement if you're using C99 and later
+    in GCC or clang, or if you're using C++ in GCC. But note that this can make your
+    code less portable.
+
+  * To test for presence of a key in a hashmap, just do 'hmgeti(foo,key) >= 0'.
+
+  * The iteration order of your data in the hashmap is determined solely by the
+    order of insertions and deletions. In particular, if you never delete, new
+    keys are always added at the end of the array. This will be consistent
+    across all platforms and versions of the library. However, you should not
+    attempt to serialize the internal hash table, as the hash is not consistent
+    between different platforms, and may change with future versions of the library.
+
+  * Use sh_new_arena() for string hashmaps that you never delete from. Initialize
+    with NULL if you're managing the memory for your strings, or your strings are
+    never freed (at least until the hashmap is freed). Otherwise, use sh_new_strdup().
+    @TODO: make an arena variant that garbage collects the strings with a trivial
+    copy collector into a new arena whenever the table shrinks / rebuilds. Since
+    current arena recommendation is to only use arena if it never deletes, then
+    this can just replace current arena implementation.
+
+  * If adversarial input is a serious concern and you're on a 64-bit platform,
+    enable STBDS_SIPHASH_2_4 (see the 'Compile-time options' section), and pass
+    a strong random number to stbds_rand_seed.
+
+  * The default value for the hash table is stored in foo[-1], so if you
+    use code like 'hmgetp(T,k)->value = 5' you can accidentally overwrite
+    the value stored by hmdefault if 'k' is not present.
+
+CREDITS
+
+  Sean Barrett -- library, idea for dynamic array API/implementation
+  Per Vognsen  -- idea for hash table API/implementation
+  Rafael Sachetto -- arrpop()
+  github:HeroicKatora -- arraddn() reworking
+
+  Bugfixes:
+    Andy Durdin
+    Shane Liesegang
+    Vinh Truong
+    Andreas Molzer
+    github:hashitaku
+    github:srdjanstipic
+    Macoy Madson
+    Andreas Vennstrom
+    Tobias Mansfield-Williams
+*/
+
+#ifdef STBDS_UNIT_TESTS
+#define _CRT_SECURE_NO_WARNINGS
+#endif
+
+#ifndef INCLUDE_STB_DS_H
+#define INCLUDE_STB_DS_H
+
+#include <stddef.h>
+#include <string.h>
+
+#ifndef STBDS_NO_SHORT_NAMES // unprefixed aliases for the stbds_ names; define STBDS_NO_SHORT_NAMES globally to omit them
+#define arrlen      stbds_arrlen
+#define arrlenu     stbds_arrlenu
+#define arrput      stbds_arrput
+#define arrpush     stbds_arrput // second name for arrput
+#define arrpop      stbds_arrpop
+#define arrfree     stbds_arrfree
+#define arraddn     stbds_arraddn // deprecated, use one of the following instead:
+#define arraddnptr  stbds_arraddnptr
+#define arraddnindex stbds_arraddnindex
+#define arrsetlen   stbds_arrsetlen
+#define arrlast     stbds_arrlast
+#define arrins      stbds_arrins
+#define arrinsn     stbds_arrinsn
+#define arrdel      stbds_arrdel
+#define arrdeln     stbds_arrdeln
+#define arrdelswap  stbds_arrdelswap
+#define arrcap      stbds_arrcap
+#define arrsetcap   stbds_arrsetcap
+
+#define hmput       stbds_hmput
+#define hmputs      stbds_hmputs
+#define hmget       stbds_hmget
+#define hmget_ts    stbds_hmget_ts
+#define hmgets      stbds_hmgets
+#define hmgetp      stbds_hmgetp
+#define hmgetp_ts   stbds_hmgetp_ts
+#define hmgetp_null stbds_hmgetp_null
+#define hmgeti      stbds_hmgeti
+#define hmgeti_ts   stbds_hmgeti_ts
+#define hmdel       stbds_hmdel
+#define hmlen       stbds_hmlen
+#define hmlenu      stbds_hmlenu
+#define hmfree      stbds_hmfree
+#define hmdefault   stbds_hmdefault
+#define hmdefaults  stbds_hmdefaults
+
+#define shput       stbds_shput
+#define shputi      stbds_shputi
+#define shputs      stbds_shputs
+#define shget       stbds_shget
+#define shgeti      stbds_shgeti
+#define shgets      stbds_shgets
+#define shgetp      stbds_shgetp
+#define shgetp_null stbds_shgetp_null
+#define shdel       stbds_shdel
+#define shlen       stbds_shlen
+#define shlenu      stbds_shlenu
+#define shfree      stbds_shfree
+#define shdefault   stbds_shdefault
+#define shdefaults  stbds_shdefaults
+#define sh_new_arena  stbds_sh_new_arena
+#define sh_new_strdup stbds_sh_new_strdup
+
+#define stralloc    stbds_stralloc
+#define strreset    stbds_strreset
+#endif // STBDS_NO_SHORT_NAMES
+
+#if defined(STBDS_REALLOC) && !defined(STBDS_FREE) || !defined(STBDS_REALLOC) && defined(STBDS_FREE) // exactly one of the two hooks is defined
+#error "You must define both STBDS_REALLOC and STBDS_FREE, or neither."
+#endif
+#if !defined(STBDS_REALLOC) && !defined(STBDS_FREE) // neither hook supplied: fall back to the C library
+#include <stdlib.h>
+#define STBDS_REALLOC(c,p,s) realloc(p,s) // the context argument c is ignored
+#define STBDS_FREE(c,p)      free(p) // the context argument c is ignored
+#endif
+
+#ifdef _MSC_VER // STBDS_NOTUSED(v) references v in a statement that has no effect
+#define STBDS_NOTUSED(v)  (void)(v)
+#else
+#define STBDS_NOTUSED(v)  (void)sizeof(v) // v appears only as a sizeof operand
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// for security against attackers, seed the library with a random number, at least time() but stronger is better
+extern void stbds_rand_seed(size_t seed);
+
+// these are the hash functions used internally if you want to test them or use them for other purposes
+extern size_t stbds_hash_bytes(void *p, size_t len, size_t seed);
+extern size_t stbds_hash_string(char *str, size_t seed); // str is read up to its terminating NUL
+
+// this is a simple string arena allocator, initialize with e.g. 'stbds_string_arena my_arena={0}'.
+typedef struct stbds_string_arena stbds_string_arena;
+extern char * stbds_stralloc(stbds_string_arena *a, char *str);
+extern void   stbds_strreset(stbds_string_arena *a);
+
+// have to #define STBDS_UNIT_TESTS to call this
+extern void stbds_unit_tests(void);
+
+///////////////
+//
+// Everything below here is implementation details
+//
+
+extern void * stbds_arrgrowf(void *a, size_t elemsize, size_t addlen, size_t min_cap); // returns 'a' itself when its capacity already suffices, otherwise the reallocated array
+extern void   stbds_arrfreef(void *a); // frees the allocation that begins at a's header
+extern void   stbds_hmfree_func(void *p, size_t elemsize); // p is the real array start (default element), not the user-visible map pointer
+extern void * stbds_hmget_key(void *a, size_t elemsize, void *key, size_t keysize, int mode); // lookup; the found index is left in the array header's temp field
+extern void * stbds_hmget_key_ts(void *a, size_t elemsize, void *key, size_t keysize, ptrdiff_t *temp, int mode); // lookup; the found index is written to *temp instead
+extern void * stbds_hmput_default(void *a, size_t elemsize); // makes sure the zero-filled default element exists
+extern void * stbds_hmput_key(void *a, size_t elemsize, void *key, size_t keysize, int mode);
+extern void * stbds_hmdel_key(void *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode);
+extern void * stbds_shmode_func(size_t elemsize, int mode);
+
+#ifdef __cplusplus
+}
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+#define STBDS_HAS_TYPEOF
+#ifdef __cplusplus
+//#define STBDS_HAS_LITERAL_ARRAY  // this is currently broken for clang
+#endif
+#endif
+
+#if !defined(__cplusplus)
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#define STBDS_HAS_LITERAL_ARRAY
+#endif
+#endif
+
+// this macro takes the address of the argument, but on gcc/clang can accept rvalues
+#if defined(STBDS_HAS_LITERAL_ARRAY) && defined(STBDS_HAS_TYPEOF)
+  // Always spell the operator __typeof__: plain 'typeof' is a GNU extension keyword that
+  // gcc does not recognize in strict ISO modes such as -std=c99/-std=c11 (it only became
+  // standard in C23), whereas __typeof__ is accepted by gcc and clang in every mode.
+  // The value is copied into a one-element compound literal of typevar's type.
+  #define STBDS_ADDRESSOF(typevar, value)     ((__typeof__(typevar)[1]){value}) // literal array decays to pointer to value
+#else
+#define STBDS_ADDRESSOF(typevar, value)     &(value) // fallback: 'value' must be an lvalue
+#endif
+
+#define STBDS_OFFSETOF(var,field)           ((char *) &(var)->field - (char *) (var))
+// every array is preceded in memory by a stbds_array_header; stbds_header() steps back from the element pointer to reach it
+#define stbds_header(t)  ((stbds_array_header *) (t) - 1)
+#define stbds_temp(t)    stbds_header(t)->temp
+#define stbds_temp_key(t) (*(char **) stbds_header(t)->hash_table)
+// dynamic-array macros: 'a' must be an lvalue because growing reassigns it (see stbds_arrgrow); most of them expand their arguments more than once
+#define stbds_arrsetcap(a,n)   (stbds_arrgrow(a,0,n))
+#define stbds_arrsetlen(a,n)   ((stbds_arrcap(a) < (size_t) (n) ? stbds_arrsetcap((a),(size_t)(n)),0 : 0), (a) ? stbds_header(a)->length = (size_t) (n) : 0)
+#define stbds_arrcap(a)        ((a) ? stbds_header(a)->capacity : 0)
+#define stbds_arrlen(a)        ((a) ? (ptrdiff_t) stbds_header(a)->length : 0)
+#define stbds_arrlenu(a)       ((a) ?             stbds_header(a)->length : 0)
+#define stbds_arrput(a,v)      (stbds_arrmaybegrow(a,1), (a)[stbds_header(a)->length++] = (v))
+#define stbds_arrpush          stbds_arrput  // synonym
+#define stbds_arrpop(a)        (stbds_header(a)->length--, (a)[stbds_header(a)->length])
+#define stbds_arraddn(a,n)     ((void)(stbds_arraddnindex(a, n)))    // deprecated, use one of the following instead:
+#define stbds_arraddnptr(a,n)  (stbds_arrmaybegrow(a,n), (n) ? (stbds_header(a)->length += (n), &(a)[stbds_header(a)->length-(n)]) : (a))
+#define stbds_arraddnindex(a,n)(stbds_arrmaybegrow(a,n), (n) ? (stbds_header(a)->length += (n), stbds_header(a)->length-(n)) : stbds_arrlen(a))
+#define stbds_arraddnoff       stbds_arraddnindex
+#define stbds_arrlast(a)       ((a)[stbds_header(a)->length-1])
+#define stbds_arrfree(a)       ((void) ((a) ? STBDS_FREE(NULL,stbds_header(a)) : (void)0), (a)=NULL)
+#define stbds_arrdel(a,i)      stbds_arrdeln(a,i,1)
+#define stbds_arrdeln(a,i,n)   (memmove(&(a)[i], &(a)[(i)+(n)], sizeof *(a) * (stbds_header(a)->length-(n)-(i))), stbds_header(a)->length -= (n))
+#define stbds_arrdelswap(a,i)  ((a)[i] = stbds_arrlast(a), stbds_header(a)->length -= 1)
+#define stbds_arrinsn(a,i,n)   (stbds_arraddn((a),(n)), memmove(&(a)[(i)+(n)], &(a)[i], sizeof *(a) * (stbds_header(a)->length-(n)-(i))))
+#define stbds_arrins(a,i,v)    (stbds_arrinsn((a),(i),1), (a)[i]=(v))
+// grow only when 'a' is NULL or length + n would exceed the current capacity
+#define stbds_arrmaybegrow(a,n)  ((!(a) || stbds_header(a)->length + (n) > stbds_header(a)->capacity) \
+                                  ? (stbds_arrgrow(a,n,0),0) : 0)
+// reassigns 'a' with the (possibly moved) pointer returned by stbds_arrgrowf
+#define stbds_arrgrow(a,b,c)   ((a) = stbds_arrgrowf_wrapper((a), sizeof *(a), (b), (c)))
+// hash-map macros below: 't' points one element past the real array start, so t[-1] is the default element and stbds_temp((t)-1) reads that array's header
+#define stbds_hmput(t, k, v) \
+    ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, 0),   \
+     (t)[stbds_temp((t)-1)].key = (k),    \
+     (t)[stbds_temp((t)-1)].value = (v))
+// hmputs: like hmput, but stores a whole element struct 's'; the key bytes are taken from s.key
+#define stbds_hmputs(t, s) \
+    ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), &(s).key, sizeof (s).key, STBDS_HM_BINARY), \
+     (t)[stbds_temp((t)-1)] = (s))
+// hmgeti: looks 'k' up and yields the index the lookup left in the header's temp field (-1 when the key is absent)
+#define stbds_hmgeti(t,k) \
+    ((t) = stbds_hmget_key_wrapper((t), sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, STBDS_HM_BINARY), \
+      stbds_temp((t)-1))
+// _ts variants: the index goes to the caller-supplied 'temp' lvalue instead of the shared header field
+#define stbds_hmgeti_ts(t,k,temp) \
+    ((t) = stbds_hmget_key_ts_wrapper((t), sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, &(temp), STBDS_HM_BINARY), \
+      (temp))
+// hmgetp: pointer to the found element; for a missing key the index is -1, which addresses the default element t[-1]
+#define stbds_hmgetp(t, k) \
+    ((void) stbds_hmgeti(t,k), &(t)[stbds_temp((t)-1)])
+
+#define stbds_hmgetp_ts(t, k, temp) \
+    ((void) stbds_hmgeti_ts(t,k,temp), &(t)[temp])
+
+#define stbds_hmdel(t,k) \
+    (((t) = stbds_hmdel_key_wrapper((t),sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, STBDS_OFFSETOF((t),key), STBDS_HM_BINARY)),(t)?stbds_temp((t)-1):0)
+// hmdefault / hmdefaults: make sure the default element exists, then store the default value / struct in t[-1]
+#define stbds_hmdefault(t, v) \
+    ((t) = stbds_hmput_default_wrapper((t), sizeof *(t)), (t)[-1].value = (v))
+
+#define stbds_hmdefaults(t, s) \
+    ((t) = stbds_hmput_default_wrapper((t), sizeof *(t)), (t)[-1] = (s))
+// hmfree: hands the real array start ((p)-1) to stbds_hmfree_func and resets p to NULL
+#define stbds_hmfree(p)        \
+    ((void) ((p) != NULL ? stbds_hmfree_func((p)-1,sizeof*(p)),0 : 0),(p)=NULL)
+
+#define stbds_hmgets(t, k)    (*stbds_hmgetp(t,k))
+#define stbds_hmget(t, k)     (stbds_hmgetp(t,k)->value)
+#define stbds_hmget_ts(t, k, temp)  (stbds_hmgetp_ts(t,k,temp)->value)
+#define stbds_hmlen(t)        ((t) ? (ptrdiff_t) stbds_header((t)-1)->length-1 : 0)
+#define stbds_hmlenu(t)       ((t) ?             stbds_header((t)-1)->length-1 : 0)
+#define stbds_hmgetp_null(t,k)  (stbds_hmgeti(t,k) == -1 ? NULL : &(t)[stbds_temp((t)-1)])
+// sh* variants: string-keyed maps; the key pointer 'k' itself is passed (not its address) together with mode STBDS_HM_STRING
+#define stbds_shput(t, k, v) \
+    ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_HM_STRING),   \
+     (t)[stbds_temp((t)-1)].value = (v))
+
+#define stbds_shputi(t, k, v) \
+    ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_HM_STRING),   \
+     (t)[stbds_temp((t)-1)].value = (v), stbds_temp((t)-1))
+
+#define stbds_shputs(t, s) \
+    ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (s).key, sizeof (s).key, STBDS_HM_STRING), \
+     (t)[stbds_temp((t)-1)] = (s), \
+     (t)[stbds_temp((t)-1)].key = stbds_temp_key((t)-1)) // above line overwrites whole structure, so must rewrite key here if it was allocated internally
+// NOTE(review): the psh* macros use STBDS_HM_PTR_TO_STRING, which is not defined in the visible part of this file -- confirm before using them
+#define stbds_pshput(t, p) \
+    ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (p)->key, sizeof (p)->key, STBDS_HM_PTR_TO_STRING), \
+     (t)[stbds_temp((t)-1)] = (p))
+
+#define stbds_shgeti(t,k) \
+     ((t) = stbds_hmget_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_HM_STRING), \
+      stbds_temp((t)-1))
+
+#define stbds_pshgeti(t,k) \
+     ((t) = stbds_hmget_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (*(t))->key, STBDS_HM_PTR_TO_STRING), \
+      stbds_temp((t)-1))
+
+#define stbds_shgetp(t, k) \
+    ((void) stbds_shgeti(t,k), &(t)[stbds_temp((t)-1)])
+
+#define stbds_pshget(t, k) \
+    ((void) stbds_pshgeti(t,k), (t)[stbds_temp((t)-1)])
+
+#define stbds_shdel(t,k) \
+    (((t) = stbds_hmdel_key_wrapper((t),sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_OFFSETOF((t),key), STBDS_HM_STRING)),(t)?stbds_temp((t)-1):0)
+#define stbds_pshdel(t,k) \
+    (((t) = stbds_hmdel_key_wrapper((t),sizeof *(t), (void*) (k), sizeof (*(t))->key, STBDS_OFFSETOF(*(t),key), STBDS_HM_PTR_TO_STRING)),(t)?stbds_temp((t)-1):0)
+// sh_new_*: assign 't' a fresh map from stbds_shmode_func with the given key-storage mode; the previous value of 't' is not passed on in C
+#define stbds_sh_new_arena(t)  \
+    ((t) = stbds_shmode_func_wrapper(t, sizeof *(t), STBDS_SH_ARENA))
+#define stbds_sh_new_strdup(t) \
+    ((t) = stbds_shmode_func_wrapper(t, sizeof *(t), STBDS_SH_STRDUP))
+
+#define stbds_shdefault(t, v)  stbds_hmdefault(t,v)
+#define stbds_shdefaults(t, s) stbds_hmdefaults(t,s)
+
+#define stbds_shfree       stbds_hmfree
+#define stbds_shlenu       stbds_hmlenu
+
+#define stbds_shgets(t, k) (*stbds_shgetp(t,k))
+#define stbds_shget(t, k)  (stbds_shgetp(t,k)->value)
+#define stbds_shgetp_null(t,k)  (stbds_shgeti(t,k) == -1 ? NULL : &(t)[stbds_temp((t)-1)])
+#define stbds_shlen        stbds_hmlen
+
+typedef struct
+{
+  size_t      length;      // number of elements in use (for hash maps this includes the default element, see stbds_hmlen)
+  size_t      capacity;    // number of elements the allocation has room for
+  void      * hash_table;  // stbds_hash_index of a hash map; set to 0 when an array is first allocated
+  ptrdiff_t   temp;        // scratch slot: stbds_hmget_key leaves the index of the last lookup here
+} stbds_array_header;
+
+typedef struct stbds_string_block
+{
+  struct stbds_string_block *next;
+  char storage[8];
+} stbds_string_block;
+
+struct stbds_string_arena
+{
+  stbds_string_block *storage;
+  size_t remaining;
+  unsigned char block;
+  unsigned char mode;  // this isn't used by the string arena itself
+};
+// key-comparison modes passed as 'mode': binary keys are compared with memcmp, string keys with strcmp (see stbds_is_key_equal)
+#define STBDS_HM_BINARY         0
+#define STBDS_HM_STRING         1
+// values for stbds_string_arena.mode; stbds_hmfree_func frees every stored key when the mode is STBDS_SH_STRDUP
+enum
+{
+   STBDS_SH_NONE,
+   STBDS_SH_DEFAULT,
+   STBDS_SH_STRDUP,
+   STBDS_SH_ARENA
+};
+
+#ifdef __cplusplus
+// in C we use implicit assignment from these void*-returning functions to T*.
+// in C++ these templates make the same code work
+template<class T> static T * stbds_arrgrowf_wrapper(T *a, size_t elemsize, size_t addlen, size_t min_cap) {
+  return (T*)stbds_arrgrowf((void *)a, elemsize, addlen, min_cap);
+}
+template<class T> static T * stbds_hmget_key_wrapper(T *a, size_t elemsize, void *key, size_t keysize, int mode) {
+  return (T*)stbds_hmget_key((void*)a, elemsize, key, keysize, mode);
+}
+template<class T> static T * stbds_hmget_key_ts_wrapper(T *a, size_t elemsize, void *key, size_t keysize, ptrdiff_t *temp, int mode) {
+  return (T*)stbds_hmget_key_ts((void*)a, elemsize, key, keysize, temp, mode);
+}
+template<class T> static T * stbds_hmput_default_wrapper(T *a, size_t elemsize) {
+  return (T*)stbds_hmput_default((void *)a, elemsize);
+}
+template<class T> static T * stbds_hmput_key_wrapper(T *a, size_t elemsize, void *key, size_t keysize, int mode) {
+  return (T*)stbds_hmput_key((void*)a, elemsize, key, keysize, mode);
+}
+template<class T> static T * stbds_hmdel_key_wrapper(T *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode){
+  return (T*)stbds_hmdel_key((void*)a, elemsize, key, keysize, keyoffset, mode);
+}
+template<class T> static T * stbds_shmode_func_wrapper(T *, size_t elemsize, int mode) { // the unnamed T* parameter only serves to deduce the return type
+  return (T*)stbds_shmode_func(elemsize, mode);
+}
+#else // plain C: void* converts implicitly, so the wrappers are plain aliases of the functions
+#define stbds_arrgrowf_wrapper            stbds_arrgrowf
+#define stbds_hmget_key_wrapper           stbds_hmget_key
+#define stbds_hmget_key_ts_wrapper        stbds_hmget_key_ts
+#define stbds_hmput_default_wrapper       stbds_hmput_default
+#define stbds_hmput_key_wrapper           stbds_hmput_key
+#define stbds_hmdel_key_wrapper           stbds_hmdel_key
+#define stbds_shmode_func_wrapper(t,e,m)  stbds_shmode_func(e,m)
+#endif
+
+#endif // INCLUDE_STB_DS_H
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//   IMPLEMENTATION
+//
+
+#ifdef STB_DS_IMPLEMENTATION
+#include <assert.h>
+#include <string.h>
+
+#ifndef STBDS_ASSERT
+#define STBDS_ASSERT_WAS_UNDEFINED
+#define STBDS_ASSERT(x)   ((void) 0) // default: internal assertions compile to nothing unless the user supplies STBDS_ASSERT
+#endif
+// STBDS_STATS(x) expands to x only when STBDS_STATISTICS is defined; the counters below exist only in that configuration
+#ifdef STBDS_STATISTICS
+#define STBDS_STATS(x)   x
+size_t stbds_array_grow;
+size_t stbds_hash_grow;
+size_t stbds_hash_shrink;
+size_t stbds_hash_rebuild;
+size_t stbds_hash_probes;
+size_t stbds_hash_alloc;
+size_t stbds_rehash_probes;
+size_t stbds_rehash_items;
+#else
+#define STBDS_STATS(x)
+#endif
+
+//
+// stbds_arr implementation
+//
+
+//int *prev_allocs[65536];
+//int num_prev;
+
+void *stbds_arrgrowf(void *a, size_t elemsize, size_t addlen, size_t min_cap)
+{ // ensures room for length+addlen elements and at least min_cap in total; 'a' may be NULL; returns the (possibly moved) element pointer
+  stbds_array_header temp={0}; // force debugging
+  void *b;
+  size_t min_len = stbds_arrlen(a) + addlen;
+  (void) sizeof(temp);
+
+  // compute the minimum capacity needed
+  if (min_len > min_cap)
+    min_cap = min_len;
+
+  if (min_cap <= stbds_arrcap(a))
+    return a; // already large enough: nothing is reallocated
+
+  // increase needed capacity to guarantee O(1) amortized
+  if (min_cap < 2 * stbds_arrcap(a))
+    min_cap = 2 * stbds_arrcap(a);
+  else if (min_cap < 4)
+    min_cap = 4;
+
+  //if (num_prev < 65536) if (a) prev_allocs[num_prev++] = (int *) ((char *) a+1);
+  //if (num_prev == 2201)
+  //  num_prev = num_prev;
+  b = STBDS_REALLOC(NULL, (a) ? stbds_header(a) : 0, elemsize * min_cap + sizeof(stbds_array_header));
+  //if (num_prev < 65536) prev_allocs[num_prev++] = (int *) (char *) b;
+  b = (char *) b + sizeof(stbds_array_header); // NOTE(review): the STBDS_REALLOC result is used without a NULL check
+  if (a == NULL) {
+    stbds_header(b)->length = 0; // brand-new array: initialize the header fields
+    stbds_header(b)->hash_table = 0;
+    stbds_header(b)->temp = 0;
+  } else {
+    STBDS_STATS(++stbds_array_grow);
+  }
+  stbds_header(b)->capacity = min_cap;
+
+  return b;
+}
+
+void stbds_arrfreef(void *a) // releases an array's allocation; 'a' is the element pointer (not the header) and may be NULL
+{
+  if (a != NULL) STBDS_FREE(NULL, stbds_header(a)); // a NULL array has no header: stepping back from NULL would hand the free hook a bogus pointer
+}
+
+//
+// stbds_hm hash table implementation
+//
+
+#ifdef STBDS_INTERNAL_SMALL_BUCKET
+#define STBDS_BUCKET_LENGTH      4
+#else
+#define STBDS_BUCKET_LENGTH      8
+#endif
+// a slot number splits into a bucket index (slot >> STBDS_BUCKET_SHIFT) and a lane inside that bucket (slot & STBDS_BUCKET_MASK)
+#define STBDS_BUCKET_SHIFT      (STBDS_BUCKET_LENGTH == 8 ? 3 : 2)
+#define STBDS_BUCKET_MASK       (STBDS_BUCKET_LENGTH-1)
+#define STBDS_CACHE_LINE_SIZE   64
+// rounds n up to a multiple of a; the mask arithmetic requires a to be a power of two
+#define STBDS_ALIGN_FWD(n,a)   (((n) + (a) - 1) & ~((a)-1))
+
+typedef struct
+{
+   size_t    hash [STBDS_BUCKET_LENGTH];
+   ptrdiff_t index[STBDS_BUCKET_LENGTH];
+} stbds_hash_bucket; // in 32-bit, this is one 64-byte cache line; in 64-bit, each array is one 64-byte cache line
+
+typedef struct
+{
+  char * temp_key; // this MUST be the first field of the hash table
+  size_t slot_count;
+  size_t used_count;
+  size_t used_count_threshold;
+  size_t used_count_shrink_threshold;
+  size_t tombstone_count;
+  size_t tombstone_count_threshold;
+  size_t seed; // seed passed to the hash functions for every key of this table
+  size_t slot_count_log2;
+  stbds_string_arena string;
+  stbds_hash_bucket *storage; // not a separate allocation, just 64-byte aligned storage after this struct
+} stbds_hash_index;
+// sentinel values for stbds_hash_bucket.index[]; indices of live entries are >= 0
+#define STBDS_INDEX_EMPTY    -1
+#define STBDS_INDEX_DELETED  -2
+#define STBDS_INDEX_IN_USE(x)  ((x) >= 0)
+// reserved values for stbds_hash_bucket.hash[]; stbds_hm_find_slot bumps computed hashes below 2 so they never match these
+#define STBDS_HASH_EMPTY      0
+#define STBDS_HASH_DELETED    1
+// seed handed to the next table that is created from scratch; stbds_make_hash_index advances it after each use
+static size_t stbds_hash_seed=0x31415926;
+
+void stbds_rand_seed(size_t seed) // replaces the global seed; tables created afterwards start from it, existing tables keep their own
+{
+  stbds_hash_seed = seed;
+}
+
+#define stbds_load_32_or_64(var, temp, v32, v64_hi, v64_lo)                                          \
+  temp = v64_lo ^ v32, temp <<= 16, temp <<= 16, temp >>= 16, temp >>= 16, /* discard if 32-bit */   \
+  var = v64_hi, var <<= 16, var <<= 16,                                    /* discard if 32-bit */   \
+  var ^= temp ^ v32
+// net effect of the macro above: with a 64-bit size_t var becomes (v64_hi << 32) | v64_lo, with a 32-bit size_t it becomes v32; shifts are split in two so no single shift reaches the type width
+#define STBDS_SIZE_T_BITS           ((sizeof (size_t)) * 8)
+
+static size_t stbds_probe_position(size_t hash, size_t slot_count, size_t slot_log2)
+{
+  // first slot to probe: the hash masked with slot_count-1 (presumably slot_count is always a power of two)
+  size_t start = hash & (slot_count-1);
+  STBDS_NOTUSED(slot_log2);
+  #ifdef STBDS_INTERNAL_BUCKET_START
+  start &= ~STBDS_BUCKET_MASK;
+  #endif
+  return start;
+}
+
+static size_t stbds_log2(size_t slot_count)
+{
+  // floor(log2(slot_count)): counts the halvings needed to bring the value down to 1;
+  // inputs of 0 and 1 both yield 0
+  size_t bits;
+  for (bits = 0; slot_count > 1; slot_count >>= 1)
+    ++bits;
+  return bits;
+}
+
+static stbds_hash_index *stbds_make_hash_index(size_t slot_count, stbds_hash_index *ot)
+{ // allocates an index with slot_count slots; when 'ot' is given, its live entries are re-inserted and its seed and string arena are inherited
+  stbds_hash_index *t;
+  t = (stbds_hash_index *) STBDS_REALLOC(NULL,0,(slot_count >> STBDS_BUCKET_SHIFT) * sizeof(stbds_hash_bucket) + sizeof(stbds_hash_index) + STBDS_CACHE_LINE_SIZE-1);
+  t->storage = (stbds_hash_bucket *) STBDS_ALIGN_FWD((size_t) (t+1), STBDS_CACHE_LINE_SIZE); // NOTE(review): 't' is dereferenced without checking the allocation result
+  t->slot_count = slot_count;
+  t->slot_count_log2 = stbds_log2(slot_count);
+  t->tombstone_count = 0;
+  t->used_count = 0;
+  // load-factor thresholds: only the "A2" variant below is compiled (#elif 1)
+  #if 0 // A1
+  t->used_count_threshold        = slot_count*12/16; // if 12/16th of table is occupied, grow
+  t->tombstone_count_threshold   = slot_count* 2/16; // if tombstones are 2/16th of table, rebuild
+  t->used_count_shrink_threshold = slot_count* 4/16; // if table is only 4/16th full, shrink
+  #elif 1 // A2
+  //t->used_count_threshold        = slot_count*12/16; // if 12/16th of table is occupied, grow
+  //t->tombstone_count_threshold   = slot_count* 3/16; // if tombstones are 3/16th of table, rebuild
+  //t->used_count_shrink_threshold = slot_count* 4/16; // if table is only 4/16th full, shrink
+
+  // compute without overflowing
+  t->used_count_threshold        = slot_count - (slot_count>>2);
+  t->tombstone_count_threshold   = (slot_count>>3) + (slot_count>>4);
+  t->used_count_shrink_threshold = slot_count >> 2;
+
+  #elif 0 // B1
+  t->used_count_threshold        = slot_count*13/16; // if 13/16th of table is occupied, grow
+  t->tombstone_count_threshold   = slot_count* 2/16; // if tombstones are 2/16th of table, rebuild
+  t->used_count_shrink_threshold = slot_count* 5/16; // if table is only 5/16th full, shrink
+  #else // C1
+  t->used_count_threshold        = slot_count*14/16; // if 14/16th of table is occupied, grow
+  t->tombstone_count_threshold   = slot_count* 2/16; // if tombstones are 2/16th of table, rebuild
+  t->used_count_shrink_threshold = slot_count* 6/16; // if table is only 6/16th full, shrink
+  #endif
+  // Following statistics were measured on a Core i7-6700 @ 4.00Ghz, compiled with clang 7.0.1 -O2
+    // Note that the larger tables have high variance as they were run fewer times
+  //     A1            A2          B1           C1
+  //    0.10ms :     0.10ms :     0.10ms :     0.11ms :      2,000 inserts creating 2K table
+  //    0.96ms :     0.95ms :     0.97ms :     1.04ms :     20,000 inserts creating 20K table
+  //   14.48ms :    14.46ms :    10.63ms :    11.00ms :    200,000 inserts creating 200K table
+  //  195.74ms :   196.35ms :   203.69ms :   214.92ms :  2,000,000 inserts creating 2M table
+  // 2193.88ms :  2209.22ms :  2285.54ms :  2437.17ms : 20,000,000 inserts creating 20M table
+  //   65.27ms :    53.77ms :    65.33ms :    65.47ms : 500,000 inserts & deletes in 2K table
+  //   72.78ms :    62.45ms :    71.95ms :    72.85ms : 500,000 inserts & deletes in 20K table
+  //   89.47ms :    77.72ms :    96.49ms :    96.75ms : 500,000 inserts & deletes in 200K table
+  //   97.58ms :    98.14ms :    97.18ms :    97.53ms : 500,000 inserts & deletes in 2M table
+  //  118.61ms :   119.62ms :   120.16ms :   118.86ms : 500,000 inserts & deletes in 20M table
+  //  192.11ms :   194.39ms :   196.38ms :   195.73ms : 500,000 inserts & deletes in 200M table
+  // tables of at most one bucket get a shrink threshold of 0
+  if (slot_count <= STBDS_BUCKET_LENGTH)
+    t->used_count_shrink_threshold = 0;
+  // to avoid infinite loop, we need to guarantee that at least one slot is empty and will terminate probes
+  STBDS_ASSERT(t->used_count_threshold + t->tombstone_count_threshold < t->slot_count);
+  STBDS_STATS(++stbds_hash_alloc);
+  if (ot) {
+    t->string = ot->string;
+    // reuse old seed so we can reuse old hashes so below "copy out old data" doesn't do any hashing
+    t->seed = ot->seed;
+  } else {
+    size_t a,b,temp;
+    memset(&t->string, 0, sizeof(t->string));
+    t->seed = stbds_hash_seed;
+    // LCG
+    // in 32-bit, a =          2147001325   b =  715136305
+    // in 64-bit, a = 2862933555777941757   b = 3037000493
+    stbds_load_32_or_64(a,temp, 2147001325, 0x27bb2ee6, 0x87b0b0fd);
+    stbds_load_32_or_64(b,temp,  715136305,          0, 0xb504f32d);
+    stbds_hash_seed = stbds_hash_seed  * a + b; // advance the global seed so the next fresh table gets a different one
+  }
+  // mark every lane of every bucket as empty
+  {
+    size_t i,j;
+    for (i=0; i < slot_count >> STBDS_BUCKET_SHIFT; ++i) {
+      stbds_hash_bucket *b = &t->storage[i];
+      for (j=0; j < STBDS_BUCKET_LENGTH; ++j)
+        b->hash[j] = STBDS_HASH_EMPTY;
+      for (j=0; j < STBDS_BUCKET_LENGTH; ++j)
+        b->index[j] = STBDS_INDEX_EMPTY;
+    }
+  }
+
+  // copy out the old data, if any
+  if (ot) {
+    size_t i,j;
+    t->used_count = ot->used_count;
+    for (i=0; i < ot->slot_count >> STBDS_BUCKET_SHIFT; ++i) {
+      stbds_hash_bucket *ob = &ot->storage[i];
+      for (j=0; j < STBDS_BUCKET_LENGTH; ++j) {
+        if (STBDS_INDEX_IN_USE(ob->index[j])) { // empty and deleted lanes of the old table are not carried over
+          size_t hash = ob->hash[j];
+          size_t pos = stbds_probe_position(hash, t->slot_count, t->slot_count_log2);
+          size_t step = STBDS_BUCKET_LENGTH;
+          STBDS_STATS(++stbds_rehash_items);
+          for (;;) {
+            size_t limit,z;
+            stbds_hash_bucket *bucket;
+            bucket = &t->storage[pos >> STBDS_BUCKET_SHIFT];
+            STBDS_STATS(++stbds_rehash_probes);
+            // scan from the probe lane to the end of the bucket, then wrap to the lanes before it
+            for (z=pos & STBDS_BUCKET_MASK; z < STBDS_BUCKET_LENGTH; ++z) {
+              if (bucket->hash[z] == 0) { // 0 is STBDS_HASH_EMPTY: take the first free lane
+                bucket->hash[z] = hash;
+                bucket->index[z] = ob->index[j];
+                goto done;
+              }
+            }
+
+            limit = pos & STBDS_BUCKET_MASK;
+            for (z = 0; z < limit; ++z) {
+              if (bucket->hash[z] == 0) {
+                bucket->hash[z] = hash;
+                bucket->index[z] = ob->index[j];
+                goto done;
+              }
+            }
+
+            pos += step;                  // quadratic probing
+            step += STBDS_BUCKET_LENGTH;
+            pos &= (t->slot_count-1);
+          }
+        }
+       done:
+        ;
+      }
+    }
+  }
+
+  return t;
+}
+
+#define STBDS_ROTATE_LEFT(val, n)   (((val) << (n)) | ((val) >> (STBDS_SIZE_T_BITS - (n))))
+#define STBDS_ROTATE_RIGHT(val, n)  (((val) >> (n)) | ((val) << (STBDS_SIZE_T_BITS - (n))))
+// both rotates need 0 < n < STBDS_SIZE_T_BITS: n == 0 would make the second shift as wide as the type
+size_t stbds_hash_string(char *str, size_t seed)
+{
+  size_t hash = seed;
+  while (*str) // fold in each byte (as unsigned char) up to, but excluding, the terminating NUL
+     hash = STBDS_ROTATE_LEFT(hash, 9) + (unsigned char) *str++;
+
+  // Thomas Wang 64-to-32 bit mix function, hopefully also works in 32 bits
+  hash ^= seed;
+  hash = (~hash) + (hash << 18);
+  hash ^= hash ^ STBDS_ROTATE_RIGHT(hash,31); // NOTE(review): 'h ^= h ^ r' reduces to 'h = r', so this step is a plain rotate
+  hash = hash * 21;
+  hash ^= hash ^ STBDS_ROTATE_RIGHT(hash,11); // NOTE(review): same reduction as above; changing it would alter every string hash
+  hash += (hash << 6);
+  hash ^= STBDS_ROTATE_RIGHT(hash,22);
+  return hash+seed;
+}
+
+#ifdef STBDS_SIPHASH_2_4
+#define STBDS_SIPHASH_C_ROUNDS 2
+#define STBDS_SIPHASH_D_ROUNDS 4
+typedef int STBDS_SIPHASH_2_4_can_only_be_used_in_64_bit_builds[sizeof(size_t) == 8 ? 1 : -1]; // compile-time check: the array size is -1 (an error) unless size_t is 8 bytes
+#endif
+// without STBDS_SIPHASH_2_4 (or explicit overrides) a single round is used for both phases
+#ifndef STBDS_SIPHASH_C_ROUNDS
+#define STBDS_SIPHASH_C_ROUNDS 1
+#endif
+#ifndef STBDS_SIPHASH_D_ROUNDS
+#define STBDS_SIPHASH_D_ROUNDS 1
+#endif
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable:4127) // conditional expression is constant, for do..while(0) and sizeof()==
+#endif
+
+static size_t stbds_siphash_bytes(void *p, size_t len, size_t seed)
+{
+  unsigned char *d = (unsigned char *) p;
+  size_t i,j;
+  size_t v0,v1,v2,v3, data;
+  // Hashes the len bytes at p with the given seed: whole size_t words first, then the 0..sizeof(size_t)-1 tail bytes.
+  // hash that works on 32- or 64-bit registers without knowing which we have
+  // (computes different results on 32-bit and 64-bit platform)
+  // derived from siphash, but on 32-bit platforms very different as it uses 4 32-bit state not 4 64-bit
+  v0 = ((((size_t) 0x736f6d65 << 16) << 16) + 0x70736575) ^  seed;
+  v1 = ((((size_t) 0x646f7261 << 16) << 16) + 0x6e646f6d) ^ ~seed;
+  v2 = ((((size_t) 0x6c796765 << 16) << 16) + 0x6e657261) ^  seed;
+  v3 = ((((size_t) 0x74656462 << 16) << 16) + 0x79746573) ^ ~seed;
+
+  #ifdef STBDS_TEST_SIPHASH_2_4
+  // hardcoded with key material in the siphash test vectors
+  v0 ^= 0x0706050403020100ull ^  seed;
+  v1 ^= 0x0f0e0d0c0b0a0908ull ^ ~seed;
+  v2 ^= 0x0706050403020100ull ^  seed;
+  v3 ^= 0x0f0e0d0c0b0a0908ull ^ ~seed;
+  #endif
+
+  #define STBDS_SIPROUND() \
+    do {                   \
+      v0 += v1; v1 = STBDS_ROTATE_LEFT(v1, 13);  v1 ^= v0; v0 = STBDS_ROTATE_LEFT(v0,STBDS_SIZE_T_BITS/2); \
+      v2 += v3; v3 = STBDS_ROTATE_LEFT(v3, 16);  v3 ^= v2;                                                 \
+      v2 += v1; v1 = STBDS_ROTATE_LEFT(v1, 17);  v1 ^= v2; v2 = STBDS_ROTATE_LEFT(v2,STBDS_SIZE_T_BITS/2); \
+      v0 += v3; v3 = STBDS_ROTATE_LEFT(v3, 21);  v3 ^= v0;                                                 \
+    } while (0)
+  // Little-endian word load. Each byte is widened to size_t BEFORE shifting: 'd[3] << 24' on the promoted int is undefined for bytes >= 0x80 and sign-extends into the upper half on 64-bit.
+  for (i=0; i+sizeof(size_t) <= len; i += sizeof(size_t), d += sizeof(size_t)) {
+    data = (size_t) d[0] | ((size_t) d[1] << 8) | ((size_t) d[2] << 16) | ((size_t) d[3] << 24);
+    if (sizeof(size_t) == 8) data |= ((size_t) d[4] | ((size_t) d[5] << 8) | ((size_t) d[6] << 16) | ((size_t) d[7] << 24)) << 16 << 16; // upper half exists only for 8-byte size_t; skipping it on 32-bit avoids reading d[4..7] beyond the current word
+
+    v3 ^= data;
+    for (j=0; j < STBDS_SIPHASH_C_ROUNDS; ++j)
+      STBDS_SIPROUND();
+    v0 ^= data;
+  }
+  data = len << (STBDS_SIZE_T_BITS-8); // the final word carries the length in its top byte and the leftover bytes below it
+  switch (len - i) {
+    case 7: data |= ((size_t) d[6] << 24) << 24; // fall through
+    case 6: data |= ((size_t) d[5] << 20) << 20; // fall through
+    case 5: data |= ((size_t) d[4] << 16) << 16; // fall through
+    case 4: data |= ((size_t) d[3] << 24); // fall through
+    case 3: data |= (d[2] << 16); // fall through
+    case 2: data |= (d[1] << 8); // fall through
+    case 1: data |= d[0]; // fall through
+    case 0: break;
+  }
+  v3 ^= data;
+  for (j=0; j < STBDS_SIPHASH_C_ROUNDS; ++j)
+    STBDS_SIPROUND();
+  v0 ^= data;
+  v2 ^= 0xff;
+  for (j=0; j < STBDS_SIPHASH_D_ROUNDS; ++j)
+    STBDS_SIPROUND();
+
+#ifdef STBDS_SIPHASH_2_4
+  return v0^v1^v2^v3;
+#else
+  return v1^v2^v3; // slightly stronger since v0^v3 in above cancels out final round operation? I tweeted at the authors of SipHash about this but they didn't reply
+#endif
+}
+
+size_t stbds_hash_bytes(void *p, size_t len, size_t seed) // hashes the len bytes at p, mixed with seed; 4- and 8-byte keys take dedicated fast paths
+{
+#ifdef STBDS_SIPHASH_2_4
+  return stbds_siphash_bytes(p,len,seed);
+#else
+  unsigned char *d = (unsigned char *) p;
+  // bytes are widened before being shifted into bit 31: shifting the promoted int is undefined for bytes >= 0x80
+  if (len == 4) {
+    unsigned int hash = d[0] | (d[1] << 8) | (d[2] << 16) | ((unsigned int) d[3] << 24);
+    #if 0
+    // HASH32-A  Bob Jenkins' hash function w/o large constants
+    hash ^= seed;
+    hash -= (hash<<6);
+    hash ^= (hash>>17);
+    hash -= (hash<<9);
+    hash ^= seed;
+    hash ^= (hash<<4);
+    hash -= (hash<<3);
+    hash ^= (hash<<10);
+    hash ^= (hash>>15);
+    #elif 1
+    // HASH32-BB  Bob Jenkins' presumably-accidental version of Thomas Wang hash with rotates turned into shifts.
+    // Note that converting these back to rotates makes it run a lot slower, presumably due to collisions, so I'm
+    // not really sure what's going on.
+    hash ^= seed;
+    hash = (hash ^ 61) ^ (hash >> 16);
+    hash = hash + (hash << 3);
+    hash = hash ^ (hash >> 4);
+    hash = hash * 0x27d4eb2d;
+    hash ^= seed;
+    hash = hash ^ (hash >> 15);
+    #else  // HASH32-C   -  Murmur3
+    hash ^= seed;
+    hash *= 0xcc9e2d51;
+    hash = (hash << 17) | (hash >> 15);
+    hash *= 0x1b873593;
+    hash ^= seed;
+    hash = (hash << 19) | (hash >> 13);
+    hash = hash*5 + 0xe6546b64;
+    hash ^= hash >> 16;
+    hash *= 0x85ebca6b;
+    hash ^= seed;
+    hash ^= hash >> 13;
+    hash *= 0xc2b2ae35;
+    hash ^= hash >> 16;
+    #endif
+    // Following statistics were measured on a Core i7-6700 @ 4.00Ghz, compiled with clang 7.0.1 -O2
+    // Note that the larger tables have high variance as they were run fewer times
+    //  HASH32-A   //  HASH32-BB  //  HASH32-C
+    //    0.10ms   //    0.10ms   //    0.10ms :      2,000 inserts creating 2K table
+    //    0.96ms   //    0.95ms   //    0.99ms :     20,000 inserts creating 20K table
+    //   14.69ms   //   14.43ms   //   14.97ms :    200,000 inserts creating 200K table
+    //  199.99ms   //  195.36ms   //  202.05ms :  2,000,000 inserts creating 2M table
+    // 2234.84ms   // 2187.74ms   // 2240.38ms : 20,000,000 inserts creating 20M table
+    //   55.68ms   //   53.72ms   //   57.31ms : 500,000 inserts & deletes in 2K table
+    //   63.43ms   //   61.99ms   //   65.73ms : 500,000 inserts & deletes in 20K table
+    //   80.04ms   //   77.96ms   //   81.83ms : 500,000 inserts & deletes in 200K table
+    //  100.42ms   //   97.40ms   //  102.39ms : 500,000 inserts & deletes in 2M table
+    //  119.71ms   //  120.59ms   //  121.63ms : 500,000 inserts & deletes in 20M table
+    //  185.28ms   //  195.15ms   //  187.74ms : 500,000 inserts & deletes in 200M table
+    //   15.58ms   //   14.79ms   //   15.52ms : 200,000 inserts creating 200K table with varying key spacing
+    // the 32-bit result is duplicated into the upper half on 64-bit (the double shift yields 0 on 32-bit)
+    return (((size_t) hash << 16 << 16) | hash) ^ seed;
+  } else if (len == 8 && sizeof(size_t) == 8) {
+    size_t hash = (size_t) d[0] | ((size_t) d[1] << 8) | ((size_t) d[2] << 16) | ((size_t) d[3] << 24); // low word, zero-extended: a sign-extended int here would OR all-ones over the high word and hide bytes 4..7
+    hash |= ((size_t) d[4] | ((size_t) d[5] << 8) | ((size_t) d[6] << 16) | ((size_t) d[7] << 24)) << 16 << 16; // avoid warning if size_t == 4
+    hash ^= seed;
+    hash = (~hash) + (hash << 21);
+    hash ^= STBDS_ROTATE_RIGHT(hash,24);
+    hash *= 265;
+    hash ^= STBDS_ROTATE_RIGHT(hash,14);
+    hash ^= seed;
+    hash *= 21;
+    hash ^= STBDS_ROTATE_RIGHT(hash,28);
+    hash += (hash << 31);
+    hash = (~hash) + (hash << 18);
+    return hash;
+  } else {
+    return stbds_siphash_bytes(p,len,seed); // every other key length goes through the generic byte hash
+  }
+#endif
+}
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+
+static int stbds_is_key_equal(void *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode, size_t i)
+{
+  char *stored = (char *) a + elemsize*i + keyoffset;   // address of the key field inside element i
+  if (mode < STBDS_HM_STRING)
+    return memcmp(key, stored, keysize) == 0;           // binary keys: compare keysize raw bytes
+  return strcmp((char *) key, * (char **) stored) == 0; // string modes: the field holds a char* to the key text
+}
+
+#define STBDS_HASH_TO_ARR(x,elemsize) ((char*) (x) - (elemsize)) // user-visible map pointer -> real array start (the default element)
+#define STBDS_ARR_TO_HASH(x,elemsize) ((char*) (x) + (elemsize)) // real array start -> user-visible map pointer
+
+#define stbds_hash_table(a)  ((stbds_hash_index *) stbds_header(a)->hash_table) // 'a' must be the real array start
+
+void stbds_hmfree_func(void *a, size_t elemsize) // frees a hash map; 'a' is the real array start (default element), NULL is accepted
+{
+  if (a == NULL) return;
+  if (stbds_hash_table(a) != NULL) {
+    if (stbds_hash_table(a)->string.mode == STBDS_SH_STRDUP) {
+      size_t i;
+      // skip 0th element, which is default
+      for (i=1; i < stbds_header(a)->length; ++i)
+        STBDS_FREE(NULL, *(char**) ((char *) a + elemsize*i)); // frees the pointer stored at offset 0 of each element, i.e. treats the key as the first field
+    }
+    stbds_strreset(&stbds_hash_table(a)->string);
+  }
+  STBDS_FREE(NULL, stbds_header(a)->hash_table); // the index and the element array are separate allocations
+  STBDS_FREE(NULL, stbds_header(a));
+}
+
+static ptrdiff_t stbds_hm_find_slot(void *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode)
+{ // returns the slot number (bucket base + lane) whose entry matches 'key', or -1 if an empty lane is reached first; the map must already have a hash table
+  void *raw_a = STBDS_HASH_TO_ARR(a,elemsize);
+  stbds_hash_index *table = stbds_hash_table(raw_a);
+  size_t hash = mode >= STBDS_HM_STRING ? stbds_hash_string((char*)key,table->seed) : stbds_hash_bytes(key, keysize,table->seed);
+  size_t step = STBDS_BUCKET_LENGTH;
+  size_t limit,i;
+  size_t pos;
+  stbds_hash_bucket *bucket;
+
+  if (hash < 2) hash += 2; // stored hash values are forbidden from being 0, so we can detect empty slots
+
+  pos = stbds_probe_position(hash, table->slot_count, table->slot_count_log2);
+
+  for (;;) {
+    STBDS_STATS(++stbds_hash_probes);
+    bucket = &table->storage[pos >> STBDS_BUCKET_SHIFT];
+
+    // start searching from pos to end of bucket, this should help performance on small hash tables that fit in cache
+    for (i=pos & STBDS_BUCKET_MASK; i < STBDS_BUCKET_LENGTH; ++i) {
+      if (bucket->hash[i] == hash) { // hash matches: confirm by comparing the actual key of the referenced element
+        if (stbds_is_key_equal(a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) {
+          return (pos & ~STBDS_BUCKET_MASK)+i;
+        }
+      } else if (bucket->hash[i] == STBDS_HASH_EMPTY) {
+        return -1; // an empty lane ends the probe sequence; deleted lanes (hash 1) are stepped over
+      }
+    }
+
+    // search from beginning of bucket to pos
+    limit = pos & STBDS_BUCKET_MASK;
+    for (i = 0; i < limit; ++i) {
+      if (bucket->hash[i] == hash) {
+        if (stbds_is_key_equal(a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) {
+          return (pos & ~STBDS_BUCKET_MASK)+i;
+        }
+      } else if (bucket->hash[i] == STBDS_HASH_EMPTY) {
+        return -1;
+      }
+    }
+
+    // quadratic probing
+    pos += step;
+    step += STBDS_BUCKET_LENGTH;
+    pos &= (table->slot_count-1);
+  }
+  /* NOTREACHED */
+}
+
+// Look up `key` in hash map `a`, reporting the result through *temp instead of
+// through the array header (compare stbds_hmget_key, which stores it in the header).
+//   *temp receives the element index of the key, STBDS_INDEX_EMPTY when the
+//   key is absent, or -1 when the map has no hash table at all.
+// Returns the (possibly newly allocated) hash-map pointer: a NULL map is replaced
+// by a fresh one that contains only the zeroed default element.
+void * stbds_hmget_key_ts(void *a, size_t elemsize, void *key, size_t keysize, ptrdiff_t *temp, int mode)
+{
+  size_t keyoffset = 0;
+  stbds_hash_index *hidx;
+  ptrdiff_t slot;
+
+  if (a == NULL) {
+    // nothing allocated yet: build a one-element array so the caller has a
+    // default element to read, then hand back the pointer just past it
+    void *fresh = stbds_arrgrowf(0, elemsize, 0, 1);
+    stbds_header(fresh)->length += 1;
+    memset(fresh, 0, elemsize);
+    *temp = STBDS_INDEX_EMPTY;
+    return STBDS_ARR_TO_HASH(fresh,elemsize);
+  }
+
+  // the header (and with it the hash index) sits in front of the default element
+  hidx = (stbds_hash_index *) stbds_header(STBDS_HASH_TO_ARR(a,elemsize))->hash_table;
+  if (hidx == 0) {
+    *temp = -1;
+    return a;
+  }
+
+  slot = stbds_hm_find_slot(a, elemsize, key, keysize, keyoffset, mode);
+  if (slot < 0)
+    *temp = STBDS_INDEX_EMPTY;
+  else
+    *temp = hidx->storage[slot >> STBDS_BUCKET_SHIFT].index[slot & STBDS_BUCKET_MASK];
+  return a;
+}
+
+// Look up `key` in hash map `a` and record the resulting index in the temp
+// field of the array header. Returns the hash-map pointer produced by
+// stbds_hmget_key_ts, which may be a fresh allocation when `a` was NULL.
+void * stbds_hmget_key(void *a, size_t elemsize, void *key, size_t keysize, int mode)
+{
+  ptrdiff_t found;
+  void *map = stbds_hmget_key_ts(a, elemsize, key, keysize, &found, mode);
+  void *raw = STBDS_HASH_TO_ARR(map,elemsize);
+  stbds_temp(raw) = found;
+  return map;
+}
+
+// Make sure hash map `a` owns a default element (the element stored in front
+// of the first real entry) and return the hash-map pointer.
+//   - a == NULL:                 allocate an array holding just the default element
+//   - a exists but has length 0: grow it so the default element exists
+//   - otherwise:                 nothing to do, `a` is returned unchanged
+// A newly created default element is zero-filled.
+void * stbds_hmput_default(void *a, size_t elemsize)
+{
+  void *raw = NULL;
+
+  if (a != NULL) {
+    raw = STBDS_HASH_TO_ARR(a,elemsize);
+    if (stbds_header(raw)->length != 0)
+      return a;
+  }
+
+  raw = stbds_arrgrowf(raw, elemsize, 0, 1);
+  stbds_header(raw)->length += 1;
+  memset(raw, 0, elemsize);
+  return STBDS_ARR_TO_HASH(raw,elemsize);
+}
+
+static char *stbds_strdup(char *str);
+
+// Find `key` in hash map `a`, inserting a new element for it when it is absent.
+//   a        - hash-map pointer, or NULL to create the map
+//   elemsize - size in bytes of one map element
+//   key      - key bytes, or the string itself when mode >= STBDS_HM_STRING
+//   keysize  - number of key bytes (hashed and copied in non-string mode)
+//   mode     - string map when mode >= STBDS_HM_STRING, plain byte keys otherwise
+// On return the header's temp field holds the element index of the key and,
+// for string maps, the header's temp_key field holds the key pointer stored in the map.
+// Returns the hash-map pointer, which changes whenever the array is created or grown.
+void *stbds_hmput_key(void *a, size_t elemsize, void *key, size_t keysize, int mode)
+{
+  size_t keyoffset=0;
+  void *raw_a;
+  stbds_hash_index *table;
+
+  if (a == NULL) {
+    a = stbds_arrgrowf(0, elemsize, 0, 1);
+    memset(a, 0, elemsize);
+    stbds_header(a)->length += 1;
+    // adjust a to point AFTER the default element
+    a = STBDS_ARR_TO_HASH(a,elemsize);
+  }
+
+  // adjust a to point to the default element
+  // (from here on `a` is the raw array and `raw_a` is the hash-map pointer)
+  raw_a = a;
+  a = STBDS_HASH_TO_ARR(a,elemsize);
+
+  table = (stbds_hash_index *) stbds_header(a)->hash_table;
+
+  // create the index on first use, or double it once the used-slot threshold is reached
+  if (table == NULL || table->used_count >= table->used_count_threshold) {
+    stbds_hash_index *nt;
+    size_t slot_count;
+
+    slot_count = (table == NULL) ? STBDS_BUCKET_LENGTH : table->slot_count*2;
+    nt = stbds_make_hash_index(slot_count, table);
+    if (table)
+      STBDS_FREE(NULL, table);
+    else
+      nt->string.mode = mode >= STBDS_HM_STRING ? STBDS_SH_DEFAULT : 0;
+    stbds_header(a)->hash_table = table = nt;
+    STBDS_STATS(++stbds_hash_grow);
+  }
+
+  // we iterate hash table explicitly because we want to track if we saw a tombstone
+  {
+    size_t hash = mode >= STBDS_HM_STRING ? stbds_hash_string((char*)key,table->seed) : stbds_hash_bytes(key, keysize,table->seed);
+    size_t step = STBDS_BUCKET_LENGTH;
+    size_t pos;
+    ptrdiff_t tombstone = -1;
+    stbds_hash_bucket *bucket;
+
+    // stored hash values are forbidden from being 0, so we can detect empty slots to early out quickly
+    if (hash < 2) hash += 2;
+
+    pos = stbds_probe_position(hash, table->slot_count, table->slot_count_log2);
+
+    for (;;) {
+      size_t limit, i;
+      STBDS_STATS(++stbds_hash_probes);
+      bucket = &table->storage[pos >> STBDS_BUCKET_SHIFT];
+
+      // start searching from pos to end of bucket
+      for (i=pos & STBDS_BUCKET_MASK; i < STBDS_BUCKET_LENGTH; ++i) {
+        if (bucket->hash[i] == hash) {
+          if (stbds_is_key_equal(raw_a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) {
+            stbds_temp(a) = bucket->index[i];
+            if (mode >= STBDS_HM_STRING)
+              stbds_temp_key(a) = * (char **) ((char *) raw_a + elemsize*bucket->index[i] + keyoffset);
+            return STBDS_ARR_TO_HASH(a,elemsize);
+          }
+        } else if (bucket->hash[i] == 0) {
+          pos = (pos & ~STBDS_BUCKET_MASK) + i;
+          goto found_empty_slot;
+        } else if (tombstone < 0) {
+          // remember only the first deleted slot seen, so it can be reused for the insert
+          if (bucket->index[i] == STBDS_INDEX_DELETED)
+            tombstone = (ptrdiff_t) ((pos & ~STBDS_BUCKET_MASK) + i);
+        }
+      }
+
+      // search from beginning of bucket to pos
+      limit = pos & STBDS_BUCKET_MASK;
+      for (i = 0; i < limit; ++i) {
+        if (bucket->hash[i] == hash) {
+          if (stbds_is_key_equal(raw_a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) {
+            stbds_temp(a) = bucket->index[i];
+            // report the stored key for string maps here as well, exactly like the
+            // scan above; otherwise temp_key keeps whatever an earlier call left there
+            if (mode >= STBDS_HM_STRING)
+              stbds_temp_key(a) = * (char **) ((char *) raw_a + elemsize*bucket->index[i] + keyoffset);
+            return STBDS_ARR_TO_HASH(a,elemsize);
+          }
+        } else if (bucket->hash[i] == 0) {
+          pos = (pos & ~STBDS_BUCKET_MASK) + i;
+          goto found_empty_slot;
+        } else if (tombstone < 0) {
+          if (bucket->index[i] == STBDS_INDEX_DELETED)
+            tombstone = (ptrdiff_t) ((pos & ~STBDS_BUCKET_MASK) + i);
+        }
+      }
+
+      // quadratic probing
+      pos += step;
+      step += STBDS_BUCKET_LENGTH;
+      pos &= (table->slot_count-1);
+    }
+   found_empty_slot:
+    // the key is not in the map; prefer recycling a tombstone over the empty slot
+    if (tombstone >= 0) {
+      pos = tombstone;
+      --table->tombstone_count;
+    }
+    ++table->used_count;
+
+    {
+      // i is the raw-array index of the new element; its hash-map index is i-1
+      // because the raw array also counts the default element
+      ptrdiff_t i = (ptrdiff_t) stbds_arrlen(a);
+      // we want to do stbds_arraddn(1), but we can't use the macros since we don't have something of the right type
+      if ((size_t) i+1 > stbds_arrcap(a))
+        *(void **) &a = stbds_arrgrowf(a, elemsize, 1, 0);
+      raw_a = STBDS_ARR_TO_HASH(a,elemsize);
+
+      STBDS_ASSERT((size_t) i+1 <= stbds_arrcap(a));
+      stbds_header(a)->length = i+1;
+      bucket = &table->storage[pos >> STBDS_BUCKET_SHIFT];
+      bucket->hash[pos & STBDS_BUCKET_MASK] = hash;
+      bucket->index[pos & STBDS_BUCKET_MASK] = i-1;
+      stbds_temp(a) = i-1;
+
+      // store the key in the new element according to the table's string mode
+      switch (table->string.mode) {
+         case STBDS_SH_STRDUP:  stbds_temp_key(a) = *(char **) ((char *) a + elemsize*i) = stbds_strdup((char*) key); break;
+         case STBDS_SH_ARENA:   stbds_temp_key(a) = *(char **) ((char *) a + elemsize*i) = stbds_stralloc(&table->string, (char*)key); break;
+         case STBDS_SH_DEFAULT: stbds_temp_key(a) = *(char **) ((char *) a + elemsize*i) = (char *) key; break;
+         default:                memcpy((char *) a + elemsize*i, key, keysize); break;
+      }
+    }
+    return STBDS_ARR_TO_HASH(a,elemsize);
+  }
+}
+
+// Create an empty hash map whose hash index is pre-built and tagged with the
+// given string-storage `mode`. The new map contains only the zeroed default
+// element; the returned pointer is the hash-map pointer just past it.
+void * stbds_shmode_func(size_t elemsize, int mode)
+{
+  stbds_hash_index *fresh_index;
+  void *raw = stbds_arrgrowf(0, elemsize, 0, 1);
+
+  memset(raw, 0, elemsize);
+  stbds_header(raw)->length = 1;
+
+  fresh_index = (stbds_hash_index *) stbds_make_hash_index(STBDS_BUCKET_LENGTH, NULL);
+  fresh_index->string.mode = (unsigned char) mode;
+  stbds_header(raw)->hash_table = fresh_index;
+
+  return STBDS_ARR_TO_HASH(raw,elemsize);
+}
+
+// Remove `key` from hash map `a` (swap-delete: the last element is moved into the hole).
+//   a         - hash-map pointer; NULL yields a 0 return
+//   elemsize  - size in bytes of one map element
+//   key       - key to remove (a C string in string mode)
+//   keysize   - number of key bytes in non-string mode
+//   keyoffset - byte offset of the key inside an element, used to re-find the moved element
+//   mode      - key mode, compared against STBDS_HM_STRING
+// The header's temp field is set to 1 when an entry was removed and 0 otherwise.
+// Returns `a` unchanged (the element array itself is never reallocated here).
+void * stbds_hmdel_key(void *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode)
+{
+  if (a == NULL) {
+    return 0;
+  } else {
+    stbds_hash_index *table;
+    void *raw_a = STBDS_HASH_TO_ARR(a,elemsize);
+    table = (stbds_hash_index *) stbds_header(raw_a)->hash_table;
+    stbds_temp(raw_a) = 0;
+    if (table == 0) {
+      return a;
+    } else {
+      ptrdiff_t slot;
+      slot = stbds_hm_find_slot(a, elemsize, key, keysize, keyoffset, mode);
+      if (slot < 0)
+        return a;
+      else {
+        stbds_hash_bucket *b = &table->storage[slot >> STBDS_BUCKET_SHIFT];
+        int i = slot & STBDS_BUCKET_MASK;
+        ptrdiff_t old_index = b->index[i];
+        ptrdiff_t final_index = (ptrdiff_t) stbds_arrlen(raw_a)-1-1; // minus one for the raw_a vs a, and minus one for 'last'
+        STBDS_ASSERT(slot < (ptrdiff_t) table->slot_count);
+        --table->used_count;
+        ++table->tombstone_count;
+        stbds_temp(raw_a) = 1;
+        STBDS_ASSERT(table->used_count >= 0);
+        //STBDS_ASSERT(table->tombstone_count < table->slot_count/4);
+        // turn the slot into a tombstone rather than an empty slot
+        b->hash[i] = STBDS_HASH_DELETED;
+        b->index[i] = STBDS_INDEX_DELETED;
+
+        // NOTE(review): this test uses == STBDS_HM_STRING while the hashing code uses >=,
+        // and the freed pointer is read without adding keyoffset (the re-lookup below
+        // does add it) - confirm both are intentional
+        if (mode == STBDS_HM_STRING && table->string.mode == STBDS_SH_STRDUP)
+          STBDS_FREE(NULL, *(char**) ((char *) a+elemsize*old_index));
+
+        // if indices are the same, memcpy is a no-op, but back-pointer-fixup will fail, so skip
+        if (old_index != final_index) {
+          // swap delete
+          memmove((char*) a + elemsize*old_index, (char*) a + elemsize*final_index, elemsize);
+
+          // now find the slot for the last element
+          // (looked up by the key that now lives at old_index, so its slot can be repointed)
+          if (mode == STBDS_HM_STRING)
+            slot = stbds_hm_find_slot(a, elemsize, *(char**) ((char *) a+elemsize*old_index + keyoffset), keysize, keyoffset, mode);
+          else
+            slot = stbds_hm_find_slot(a, elemsize,  (char* ) a+elemsize*old_index + keyoffset, keysize, keyoffset, mode);
+          STBDS_ASSERT(slot >= 0);
+          b = &table->storage[slot >> STBDS_BUCKET_SHIFT];
+          i = slot & STBDS_BUCKET_MASK;
+          STBDS_ASSERT(b->index[i] == final_index);
+          b->index[i] = old_index;
+        }
+        stbds_header(raw_a)->length -= 1;
+
+        // rebuild the index at half size when sparsely used, or at the same size
+        // when too many tombstones have piled up; the old index is freed either way
+        if (table->used_count < table->used_count_shrink_threshold && table->slot_count > STBDS_BUCKET_LENGTH) {
+          stbds_header(raw_a)->hash_table = stbds_make_hash_index(table->slot_count>>1, table);
+          STBDS_FREE(NULL, table);
+          STBDS_STATS(++stbds_hash_shrink);
+        } else if (table->tombstone_count > table->tombstone_count_threshold) {
+          stbds_header(raw_a)->hash_table = stbds_make_hash_index(table->slot_count   , table);
+          STBDS_FREE(NULL, table);
+          STBDS_STATS(++stbds_hash_rebuild);
+        }
+
+        return a;
+      }
+    }
+  }
+  /* NOTREACHED */
+}
+
+// Return a copy of the NUL-terminated string `str`, allocated through
+// STBDS_REALLOC so that a user-replaced allocator is honoured (which is why
+// the C library's strdup/_strdup is not used).
+static char *stbds_strdup(char *str)
+{
+  size_t nbytes = strlen(str) + 1;   // include the terminating NUL
+  char *copy = (char*) STBDS_REALLOC(NULL, 0, nbytes);
+  return (char*) memmove(copy, str, nbytes);
+}
+
+// Smallest and largest block sizes (in bytes) the string arena allocates;
+// both may be overridden by defining them before this point.
+#ifndef STBDS_STRING_ARENA_BLOCKSIZE_MIN
+#define STBDS_STRING_ARENA_BLOCKSIZE_MIN  512u
+#endif
+#ifndef STBDS_STRING_ARENA_BLOCKSIZE_MAX
+#define STBDS_STRING_ARENA_BLOCKSIZE_MAX  (1u<<20)
+#endif
+
+// Copy the NUL-terminated string `str` into arena `a` and return the copy.
+// Strings are carved from the end of the current block towards its start;
+// when the current block cannot hold the string a new block is allocated.
+// The copy stays valid until the arena is reset.
+char *stbds_stralloc(stbds_string_arena *a, char *str)
+{
+  char *p;
+  size_t len = strlen(str)+1;
+  if (len > a->remaining) {
+    // compute the next blocksize
+    size_t blocksize = a->block;
+
+    // size is 512, 512, 1024, 1024, 2048, 2048, 4096, 4096, etc., so that
+    // there are log(SIZE) allocations to free when we destroy the table
+    blocksize = (size_t) (STBDS_STRING_ARENA_BLOCKSIZE_MIN) << (blocksize>>1);
+
+    // if size is under 1M, advance to next blocktype
+    if (blocksize < (size_t)(STBDS_STRING_ARENA_BLOCKSIZE_MAX))
+      ++a->block;
+
+    if (len > blocksize) {
+      // if string is larger than blocksize, then just allocate the full size.
+      // note that we still advance string_block so block size will continue
+      // increasing, so e.g. if somebody only calls this with 1000-long strings,
+      // eventually the arena will start doubling and handling those as well
+      // NOTE(review): the "-8" presumably removes the declared size of sb->storage
+      // from sizeof(*sb) - confirm against the stbds_string_block definition
+      stbds_string_block *sb = (stbds_string_block *) STBDS_REALLOC(NULL, 0, sizeof(*sb)-8 + len);
+      memmove(sb->storage, str, len);
+      if (a->storage) {
+        // insert it after the first element, so that we don't waste the space there
+        sb->next = a->storage->next;
+        a->storage->next = sb;
+      } else {
+        sb->next = 0;
+        a->storage = sb;
+        a->remaining = 0; // this is redundant, but good for clarity
+      }
+      return sb->storage;
+    } else {
+      // regular case: push a fresh block at the head of the list and allocate from it below
+      stbds_string_block *sb = (stbds_string_block *) STBDS_REALLOC(NULL, 0, sizeof(*sb)-8 + blocksize);
+      sb->next = a->storage;
+      a->storage = sb;
+      a->remaining = blocksize;
+    }
+  }
+
+  STBDS_ASSERT(len <= a->remaining);
+  // take the last `len` unused bytes of the head block
+  p = a->storage->storage + a->remaining - len;
+  a->remaining -= len;
+  memmove(p, str, len);
+  return p;
+}
+
+// Release every block owned by string arena `a` and zero the arena structure,
+// leaving it in the same state as a zero-initialised arena.
+void stbds_strreset(stbds_string_arena *a)
+{
+  stbds_string_block *cur, *following;
+
+  for (cur = a->storage; cur; cur = following) {
+    following = cur->next;   // read the link before the block is freed
+    STBDS_FREE(NULL, cur);
+  }
+  memset(a, 0, sizeof(*a));
+}
+
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//   UNIT TESTS
+//
+
+#ifdef STBDS_UNIT_TESTS
+#include <stdio.h>
+#ifdef STBDS_ASSERT_WAS_UNDEFINED
+#undef STBDS_ASSERT
+#endif
+#ifndef STBDS_ASSERT
+#define STBDS_ASSERT assert
+#include <assert.h>
+#endif
+
+typedef struct { int key,b,c,d; } stbds_struct;
+typedef struct { int key[2],b,c,d; } stbds_struct2;
+
+// Scratch storage shared by every strkey() call.
+static char buffer[256];
+
+// Format "test_<n>" into the shared static buffer and return that buffer.
+// Each call overwrites the result of the previous one.
+char *strkey(int n)
+{
+   char *out = buffer;
+#if defined(_WIN32) && defined(__STDC_WANT_SECURE_LIB__)
+   sprintf_s(out, sizeof(buffer), "test_%d", n);
+#else
+   sprintf(out, "test_%d", n);
+#endif
+   return out;
+}
+
+// Self-test for the dynamic array, hash map, string map and string arena code.
+// Every check goes through STBDS_ASSERT; nothing is returned.
+void stbds_unit_tests(void)
+{
+#if defined(_MSC_VER) && _MSC_VER <= 1200 && defined(__cplusplus)
+  // VC6 C++ doesn't like the template<> trick on unnamed structures, so do nothing!
+  STBDS_ASSERT(0);
+#else
+  const int testsize = 100000;
+  const int testsize2 = testsize/20;
+  int *arr=NULL;
+  struct { int   key;        int value; }  *intmap  = NULL;
+  struct { char *key;        int value; }  *strmap  = NULL, s;
+  struct { stbds_struct key; int value; }  *map     = NULL;
+  stbds_struct                             *map2    = NULL;
+  stbds_struct2                            *map3    = NULL;
+  stbds_string_arena                        sa      = { 0 };
+  int key3[2] = { 1,2 };
+  ptrdiff_t temp;
+
+  int i,j;
+
+  // dynamic arrays: push/free at many sizes
+  STBDS_ASSERT(arrlen(arr)==0);
+  for (i=0; i < 20000; i += 50) {
+    for (j=0; j < i; ++j)
+      arrpush(arr,j);
+    arrfree(arr);
+  }
+
+  // dynamic arrays: delete and swap-delete at every position
+  for (i=0; i < 4; ++i) {
+    arrpush(arr,1); arrpush(arr,2); arrpush(arr,3); arrpush(arr,4);
+    arrdel(arr,i);
+    arrfree(arr);
+    arrpush(arr,1); arrpush(arr,2); arrpush(arr,3); arrpush(arr,4);
+    arrdelswap(arr,i);
+    arrfree(arr);
+  }
+
+  // dynamic arrays: insert at every position, including the end
+  for (i=0; i < 5; ++i) {
+    arrpush(arr,1); arrpush(arr,2); arrpush(arr,3); arrpush(arr,4);
+    stbds_arrins(arr,i,5);
+    STBDS_ASSERT(arr[i] == 5);
+    if (i < 4)
+      STBDS_ASSERT(arr[4] == 4);
+    arrfree(arr);
+  }
+
+  // int-keyed hash map: default value, put, overwrite, delete
+  i = 1;
+  STBDS_ASSERT(hmgeti(intmap,i) == -1);
+  hmdefault(intmap, -2);
+  STBDS_ASSERT(hmgeti(intmap, i) == -1);
+  STBDS_ASSERT(hmget (intmap, i) == -2);
+  for (i=0; i < testsize; i+=2)
+    hmput(intmap, i, i*5);
+  for (i=0; i < testsize; i+=1) {
+    if (i & 1) STBDS_ASSERT(hmget(intmap, i) == -2 );
+    else       STBDS_ASSERT(hmget(intmap, i) == i*5);
+    if (i & 1) STBDS_ASSERT(hmget_ts(intmap, i, temp) == -2 );
+    else       STBDS_ASSERT(hmget_ts(intmap, i, temp) == i*5);
+  }
+  for (i=0; i < testsize; i+=2)
+    hmput(intmap, i, i*3);
+  for (i=0; i < testsize; i+=1)
+    if (i & 1) STBDS_ASSERT(hmget(intmap, i) == -2 );
+    else       STBDS_ASSERT(hmget(intmap, i) == i*3);
+  for (i=2; i < testsize; i+=4)
+    hmdel(intmap, i); // delete half the entries
+  for (i=0; i < testsize; i+=1)
+    if (i & 3) STBDS_ASSERT(hmget(intmap, i) == -2 );
+    else       STBDS_ASSERT(hmget(intmap, i) == i*3);
+  for (i=0; i < testsize; i+=1)
+    hmdel(intmap, i); // delete the rest of the entries
+  for (i=0; i < testsize; i+=1)
+    STBDS_ASSERT(hmget(intmap, i) == -2 );
+  hmfree(intmap);
+  for (i=0; i < testsize; i+=2)
+    hmput(intmap, i, i*3);
+  hmfree(intmap);
+
+  // literal keys; only compiled for GCC/clang in C mode
+  // NOTE(review): intmap is not freed after this section
+  #if defined(__clang__) || defined(__GNUC__)
+  #ifndef __cplusplus
+  intmap = NULL;
+  hmput(intmap, 15, 7);
+  hmput(intmap, 11, 3);
+  hmput(intmap,  9, 5);
+  STBDS_ASSERT(hmget(intmap, 9) == 5);
+  STBDS_ASSERT(hmget(intmap, 11) == 3);
+  STBDS_ASSERT(hmget(intmap, 15) == 7);
+  #endif
+  #endif
+
+  // string arena: many allocations followed by one reset
+  for (i=0; i < testsize; ++i)
+    stralloc(&sa, strkey(i));
+  strreset(&sa);
+
+  // string map, default mode: the caller's key pointer is stored as-is
+  {
+    s.key = "a", s.value = 1;
+    shputs(strmap, s);
+    STBDS_ASSERT(*strmap[0].key == 'a');
+    STBDS_ASSERT(strmap[0].key == s.key);
+    STBDS_ASSERT(strmap[0].value == s.value);
+    shfree(strmap);
+  }
+
+  // string map, strdup mode: the stored key is a different pointer than the caller's
+  {
+    s.key = "a", s.value = 1;
+    sh_new_strdup(strmap);
+    shputs(strmap, s);
+    STBDS_ASSERT(*strmap[0].key == 'a');
+    STBDS_ASSERT(strmap[0].key != s.key);
+    STBDS_ASSERT(strmap[0].value == s.value);
+    shfree(strmap);
+  }
+
+  // string map, arena mode: the stored key is a different pointer than the caller's
+  {
+    s.key = "a", s.value = 1;
+    sh_new_arena(strmap);
+    shputs(strmap, s);
+    STBDS_ASSERT(*strmap[0].key == 'a');
+    STBDS_ASSERT(strmap[0].key != s.key);
+    STBDS_ASSERT(strmap[0].value == s.value);
+    shfree(strmap);
+  }
+
+  // string map put/get/delete at scale, once per key-storage mode (j==0 strdup, j==1 arena)
+  for (j=0; j < 2; ++j) {
+    STBDS_ASSERT(shgeti(strmap,"foo") == -1);
+    if (j == 0)
+      sh_new_strdup(strmap);
+    else
+      sh_new_arena(strmap);
+    STBDS_ASSERT(shgeti(strmap,"foo") == -1);
+    shdefault(strmap, -2);
+    STBDS_ASSERT(shgeti(strmap,"foo") == -1);
+    for (i=0; i < testsize; i+=2)
+      shput(strmap, strkey(i), i*3);
+    for (i=0; i < testsize; i+=1)
+      if (i & 1) STBDS_ASSERT(shget(strmap, strkey(i)) == -2 );
+      else       STBDS_ASSERT(shget(strmap, strkey(i)) == i*3);
+    for (i=2; i < testsize; i+=4)
+      shdel(strmap, strkey(i)); // delete half the entries
+    for (i=0; i < testsize; i+=1)
+      if (i & 3) STBDS_ASSERT(shget(strmap, strkey(i)) == -2 );
+      else       STBDS_ASSERT(shget(strmap, strkey(i)) == i*3);
+    for (i=0; i < testsize; i+=1)
+      shdel(strmap, strkey(i)); // delete the rest of the entries
+    for (i=0; i < testsize; i+=1)
+      STBDS_ASSERT(shget(strmap, strkey(i)) == -2 );
+    shfree(strmap);
+  }
+
+  // string map: put an existing key ("jen") again through a different pointer
+  {
+    struct { char *key; char value; } *hash = NULL;
+    char name[4] = "jen";
+    shput(hash, "bob"   , 'h');
+    shput(hash, "sally" , 'e');
+    shput(hash, "fred"  , 'l');
+    shput(hash, "jen"   , 'x');
+    shput(hash, "doug"  , 'o');
+
+    shput(hash, name    , 'l');
+    shfree(hash);
+  }
+
+  // hash map keyed by a whole struct
+  for (i=0; i < testsize; i += 2) {
+    stbds_struct s = { i,i*2,i*3,i*4 };
+    hmput(map, s, i*5);
+  }
+
+  for (i=0; i < testsize; i += 1) {
+    stbds_struct s = { i,i*2,i*3  ,i*4 };
+    stbds_struct t = { i,i*2,i*3+1,i*4 };
+    if (i & 1) STBDS_ASSERT(hmget(map, s) == 0);
+    else       STBDS_ASSERT(hmget(map, s) == i*5);
+    if (i & 1) STBDS_ASSERT(hmget_ts(map, s, temp) == 0);
+    else       STBDS_ASSERT(hmget_ts(map, s, temp) == i*5);
+    //STBDS_ASSERT(hmget(map, t.key) == 0);
+  }
+
+  // hash map of structs that carry their own int key
+  for (i=0; i < testsize; i += 2) {
+    stbds_struct s = { i,i*2,i*3,i*4 };
+    hmputs(map2, s);
+  }
+  hmfree(map);
+
+  for (i=0; i < testsize; i += 1) {
+    stbds_struct s = { i,i*2,i*3,i*4 };
+    stbds_struct t = { i,i*2,i*3+1,i*4 };
+    if (i & 1) STBDS_ASSERT(hmgets(map2, s.key).d == 0);
+    else       STBDS_ASSERT(hmgets(map2, s.key).d == i*4);
+    //STBDS_ASSERT(hmgetp(map2, t.key) == 0);
+  }
+  hmfree(map2);
+
+  // hash map of structs whose key is an int[2] array
+  // NOTE(review): map3 is not freed before the function returns
+  for (i=0; i < testsize; i += 2) {
+    stbds_struct2 s = { { i,i*2 }, i*3,i*4, i*5 };
+    hmputs(map3, s);
+  }
+  for (i=0; i < testsize; i += 1) {
+    stbds_struct2 s = { { i,i*2}, i*3, i*4, i*5 };
+    stbds_struct2 t = { { i,i*2}, i*3+1, i*4, i*5 };
+    if (i & 1) STBDS_ASSERT(hmgets(map3, s.key).d == 0);
+    else       STBDS_ASSERT(hmgets(map3, s.key).d == i*5);
+    //STBDS_ASSERT(hmgetp(map3, t.key) == 0);
+  }
+#endif
+}
+#endif
+
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2019 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
diff --git a/compiler/libs/stb_impl.c b/compiler/libs/stb_impl.c
new file mode 100644
index 0000000..a1f5188
--- /dev/null
+++ b/compiler/libs/stb_impl.c
@@ -0,0 +1,3 @@
+/* This file holds the implementation of stb library functionality */
+#define STB_DS_IMPLEMENTATION
+#include "stb_ds.h"
diff --git a/compiler/location.h b/compiler/location.h
new file mode 100644
index 0000000..16fe5cd
--- /dev/null
+++ b/compiler/location.h
@@ -0,0 +1,11 @@
+#ifndef _location_h_
+#define _location_h_
+
+#include "pre.h"
+
+/* A source position, as stored on AST nodes and printed in front of
+ * diagnostics in the form "(source:line:column)".
+ */
+typedef struct {
+	Str source;       /* printed before the line number; presumably the file name - confirm */
+	i64 line, column; /* NOTE(review): whether these are 0- or 1-based is not visible here */
+} Location;
+
+#endif
diff --git a/compiler/messages.c b/compiler/messages.c
new file mode 100644
index 0000000..351af74
--- /dev/null
+++ b/compiler/messages.c
@@ -0,0 +1,58 @@
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+
+#include "messages.h"
+#include "location.h"
+
+/* Helpers that build ANSI escape sequences at compile time through string
+ * literal concatenation; every argument must therefore be a string literal.
+ */
+/* SGR (Select Graphic Rendition) sequence */
+#define ANSI_C(c) "\x1b["c"m"
+/* 8bit palette color (foreground / background) */
+#define ANSI_8C(c) "\x1b[38;5;"c"m"
+#define ANSI_8CB(c) "\x1b[48;5;"c"m"
+/* True color (foreground / background) */
+#define ANSI_RC(r,g,b) "\x1b[38;2;"r";"g";"b"m"
+#define ANSI_RCB(r,g,b) "\x1b[48;2;"r";"g";"b"m"
+#define ANSI_RESET "\x1b[0m"
+#define ANSI_BOLD "\x1b[1m"
+/* Yields `seq` when `cond` is true and an empty string otherwise.
+ * `cond` is parenthesised so that any expression can be passed safely.
+ */
+#define ANSI_IF(cond, seq) ((cond) ? (seq) : "")
+
+/* Defines a printf-style diagnostic function called `name` that forwards its
+ * variadic arguments to print_diagnostic() with the fixed type `diagtype`.
+ * `after` is pasted as a statement once the message has been printed; it may
+ * be left empty.
+ */
+#define make_diag_func(name, diagtype, after) 							\
+	void name(Compiler *cm, const Location *loc, const char *s, ...) { 	\
+		va_list args;													\
+		va_start(args, s);												\
+		print_diagnostic(cm, loc, diagtype, s, args);					\
+		va_end(args);													\
+		after;															\
+	}
+
+/* Format a diagnostic message and write it to stderr as one line.
+ *   cm   - compiler state; may be nil, in which case colors are disabled and
+ *          no error is counted
+ *   loc  - source position printed as "(source:line:column)", or nil for none
+ *   dt   - diagnostic type; selects the label text and its color
+ *   msg  - printf-style format string
+ *   args - arguments for `msg`
+ * For diag_error the compiler's error counter is incremented until it reaches
+ * opts.max_errors. The formatted message is truncated to fit a 4096-byte buffer.
+ */
+void
+print_diagnostic(Compiler *cm, const Location *loc, DiagType dt, const char *msg, va_list args)
+{
+	/* both tables are indexed by DiagType, so their order must match the enum */
+	static const char *ds[] = {"fatal", "error", "warning", "note"};
+	static const char *dsc[] = {
+		ANSI_C("1;90"), ANSI_C("1;31"), ANSI_C("1;35"), ANSI_C("1;34")
+	};
+
+	char fmsg[4096] = {0};
+	char dmsg[32] = {0};
+	bool color = cm != nil ? cm->opts.color : false;
+
+	/* cm is allowed to be nil (see `color` above), so it must be checked
+	 * before the error counter is touched */
+	if (cm != nil && dt == diag_error && cm->error_count < cm->opts.max_errors)
+		++cm->error_count;
+
+	vsnprintf(fmsg, sizeof(fmsg), msg, args);
+	snprintf(dmsg, sizeof(dmsg), "%s%s:%s", ANSI_IF(color, dsc[dt]), ds[dt], ANSI_IF(color, ANSI_RESET));
+
+	if (loc != nil) {
+		/* i64 is defined elsewhere; cast to long long so the conversion
+		 * specifier is correct whatever its underlying type is */
+		fprintf(stderr, "(%s:%lli:%lli) %s %s\n",
+			loc->source.s, (long long)loc->line, (long long)loc->column, dmsg, fmsg);
+	} else {
+		fprintf(stderr, "%s %s\n", dmsg, fmsg);
+	}
+}
+
+make_diag_func(fatal, diag_fatal, exit(EXIT_FAILURE))
+make_diag_func(error, diag_error, )
+make_diag_func(warning, diag_warning, )
+make_diag_func(note, diag_note, )
diff --git a/compiler/messages.h b/compiler/messages.h
new file mode 100644
index 0000000..4f0160b
--- /dev/null
+++ b/compiler/messages.h
@@ -0,0 +1,30 @@
+#ifndef _messages_h_
+#define _messages_h_
+
+#include <stdarg.h>
+#include "state.h"
+#include "location.h"
+
+/* fmtattr(archt, fmtsi, ftchk): when __GNUC__ is defined, expands to the
+ * compiler's `format` function attribute with these three arguments (used
+ * below as fmtattr(printf, 3, 4)); on other compilers it expands to nothing.
+ */
+#ifdef __GNUC__
+#	define fmtattr(archt, fmtsi, ftchk) __attribute((format(archt, fmtsi, ftchk)))
+#else
+#	define fmtattr(a, b, c)
+#endif
+
+/* Kind of diagnostic being reported.
+ * The numeric values are used by print_diagnostic() as indices into its label
+ * and color tables, so the order here must stay in sync with those tables.
+ */
+typedef enum
+{
+	diag_fatal = 0, /* reported through fatal(), which exits the process */
+	diag_error,     /* counted in the compiler's error_count */
+	diag_warning,
+	diag_note,
+} DiagType;
+
+void
+fatal(Compiler *cm, const Location *loc, const char *s, ...) fmtattr(printf, 3, 4);
+void
+error(Compiler *cm, const Location *loc, const char *s, ...) fmtattr(printf, 3, 4);
+void
+warning(Compiler *cm, const Location *loc, const char *s, ...) fmtattr(printf, 3, 4);
+void
+note(Compiler *cm, const Location *loc, const char *s, ...) fmtattr(printf, 3, 4);
+#endif
diff --git a/compiler/parse.c b/compiler/parse.c
new file mode 100644
index 0000000..523513a
--- /dev/null
+++ b/compiler/parse.c
@@ -0,0 +1,665 @@
+/* Recursive descent parser + Pratt parser (for expressions)
+ * TODO:
+ *  - DRY code that handle list of tokens, I have like three almost identical functions for that.
+ *  - Use an arena for the AST nodes. Nuke all of them with a single call
+ *    when we no longer need the AST.
+ */
+#include <stdlib.h>
+
+#include "ast.h"
+#include "pre.h"
+#include "parse.h"
+#include "lex.h"
+#include "state.h"
+#include "messages.h"
+#include "libs/stb_ds.h"
+
+#define MAX_STMTS_IN_BLOCK 2000
+#define MAX_PROC_ARG_COUNT 127
+#define EXPR_INIT_PREC 1
+
+/* Consume a token and match it against `tokt`.
+ * The temporary has a reserved-looking name so it cannot capture an identifier
+ * used in the caller's `lexer` expression.
+ */
+#define next_match(lexer, tokt) \
+	do { LexToken next_match_tok_ = lex_scan(lexer); lex_match((lexer), &next_match_tok_, (tokt)); } while (0)
+
+/* Scans a token (mutating `t`), and if its id matches `ttype`,
+ * it executes the code block that follows. Otherwise, the scanned token
+ * gets put back (so a next call to `lex_scan` can pick it up).
+ *
+ * The macro ends in a dangling `else`, so it must be followed directly by a
+ * statement or block, and must not be used as the unbraced body of another `if`.
+ * `ps` is evaluated more than once.
+ */
+#define matchopt(t, ttype, ps) 						\
+	if (((t) = lex_scan((ps)->lexer)).id != (ttype)) { 	\
+		lex_backup((ps)->lexer, (t)); 				\
+	} else
+
+/* Token classification helpers, written as range checks over enum LexTokenId
+ * values (assumes the enumerators in each range are declared contiguously -
+ * confirm in lex.h). The argument is parenthesised so any expression may be
+ * passed; it is evaluated more than once, so it must have no side effects.
+ */
+#define token_is_binop(t) ((t) >= T_PLUS && (t) <= T_NOTEQUAL)
+#define token_is_atom(t) ((t) >= T_IDENT && (t) <= T_DECNUMBER)
+#define token_is_unary(t) ((t) == T_MINUS || (t) == T_LOGNOT)
+#define token_is_expr_start(t) (token_is_unary(t) || token_is_atom(t))
+/* Report an error at the lexer's current location and mark the parse as failed. */
+#define parse_error(ctx, ...) \
+	do { error((ctx)->cm, &((ctx)->lexer->cur_loc), __VA_ARGS__); (ctx)->ok = false; } while (0)
+
+/* Optional wrapper around AstIdentTypePair, returned by ident_type_pair() */
+typedef Optional(AstIdentTypePair) OptAstIdentTypePair;
+/* One row of the operator table below */
+typedef struct {
+	int pred; /* precedence level; a larger number binds tighter */
+	bool left_assoc; /* false if right assoc... */
+} OperatorPrec;
+
+/* Operator table specifying the precedence and associativity
+ * of each operator, used by the expression parser.
+ * The precedence goes from lower to higher.
+ * The table is indexed by token id; entries for token ids that are not listed
+ * are zero-initialised, i.e. {0, false}.
+ */
+const OperatorPrec OperatorTable[] = {
+	[T_LOGOR] = {1, true},
+	[T_LOGAND] = {2, true},
+	[T_LESSTHAN] = {3, true},
+	[T_GREATTHAN] = {3, true},
+	[T_LOGICEQUAL] = {3, true},
+	[T_NOTEQUAL] = {3, true},
+	[T_PLUS] = {4, true},
+	[T_MINUS] = {4, true},
+	[T_STAR] = {5, true},
+	[T_BAR] = {5, true},
+};
+
+static Ast *
+expr(ParserState *ps, int minprec);
+static Ast *
+expr_comma_list(ParserState *ps);
+static Ast *
+stmt(ParserState *ps, LexToken token);
+static Ast *
+stmt_list_until(ParserState *ps, bool putback, const enum LexTokenId *end_markers, isize len);
+
+
+/* Allocate a zero-initialised AST node of the given type, tagged with `loc`.
+ * Never returns nil: an allocation failure is reported through fatal(),
+ * which terminates the process.
+ */
+static Ast *
+make_tree(enum AstType type, Location loc)
+{
+	Ast *tree = calloc(1, sizeof(*tree));
+	/* every caller dereferences the result right away, so fail loudly here */
+	if (tree == nil)
+		fatal(nil, &loc, "out of memory while allocating an AST node");
+	tree->type = type;
+	tree->loc = loc;
+	return tree;
+}
+
+/* Build an AST_BINEXPR node for `lhs op rhs`.
+ * The operator is stored as the token's textual name taken from TokenIdStr.
+ */
+static Ast *
+make_binop(enum LexTokenId op, Location loc, Ast *lhs, Ast *rhs)
+{
+	Ast *node = make_tree(AST_BINEXPR, loc);
+
+	node->bin.left = lhs;
+	node->bin.right = rhs;
+	node->bin.op = Str_from_c(TokenIdStr[op]);
+	return node;
+}
+
+/* Build an AST_IDENT node holding the identifier `ident` at location `loc`. */
+static Ast *
+make_ident_node(Str ident, Location loc)
+{
+	Ast *node = make_tree(AST_IDENT, loc);
+
+	node->ident = ident;
+	return node;
+}
+
+/* Parse `ident ':' ['var'] type`.
+ * Without the optional `var` qualifier the pair's kind is SymLet.
+ * Returns None (after reporting a parse error) when the type is not an identifier.
+ */
+static OptAstIdentTypePair
+ident_type_pair(ParserState *ps)
+{
+	AstIdentTypePair pair = { .loc = ps->lexer->cur_loc };
+	LexToken name_tok, qual_tok, type_tok;
+
+	/* ident */
+	name_tok = lex_scan(ps->lexer);
+	lex_match(ps->lexer, &name_tok, T_IDENT);
+	pair.ident = name_tok.ident;
+
+	/* ':' separating the name from the type */
+	next_match(ps->lexer, T_COLON);
+
+	/* optional qualifier; anything else is handed back to the lexer */
+	qual_tok = lex_scan(ps->lexer);
+	if (qual_tok.id != T_VAR) {
+		pair.kind = SymLet;
+		lex_backup(ps->lexer, qual_tok);
+	} else {
+		pair.kind = SymVar;
+	}
+
+	/* type */
+	pair.dtype_loc = ps->lexer->cur_loc;
+	type_tok = lex_scan(ps->lexer);
+	if (type_tok.id == T_IDENT) {
+		pair.dtype = type_tok.ident;
+		return Some(OptAstIdentTypePair, pair);
+	}
+
+	parse_error(ps, "expected a type, got %s instead", TokenIdStr[type_tok.id]);
+	return None(OptAstIdentTypePair);
+}
+
+/* Parse a comma-separated list of `ident: type` pairs (a proc's parameters).
+ * A trailing comma after the last pair is accepted. The first token that does
+ * not belong to the list is handed back to the lexer.
+ * Returns the vector of pairs, or nil when a pair fails to parse or the list
+ * exceeds MAX_PROC_ARG_COUNT; in that case nothing is leaked.
+ */
+static Vec(AstIdentTypePair)
+proc_arglist(ParserState *ps)
+{
+	Vec(AstIdentTypePair) args = nil;
+	LexToken next;
+
+	for (;;) {
+		OptAstIdentTypePair oitp = ident_type_pair(ps);
+		if (!oitp.ok)
+			goto fail;
+		if (arrlen(args) + 1 > MAX_PROC_ARG_COUNT) {
+			parse_error(ps, "more than %d (implementation limit) proc arguments", MAX_PROC_ARG_COUNT);
+			goto fail;
+		}
+
+		arrput(args, oitp.val);
+		next = lex_scan(ps->lexer);
+		/* do we have a comma? if not, we reached the end of the list */
+		if (next.id != T_COMMA)
+			break;
+		/* check if we have another argument next to this comma, we do this
+		 * to allow a trailing comma
+		 */
+		next = lex_scan(ps->lexer);
+		if (next.id != T_IDENT)
+			break;
+		lex_backup(ps->lexer, next);
+	}
+	trace("token in arglist out: %s\n", TokenIdStr[next.id]);
+	lex_backup(ps->lexer, next);
+
+	/* the loop only exits normally after at least one arrput(), so `args` is non-empty here */
+	return args;
+
+fail:
+	/* drop the pairs collected so far; arrfree() is fine with a nil vector */
+	arrfree(args);
+	return nil;
+}
+
+/* Parse a procedure definition, starting at the procedure's name:
+ *   name ['*'] '(' [arglist] ')' [':' rettype] body... 'end'
+ * A `*` right after the name marks the procedure as public.
+ * Returns the AST_PROCDEF node.
+ */
+static Ast *
+proc_decl(ParserState *ps)
+{
+	/* the body runs until this token */
+	const enum LexTokenId body_end[] = { T_END };
+	LexToken name_tok, tok;
+	Ast *node;
+
+	name_tok = lex_scan(ps->lexer);
+	lex_match(ps->lexer, &name_tok, T_IDENT);
+
+	node = make_tree(AST_PROCDEF, ps->lexer->cur_loc);
+	node->proc.name = name_tok.ident;
+	trace("proc name: %s\n", node->proc.name.s);
+
+	/* optional export marker */
+	tok = lex_scan(ps->lexer);
+	if (tok.id == T_STAR) {
+		node->proc.ispublic = true;
+		tok = lex_scan(ps->lexer);
+	}
+
+	/* parameter list; `()` means no parameters */
+	lex_match(ps->lexer, &tok, T_LPAREN);
+	tok = lex_scan(ps->lexer);
+	if (tok.id != T_RPAREN) {
+		lex_backup(ps->lexer, tok);
+		node->proc.args = proc_arglist(ps);
+		tok = lex_scan(ps->lexer);
+	}
+	lex_match(ps->lexer, &tok, T_RPAREN);
+
+	/* return type */
+	tok = lex_scan(ps->lexer);
+	if (tok.id != T_COLON) {
+		lex_backup(ps->lexer, tok);
+	} else {
+		tok = lex_scan(ps->lexer);
+		lex_match(ps->lexer, &tok, T_IDENT);
+		node->proc.rettype = make_ident_node(tok.ident, ps->lexer->cur_loc);
+	}
+
+	/* body */
+	node->proc.body = stmt_list_until(ps, false, body_end, 1);
+	return node;
+}
+
+/* Parse a procedure call to `ident`: '(' [expr {',' expr}] ')'.
+ * `ate_lp` tells whether the caller has already consumed the opening paren.
+ * Returns the AST_PROCCALL node; its argument list stays unset for `()`.
+ */
+static Ast *
+function_call(ParserState *ps, Str ident, bool ate_lp)
+{
+	Ast *call = make_tree(AST_PROCCALL, ps->lexer->cur_loc);
+	LexToken peek;
+	bool has_args;
+
+	call->call = (AstProcCall){ .name = ident };
+
+	if (!ate_lp)
+		next_match(ps->lexer, T_LPAREN);
+
+	/* peek one token to see whether an argument expression follows; the
+	 * token goes back to the lexer either way */
+	peek = lex_scan(ps->lexer);
+	has_args = token_is_expr_start(peek.id);
+	lex_backup(ps->lexer, peek);
+	if (has_args)
+		call->call.args = expr_comma_list(ps);
+
+	next_match(ps->lexer, T_RPAREN);
+	trace("function call to: %s\n", ident.s);
+
+	return call;
+}
+
+/* Parse the right-hand side of `ident = expr` (the `=` is already consumed)
+ * and return the AST_VARASSIGN node for it.
+ */
+static Ast *
+variable_assign(ParserState *ps, Str ident, Location loc)
+{
+	Ast *node = make_tree(AST_VARASSIGN, loc);
+	Ast *rhs = expr(ps, EXPR_INIT_PREC);
+
+	node->varassgn.name = ident;
+	node->varassgn.expr = rhs;
+	return node;
+}
+
+/* Parses a statement that began with the identifier `ident` (already
+ * consumed by `stmt`): an assignment when `=` follows, a procedure call
+ * otherwise. */
+static Ast *
+funccall_or_assignment(ParserState *ps, Str ident)
+{
+	LexToken token;
+	/* NOTE(review): `matchopt` is defined outside this section; presumably it
+	 * runs the block only when the next token is T_EQUAL and otherwise leaves
+	 * the token unread -- confirm against its definition. */
+	matchopt(token, T_EQUAL, ps) {
+		return variable_assign(ps, ident, ps->lexer->cur_loc);
+	}
+	/* `false`: the opening parenthesis has not been consumed yet */
+	return function_call(ps, ident, false);
+}
+
+/* Parses a `let`/`var`/`const` declaration whose keyword (`decl_kind`) was
+ * already consumed: IDENT [':' IDENT] ['=' expr].
+ * Returns the AST_VARDECL node, or `nil` after reporting an error when the
+ * type is malformed or when neither a type nor an initializer is given. */
+static Ast *
+variable_decl(ParserState *ps, enum LexTokenId decl_kind)
+{
+	static const enum SymbolKind Token2SemaVarKind[] = {
+		[T_LET] = SymLet,
+		[T_VAR] = SymVar,
+		[T_CONST] = SymConst,
+	};
+	Assert(decl_kind == T_LET || decl_kind == T_VAR || decl_kind == T_CONST);
+
+	LexToken token = lex_scan(ps->lexer);
+	lex_match(ps->lexer, &token, T_IDENT);
+
+	Ast *decl = make_tree(AST_VARDECL, ps->lexer->cur_loc);
+	decl->var = (AstVarDecl) {
+		.name = token.ident,
+		.kind = Token2SemaVarKind[decl_kind],
+	};
+
+	/* type */
+	matchopt(token, T_COLON, ps) {
+		token = lex_scan(ps->lexer);
+		if (token.id != T_IDENT) {
+			parse_error(ps, "expected a type, got %s instead", TokenIdStr[token.id]);
+			/* nothing hangs off `decl` yet, so releasing the node is enough */
+			free(decl);
+			return nil;
+		}
+		decl->var.datatype = make_ident_node(token.ident, ps->lexer->cur_loc);
+	}
+
+	/* assignment expression */
+	matchopt(token, T_EQUAL, ps) {
+		trace("assignment of decl here\n");
+		decl->var.expr = expr(ps, EXPR_INIT_PREC);
+	}
+	trace(
+		"var decl %s %s: %s\n",
+		TokenIdStr[decl_kind],
+		decl->var.name.s,
+		decl->var.datatype != nil ? (char *)decl->var.datatype->ident.s : "(no type)"
+	);
+	/* if there's no type there must be an expr */
+	/* TODO: move to semantic analysis phase? */
+	if (decl->var.datatype == nil && decl->var.expr == nil) {
+		parse_error(
+			ps,
+			"'%s' declaration must have an assignment expression if no type is specified, "
+			"but neither a type nor expression was supplied",
+			TokenIdStr[decl_kind]
+		);
+		/* both children are nil on this path: only the node itself to release */
+		free(decl);
+		return nil;
+	}
+	return decl;
+}
+
+/* Parses the operand of a `return` statement (keyword already consumed).
+ * The value expression is optional: `ret` is only set when the next token
+ * can start an expression. */
+static Ast *
+return_stmt(ParserState *ps)
+{
+	Ast *ret_node = make_tree(AST_RETURN, ps->lexer->cur_loc);
+
+	/* peek at the next token; it is handed back in either case */
+	const LexToken peek = lex_scan(ps->lexer);
+	const bool has_value = token_is_expr_start(peek.id);
+	lex_backup(ps->lexer, peek);
+
+	if (has_value)
+		ret_node->ret = expr(ps, EXPR_INIT_PREC);
+	return ret_node;
+}
+
+/* Builds the node for a `break` statement; no further tokens are read. */
+static Ast *
+break_stmt(ParserState *ps)
+{
+	Ast *brk = make_tree(AST_BREAK, ps->lexer->cur_loc);
+	return brk;
+}
+
+/* Parses the operand of a `discard` statement (keyword already consumed):
+ * a mandatory expression. */
+static Ast *
+discard_stmt(ParserState *ps)
+{
+	Ast *node = make_tree(AST_DISCARD, ps->lexer->cur_loc);
+
+	node->discard.expr = expr(ps, EXPR_INIT_PREC);
+	return node;
+}
+
+/* Parses an attribute. Only the empty form `[` `]` is accepted for now: the
+ * two brackets are matched and nothing between them is read. */
+static Ast *
+parse_attribute(ParserState *ps)
+{
+	Ast *attr = make_tree(AST_ATTRIBUTE, ps->lexer->cur_loc);
+
+	LexToken lbracket = lex_scan(ps->lexer);
+	lex_match(ps->lexer, &lbracket, T_LBRACKET);
+
+	LexToken rbracket = lex_scan(ps->lexer);
+	lex_match(ps->lexer, &rbracket, T_RBRACKET);
+
+	return attr;
+}
+
+/* A declaration "decorated" with an attribute.
+ * Parses the attribute, then the `proc`, `const`, `let` or `var` declaration
+ * it applies to, and returns the attribute node wrapping that declaration.
+ * Any other token is reported and `nil` is returned. */
+static Ast *
+decorated_decl(ParserState *ps)
+{
+	Ast *attr = parse_attribute(ps);
+	LexToken next = lex_scan(ps->lexer);
+	switch (next.id) {
+	case T_PROC:
+		attr->attribute.node = proc_decl(ps);
+		break;
+	case T_CONST:
+	case T_LET:
+	case T_VAR:
+		attr->attribute.node = variable_decl(ps, next.id);
+		break;
+	default:
+		parse_error(ps, "node of kind '%s' cannot be attributed", TokenIdStr[next.id]);
+		/* the attribute node has no children yet; do not leak it */
+		free(attr);
+		return nil;
+	}
+	return attr;
+}
+
+/* Parses an `if` construct (keyword already consumed): the condition, the
+ * true branch, any number of `elif` branches and an optional final `else`.
+ * Returns the AST_IF node, or `nil` after reporting an error. */
+static Ast *
+if_stmt_expr(ParserState *ps)
+{
+	/* tokens that close the `if` body and every `elif` body */
+	const enum LexTokenId if_block_ends[] = {T_ELSE, T_ELIF, T_END};
+	Ast *tree = make_tree(AST_IF, ps->lexer->cur_loc);
+	/* parse `if` */
+	tree->ifse.cond = expr(ps, EXPR_INIT_PREC);
+	/* putback == true: the terminator is handed back and re-read just below */
+	tree->ifse.true_body = stmt_list_until(ps, true, if_block_ends, countof(if_block_ends));
+	tree->ifse.false_body = nil;
+
+	LexToken next = lex_scan(ps->lexer);
+	/* scratch value copied into `tree->ifse.elifs`; both fields are assigned
+	 * before every `arrput` */
+	AstElif elif_tree;
+	/* parse `elif`s and else */
+	for (;;) {
+		switch (next.id) {
+		case T_END: /* only has true branch */
+			return tree;
+		case T_ELSE:
+			/* once we see an `else` block, we assume the end of the `if` block,
+			 * enforcing that `else` must be the last. */
+			trace("we got else\n");
+			tree->ifse.false_body = stmt_list_until(ps, true, (enum LexTokenId[]){T_ELIF, T_END}, 2);
+			next = lex_scan(ps->lexer);
+			if (next.id == T_ELIF) {
+				parse_error(ps, "'elif' branch after 'else' branch not allowed");
+				lex_backup(ps->lexer, next);
+				/* NOTE(review): `tree` and its children are not released here */
+				return nil;
+			}
+			return tree;
+		case T_ELIF:
+			trace("we got elif\n");
+			elif_tree.cond = expr(ps, EXPR_INIT_PREC);
+			elif_tree.body = stmt_list_until(ps, true, if_block_ends, countof(if_block_ends));
+			next = lex_scan(ps->lexer);
+			arrput(tree->ifse.elifs, elif_tree);
+			/* no more `elif` blocks neither an `else` block next */
+			if (next.id == T_END)
+				return tree;
+			Assert(next.id == T_ELSE || next.id == T_ELIF);
+			/* leaves the switch only; the loop dispatches on the new `next` */
+			break;
+		default: /* shouldn't happen */
+			lex_backup(ps->lexer, next);
+			parse_error(ps, "huh?: %s", TokenIdStr[next.id]);
+			return nil;
+		}
+	}
+	return tree;
+}
+
+/* Parses a `while` loop (keyword already consumed): the pre-condition
+ * expression followed by a body that runs up to, and consumes, `end`. */
+static Ast *
+while_stmt(ParserState *ps)
+{
+	const enum LexTokenId block_end[] = {T_END};
+	Ast *loop_node = make_tree(AST_LOOP, ps->lexer->cur_loc);
+
+	loop_node->loop.precond = expr(ps, EXPR_INIT_PREC);
+	loop_node->loop.body = stmt_list_until(ps, false, block_end, countof(block_end));
+
+	return loop_node;
+}
+
+/* Parses the smallest expression unit: a number, a string literal, a plain
+ * identifier or a procedure call. Returns the node, or `nil` after reporting
+ * an error when the token cannot start an atom. */
+static Ast *
+atom(ParserState *ps)
+{
+	/* allocated before scanning, so the node records the location current at
+	 * entry; its real type is filled in once the token kind is known */
+	Ast *tree = make_tree(AST_INVALID, ps->lexer->cur_loc);
+	LexToken t = lex_scan(ps->lexer);
+	LexToken next;
+
+	switch (t.id) {
+	case T_NUMBER:
+		tree->type = AST_NUMBER;
+		tree->number.n = t.inumber;
+		trace("number in atom: %lu\n", t.inumber);
+		return tree;
+	case T_STRING:
+		tree->type = AST_STRLIT;
+		tree->strlit = t.str;
+		return tree;
+	case T_IDENT:
+		next = lex_scan(ps->lexer);
+		/* It is a plain symbol or a function call? */
+		if (next.id == T_LPAREN) {
+			/* `function_call` allocates its own node; drop the placeholder.
+			 * `true`: the `(` has already been consumed here. */
+			free(tree);
+			tree = function_call(ps, t.ident, true);
+		} else {
+			lex_backup(ps->lexer, next);
+			tree->type = AST_IDENT;
+			tree->ident = t.ident;
+		}
+		return tree;
+	default:
+		parse_error(ps, "expected a number, identifier or expression, not '%s'", TokenIdStr[t.id]);
+		free(tree);
+	}
+	return nil;
+}
+
+/* Parses an optional single unary operator followed by an atom. Without an
+ * operator the token is handed back and the atom is returned directly. */
+static Ast *
+unary(ParserState *ps)
+{
+	const LexToken op = lex_scan(ps->lexer);
+
+	if (!token_is_unary(op.id)) {
+		lex_backup(ps->lexer, op);
+		return atom(ps);
+	}
+
+	Ast *node = make_tree(AST_UNARY, ps->lexer->cur_loc);
+	node->unary.op = Str_from_c(TokenIdStr[op.id]);
+	node->unary.atom = atom(ps);
+	return node;
+}
+
+/* Parse a binary expression or an atom. This implements the Pratt parser algorithm.
+ * `minprec` is the lowest operator precedence this invocation may consume;
+ * an operator below it is handed back to the lexer for the caller.
+ * See also:
+ * 	- https://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing
+ * 	- https://www.oilshell.org/blog/2016/11/01.html
+ * 	XXX: Mutate to the shunting yard variation? Since it uses an explicit stack instead of the call
+ * 	stack, guard against deeply nested expressions.
+ */
+static Ast *
+expr(ParserState *ps, int minprec)
+{
+	Ast *tree = unary(ps);
+	for (;;) {
+		LexToken t = lex_scan(ps->lexer);
+		if (!token_is_binop(t.id)
+			|| t.id == T_END
+			|| OperatorTable[t.id].pred < minprec) {
+			lex_backup(ps->lexer, t);
+			break;
+		}
+		const OperatorPrec op = OperatorTable[t.id];
+		/* left-associative operators must not swallow an equal-precedence
+		 * operator on their right-hand side */
+		const int next_prec = op.left_assoc ? op.pred + 1 : op.pred;
+		/* Take the location before recursing: as arguments of one call, the
+		 * read of `cur_loc` and the nested `expr()` (which advances the
+		 * lexer) would be evaluated in an unspecified order. */
+		const Location op_loc = ps->lexer->cur_loc;
+		Ast *rhs = expr(ps, next_prec);
+		tree = make_binop(t.id, op_loc, tree, rhs);
+	}
+	return tree;
+}
+
+/* Placeholder for a generic separated-list parser: both arguments are
+ * ignored and an empty (nil) vector is always returned. */
+static Vec(Ast *)
+sep_list(ParserState *ps, Ast *(*prod_fn)(Compiler *, void *))
+{
+	(void)ps, (void)prod_fn;
+	Vec(Ast *) prod = nil;
+	return prod;
+}
+
+/* Parses one or more comma separated expressions, tolerating a trailing
+ * comma, and returns them wrapped in an AST_EXPRS node. The token that ends
+ * the list is handed back to the lexer. */
+static Ast *
+expr_comma_list(ParserState *ps)
+{
+	Ast *tree = make_tree(AST_EXPRS, ps->lexer->cur_loc);
+	Vec(Ast *) exprs = nil;
+	
+	LexToken next;
+	for (;;) {
+		arrput(exprs, expr(ps, EXPR_INIT_PREC));
+		next = lex_scan(ps->lexer);
+		trace("commalist tok: %s\n", TokenIdStr[next.id]);
+		/* do we have a comma? if not, we reached the end of the list */
+		if (next.id != T_COMMA)
+			break;
+		next = lex_scan(ps->lexer);
+		/* check if we have an expression next to this comma, we do this
+		 * to allow a trailing comma
+		 */
+		if (!token_is_expr_start(next.id))
+			break;
+		lex_backup(ps->lexer, next);
+	}
+	/* `next` is the first token that is not part of the list */
+	lex_backup(ps->lexer, next);
+
+	/* the loop always stores at least one entry before leaving, so this
+	 * branch is only a safety net */
+	if (arrlen(exprs) == 0) {
+		free(tree);
+		arrfree(exprs);
+		return nil;
+	}
+	tree->exprs = exprs;
+	return tree;
+}
+
+/* Returns `true` when `c` equals one of the first `len` entries of `toks`. */
+static bool
+token_id_in_list(enum LexTokenId c, const enum LexTokenId *toks, isize len)
+{
+	/* membership test only, so the scan direction is irrelevant */
+	while (len-- > 0) {
+		if (toks[len] == c)
+			return true;
+	}
+	return false;
+}
+
+/* Parses a statement list until one of the `len` tokens in `end_markers` is
+ * found. When `putback` is true the terminating token is handed back to the
+ * lexer, otherwise it stays consumed. Returns an AST_STMTS node, or `nil` if
+ * the statement list is empty or exceeds MAX_STMTS_IN_BLOCK statements. */
+static Ast *
+stmt_list_until(ParserState *ps, bool putback, const enum LexTokenId *end_markers, isize len)
+{
+	LexToken token = lex_scan(ps->lexer);
+	Vec(Ast *) stmts = nil;
+	Ast *body = make_tree(AST_STMTS, ps->lexer->cur_loc);
+
+	/* stmt* */
+	while (!token_id_in_list(token.id, end_markers, len)) {
+		trace("stmt list token: %s\n", TokenIdStr[token.id]);
+		if (arrlen(stmts) + 1 > MAX_STMTS_IN_BLOCK) {
+			parse_error(ps, "more than %d (implementation limit) statements in block", MAX_STMTS_IN_BLOCK);
+			/* Release the containers instead of leaking them. The statement
+			 * nodes parsed so far are left alone: no AST destructor is
+			 * visible from here. */
+			arrfree(stmts);
+			free(body);
+			return nil;
+		}
+		arrput(stmts, stmt(ps, token));
+
+		token = lex_scan(ps->lexer);
+		if (token.id == T_EOF) {
+			parse_error(ps, "unexpected EOF, expected a statement or `end`");
+			break;
+		}
+		/* a `;` between statements is optional */
+		if (token.id == T_SEMICOLON)
+			token = lex_scan(ps->lexer);
+	}
+	trace("token before end next_match: %s\n", TokenIdStr[token.id]);
+	if (putback)
+		lex_backup(ps->lexer, token);
+	/* empty list, just return nil instead of wasting space on a 0-length
+	 * vector */
+	if (arrlen(stmts) == 0) {
+		free(body);
+		arrfree(stmts);
+		return nil;
+	}
+	body->stmts = stmts;
+	return body;
+}
+
+/* Parses one statement whose first token, `token`, was already consumed by
+ * the caller, dispatching on the token kind. Returns the statement node, or
+ * `nil` after reporting an error for a token that cannot start a statement.
+ * A token with no dedicated case is reported and terminates the process. */
+static Ast *
+stmt(ParserState *ps, LexToken token)
+{
+	const enum LexTokenId kind = token.id;
+
+	switch (kind) {
+	/* declarations */
+	case T_CONST:
+	case T_LET:
+	case T_VAR:
+		return variable_decl(ps, kind);
+	case T_PROC:
+		return proc_decl(ps);
+	case T_HASH:
+		return decorated_decl(ps);
+
+	/* statements */
+	case T_IDENT:
+		return funccall_or_assignment(ps, token.ident);
+	case T_IF:
+		return if_stmt_expr(ps);
+	case T_WHILE:
+		return while_stmt(ps);
+	case T_RETURN:
+		return return_stmt(ps);
+	case T_BREAK:
+		return break_stmt(ps);
+	case T_DISCARD:
+		return discard_stmt(ps);
+
+	/* tokens that are only valid inside another construct */
+	case T_ELIF:
+		parse_error(ps, "stray 'elif'");
+		break;
+	case T_ELSE:
+		parse_error(ps, "'else' with no accompanying 'if'");
+		break;
+	case T_END:
+		parse_error(ps, "stray 'end' keyword");
+		break;
+	case T_EOF:
+		parse_error(ps, "unexpected EOF while parsing a statement");
+		break;
+
+	default:
+		parse_error(ps, "invalid statement '%s'", TokenIdStr[kind]);
+		exit(1);
+	}
+	return nil;
+}
+
+/* Parses top-level statements until the lexer reports EOF and returns them
+ * in a single AST_STMTS node (returned even when no statement was read). */
+static Ast *
+stmt_list(ParserState *ps)
+{
+	Ast *program = make_tree(AST_STMTS, ps->lexer->cur_loc);
+	LexToken first = lex_scan(ps->lexer);
+
+	while (first.id != T_EOF) {
+		arrput(program->stmts, stmt(ps, first));
+		first = lex_scan(ps->lexer);
+	}
+	return program;
+}
+
+/* Allocates a parser bound to compiler `cm` and lexer `ls`; the `ok` flag
+ * starts out true. Release the result with `parse_destroy`. */
+ParserState *
+parse_new(Compiler *cm, LexState *ls)
+{
+	ParserState *state = calloc(1, sizeof(ParserState));
+
+	*state = (ParserState){
+		.cm = cm,
+		.lexer = ls,
+		.ok = true,
+	};
+	return state;
+}
+
+/* Releases a parser created by `parse_new`. Only the state itself is freed;
+ * the lexer and compiler it points to are left untouched. */
+void
+parse_destroy(ParserState *ps)
+{
+	free(ps);
+}
+
+/* Parser entry point: parses the whole input attached to `ps` and returns
+ * the top-level statement list. */
+Ast *
+parse(ParserState *ps)
+{
+	Ast *program = stmt_list(ps);
+	return program;
+}
diff --git a/compiler/parse.h b/compiler/parse.h
new file mode 100644
index 0000000..5343dbc
--- /dev/null
+++ b/compiler/parse.h
@@ -0,0 +1,21 @@
+#ifndef _parse_h_
+#define _parse_h_
+
+#include "ast.h"
+#include "state.h"
+#include "lex.h"
+
+/* State carried through one parsing session. */
+typedef struct {
+	Compiler *cm;    /* compiler instance passed to parse_new */
+	LexState *lexer; /* token source; the parser scans and backs up tokens on it */
+	bool ok;         /* set to true by parse_new; callers inspect it after parse() */
+} ParserState;
+
+/* Allocates a parser reading tokens from `ls`. */
+ParserState *
+parse_new(Compiler *cm, LexState *ls);
+/* Frees a parser created by parse_new. */
+void
+parse_destroy(ParserState *ps);
+/* Parses the whole input and returns the top-level statement list. */
+Ast *
+parse(ParserState *ps);
+
+#endif
diff --git a/compiler/pre.h b/compiler/pre.h
new file mode 100644
index 0000000..743f2ca
--- /dev/null
+++ b/compiler/pre.h
@@ -0,0 +1,158 @@
+#ifndef _pre_h_
+#define _pre_h_
+/* Prelude file, containing some useful macros and types. */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+
+/* Short aliases for the fixed-width integer and floating point types. */
+typedef int8_t i8;
+typedef uint8_t u8;
+typedef int16_t i16;
+typedef uint16_t u16;
+typedef int32_t i32;
+typedef uint32_t u32;
+typedef int64_t i64;
+typedef uint64_t u64;
+typedef float f32;
+typedef double f64;
+
+/* Pointer-sized and size types; `isize` is the signed one. */
+typedef uintptr_t uptr;
+typedef ptrdiff_t isize;
+typedef size_t usize;
+/* Boolean type defined by hand; <stdbool.h> is not included by this header. */
+typedef _Bool bool;
+
+#define true ((bool)1)
+#define false ((bool)0)
+/* Null pointer constant used throughout the code base. */
+#define nil ((void *)0)
+/* Largest u64 value (all bits set). */
+#define U64_MAX ((u64)-1)
+
+/* Anonymous struct pairing a pointer to `T` with an element count. Every
+ * expansion declares a new, distinct struct type. */
+#define Slice(T) 	\
+	struct {		\
+		T *s;		\
+		isize len;	\
+	}
+
+/* Byte string: pointer plus length. */
+typedef Slice(u8) Str;
+/* Value plus a validity flag. */
+#define Optional(T) struct {T val; bool ok;}
+/* Constructors for an Optional type `T`: with value `v`, or empty. */
+#define Some(T, v) (T){v, true}
+#define None(T) (T){.ok = false}
+/* Meant for use with stb_ds */
+#define Vec(T) T *
+/* Entry layouts (`key`/`value`) for hash maps; HashMapStr uses C string keys. */
+#define HashMap(K, V) struct { K key; V value; }
+#define HashMapStr(V) struct { char *key; V value; }
+
+/* Length of an array */
+#define countof(arr) (isize)(sizeof(arr) / sizeof(*(arr)))
+/* Length of string literal */
+#define lengthof(s) (countof(s) - 1)
+
+/* Slice of `T` covering arr[lo] .. arr[hi - 1]. */
+#define ViewMem(T, arr, lo, hi) ((Slice(T)){.s = (arr) + (lo), .len = (hi) - (lo)})
+/* Narrows the slice `sl` in place to [lo, hi) and yields it. */
+#define View(sl, lo, hi) ((sl).s += (lo), (sl).len = (hi) - (lo), (sl))
+/* Iterates over the stb_ds array `arr`, copying each element in turn into a
+ * fresh variable `val`. The element is loaded inside the loop condition, after
+ * the bounds check, so every element is visited exactly once and an empty or
+ * nil array is never dereferenced. `arr` is evaluated several times. */
+#define foreach(val, arr) \
+	for (__typeof__(*(arr)) *__p = (arr), (val); \
+		__p < (arr) + arrlen((arr)) && ((val) = *__p, 1); \
+		++__p)
+/* Index of the element currently held by `val`. Only valid inside the body of
+ * the `foreach` over `arr`; `val` is a copy, so the position comes from the
+ * hidden cursor rather than from its address. */
+#define foreach_getindex(val, arr) ((void)(val), __p - (arr))
+
+/* Useful integer operations good to have.
+ * These are plain macros: each argument may be evaluated more than once, so
+ * avoid arguments with side effects. */
+#define max(x, y) ((x) > (y) ? (x) : (y))
+#define min(x, y) ((x) < (y) ? (x) : (y))
+/* Limits `x` to the range [lo, hi]. */
+#define clamp(x, lo, hi) max(lo, min(x, hi))
+
+/* Mask with only bit `pos` set. The shifted constant is a plain `int`. */
+#define BitPos(pos) (1 << (pos))
+
+/* debugtrap(): stop the program immediately.
+ * unreachable(): marks code that must never execute. */
+#if defined(__GNUC__) || defined(__clang__)
+#	define debugtrap() __builtin_trap()
+#	define unreachable() __builtin_unreachable()
+#else /* not optimal... */
+#	define debugtrap() abort()
+#	define unreachable() abort()
+#endif
+
+/* Assert(pred) traps when `pred` is false; trace(...) prints the source file
+ * and line followed by a printf-style message to stderr. Both expand to
+ * nothing when NDEBUG is defined.
+ * NOTE(review): Assert expands to a bare `if` statement, so an `else` placed
+ * right after `Assert(x);` would bind to it.
+ * NOTE(review): trace uses fprintf/stderr but this header does not include
+ * <stdio.h>; the including file has to. */
+#ifndef NDEBUG
+#	if defined (__GNUC__) || defined(__clang__)
+#		define Assert(pred) if (!(pred)) { __builtin_trap(); }
+#	else
+#		define Assert(pred) if (!(pred)) { *(volatile int *)0 = 0; }
+#	endif
+#	define trace(...) do { 								\
+		fprintf(stderr, "%s:%-5i", __FILE__, __LINE__);	\
+		fprintf(stderr, __VA_ARGS__);					\
+	} while (0)
+#else
+#	define Assert(pred)
+#	define trace(...)
+#endif
+
+/* Creates a `Str` from a string literal; the length excludes the terminator */
+#define Sl(s) ((Str){ (u8 *)s, (isize)lengthof(s) })
+/* Creates a `Str` from a buffer of size `len` */
+#define Sb(s, len) ((Str){ (u8 *)s, (isize)len })
+/* Creates a `Str` from a C string. A nil pointer gives length 0. The
+ * argument is evaluated more than once. */
+#define Str_from_c(s) ((Str){ (u8 *)s, (isize)(s != nil ? strlen(s) : 0) })
+/* True when the string has length 0. */
+#define Str_empty(s) ((s).len == 0)
+/* Yields `s`, or the fallback `sor` when `s` is empty. */
+#define Str_default(s, sor) (!Str_empty(s) ? (s) : (sor))
+
+/* Declared by hand because this header does not include <stdio.h>. The size
+ * parameter is `size_t`, as in the standard prototype, so this declaration
+ * stays compatible with <stdio.h> on targets where `size_t` is not
+ * `unsigned long`. */
+int
+vsnprintf(char *, size_t, const char *, va_list);
+
+/* "Converts" a `Str` into a C string. Since `Str` are meant to be
+ * null-terminated already, no conversion is made, but ensures that the
+ * null terminator is present. Returns `nil` for an empty or nil string.
+ * The terminator is looked for at index `len`, one past the last character:
+ * that is where `Sl`, `Str_from_c` and `Str_new` place it. */
+static inline char *
+Str_to_c(Str s)
+{
+	if (s.len == 0 || s.s == nil)
+		return nil;
+	Assert(s.s[s.len] == '\0');
+	return (char *)s.s;
+}
+
+/* Returns `true` when `s1` and `s2` have the same length and the same bytes. */
+static inline bool
+Str_equal(Str s1, Str s2)
+{
+	if (s1.len != s2.len)
+		return false;
+	/* two empty strings are equal; bail out before memcmp, which must not
+	 * be handed a nil pointer even for a size of 0 */
+	if (s1.len == 0)
+		return true;
+	return memcmp(s1.s, s2.s, s1.len) == 0;
+}
+
+/* Heap allocates a new `Str` of size `len`, with contents copied from `data`
+ * if it is not `nil` (otherwise the bytes are zero). The buffer holds
+ * `len + 1` bytes so that a terminating NUL always follows the contents.
+ * On allocation failure an empty `Str` (nil pointer, length 0) is returned. */
+static inline Str
+Str_new(const u8 *data, isize len)
+{
+	Assert(len >= 0);
+	Str s;
+	s.s = calloc(len + 1, sizeof(*s.s));
+	if (s.s == nil) {
+		s.len = 0;
+		return s;
+	}
+	s.len = len;
+	if (data != nil) {
+		memcpy(s.s, data, len);
+		/* last valid index of the len + 1 byte buffer; calloc already
+		 * zeroed it, this only makes the intent explicit */
+		s.s[len] = '\0';
+	}
+	return s;
+}
+
+/* Formats `fmt` and its arguments printf-style into a freshly heap allocated
+ * `Str` of exactly the required length. Returns an empty `Str` when the
+ * length cannot be computed. */
+static inline Str
+Strafmt(const char *fmt, ...)
+{
+	va_list ap;
+
+	/* first pass: measure only */
+	va_start(ap, fmt);
+	const int needed = vsnprintf(nil, 0, fmt, ap);
+	va_end(ap);
+
+	Str out = {0};
+	if (needed >= 0) {
+		out = Str_new(nil, needed);
+		/* the measuring pass consumed the list, start over for the real one */
+		va_start(ap, fmt);
+		/* `Str_new` reserved one extra byte for the terminator */
+		vsnprintf((char *)out.s, out.len + 1, fmt, ap);
+		va_end(ap);
+	}
+	return out;
+}
+
+#endif
diff --git a/compiler/rutilec.c b/compiler/rutilec.c
new file mode 100644
index 0000000..fb6eb8d
--- /dev/null
+++ b/compiler/rutilec.c
@@ -0,0 +1,200 @@
+#define _POSIX_C_SOURCE 200809L
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "pre.h"
+#include "lex.h"
+#include "parse.h"
+#include "sema.h"
+#include "state.h"
+#include "codegen.h"
+#include "messages.h"
+
+#include "libs/optparse.h"
+#include "libs/stb_ds.h"
+
+/* Fallbacks for values that may be supplied at build time. */
+/* Commit identifier printed by `-v`. */
+#ifndef GIT_HASH
+#	define GIT_HASH "<no hash>"
+#endif
+/* Address printed by `-v` for reporting bugs. */
+#ifndef BUG_REPORT_URL
+#	define BUG_REPORT_URL "https://codeberg.org/tocariimaa/rutile"
+#endif
+/* Extension given to the default output file name; empty means none. */
+#ifndef TARGET_EXE_EXT /* without prefix dot! */
+#	define TARGET_EXE_EXT Sl("")
+#endif
+
+/* Option summary printed by `-h`, after the usage line. */
+static const char *HelpMessage =
+	"Summary of common options:\n"
+	"  -c\tCompile only. Don't link, output an object file instead.\n"
+	"  -d\tDefine a constant with the specified value.\n"
+	"  -h\tPrint this help message.\n"
+	"  -o\tSet output file name of the executable/object file.\n"
+	"  -v\tPrint the version of this compiler, plus other relevant information.\n"
+	"  -S\tEmit intermediate code.\n"
+	"  -R\tSet the code generation mode, 'release' for an optimized build,\n"
+	"  \t'debug' for a debug build.\n";
+
+/* Creates the output binary file name, changing the extension to the current platform
+ * executable file extension, or it simply removes the original extension if the platform
+ * has no binary extension, (i.e UNIX-likes OSes).
+ * `exe_ext` is the target extension without the leading dot (may be empty).
+ * The result is heap allocated. A source name without an extension is fatal
+ * when `exe_ext` is empty, since the output would overwrite the input. */
+static Str
+make_binary_filename(Compiler *cm, Str src_filename, const Str exe_ext)
+{
+	const bool exe_has_ext = exe_ext.len > 0;
+	const size_t ss = src_filename.len;
+	Assert(ss != 0);
+	/* worst case: whole source name + '.' + extension + terminator */
+	char *buf = malloc(ss + 2 + (exe_has_ext ? exe_ext.len : 0));
+	if (buf == nil)
+		fatal(cm, nil, "out of memory");
+	memcpy(buf, src_filename.s, ss);
+	buf[ss] = '\0';
+
+	/* Search backwards for the last '.', but stop at a '/': a dot in a
+	 * directory component is not the file's extension. */
+	char *p = buf + (ss - 1);
+	while (p != buf && *p != '.' && *p != '/')
+		--p;
+	/* No extension in filename, edge case really */
+	if (p == buf || *p == '/') {
+		if (!exe_has_ext)
+			fatal(cm, nil, "output file name required in this case (host OS binary format lacks extension)");
+		/* append extension then */
+		p = buf + ss;
+		*p = '.';
+	}
+	if (exe_has_ext) {
+		memcpy(++p, exe_ext.s, exe_ext.len);
+		/* advance by the real extension length, whatever it is */
+		p += exe_ext.len;
+	}
+	*p = '\0';
+	return Str_from_c(buf);
+}
+
+/* Maps a backend name given on the command line to its enum value: "c" or
+ * "C" selects the C backend, "gcc" the libgccjit one. Any other name is
+ * reported through `fatal`. */
+static enum CodegenBackends
+backend_from_str(Compiler *cm, Str s)
+{
+	const bool wants_c = s.len == 1 && (s.s[0] == 'c' || s.s[0] == 'C');
+
+	if (wants_c)
+		return CgBackendC;
+	if (Str_equal(s, Sl("gcc")))
+		return CgBackendLibGccJit;
+
+	fatal(cm, nil, "unknown backend '%s'", s.s);
+	unreachable();
+}
+
+/* Parses the command line in `argv`, storing the options in `cm->opts`, and
+ * returns the first positional argument (the source file name). `-h` and
+ * `-v` print their text and exit; an option error or a missing input file is
+ * reported through `fatal`. */
+static Str
+cli_boilerplate(char **argv, Compiler *cm)
+{
+	const struct optparse_long longopts[] = {
+		{"backend", 'b', OPTPARSE_REQUIRED},
+		{"compile-only", 'c', OPTPARSE_NONE},
+		{"define", 'd', OPTPARSE_REQUIRED},
+		{"max-errors", 'E', OPTPARSE_REQUIRED},
+		{"release", 'R', OPTPARSE_REQUIRED},
+		{"emit-ir", 'S', OPTPARSE_OPTIONAL},
+		{"exe", 'o', OPTPARSE_REQUIRED},
+		{"version", 'v', OPTPARSE_NONE},
+		{"help", 'h', OPTPARSE_NONE},
+		{0},
+	};
+
+	/* colour only when stderr is a terminal and NO_COLOR is unset or empty */
+	const char *no_color = getenv("NO_COLOR");
+	const bool no_color_set = no_color != nil && *no_color != '\0';
+	cm->opts.color = isatty(STDERR_FILENO) && !no_color_set;
+
+	struct optparse state;
+	optparse_init(&state, argv);
+
+	for (;;) {
+		const i8 opt = optparse_long(&state, longopts, nil);
+		if (opt == -1)
+			break;
+
+		switch (opt) {
+		/* informational options: print and leave */
+		case 'h':
+			printf("Usage: %s [options...] files...\n\n%s\n", *argv, HelpMessage);
+			exit(0);
+		case 'v':
+			printf("Rutile compiler v0.0.1\n");
+			printf("git commit: %s\nReport bugs here: %s\n", GIT_HASH, BUG_REPORT_URL);
+			exit(0);
+
+		/* options that take a value */
+		case 'b':
+			cm->opts.backend = backend_from_str(cm, Str_from_c(state.optarg));
+			break;
+		case 'd':
+			trace("define: %s\n", state.optarg);
+			arrput(cm->opts.defines, Str_from_c(state.optarg));
+			break;
+		case 'E':
+			cm->opts.max_errors = atoi(state.optarg); /* XXX: atoi LOL */
+			break;
+		case 'R':
+			trace("release: %s\n", state.optarg);
+			cm->opts.release_mode = Str_from_c(state.optarg);
+			break;
+		case 'o':
+			cm->opts.exe_out = Str_from_c(state.optarg);
+			break;
+
+		/* flags */
+		case 'c':
+			cm->opts.compile_only = true;
+			break;
+		case 'S':
+			/* accepted but currently has no effect */
+			break;
+
+		case '?':
+			fatal(cm, nil, "%s: %s", *argv, state.errmsg);
+		}
+	}
+
+	const char *input = optparse_arg(&state);
+	if (input == nil)
+		fatal(cm, nil, "no input files specified");
+	return Str_from_c(input);
+}
+
+/* Compiler driver: reads the options, opens the source (`-` means stdin),
+ * then runs lexing/parsing, semantic analysis and code generation.
+ * Exits with EXIT_SUCCESS only when every stage succeeded, so scripts and
+ * build systems can detect a failed compilation. */
+int
+main(int argc, char **argv)
+{
+	(void)argc;
+	int status = EXIT_FAILURE;
+	Compiler cm = {
+		.opts = {
+			.backend = CgBackendC,
+			.max_errors = 20,
+		}
+	};
+
+	Str src_filename = cli_boilerplate(argv, &cm);
+	FILE *src_in = nil;
+
+	if (src_filename.s[0] == '-' && src_filename.s[1] == '\0') {
+		src_in = stdin;
+		src_filename = Sl("<stdin>");
+	} else {
+		if ((src_in = fopen((char *)src_filename.s, "rb")) == nil) {
+			fatal(&cm, nil, "can't open: %s", src_filename.s);
+		}
+	}
+
+	cm.current_filename = src_filename;
+	/* no -o given: derive the output name from the source name */
+	if (cm.opts.exe_out.len == 0)
+		cm.opts.exe_out = make_binary_filename(&cm, src_filename, TARGET_EXE_EXT);
+
+	if (Str_equal(cm.opts.exe_out, cm.current_filename)) {
+		fatal(&cm, nil, "input source file and output file are the same");
+	}
+
+	/* Compiler pipeline */
+	LexState *ls = lex_new(&cm, src_in, src_filename, 4);
+	ParserState *ps = parse_new(&cm, ls);
+	SemaCtx *ss = sema_new(&cm);
+	Ast *program = parse(ps);
+	/* each stage only runs when the previous one reported no error */
+	if (ps->ok) {
+		sema(ss, program);
+		if (ss->ok) {
+			CodegenCtx *cgctx = codegen_new(&cm, cm.opts.backend);
+			codegen(cgctx, program);
+			codegen_destroy(cgctx);
+			status = EXIT_SUCCESS;
+		}
+	}
+
+	sema_destroy(ss);
+	parse_destroy(ps);
+	lex_destroy(ls);
+	fclose(src_in);
+
+	return status;
+}
diff --git a/compiler/sema.c b/compiler/sema.c
new file mode 100644
index 0000000..913a845
--- /dev/null
+++ b/compiler/sema.c
@@ -0,0 +1,980 @@
+/* Semantic analyzer and type checker */
+#include <stdlib.h>
+#include <string.h>
+
+#include "pre.h"
+#include "sema.h"
+#include "datatype.h"
+#include "location.h"
+#include "symbol.h"
+#include "ast.h"
+#include "state.h"
+#include "messages.h"
+#include "libs/stb_ds.h"
+
+/* Reports an error at `loc` (may be nil) and marks the context as failed. */
+#define sema_error(ctx, loc, ...) do { 		\
+		error((ctx)->cm, loc, __VA_ARGS__);	\
+		(ctx)->ok = false;					\
+	} while (0)
+/* Warnings and notes are forwarded as-is and leave the `ok` flag alone. */
+#define sema_warning(ctx, loc, ...) warning((ctx)->cm, loc, __VA_ARGS__)
+#define sema_note(ctx, loc, ...) note((ctx)->cm, loc, __VA_ARGS__)
+/* True for the node kinds that end a statement sequence: return and break.
+ * `s` is not parenthesized, so pass a plain pointer variable. */
+#define sema_is_stmt_terminal(s) (s->type == AST_RETURN || s->type == AST_BREAK)
+/* Inserts value `v` under the string key `k` into the symbol table `syms`. */
+#define sym_insert(syms, k, v) shput(syms, k, v)
+
+/* Bit flags describing where in the program the analyzer currently is.
+ * They are copied into each new context frame by `push_semactx`.
+ * NOTE(review): the "64 bits" remark refers to the intended width; BitPos
+ * shifts a plain `int`, so only the low bits are usable as written. */
+enum SemaCtxFlags /* 64 bits */
+{
+	SctxInsideProc = BitPos(0),
+	SctxInsideLoop = BitPos(1),
+	SctxInsideIf = BitPos(2),
+	SctxInTopLevel = BitPos(3),
+	SctxInExpr = BitPos(4), /* set by sema_expr while checking an expression */
+	SctxInDiscard = BitPos(5),
+	SctxInStmtBlock = BitPos(6),
+};
+
+/* One entry of a scope's symbol table. */
+typedef struct {
+	enum SymbolKind kind; /* SymInvalid marks "not found" (see InvalidSymbol) */
+ 	/* The data type associated with the symbol. */
+	DataType *dtype;
+	bool used; /* set when the symbol is referenced; drives unused warnings */
+	bool procparm; /* if its a proc parameter */
+	Location loc; /* location passed to diagnostics about this symbol */
+} Symbol;
+
+/* Hash map entry: symbol name (C string key) mapped to its Symbol. */
+typedef HashMapStr(Symbol) SymbolEntry;
+
+/* A lexical scope; scopes are linked through `prev`, and symbol lookup
+ * (`sym_search`) walks that chain outwards. */
+struct Scope
+{
+	Scope *prev; /* Previous scope in the stack */
+	SymbolEntry *symbols; /* All the symbols in this scope */
+};
+
+/* Success flag wrapper. Not referenced by the code visible in this section. */
+typedef struct {
+	bool ok;
+} SemaStatus;
+
+/* Default value of every symbol table: what a failed lookup yields. */
+static const Symbol InvalidSymbol = {.kind = SymInvalid};
+/* Sentinel type compared by address in `sema_binop` to detect an operand
+ * whose type could not be determined. */
+static const DataType *InvalidDataType = &(DataType){.kind = DtkInvalid};
+
+static DataTypeCheck
+datatype_struct_cmp(SemaCtx *sctx, DataType *s1, DataType *s2);
+static DataTypeCheck
+datatype_cmp(SemaCtx *sctx, DataType *dt1, DataType *dt2);
+static DataType *
+sema_expr(SemaCtx *sctx, Ast *expr, Location loc);
+static void
+sema_expr_list(SemaCtx *sctx, Vec(Ast *) exprs, Location loc);
+static void
+sema_node(SemaCtx *sctx, Ast *node);
+static void
+sema_stmts(SemaCtx *sctx, Vec(Ast *) stmts);
+static DataType *
+resolve_datatype(SemaCtx *sctx, const Str ident, Location loc);
+
+/* Allocates an empty scope chained to `prev`. Its symbol table is an
+ * arena-backed string map whose default (lookup miss) value is
+ * `InvalidSymbol`. */
+static Scope *
+make_scope(Scope *prev)
+{
+	Scope *scope = malloc(sizeof(Scope));
+
+	*scope = (Scope){ .prev = prev, .symbols = nil };
+	sh_new_arena(scope->symbols);
+	shdefault(scope->symbols, InvalidSymbol);
+
+	return scope;
+}
+
+/* Allocates a zeroed analysis context for compiler `cm`, linked to the
+ * enclosing context `prev` (may be nil). */
+static SemaCtx *
+make_semactx(Compiler *cm, SemaCtx *prev)
+{
+	SemaCtx *ctx = calloc(1, sizeof(SemaCtx));
+
+	ctx->prev = prev;
+	ctx->cm = cm;
+	return ctx;
+}
+
+/* Allocates a data type descriptor with the given kind, size, builtin flag
+ * and signedness; every other field is zero. */
+static DataType *
+make_data_type(enum DataTypeKind kind, u16 size, bool builtin, bool sign)
+{
+	DataType *type = calloc(1, sizeof(DataType));
+
+	type->kind = kind;
+	type->size = size;
+	type->sign = sign;
+	type->builtin = builtin;
+
+	return type;
+}
+
+/* Allocates a procedure type (kind DtkProc) with return type `rettype` and
+ * argument types `argtypes`. The vector is stored, not copied. */
+static DataType *
+make_proc_type(bool builtin, DataType *rettype, Vec(DataType *) argtypes)
+{
+	DataType *proc_type = calloc(1, sizeof(DataType));
+
+	proc_type->kind = DtkProc;
+	proc_type->builtin = builtin;
+
+	proc_type->proc.argtypes = argtypes;
+	proc_type->proc.rettype = rettype;
+
+	return proc_type;
+}
+
+/* Resolves the declared type of every identifier/type pair in `idents` and
+ * returns the resulting types in the same order. Returns `nil` for a nil or
+ * empty input. */
+static Vec(DataType *)
+make_type_list_from_idents(SemaCtx *sctx, Vec(AstIdentTypePair) idents)
+{
+	if (idents == nil)
+		return nil;
+
+	Vec(DataType *) dts = nil;
+	/* Plain index loop: the `foreach` macro hands out the first element
+	 * twice and never reaches the last one. */
+	for (isize i = 0; i < arrlen(idents); ++i)
+		arrput(dts, resolve_datatype(sctx, idents[i].dtype, idents[i].dtype_loc));
+	return dts;
+}
+
+/* Copies the `len` type pointers of the plain array `a` into a new stb_ds
+ * vector. Returns `nil` when there is nothing to copy. */
+static Vec(DataType *)
+make_proc_args(DataType *a[], isize len)
+{
+	Vec(DataType *) args = nil;
+	if (a == nil || len <= 0)
+		return args;
+	arrsetlen(args, len);
+	/* memcpy counts bytes, not elements */
+	memcpy(args, a, (usize)len * sizeof(*a));
+	return args;
+}
+
+/* Pushes a new context frame on top of `*sctx` and makes it current. The new
+ * frame starts with the flags, scopes and `ok` status of the frame below it.
+ * XXX: could rather only push flags...
+ */
+static void
+push_semactx(SemaCtx **sctx)
+{
+	SemaCtx *below = *sctx;
+	SemaCtx *frame = make_semactx(below->cm, below);
+
+	frame->ok = below->ok;
+	frame->flags = below->flags;
+	frame->top_scope = below->top_scope;
+	frame->current_scope = below->current_scope;
+
+	*sctx = frame;
+}
+
+/* Pops the current context frame: its `ok` status is carried over to the
+ * frame below, the frame is freed and `*sctx` is moved down. */
+static void
+pop_semactx(SemaCtx **sctx)
+{
+	SemaCtx *top = *sctx;
+	SemaCtx *prev = top->prev;
+
+	compiler_assert((*sctx)->cm, prev != nil);
+	prev->ok = top->ok;
+	*sctx = prev;
+	free(top);
+}
+
+/* Opens a new innermost scope nested in the current one. */
+static void
+enter_scope(SemaCtx *sctx)
+{
+	Scope *inner = make_scope(sctx->current_scope);
+	sctx->current_scope = inner;
+}
+
+/* Leaves the innermost scope, making its parent current again. The scope
+ * being left is not freed here. */
+static void
+exit_scope(SemaCtx *sctx)
+{
+	Scope *leaving = sctx->current_scope;
+
+	compiler_assert(sctx->cm, sctx->current_scope->prev != nil);
+	sctx->current_scope = leaving->prev;
+}
+
+/* Looks `name` up in `scope` only (enclosing scopes are not consulted).
+ * Returns the symbol, or `nil` when the table yields its invalid default. */
+Symbol *
+sym_search_oncurrent(Scope *scope, const Str name)
+{
+	Symbol *found = &shget(scope->symbols, name.s);
+
+	return found->kind != SymInvalid ? found : nil;
+}
+
+/* Looks `name` up starting at `scope` and moving outwards through the
+ * enclosing scopes. Returns the innermost match, or `nil` if there is none. */
+Symbol *
+sym_search(Scope *scope, const Str name)
+{
+	for (Scope *sp = scope; sp != nil; sp = sp->prev) {
+		Symbol *found = sym_search_oncurrent(sp, name);
+		if (found != nil)
+			return found;
+	}
+	return nil;
+}
+
+/* Warns about every variable-like binding of the current scope (proc
+ * parameters included) that was never marked as used.
+ */
+static void
+sema_check_unused_vars(SemaCtx *sctx)
+{
+	const SymbolEntry *entries = sctx->current_scope->symbols;
+	const isize count = shlen(entries);
+
+	for (isize i = 0; i < count; ++i) {
+		const Symbol *sym = &entries[i].value;
+
+		if (sym->used)
+			continue;
+		if (!symbol_is_var_binding(sym->kind))
+			continue;
+
+		const char *what = sym->procparm ? "proc parameter" : "variable";
+		sema_warning(
+			sctx, &sym->loc,
+			"unused %s '%s'", what, entries[i].key
+		);
+	}
+}
+
+/* Placeholder for dead statement detection: both arguments are ignored and
+ * nothing is checked yet. */
+static void
+sema_check_dead_stmts(SemaCtx *sctx, Vec(Ast *) stmts)
+{
+	(void)sctx, (void)stmts;
+	/* those who forsake the CFG are doomed to implement it badly without even
+	 * noticing... */
+}
+
+static void
+sema_match_proc_type(SemaCtx *sctx, Symbol *fsym, Str fident)
+{
+	if (fsym->dtype->kind != DtkProc) {
+		sema_error(
+			sctx, nil,
+			"cannot call '%s' because has non-proc type '%s'",
+			fident.s, "uh"
+		);
+		return;
+	}
+}
+
+/* Checks a procedure call: the callee must be declared and have a proc type,
+ * the number of arguments must match its signature, the arguments are checked
+ * as expressions, and a non-void result must be consumed (inside an
+ * expression or a `discard`).
+ * Returns the data type stored on the callee symbol, or `nil` after an error.
+ * NOTE(review): the returned type is the proc type itself, not its return
+ * type -- confirm that this is what callers expect. */
+static DataType *
+sema_proccall(SemaCtx *sctx, const AstProcCall *call, Location loc)
+{
+	Symbol *fsym = sym_search(sctx->current_scope, call->name);
+	if (fsym == nil) {
+		sema_error(sctx, &loc, "call to undeclared proc '%s'", call->name.s);
+		return nil;
+	}
+
+	fsym->used = true;
+	sema_match_proc_type(sctx, fsym, call->name);
+	/* The helper above only reports. The `proc` members read below mean
+	 * nothing for a non-proc type, so stop once the error is out. */
+	if (fsym->dtype->kind != DtkProc)
+		return nil;
+
+	/* check call arguments */
+	const isize proc_arglen = arrlen(fsym->dtype->proc.argtypes);
+	if (call->args != nil) {
+		compiler_assert(sctx->cm, call->args->type == AST_EXPRS);
+		const isize call_arglen = arrlen(call->args->exprs);
+
+		if (call_arglen != proc_arglen) {
+			const char *at_most = call_arglen > proc_arglen ? "s at most" : "";
+			/* `isize` is ptrdiff_t; cast so the values match `%li` everywhere */
+			sema_error(
+				sctx, &loc,
+				"argument length mismatch: given %li arguments to '%s' but it expects %li argument%s",
+				(long)call_arglen, call->name.s, (long)proc_arglen, at_most
+			);
+			return nil;
+		}
+		sema_expr_list(sctx, call->args->exprs, loc); /* now sema-check the args */
+	} else if (proc_arglen != 0) {
+		sema_error(sctx, &loc, "'%s' proc takes %li argument(s), but none given",
+			  call->name.s, (long)proc_arglen);
+		return nil;
+	}
+
+	/* a value-returning call used as a bare statement throws its result away */
+	if (fsym->dtype->proc.rettype != sctx->builtintypes.void_t
+		&& (~sctx->flags & SctxInDiscard)
+		&& (~sctx->flags & SctxInExpr)) {
+		sema_error(sctx, &loc, "result of function call with non-void type ignored");
+		sema_note(sctx, &loc, "use 'discard' if this was intentional");
+		return nil;
+	}
+
+	/* TODO: compare each argument's type with the declared parameter type */
+	return fsym->dtype;
+}
+
+/************ Semantic and type checking of expressions ************/
+/* Type checking for expressions is done inside-out */
+
+/* Types a number literal: every literal currently gets the type registered
+ * as "u64" in the top scope, which is also stored on the node. */
+static DataType *
+sema_expr_number(SemaCtx *sctx, AstNumber *num)
+{
+	/* type rule axiom */
+	Symbol *u64_sym = sym_search_oncurrent(sctx->top_scope, Sl("u64"));
+
+	num->type = u64_sym->dtype;
+	return num->type;
+}
+
+/* Types a string literal: always the type registered as "string" in the top
+ * scope. The literal's contents are not inspected. */
+static DataType *
+sema_expr_strlit(SemaCtx *sctx, const Str *strlit)
+{
+	(void)sctx, (void)strlit;
+	/* type rule axiom */
+	Symbol *string_sym = sym_search_oncurrent(sctx->top_scope, Sl("string"));
+	return string_sym->dtype;
+}
+
+/* Resolves an identifier used inside an expression and marks it as used.
+ * Returns its symbol, or `nil` after reporting an error when the name is
+ * undeclared or names a data type. */
+static Symbol *
+sema_expr_ident(SemaCtx *sctx, const Str ident)
+{
+	Symbol *sym = sym_search(sctx->current_scope, ident);
+
+	if (sym == nil) {
+		sema_error(sctx, nil, "undeclared identifier '%s'", ident.s);
+		return nil;
+	}
+	if (sym->kind != SymType) {
+		sym->used = true;
+		return sym;
+	}
+
+	sema_error(sctx, nil, "data type '%s' used as identifier in expression", ident.s);
+	return nil;
+}
+
+/* Checks a unary expression. For now the operator itself is not validated:
+ * the result is simply the type of the operand.
+ * TODO: reject operators that make no sense for the operand (e.g. on a
+ * string literal, or a minus on an unsigned number). */
+static DataType *
+sema_expr_unary(SemaCtx *sctx, AstUnary *unary, Location loc)
+{
+	Ast *expr = unary->atom;
+
+	compiler_assert(sctx->cm, ast_node_is_expr(expr->type));
+
+	DataType *operand_type = sema_expr(sctx, expr, loc);
+	return operand_type;
+}
+
+/* Checks a binary expression: the operator must be registered in the top
+ * scope with exactly two argument types, and both operands must have
+ * matching types.
+ * Returns the type of the left operand on success, `InvalidDataType` when an
+ * operand's type could not be determined (its error was already reported),
+ * and `nil` after reporting an error of its own. */
+static DataType *
+sema_binop(SemaCtx *sctx, const AstBinop *expr, Location loc)
+{
+	Symbol *opsym = sym_search_oncurrent(sctx->top_scope, expr->op);
+	if (opsym == nil) {
+		sema_error(sctx, &loc, "no operator '%s'", expr->op.s);
+		return nil;
+	}
+	if (arrlen(opsym->dtype->proc.argtypes) != 2) {
+		sema_error(sctx, &loc, "no binary operator for '%s'", expr->op.s);
+		return nil;
+	}
+
+	DataType *ldt = sema_expr(sctx, expr->left, loc);
+	DataType *rdt = sema_expr(sctx, expr->right, loc);
+	/* Skip typechecking if either operand has no usable type and propagate
+	 * `InvalidDataType` up the call stack. A failed sub-expression may come
+	 * back as `nil` as well as `InvalidDataType`; neither may reach
+	 * `datatype_cmp`. */
+	if (ldt == nil || rdt == nil
+		|| ldt == InvalidDataType || rdt == InvalidDataType)
+		return (DataType *)InvalidDataType;
+
+	DataTypeCheck tchk;
+	if (!(tchk = datatype_cmp(sctx, ldt, rdt)).ok) {
+		sema_error(sctx, &loc, "type error: %s", tchk.msg.s);
+		return nil;
+	}
+	return ldt;
+}
+
+/* Checks the expression `expr` and returns its data type. The check runs in
+ * its own context frame with `SctxInExpr` set; the frame is popped before
+ * returning. `loc` is passed down for diagnostics.
+ * An undeclared (or otherwise rejected) identifier yields `InvalidDataType`
+ * instead of crashing; the error itself is reported by `sema_expr_ident`. */
+static DataType *
+sema_expr(SemaCtx *sctx, Ast *expr, Location loc)
+{
+	compiler_assert(sctx->cm, ast_node_is_expr(expr->type));
+	push_semactx(&sctx);
+	sctx->flags |= SctxInExpr;
+
+	DataType *dt = nil;
+	switch (expr->type) {
+	case AST_BINEXPR:
+		dt = sema_binop(sctx, &expr->bin, loc);
+		break;
+	case AST_UNARY:
+		dt = sema_expr_unary(sctx, &expr->unary, loc);
+		break;
+	case AST_NUMBER:
+		dt = sema_expr_number(sctx, &expr->number);
+		break;
+	case AST_STRLIT:
+		dt = sema_expr_strlit(sctx, &expr->strlit);
+		break;
+	case AST_IDENT: {
+		/* the lookup returns nil on failure: never dereference it blindly */
+		Symbol *sym = sema_expr_ident(sctx, expr->ident);
+		dt = sym != nil ? sym->dtype : (DataType *)InvalidDataType;
+		break;
+	}
+	case AST_PROCCALL:
+		dt = sema_proccall(sctx, &expr->call, expr->loc);
+		break;
+	default:
+		unreachable();
+	}
+
+	pop_semactx(&sctx);
+	return dt;
+}
+
+/* Analyze every expression of `exprs` in order; the resulting types are dropped. */
+static void
+sema_expr_list(SemaCtx *sctx, Vec(Ast *) exprs, Location loc)
+{
+	const isize count = arrlen(exprs);
+	for (isize i = 0; i < count; ++i)
+		sema_expr(sctx, exprs[i], loc);
+}
+
+/************ Type checking ************/
+
+/* Structurally compare two compound (struct or union) data types.
+ *
+ * Both types must be of the same compound kind; `datatype_cmp` dispatches both
+ * `DtkStruct` and `DtkUnion` here after checking that the kinds match. The types
+ * are considered equal when their `packed` flags agree, they have the same number
+ * of fields and every pair of fields at the same index compares equal.
+ *
+ * Returns a check with `ok` set on success. A `packed` or field count mismatch
+ * yields an empty message; a field mismatch returns the failed check of that
+ * field as is. */
+static DataTypeCheck
+datatype_struct_cmp(SemaCtx *sctx, DataType *s1, DataType *s2)
+{
+	/* unions take this path too, so asserting on DtkStruct alone would trip the
+	 * bug check for every union comparison */
+	compiler_assert(
+		sctx->cm,
+		(s1->kind == DtkStruct || s1->kind == DtkUnion) && s1->kind == s2->kind
+	);
+	const DataTypeCompound *s1s = &s1->compound;
+	const DataTypeCompound *s2s = &s2->compound;
+
+	if (s1s->packed != s2s->packed)
+		return (DataTypeCheck){false, Sl("")};
+	if (arrlen(s1s->fields) != arrlen(s2s->fields))
+		return (DataTypeCheck){false, Sl("")};
+	for (isize i = 0; i < arrlen(s1s->fields); ++i) {
+		DataTypeCheck tchk;
+		if (!(tchk = datatype_cmp(sctx, s1s->fields[i], s2s->fields[i])).ok)
+			return tchk;
+	}
+	return (DataTypeCheck){.ok = true};
+}
+
+/* Compare two array types: the lengths must be the same and the base types must
+ * compare equal. A length mismatch fails with an empty message; otherwise the
+ * result of comparing the base types is returned unchanged. */
+static DataTypeCheck
+datatype_array_cmp(SemaCtx *sctx, DataType *a1, DataType *a2)
+{
+	const bool same_len = a1->array.len == a2->array.len;
+
+	if (!same_len)
+		return (DataTypeCheck){false, Sl("")};
+	return datatype_cmp(sctx, a1->array.base, a2->array.base);
+}
+
+/* Compare two proc types.
+ *
+ * The `public`, `extern_lnk` and `c_varargs` flags and the number of arguments
+ * must agree, otherwise the check fails with an empty message. After that the
+ * return types and then each pair of argument types are compared in order; the
+ * first failed check is returned as is. */
+static DataTypeCheck
+datatype_proc_cmp(SemaCtx *sctx, DataType *pc1, DataType *pc2)
+{
+	const bool same_shape =
+		pc1->proc.public == pc2->proc.public
+		&& pc1->proc.extern_lnk == pc2->proc.extern_lnk
+		&& pc1->proc.c_varargs == pc2->proc.c_varargs
+		&& arrlen(pc1->proc.argtypes) == arrlen(pc2->proc.argtypes);
+	if (!same_shape)
+		return (DataTypeCheck){false, Sl("")};
+
+	DataTypeCheck verdict = datatype_cmp(sctx, pc1->proc.rettype, pc2->proc.rettype);
+	const isize nargs = arrlen(pc1->proc.argtypes);
+	/* stop at the first mismatch so its check is the one handed back */
+	for (isize i = 0; verdict.ok && i < nargs; ++i)
+		verdict = datatype_cmp(sctx, pc1->proc.argtypes[i], pc2->proc.argtypes[i]);
+	return verdict;
+}
+
+/* Check the basic type `dt1` against the basic type `dt2`.
+ *
+ * The check is not symmetric: it fails (with an empty message) when `dt1` is
+ * larger than `dt2`, and fails when the two types differ in signedness. A `dt1`
+ * that is smaller than or as large as `dt2` with the same signedness passes. */
+static DataTypeCheck
+datatype_basic_cmp(SemaCtx *sctx, DataType *dt1, DataType *dt2)
+{
+	(void)sctx;
+
+	const bool fits = dt1->size <= dt2->size;
+	if (!fits)
+		return (DataTypeCheck){false, Sl("")};
+
+	const bool same_sign = dt1->sign == dt2->sign;
+	if (!same_sign)
+		return (DataTypeCheck){false, Strafmt("integers with different sign")};
+
+	return (DataTypeCheck){.ok = true};
+}
+
+/* Compare two data types and return a check whose `ok` field tells whether they
+ * are compatible.
+ *
+ * A nil type never matches anything (not even another nil). Two pointers to the
+ * same type object always match. Types of different kinds never match. For types
+ * of the same kind the decision is delegated to the comparison routine of that
+ * kind; `DtkBool` and `DtkVoid` carry nothing else to compare.
+ *
+ * Every failed check carries a non-null message, because callers print it with a
+ * `%s` conversion. */
+static DataTypeCheck
+datatype_cmp(SemaCtx *sctx, DataType *dt1, DataType *dt2)
+{
+	if (dt1 == nil || dt2 == nil)
+		return (DataTypeCheck){false, Sl("")};
+	/* TODO: return more information in case of a mismatch... */
+	if (dt1 == dt2) /* shallow */
+		return (DataTypeCheck){.ok = true};
+	if (dt1->kind != dt2->kind)
+		return (DataTypeCheck){false, Sl("mismatched kinds of data types")};
+
+	switch (dt1->kind) {
+	case DtkBasic:
+		return datatype_basic_cmp(sctx, dt1, dt2);
+	case DtkStruct:
+	case DtkUnion:
+		return datatype_struct_cmp(sctx, dt1, dt2);
+	case DtkProc:
+		return datatype_proc_cmp(sctx, dt1, dt2);
+	case DtkArray:
+		return datatype_array_cmp(sctx, dt1, dt2);
+	case DtkBool:
+	case DtkVoid:
+		return (DataTypeCheck){.ok = true};
+	}
+	/* a kind without a comparison routine */
+	return (DataTypeCheck){false, Sl("data types of this kind cannot be compared")};
+}
+
+/* Return the data type of an expression node.
+ *
+ * Binary, unary and number nodes carry their type in the AST. String literals
+ * use the `string` type registered in the top scope. Identifiers and proc calls
+ * are resolved through the symbol table starting at the current scope; a proc
+ * call yields the return type of the called proc.
+ *
+ * Returns nil when a needed symbol cannot be found, or when the symbol named by
+ * a proc call is not a proc. */
+static DataType *
+expr_get_datatype(SemaCtx *sctx, Ast *expr)
+{
+	compiler_assert(sctx->cm, ast_node_is_expr(expr->type));
+	switch (expr->type) {
+	case AST_BINEXPR:
+		return expr->bin.type;
+	case AST_UNARY:
+		return expr->unary.type;
+	case AST_NUMBER:
+		return expr->number.type;
+	case AST_STRLIT: {
+		const Symbol *strsym = sym_search_oncurrent(sctx->top_scope, Sl("string"));
+		return strsym != nil ? strsym->dtype : nil;
+	}
+	/* XXX: for these two we could attach the type in the ast... */
+	case AST_IDENT: {
+		/* the lookup fails for names that are not visible from this scope */
+		const Symbol *sym = sym_search(sctx->current_scope, expr->ident);
+		return sym != nil ? sym->dtype : nil;
+	}
+	case AST_PROCCALL: {
+		const Symbol *callee = sym_search(sctx->current_scope, expr->call.name);
+		/* only a proc symbol has a proc type to read the return type from */
+		if (callee == nil || callee->kind != SymProc || callee->dtype == nil)
+			return nil;
+		return callee->dtype->proc.rettype;
+	}
+	default:
+		unreachable();
+	}
+	return nil;
+}
+
+/* Look `ident` up starting at the current scope and return the data type it
+ * names. When there is no such symbol, or the symbol is not a type, an error is
+ * reported at `loc` and `InvalidDataType` is returned instead. */
+static DataType *
+resolve_datatype(SemaCtx *sctx, const Str ident, Location loc)
+{
+	Symbol *found = sym_search(sctx->current_scope, ident);
+
+	if (found != nil && found->kind == SymType)
+		return found->dtype;
+
+	if (found == nil) {
+		sema_error(sctx, &loc, "no such type '%s'", ident.s);
+	} else {
+		sema_error(sctx, &loc, "'%s' is not a type but a %s", ident.s, SymbolKindStr[found->kind]);
+	}
+	return (DataType *)InvalidDataType;
+}
+
+/* Analyze a proc definition.
+ *
+ * Steps, in order:
+ *  1. Reject the definition when the name is already visible from the current
+ *     scope; the error is reported at `loc` and a note points at the previous
+ *     declaration.
+ *  2. Record that `main` was defined and require it to be public.
+ *  3. Resolve the return type (`void` when the proc declares none); give up when
+ *     it does not resolve.
+ *  4. Build the proc type, insert the proc symbol into the current scope and
+ *     attach the type to the AST node. This happens before the body is analyzed
+ *     (presumably so that calls to the proc from its own body resolve).
+ *  5. When there is a body: analyze it in a fresh context and scope that holds
+ *     the parameters, then check that scope for unused variables before leaving
+ *     it. */
+static void
+sema_procdef(SemaCtx *sctx, AstProc *proc, Location loc)
+{
+	Symbol *sym_prev;
+	if ((sym_prev = sym_search(sctx->current_scope, proc->name)) != nil) {
+		sema_error(
+			sctx, &loc,
+			"'%s' was already declared as a %s",
+			proc->name.s, SymbolKindStr[sym_prev->kind]
+		);
+		sema_note(sctx, &sym_prev->loc, "'%s' previously declared here", proc->name.s);
+		return;
+	}
+
+	if (Str_equal(proc->name, Sl("main"))) {
+		sctx->main_defined = true;
+		if (!proc->ispublic) {
+			sema_error(sctx, &loc, "'main' has to be declared as a public proc");
+		}
+	}
+
+	const Ast *rettype_node = proc->rettype;
+	DataType *proc_rettype = nil;
+	if (rettype_node != nil) {
+		compiler_assert(sctx->cm, rettype_node->type == AST_IDENT);
+		proc_rettype = resolve_datatype(sctx, proc->rettype->ident, rettype_node->loc);
+		if (proc_rettype == InvalidDataType)
+			return;
+	} else {
+		/* return type node is nil, we infer that as a `void` type */
+		proc_rettype = sctx->builtintypes.void_t;
+	}
+
+	Vec(DataType *) procargs = make_type_list_from_idents(sctx, proc->args);
+	DataType *procdtype = make_proc_type(false, proc_rettype, procargs);
+	procdtype->proc.public = proc->ispublic;
+	Symbol proc_sym = {
+		.kind = SymProc,
+		.dtype = procdtype,
+		.loc = loc
+	};
+
+	sym_insert(sctx->current_scope->symbols, proc->name.s, proc_sym);
+	proc->type = procdtype;
+
+	/* proc has no body at all */
+	if (proc->body == nil)
+		return;
+
+	/* analyze the body */
+	compiler_assert(sctx->cm, proc->body->type == AST_STMTS);
+	push_semactx(&sctx);
+	enter_scope(sctx);
+
+	compiler_assert(sctx->cm, arrlen(proc->args) == arrlen(procargs));
+	/* Inject proc parameters into the proc body top scope */
+	for (isize i = 0; i < arrlen(proc->args); ++i) {
+		DataType *argdtype = procargs[i];
+		enum SymbolKind argsymkind = proc->args[i].kind;
+
+		compiler_assert(sctx->cm, argdtype != nil);
+		compiler_assert(sctx->cm, argsymkind == SymLet || argsymkind == SymVar);
+
+		Symbol argsym = {
+			.kind = argsymkind,
+			.dtype = argdtype,
+			.procparm = true,
+			.loc = proc->args[i].loc
+		};
+		sym_insert(sctx->current_scope->symbols, proc->args[i].ident.s, argsym);
+	}
+	sctx->flags |= SctxInsideProc;
+	/* the body statements share the scope of the parameters */
+	sema_stmts(sctx, proc->body->stmts);
+	sema_check_unused_vars(sctx);
+	exit_scope(sctx);
+	pop_semactx(&sctx);
+}
+
+static void
+sema_return(SemaCtx *sctx, Ast *ret_expr, Location loc)
+{
+	if (~sctx->flags & SctxInsideProc) {
+		sema_error(sctx, &loc, "'return' outside of proc");
+	}
+	if (ret_expr != nil)
+		sema_expr(sctx, ret_expr, loc);
+}
+
+static void
+sema_break(SemaCtx *sctx, Ast *unused, Location loc)
+{
+	(void)unused;
+	if (~sctx->flags & SctxInsideLoop) {
+		sema_error(sctx, &loc, "'break' used outside of a loop");
+	}
+}
+
+/* Analyze the expression of a `discard` statement. `SctxInDiscard` is set on the
+ * context while the expression is analyzed and cleared again afterwards. */
+static void
+sema_discard(SemaCtx *sctx, Ast *expr, Location loc)
+{
+	sctx->flags = sctx->flags | SctxInDiscard;
+	(void)sema_expr(sctx, expr, loc);
+	sctx->flags = sctx->flags & ~SctxInDiscard;
+}
+
+/* Analyze the node an attribute is attached to; the attribute itself is not
+ * checked. */
+static void
+sema_attribute(SemaCtx *sctx, AstAttribute *attr)
+{
+	Ast *annotated = attr->node;
+
+	sema_node(sctx, annotated);
+}
+
+/* Analyze a variable declaration and insert the new binding into the current
+ * scope.
+ *
+ * A symbol with the same name that is visible from the current scope and has a
+ * different kind than this declaration is an error for `let`, `var`, `const` and
+ * type symbols; for any other kind only a note is emitted and the declaration
+ * goes on. NOTE(review): a previous symbol of the *same* kind is not diagnosed
+ * here — confirm whether that is intended.
+ *
+ * The initializer expression is analyzed when present, otherwise a warning is
+ * emitted. A declaration without an explicit type is rejected because there is
+ * no type inference yet. */
+static void
+sema_var_decl(SemaCtx *sctx, AstVarDecl *decl, Location loc)
+{
+	compiler_assert(sctx->cm, symbol_is_var_binding(decl->kind));
+
+	const Symbol *symp = sym_search(sctx->current_scope, decl->name);
+	if (symp != nil && symp->kind != decl->kind) {
+		switch (symp->kind) {
+		case SymLet:
+			sema_error(sctx, &symp->loc, "'%s' was already declared as 'let'", decl->name.s);
+			return;
+		case SymVar:
+			sema_error(sctx, &symp->loc, "'%s' was already declared as 'var'", decl->name.s);
+			return;
+		case SymConst:
+			sema_error(
+				sctx, &symp->loc,
+				"declaration of '%s' shadows previously declared constant with the same name",
+				decl->name.s
+			);
+			return;
+		case SymType:
+			sema_error(sctx, &symp->loc, "'%s' was already declared as a type", decl->name.s);
+			return;
+		default:
+			break;
+		}
+		sema_note(sctx, &symp->loc, "'%s' was declared in this line", decl->name.s);
+	}
+
+	Ast *dexpr = decl->expr;
+	if (dexpr != nil) {
+		sema_expr(sctx, dexpr, loc); /* check the assignment expression */
+	} else {
+		sema_warning(sctx, &loc, "variable is uninitialized");
+	}
+
+	if (decl->datatype == nil) {
+		/* point at the declaration so the user can find what to annotate */
+		sema_error(sctx, &loc, "we don't do type inference yet sorry");
+		return;
+	}
+
+	compiler_assert(sctx->cm, decl->datatype->type == AST_IDENT);
+	DataType *dtype = resolve_datatype(sctx, decl->datatype->ident, decl->datatype->loc);
+	/* Note that we ignore whether `resolve_datatype` return an invalid type,
+	 * since we still want to insert the variable into the symbol table,
+	 * otherwise we would have spurious "undeclared identifier" errors. */
+	decl->type = dtype;
+
+	Symbol sym = {
+		.kind = decl->kind,
+		.dtype = dtype,
+		.loc = loc,
+	};
+	/* Insert the variable to the symbol table */
+	sym_insert(sctx->current_scope->symbols, decl->name.s, sym);
+}
+
+/* Analyze an assignment to a named variable.
+ *
+ * The target name is checked with `sema_expr_ident` and the assigned expression
+ * is analyzed. When the target was rejected (the error is already reported) the
+ * analysis stops there, so one faulty target yields a single diagnostic.
+ * Otherwise the target must be a variable binding declared as `var`; anything
+ * else is reported at `loc`.
+ * TODO: the type of the expression is not checked against the type of the
+ * variable yet. */
+static void
+sema_var_assign(SemaCtx *sctx, AstVarAssign *assign, Location loc)
+{
+	const Symbol *target = sema_expr_ident(sctx, assign->name);
+	/* the right hand side is analyzed even when the target is bogus */
+	sema_expr(sctx, assign->expr, loc);
+	if (target == nil)
+		return;
+
+	Symbol *decl = sym_search(sctx->current_scope, assign->name);
+	if (decl == nil) {
+		sema_error(sctx, &loc, "assign to undeclared variable '%s'", assign->name.s);
+		return;
+	}
+	if (!symbol_is_var_binding(decl->kind)) {
+		sema_error(
+			sctx, &loc,
+			"assign to non-variable symbol ('%s' is a '%s')",
+			assign->name.s, SymbolKindStr[decl->kind]
+		);
+		return;
+	}
+	if (decl->kind != SymVar) {
+		sema_error(
+			sctx, &loc,
+			"assign to immutable symbol ('%s' was declared as '%s')",
+			assign->name.s, SymbolKindStr[decl->kind]
+		);
+		return;
+	}
+	//datatype_cmp(sctx, nil, decl->dtype);
+}
+
+/* Analyze an `if`: the condition, the true body and the false body first, then
+ * the condition and body of every `elif` in order. Missing bodies are fine since
+ * `sema_node` ignores nil nodes. */
+static void
+sema_ifstmtexpr(SemaCtx *sctx, AstIf *ift, Location loc)
+{
+	sema_expr(sctx, ift->cond, loc);
+	sema_node(sctx, ift->true_body);
+	sema_node(sctx, ift->false_body);
+
+	AstElif *arms = ift->elifs;
+	for (isize i = 0, n = arrlen(arms); i < n; ++i) {
+		sema_expr(sctx, arms[i].cond, loc);
+		sema_node(sctx, arms[i].body);
+	}
+}
+
+/* Analyze a loop: the pre and post conditions (when present) are analyzed in the
+ * surrounding context, the body in a pushed context marked with
+ * `SctxInsideLoop`. */
+static void
+sema_loop(SemaCtx *sctx, AstLoop *loop, Location loc)
+{
+	Ast *conds[] = {loop->precond, loop->postcond};
+
+	for (isize i = 0; i < 2; ++i) {
+		if (conds[i] == nil)
+			continue;
+		sema_expr(sctx, conds[i], loc);
+	}
+
+	push_semactx(&sctx);
+	sctx->flags |= SctxInsideLoop;
+	sema_node(sctx, loop->body);
+	pop_semactx(&sctx);
+}
+
+/* Analyze a list of statements in order, within the scope that is current when
+ * called (no scope is opened here). A warning is emitted at the statement that
+ * follows a terminal statement. */
+static void
+sema_stmts(SemaCtx *sctx, Vec(Ast *) stmts)
+{
+	const isize count = arrlen(stmts);
+
+	for (isize i = 0; i < count; ++i) {
+		Ast *stmt = stmts[i];
+		const bool has_next = (i + 1 != count);
+
+		sema_node(sctx, stmt);
+		if (sema_is_stmt_terminal(stmt) && has_next) {
+			sema_warning(sctx, &stmts[i + 1]->loc, "dead code after 'return'");
+		}
+	}
+}
+
+/* Analyze a block of statements inside a scope of its own.
+ *
+ * The unused variable check runs before the scope is left, the same order
+ * `sema_procdef` uses for proc bodies; once `exit_scope` has run the bindings
+ * declared in the block are no longer the ones of the current scope. */
+static void
+sema_stmt_block(SemaCtx *sctx, Vec(Ast *) stmts)
+{
+	enter_scope(sctx);
+	sema_stmts(sctx, stmts);
+	/* check for unused bindings declared in this scope */
+	sema_check_unused_vars(sctx);
+	exit_scope(sctx);
+}
+
+/* Dispatch an AST node to the analysis routine of its kind. A nil node is
+ * ignored, which lets callers pass optional children without checking them.
+ * Node kinds that never appear on their own are treated as unreachable. */
+static void
+sema_node(SemaCtx *sctx, Ast *node)
+{
+	if (node == nil)
+		return;
+
+	const Location loc = node->loc;
+	switch (node->type) {
+	/* expressions used as statements */
+	case AST_BINEXPR:
+	case AST_UNARY:
+	case AST_NUMBER:
+	case AST_STRLIT:
+	case AST_IDENT:
+		sema_expr(sctx, node, loc);
+		break;
+	case AST_PROCCALL:
+		sema_proccall(sctx, &node->call, loc);
+		break;
+
+	/* declarations and assignments */
+	case AST_PROCDEF:
+		sema_procdef(sctx, &node->proc, loc);
+		break;
+	case AST_VARDECL:
+		sema_var_decl(sctx, &node->var, loc);
+		break;
+	case AST_VARASSIGN:
+		sema_var_assign(sctx, &node->varassgn, loc);
+		break;
+
+	/* control flow and blocks */
+	case AST_IF:
+		sema_ifstmtexpr(sctx, &node->ifse, loc);
+		break;
+	case AST_LOOP:
+		sema_loop(sctx, &node->loop, loc);
+		break;
+	case AST_STMTS:
+		sema_stmt_block(sctx, node->stmts);
+		break;
+	case AST_RETURN:
+		sema_return(sctx, node->ret, loc);
+		break;
+	case AST_BREAK:
+		sema_break(sctx, nil, loc);
+		break;
+
+	/* wrappers around another node or expression */
+	case AST_DISCARD:
+		sema_discard(sctx, node->discard.expr, loc);
+		break;
+	case AST_ATTRIBUTE:
+		sema_attribute(sctx, &node->attribute);
+		break;
+
+	case AST_INVALID:
+	case AST_EXPRS:
+	case AST_PROCCALL_ARGS:
+		unreachable();
+	}
+}
+
+/* Register the builtin symbols in the current scope of `sctx`, in this order:
+ * the types `void`, `u64`, `i64`, `cint`, `string` and `bool`, the operators
+ * `+`, `-` and `==` (each taking two `u64` operands) and the external proc
+ * `puts`. The `u64` and `void` types are also stored in `sctx->builtintypes`. */
+static void
+sema_make_builtin_types(SemaCtx *sctx)
+{
+	typedef struct {
+		const char *name;
+		DataType *dtype;
+	} NamedType;
+	typedef struct {
+		const char *name;
+		DataType *rettype;
+	} BinaryOp;
+
+	/* types that other builtins refer to get a name of their own */
+	DataType *void_type = make_data_type(DtkVoid, 0, true, false);
+	DataType *str_type = make_data_type(DtkStruct, 0, false, false);
+	DataType *u64_type = make_data_type(DtkBasic, 8, true, false);
+	DataType *bool_type = make_data_type(DtkBool, 1, true, false);
+
+	/* `puts`: externally linked, takes one string and returns nothing */
+	DataType *puts_proto = make_data_type(DtkProc, 0, false, false);
+	puts_proto->proc.rettype = void_type;
+	puts_proto->proc.argtypes = make_proc_args((DataType *[]){str_type}, 1);
+	puts_proto->proc.extern_lnk = true;
+
+	const NamedType types[] = {
+		{"void", void_type},
+		{"u64", u64_type},
+		{"i64", make_data_type(DtkBasic, 8, true, true)},
+		{"cint", make_data_type(DtkBasic, sizeof(int), true, true)},
+		{"string", str_type},
+		{"bool", bool_type},
+	};
+	for (isize i = 0; i < countof(types); ++i) {
+		Symbol type_sym = {.kind = SymType, .dtype = types[i].dtype};
+		sym_insert(sctx->current_scope->symbols, types[i].name, type_sym);
+	}
+
+	/* all operators share the operand types, only the result type varies */
+	const BinaryOp binops[] = {
+		{"+", u64_type},
+		{"-", u64_type},
+		{"==", bool_type},
+	};
+	for (isize i = 0; i < countof(binops); ++i) {
+		Symbol op_sym = {
+			.kind = SymProc,
+			.dtype = make_proc_type(
+				true,
+				binops[i].rettype,
+				make_proc_args((DataType *[]){u64_type, u64_type}, 2)
+			)
+		};
+		sym_insert(sctx->current_scope->symbols, binops[i].name, op_sym);
+	}
+
+	sctx->builtintypes.tyu64 = u64_type;
+	sctx->builtintypes.void_t = void_type;
+
+	Symbol puts_sym = {.kind = SymProc, .dtype = puts_proto};
+	sym_insert(sctx->current_scope->symbols, "puts", puts_sym);
+}
+
+/* Create the toplevel semantic context for the compiler state `cm`: a context
+ * without a previous one, holding a fresh scope without a parent that contains
+ * the builtin symbols. That scope is both the current and the top scope, and
+ * `ok` starts out as true. */
+SemaCtx *
+sema_new(Compiler *cm)
+{
+	SemaCtx *root = make_semactx(cm, nil);
+	Scope *global_scope = make_scope(nil);
+
+	root->current_scope = global_scope;
+	sema_make_builtin_types(root);
+	root->top_scope = root->current_scope;
+	root->ok = true;
+	return root;
+}
+
+/* Release a semantic context. Only the context object itself is freed here.
+ * NOTE(review): the scopes and symbols reachable from it are not released by
+ * this function — confirm whether they are owned elsewhere. */
+void
+sema_destroy(SemaCtx *sctx)
+{
+	free(sctx);
+}
+
+/* Run the semantic analysis over a whole program.
+ *
+ * `program` must be a statement list; each toplevel statement is analyzed in
+ * order. Unless the compiler runs in compile only mode, a missing `main` proc is
+ * an error. Finally a warning is emitted for every proc in the current scope
+ * that is neither builtin, public nor externally linked and was never used. */
+void
+sema(SemaCtx *sctx, Ast *program)
+{
+	/* Analyze toplevel */
+	/* XXX: DRY it */
+	compiler_assert(sctx->cm, program->type == AST_STMTS);
+	isize stmt_idx = 0;
+	while (stmt_idx < arrlen(program->stmts)) {
+		sema_node(sctx, program->stmts[stmt_idx]);
+		++stmt_idx;
+	}
+
+	const bool needs_main = !sctx->cm->opts.compile_only;
+	if (needs_main && !sctx->main_defined)
+		sema_error(sctx, nil, "missing 'main' entrypoint proc");
+
+	/* check unused local procedures */
+	const SymbolEntry *entries = sctx->current_scope->symbols;
+	for (isize i = 0; i < shlen(entries); ++i) {
+		const Symbol *sym = &entries[i].value;
+		if (sym->kind != SymProc || sym->used)
+			continue;
+
+		/* procs visible outside of this module are not expected to be called here */
+		const DataType *dt = sym->dtype;
+		if (dt->builtin || dt->proc.public || dt->proc.extern_lnk)
+			continue;
+
+		sema_warning(
+			sctx, &sym->loc,
+			"defined proc '%s' is never called in this module", entries[i].key
+		);
+	}
+}
diff --git a/compiler/sema.h b/compiler/sema.h
new file mode 100644
index 0000000..a3e95b9
--- /dev/null
+++ b/compiler/sema.h
@@ -0,0 +1,32 @@
+#ifndef _sema_h_
+#define _sema_h_
+
+#include "ast.h"
+#include "state.h"
+
+typedef struct Scope Scope;
+typedef struct SemaCtx SemaCtx;
+
+/* State of the semantic analysis. The analysis pushes and pops contexts while it
+ * walks the AST (see `push_semactx`/`pop_semactx` in sema.c). */
+struct SemaCtx
+{
+	SemaCtx *prev; /* presumably the context this one was pushed on top of; nil for the toplevel one */
+	Scope *current_scope; /* scope where new symbols are inserted and lookups start */
+	Scope *top_scope; /* scope created by `sema_new`, holds the builtin symbols */
+	Compiler *cm; /* compiler state, used for options and bug checks */
+	u64 flags; /* Bit field storing context flags */
+	struct {
+		DataType *tyu64; /* the builtin `u64` type */
+		DataType *void_t; /* the builtin `void` type, used for procs without a return type */
+	} builtintypes;
+	bool ok; /* starts as true in `sema_new`; presumably cleared once an error is reported */
+	bool main_defined; /* set when a proc named `main` is defined */
+};
+
+/* Create the toplevel semantic context for the compiler state `cm`, with the
+ * builtin symbols already registered. */
+SemaCtx *
+sema_new(Compiler *cm);
+/* Release a context created with `sema_new`. */
+void
+sema_destroy(SemaCtx *sctx);
+/* Run the semantic analysis over `program`, which must be a statement list
+ * (`AST_STMTS`) node. */
+void
+sema(SemaCtx *sctx, Ast *program);
+
+#endif
diff --git a/compiler/state.c b/compiler/state.c
new file mode 100644
index 0000000..301579a
--- /dev/null
+++ b/compiler/state.c
@@ -0,0 +1,13 @@
+#include <stdio.h>
+#include "pre.h"
+#include "state.h"
+
+/* Report a failed compiler bug check on stderr, quoting the text of the failed
+ * predicate, then trigger `debugtrap()`. The compiler state is not used yet. */
+void
+compiler_assert_impl(Compiler *cm, const char *pred_s)
+{
+	(void)cm;
+	fprintf(
+		stderr,
+		"Bug check fail: `%s`\n"
+		"This is a compiler bug, please report! (run with -v for bug reporting instructions)\n\n",
+		pred_s
+	);
+	fflush(stderr);
+	debugtrap();
+}
diff --git a/compiler/state.h b/compiler/state.h
new file mode 100644
index 0000000..477389e
--- /dev/null
+++ b/compiler/state.h
@@ -0,0 +1,30 @@
+#ifndef _state_h_
+#define _state_h_
+
+#include "pre.h"
+#include "cgBackends.h"
+
+/* Assert meant to catch compiler bugs. The difference with a normal assert is that
+ * this one stays on release builds. Better to crash than to deal with some weird bug
+ * seeping through codegen.
+ */
+#define compiler_assert(cm, pred) if (!(pred)) {compiler_assert_impl(cm, #pred);}
+
+/* Global state of one compiler run: the options it was started with plus a few
+ * bits of running state. */
+typedef struct {
+	struct {
+		bool color; /* colored diagnostics */
+		bool compile_only; /* when set, a missing `main` proc is not an error (see sema.c) */
+		Str exe_out; /* presumably the path of the output executable — confirm */
+		Str release_mode; /* NOTE(review): stored as a string, meaning not visible here */
+		Vec(Str) defines;
+		enum CodegenBackends backend; /* code generation backend to use */
+		isize max_errors; /* presumably the error count at which to stop — confirm */
+	} opts;
+	Str current_filename;
+	isize error_count;
+} Compiler;
+
+/* Failure path of `compiler_assert`; `pred_s` is the source text of the predicate
+ * that failed. Call it through the macro instead of directly. */
+void
+compiler_assert_impl(Compiler *cm, const char *pred_s);
+
+#endif
diff --git a/compiler/symbol.c b/compiler/symbol.c
new file mode 100644
index 0000000..033011c
--- /dev/null
+++ b/compiler/symbol.c
@@ -0,0 +1,10 @@
+#include "symbol.h"
+
+/* Human readable name of each symbol kind, indexed by `enum SymbolKind`. The
+ * names are interpolated into diagnostics. */
+const char *SymbolKindStr[] = {
+	[SymInvalid] = "",
+	[SymLet] = "let",
+	[SymVar] = "var",
+	[SymConst] = "const",
+	[SymProc] = "proc",
+	[SymType] = "type definition",
+};
diff --git a/compiler/symbol.h b/compiler/symbol.h
new file mode 100644
index 0000000..7baf061
--- /dev/null
+++ b/compiler/symbol.h
@@ -0,0 +1,17 @@
+#ifndef _symbol_h_
+#define _symbol_h_
+
+/* True when the symbol kind `sk` is one of the variable bindings (`let`, `var` or
+ * `const`). The argument may be evaluated more than once. */
+#define symbol_is_var_binding(sk) \
+	((sk) == SymLet || (sk) == SymVar || (sk) == SymConst)
+
+/* Kind of entity a symbol table entry stands for. `symbol_is_var_binding` tests
+ * the range `SymLet`..`SymConst`, so those three must stay contiguous and in this
+ * order. */
+enum SymbolKind
+{
+	SymInvalid,
+	SymLet,
+	SymVar,
+	SymConst,
+	SymProc,
+	SymType, /* the symbol names a data type */
+};
+/* Display name of each kind, indexed by the enumerators above (see symbol.c). */
+extern const char *SymbolKindStr[];
+
+#endif