rutile/compiler/sema.c
2025-01-21 14:36:18 -03:00

1007 lines
26 KiB
C

/* Semantic analyzer and type checker */
#include <stdlib.h>
#include <string.h>
#include "pre.h"
#include "sema.h"
#include "datatype.h"
#include "location.h"
#include "symbol.h"
#include "ast.h"
#include "state.h"
#include "messages.h"
#include "libs/stb_ds.h"
/* Reports an error at `loc` through the compiler attached to `ctx` and marks
 * the context as failed. */
#define sema_error(ctx, loc, ...) do { \
    error((ctx)->cm, loc, __VA_ARGS__); \
    (ctx)->ok = false; \
} while (0)
/* Warning and note diagnostics; these do not touch the context's `ok` flag. */
#define sema_warning(ctx, loc, ...) warning((ctx)->cm, loc, __VA_ARGS__)
#define sema_note(ctx, loc, ...) note((ctx)->cm, loc, __VA_ARGS__)
/* True if the statement node `s` is a 'return' or a 'break'.
 * The argument is parenthesized so that any pointer expression can be passed;
 * note that it is evaluated twice. */
#define sema_is_stmt_terminal(s) ((s)->type == AST_RETURN || (s)->type == AST_BREAK)
/* Binds the name `k` to the symbol `v` in the symbol map `syms`. */
#define sym_insert(syms, k, v) shput(syms, k, v)
/* Bit flags describing where in the program the analyzer currently is.
 * They are stored in a context frame's `flags` and inherited by pushed frames. */
enum SemaCtxFlags /* 64 bits */
{
SctxInsideProc = BitPos(0), /* set while analyzing a proc body; required by 'return' */
SctxInsideLoop = BitPos(1), /* set while analyzing a loop body; required by 'break' */
SctxInsideIf = BitPos(2), /* not set or tested in the visible part of this file */
SctxInTopLevel = BitPos(3), /* not set or tested in the visible part of this file */
SctxInExpr = BitPos(4), /* set by sema_expr for the duration of an expression */
SctxInDiscard = BitPos(5), /* set by sema_discard around the discarded expression */
SctxInStmtBlock = BitPos(6), /* not set or tested in the visible part of this file */
};
/* An entry of the symbol table: what a name denotes inside a scope. */
typedef struct {
enum SymbolKind kind;
/* The data type associated with the symbol. */
DataType *dtype;
bool used; /* set once the symbol is referenced by an identifier or a call */
bool procparm; /* if its a proc parameter */
Location loc; /* location recorded when the symbol was inserted */
} Symbol;
/* String-keyed hash map entry holding a Symbol as its value. */
typedef HashMapStr(Symbol) SymbolEntry;
/* A lexical scope. Scopes are linked through `prev`, and sym_search follows
 * that link when a name is not found in the scope it started from. */
struct Scope
{
Scope *prev; /* Previous scope in the stack */
SymbolEntry *symbols; /* All the symbols in this scope */
};
/* Success flag wrapper. NOTE(review): not referenced anywhere in the visible
 * part of this file; confirm whether it is still needed. */
typedef struct {
bool ok;
} SemaStatus;
/* Default value of every scope's symbol map; a lookup whose result has kind
 * SymInvalid is treated as "not found" by sym_search_oncurrent. */
static const Symbol InvalidSymbol = {.kind = SymInvalid};
/* Sentinel data type returned by resolve_datatype when a type name cannot be
 * resolved; callers compare against this pointer. */
static const DataType *InvalidDataType = &(DataType){.kind = DtkInvalid};
/* Forward declarations of functions that are used before their definition
 * further down in this file. */
static DataTypeCheck
datatype_cmp(SemaCtx *sctx, DataType *dt1, DataType *dt2);
static DataType *
sema_expr(SemaCtx *sctx, Ast *expr, Location loc);
static void
sema_expr_list(SemaCtx *sctx, Vec(Ast *) exprs, Location loc);
static void
sema_node(SemaCtx *sctx, Ast *node);
static void
sema_stmts(SemaCtx *sctx, Vec(Ast *) stmts);
static DataType *
resolve_datatype(SemaCtx *sctx, const Str ident, Location loc);
/* Allocates an empty scope chained to `prev`. The scope's symbol map is
 * created with `InvalidSymbol` as its default value. */
static Scope *
make_scope(Scope *prev)
{
    Scope *scope = malloc(sizeof *scope);

    scope->symbols = nil;
    sh_new_arena(scope->symbols);
    shdefault(scope->symbols, InvalidSymbol);
    scope->prev = prev;
    return scope;
}
/* Allocates a zero-initialized context frame bound to compiler `cm` and
 * linked to the frame `prev` (which may be nil). */
static SemaCtx *
make_semactx(Compiler *cm, SemaCtx *prev)
{
    SemaCtx *frame = calloc(1, sizeof *frame);

    frame->prev = prev;
    frame->cm = cm;
    return frame;
}
/* Allocates a zeroed data type descriptor and fills in its kind, size,
 * builtin flag and signedness. */
static DataType *
make_data_type(enum DataTypeKind kind, u16 size, bool builtin, bool sign)
{
    DataType *type = calloc(1, sizeof *type);

    type->sign = sign;
    type->builtin = builtin;
    type->size = size;
    type->kind = kind;
    return type;
}
/* Allocates a zeroed proc data type with the given return type and argument
 * type list. The `argtypes` vector is stored as-is, not copied. */
static DataType *
make_proc_type(bool builtin, DataType *rettype, Vec(DataType *) argtypes)
{
    DataType *proc_type = calloc(1, sizeof *proc_type);

    proc_type->kind = DtkProc;
    proc_type->proc.argtypes = argtypes;
    proc_type->proc.rettype = rettype;
    proc_type->builtin = builtin;
    return proc_type;
}
/* Resolves the type name of every identifier/type pair in `idents` and
 * returns the resulting data types in the same order. Returns nil when
 * `idents` is nil. */
static Vec(DataType *)
make_type_list_from_idents(SemaCtx *sctx, Vec(AstIdentTypePair) idents)
{
    Vec(DataType *) resolved = nil;

    if (idents != nil) {
        foreach (pair, idents) {
            arrput(resolved, resolve_datatype(sctx, pair.dtype, pair.dtype_loc));
        }
    }
    return resolved;
}
/* Builds a dynamic vector holding a copy of the `len` data type pointers
 * found in `a`. Returns nil when `len` is not positive. */
static Vec(DataType *)
make_proc_args(DataType *a[], isize len)
{
    Vec(DataType *) args = nil;

    if (len <= 0)
        return args;
    arrsetlen(args, len);
    /* memcpy counts bytes, not elements: scale by the element size so that
     * every pointer is copied and not just the first `len` bytes. */
    memcpy(args, a, (size_t)len * sizeof(*a));
    return args;
}
/* Pushes a new context frame. The new frame inherits the flags, scopes,
 * builtin type table and status of the previous frame, and `*sctx` is
 * updated to point to it.
 * XXX: could rather only push flags...
 */
static void
push_semactx(SemaCtx **sctx)
{
    SemaCtx *parent = *sctx;
    SemaCtx *frame = make_semactx(parent->cm, parent);

    frame->flags = parent->flags;
    frame->current_scope = parent->current_scope;
    frame->top_scope = parent->top_scope;
    /* make_semactx zeroes the frame; without this copy the builtin types
     * (e.g. `void_t`, compared against in sema_proccall) would read as nil
     * inside every nested frame. */
    frame->builtintypes = parent->builtintypes;
    frame->ok = parent->ok;
    *sctx = frame;
}
/* Pops the current context frame: its `ok` status is handed to the previous
 * frame, the frame is freed and `*sctx` is rewound to the previous one.
 * Popping the outermost frame is a compiler bug. */
static void
pop_semactx(SemaCtx **sctx)
{
    SemaCtx *frame = *sctx;
    SemaCtx *prev = frame->prev;

    compiler_assert((*sctx)->cm, prev != nil);
    prev->ok = frame->ok;
    free(frame);
    *sctx = prev;
}
/* Opens a fresh scope nested inside the current one and makes it current. */
static void
enter_scope(SemaCtx *sctx)
{
    Scope *inner = make_scope(sctx->current_scope);

    sctx->current_scope = inner;
}
/* Leaves the current scope, making its enclosing scope current again.
 * The scope being left is not freed here. */
static void
exit_scope(SemaCtx *sctx)
{
    Scope *leaving = sctx->current_scope;

    compiler_assert(sctx->cm, sctx->current_scope->prev != nil);
    sctx->current_scope = leaving->prev;
}
/* Looks `name` up in `scope` only (enclosing scopes are not consulted).
 * Returns the symbol, or nil when the lookup yields an invalid symbol. */
Symbol *
sym_search_oncurrent(Scope *scope, const Str name)
{
    Symbol *found = &shget(scope->symbols, name.s);

    return found->kind != SymInvalid ? found : nil;
}
/* Searches for `name` starting at `scope` and walking outwards through the
 * enclosing scopes. Returns the innermost match, or nil if there is none. */
Symbol *
sym_search(Scope *scope, const Str name)
{
    for (Scope *cursor = scope; cursor != nil; cursor = cursor->prev) {
        Symbol *hit = sym_search_oncurrent(cursor, name);
        if (hit != nil)
            return hit;
    }
    return nil;
}
/* Warns about every var-like binding (proc parameters included) of the
 * current scope whose `used` flag was never set. */
static void
sema_check_unused_vars(SemaCtx *sctx)
{
    const SymbolEntry *entries = sctx->current_scope->symbols;

    for (isize i = 0; i < shlen(entries); ++i) {
        const Symbol *binding = &entries[i].value;

        /* skip bindings that were referenced and symbols that are not variables */
        if (binding->used || !symbol_is_var_binding(binding->kind))
            continue;

        const char *bind_kind_name = binding->procparm ? "proc parameter" : "variable";
        sema_warning(sctx, &binding->loc, "unused %s '%s'", bind_kind_name, entries[i].key);
    }
}
/* Placeholder for dead statement detection; currently does nothing. */
static void
sema_check_dead_stmts(SemaCtx *sctx, Vec(Ast *) stmts)
{
    /* those who forsake the CFG are doomed to implement it badly without even
     * noticing... */
    (void)sctx;
    (void)stmts;
}
/* Reports an error if the symbol `fsym`, referred to as `fident` in a call,
 * does not have a proc data type. The offending type name is not printed
 * yet; a fixed placeholder is emitted in its place. */
static void
sema_match_proc_type(SemaCtx *sctx, Symbol *fsym, Str fident)
{
    if (fsym->dtype->kind == DtkProc)
        return;

    sema_error(
        sctx, nil,
        "cannot call '%s' because has non-proc type '%s'",
        fident.s, "uh"
    );
}
/* Analyzes a proc call: the callee must be declared and have a proc type,
 * the number of arguments must match the proc's parameter count, and every
 * argument expression is analyzed. A non-void result must be consumed,
 * either by an enclosing expression or by 'discard'.
 *
 * Returns the proc's return type, or nil if any error was reported. */
static DataType *
sema_proccall(SemaCtx *sctx, const AstProcCall *call, Location loc)
{
    Symbol *fsym = sym_search(sctx->current_scope, call->name);
    if (fsym == nil) {
        sema_error(sctx, &loc, "call to undeclared proc '%s'", call->name.s);
        return nil;
    }
    fsym->used = true;
    sema_match_proc_type(sctx, fsym, call->name);
    /* The mismatch was reported above; stop here, because the proc-specific
     * fields read below are meaningless for any other kind of type. */
    if (fsym->dtype->kind != DtkProc)
        return nil;

    /* check call arguments */
    const isize proc_arglen = arrlen(fsym->dtype->proc.argtypes);
    if (call->args != nil) {
        compiler_assert(sctx->cm, call->args->type == AST_EXPRS);
        const isize call_arglen = arrlen(call->args->exprs);
        if (call_arglen != proc_arglen) {
            const char *at_most = call_arglen > proc_arglen ? "s at most" : "";
            sema_error(
                sctx, &loc,
                "argument length mismatch: given %li arguments to '%s' but it expects %li argument%s",
                call_arglen, call->name.s, proc_arglen, at_most
            );
            return nil;
        }
        sema_expr_list(sctx, call->args->exprs, loc); /* now sema-check the args */
    } else if (proc_arglen != 0) {
        sema_error(sctx, &loc, "'%s' proc takes %li argument(s), but none given",
                   call->name.s, proc_arglen);
        return nil;
    }

    /* a non-void result has to be used by an expression or discarded */
    if (fsym->dtype->proc.rettype != sctx->builtintypes.void_t
        && (~sctx->flags & SctxInDiscard)
        && (~sctx->flags & SctxInExpr)) {
        sema_error(sctx, &loc, "result of function call with non-void type ignored");
        sema_note(sctx, &loc, "use 'discard' if this was intentional");
        return nil;
    }
    return fsym->dtype->proc.rettype;
}
/************ Semantic and type checking of expressions ************/
/* Type checking for expressions is done inside-out */
/* Type rule axiom: every number literal gets the builtin `u64` type, which
 * is also recorded on the AST node. */
static DataType *
sema_expr_number(SemaCtx *sctx, AstNumber *num)
{
    Symbol *u64_sym = sym_search_oncurrent(sctx->top_scope, Sl("u64"));

    num->type = u64_sym->dtype;
    return num->type;
}
/* Type rule axiom: every string literal has the builtin `string` type. */
static DataType *
sema_expr_strlit(SemaCtx *sctx, const Str *strlit)
{
    (void)sctx, (void)strlit;

    Symbol *string_sym = sym_search_oncurrent(sctx->top_scope, Sl("string"));
    return string_sym->dtype;
}
/* Resolves an identifier used inside an expression and marks its symbol as
 * used. Reports an error and returns nil if the identifier is undeclared or
 * names a data type. */
static Symbol *
sema_expr_ident(SemaCtx *sctx, const Str ident)
{
    Symbol *resolved = sym_search(sctx->current_scope, ident);

    if (resolved == nil) {
        sema_error(sctx, nil, "undeclared identifier '%s'", ident.s);
        return nil;
    }
    if (resolved->kind == SymType) {
        sema_error(sctx, nil, "data type '%s' used as identifier in expression", ident.s);
        return nil;
    }

    resolved->used = true;
    return resolved;
}
/* Analyzes a unary expression. The operator itself is not checked yet: the
 * result is simply the type of the operand expression. */
static DataType *
sema_expr_unary(SemaCtx *sctx, AstUnary *unary, Location loc)
{
    Ast *expr = unary->atom;

    compiler_assert(sctx->cm, ast_node_is_expr(expr->type));
    return sema_expr(sctx, expr, loc);
}
/* Analyzes a binary expression. The operator must be registered in the top
 * scope as a symbol whose proc type takes two arguments, and both operand
 * types must compare as compatible.
 *
 * Returns the type of the left operand on success, `InvalidDataType` when
 * either operand already has that type, and nil when an error is reported.
 * NOTE(review): the operator's declared return type is not used for the
 * result — confirm whether comparison operators should yield `bool`. */
static DataType *
sema_binop(SemaCtx *sctx, const AstBinop *expr, Location loc)
{
    Symbol *opsym = sym_search_oncurrent(sctx->top_scope, expr->op);
    if (opsym == nil) {
        /* report at the expression's location instead of without one */
        sema_error(sctx, &loc, "no operator '%s'", expr->op.s);
        return nil;
    }
    if (arrlen(opsym->dtype->proc.argtypes) != 2) {
        sema_error(sctx, &loc, "no binary operator for '%s'", expr->op.s);
        return nil;
    }

    DataType *ldt = sema_expr(sctx, expr->left, loc);
    DataType *rdt = sema_expr(sctx, expr->right, loc);
    /* Skip typechecking if either ldt or rdt have `InvalidDataType` and propagate
     * it up the call stack. */
    if (ldt == InvalidDataType || rdt == InvalidDataType)
        return (DataType *)InvalidDataType;

    DataTypeCheck tchk;
    if (!(tchk = datatype_cmp(sctx, ldt, rdt)).ok) {
        sema_error(sctx, &loc, "type error: %s", tchk.msg.s);
        return nil;
    }
    return ldt;
}
/* Analyzes an expression node and returns its data type.
 *
 * The analysis runs in its own context frame with `SctxInExpr` set, so that
 * nested proc calls know their result is being consumed. The result is
 * whatever the kind-specific handler yields; for an identifier that fails to
 * resolve it is `InvalidDataType` (the error was already reported). */
static DataType *
sema_expr(SemaCtx *sctx, Ast *expr, Location loc)
{
    compiler_assert(sctx->cm, ast_node_is_expr(expr->type));
    push_semactx(&sctx);
    sctx->flags |= SctxInExpr;

    DataType *dt = nil;
    switch (expr->type) {
    case AST_BINEXPR:
        dt = sema_binop(sctx, &expr->bin, loc);
        break;
    case AST_UNARY:
        dt = sema_expr_unary(sctx, &expr->unary, loc);
        break;
    case AST_NUMBER:
        dt = sema_expr_number(sctx, &expr->number);
        break;
    case AST_STRLIT:
        dt = sema_expr_strlit(sctx, &expr->strlit);
        break;
    case AST_IDENT: {
        /* sema_expr_ident returns nil for undeclared identifiers and for
         * type names; do not dereference it in that case. */
        Symbol *ident_sym = sema_expr_ident(sctx, expr->ident);
        dt = ident_sym != nil ? ident_sym->dtype : (DataType *)InvalidDataType;
        break;
    }
    case AST_PROCCALL:
        dt = sema_proccall(sctx, &expr->call, expr->loc);
        break;
    default:
        unreachable();
    }

    pop_semactx(&sctx);
    return dt;
}
/* Analyzes every expression of `exprs` in order; the resulting types are
 * not collected. */
static void
sema_expr_list(SemaCtx *sctx, Vec(Ast *) exprs, Location loc)
{
    foreach (item, exprs)
        sema_expr(sctx, item, loc);
}
/************ Type checking ************/
/* Structurally compares two struct data types: they match when both have the
 * same `packed` setting, the same number of fields, and pairwise compatible
 * field types. The first failing field check is returned unchanged. */
static DataTypeCheck
datatype_struct_can_cast(SemaCtx *sctx, DataType *s1, DataType *s2)
{
    compiler_assert(sctx->cm, s1->kind == DtkStruct && s2->kind == DtkStruct);

    const DataTypeCompound *lhs = &s1->compound;
    const DataTypeCompound *rhs = &s2->compound;
    if (lhs->packed != rhs->packed || arrlen(lhs->fields) != arrlen(rhs->fields))
        return (DataTypeCheck){false, Sl("")};

    for (isize i = 0; i < arrlen(lhs->fields); ++i) {
        DataTypeCheck field_chk = datatype_cmp(sctx, lhs->fields[i], rhs->fields[i]);
        if (!field_chk.ok)
            return field_chk;
    }
    return (DataTypeCheck){.ok = true};
}
/* Compares two array data types: the lengths must be equal and the element
 * types must compare as compatible. */
static DataTypeCheck
datatype_array_cmp(SemaCtx *sctx, DataType *a1, DataType *a2)
{
    if (a1->array.len == a2->array.len)
        return datatype_cmp(sctx, a1->array.base, a2->array.base);
    return (DataTypeCheck){false, Sl("")};
}
/* Compares two proc data types: visibility, external linkage, C varargs and
 * argument count must be identical, and the return type and every argument
 * type must compare as compatible. */
static DataTypeCheck
datatype_proc_can_cast(SemaCtx *sctx, DataType *pc1, DataType *pc2)
{
    const bool same_shape =
        pc1->proc.public == pc2->proc.public
        && pc1->proc.extern_lnk == pc2->proc.extern_lnk
        && pc1->proc.c_varargs == pc2->proc.c_varargs
        && arrlen(pc1->proc.argtypes) == arrlen(pc2->proc.argtypes);
    if (!same_shape)
        return (DataTypeCheck){false, Sl("")};

    DataTypeCheck result = datatype_cmp(sctx, pc1->proc.rettype, pc2->proc.rettype);
    for (isize i = 0; result.ok && i < arrlen(pc1->proc.argtypes); ++i)
        result = datatype_cmp(sctx, pc1->proc.argtypes[i], pc2->proc.argtypes[i]);
    return result;
}
/* Compares two basic data types. The check passes when `dt1` is not larger
 * than `dt2` and both have the same signedness. */
static DataTypeCheck
datatype_basic_cmp(SemaCtx *sctx, DataType *dt1, DataType *dt2)
{
    (void)sctx;

    if (dt1->size <= dt2->size) {
        if (dt1->sign == dt2->sign)
            return (DataTypeCheck){.ok = true};
        return (DataTypeCheck){false, Strafmt("integers with different sign")};
    }
    /* dt1 is wider than dt2 */
    return (DataTypeCheck){false, Sl("")};
}
/* Compares two data types. The returned check has `ok` set when they are
 * compatible; otherwise `msg` always holds a valid (possibly empty) string,
 * so callers can print it directly.
 *
 * Identical pointers always match. Basic and array types are compared by
 * their contents; every other kind is nominally typed and matches only
 * through pointer identity. A nil operand never matches. */
static DataTypeCheck
datatype_cmp(SemaCtx *sctx, DataType *dt1, DataType *dt2)
{
    if (dt1 == nil || dt2 == nil)
        return (DataTypeCheck){false, Sl("")};
    /* TODO: return more information in case of a mismatch... */
    if (dt1 == dt2) /* shallow comparison */
        return (DataTypeCheck){.ok = true};
    if (dt1->kind != dt2->kind)
        return (DataTypeCheck){false, Sl("data types of different kinds")};

    switch (dt1->kind) {
    case DtkBasic:
        return datatype_basic_cmp(sctx, dt1, dt2);
    case DtkArray:
        return datatype_array_cmp(sctx, dt1, dt2);
    /* Nominally typed, should have been caught by the shallow comparison above */
    case DtkBool:
    case DtkVoid:
    case DtkStruct:
    case DtkUnion:
    case DtkProc:
        break;
    case DtkInvalid:
        unreachable();
    }
    return (DataTypeCheck){false, Sl("mismatched data types")};
}
/* Returns the data type of an already analyzed expression node. Binary,
 * unary and number nodes carry their type in the AST; string literals,
 * identifiers and proc calls are looked up in the symbol table again. */
static DataType *
expr_get_datatype(SemaCtx *sctx, Ast *expr)
{
    compiler_assert(sctx->cm, ast_node_is_expr(expr->type));

    DataType *found = nil;
    switch (expr->type) {
    case AST_BINEXPR:
        found = expr->bin.type;
        break;
    case AST_UNARY:
        found = expr->unary.type;
        break;
    case AST_NUMBER:
        found = expr->number.type;
        break;
    case AST_STRLIT:
        found = sym_search_oncurrent(sctx->top_scope, Sl("string"))->dtype;
        break;
    /* XXX: for these two we could attach the type in the ast... */
    case AST_IDENT:
        found = sym_search(sctx->current_scope, expr->ident)->dtype;
        break;
    case AST_PROCCALL:
        found = sym_search(sctx->current_scope, expr->call.name)->dtype->proc.rettype;
        break;
    default:
        unreachable();
    }
    return found;
}
/* Looks up `ident` in the symbol table and returns its data type, provided
 * that the symbol is a type. Otherwise an error is reported at `loc` and
 * `InvalidDataType` is returned. */
static DataType *
resolve_datatype(SemaCtx *sctx, const Str ident, Location loc)
{
    Symbol *named = sym_search(sctx->current_scope, ident);

    if (named != nil && named->kind == SymType)
        return named->dtype;

    if (named == nil) {
        sema_error(sctx, &loc, "no such type '%s'", ident.s);
    } else {
        sema_error(sctx, &loc, "'%s' is not a type but a %s", ident.s, SymbolKindStr[named->kind]);
    }
    return (DataType *)InvalidDataType;
}
/* Analyzes a proc definition.
 *
 * The proc name must not be visible yet from the current scope. `main` is
 * recorded as defined and must be public. The return type defaults to `void`
 * when omitted. The proc symbol is inserted into the current scope before the
 * body is analyzed. The body runs in its own context frame and scope, into
 * which the parameters are injected as symbols flagged `procparm`; unused
 * bindings of that scope are reported afterwards. */
static void
sema_procdef(SemaCtx *sctx, AstProc *proc, Location loc)
{
    Symbol *sym_prev;
    if ((sym_prev = sym_search(sctx->current_scope, proc->name)) != nil) {
        /* point at the offending definition; the note below points at the
         * earlier declaration */
        sema_error(
            sctx, &loc,
            "'%s' was already declared as a %s",
            proc->name.s, SymbolKindStr[sym_prev->kind]
        );
        sema_note(sctx, &sym_prev->loc, "'%s' previously declared here", proc->name.s);
        return;
    }
    if (Str_equal(proc->name, Sl("main"))) {
        sctx->main_defined = true;
        if (!proc->ispublic) {
            sema_error(sctx, &loc, "'main' has to be declared as a public proc");
        }
    }

    const Ast *rettype_node = proc->rettype;
    DataType *proc_rettype = nil;
    if (rettype_node != nil) {
        compiler_assert(sctx->cm, rettype_node->type == AST_IDENT);
        proc_rettype = resolve_datatype(sctx, proc->rettype->ident, rettype_node->loc);
        if (proc_rettype == InvalidDataType)
            return;
    } else {
        /* return type node is nil, we infer that as a `void` type */
        proc_rettype = sctx->builtintypes.void_t;
    }

    Vec(DataType *) procargs = make_type_list_from_idents(sctx, proc->args);
    DataType *procdtype = make_proc_type(false, proc_rettype, procargs);
    procdtype->proc.public = proc->ispublic;
    Symbol proc_sym = {
        .kind = SymProc,
        .dtype = procdtype,
        .loc = loc
    };
    sym_insert(sctx->current_scope->symbols, proc->name.s, proc_sym);
    proc->type = procdtype;

    /* proc has no body at all */
    if (proc->body == nil)
        return;

    /* analyze the body */
    compiler_assert(sctx->cm, proc->body->type == AST_STMTS);
    push_semactx(&sctx);
    enter_scope(sctx);
    compiler_assert(sctx->cm, arrlen(proc->args) == arrlen(procargs));
    /* Inject proc parameters into the proc body top scope */
    for (isize i = 0; i < arrlen(proc->args); ++i) {
        DataType *argdtype = procargs[i];
        enum SymbolKind argsymkind = proc->args[i].kind;
        compiler_assert(sctx->cm, argdtype != nil);
        compiler_assert(sctx->cm, argsymkind == SymLet || argsymkind == SymVar);
        Symbol argsym = {
            .kind = argsymkind,
            .dtype = argdtype,
            .procparm = true,
            .loc = proc->args[i].ident_loc
        };
        sym_insert(sctx->current_scope->symbols, proc->args[i].ident.s, argsym);
    }
    sctx->flags |= SctxInsideProc;
    sema_stmts(sctx, proc->body->stmts);
    /* must run before the scope is left, it inspects the current scope */
    sema_check_unused_vars(sctx);
    exit_scope(sctx);
    pop_semactx(&sctx);
}
/* Analyzes a 'return' statement: it is only valid inside a proc, and its
 * optional expression is analyzed. The returned value's type is not compared
 * against the proc's return type here. */
static void
sema_return(SemaCtx *sctx, Ast *ret_expr, Location loc)
{
    if (!(sctx->flags & SctxInsideProc)) {
        sema_error(sctx, &loc, "'return' outside of proc");
    }
    if (ret_expr == nil)
        return;
    sema_expr(sctx, ret_expr, ret_expr->loc);
}
/* Analyzes a 'break' statement, which is only valid inside a loop.
 * The second parameter is ignored. */
static void
sema_break(SemaCtx *sctx, Ast *unused, Location loc)
{
    (void)unused;

    if (sctx->flags & SctxInsideLoop)
        return;
    sema_error(sctx, &loc, "'break' used outside of a loop");
}
/* Analyzes the expression of a 'discard' statement. `SctxInDiscard` is set
 * only while the expression is analyzed, which lets sema_proccall accept a
 * non-void result that is otherwise ignored. */
static void
sema_discard(SemaCtx *sctx, Ast *expr, Location loc)
{
sctx->flags |= SctxInDiscard;
sema_expr(sctx, expr, loc);
/* the flag is cleared unconditionally afterwards */
sctx->flags &= ~SctxInDiscard;
}
/* Analyzes the node a pragma is attached to; the pragma itself is not
 * inspected. */
static void
sema_pragma(SemaCtx *sctx, AstPragma *attr)
{
    Ast *annotated = attr->node;

    sema_node(sctx, annotated);
}
/* Analyzes a variable declaration ('let', 'var', ...).
 *
 * A visible symbol with the same name but a different kind is rejected. The
 * initializer, if any, is analyzed. The variable's type is the explicitly
 * written type when present, otherwise the type of the initializer. The
 * variable is always inserted into the current scope, even when its type
 * could not be determined, to avoid follow-up "undeclared identifier"
 * errors. */
static void
sema_var_decl(SemaCtx *sctx, AstVarDecl *decl, Location loc)
{
    compiler_assert(sctx->cm, symbol_is_var_binding(decl->kind));
    const Symbol *symp = sym_search(sctx->current_scope, decl->name);
    if (symp != nil && symp->kind != decl->kind) {
        switch (symp->kind) {
        case SymLet:
            sema_error(sctx, &symp->loc, "'%s' was already declared as 'let'", decl->name.s);
            return;
        case SymVar:
            sema_error(sctx, &symp->loc, "'%s' was already declared as 'var'", decl->name.s);
            return;
        case SymConst:
            sema_error(
                sctx, &symp->loc,
                "declaration of '%s' shadows previously declared constant with the same name",
                decl->name.s
            );
            return;
        case SymType:
            sema_error(sctx, &symp->loc, "'%s' was already declared as a type", decl->name.s);
            return;
        default:
            break;
        }
        sema_note(sctx, &symp->loc, "'%s' was declared in this line", decl->name.s);
    }

    Ast *dexpr = decl->expr;
    DataType *dexpr_dt = nil;
    if (dexpr != nil) {
        /* check the assignment expression */
        dexpr_dt = sema_expr(sctx, dexpr, loc);
    } else if (decl->datatype != nil) {
        sema_warning(sctx, &loc, "variable is unitialized");
    }

    if (decl->datatype != nil) { /* explicit data type specified */
        compiler_assert(sctx->cm, decl->datatype->type == AST_IDENT);
        /* Note that we ignore whether `resolve_datatype` return an invalid type,
         * since we still want to insert the variable into the symbol table,
         * otherwise we would have spurious "undeclared identifier" errors. */
        decl->type = resolve_datatype(sctx, decl->datatype->ident, decl->datatype->loc);
    } else {
        /* the parser should catch this (the grammar requires it) */
        compiler_assert(sctx->cm, dexpr != nil);
        /* A nil type means the initializer failed analysis and the error was
         * already reported: that is a user error, not a compiler bug, so fall
         * back to the invalid type instead of asserting. */
        decl->type = dexpr_dt != nil ? dexpr_dt : (DataType *)InvalidDataType;
    }
    compiler_assert(sctx->cm, decl->type != nil);

    Symbol sym = {
        .kind = decl->kind,
        .dtype = decl->type,
        .loc = loc,
    };
    /* Insert the variable to the symbol table */
    sym_insert(sctx->current_scope->symbols, decl->name.s, sym);
}
/* Analyzes an assignment to an existing variable. The target identifier and
 * the assigned expression are analyzed first; then the target must resolve
 * to a var-like binding declared as 'var'. The assigned value's type is not
 * compared against the variable's type yet. */
static void
sema_var_assign(SemaCtx *sctx, AstVarAssign *assign, Location loc)
{
    sema_expr_ident(sctx, assign->name);
    sema_expr(sctx, assign->expr, loc);

    Symbol *target = sym_search(sctx->current_scope, assign->name);
    if (target == nil) {
        sema_error(sctx, &loc, "assign to undeclared variable '%s'", assign->name.s);
    } else if (!symbol_is_var_binding(target->kind)) {
        sema_error(
            sctx, &loc,
            "assign to non-variable symbol ('%s' is a '%s')",
            assign->name.s, SymbolKindStr[target->kind]
        );
    } else if (target->kind != SymVar) {
        sema_error(
            sctx, &loc,
            "assign to immutable symbol ('%s' was declared as '%s')",
            assign->name.s, SymbolKindStr[target->kind]
        );
    }
}
/* Analyzes an 'if': the condition, the true body and the false body (in that
 * order), followed by the condition and body of every 'elif' branch. */
static void
sema_ifstmtexpr(SemaCtx *sctx, AstIf *ift, Location loc)
{
    sema_expr(sctx, ift->cond, loc);
    sema_node(sctx, ift->true_body);
    sema_node(sctx, ift->false_body);

    const isize branch_count = arrlen(ift->elifs);
    for (isize i = 0; i < branch_count; ++i) {
        AstElif *branch = ift->elifs + i;
        sema_expr(sctx, branch->cond, loc);
        sema_node(sctx, branch->body);
    }
}
/* Analyzes a loop: the optional pre- and post-conditions first, then the
 * body inside a context frame flagged `SctxInsideLoop`. */
static void
sema_loop(SemaCtx *sctx, AstLoop *loop, Location loc)
{
    Ast *conds[] = {loop->precond, loop->postcond};

    for (int k = 0; k < 2; ++k) {
        if (conds[k] != nil)
            sema_expr(sctx, conds[k], loc);
    }

    push_semactx(&sctx);
    sctx->flags |= SctxInsideLoop;
    sema_node(sctx, loop->body);
    pop_semactx(&sctx);
}
/* Analyzes a list of statements in order, within the current scope (opening
 * a scope is the caller's job). A warning is emitted for the statement that
 * directly follows a 'return' or a 'break', naming the keyword that makes it
 * unreachable. */
static void
sema_stmts(SemaCtx *sctx, Vec(Ast *) stmts)
{
    const isize stmts_len = arrlen(stmts);
    for (isize i = 0; i < stmts_len; ++i) {
        sema_node(sctx, stmts[i]);
        if (sema_is_stmt_terminal(stmts[i]) && i + 1 != stmts_len) {
            /* name the actual terminal statement instead of always 'return' */
            const char *keyword = stmts[i]->type == AST_RETURN ? "return" : "break";
            sema_warning(sctx, &stmts[i + 1]->loc, "dead code after '%s'", keyword);
        }
    }
}
/* Analyzes a statement block inside a scope of its own and reports the
 * unused bindings declared in that scope. */
static void
sema_stmt_block(SemaCtx *sctx, Vec(Ast *) stmts)
{
    enter_scope(sctx);
    sema_stmts(sctx, stmts);
    /* Check for unused bindings declared in this scope. This has to happen
     * before leaving the scope: the check inspects the current scope, which
     * would be the enclosing one after exit_scope. */
    sema_check_unused_vars(sctx);
    exit_scope(sctx);
}
/* Dispatches an AST node to the analysis routine for its kind. A nil node is
 * silently ignored. Node kinds that can only occur nested inside other nodes
 * are not expected here. */
static void
sema_node(SemaCtx *sctx, Ast *node)
{
    if (node == nil)
        return;

    const Location at = node->loc;
    switch (node->type) {
    /* control flow and blocks */
    case AST_IF:
        sema_ifstmtexpr(sctx, &node->ifse, at);
        break;
    case AST_LOOP:
        sema_loop(sctx, &node->loop, at);
        break;
    case AST_STMTS:
        sema_stmt_block(sctx, node->stmts);
        break;
    /* procs */
    case AST_PROCDEF:
        sema_procdef(sctx, &node->proc, at);
        break;
    case AST_PROCCALL:
        sema_proccall(sctx, &node->call, at);
        break;
    /* variables */
    case AST_VARDECL:
        sema_var_decl(sctx, &node->var, at);
        break;
    case AST_VARASSIGN:
        sema_var_assign(sctx, &node->varassgn, at);
        break;
    /* simple statements */
    case AST_RETURN:
        sema_return(sctx, node->ret, at);
        break;
    case AST_BREAK:
        sema_break(sctx, nil, at);
        break;
    case AST_DISCARD:
        sema_discard(sctx, node->discard.expr, at);
        break;
    case AST_PRAGMA:
        sema_pragma(sctx, &node->pragma);
        break;
    /* bare expressions */
    case AST_BINEXPR:
    case AST_UNARY:
    case AST_NUMBER:
    case AST_STRLIT:
    case AST_IDENT:
        sema_expr(sctx, node, at);
        break;
    case AST_INVALID:
    case AST_EXPRS:
    case AST_PROCCALL_ARGS:
        unreachable();
    }
}
/* Registers the builtin symbols in the current scope of `sctx`: the builtin
 * data types, the binary operators (modelled as procs over two `u64`
 * arguments) and the external `puts` proc. It also fills in
 * `sctx->builtintypes`. */
static void
sema_make_builtin_types(SemaCtx *sctx)
{
/* a symbol paired with the name it is registered under */
typedef struct {
const char *name;
Symbol sym;
} NameSym;
DataType *void_type = make_data_type(DtkVoid, 0, true, false);
DataType *str_type = make_data_type(DtkStruct, 0, false, false);
/* prototype of `puts`: takes one string, returns void, external linkage */
DataType *puts_proto = make_data_type(DtkProc, 0, false, false);
puts_proto->proc.rettype = void_type;
puts_proto->proc.argtypes = make_proc_args((DataType *[]){str_type}, 1);
puts_proto->proc.extern_lnk = true;
/* NOTE: the indices of "u64" (1) and "bool" (5) are relied upon below */
NameSym builtin_basic_types[] = {
{"void", {.kind = SymType, .dtype = void_type}},
{"u64", {.kind = SymType, .dtype = make_data_type(DtkBasic, 8, true, false)}},
{"i64", {.kind = SymType, .dtype = make_data_type(DtkBasic, 8, true, true)}},
{"cint", {.kind = SymType, .dtype = make_data_type(DtkBasic, sizeof(int), true, true)}},
{"string", {.kind = SymType, .dtype = str_type}},
{"bool", {.kind = SymType, .dtype = make_data_type(DtkBool, 1, true, false)}},
};
DataType *u64_dt = builtin_basic_types[1].sym.dtype;
DataType *bool_dt = builtin_basic_types[5].sym.dtype;
/* Arithmetic operators return `u64`, comparison operators return `bool`;
 * all of them take two `u64` arguments. */
NameSym builtin_procs[] = {
{
"+",
{
.kind = SymProc,
.dtype = make_proc_type(
true,
u64_dt,
make_proc_args((DataType *[]){u64_dt, u64_dt}, 2)
)
}
},
{
"-",
{
.kind = SymProc,
.dtype = make_proc_type(
true,
u64_dt,
make_proc_args((DataType *[]){u64_dt, u64_dt}, 2)
)
}
},
{
"==",
{
.kind = SymProc,
.dtype = make_proc_type(
true,
bool_dt,
make_proc_args((DataType *[]){u64_dt, u64_dt}, 2)
)
}
},
{
"<",
{
.kind = SymProc,
.dtype = make_proc_type(
true,
bool_dt,
make_proc_args((DataType *[]){u64_dt, u64_dt}, 2)
)
}
},
{
">",
{
.kind = SymProc,
.dtype = make_proc_type(
true,
bool_dt,
make_proc_args((DataType *[]){u64_dt, u64_dt}, 2)
)
}
},
};
/* register the types, then the operators, in the current scope */
for (isize i = 0; i < countof(builtin_basic_types); ++i) {
const char *name = builtin_basic_types[i].name;
Symbol sym = builtin_basic_types[i].sym;
sym_insert(sctx->current_scope->symbols, name, sym);
}
for (isize i = 0; i < countof(builtin_procs); ++i) {
sym_insert(sctx->current_scope->symbols,
builtin_procs[i].name, builtin_procs[i].sym);
}
/* keep direct handles to the types the analyzer refers to by identity */
sctx->builtintypes.tyu64 = builtin_basic_types[1].sym.dtype;
sctx->builtintypes.void_t = void_type;
Symbol puts_sym = {.kind = SymProc, .dtype = puts_proto};
sym_insert(sctx->current_scope->symbols, "puts", puts_sym);
}
/* Creates the top-level analysis context for compiler `cm`: a single global
 * scope, populated with the builtin symbols, that serves as both the current
 * and the top scope. The context starts in the `ok` state. */
SemaCtx *
sema_new(Compiler *cm)
{
    SemaCtx *root = make_semactx(cm, nil);

    root->current_scope = make_scope(nil);
    sema_make_builtin_types(root);
    root->top_scope = root->current_scope;
    root->ok = true;
    return root;
}
/* Releases the context frame `sctx`. Only the frame itself is freed; the
 * scopes and data types reachable from it are not released here. */
void
sema_destroy(SemaCtx *sctx)
{
free(sctx);
}
/* Entry point of the analyzer: analyzes every top-level statement of
 * `program`, requires a 'main' proc unless compiling in compile-only mode,
 * and warns about procs of the current scope that are never called and are
 * neither builtin, public nor external. */
void
sema(SemaCtx *sctx, Ast *program)
{
    /* Analyze toplevel */
    /* XXX: DRY it */
    compiler_assert(sctx->cm, program->type == AST_STMTS);
    for (isize i = 0; i < arrlen(program->stmts); ++i)
        sema_node(sctx, program->stmts[i]);

    if (!sctx->cm->opts.compile_only && !sctx->main_defined)
        sema_error(sctx, nil, "missing 'main' entrypoint proc");

    /* check unused local procedures */
    const SymbolEntry *entries = sctx->current_scope->symbols;
    for (isize i = 0; i < shlen(entries); ++i) {
        const Symbol *candidate = &entries[i].value;
        if (candidate->kind != SymProc)
            continue;

        const DataType *ptype = candidate->dtype;
        if (ptype->builtin || ptype->proc.public || ptype->proc.extern_lnk || candidate->used)
            continue;

        sema_warning(
            sctx, &candidate->loc,
            "defined proc '%s' is never called in this module", entries[i].key
        );
    }
}