984 lines
25 KiB
C
984 lines
25 KiB
C
/* Semantic analyzer and type checker */
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "pre.h"
|
|
#include "sema.h"
|
|
#include "datatype.h"
|
|
#include "location.h"
|
|
#include "symbol.h"
|
|
#include "ast.h"
|
|
#include "state.h"
|
|
#include "messages.h"
|
|
#include "libs/stb_ds.h"
|
|
|
|
/* Reports an error at `loc` and marks the semantic context as failed. */
#define sema_error(ctx, loc, ...) do { \
        error((ctx)->cm, loc, __VA_ARGS__); \
        (ctx)->ok = false; \
    } while (0)
/* Reports a warning at `loc`; the context's `ok` flag is left untouched. */
#define sema_warning(ctx, loc, ...) warning((ctx)->cm, loc, __VA_ARGS__)
/* Attaches an informational note at `loc`. */
#define sema_note(ctx, loc, ...) note((ctx)->cm, loc, __VA_ARGS__)
/* True when statement node `s` is a 'return' or a 'break'. The argument is
 * parenthesized so that expressions such as `p + 1` expand correctly; note
 * that it is still evaluated twice. */
#define sema_is_stmt_terminal(s) ((s)->type == AST_RETURN || (s)->type == AST_BREAK)
/* Inserts (or overwrites) the symbol `v` under the string key `k`. */
#define sym_insert(syms, k, v) shput(syms, k, v)
|
|
|
|
enum SemaCtxFlags /* 64 bits */
|
|
{
|
|
SctxInsideProc = BitPos(0),
|
|
SctxInsideLoop = BitPos(1),
|
|
SctxInsideIf = BitPos(2),
|
|
SctxInTopLevel = BitPos(3),
|
|
SctxInExpr = BitPos(4),
|
|
SctxInDiscard = BitPos(5),
|
|
SctxInStmtBlock = BitPos(6),
|
|
};
|
|
|
|
typedef struct {
|
|
enum SymbolKind kind;
|
|
/* The data type associated with the symbol. */
|
|
DataType *dtype;
|
|
bool used;
|
|
bool procparm; /* if its a proc parameter */
|
|
Location loc;
|
|
} Symbol;
|
|
|
|
typedef HashMapStr(Symbol) SymbolEntry;
|
|
|
|
struct Scope
|
|
{
|
|
Scope *prev; /* Previous scope in the stack */
|
|
SymbolEntry *symbols; /* All the symbols in this scope */
|
|
};
|
|
|
|
/* Success/failure status of an analysis step.
 * NOTE(review): not referenced in the visible part of this file. */
typedef struct {
    bool ok;
} SemaStatus;
|
|
|
|
static const Symbol InvalidSymbol = {.kind = SymInvalid};
|
|
static const DataType *InvalidDataType = &(DataType){.kind = DtkInvalid};
|
|
|
|
static DataTypeCheck
|
|
datatype_cmp(SemaCtx *sctx, DataType *dt1, DataType *dt2);
|
|
static DataType *
|
|
sema_expr(SemaCtx *sctx, Ast *expr, Location loc);
|
|
static void
|
|
sema_expr_list(SemaCtx *sctx, Vec(Ast *) exprs, Location loc);
|
|
static void
|
|
sema_node(SemaCtx *sctx, Ast *node);
|
|
static void
|
|
sema_stmts(SemaCtx *sctx, Vec(Ast *) stmts);
|
|
static DataType *
|
|
resolve_datatype(SemaCtx *sctx, const Str ident, Location loc);
|
|
|
|
/* Allocates an empty scope whose enclosing scope is `prev` (may be nil).
 * The symbol map is a stb_ds string hash map in arena mode, with
 * `InvalidSymbol` as the value returned for missing keys.
 * Aborts the process if the allocation fails. */
static Scope *
make_scope(Scope *prev)
{
    Scope *sc = malloc(sizeof(*sc));
    if (sc == nil)
        abort(); /* out of memory: the original would crash on the next line */
    sc->prev = prev;
    sc->symbols = nil;
    sh_new_arena(sc->symbols);
    shdefault(sc->symbols, InvalidSymbol);
    return sc;
}
|
|
|
|
static SemaCtx *
|
|
make_semactx(Compiler *cm, SemaCtx *prev)
|
|
{
|
|
SemaCtx *smc = calloc(1, sizeof(*smc));
|
|
smc->cm = cm;
|
|
smc->prev = prev;
|
|
return smc;
|
|
}
|
|
|
|
static DataType *
|
|
make_data_type(enum DataTypeKind kind, u16 size, bool builtin, bool sign)
|
|
{
|
|
DataType *dt = calloc(1, sizeof(*dt));
|
|
dt->kind = kind;
|
|
dt->size = size;
|
|
dt->builtin = builtin;
|
|
dt->sign = sign;
|
|
return dt;
|
|
}
|
|
|
|
static DataType *
|
|
make_proc_type(bool builtin, DataType *rettype, Vec(DataType *) argtypes)
|
|
{
|
|
DataType *pdt = calloc(1, sizeof(*pdt));
|
|
pdt->kind = DtkProc;
|
|
pdt->builtin = builtin;
|
|
pdt->proc.rettype = rettype;
|
|
pdt->proc.argtypes = argtypes;
|
|
return pdt;
|
|
}
|
|
|
|
/* Resolves the declared type of every identifier/type pair and returns the
 * results as a new list, in the same order. Returns nil when `idents` is
 * nil. Unresolvable type names are reported by resolve_datatype. */
static Vec(DataType *)
make_type_list_from_idents(SemaCtx *sctx, Vec(AstIdentTypePair) idents)
{
    Vec(DataType *) resolved = nil;

    if (idents != nil) {
        const isize count = arrlen(idents);
        for (isize i = 0; i < count; ++i) {
            DataType *dt = resolve_datatype(sctx, idents[i].dtype, idents[i].dtype_loc);
            arrput(resolved, dt);
        }
    }
    return resolved;
}
|
|
|
|
/* Builds a dynamic array holding a copy of the `len` type pointers in `a`.
 * Returns nil when `len` is not positive. */
static Vec(DataType *)
make_proc_args(DataType *a[], isize len)
{
    Vec(DataType *) args = nil;
    if (len <= 0)
        return args;

    arrsetlen(args, len);
    /* memcpy counts bytes, not elements: scale by the element size. The
     * original copied only `len` bytes, leaving the list mostly garbage. */
    memcpy(args, a, (size_t)len * sizeof(*a));
    return args;
}
|
|
|
|
/* Pushes a new context frame. Note that this inherits the flags and scope of the
|
|
* previous context frame.
|
|
* XXX: could rather only push flags...
|
|
*/
|
|
static void
|
|
push_semactx(SemaCtx **sctx)
|
|
{
|
|
SemaCtx *tmp = make_semactx((*sctx)->cm, *sctx);
|
|
tmp->flags = (*sctx)->flags;
|
|
tmp->current_scope = (*sctx)->current_scope;
|
|
tmp->top_scope = (*sctx)->top_scope;
|
|
tmp->ok = (*sctx)->ok;
|
|
*sctx = tmp;
|
|
}
|
|
|
|
/* Pops the current context frame. */
|
|
static void
|
|
pop_semactx(SemaCtx **sctx)
|
|
{
|
|
SemaCtx *prev = (*sctx)->prev;
|
|
compiler_assert((*sctx)->cm, prev != nil);
|
|
prev->ok = (*sctx)->ok;
|
|
free(*sctx);
|
|
*sctx = prev;
|
|
}
|
|
|
|
static void
|
|
enter_scope(SemaCtx *sctx)
|
|
{
|
|
sctx->current_scope = make_scope(sctx->current_scope);
|
|
}
|
|
|
|
static void
|
|
exit_scope(SemaCtx *sctx)
|
|
{
|
|
compiler_assert(sctx->cm, sctx->current_scope->prev != nil);
|
|
sctx->current_scope = sctx->current_scope->prev;
|
|
}
|
|
|
|
Symbol *
|
|
sym_search_oncurrent(Scope *scope, const Str name)
|
|
{
|
|
Symbol *sym = &shget(scope->symbols, name.s);
|
|
if (sym->kind != SymInvalid)
|
|
return sym;
|
|
return nil;
|
|
}
|
|
|
|
/* Searches for a symbol in the current and previous scopes */
|
|
Symbol *
|
|
sym_search(Scope *scope, const Str name)
|
|
{
|
|
Scope *sp = scope;
|
|
Symbol *sym = nil;
|
|
while (sp != nil && sym == nil) {
|
|
sym = sym_search_oncurrent(sp, name);
|
|
sp = sp->prev;
|
|
}
|
|
return sym;
|
|
}
|
|
|
|
/* Scans through the current scope for any unused var-like bindings
|
|
* (including proc parameters)
|
|
*/
|
|
static void
|
|
sema_check_unused_vars(SemaCtx *sctx)
|
|
{
|
|
/* Very simple, iterate over all bindings on this scope and report any that
|
|
* doesn't have the 'used' flag toggled. */
|
|
const SymbolEntry *syms = sctx->current_scope->symbols;
|
|
for (isize i = 0; i < shlen(syms); ++i) {
|
|
const Symbol sym = syms[i].value;
|
|
if (!sym.used && symbol_is_var_binding(sym.kind)) {
|
|
const char *bind_kind_name = !sym.procparm ? "variable" : "proc parameter"; // SymbolKindStr[sym.kind] : "proc parameter";
|
|
sema_warning(
|
|
sctx, &sym.loc,
|
|
"unused %s '%s'", bind_kind_name, syms[i].key
|
|
);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
sema_check_dead_stmts(SemaCtx *sctx, Vec(Ast *) stmts)
|
|
{
|
|
(void)sctx, (void)stmts;
|
|
/* those who forsake the CFG are doomed to implement it badly without even
|
|
* noticing... */
|
|
}
|
|
|
|
static void
|
|
sema_match_proc_type(SemaCtx *sctx, Symbol *fsym, Str fident)
|
|
{
|
|
if (fsym->dtype->kind != DtkProc) {
|
|
sema_error(
|
|
sctx, nil,
|
|
"cannot call '%s' because has non-proc type '%s'",
|
|
fident.s, "uh"
|
|
);
|
|
return;
|
|
}
|
|
}
|
|
|
|
static DataType *
|
|
sema_proccall(SemaCtx *sctx, const AstProcCall *call, Location loc)
|
|
{
|
|
Symbol *fsym = sym_search(sctx->current_scope, call->name);
|
|
if (fsym == nil) {
|
|
sema_error(sctx, &loc, "call to undeclared proc '%s'", call->name.s);
|
|
return nil;
|
|
}
|
|
|
|
fsym->used = true;
|
|
sema_match_proc_type(sctx, fsym, call->name);
|
|
|
|
/* check call arguments */
|
|
const isize proc_arglen = arrlen(fsym->dtype->proc.argtypes);
|
|
if (call->args != nil) {
|
|
compiler_assert(sctx->cm, call->args->type == AST_EXPRS);
|
|
const isize call_arglen = arrlen(call->args->exprs);
|
|
|
|
if (call_arglen != proc_arglen) {
|
|
const char *at_most = call_arglen > proc_arglen ? "s at most" : "";
|
|
sema_error(
|
|
sctx, &loc,
|
|
"argument length mismatch: given %li arguments to '%s' but it expects %li argument%s",
|
|
call_arglen, call->name.s, proc_arglen, at_most
|
|
);
|
|
return nil;
|
|
}
|
|
sema_expr_list(sctx, call->args->exprs, loc); /* now sema-check the args */
|
|
} else if (call->args == nil && proc_arglen != 0) {
|
|
sema_error(sctx, &loc, "'%s' proc takes %li argument(s), but none given",
|
|
call->name.s, proc_arglen);
|
|
return nil;
|
|
}
|
|
|
|
if (fsym->dtype->proc.rettype != sctx->builtintypes.void_t
|
|
&& (~sctx->flags & SctxInDiscard)
|
|
&& (~sctx->flags & SctxInExpr)) {
|
|
sema_error(sctx, &loc, "result of function call with non-void type ignored");
|
|
sema_note(sctx, &loc, "use 'discard' if this was intentional");
|
|
return nil;
|
|
}
|
|
|
|
for (isize i = 0; i < proc_arglen; ++i) {
|
|
;
|
|
}
|
|
return fsym->dtype;
|
|
}
|
|
|
|
/************ Semantic and type checking of expressions ************/
|
|
/* Type checking for expressions is done inside-out */
|
|
|
|
static DataType *
|
|
sema_expr_number(SemaCtx *sctx, AstNumber *num)
|
|
{
|
|
#define pow2(exp) (2 << (exp - 1))
|
|
/* type rule axiom */
|
|
num->type = sym_search_oncurrent(sctx->top_scope, Sl("u64"))->dtype;
|
|
return num->type;
|
|
#undef pow2
|
|
}
|
|
|
|
static DataType *
|
|
sema_expr_strlit(SemaCtx *sctx, const Str *strlit)
|
|
{
|
|
(void)sctx, (void)strlit;
|
|
/* type rule axiom */
|
|
return sym_search_oncurrent(sctx->top_scope, Sl("string"))->dtype;
|
|
}
|
|
|
|
static Symbol *
|
|
sema_expr_ident(SemaCtx *sctx, const Str ident)
|
|
{
|
|
Symbol *ident_sym = sym_search(sctx->current_scope, ident);
|
|
if (ident_sym == nil) {
|
|
sema_error(sctx, nil, "undeclared identifier '%s'", ident.s);
|
|
return nil;
|
|
}
|
|
if (ident_sym->kind == SymType) {
|
|
sema_error(sctx, nil, "data type '%s' used as identifier in expression", ident.s);
|
|
return nil;
|
|
}
|
|
ident_sym->used = true;
|
|
return ident_sym;
|
|
}
|
|
|
|
static DataType *
|
|
sema_expr_unary(SemaCtx *sctx, AstUnary *unary, Location loc)
|
|
{
|
|
Ast *expr = unary->atom;
|
|
compiler_assert(sctx->cm, ast_node_is_expr(expr->type));
|
|
//if (expr->type == AST_STRLIT) {
|
|
// sema_error(sctx, nil, "%s with a string literal makes no sense\n", TokenIdStr[unary->op]);
|
|
// return;
|
|
//}
|
|
|
|
//if (expr->type == AST_NUMBER) {
|
|
// if (unary->op == T_MINUS && !expr->number.type->sign) {
|
|
// }
|
|
//}
|
|
return sema_expr(sctx, expr, loc);
|
|
}
|
|
|
|
static DataType *
|
|
sema_binop(SemaCtx *sctx, const AstBinop *expr, Location loc)
|
|
{
|
|
Symbol *opsym = sym_search_oncurrent(sctx->top_scope, expr->op);
|
|
if (opsym == nil) {
|
|
sema_error(sctx, nil, "no operator '%s'", expr->op.s);
|
|
return nil;
|
|
}
|
|
if (arrlen(opsym->dtype->proc.argtypes) != 2) {
|
|
sema_error(sctx, nil, "no binary operator for '%s'", expr->op.s);
|
|
return nil;
|
|
}
|
|
|
|
DataType *ldt = sema_expr(sctx, expr->left, loc);
|
|
DataType *rdt = sema_expr(sctx, expr->right, loc);
|
|
/* Skip typechecking if either ldt or rdt have `InvalidDataType` and propagate
|
|
* it up the call stack. */
|
|
if (ldt == InvalidDataType || rdt == InvalidDataType)
|
|
return (DataType *)InvalidDataType;
|
|
|
|
DataTypeCheck tchk;
|
|
if (!(tchk = datatype_cmp(sctx, ldt, rdt)).ok) {
|
|
sema_error(sctx, &loc, "type error: %s", tchk.msg.s);
|
|
return nil;
|
|
}
|
|
return ldt;
|
|
}
|
|
|
|
static DataType *
|
|
sema_expr(SemaCtx *sctx, Ast *expr, Location loc)
|
|
{
|
|
compiler_assert(sctx->cm, ast_node_is_expr(expr->type));
|
|
push_semactx(&sctx);
|
|
sctx->flags |= SctxInExpr;
|
|
|
|
DataType *dt = nil;
|
|
switch (expr->type) {
|
|
case AST_BINEXPR:
|
|
dt = sema_binop(sctx, &expr->bin, loc);
|
|
break;
|
|
case AST_UNARY:
|
|
dt = sema_expr_unary(sctx, &expr->unary, loc);
|
|
break;
|
|
case AST_NUMBER:
|
|
dt = sema_expr_number(sctx, &expr->number);
|
|
break;
|
|
case AST_STRLIT:
|
|
dt = sema_expr_strlit(sctx, &expr->strlit);
|
|
break;
|
|
case AST_IDENT:
|
|
dt = sema_expr_ident(sctx, expr->ident)->dtype;
|
|
break;
|
|
case AST_PROCCALL:
|
|
dt = sema_proccall(sctx, &expr->call, expr->loc);
|
|
break;
|
|
default:
|
|
unreachable();
|
|
}
|
|
|
|
pop_semactx(&sctx);
|
|
return dt;
|
|
}
|
|
|
|
static void
|
|
sema_expr_list(SemaCtx *sctx, Vec(Ast *) exprs, Location loc)
|
|
{
|
|
foreach (expr, exprs) {
|
|
sema_expr(sctx, expr, loc);
|
|
}
|
|
}
|
|
|
|
/************ Type checking ************/
|
|
|
|
/* Structurally compare two structural data types. */
|
|
static DataTypeCheck
|
|
datatype_struct_can_cast(SemaCtx *sctx, DataType *s1, DataType *s2)
|
|
{
|
|
compiler_assert(sctx->cm, s1->kind == DtkStruct && s2->kind == DtkStruct);
|
|
const DataTypeCompound *s1s = &s1->compound;
|
|
const DataTypeCompound *s2s = &s2->compound;
|
|
|
|
if (s1s->packed != s2s->packed)
|
|
return (DataTypeCheck){false, Sl("")};
|
|
if (arrlen(s1s->fields) != arrlen(s2s->fields))
|
|
return (DataTypeCheck){false, Sl("")};
|
|
for (isize i = 0; i < arrlen(s1s->fields); ++i) {
|
|
DataTypeCheck tchk;
|
|
if (!(tchk = datatype_cmp(sctx, s1s->fields[i], s2s->fields[i])).ok)
|
|
return tchk;
|
|
}
|
|
return (DataTypeCheck){.ok = true};
|
|
}
|
|
|
|
static DataTypeCheck
|
|
datatype_array_cmp(SemaCtx *sctx, DataType *a1, DataType *a2)
|
|
{
|
|
DataTypeCheck tchk = {.ok = true};
|
|
if (a1->array.len != a2->array.len)
|
|
return (DataTypeCheck){false, Sl("")};
|
|
if (!(tchk = datatype_cmp(sctx, a1->array.base, a2->array.base)).ok)
|
|
return tchk;
|
|
return tchk;
|
|
}
|
|
|
|
static DataTypeCheck
|
|
datatype_proc_can_cast(SemaCtx *sctx, DataType *pc1, DataType *pc2)
|
|
{
|
|
DataTypeCheck tchk = {.ok = true};
|
|
|
|
if (pc1->proc.public != pc2->proc.public)
|
|
return (DataTypeCheck){false, Sl("")};
|
|
if (pc1->proc.extern_lnk != pc2->proc.extern_lnk)
|
|
return (DataTypeCheck){false, Sl("")};
|
|
if (pc1->proc.c_varargs != pc2->proc.c_varargs)
|
|
return (DataTypeCheck){false, Sl("")};
|
|
if (arrlen(pc1->proc.argtypes) != arrlen(pc2->proc.argtypes))
|
|
return (DataTypeCheck){false, Sl("")};
|
|
if (!(tchk = datatype_cmp(sctx, pc1->proc.rettype, pc2->proc.rettype)).ok)
|
|
return tchk;
|
|
|
|
for (isize i = 0; i < arrlen(pc1->proc.argtypes); ++i) {
|
|
if (!(tchk = datatype_cmp(sctx, pc1->proc.argtypes[i], pc2->proc.argtypes[i])).ok)
|
|
return tchk;
|
|
}
|
|
return tchk;
|
|
}
|
|
|
|
static DataTypeCheck
|
|
datatype_basic_cmp(SemaCtx *sctx, DataType *dt1, DataType *dt2)
|
|
{
|
|
(void)sctx;
|
|
if (dt1->size > dt2->size) /* if it has a size equal or less than dt2 */
|
|
return (DataTypeCheck){false, Sl("")};
|
|
if (dt1->sign != dt2->sign)
|
|
return (DataTypeCheck){false, Strafmt("integers with different sign")};
|
|
return (DataTypeCheck){.ok = true};
|
|
}
|
|
|
|
/* Compares two datatype objects, returning true if they are equal. */
|
|
static DataTypeCheck
|
|
datatype_cmp(SemaCtx *sctx, DataType *dt1, DataType *dt2)
|
|
{
|
|
if (dt1 == nil || dt2 == nil)
|
|
return (DataTypeCheck){false, Sl("")};
|
|
/* TODO: return more information in case of a mismatch... */
|
|
if (dt1 == dt2) /* shallow comparison */
|
|
return (DataTypeCheck){.ok = true};
|
|
if (dt1->kind != dt2->kind)
|
|
return (DataTypeCheck){.ok = false};
|
|
|
|
switch (dt1->kind) {
|
|
case DtkBasic:
|
|
return datatype_basic_cmp(sctx, dt1, dt2);
|
|
case DtkArray:
|
|
return datatype_array_cmp(sctx, dt1, dt2);
|
|
/* Nominally typed, should have been catched by the shallow comparison above */
|
|
case DtkBool:
|
|
case DtkVoid:
|
|
case DtkStruct:
|
|
case DtkUnion:
|
|
case DtkProc:
|
|
break;
|
|
case DtkInvalid:
|
|
unreachable();
|
|
}
|
|
return (DataTypeCheck){.ok = false};
|
|
}
|
|
|
|
static DataType *
|
|
expr_get_datatype(SemaCtx *sctx, Ast *expr)
|
|
{
|
|
compiler_assert(sctx->cm, ast_node_is_expr(expr->type));
|
|
switch (expr->type) {
|
|
case AST_BINEXPR:
|
|
return expr->bin.type;
|
|
case AST_UNARY:
|
|
return expr->unary.type;
|
|
case AST_NUMBER:
|
|
return expr->number.type;
|
|
case AST_STRLIT:
|
|
return sym_search_oncurrent(sctx->top_scope, Sl("string"))->dtype;
|
|
/* XXX: for these two we could attach the type in the ast... */
|
|
case AST_IDENT:
|
|
return sym_search(sctx->current_scope, expr->ident)->dtype;
|
|
case AST_PROCCALL:
|
|
return sym_search(sctx->current_scope, expr->call.name)->dtype->proc.rettype;
|
|
default:
|
|
unreachable();
|
|
}
|
|
return nil;
|
|
}
|
|
|
|
/* Search for the type in the symbol table, asserting that is a data type. */
|
|
static DataType *
|
|
resolve_datatype(SemaCtx *sctx, const Str ident, Location loc)
|
|
{
|
|
Symbol *dtsym = sym_search(sctx->current_scope, ident);
|
|
if (dtsym == nil) {
|
|
sema_error(sctx, &loc, "no such type '%s'", ident.s);
|
|
return (DataType *)InvalidDataType;
|
|
}
|
|
if (dtsym->kind != SymType) {
|
|
sema_error(sctx, &loc, "'%s' is not a type but a %s", ident.s, SymbolKindStr[dtsym->kind]);
|
|
return (DataType *)InvalidDataType;
|
|
}
|
|
return dtsym->dtype;
|
|
}
|
|
|
|
static void
|
|
sema_procdef(SemaCtx *sctx, AstProc *proc, Location loc)
|
|
{
|
|
Symbol *sym_prev;
|
|
if ((sym_prev = sym_search(sctx->current_scope, proc->name)) != nil) {
|
|
sema_error(
|
|
sctx, nil,
|
|
"'%s' was already declared as a %s",
|
|
proc->name.s, SymbolKindStr[sym_prev->kind]
|
|
);
|
|
sema_note(sctx, &sym_prev->loc, "'%s' previously declared here", proc->name.s);
|
|
return;
|
|
}
|
|
|
|
if (Str_equal(proc->name, Sl("main"))) {
|
|
sctx->main_defined = true;
|
|
if (!proc->ispublic) {
|
|
sema_error(sctx, &loc, "'main' has to be declared as a public proc");
|
|
}
|
|
}
|
|
|
|
const Ast *rettype_node = proc->rettype;
|
|
DataType *proc_rettype = nil;
|
|
if (rettype_node != nil) {
|
|
compiler_assert(sctx->cm, rettype_node->type == AST_IDENT);
|
|
proc_rettype = resolve_datatype(sctx, proc->rettype->ident, rettype_node->loc);
|
|
if (proc_rettype == InvalidDataType)
|
|
return;
|
|
} else {
|
|
/* return type node is nil, we infer that as a `void` type */
|
|
proc_rettype = sctx->builtintypes.void_t;
|
|
}
|
|
|
|
Vec(DataType *) procargs = make_type_list_from_idents(sctx, proc->args);
|
|
DataType *procdtype = make_proc_type(false, proc_rettype, procargs);
|
|
procdtype->proc.public = proc->ispublic;
|
|
Symbol proc_sym = {
|
|
.kind = SymProc,
|
|
.dtype = procdtype,
|
|
.loc = loc
|
|
};
|
|
|
|
sym_insert(sctx->current_scope->symbols, proc->name.s, proc_sym);
|
|
proc->type = procdtype;
|
|
|
|
/* proc has no body at all */
|
|
if (proc->body == nil)
|
|
return;
|
|
|
|
/* analyze the body */
|
|
compiler_assert(sctx->cm, proc->body->type == AST_STMTS);
|
|
push_semactx(&sctx);
|
|
enter_scope(sctx);
|
|
|
|
compiler_assert(sctx->cm, arrlen(proc->args) == arrlen(procargs));
|
|
/* Inject proc parameters into the proc body top scope */
|
|
for (isize i = 0; i < arrlen(proc->args); ++i) {
|
|
DataType *argdtype = procargs[i];
|
|
enum SymbolKind argsymkind = proc->args[i].kind;
|
|
|
|
compiler_assert(sctx->cm, argdtype != nil);
|
|
compiler_assert(sctx->cm, argsymkind == SymLet || argsymkind == SymVar);
|
|
|
|
Symbol argsym = {
|
|
.kind = argsymkind,
|
|
.dtype = argdtype,
|
|
.procparm = true,
|
|
.loc = proc->args[i].ident_loc
|
|
};
|
|
sym_insert(sctx->current_scope->symbols, proc->args[i].ident.s, argsym);
|
|
}
|
|
sctx->flags |= SctxInsideProc;
|
|
sema_stmts(sctx, proc->body->stmts);
|
|
sema_check_unused_vars(sctx);
|
|
exit_scope(sctx);
|
|
pop_semactx(&sctx);
|
|
}
|
|
|
|
static void
|
|
sema_return(SemaCtx *sctx, Ast *ret_expr, Location loc)
|
|
{
|
|
if (~sctx->flags & SctxInsideProc) {
|
|
sema_error(sctx, &loc, "'return' outside of proc");
|
|
}
|
|
if (ret_expr != nil)
|
|
sema_expr(sctx, ret_expr, ret_expr->loc);
|
|
}
|
|
|
|
static void
|
|
sema_break(SemaCtx *sctx, Ast *unused, Location loc)
|
|
{
|
|
(void)unused;
|
|
if (~sctx->flags & SctxInsideLoop) {
|
|
sema_error(sctx, &loc, "'break' used outside of a loop");
|
|
}
|
|
}
|
|
|
|
static void
|
|
sema_discard(SemaCtx *sctx, Ast *expr, Location loc)
|
|
{
|
|
sctx->flags |= SctxInDiscard;
|
|
sema_expr(sctx, expr, loc);
|
|
sctx->flags &= ~SctxInDiscard;
|
|
}
|
|
|
|
static void
|
|
sema_pragma(SemaCtx *sctx, AstPragma *attr)
|
|
{
|
|
sema_node(sctx, attr->node);
|
|
}
|
|
|
|
static void
|
|
sema_var_decl(SemaCtx *sctx, AstVarDecl *decl, Location loc)
|
|
{
|
|
compiler_assert(sctx->cm, symbol_is_var_binding(decl->kind));
|
|
|
|
const Symbol *symp = sym_search(sctx->current_scope, decl->name);
|
|
if (symp != nil && symp->kind != decl->kind) {
|
|
switch (symp->kind) {
|
|
case SymLet:
|
|
sema_error(sctx, &symp->loc, "'%s' was already declared as 'let'", decl->name.s);
|
|
return;
|
|
case SymVar:
|
|
sema_error(sctx, &symp->loc, "'%s' was already declared as 'var'", decl->name.s);
|
|
return;
|
|
case SymConst:
|
|
sema_error(
|
|
sctx, &symp->loc,
|
|
"declaration of '%s' shadows previously declared constant with the same name",
|
|
decl->name.s
|
|
);
|
|
return;
|
|
case SymType:
|
|
sema_error(sctx, &symp->loc, "'%s' was already declared as a type", decl->name.s);
|
|
return;
|
|
default:
|
|
break;
|
|
}
|
|
sema_note(sctx, &symp->loc, "'%s' was declared in this line", decl->name.s);
|
|
}
|
|
|
|
Ast *dexpr = decl->expr;
|
|
DataType *dexpr_dt = nil;
|
|
if (dexpr != nil) {
|
|
/* check the assignment expression */
|
|
dexpr_dt = sema_expr(sctx, dexpr, loc);
|
|
} else if (dexpr == nil && decl->datatype != nil) {
|
|
sema_warning(sctx, &loc, "variable is unitialized");
|
|
}
|
|
|
|
DataType *dtype = nil;
|
|
if (decl->datatype != nil) { /* explicit data type specified */
|
|
compiler_assert(sctx->cm, decl->datatype->type == AST_IDENT);
|
|
dtype = resolve_datatype(sctx, decl->datatype->ident, decl->datatype->loc);
|
|
/* Note that we ignore whether `resolve_datatype` return an invalid type,
|
|
* since we still want to insert the variable into the symbol table,
|
|
* otherwise we would have spurious "undeclared identifier" errors. */
|
|
decl->type = dtype;
|
|
} else {
|
|
/* the parser should catch this (the grammar requires it) */
|
|
compiler_assert(sctx->cm, dexpr != nil);
|
|
compiler_assert(sctx->cm, dexpr_dt != nil);
|
|
decl->type = dexpr_dt;
|
|
}
|
|
|
|
Symbol sym = {
|
|
.kind = decl->kind,
|
|
.dtype = dtype,
|
|
.loc = loc,
|
|
};
|
|
/* Insert the variable to the symbol table */
|
|
sym_insert(sctx->current_scope->symbols, decl->name.s, sym);
|
|
}
|
|
|
|
static void
|
|
sema_var_assign(SemaCtx *sctx, AstVarAssign *assign, Location loc)
|
|
{
|
|
sema_expr_ident(sctx, assign->name);
|
|
sema_expr(sctx, assign->expr, loc);
|
|
|
|
Symbol *decl = sym_search(sctx->current_scope, assign->name);
|
|
if (decl == nil) {
|
|
sema_error(sctx, &loc, "assign to undeclared variable '%s'", assign->name.s);
|
|
return;
|
|
}
|
|
if (!symbol_is_var_binding(decl->kind)) {
|
|
sema_error(
|
|
sctx, &loc,
|
|
"assign to non-variable symbol ('%s' is a '%s')",
|
|
assign->name.s, SymbolKindStr[decl->kind]
|
|
);
|
|
return;
|
|
}
|
|
if (decl->kind != SymVar) {
|
|
sema_error(
|
|
sctx, &loc,
|
|
"assign to immutable symbol ('%s' was declared as '%s')",
|
|
assign->name.s, SymbolKindStr[decl->kind]
|
|
);
|
|
return;
|
|
}
|
|
//datatype_cmp(sctx, nil, decl->dtype);
|
|
}
|
|
|
|
static void
|
|
sema_ifstmtexpr(SemaCtx *sctx, AstIf *ift, Location loc)
|
|
{
|
|
sema_expr(sctx, ift->cond, loc);
|
|
sema_node(sctx, ift->true_body);
|
|
sema_node(sctx, ift->false_body);
|
|
|
|
const isize elifs_len = arrlen(ift->elifs);
|
|
if (elifs_len > 0) {
|
|
for (isize i = 0; i < elifs_len; ++i) {
|
|
AstElif *elif = &ift->elifs[i];
|
|
sema_expr(sctx, elif->cond, loc);
|
|
sema_node(sctx, elif->body);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
sema_loop(SemaCtx *sctx, AstLoop *loop, Location loc)
|
|
{
|
|
if (loop->precond != nil) {
|
|
sema_expr(sctx, loop->precond, loc);
|
|
}
|
|
if (loop->postcond != nil) {
|
|
sema_expr(sctx, loop->postcond, loc);
|
|
}
|
|
push_semactx(&sctx);
|
|
sctx->flags |= SctxInsideLoop;
|
|
sema_node(sctx, loop->body);
|
|
pop_semactx(&sctx);
|
|
}
|
|
|
|
static void
|
|
sema_stmts(SemaCtx *sctx, Vec(Ast *) stmts)
|
|
{
|
|
/* AST_STMTS imply the opening of a new scope */
|
|
const isize stmts_len = arrlen(stmts);
|
|
for (isize i = 0; i < stmts_len; ++i) {
|
|
sema_node(sctx, stmts[i]);
|
|
if (sema_is_stmt_terminal(stmts[i]) && i + 1 != stmts_len) {
|
|
sema_warning(sctx, &stmts[i + 1]->loc, "dead code after 'return'");
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
sema_stmt_block(SemaCtx *sctx, Vec(Ast *) stmts)
|
|
{
|
|
enter_scope(sctx);
|
|
sema_stmts(sctx, stmts);
|
|
exit_scope(sctx);
|
|
/* check for unused bindings declared in this scope */
|
|
sema_check_unused_vars(sctx);
|
|
}
|
|
|
|
static void
|
|
sema_node(SemaCtx *sctx, Ast *node)
|
|
{
|
|
if (node == nil)
|
|
return;
|
|
switch (node->type) {
|
|
case AST_IF:
|
|
sema_ifstmtexpr(sctx, &node->ifse, node->loc);
|
|
break;
|
|
case AST_LOOP:
|
|
sema_loop(sctx, &node->loop, node->loc);
|
|
break;
|
|
case AST_STMTS:
|
|
sema_stmt_block(sctx, node->stmts);
|
|
break;
|
|
case AST_PROCDEF:
|
|
sema_procdef(sctx, &node->proc, node->loc);
|
|
break;
|
|
case AST_PROCCALL:
|
|
sema_proccall(sctx, &node->call, node->loc);
|
|
break;
|
|
case AST_VARDECL:
|
|
sema_var_decl(sctx, &node->var, node->loc);
|
|
break;
|
|
case AST_VARASSIGN:
|
|
sema_var_assign(sctx, &node->varassgn, node->loc);
|
|
break;
|
|
case AST_RETURN:
|
|
sema_return(sctx, node->ret, node->loc);
|
|
break;
|
|
case AST_BREAK:
|
|
sema_break(sctx, nil, node->loc);
|
|
break;
|
|
case AST_DISCARD:
|
|
sema_discard(sctx, node->discard.expr, node->loc);
|
|
break;
|
|
case AST_PRAGMA:
|
|
sema_pragma(sctx, &node->pragma);
|
|
break;
|
|
case AST_BINEXPR:
|
|
case AST_UNARY:
|
|
case AST_NUMBER:
|
|
case AST_STRLIT:
|
|
case AST_IDENT:
|
|
sema_expr(sctx, node, node->loc);
|
|
break;
|
|
case AST_INVALID:
|
|
case AST_EXPRS:
|
|
case AST_PROCCALL_ARGS:
|
|
unreachable();
|
|
}
|
|
}
|
|
|
|
static void
|
|
sema_make_builtin_types(SemaCtx *sctx)
|
|
{
|
|
typedef struct {
|
|
const char *name;
|
|
Symbol sym;
|
|
} NameSym;
|
|
|
|
DataType *void_type = make_data_type(DtkVoid, 0, true, false);
|
|
DataType *str_type = make_data_type(DtkStruct, 0, false, false);
|
|
DataType *puts_proto = make_data_type(DtkProc, 0, false, false);
|
|
puts_proto->proc.rettype = void_type;
|
|
puts_proto->proc.argtypes = make_proc_args((DataType *[]){str_type}, 1);
|
|
puts_proto->proc.extern_lnk = true;
|
|
|
|
NameSym builtin_basic_types[] = {
|
|
{"void", {.kind = SymType, .dtype = void_type}},
|
|
{"u64", {.kind = SymType, .dtype = make_data_type(DtkBasic, 8, true, false)}},
|
|
{"i64", {.kind = SymType, .dtype = make_data_type(DtkBasic, 8, true, true)}},
|
|
{"cint", {.kind = SymType, .dtype = make_data_type(DtkBasic, sizeof(int), true, true)}},
|
|
{"string", {.kind = SymType, .dtype = str_type}},
|
|
{"bool", {.kind = SymType, .dtype = make_data_type(DtkBool, 1, true, false)}},
|
|
};
|
|
DataType *u64_dt = builtin_basic_types[1].sym.dtype;
|
|
DataType *bool_dt = builtin_basic_types[5].sym.dtype;
|
|
|
|
NameSym builtin_procs[] = {
|
|
{
|
|
"+",
|
|
{
|
|
.kind = SymProc,
|
|
.dtype = make_proc_type(
|
|
true,
|
|
u64_dt,
|
|
make_proc_args((DataType *[]){u64_dt, u64_dt}, 2)
|
|
)
|
|
}
|
|
},
|
|
{
|
|
"-",
|
|
{
|
|
.kind = SymProc,
|
|
.dtype = make_proc_type(
|
|
true,
|
|
u64_dt,
|
|
make_proc_args((DataType *[]){u64_dt, u64_dt}, 2)
|
|
)
|
|
}
|
|
},
|
|
{
|
|
"==",
|
|
{
|
|
.kind = SymProc,
|
|
.dtype = make_proc_type(
|
|
true,
|
|
bool_dt,
|
|
make_proc_args((DataType *[]){u64_dt, u64_dt}, 2)
|
|
)
|
|
}
|
|
},
|
|
};
|
|
|
|
for (isize i = 0; i < countof(builtin_basic_types); ++i) {
|
|
const char *name = builtin_basic_types[i].name;
|
|
Symbol sym = builtin_basic_types[i].sym;
|
|
sym_insert(sctx->current_scope->symbols, name, sym);
|
|
}
|
|
for (isize i = 0; i < countof(builtin_procs); ++i) {
|
|
sym_insert(sctx->current_scope->symbols,
|
|
builtin_procs[i].name, builtin_procs[i].sym);
|
|
}
|
|
|
|
sctx->builtintypes.tyu64 = builtin_basic_types[1].sym.dtype;
|
|
sctx->builtintypes.void_t = void_type;
|
|
|
|
Symbol puts_sym = {.kind = SymProc, .dtype = puts_proto};
|
|
sym_insert(sctx->current_scope->symbols, "puts", puts_sym);
|
|
}
|
|
|
|
SemaCtx *
|
|
sema_new(Compiler *cm)
|
|
{
|
|
SemaCtx *toplevel_context = make_semactx(cm, nil);
|
|
toplevel_context->current_scope = make_scope(nil);
|
|
|
|
sema_make_builtin_types(toplevel_context);
|
|
toplevel_context->top_scope = toplevel_context->current_scope;
|
|
toplevel_context->ok = true;
|
|
return toplevel_context;
|
|
}
|
|
|
|
void
|
|
sema_destroy(SemaCtx *sctx)
|
|
{
|
|
free(sctx);
|
|
}
|
|
|
|
void
|
|
sema(SemaCtx *sctx, Ast *program)
|
|
{
|
|
/* Analyze toplevel */
|
|
/* XXX: DRY it */
|
|
compiler_assert(sctx->cm, program->type == AST_STMTS);
|
|
for (isize i = 0; i < arrlen(program->stmts); ++i)
|
|
sema_node(sctx, program->stmts[i]);
|
|
|
|
if (!sctx->cm->opts.compile_only && !sctx->main_defined)
|
|
sema_error(sctx, nil, "missing 'main' entrypoint proc");
|
|
|
|
/* check unused local procedures */
|
|
const SymbolEntry *syms = sctx->current_scope->symbols;
|
|
for (isize i = 0; i < shlen(syms); ++i) {
|
|
const Symbol fsym = syms[i].value;
|
|
if (fsym.kind == SymProc
|
|
&& !fsym.dtype->builtin
|
|
&& !fsym.dtype->proc.public
|
|
&& !fsym.dtype->proc.extern_lnk
|
|
&& !fsym.used) {
|
|
sema_warning(
|
|
sctx, &fsym.loc,
|
|
"defined proc '%s' is never called in this module", syms[i].key
|
|
);
|
|
}
|
|
}
|
|
}
|