/* Semantic analyzer and type checker */ #include #include #include "pre.h" #include "sema.h" #include "datatype.h" #include "location.h" #include "symbol.h" #include "ast.h" #include "state.h" #include "messages.h" #include "libs/stb_ds.h" #define sema_error(ctx, loc, ...) do { \ error((ctx)->cm, loc, __VA_ARGS__); \ (ctx)->ok = false; \ } while (0) #define sema_warning(ctx, loc, ...) warning((ctx)->cm, loc, __VA_ARGS__) #define sema_note(ctx, loc, ...) note((ctx)->cm, loc, __VA_ARGS__) #define sema_is_stmt_terminal(s) (s->type == AST_RETURN || s->type == AST_BREAK) #define sym_insert(syms, k, v) shput(syms, k, v) enum SemaCtxFlags /* 64 bits */ { SctxInsideProc = BitPos(0), SctxInsideLoop = BitPos(1), SctxInsideIf = BitPos(2), SctxInTopLevel = BitPos(3), SctxInExpr = BitPos(4), SctxInDiscard = BitPos(5), SctxInStmtBlock = BitPos(6), }; typedef struct { enum SymbolKind kind; /* The data type associated with the symbol. */ DataType *dtype; bool used; bool procparm; /* if its a proc parameter */ Location loc; } Symbol; typedef HashMapStr(Symbol) SymbolEntry; struct Scope { Scope *prev; /* Previous scope in the stack */ SymbolEntry *symbols; /* All the symbols in this scope */ }; typedef struct { bool ok; } SemaStatus; static const Symbol InvalidSymbol = {.kind = SymInvalid}; static const DataType *InvalidDataType = &(DataType){.kind = DtkInvalid}; static DataTypeCheck datatype_struct_cmp(SemaCtx *sctx, DataType *s1, DataType *s2); static DataTypeCheck datatype_cmp(SemaCtx *sctx, DataType *dt1, DataType *dt2); static DataType * sema_expr(SemaCtx *sctx, Ast *expr, Location loc); static void sema_expr_list(SemaCtx *sctx, Vec(Ast *) exprs, Location loc); static void sema_node(SemaCtx *sctx, Ast *node); static void sema_stmts(SemaCtx *sctx, Vec(Ast *) stmts); static DataType * resolve_datatype(SemaCtx *sctx, const Str ident, Location loc); static Scope * make_scope(Scope *prev) { Scope *sc = malloc(sizeof(*sc)); sc->prev = prev; sc->symbols = nil; sh_new_arena(sc->symbols); shdefault(sc->symbols, InvalidSymbol); return sc; } static SemaCtx * make_semactx(Compiler *cm, SemaCtx *prev) { SemaCtx *smc = calloc(1, sizeof(*smc)); smc->cm = cm; smc->prev = prev; return smc; } static DataType * make_data_type(enum DataTypeKind kind, u16 size, bool builtin, bool sign) { DataType *dt = calloc(1, sizeof(*dt)); dt->kind = kind; dt->size = size; dt->builtin = builtin; dt->sign = sign; return dt; } static DataType * make_proc_type(bool builtin, DataType *rettype, Vec(DataType *) argtypes) { DataType *pdt = calloc(1, sizeof(*pdt)); pdt->kind = DtkProc; pdt->builtin = builtin; pdt->proc.rettype = rettype; pdt->proc.argtypes = argtypes; return pdt; } static Vec(DataType *) make_type_list_from_idents(SemaCtx *sctx, Vec(AstIdentTypePair) idents) { if (idents == nil) return nil; Vec(DataType *) dts = nil; foreach (ident, idents) arrput(dts, resolve_datatype(sctx, ident.dtype, ident.dtype_loc)); return dts; } static Vec(DataType *) make_proc_args(DataType *a[], isize len) { Vec(DataType *) args = nil; arrsetlen(args, len); memcpy(args, a, len); return args; } /* Pushes a new context frame. Note that this inherits the flags and scope of the * previous context frame. * XXX: could rather only push flags... */ static void push_semactx(SemaCtx **sctx) { SemaCtx *tmp = make_semactx((*sctx)->cm, *sctx); tmp->flags = (*sctx)->flags; tmp->current_scope = (*sctx)->current_scope; tmp->top_scope = (*sctx)->top_scope; tmp->ok = (*sctx)->ok; *sctx = tmp; } /* Pops the current context frame. */ static void pop_semactx(SemaCtx **sctx) { SemaCtx *prev = (*sctx)->prev; compiler_assert((*sctx)->cm, prev != nil); prev->ok = (*sctx)->ok; free(*sctx); *sctx = prev; } static void enter_scope(SemaCtx *sctx) { sctx->current_scope = make_scope(sctx->current_scope); } static void exit_scope(SemaCtx *sctx) { compiler_assert(sctx->cm, sctx->current_scope->prev != nil); sctx->current_scope = sctx->current_scope->prev; } Symbol * sym_search_oncurrent(Scope *scope, const Str name) { Symbol *sym = &shget(scope->symbols, name.s); if (sym->kind != SymInvalid) return sym; return nil; } /* Searches for a symbol in the current and previous scopes */ Symbol * sym_search(Scope *scope, const Str name) { Scope *sp = scope; Symbol *sym = nil; while (sp != nil && sym == nil) { sym = sym_search_oncurrent(sp, name); sp = sp->prev; } return sym; } /* Scans through the current scope for any unused var-like bindings * (including proc parameters) */ static void sema_check_unused_vars(SemaCtx *sctx) { /* Very simple, iterate over all bindings on this scope and report any that * doesn't have the 'used' flag toggled. */ const SymbolEntry *syms = sctx->current_scope->symbols; for (isize i = 0; i < shlen(syms); ++i) { const Symbol sym = syms[i].value; if (!sym.used && symbol_is_var_binding(sym.kind)) { const char *bind_kind_name = !sym.procparm ? "variable" : "proc parameter"; // SymbolKindStr[sym.kind] : "proc parameter"; sema_warning( sctx, &sym.loc, "unused %s '%s'", bind_kind_name, syms[i].key ); } } } static void sema_check_dead_stmts(SemaCtx *sctx, Vec(Ast *) stmts) { (void)sctx, (void)stmts; /* those who forsake the CFG are doomed to implement it badly without even * noticing... */ } static void sema_match_proc_type(SemaCtx *sctx, Symbol *fsym, Str fident) { if (fsym->dtype->kind != DtkProc) { sema_error( sctx, nil, "cannot call '%s' because has non-proc type '%s'", fident.s, "uh" ); return; } } static DataType * sema_proccall(SemaCtx *sctx, const AstProcCall *call, Location loc) { Symbol *fsym = sym_search(sctx->current_scope, call->name); if (fsym == nil) { sema_error(sctx, &loc, "call to undeclared proc '%s'", call->name.s); return nil; } fsym->used = true; sema_match_proc_type(sctx, fsym, call->name); /* check call arguments */ const isize proc_arglen = arrlen(fsym->dtype->proc.argtypes); if (call->args != nil) { compiler_assert(sctx->cm, call->args->type == AST_EXPRS); const isize call_arglen = arrlen(call->args->exprs); if (call_arglen != proc_arglen) { const char *at_most = call_arglen > proc_arglen ? "s at most" : ""; sema_error( sctx, &loc, "argument length mismatch: given %li arguments to '%s' but it expects %li argument%s", call_arglen, call->name.s, proc_arglen, at_most ); return nil; } sema_expr_list(sctx, call->args->exprs, loc); /* now sema-check the args */ } else if (call->args == nil && proc_arglen != 0) { sema_error(sctx, &loc, "'%s' proc takes %li argument(s), but none given", call->name.s, proc_arglen); return nil; } if (fsym->dtype->proc.rettype != sctx->builtintypes.void_t && (~sctx->flags & SctxInDiscard) && (~sctx->flags & SctxInExpr)) { sema_error(sctx, &loc, "result of function call with non-void type ignored"); sema_note(sctx, &loc, "use 'discard' if this was intentional"); return nil; } for (isize i = 0; i < proc_arglen; ++i) { ; } return fsym->dtype; } /************ Semantic and type checking of expressions ************/ /* Type checking for expressions is done inside-out */ static DataType * sema_expr_number(SemaCtx *sctx, AstNumber *num) { #define pow2(exp) (2 << (exp - 1)) /* type rule axiom */ num->type = sym_search_oncurrent(sctx->top_scope, Sl("u64"))->dtype; return num->type; #undef pow2 } static DataType * sema_expr_strlit(SemaCtx *sctx, const Str *strlit) { (void)sctx, (void)strlit; /* type rule axiom */ return sym_search_oncurrent(sctx->top_scope, Sl("string"))->dtype; } static Symbol * sema_expr_ident(SemaCtx *sctx, const Str ident) { Symbol *ident_sym = sym_search(sctx->current_scope, ident); if (ident_sym == nil) { sema_error(sctx, nil, "undeclared identifier '%s'", ident.s); return nil; } if (ident_sym->kind == SymType) { sema_error(sctx, nil, "data type '%s' used as identifier in expression", ident.s); return nil; } ident_sym->used = true; return ident_sym; } static DataType * sema_expr_unary(SemaCtx *sctx, AstUnary *unary, Location loc) { Ast *expr = unary->atom; compiler_assert(sctx->cm, ast_node_is_expr(expr->type)); //if (expr->type == AST_STRLIT) { // sema_error(sctx, nil, "%s with a string literal makes no sense\n", TokenIdStr[unary->op]); // return; //} //if (expr->type == AST_NUMBER) { // if (unary->op == T_MINUS && !expr->number.type->sign) { // } //} return sema_expr(sctx, expr, loc); } static DataType * sema_binop(SemaCtx *sctx, const AstBinop *expr, Location loc) { Symbol *opsym = sym_search_oncurrent(sctx->top_scope, expr->op); if (opsym == nil) { sema_error(sctx, nil, "no operator '%s'", expr->op.s); return nil; } if (arrlen(opsym->dtype->proc.argtypes) != 2) { sema_error(sctx, nil, "no binary operator for '%s'", expr->op.s); return nil; } DataType *ldt = sema_expr(sctx, expr->left, loc); DataType *rdt = sema_expr(sctx, expr->right, loc); /* Skip typechecking if either ldt or rdt have `InvalidDataType` and propagate * it up the call stack. */ if (ldt == InvalidDataType || rdt == InvalidDataType) return (DataType *)InvalidDataType; DataTypeCheck tchk; if (!(tchk = datatype_cmp(sctx, ldt, rdt)).ok) { sema_error(sctx, &loc, "type error: %s", tchk.msg.s); return nil; } return ldt; } static DataType * sema_expr(SemaCtx *sctx, Ast *expr, Location loc) { compiler_assert(sctx->cm, ast_node_is_expr(expr->type)); push_semactx(&sctx); sctx->flags |= SctxInExpr; DataType *dt = nil; switch (expr->type) { case AST_BINEXPR: dt = sema_binop(sctx, &expr->bin, loc); break; case AST_UNARY: dt = sema_expr_unary(sctx, &expr->unary, loc); break; case AST_NUMBER: dt = sema_expr_number(sctx, &expr->number); break; case AST_STRLIT: dt = sema_expr_strlit(sctx, &expr->strlit); break; case AST_IDENT: dt = sema_expr_ident(sctx, expr->ident)->dtype; break; case AST_PROCCALL: dt = sema_proccall(sctx, &expr->call, expr->loc); break; default: unreachable(); } pop_semactx(&sctx); return dt; } static void sema_expr_list(SemaCtx *sctx, Vec(Ast *) exprs, Location loc) { foreach (expr, exprs) { sema_expr(sctx, expr, loc); } } /************ Type checking ************/ /* Structurally compare two structural data types. */ static DataTypeCheck datatype_struct_cmp(SemaCtx *sctx, DataType *s1, DataType *s2) { compiler_assert(sctx->cm, s1->kind == DtkStruct && s2->kind == DtkStruct); const DataTypeCompound *s1s = &s1->compound; const DataTypeCompound *s2s = &s2->compound; if (s1s->packed != s2s->packed) return (DataTypeCheck){false, Sl("")}; if (arrlen(s1s->fields) != arrlen(s2s->fields)) return (DataTypeCheck){false, Sl("")}; for (isize i = 0; i < arrlen(s1s->fields); ++i) { DataTypeCheck tchk; if (!(tchk = datatype_cmp(sctx, s1s->fields[i], s2s->fields[i])).ok) return tchk; } return (DataTypeCheck){.ok = true}; } static DataTypeCheck datatype_array_cmp(SemaCtx *sctx, DataType *a1, DataType *a2) { DataTypeCheck tchk = {.ok = true}; if (a1->array.len != a2->array.len) return (DataTypeCheck){false, Sl("")}; if (!(tchk = datatype_cmp(sctx, a1->array.base, a2->array.base)).ok) return tchk; return tchk; } static DataTypeCheck datatype_proc_cmp(SemaCtx *sctx, DataType *pc1, DataType *pc2) { DataTypeCheck tchk = {.ok = true}; if (pc1->proc.public != pc2->proc.public) return (DataTypeCheck){false, Sl("")}; if (pc1->proc.extern_lnk != pc2->proc.extern_lnk) return (DataTypeCheck){false, Sl("")}; if (pc1->proc.c_varargs != pc2->proc.c_varargs) return (DataTypeCheck){false, Sl("")}; if (arrlen(pc1->proc.argtypes) != arrlen(pc2->proc.argtypes)) return (DataTypeCheck){false, Sl("")}; if (!(tchk = datatype_cmp(sctx, pc1->proc.rettype, pc2->proc.rettype)).ok) return tchk; for (isize i = 0; i < arrlen(pc1->proc.argtypes); ++i) { if (!(tchk = datatype_cmp(sctx, pc1->proc.argtypes[i], pc2->proc.argtypes[i])).ok) return tchk; } return tchk; } static DataTypeCheck datatype_basic_cmp(SemaCtx *sctx, DataType *dt1, DataType *dt2) { (void)sctx; if (dt1->size > dt2->size) /* if it has a size equal or less than dt2 */ return (DataTypeCheck){false, Sl("")}; if (dt1->sign != dt2->sign) return (DataTypeCheck){false, Strafmt("integers with different sign")}; return (DataTypeCheck){.ok = true}; } /* Compares two datatype objects, returning true if they are equal. */ static DataTypeCheck datatype_cmp(SemaCtx *sctx, DataType *dt1, DataType *dt2) { if (dt1 == nil || dt2 == nil) return (DataTypeCheck){false, Sl("")}; /* TODO: return more information in case of a mismatch... */ if (dt1 == dt2) /* shallow */ return (DataTypeCheck){.ok = true}; if (dt1->kind != dt2->kind) return (DataTypeCheck){.ok = false}; switch (dt1->kind) { case DtkBasic: return datatype_basic_cmp(sctx, dt1, dt2); case DtkStruct: case DtkUnion: return datatype_struct_cmp(sctx, dt1, dt2); case DtkProc: return datatype_proc_cmp(sctx, dt1, dt2); case DtkArray: return datatype_array_cmp(sctx, dt1, dt2); case DtkBool: case DtkVoid: return (DataTypeCheck){.ok = true}; } return (DataTypeCheck){.ok = false}; } static DataType * expr_get_datatype(SemaCtx *sctx, Ast *expr) { compiler_assert(sctx->cm, ast_node_is_expr(expr->type)); switch (expr->type) { case AST_BINEXPR: return expr->bin.type; case AST_UNARY: return expr->unary.type; case AST_NUMBER: return expr->number.type; case AST_STRLIT: return sym_search_oncurrent(sctx->top_scope, Sl("string"))->dtype; /* XXX: for these two we could attach the type in the ast... */ case AST_IDENT: return sym_search(sctx->current_scope, expr->ident)->dtype; case AST_PROCCALL: return sym_search(sctx->current_scope, expr->call.name)->dtype->proc.rettype; default: unreachable(); } return nil; } /* Search for the type in the symbol table, asserting that is a data type. */ static DataType * resolve_datatype(SemaCtx *sctx, const Str ident, Location loc) { Symbol *dtsym = sym_search(sctx->current_scope, ident); if (dtsym == nil) { sema_error(sctx, &loc, "no such type '%s'", ident.s); return (DataType *)InvalidDataType; } if (dtsym->kind != SymType) { sema_error(sctx, &loc, "'%s' is not a type but a %s", ident.s, SymbolKindStr[dtsym->kind]); return (DataType *)InvalidDataType; } return dtsym->dtype; } static void sema_procdef(SemaCtx *sctx, AstProc *proc, Location loc) { Symbol *sym_prev; if ((sym_prev = sym_search(sctx->current_scope, proc->name)) != nil) { sema_error( sctx, nil, "'%s' was already declared as a %s", proc->name.s, SymbolKindStr[sym_prev->kind] ); sema_note(sctx, &sym_prev->loc, "'%s' previously declared here", proc->name.s); return; } if (Str_equal(proc->name, Sl("main"))) { sctx->main_defined = true; if (!proc->ispublic) { sema_error(sctx, &loc, "'main' has to be declared as a public proc"); } } const Ast *rettype_node = proc->rettype; DataType *proc_rettype = nil; if (rettype_node != nil) { compiler_assert(sctx->cm, rettype_node->type == AST_IDENT); proc_rettype = resolve_datatype(sctx, proc->rettype->ident, rettype_node->loc); if (proc_rettype == InvalidDataType) return; } else { /* return type node is nil, we infer that as a `void` type */ proc_rettype = sctx->builtintypes.void_t; } Vec(DataType *) procargs = make_type_list_from_idents(sctx, proc->args); DataType *procdtype = make_proc_type(false, proc_rettype, procargs); procdtype->proc.public = proc->ispublic; Symbol proc_sym = { .kind = SymProc, .dtype = procdtype, .loc = loc }; sym_insert(sctx->current_scope->symbols, proc->name.s, proc_sym); proc->type = procdtype; /* proc has no body at all */ if (proc->body == nil) return; /* analyze the body */ compiler_assert(sctx->cm, proc->body->type == AST_STMTS); push_semactx(&sctx); enter_scope(sctx); compiler_assert(sctx->cm, arrlen(proc->args) == arrlen(procargs)); /* Inject proc parameters into the proc body top scope */ for (isize i = 0; i < arrlen(proc->args); ++i) { DataType *argdtype = procargs[i]; enum SymbolKind argsymkind = proc->args[i].kind; compiler_assert(sctx->cm, argdtype != nil); compiler_assert(sctx->cm, argsymkind == SymLet || argsymkind == SymVar); Symbol argsym = { .kind = argsymkind, .dtype = argdtype, .procparm = true, .loc = proc->args[i].ident_loc }; sym_insert(sctx->current_scope->symbols, proc->args[i].ident.s, argsym); } sctx->flags |= SctxInsideProc; sema_stmts(sctx, proc->body->stmts); sema_check_unused_vars(sctx); exit_scope(sctx); pop_semactx(&sctx); } static void sema_return(SemaCtx *sctx, Ast *ret_expr, Location loc) { if (~sctx->flags & SctxInsideProc) { sema_error(sctx, &loc, "'return' outside of proc"); } if (ret_expr != nil) sema_expr(sctx, ret_expr, ret_expr->loc); } static void sema_break(SemaCtx *sctx, Ast *unused, Location loc) { (void)unused; if (~sctx->flags & SctxInsideLoop) { sema_error(sctx, &loc, "'break' used outside of a loop"); } } static void sema_discard(SemaCtx *sctx, Ast *expr, Location loc) { sctx->flags |= SctxInDiscard; sema_expr(sctx, expr, loc); sctx->flags &= ~SctxInDiscard; } static void sema_attribute(SemaCtx *sctx, AstAttribute *attr) { sema_node(sctx, attr->node); } static void sema_var_decl(SemaCtx *sctx, AstVarDecl *decl, Location loc) { compiler_assert(sctx->cm, symbol_is_var_binding(decl->kind)); const Symbol *symp = sym_search(sctx->current_scope, decl->name); if (symp != nil && symp->kind != decl->kind) { switch (symp->kind) { case SymLet: sema_error(sctx, &symp->loc, "'%s' was already declared as 'let'", decl->name.s); return; case SymVar: sema_error(sctx, &symp->loc, "'%s' was already declared as 'var'", decl->name.s); return; case SymConst: sema_error( sctx, &symp->loc, "declaration of '%s' shadows previously declared constant with the same name", decl->name.s ); return; case SymType: sema_error(sctx, &symp->loc, "'%s' was already declared as a type", decl->name.s); return; default: break; } sema_note(sctx, &symp->loc, "'%s' was declared in this line", decl->name.s); } Ast *dexpr = decl->expr; if (dexpr != nil) { sema_expr(sctx, dexpr, loc); /* check the assignment expression */ } else { sema_warning(sctx, &loc, "variable is unitialized"); } if (decl->datatype == nil) { sema_error(sctx, nil, "we don't do type inference yet sorry"); return; } compiler_assert(sctx->cm, decl->datatype->type == AST_IDENT); DataType *dtype = resolve_datatype(sctx, decl->datatype->ident, decl->datatype->loc); /* Note that we ignore whether `resolve_datatype` return an invalid type, * since we still want to insert the variable into the symbol table, * otherwise we would have spurious "undeclared identifier" errors. */ decl->type = dtype; Symbol sym = { .kind = decl->kind, .dtype = dtype, .loc = loc, }; /* Insert the variable to the symbol table */ sym_insert(sctx->current_scope->symbols, decl->name.s, sym); } static void sema_var_assign(SemaCtx *sctx, AstVarAssign *assign, Location loc) { sema_expr_ident(sctx, assign->name); sema_expr(sctx, assign->expr, loc); Symbol *decl = sym_search(sctx->current_scope, assign->name); if (decl == nil) { sema_error(sctx, &loc, "assign to undeclared variable '%s'", assign->name.s); return; } if (!symbol_is_var_binding(decl->kind)) { sema_error( sctx, &loc, "assign to non-variable symbol ('%s' is a '%s')", assign->name.s, SymbolKindStr[decl->kind] ); return; } if (decl->kind != SymVar) { sema_error( sctx, &loc, "assign to immutable symbol ('%s' was declared as '%s')", assign->name.s, SymbolKindStr[decl->kind] ); return; } //datatype_cmp(sctx, nil, decl->dtype); } static void sema_ifstmtexpr(SemaCtx *sctx, AstIf *ift, Location loc) { sema_expr(sctx, ift->cond, loc); sema_node(sctx, ift->true_body); sema_node(sctx, ift->false_body); const isize elifs_len = arrlen(ift->elifs); if (elifs_len > 0) { for (isize i = 0; i < elifs_len; ++i) { AstElif *elif = &ift->elifs[i]; sema_expr(sctx, elif->cond, loc); sema_node(sctx, elif->body); } } } static void sema_loop(SemaCtx *sctx, AstLoop *loop, Location loc) { if (loop->precond != nil) { sema_expr(sctx, loop->precond, loc); } if (loop->postcond != nil) { sema_expr(sctx, loop->postcond, loc); } push_semactx(&sctx); sctx->flags |= SctxInsideLoop; sema_node(sctx, loop->body); pop_semactx(&sctx); } static void sema_stmts(SemaCtx *sctx, Vec(Ast *) stmts) { /* AST_STMTS imply the opening of a new scope */ const isize stmts_len = arrlen(stmts); for (isize i = 0; i < stmts_len; ++i) { sema_node(sctx, stmts[i]); if (sema_is_stmt_terminal(stmts[i]) && i + 1 != stmts_len) { sema_warning(sctx, &stmts[i + 1]->loc, "dead code after 'return'"); } } } static void sema_stmt_block(SemaCtx *sctx, Vec(Ast *) stmts) { enter_scope(sctx); sema_stmts(sctx, stmts); exit_scope(sctx); /* check for unused bindings declared in this scope */ sema_check_unused_vars(sctx); } static void sema_node(SemaCtx *sctx, Ast *node) { if (node == nil) return; switch (node->type) { case AST_IF: sema_ifstmtexpr(sctx, &node->ifse, node->loc); break; case AST_LOOP: sema_loop(sctx, &node->loop, node->loc); break; case AST_STMTS: sema_stmt_block(sctx, node->stmts); break; case AST_PROCDEF: sema_procdef(sctx, &node->proc, node->loc); break; case AST_PROCCALL: sema_proccall(sctx, &node->call, node->loc); break; case AST_VARDECL: sema_var_decl(sctx, &node->var, node->loc); break; case AST_VARASSIGN: sema_var_assign(sctx, &node->varassgn, node->loc); break; case AST_RETURN: sema_return(sctx, node->ret, node->loc); break; case AST_BREAK: sema_break(sctx, nil, node->loc); break; case AST_DISCARD: sema_discard(sctx, node->discard.expr, node->loc); break; case AST_ATTRIBUTE: sema_attribute(sctx, &node->attribute); break; case AST_BINEXPR: case AST_UNARY: case AST_NUMBER: case AST_STRLIT: case AST_IDENT: sema_expr(sctx, node, node->loc); break; case AST_INVALID: case AST_EXPRS: case AST_PROCCALL_ARGS: unreachable(); } } static void sema_make_builtin_types(SemaCtx *sctx) { typedef struct { const char *name; Symbol sym; } NameSym; DataType *void_type = make_data_type(DtkVoid, 0, true, false); DataType *str_type = make_data_type(DtkStruct, 0, false, false); DataType *puts_proto = make_data_type(DtkProc, 0, false, false); puts_proto->proc.rettype = void_type; puts_proto->proc.argtypes = make_proc_args((DataType *[]){str_type}, 1); puts_proto->proc.extern_lnk = true; NameSym builtin_basic_types[] = { {"void", {.kind = SymType, .dtype = void_type}}, {"u64", {.kind = SymType, .dtype = make_data_type(DtkBasic, 8, true, false)}}, {"i64", {.kind = SymType, .dtype = make_data_type(DtkBasic, 8, true, true)}}, {"cint", {.kind = SymType, .dtype = make_data_type(DtkBasic, sizeof(int), true, true)}}, {"string", {.kind = SymType, .dtype = str_type}}, {"bool", {.kind = SymType, .dtype = make_data_type(DtkBool, 1, true, false)}}, }; DataType *u64_dt = builtin_basic_types[1].sym.dtype; DataType *bool_dt = builtin_basic_types[5].sym.dtype; NameSym builtin_procs[] = { { "+", { .kind = SymProc, .dtype = make_proc_type( true, u64_dt, make_proc_args((DataType *[]){u64_dt, u64_dt}, 2) ) } }, { "-", { .kind = SymProc, .dtype = make_proc_type( true, u64_dt, make_proc_args((DataType *[]){u64_dt, u64_dt}, 2) ) } }, { "==", { .kind = SymProc, .dtype = make_proc_type( true, bool_dt, make_proc_args((DataType *[]){u64_dt, u64_dt}, 2) ) } }, }; for (isize i = 0; i < countof(builtin_basic_types); ++i) { const char *name = builtin_basic_types[i].name; Symbol sym = builtin_basic_types[i].sym; sym_insert(sctx->current_scope->symbols, name, sym); } for (isize i = 0; i < countof(builtin_procs); ++i) { sym_insert(sctx->current_scope->symbols, builtin_procs[i].name, builtin_procs[i].sym); } sctx->builtintypes.tyu64 = builtin_basic_types[1].sym.dtype; sctx->builtintypes.void_t = void_type; Symbol puts_sym = {.kind = SymProc, .dtype = puts_proto}; sym_insert(sctx->current_scope->symbols, "puts", puts_sym); } SemaCtx * sema_new(Compiler *cm) { SemaCtx *toplevel_context = make_semactx(cm, nil); toplevel_context->current_scope = make_scope(nil); sema_make_builtin_types(toplevel_context); toplevel_context->top_scope = toplevel_context->current_scope; toplevel_context->ok = true; return toplevel_context; } void sema_destroy(SemaCtx *sctx) { free(sctx); } void sema(SemaCtx *sctx, Ast *program) { /* Analyze toplevel */ /* XXX: DRY it */ compiler_assert(sctx->cm, program->type == AST_STMTS); for (isize i = 0; i < arrlen(program->stmts); ++i) sema_node(sctx, program->stmts[i]); if (!sctx->cm->opts.compile_only && !sctx->main_defined) sema_error(sctx, nil, "missing 'main' entrypoint proc"); /* check unused local procedures */ const SymbolEntry *syms = sctx->current_scope->symbols; for (isize i = 0; i < shlen(syms); ++i) { const Symbol fsym = syms[i].value; if (fsym.kind == SymProc && !fsym.dtype->builtin && !fsym.dtype->proc.public && !fsym.dtype->proc.extern_lnk && !fsym.used) { sema_warning( sctx, &fsym.loc, "defined proc '%s' is never called in this module", syms[i].key ); } } }