#include #include #include #include #include #include #include #include #include "lex.h" #include "arena.h" #include "dynarr.h" #include "strio.h" #include "ir.h" #include "peephole.h" #include "proc.h" int no_opt = 0; /* bit set */ typedef struct { u64 cap; u64 *data; } BitSet; void bitset_fit(BitSet *s, u32 sz, Arena *a) { if (s->cap >= sz) return; u32 c = stdc_bit_ceil(sz); s->data = resize(a, s->data, s->cap, c); for (u32 i = s->cap; i < c; i++) s->data[i] = 0; s->cap = c; } static inline int bitset_has(BitSet *s, u32 n) { if (n / 64 >= s->cap) return 0; return !!((1L << (n & 63)) & s->data[n / 64]); } static inline void bitset_add(BitSet *s, u32 n, Arena *a) { bitset_fit(s, (n / 64) + 1, a); s->data[n / 64] |= (1L << (n & 63)); } static inline void bitset_del(BitSet *s, u32 n) { if (bitset_has(s, n)) { s->data[n / 64] &= ~(1L << (n & 63)); } } /* units */ typedef struct { Arena *arena; XAR(Proc) procs; } Unit; Proc *unit_new_proc(Unit *u) { return XAR_PUT(&u->procs, u->procs.n++, u->arena); } /* parsing */ Node *parse_expr(Lexer *l, Proc *p, Type *twant); /* TODO: eliminate unused if-else statements at the end of compilation * they don't get pruned out by peephole optimizations if there are phi * nodes that are connected to keepalive (due to still being in scope). * so probably this will have to be done in a separate step after the * graph has been generated. */ Node *ctrl(Graph *p, Node *n) { if (!n) { p->ctrl = node_new_lit(p, (Value) { .type = { .lvl = T_XCTRL, .t = T_NONE } }); return NULL; } p->ctrl = n; return n; } void parse_return(Lexer *l, Proc *p) { lex_next(l); Node *n; Graph *g = &p->graph; if (p->ret_type.t == T_NONE) { n = node_new(g, N_RETURN, g->ctrl); } else { Node *e = parse_expr(l, p, &p->ret_type); n = node_new(g, N_RETURN, g->ctrl, e); } n = node_peephole(n, g, l); if (n->type.lvl != T_XCTRL) { node_add(g, n, g->stop); } ctrl(g, n); ctrl(g, NULL); } Type parse_type(Lexer *l, Proc *proc) { (void)proc; Type t = { .lvl = T_BOT }; if (l->tok == TOK_DEREF) { lex_next(l); t.t = T_PTR; t.next = new(&proc->perm, Type); *t.next = parse_type(l, proc); return t; } lex_expected(l, TM_IDENT); if (l->tok == TOK_IDENT) { if (str_eql(l->ident, S("i64"))) { t.t = T_INT; } else if (str_eql(l->ident, S("bool"))) { t.t = T_BOOL; } else { lex_error(l, LE_ERROR, S("unknown type")); } } lex_next(l); return t; } void parse_let(Lexer *l, Proc *p) { Graph *g = &p->graph; recurse: lex_expect(l, TM_IDENT); Str name = l->ident; LexSpan pos = l->pos; lex_next(l); Node *rhs = NULL; Type t = { .t = T_NONE }; if (l->tok != TOK_EQL) { t = parse_type(l, p); if (l->tok != TOK_EQL) { rhs = node_new(g, N_UNINIT, g->start); rhs->type = t; } } if (l->tok == TOK_EQL) { lex_next(l); rhs = parse_expr(l, p, t.t == T_NONE ? NULL : &t); } NameBinding *b = scope_bind(&p->scope, name, rhs, pos, g); if (b) { lex_error_at(l, pos, LE_WARN, S("shadowing previous declaration")); lex_error_at(l, b->src_pos, LE_WARN, S("declared here")); } /* TODO: isn't this just a do while loop */ if (l->tok == TOK_COMMA) goto recurse; } void parse_stmt(Lexer *l, Proc *p); /* TODO: return node from this! */ void parse_block(Lexer *l, Proc *p) { Graph *g = &p->graph; lex_next(l); scope_push(&p->scope); while (l->tok != TOK_RBRACE) { lex_expected_not(l, TM_EOF); parse_stmt(l, p); } scope_pop(&p->scope, g); lex_expected(l, TM_RBRACE); lex_next(l); } void parse_assign(Lexer *l, Proc *p) { Str name = l->ident; LexSpan pos = l->pos; lex_expect(l, TM_ASSIGN); lex_next(l); NameBinding *b = scope_find(&p->scope, name); if (!b) { lex_error_at(l, pos, LE_ERROR, S("undeclared identifier")); } Node *e = parse_expr(l, p, &b->node->type); if (node_uninit(e)) { lex_error_at(l, e->src_pos, LE_ERROR, str_fmt(&p->scratch, "uninitialized %S", type_desc(&e->type, &p->scratch))); } else if (node_maybe_uninit(e)) { lex_error_at(l, e->src_pos, LE_WARN, str_fmt(&p->scratch, "possibly uninitialized %S", type_desc(&e->type, &p->scratch))); } scope_update(&p->scope, b, e, &p->graph); } /* TODO: find out a way to encode known relations between nodes, based on the * conditional, within the body of an if statement --- e.g., for a statement * * if a < 5 { * if a < 10 { * foo() * } * } else { * if a > 3 { * bar() * } * } * * we should be able to infer that the second condition is always true, because * it's only reachable if a < 5, and 5 < 10. conversely, in the else branch, * we know that a >= 5, and can use that to make assumptions on comparisons * made there too! * */ void parse_if(Lexer *l, Proc *p) { Graph *g = &p->graph; lex_next(l); Node *cond = parse_expr(l, p, &(Type) { .t = T_BOOL }); Node *ctrl_if = NULL, *ctrl_else = NULL; Node *if_node = node_new(g, N_IF_ELSE, g->ctrl, cond); if_node->val = (Value) { .type = { .lvl = T_TOP, .t = T_TUPLE }, .tuple = { .len = 2, .cap = 2, .data = new_arr(g->pool->arena, Value, 2) } }; if_node->val.tuple.data[0] = (Value) { .type = { .lvl = T_CTRL, .t = T_NONE } }; if_node->val.tuple.data[1] = (Value) { .type = { .lvl = T_CTRL, .t = T_NONE } }; if_node = node_peephole(if_node, g, l); node_add(g, if_node, g->keepalive); Node *if_true = node_new(g, N_PROJ, if_node); Node *if_false = node_new(g, N_PROJ, if_node); if_true->val.i = 0; if_false->val.i = 1; if_true = node_peephole(if_true, g, l); if_false = node_peephole(if_false, g, l); node_remove(g, if_node, g->keepalive); assert(if_true->in.len > 0); assert(if_false->in.len > 0); node_add(g, if_true, g->keepalive); node_add(g, if_false, g->keepalive); ScopeChangeList chg_if = { 0 }; ScopeChangeList chg_else = { 0 }; if (cond->type.lvl == T_CONST) { if (cond->val.i) if_false->type.lvl = T_XCTRL; else if_true->type.lvl = T_XCTRL; } ctrl(g, if_true); lex_expected(l, TM_LBRACE); int pos = l->pos.ofs; scope_changelist_push(&p->scope, &chg_if, &p->scratch); parse_block(l, p); scope_changelist_pop(&p->scope, g); if_true->src_pos = (LexSpan) { .ofs = pos, .n = l->pos.ofs - pos }; ctrl_if = g->ctrl; node_add(g, ctrl_if, g->keepalive); if (l->tok == TOK_ELSE) { ctrl(g, if_false); lex_expect(l, TM_LBRACE); pos = l->pos.ofs; scope_changelist_push(&p->scope, &chg_else, &p->scratch); parse_block(l, p); scope_changelist_pop(&p->scope, g); if_false->src_pos = (LexSpan) { .ofs = pos, .n = l->pos.ofs - pos }; ctrl_else = g->ctrl; node_add(g, ctrl_else, g->keepalive); } Node *region; if (ctrl_else) { assert(ctrl_if); assert(ctrl_else); region = node_new(g, N_REGION, ctrl_if, ctrl_else); node_add_out(g, region, g->keepalive); node_remove(g, ctrl_if, g->keepalive); node_remove(g, ctrl_else, g->keepalive); } else { assert(ctrl_if); assert(if_false); region = node_new(g, N_REGION, ctrl_if, if_false); node_add_out(g, region, g->keepalive); node_remove(g, ctrl_if, g->keepalive); } ctrl(g, region); node_remove(g, if_true, g->keepalive); node_remove(g, if_false, g->keepalive); assert(g->ctrl->in.len > 0); assert(IN(region, 0)); assert(IN(region, 1)); scope_changelist_merge(&p->scope, l, &chg_if, &chg_else, region, g, &p->scratch); scope_changelist_discard(&chg_if, g); scope_changelist_discard(&chg_else, g); node_del_out(region, g->keepalive); /* make sure we're not orphaning any phi nodes*/ if (g->ctrl->out.len < 1) { ctrl(g, node_peephole(g->ctrl, g, l)); } } void parse_stmt(Lexer *l, Proc *p) { ArenaMark m; arena_save(&p->scratch, &m); switch (l->tok) { case TOK_RETURN: parse_return(l, p); break; case TOK_LET: parse_let(l, p); break; case TOK_LBRACE: parse_block(l, p); break; case TOK_IDENT: parse_assign(l, p); break; case TOK_IF: parse_if(l, p); break; default: lex_expected(l, TM_RBRACE); break; } arena_load(&p->scratch, &m); } void parse_args_list(Lexer *l, Proc *proc) { Node *start = proc->graph.start; Graph *g = &proc->graph; int i = 0; struct { Str name; LexSpan pos; } idbuf[32]; int id = 0; while (l->tok != TOK_RPAREN && l->tok != TOK_EOF) { lex_expect(l, TM_IDENT); if (id == sizeof idbuf / sizeof *idbuf) { lex_error(l, LE_ERROR, S("too many arguments without specifying a type")); return; } idbuf[id].name = l->ident; idbuf[id].pos = l->pos; id++; lex_next(l); if (l->tok == TOK_COMMA) continue; Value v = (Value) { .type = parse_type(l, proc) }; lex_expected(l, TM_RPAREN | TM_COMMA); for (int j = 0; j < id; j++) { Node *proj = node_new(g, N_PROJ, start); proj->type = v.type; proj->val.i = i++; ZDA_PUSH(&proc->perm, &start->val.tuple, v); scope_bind(&proc->scope, idbuf[j].name, proj, idbuf[j].pos, g); } id = 0; } lex_expected(l, TM_RPAREN); lex_next(l); } Node *find_return(Node *n) { if (n->op == N_RETURN) return n; for (u32 i = 0; i < n->out.len; i++) { Node *r = find_return(OUT(n, i)); if (r) return r; } return NULL; } void proc_opt_fwd(Proc *p, Lexer *l, Node *n, BitSet *walked) { Graph *g = &p->graph; if (bitset_has(walked, n->id)) return; bitset_add(walked, n->id, &p->scratch); switch (n->op) { case N_START: for (u32 i = 0; i < n->out.len; i++) { proc_opt_fwd(p, l, OUT(n, i), walked); } break; case N_IF_ELSE: if (n->out.len < 2) { //lex_error_at(l, n->src_pos, LE_ERROR, S("not all codepaths return")); } for (u32 i = 0; i < n->out.len; i++) { Node *r = find_return(OUT(n, i)); if (!r) { lex_error_at(l, OUT(n, i)->src_pos, LE_ERROR, S("not all codepaths return")); } proc_opt_fwd(p, l, OUT(n, i), walked); } break; case N_PROJ: proc_opt_fwd(p, l, OUT(n, 0), walked); break; case N_REGION: /* cull empty if else */ if (n->out.len == 1 && n->in.len == 2 && IN(n,0)->op == N_PROJ && IN(n,1)->op == N_PROJ && CTRL(IN(n,0)) == CTRL(IN(n,1)) && CTRL(IN(n,0))->op == N_IF_ELSE) { assert(OUT(n, 0)->op != N_PHI); assert(IN(OUT(n, 0), 0) == n); Node *new_ctrl = CTRL(CTRL(CTRL(n))); Node *out = OUT(n, 0); node_set_in(g, out, 0, new_ctrl); proc_opt_fwd(p, l, new_ctrl, walked); return; } for (u32 i = 0; i < n->out.len; i++) { if (OUT(n, i)->op != N_PHI) { proc_opt_fwd(p, l, OUT(n, i), walked); } } break; default: break; } } void proc_opt(Proc *p, Lexer *l) { Graph *g = &p->graph; if (g->stop->in.len == 0) { if (p->ret_type.t != T_NONE) { lex_error(l, LE_ERROR, str_fmt(&p->scratch, "no return statement in function expecting %S", type_desc(&p->ret_type, &p->scratch))); } } for (u32 i = 0; i < g->start->out.len; i++) { Node *n = OUT(g->start, i); if (n->op == N_LIT && n->out.len < 1) { node_kill(n, g); i--; } } BitSet walked = { 0 }; proc_opt_fwd(p, l, g->start, &walked); } Proc *parse_proc(Lexer *l, Unit *u) { int has_ret = l->tok == TOK_FUNC; Proc *proc = unit_new_proc(u); lex_expect(l, TM_IDENT); proc_init(proc, l->ident); scope_push(&proc->scope); lex_next(l); if (l->tok == TOK_LPAREN) { parse_args_list(l, proc); } if (has_ret) { proc->ret_type = parse_type(l, proc); } else { proc->ret_type = (Type) { .lvl = T_BOT, .t = T_NONE }; } lex_expected(l, TM_LBRACE); lex_next(l); while (l->tok != TOK_RBRACE) { lex_expected_not(l, TM_EOF); parse_stmt(l, proc); } scope_pop(&proc->scope, &proc->graph); lex_expected(l, TM_RBRACE); lex_next(l); proc_opt(proc, l); return proc; } void uninit_check(Lexer *l, Node *n, LexSpan pos) { if (node_uninit(n)) { lex_error_at(l, pos, LE_ERROR, str_fmt(&l->arena, "uninitialized %S", type_desc(&n->type, &l->arena))); } else if (node_maybe_uninit(n)) { lex_error_at(l, pos, LE_WARN, str_fmt(&l->arena, "possibly uninitialized %S", type_desc(&n->type, &l->arena))); } } Node *parse_term(Lexer *l, Proc *p, Type *twant) { (void)twant; /* to be used for .ENUM_TYPE and stuff */ Node *node = NULL; NodeType op_after = N_START; Graph *g = &p->graph; if (TMASK(l->tok) & (TM_MINUS | TM_PLUS | TM_NOT)) { Token t = l->tok; lex_next(l); node = parse_term(l, p, twant); NodeType post_op = N_START; switch (t) { case TOK_MINUS: post_op = N_OP_NEG; break; case TOK_NOT: post_op = N_OP_NOT; break; default: return node; } if (post_op == N_START) return node; return node_peephole(node_new(g, post_op, NULL, node), g, l); } if (l->tok == TOK_LPAREN) { lex_next(l); node = parse_expr(l, p, NULL); lex_expected(l, TM_RPAREN); lex_next(l); node->src_pos.ofs--; node->src_pos.n += 2; } else if (l->tok == TOK_IDENT) { NameBinding *b = scope_find(&p->scope, l->ident); if (b) { node = b->node; } else { lex_error(l, LE_ERROR, S("undeclared identifier")); } uninit_check(l, node, l->pos); lex_next(l); } else if (TMASK(l->tok) & (TM_TRUE | TM_FALSE)) { node = node_new_lit_bool(g, l->tok == TOK_TRUE); lex_next(l); } else { lex_expected(l, TM_LIT_NUM); int64_t val = 0; for (int i = 0; i < l->ident.n; i++) { if (!(l->ident.s[i] >= '0' && l->ident.s[i] <= '9')) { lex_error(l, LE_ERROR, S("not a digit")); break; } val = (val * 10) + (l->ident.s[i] - '0'); } node = node_new_lit_i64(g, val); node->src_pos = l->pos; lex_next(l); } if (op_after != N_START) { node = node_new(g, op_after, NULL, node_peephole(node, g, l)); } return node_peephole(node, g, l); } NodeType tok_to_bin_op(Token t) { switch (t) { case TOK_PLUS: return N_OP_ADD; break; case TOK_MINUS: return N_OP_SUB; break; case TOK_ASTERISK: return N_OP_MUL; break; case TOK_SLASH: return N_OP_DIV; break; case TOK_NOT: return N_OP_NOT; break; case TOK_AND: return N_OP_AND; break; case TOK_OR: return N_OP_OR; break; case TOK_XOR: return N_OP_XOR; break; case TOK_SHL: return N_OP_SHL; break; case TOK_SHR: return N_OP_SHR; break; case TOK_EQL: return N_CMP_EQL; break; case TOK_NEQ: return N_CMP_NEQ; break; case TOK_LES: return N_CMP_LES; break; case TOK_GTR: return N_CMP_GTR; break; case TOK_LTE: return N_CMP_LTE; break; case TOK_GTE: return N_CMP_GTE; break; default: return N_START; break; } } /* TODO: operator precedence would be kinda nice actually, sad to say */ Node *parse_expr(Lexer *l, Proc *p, Type *twant) { LexSpan pos = l->pos; Graph *g = &p->graph; Node *lhs = parse_term(l, p, twant); NodeType nt = tok_to_bin_op(l->tok);; if (lhs->op == N_DEAD) lex_error(l, LE_ERROR, S("dead lhs")); if (nt != N_START) { lex_next(l); /* necessary because if lhs is a deduplicated literal, it may be an input to rhs * and therefore culled by peephole optimizations */ Node *rhs; NODE_KEEP(g, lhs, { rhs = parse_expr(l, p, &lhs->type); }); lhs = node_peephole(node_new(g, nt, NULL, lhs, rhs), g, l); } lhs->src_pos = (LexSpan) { pos.ofs, l->pos.ofs - pos.ofs }; if (twant) type_expected(twant, lhs, l); return lhs; } void parse_toplevel(Lexer *l, Unit *u) { switch (l->tok) { case TOK_PROC: case TOK_FUNC: parse_proc(l, u); break; default: lex_expected(l, TM_PROC | TM_FUNC); break; } } void unit_print(Unit *u); void parse_unit(Lexer *l, Unit *u) { while (l->tok != TOK_EOF) { parse_toplevel(l, u); } } /* graph output */ /* TODO: Print at every stage of graph compilation and generate a frame for * each, so problems can be debugged visually as they occur. */ void node_print(Node *n, Proc *p, BitSet *walked) { if (bitset_has(walked, n->id)) return; bitset_add(walked, n->id, &p->scratch); if (n->op == N_START) { Str s = S(""); int c = n->val.tuple.len; Value *v = n->val.tuple.data; for (int i = 0; i < c; i++) { if (i > 0) str_cat(&s, S(", "), &p->scratch); str_cat(&s, type_desc(&v[i].type, &p->scratch), &p->scratch); } printf("\t%d [label=\"start(%.*s)\"]", n->id, (int)s.n, s.s); } else if (n->op == N_LIT) { switch (n->type.t) { case T_INT: printf("\t%d [label=\"%ld\"]", n->id, n->val.i); break; case T_BOOL: printf("\t%d [label=\"%s\"]", n->id, n->val.i ? "true" : "false"); break; case T_NONE: if (n->type.lvl == T_XCTRL) { printf("\t%d [label=\"~ctrl\"]", n->id); break; } /* fallthrough */ default: printf("\t%d [label=\"literal %d\"]", n->id, n->id); break; } } else if (n->op == N_PROJ) { Str d = type_desc(&IN(n, 0)->val.tuple.data[n->val.i].type, &p->scratch); printf("\t%d [label=\"%.*s(%ld)\", shape=record]", n->id, (int)d.n, d.s, n->val.i); } else if (n->op == N_UNINIT) { Str s = type_desc(&n->type, &p->scratch); printf("\t%d [label=\"uninitialized %.*s\", shape=record]", n->id, (int)s.n, s.s); } else { printf("\t%d [label=\"%s\", shape=record]", n->id, node_type_name(n->op)); } printf("\n"); for (u32 i = 0; i < n->out.len; i++) { Node *o = OUT(n, i); if (o->op == N_LIT) { printf("\t%d -> %d [style=dashed]\n", n->id, o->id); } else { u32 j; for (j = 0; j < o->in.len && IN(o, j) != n; j++); if (j == 0) { printf("\t%d -> %d [color=red,headlabel=%d]\n", n->id, o->id, j); } else { printf("\t%d -> %d [headlabel=%d]\n", n->id, o->id, j); } } } for (u32 i = 0; i < n->out.len; i++) { node_print(OUT(n, i), p, walked); } } void proc_print(Proc *p) { BitSet walked = { 0 }; Graph *g = &p->graph; if (g->start) { Str d = type_desc(&p->ret_type, &p->scratch); printf("\t\"%.*s %.*s\" -> %d\n", (int)p->name.n, p->name.s, (int)d.n, d.s, g->start->id); node_print(g->start, p, &walked); if (no_opt) { for (NameBinding *b = p->scope.free_bind; b; b = b->prev) { uint64_t id = (uintptr_t)b->node; printf("\t\t%lu [label=\"%.*s\",shape=none,fontcolor=blue]\n", id, (int)b->name.n, b->name.s); printf("\t\t%lu -> %d [arrowhead=none,style=dotted,color=blue]\n", id, b->node->id); } } } } void unit_print(Unit *u) { puts("digraph {"); for (unsigned i = 0; i < u->procs.n; i++) { proc_print(XAR_GET(&u->procs, i)); } puts("}"); } /* main */ int main(int argc, const char **argv) { Arena perm = { 0 }; if (argc != 2) { fprintf(stderr, "Usage: %s FILE\n", argv[0]); return 1; } Lexer l = { 0 }; Unit u = { .arena = &perm }; lex_start(&l, argv[1]); parse_unit(&l, &u); unit_print(&u); lex_free(&l); fprintf(stderr, "node size = %zu\n", sizeof(Node)); return 0; }