#include #include #include #include #include #include #include #include "lex.h" #include "arena.h" #include "dynarr.h" #include "strio.h" #include "ir.h" int no_opt = 0; typedef struct { DYNARR(Proc) procs; } Unit; void unit_init(Unit *u) { (void)u; } void unit_free(Unit *u) { for (int i = 0; i < u->procs.len; i++) { proc_free(&u->procs.data[i]); } free(u->procs.data); } /* parsing */ Node *parse_expr(Lexer *l, Proc *p); void parse_return(Lexer *l, Proc *p) { lex_next(l); Node *n = node_new(p, N_RETURN, p->ctrl, parse_expr(l, p)); node_add(p, n, p->stop); p->ctrl = n; } void parse_let(Lexer *l, Proc *p) { recurse: lex_expect(l, TM_IDENT); Str name = l->ident; LexSpan pos = l->pos; lex_expect(l, TM_EQL); lex_next(l); Node *rhs = parse_expr(l, p); NameBinding *b = scope_bind(&p->scope, name, rhs, pos, p); if (b) { lex_error_at(l, pos, LE_WARN, S("shadowing previous declaration")); lex_error_at(l, b->src_pos, LE_WARN, S("declared here")); } if (l->tok == TOK_COMMA) goto recurse; } void parse_stmt(Lexer *l, Proc *p); /* TODO: return node from this! */ void parse_block(Lexer *l, Proc *p, ScopeNameList *nl) { lex_next(l); scope_push(&p->scope, p); while (l->tok != TOK_RBRACE) { lex_expected_not(l, TM_EOF); parse_stmt(l, p); } if (nl) { scope_collect(&p->scope, p, nl, &p->arena); } scope_pop(&p->scope, p); lex_expected(l, TM_RBRACE); lex_next(l); } void parse_assign(Lexer *l, Proc *p) { Str name = l->ident; LexSpan pos = l->pos; lex_expect(l, TM_ASSIGN); lex_next(l); Node *e = parse_expr(l, p); if (!scope_update(&p->scope, name, e, p)) { lex_error_at(l, pos, LE_ERROR, S("undeclared identifier")); } } void merge_scope(Lexer *l, Proc *p, ScopeNameList *na, ScopeNameList *nb) { for (ScopeFrame *f = p->scope.tail; f; f = f->prev) { for (NameBinding *b = f->latest; b; b = b->prev) { int i, j; for (i = 0; i < na->len && !str_eql(na->data[i].name, b->name); i++); for (j = 0; j < nb->len && !str_eql(nb->data[j].name, b->name); j++); if (i >= na->len && j >= nb->len) continue; /* no change */ Node *phi; if (i >= na->len) { if (nb->data[j].node == b->node) continue; phi = node_new(p, N_PHI, p->ctrl, b->node, nb->data[j].node); } else if (j >= na->len) { if (na->data[i].node == b->node) continue; phi = node_new(p, N_PHI, p->ctrl, b->node, na->data[i].node); } else { if (na->data[i].node == b->node && nb->data[j].node == b->node) continue; phi = node_new(p, N_PHI, p->ctrl, na->data[i].node, nb->data[j].node); } node_remove(p, b->node, p->keepalive); phi = node_peephole(phi, p, l); node_add(p, phi, p->keepalive); b->node = phi; } } } void parse_if(Lexer *l, Proc *p) { lex_next(l); Node *cond = parse_expr(l, p); Node *ctrl_if = NULL, *ctrl_else = NULL; Node *if_node = node_new(p, N_IF_ELSE, p->ctrl, cond); if_node->val = (Value) { .type = { T_TOP, T_TUPLE, NULL }, .tuple = { 0 } }; ZDA_PUSH(&p->arena, &if_node->val.tuple, (Value) { .type = { T_CTRL, T_NONE, NULL } }); ZDA_PUSH(&p->arena, &if_node->val.tuple, (Value) { .type = { T_CTRL, T_NONE, NULL } }); if_node = node_peephole(if_node, p, l); Node *if_true = node_peephole(node_new(p, N_PROJ, if_node), p, l); Node *if_false = node_peephole(node_new(p, N_PROJ, if_node), p, l); ScopeNameList scope_before = { 0 }, scope_true = { 0 }, scope_false = { 0 }; if_true->val.i = 0; if_false->val.i = 1; scope_collect(&p->scope, p, &scope_before, &p->arena); if (cond->val.type.lvl == T_CONST) { if (cond->val.i) if_false->val.type.lvl = T_XCTRL; else if_true->val.type.lvl = T_XCTRL; } NODE_KEEP(p, cond, { p->ctrl = if_true; lex_expected(l, TM_LBRACE); parse_block(l, p, &scope_true); ctrl_if = p->ctrl; if (l->tok == TOK_ELSE) { NODE_KEEP(p, ctrl_if, { p->ctrl = if_false; lex_expect(l, TM_LBRACE); parse_block(l, p, &scope_false); ctrl_else = p->ctrl; }); } }); if (ctrl_else) { p->ctrl = node_peephole(node_new(p, N_REGION, ctrl_if, ctrl_else), p, l); node_add(p, p->ctrl, p->keepalive); merge_scope(l, p, &scope_true, &scope_false); node_remove(p, p->ctrl, p->keepalive); } else { p->ctrl = node_peephole(node_new(p, N_REGION, ctrl_if, if_false), p, l); node_add(p, p->ctrl, p->keepalive); merge_scope(l, p, &scope_true, &scope_before); node_remove(p, p->ctrl, p->keepalive); } scope_uncollect(&p->scope, p, &scope_true); scope_uncollect(&p->scope, p, &scope_false); scope_uncollect(&p->scope, p, &scope_before); } void parse_stmt(Lexer *l, Proc *p) { /* TODO */ (void)l; switch (l->tok) { case TOK_RETURN: parse_return(l, p); break; case TOK_LET: parse_let(l, p); break; case TOK_LBRACE: parse_block(l, p, NULL); break; case TOK_IDENT: parse_assign(l, p); break; case TOK_IF: parse_if(l, p); break; default: lex_expected(l, TM_RBRACE); break; } } Type parse_type(Lexer *l, Proc *proc) { (void)proc; Type t = { .lvl = T_BOT }; if (str_eql(l->ident, S("i64"))) { t.t = T_INT; } else if (str_eql(l->ident, S("bool"))) { t.t = T_BOOL; } else { lex_error(l, LE_ERROR, S("unknown type")); } lex_next(l); return t; } void parse_args_list(Lexer *l, Proc *proc) { Node *start = proc->start; int i = 0; struct { Str name; LexSpan pos; } idbuf[32]; int id = 0; while (l->tok != TOK_RPAREN && l->tok != TOK_EOF) { lex_expect(l, TM_IDENT); if (id == sizeof idbuf / sizeof *idbuf) { lex_error(l, LE_ERROR, S("too many arguments without specifying a type")); return; } idbuf[id].name = l->ident; idbuf[id].pos = l->pos; id++; lex_expect(l, TM_IDENT | TM_COMMA); if (l->tok == TOK_COMMA) continue; Value v = (Value) { .type = parse_type(l, proc) }; lex_expected(l, TM_RPAREN | TM_COMMA); for (int j = 0; j < id; j++) { Node *proj = node_new(proc, N_PROJ, proc->start); proj->val.type = v.type; proj->val.i = i++; ZDA_PUSH(&proc->arena, &start->val.tuple, v); scope_bind(&proc->scope, idbuf[j].name, proj, idbuf[j].pos, proc); } id = 0; } lex_expected(l, TM_RPAREN); lex_next(l); } Proc *parse_proc(Lexer *l, Unit *u) { DA_FIT(&u->procs, u->procs.len + 1); Proc *proc = &u->procs.data[u->procs.len++]; lex_expect(l, TM_IDENT); proc_init(proc, l->ident); scope_push(&proc->scope, proc); lex_expect(l, TM_LPAREN | TM_LBRACE); if (l->tok == TOK_LPAREN) { parse_args_list(l, proc); } lex_expected(l, TM_LBRACE); lex_next(l); while (l->tok != TOK_RBRACE) { lex_expected_not(l, TM_EOF); parse_stmt(l, proc); } scope_pop(&proc->scope, proc); lex_expected(l, TM_RBRACE); lex_next(l); return proc; } Node *parse_term(Lexer *l, Proc *p) { Node *node = NULL; NodeType op_after = N_START; if (TMASK(l->tok) & (TM_MINUS | TM_PLUS | TM_NOT)) { Token t = l->tok; lex_next(l); node = parse_term(l, p); NodeType post_op = N_START; switch (t) { case TOK_MINUS: post_op = N_OP_NEG; break; case TOK_NOT: post_op = N_OP_NOT; break; default: return node; } if (post_op == N_START) return node; return node_peephole(node_new(p, post_op, NULL, node), p, l); } if (l->tok == TOK_LPAREN) { lex_next(l); node = parse_expr(l, p); lex_expected(l, TM_RPAREN); lex_next(l); node->src_pos.ofs--; node->src_pos.n += 2; } else if (l->tok == TOK_IDENT) { NameBinding *b = scope_find(&p->scope, l->ident); if (b) { node = b->node; } else { lex_error(l, LE_ERROR, S("undeclared identifier")); } lex_next(l); } else if (TMASK(l->tok) & (TM_TRUE | TM_FALSE)) { node = node_new_lit_bool(p, l->tok == TOK_TRUE); lex_next(l); } else { lex_expected(l, TM_LIT_NUM); int64_t val = 0; for (int i = 0; i < l->ident.n; i++) { if (!(l->ident.s[i] >= '0' && l->ident.s[i] <= '9')) { lex_error(l, LE_ERROR, S("not a digit")); break; } val = (val * 10) + (l->ident.s[i] - '0'); } node = node_new_lit_i64(p, val); node->src_pos = l->pos; lex_next(l); } if (op_after != N_START) { node = node_new(p, op_after, NULL, node_peephole(node, p, l)); } return node_peephole(node, p, l); } NodeType tok_to_bin_op(Token t) { switch (t) { case TOK_PLUS: return N_OP_ADD; break; case TOK_MINUS: return N_OP_SUB; break; case TOK_ASTERISK: return N_OP_MUL; break; case TOK_SLASH: return N_OP_DIV; break; case TOK_NOT: return N_OP_NOT; break; case TOK_AND: return N_OP_AND; break; case TOK_OR: return N_OP_OR; break; case TOK_XOR: return N_OP_XOR; break; case TOK_SHL: return N_OP_SHL; break; case TOK_SHR: return N_OP_SHR; break; case TOK_EQL: return N_CMP_EQL; break; case TOK_NEQ: return N_CMP_NEQ; break; case TOK_LES: return N_CMP_LES; break; case TOK_GTR: return N_CMP_GTR; break; case TOK_LTE: return N_CMP_LTE; break; case TOK_GTE: return N_CMP_GTE; break; default: return N_START; break; } } /* TODO: operator precedence would be kinda nice actually, sad to say */ Node *parse_expr(Lexer *l, Proc *p) { LexSpan pos = l->pos; Node *lhs = parse_term(l, p); NodeType nt = tok_to_bin_op(l->tok);; assert(lhs->refs > 0); if (nt != N_START) { lex_next(l); /* necessary because if lhs is a deduplicated literal, it may be an input to rhs * and therefore culled by peephole optimizations */ Node *rhs; NODE_KEEP(p, lhs, { rhs = parse_expr(l, p); }); lhs = node_peephole(node_new(p, nt, NULL, lhs, rhs), p, l); } lhs->src_pos = (LexSpan) { pos.ofs, l->pos.ofs - pos.ofs }; return lhs; } void parse_toplevel(Lexer *l, Unit *u) { switch (l->tok) { case TOK_PROC: parse_proc(l, u); break; default: lex_expected(l, TM_PROC); break; } } void unit_print(Unit *u); void parse_unit(Lexer *l) { Unit u = { 0 }; unit_init(&u); while (l->tok != TOK_EOF) { parse_toplevel(l, &u); } unit_print(&u); unit_free(&u); } /* graph output */ void node_print(Node *n, Proc *p) { if (n->walked) return; if (n->type == N_START) { Str s = S(""); int c = n->val.tuple.len; Value *v = n->val.tuple.data; for (int i = 0; i < c; i++) { if (i > 0) str_cat(&s, S(", "), &p->arena); str_cat(&s, type_desc(&v[i].type, &p->arena), &p->arena); } printf("\t%d [label=\"start(%.*s)\"]", n->id, (int)s.n, s.s); } else if (n->type == N_LIT) { switch (n->val.type.t) { case T_INT: printf("\t%d [label=\"%ld\"]", n->id, n->val.i); break; case T_BOOL: printf("\t%d [label=\"%s\"]", n->id, n->val.i ? "true" : "false"); break; default: printf("\t%d [label=\"literal %d\"]", n->id, n->id); break; } } else if (n->type == N_PROJ) { Str d = type_desc(&n->in.data[0]->val.tuple.data[n->val.i].type, &p->arena); printf("\t%d [label=\"%ld | %.*s\", shape=record]", n->id, n->val.i, (int)d.n, d.s); } else { printf("\t%d [label=\"%s\", shape=record]", n->id, node_type_name(n->type)); } printf("\n"); n->walked = 1; for (int i = 0; i < n->out.len; i++) { Node *o = n->out.data[i]; if (o->type == N_LIT) { printf("\t%d -> %d [style=dashed]\n", n->id, o->id); } else { int j; for (j = 0; j < o->in.len && o->in.data[j] != n; j++); if (j == 0) { printf("\t%d -> %d [color=red,headlabel=%d]\n", n->id, o->id, j); } else { printf("\t%d -> %d [headlabel=%d]\n", n->id, o->id, j); } } } for (int i = 0; i < n->out.len; i++) { node_print(n->out.data[i], p); } } void proc_print(Proc *p) { if (p->start) { printf("\t\"%.*s\" -> %d\n", (int)p->name.n, p->name.s, p->start->id); node_print(p->start, p); if (no_opt) { for (NameBinding *b = p->scope.free_bind; b; b = b->prev) { uint64_t id = (uintptr_t)b->node; printf("\t\t%lu [label=\"%.*s\",shape=none,fontcolor=blue]\n", id, (int)b->name.n, b->name.s); printf("\t\t%lu -> %d [arrowhead=none,style=dotted,color=blue]\n", id, b->node->id); } } } } void unit_print(Unit *u) { puts("digraph {"); for (int i = 0; i < u->procs.len; i++) { proc_print(&u->procs.data[i]); } puts("}"); } /* main */ int main(int argc, const char **argv) { if (argc != 2) { fprintf(stderr, "Usage: %s FILE\n", argv[0]); return 1; } Lexer l = { 0 }; lex_start(&l, argv[1]); parse_unit(&l); lex_free(&l); return 0; }