#include #include #include #include #include #include #include #include "lex.h" #include "arena.h" #include "dynarr.h" #include "strio.h" #include "ir.h" int no_opt = 0; typedef struct { DYNARR(Proc) procs; } Unit; void unit_init(Unit *u) { (void)u; } void unit_free(Unit *u) { for (int i = 0; i < u->procs.len; i++) { proc_free(&u->procs.data[i]); } free(u->procs.data); } /* parsing */ Node *parse_expr(Lexer *l, Proc *p); void parse_return(Lexer *l, Proc *p) { lex_next(l); p->stop = node_new(p, N_RETURN, p->start, parse_expr(l, p)); } void parse_let(Lexer *l, Proc *p) { recurse: lex_expect(l, TM_IDENT); Str name = l->ident; LexSpan pos = l->pos; lex_expect(l, TM_EQL); lex_next(l); Node *rhs = parse_expr(l, p); NameBinding *b = scope_bind(&p->scope, name, rhs, pos, p); if (b) { lex_error_at(l, pos, LE_WARN, S("shadowing previous declaration")); lex_error_at(l, b->src_pos, LE_WARN, S("declared here")); } if (l->tok == TOK_COMMA) goto recurse; } void parse_stmt(Lexer *l, Proc *p); /* TODO: return node from this! */ void parse_block(Lexer *l, Proc *p) { lex_next(l); scope_push(&p->scope, p); while (l->tok != TOK_RBRACE) { lex_expected_not(l, TM_EOF); parse_stmt(l, p); } scope_pop(&p->scope, p); lex_expected(l, TM_RBRACE); lex_next(l); } void parse_assign(Lexer *l, Proc *p) { Str name = l->ident; LexSpan pos = l->pos; lex_expect(l, TM_ASSIGN); lex_next(l); Node *e = parse_expr(l, p); if (!scope_update(&p->scope, name, e, p)) { lex_error_at(l, pos, LE_ERROR, S("undeclared identifier")); } } void parse_stmt(Lexer *l, Proc *p) { /* TODO */ (void)l; switch (l->tok) { case TOK_RETURN: parse_return(l, p); break; case TOK_LET: parse_let(l, p); break; case TOK_LBRACE: parse_block(l, p); break; case TOK_IDENT: parse_assign(l, p); break; default: lex_expected(l, TM_RBRACE); break; } } Type parse_type(Lexer *l, Proc *proc) { (void)proc; Type t = { .lvl = T_BOT }; if (str_eql(l->ident, S("i64"))) { t.t = T_INT; } else if (str_eql(l->ident, S("bool"))) { t.t = T_BOOL; } else { lex_error(l, LE_ERROR, S("unknown type")); } lex_next(l); return t; } void parse_args_list(Lexer *l, Proc *proc) { Node *start = proc->start; int i = 0; while (l->tok != TOK_RPAREN && l->tok != TOK_EOF) { lex_expect(l, TM_IDENT); Str name = l->ident; LexSpan pos = l->pos; lex_expect(l, TM_IDENT); Value v = (Value) { .type = parse_type(l, proc) }; ZDA_PUSH(&start->val.tuple, v, &proc->arena); lex_expected(l, TM_RPAREN | TM_COMMA); Node *proj = node_new(proc, N_PROJ, proc->start); proj->val.type = v.type; proj->val.i = i++; scope_bind(&proc->scope, name, proj, pos, proc); } lex_expected(l, TM_RPAREN); lex_next(l); } Proc *parse_proc(Lexer *l, Unit *u) { DA_FIT(&u->procs, u->procs.len + 1); Proc *proc = &u->procs.data[u->procs.len++]; lex_expect(l, TM_IDENT); proc_init(proc, l->ident); scope_push(&proc->scope, proc); lex_expect(l, TM_LPAREN | TM_LBRACE); if (l->tok == TOK_LPAREN) { parse_args_list(l, proc); } lex_expected(l, TM_LBRACE); lex_next(l); while (l->tok != TOK_RBRACE) { lex_expected_not(l, TM_EOF); parse_stmt(l, proc); } scope_pop(&proc->scope, proc); lex_expected(l, TM_RBRACE); lex_next(l); return proc; } Node *parse_term(Lexer *l, Proc *p) { Node *node = NULL; NodeType op_after = N_START; if (TMASK(l->tok) & (TM_MINUS | TM_PLUS | TM_NOT)) { Token t = l->tok; lex_next(l); node = parse_term(l, p); NodeType post_op = N_START; switch (t) { case TOK_MINUS: post_op = N_OP_NEG; break; case TOK_NOT: post_op = N_OP_NOT; break; default: return node; } if (post_op == N_START) return node; return node_peephole(node_new(p, post_op, node), p, l); } if (l->tok == TOK_LPAREN) { lex_next(l); node = parse_expr(l, p); lex_expected(l, TM_RPAREN); lex_next(l); } else if (l->tok == TOK_IDENT) { NameBinding *b = scope_find(&p->scope, l->ident); if (b) { node = b->node; } else { lex_error(l, LE_ERROR, S("undeclared identifier")); } lex_next(l); } else if (TMASK(l->tok) & (TM_TRUE | TM_FALSE)) { node = node_new_lit_bool(p, l->tok == TOK_TRUE); lex_next(l); } else { lex_expected(l, TM_LIT_NUM); int64_t val = 0; for (int i = 0; i < l->ident.n; i++) { if (!(l->ident.s[i] >= '0' && l->ident.s[i] <= '9')) { lex_error(l, LE_ERROR, S("not a digit")); break; } val = (val * 10) + (l->ident.s[i] - '0'); } node = node_new_lit_i64(p, val); node->src_pos = l->pos; lex_next(l); } if (op_after != N_START) { node = node_new(p, op_after, node_peephole(node, p, l)); } return node_peephole(node, p, l); } /* TODO: operator precedence would be kinda nice actually, sad to say */ Node *parse_expr(Lexer *l, Proc *p) { LexSpan pos = l->pos; Node *lhs = parse_term(l, p); if (TMASK(l->tok) & (TM_PLUS | TM_MINUS | TM_ASTERISK | TM_SLASH | TM_NOT | TM_AND | TM_XOR | TM_OR | TM_SHL | TM_SHR | TM_EQL | TM_NEQ | TM_LES | TM_GTR | TM_LTE | TM_GTE)) { Token t = l->tok; lex_next(l); Node *rhs = parse_expr(l, p); NodeType nt = N_OP_ADD; switch (t) { case TOK_PLUS: nt = N_OP_ADD; break; case TOK_MINUS: nt = N_OP_SUB; break; case TOK_ASTERISK: nt = N_OP_MUL; break; case TOK_SLASH: nt = N_OP_DIV; break; case TOK_NOT: nt = N_OP_NOT; break; case TOK_AND: nt = N_OP_AND; break; case TOK_OR: nt = N_OP_OR; break; case TOK_XOR: nt = N_OP_XOR; break; case TOK_SHL: nt = N_OP_SHL; break; case TOK_SHR: nt = N_OP_SHR; break; case TOK_EQL: nt = N_CMP_EQL; break; case TOK_NEQ: nt = N_CMP_NEQ; break; case TOK_LES: nt = N_CMP_LES; break; case TOK_GTR: nt = N_CMP_GTR; break; case TOK_LTE: nt = N_CMP_LTE; break; case TOK_GTE: nt = N_CMP_GTE; break; default: break; } lhs = node_new(p, nt, lhs, rhs); } Node *n = node_peephole(lhs, p, l); n->src_pos = (LexSpan) { pos.ofs, l->pos.ofs - pos.ofs }; return n; } void parse_toplevel(Lexer *l, Unit *u) { switch (l->tok) { case TOK_PROC: parse_proc(l, u); break; default: lex_expected(l, TM_PROC); break; } } void unit_print(Unit *u); void parse_unit(Lexer *l) { Unit u = { 0 }; unit_init(&u); while (l->tok != TOK_EOF) { parse_toplevel(l, &u); } unit_print(&u); unit_free(&u); } /* graph output */ void node_print(Node *n, Proc *p) { if (n->type == N_START) { Str s = S(""); int c = n->val.tuple.len; Value *v = n->val.tuple.data; for (int i = 0; i < c; i++) { if (i > 0) str_cat(&s, S(", "), &p->arena); str_cat(&s, type_desc(&v[i].type, &p->arena), &p->arena); } printf("\t%d [label=\"start(%.*s)\"]\n", n->id, (int)s.n, s.s); } else if (n->type == N_LIT) { switch (n->val.type.t) { case T_INT: printf("\t%d [label=\"%ld\"]\n", n->id, n->val.i); break; case T_BOOL: printf("\t%d [label=\"%s\"]\n", n->id, n->val.i ? "true" : "false"); break; default: printf("\t%d [label=\"literal %d\"]\n", n->id, n->id); break; } } else if (n->type == N_PROJ) { printf("\t%d [label=\"PROJ(%ld)\", shape=record]\n", n->id, n->val.i); } else { printf("\t%d [label=\"%s\", shape=record]\n", n->id, node_type_name(n->type)); } if (n->walked) { return; } n->walked = 1; for (int i = 0; i < n->out.len; i++) { Node *o = n->out.data[i]; if (o->type == N_LIT) { printf("\t%d -> %d [style=dashed]\n", n->id, o->id); } else { int j; for (j = 0; j < o->in.len && o->in.data[j] != n; j++); printf("\t%d -> %d [label=%d]\n", n->id, o->id, j); } } for (int i = 0; i < n->out.len; i++) { node_print(n->out.data[i], p); } } void proc_print(Proc *p) { if (p->start) { printf("\t\"%.*s\" -> %d\n", (int)p->name.n, p->name.s, p->start->id); node_print(p->start, p); if (no_opt) { for (NameBinding *b = p->scope.free_bind; b; b = b->prev) { uint64_t id = (uintptr_t)b->node; printf("\t\t%lu [label=\"%.*s\",shape=none,fontcolor=blue]\n", id, (int)b->name.n, b->name.s); printf("\t\t%lu -> %d [arrowhead=none,style=dotted,color=blue]\n", id, b->node->id); } } } } void unit_print(Unit *u) { puts("digraph {"); for (int i = 0; i < u->procs.len; i++) { proc_print(&u->procs.data[i]); } puts("}"); } /* main */ int main(int argc, const char **argv) { if (argc != 2) { fprintf(stderr, "Usage: %s FILE\n", argv[0]); return 1; } Lexer l = { 0 }; lex_start(&l, argv[1]); parse_unit(&l); lex_free(&l); return 0; }