diff options
| -rw-r--r-- | lex.c | 22 | ||||
| -rw-r--r-- | lex.h | 19 | ||||
| -rw-r--r-- | main.c | 279 | ||||
| -rw-r--r-- | test.lang | 2 |
4 files changed, 271 insertions, 51 deletions
@@ -6,6 +6,7 @@ #include "strio.h" void lex_start(Lexer *l, const char *path) { + l->ofs = 0; l->filename = str_dup(str_from_cstr(path), &l->arena); FILE *f = fopen(path, "r/o"); if (!f) { @@ -57,9 +58,11 @@ void lex_expected_not(Lexer *l, TokMask t) { } } -void lex_pos(Lexer *l, int *line, int *col) { +/* should only be called in the event of errors, so probably not + * much of an issue that this has to scan the whole file */ +void lex_pos(Lexer *l, int ofs, int *line, int *col) { int ln = 0, c = 0; - for (int i = 0; i < l->ofs; i++) { + for (int i = 0; i < ofs; i++) { if (l->buf.s[i] == '\n') { ln++; c = 0; @@ -75,9 +78,12 @@ void lex_pos(Lexer *l, int *line, int *col) { } void lex_error(Lexer *l, LexErr e, Str msg) { + lex_error_at(l, l->pos, e, msg); +} + +void lex_error_at(Lexer *l, LexSpan pos, LexErr e, Str msg) { int line, col; - l->ofs -= l->ident.n; - lex_pos(l, &line, &col); + lex_pos(l, pos.ofs, &line, &col); fprintf(stderr, "%s", e == LE_ERROR ? "\x1b[1;31m" : "\x1b[1;33m"); @@ -85,14 +91,14 @@ void lex_error(Lexer *l, LexErr e, Str msg) { (int)l->filename.n, l->filename.s, line + 1, col + 1, (int)msg.n, msg.s); { - int ofs = l->ofs; + int ofs = pos.ofs; int line_start = ofs; while (line_start > 0 && l->buf.s[line_start - 1] != '\n') line_start--; int line_end = line_start; while (line_end < l->buf.n && l->buf.s[line_end] != '\n') line_end++; fprintf(stderr, "%.*s\n", line_end - line_start, &l->buf.s[line_start]); - for (int i = 0; i < col; i++) putchar(' '); - for (int i = ofs; i < ofs + l->ident.n && i < line_end; i++) putchar('^'); + for (int i = 0; i < col; i++) fputc(' ', stderr); + for (int i = ofs; i < ofs + pos.n && i < line_end; i++) fputc('^', stderr); putchar('\n'); } @@ -142,6 +148,7 @@ recurse: int start_ofs = i; l->ident = (Str) { &l->buf.s[start_ofs], 0 }; + l->pos = (LexSpan) { start_ofs, 0 }; if (i >= l->buf.n) { l->tok = TOK_EOF; return; @@ -186,6 +193,7 @@ recurse: lex_error(l, LE_ERROR, S("Invalid token")); } l->ident.n = i - start_ofs; + l->pos.n = i - start_ofs; l->ofs = i; if (l->tok == TOK_IDENT) { l->tok = ident_to_keyword(l->ident); @@ -22,6 +22,10 @@ X(EQUALS, "=")\ X(COLON, ":")\ X(COMMA, ",")\ + X(NOT, "~")\ + X(AND, "&")\ + X(OR, "|")\ + X(XOR, "^")\ X(LIT_STR, "string literal")\ X(LIT_CHAR, "character literal")\ X(LIT_NUM, "numeric literal") @@ -37,7 +41,12 @@ X(TOK_SLASH, '/')\ X(TOK_EQUALS, '=')\ X(TOK_COLON, ':')\ - X(TOK_COMMA, ',') + X(TOK_COMMA, ',')\ + X(TOK_NOT, '~')\ + X(TOK_AND, '&')\ + X(TOK_OR, '|')\ + X(TOK_XOR, '^') + typedef enum { #define X(n, s) TOK_##n, @@ -61,8 +70,13 @@ typedef enum { } TokMask; typedef struct { + int ofs, n; +} LexSpan; + +typedef struct { Token tok; Str ident; + LexSpan pos; Str filename, buf; Arena arena; int ofs; @@ -79,8 +93,9 @@ void lex_expected(Lexer *l, TokMask t); void lex_expected_not(Lexer *l, TokMask t); void lex_expect(Lexer *l, TokMask t); /* next -> expected */ void lex_expect_not(Lexer *l, TokMask t); /* next -> expected_not */ +void lex_error_at(Lexer *l, LexSpan pos, LexErr e, Str msg); void lex_error(Lexer *l, LexErr e, Str msg); -void lex_pos(Lexer *l, int *line, int *col); +void lex_pos(Lexer *l, int ofs, int *line, int *col); void lex_next(Lexer *l); #endif @@ -3,6 +3,8 @@ #include <stddef.h> #include <stdlib.h> #include <string.h> +#include <stdarg.h> +#include <assert.h> #include "lex.h" #include "arena.h" @@ -12,28 +14,54 @@ typedef enum { N_START, - N_STOP, - N_LIT_INT, - N_OP_ADD, - N_OP_SUB, - N_OP_MUL, - N_OP_DIV, - N_OP_NEG, + N_RETURN, + N_LIT, + N_OP_ADD, N_OP_SUB, N_OP_MUL, N_OP_DIV, + N_OP_AND, N_OP_OR, N_OP_XOR, + N_OP_NEG, N_OP_NOT } NodeType; +const char *node_type_name[] = { + "start", + "return", + "integer literal", + "add", "sub", "mul", "div", + "and", "or", "xor", + "neg", "not", +}; + typedef struct Node { - int id; - NodeType type; - DYNARR(struct Node *) in, out; union { - int64_t i; - } v; + struct Node *prev_free; + struct { + int id, refs; + int walked; + NodeType type; + LexSpan src_pos; + DYNARR(struct Node *) in, out; + union { + struct { + int64_t i; + } lit; + }; + }; + }; } Node; +typedef struct NameBinding { + Str name; + struct NameBinding *prev; +} NameBinding; + +typedef struct { + NameBinding *last; +} Scope; + typedef struct { Arena arena; - int last_id; + Str name; Node *start, *stop; + Node *free_list; } Proc; typedef struct { @@ -48,55 +76,217 @@ void unit_fini(Unit *u) { free(u->procs.data); } -Node *node_new(Proc *p, NodeType t) { - Node *n = new(&p->arena, Node); - n->type = t; - n->id = p->last_id++; - return n; +void node_kill(Node *n, Proc *p); + +void node_die(Node *n, Proc *p) { + n->prev_free = p->free_list; + p->free_list = n; +} + +void node_del_out(Node *n, Node *p) { + for (int i = 0; i < n->out.len; i++) { + if (n->out.data[i] == p) { + p->refs--; + if (i + 1 < n->out.len) { + memmove(&n->out.data[i], &n->out.data[i + 1], sizeof(Node*) * (n->out.len - i - 1)); + } + n->out.len--; + i--; + } + } +} + +void node_del_in(Node *n, Node *p) { + for (int i = 0; i < n->in.len; i++) { + if (n->in.data[i] == p) { + p->refs--; + if (i + 1 < n->in.len) { + memmove(&n->in.data[i], &n->in.data[i + 1], sizeof(Node*) * (n->in.len - i - 1)); + } + n->in.len--; + i--; + } + } +} + +void node_kill(Node *n, Proc *p) { + for (int i = 0; i < n->in.len; i++) { + node_del_out(n->in.data[i], n); + n->in.data[i]->refs--; + if (n->in.data[i]->out.len < 1) node_kill(n->in.data[i], p); + } + for (int i = 0; i < n->out.len; i++) { + node_del_in(n->out.data[i], n); + n->out.data[i]->refs--; + if (n->out.data[i]->refs < 1) node_die(n->out.data[i], p); + } + n->in.len = 0; + n->out.len = 0; + node_die(n, p); } void node_add(Proc *p, Node *src, Node *dest) { ZDA_PUSH(&src->out, dest, &p->arena); ZDA_PUSH(&dest->in, src, &p->arena); + src->refs++; + dest->refs++; + if (dest->src_pos.n == 0) dest->src_pos = src->src_pos; + else { + LexSpan *sp = &src->src_pos, *dp = &dest->src_pos; + if (sp->ofs < dp->ofs) { + dp->n = (dp->ofs + dp->n) - sp->ofs; + dp->ofs = sp->ofs; + } + if ((sp->ofs + sp->n) > (dp->ofs + dp->n)) { + dp->n = (sp->ofs + sp->n) - dp->ofs; + } + } +} + +static int global_node_count = 0; + +Node *node_new_empty(Proc *p, NodeType t) { + Node *n; + if (p->free_list) { + n = p->free_list; + p->free_list = n->prev_free; + memset(n, 0, sizeof(Node)); + } else { + n = new(&p->arena, Node); + } + n->type = t; + n->id = global_node_count++; + return n; +} + +Node *node_new(Proc *p, NodeType t, ...) { + Node *node = node_new_empty(p, t); + va_list ap; + va_start(ap, t); + for (;;) { + Node *n = va_arg(ap, Node *); + if (!n) break; + node_add(p, n, node); + } + va_end(ap); + return node; +} + +#define node_new(...) node_new(__VA_ARGS__, NULL) + +void node_print(Node *n) { + if (n->type == N_LIT) { + printf("\t%d [label=\"%ld\"]\n", n->id, n->lit.i); + } else { + printf("\t%d [label=\"%s\", shape=record]\n", n->id, node_type_name[n->type]); + } + if (n->walked) { + return; + } + n->walked = 1; + for (int i = 0; i < n->out.len; i++) { + if (n->out.data[i]->type == N_LIT) { + printf("\t%d -> %d [style=dashed]\n", n->id, n->out.data[i]->id); + } else { + printf("\t%d -> %d\n", n->id, n->out.data[i]->id); + } + } + for (int i = 0; i < n->out.len; i++) { + node_print(n->out.data[i]); + } +} + +void proc_print(Proc *p) { + if (p->start) { + printf("\t\"%.*s\" -> %d\n", (int)p->name.n, p->name.s, p->start->id); + node_print(p->start); + } +} + +void unit_print(Unit *u) { + puts("digraph {"); + for (int i = 0; i < u->procs.len; i++) { + proc_print(&u->procs.data[i]); + } + puts("}"); +} + +Node *node_new_lit_i64(Proc *p, int64_t i) { + Node *n = node_new(p, N_LIT, p->start); + n->lit.i = i; + return n; +} + +/* needs lexer for error reporting */ +Node *node_peephole(Node *n, Proc *p, Lexer *l) { + return n; + for (int i = 0; i < n->in.len; i++) { + if (n->in.data[i]->type != N_LIT) return n; + } + Node **in = n->in.data; + Node *r = n; + switch (n->type) { + case N_OP_NEG: r = node_new_lit_i64(p, -in[0]->lit.i); break; + case N_OP_NOT: r = node_new_lit_i64(p, ~in[0]->lit.i); break; + case N_OP_ADD: r = node_new_lit_i64(p, in[0]->lit.i + in[1]->lit.i); break; + case N_OP_SUB: r = node_new_lit_i64(p, in[0]->lit.i - in[1]->lit.i); break; + case N_OP_MUL: r = node_new_lit_i64(p, in[0]->lit.i * in[1]->lit.i); break; + case N_OP_DIV: + if (in[1]->lit.i == 0) { + lex_error_at(l, n->src_pos, LE_ERROR, S("Division by zero")); + } + r = node_new_lit_i64(p, in[0]->lit.i / in[1]->lit.i); + break; + case N_OP_AND: r = node_new_lit_i64(p, in[0]->lit.i & in[1]->lit.i); break; + case N_OP_OR: r = node_new_lit_i64(p, in[0]->lit.i | in[1]->lit.i); break; + case N_OP_XOR: r = node_new_lit_i64(p, in[0]->lit.i ^ in[1]->lit.i); break; + default: + return n; + break; + } + r->src_pos = n->src_pos; + if (n->out.len == 0) node_kill(n, p); + return r; } /* parsing */ Node *parse_expr(Lexer *l, Proc *p); -void parse_return(Lexer *l, Proc *p) { +Node *parse_return(Lexer *l, Proc *p) { lex_next(l); - Node *n = parse_expr(l, p); - node_add(p, n, p->stop); + return node_new(p, N_RETURN, p->start, parse_expr(l, p)); } -void parse_stmt(Lexer *l, Proc *p) { +Node *parse_stmt(Lexer *l, Proc *p) { /* TODO */ (void)l; switch (l->tok) { case TOK_RETURN: - parse_return(l, p); + return parse_return(l, p); break; default: lex_expected(l, TM_RBRACE); + return NULL; } } -void parse_proc(Lexer *l, Unit *u) { +Proc *parse_proc(Lexer *l, Unit *u) { DA_FIT(&u->procs, u->procs.len + 1); Proc *proc = &u->procs.data[u->procs.len++]; memset(proc, 0, sizeof(Proc)); - proc->start = node_new(proc, N_START); - proc->stop = node_new(proc, N_STOP); + proc->start = node_new_empty(proc, N_START); lex_expect(l, TM_IDENT); + proc->name = l->ident; lex_expect(l, TM_LBRACE); lex_next(l); while (l->tok != TOK_RBRACE) { lex_expected_not(l, TM_EOF); - parse_stmt(l, proc); + proc->stop = parse_stmt(l, proc); } lex_expected(l, TM_RBRACE); lex_next(l); + return proc; } Node *parse_term(Lexer *l, Proc *p) { @@ -120,17 +310,14 @@ Node *parse_term(Lexer *l, Proc *p) { } val = (val * 10) + (l->ident.s[i] - '0'); } - node = node_new(p, N_LIT_INT); - node_add(p, p->start, node); - node->v.i = val; + node = node_new_lit_i64(p, val); + node->src_pos = l->pos; lex_next(l); } if (negate_after) { - Node *neg = node_new(p, N_OP_NEG); - node_add(p, node, neg); - return neg; + node = node_new(p, N_OP_NEG, node); } - return node; + return node_peephole(node, p, l); } Node *parse_expr(Lexer *l, Proc *p) { @@ -148,16 +335,25 @@ Node *parse_expr(Lexer *l, Proc *p) { puts("args_end"); puts("func_call"); } - if (TMASK(l->tok) & (TM_PLUS | TM_MINUS | TM_ASTERISK | TM_SLASH)) { - lex_expected(l, TM_PLUS); + if (TMASK(l->tok) & (TM_PLUS | TM_MINUS | TM_ASTERISK | TM_SLASH | TM_NOT | TM_AND | TM_XOR | TM_OR)) { + Token t = l->tok; lex_next(l); Node *rhs = parse_expr(l, p); - Node *add = node_new(p, N_OP_ADD); - node_add(p, lhs, add); - node_add(p, rhs, add); - return add; + NodeType nt = N_OP_ADD; + switch (t) { + case TOK_PLUS: nt = N_OP_ADD; break; + case TOK_MINUS: nt = N_OP_SUB; break; + case TOK_ASTERISK: nt = N_OP_MUL; break; + case TOK_SLASH: nt = N_OP_DIV; break; + case TOK_NOT: nt = N_OP_NOT; break; + case TOK_AND: nt = N_OP_AND; break; + case TOK_OR: nt = N_OP_OR; break; + case TOK_XOR: nt = N_OP_XOR; break; + default: break; + } + lhs = node_new(p, nt, lhs, rhs); } - return lhs; + return node_peephole(lhs, p, l); } void parse_toplevel(Lexer *l, Unit *u) { @@ -177,6 +373,7 @@ void parse_unit(Lexer *l) { while (l->tok != TOK_EOF) { parse_toplevel(l, &u); } + unit_print(&u); unit_fini(&u); } @@ -7,5 +7,5 @@ */ proc main { - return 1 + return ((1 & 3) * 1 + 4 / (81237 & 37)) - 23 + -3 + 4 } |
