summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lex.c22
-rw-r--r--lex.h19
-rw-r--r--main.c279
-rw-r--r--test.lang2
4 files changed, 271 insertions, 51 deletions
diff --git a/lex.c b/lex.c
index 12cd284..a254e7b 100644
--- a/lex.c
+++ b/lex.c
@@ -6,6 +6,7 @@
#include "strio.h"
void lex_start(Lexer *l, const char *path) {
+ l->ofs = 0;
l->filename = str_dup(str_from_cstr(path), &l->arena);
FILE *f = fopen(path, "r/o");
if (!f) {
@@ -57,9 +58,11 @@ void lex_expected_not(Lexer *l, TokMask t) {
}
}
-void lex_pos(Lexer *l, int *line, int *col) {
+/* should only be called in the event of errors, so probably not
+ * much of an issue that this has to scan the whole file */
+void lex_pos(Lexer *l, int ofs, int *line, int *col) {
int ln = 0, c = 0;
- for (int i = 0; i < l->ofs; i++) {
+ for (int i = 0; i < ofs; i++) {
if (l->buf.s[i] == '\n') {
ln++;
c = 0;
@@ -75,9 +78,12 @@ void lex_pos(Lexer *l, int *line, int *col) {
}
void lex_error(Lexer *l, LexErr e, Str msg) {
+ lex_error_at(l, l->pos, e, msg);
+}
+
+void lex_error_at(Lexer *l, LexSpan pos, LexErr e, Str msg) {
int line, col;
- l->ofs -= l->ident.n;
- lex_pos(l, &line, &col);
+ lex_pos(l, pos.ofs, &line, &col);
fprintf(stderr, "%s", e == LE_ERROR ? "\x1b[1;31m" : "\x1b[1;33m");
@@ -85,14 +91,14 @@ void lex_error(Lexer *l, LexErr e, Str msg) {
(int)l->filename.n, l->filename.s, line + 1, col + 1, (int)msg.n, msg.s);
{
- int ofs = l->ofs;
+ int ofs = pos.ofs;
int line_start = ofs;
while (line_start > 0 && l->buf.s[line_start - 1] != '\n') line_start--;
int line_end = line_start;
while (line_end < l->buf.n && l->buf.s[line_end] != '\n') line_end++;
fprintf(stderr, "%.*s\n", line_end - line_start, &l->buf.s[line_start]);
- for (int i = 0; i < col; i++) putchar(' ');
- for (int i = ofs; i < ofs + l->ident.n && i < line_end; i++) putchar('^');
+ for (int i = 0; i < col; i++) fputc(' ', stderr);
+ for (int i = ofs; i < ofs + pos.n && i < line_end; i++) fputc('^', stderr);
putchar('\n');
}
@@ -142,6 +148,7 @@ recurse:
int start_ofs = i;
l->ident = (Str) { &l->buf.s[start_ofs], 0 };
+ l->pos = (LexSpan) { start_ofs, 0 };
if (i >= l->buf.n) {
l->tok = TOK_EOF;
return;
@@ -186,6 +193,7 @@ recurse:
lex_error(l, LE_ERROR, S("Invalid token"));
}
l->ident.n = i - start_ofs;
+ l->pos.n = i - start_ofs;
l->ofs = i;
if (l->tok == TOK_IDENT) {
l->tok = ident_to_keyword(l->ident);
diff --git a/lex.h b/lex.h
index daf71cc..9c38208 100644
--- a/lex.h
+++ b/lex.h
@@ -22,6 +22,10 @@
X(EQUALS, "=")\
X(COLON, ":")\
X(COMMA, ",")\
+ X(NOT, "~")\
+ X(AND, "&")\
+ X(OR, "|")\
+ X(XOR, "^")\
X(LIT_STR, "string literal")\
X(LIT_CHAR, "character literal")\
X(LIT_NUM, "numeric literal")
@@ -37,7 +41,12 @@
X(TOK_SLASH, '/')\
X(TOK_EQUALS, '=')\
X(TOK_COLON, ':')\
- X(TOK_COMMA, ',')
+ X(TOK_COMMA, ',')\
+ X(TOK_NOT, '~')\
+ X(TOK_AND, '&')\
+ X(TOK_OR, '|')\
+ X(TOK_XOR, '^')
+
typedef enum {
#define X(n, s) TOK_##n,
@@ -61,8 +70,13 @@ typedef enum {
} TokMask;
typedef struct {
+ int ofs, n;
+} LexSpan;
+
+typedef struct {
Token tok;
Str ident;
+ LexSpan pos;
Str filename, buf;
Arena arena;
int ofs;
@@ -79,8 +93,9 @@ void lex_expected(Lexer *l, TokMask t);
void lex_expected_not(Lexer *l, TokMask t);
void lex_expect(Lexer *l, TokMask t); /* next -> expected */
void lex_expect_not(Lexer *l, TokMask t); /* next -> expected_not */
+void lex_error_at(Lexer *l, LexSpan pos, LexErr e, Str msg);
void lex_error(Lexer *l, LexErr e, Str msg);
-void lex_pos(Lexer *l, int *line, int *col);
+void lex_pos(Lexer *l, int ofs, int *line, int *col);
void lex_next(Lexer *l);
#endif
diff --git a/main.c b/main.c
index b1e5150..12e43bd 100644
--- a/main.c
+++ b/main.c
@@ -3,6 +3,8 @@
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
+#include <stdarg.h>
+#include <assert.h>
#include "lex.h"
#include "arena.h"
@@ -12,28 +14,54 @@
typedef enum {
N_START,
- N_STOP,
- N_LIT_INT,
- N_OP_ADD,
- N_OP_SUB,
- N_OP_MUL,
- N_OP_DIV,
- N_OP_NEG,
+ N_RETURN,
+ N_LIT,
+ N_OP_ADD, N_OP_SUB, N_OP_MUL, N_OP_DIV,
+ N_OP_AND, N_OP_OR, N_OP_XOR,
+ N_OP_NEG, N_OP_NOT
} NodeType;
+const char *node_type_name[] = {
+ "start",
+ "return",
+ "integer literal",
+ "add", "sub", "mul", "div",
+ "and", "or", "xor",
+ "neg", "not",
+};
+
typedef struct Node {
- int id;
- NodeType type;
- DYNARR(struct Node *) in, out;
union {
- int64_t i;
- } v;
+ struct Node *prev_free;
+ struct {
+ int id, refs;
+ int walked;
+ NodeType type;
+ LexSpan src_pos;
+ DYNARR(struct Node *) in, out;
+ union {
+ struct {
+ int64_t i;
+ } lit;
+ };
+ };
+ };
} Node;
+typedef struct NameBinding {
+ Str name;
+ struct NameBinding *prev;
+} NameBinding;
+
+typedef struct {
+ NameBinding *last;
+} Scope;
+
typedef struct {
Arena arena;
- int last_id;
+ Str name;
Node *start, *stop;
+ Node *free_list;
} Proc;
typedef struct {
@@ -48,55 +76,217 @@ void unit_fini(Unit *u) {
free(u->procs.data);
}
-Node *node_new(Proc *p, NodeType t) {
- Node *n = new(&p->arena, Node);
- n->type = t;
- n->id = p->last_id++;
- return n;
+void node_kill(Node *n, Proc *p);
+
+void node_die(Node *n, Proc *p) {
+ n->prev_free = p->free_list;
+ p->free_list = n;
+}
+
+void node_del_out(Node *n, Node *p) {
+ for (int i = 0; i < n->out.len; i++) {
+ if (n->out.data[i] == p) {
+ p->refs--;
+ if (i + 1 < n->out.len) {
+ memmove(&n->out.data[i], &n->out.data[i + 1], sizeof(Node*) * (n->out.len - i - 1));
+ }
+ n->out.len--;
+ i--;
+ }
+ }
+}
+
+void node_del_in(Node *n, Node *p) {
+ for (int i = 0; i < n->in.len; i++) {
+ if (n->in.data[i] == p) {
+ p->refs--;
+ if (i + 1 < n->in.len) {
+ memmove(&n->in.data[i], &n->in.data[i + 1], sizeof(Node*) * (n->in.len - i - 1));
+ }
+ n->in.len--;
+ i--;
+ }
+ }
+}
+
+void node_kill(Node *n, Proc *p) {
+ for (int i = 0; i < n->in.len; i++) {
+ node_del_out(n->in.data[i], n);
+ n->in.data[i]->refs--;
+ if (n->in.data[i]->out.len < 1) node_kill(n->in.data[i], p);
+ }
+ for (int i = 0; i < n->out.len; i++) {
+ node_del_in(n->out.data[i], n);
+ n->out.data[i]->refs--;
+ if (n->out.data[i]->refs < 1) node_die(n->out.data[i], p);
+ }
+ n->in.len = 0;
+ n->out.len = 0;
+ node_die(n, p);
}
void node_add(Proc *p, Node *src, Node *dest) {
ZDA_PUSH(&src->out, dest, &p->arena);
ZDA_PUSH(&dest->in, src, &p->arena);
+ src->refs++;
+ dest->refs++;
+ if (dest->src_pos.n == 0) dest->src_pos = src->src_pos;
+ else {
+ LexSpan *sp = &src->src_pos, *dp = &dest->src_pos;
+ if (sp->ofs < dp->ofs) {
+ dp->n = (dp->ofs + dp->n) - sp->ofs;
+ dp->ofs = sp->ofs;
+ }
+ if ((sp->ofs + sp->n) > (dp->ofs + dp->n)) {
+ dp->n = (sp->ofs + sp->n) - dp->ofs;
+ }
+ }
+}
+
+static int global_node_count = 0;
+
+Node *node_new_empty(Proc *p, NodeType t) {
+ Node *n;
+ if (p->free_list) {
+ n = p->free_list;
+ p->free_list = n->prev_free;
+ memset(n, 0, sizeof(Node));
+ } else {
+ n = new(&p->arena, Node);
+ }
+ n->type = t;
+ n->id = global_node_count++;
+ return n;
+}
+
+Node *node_new(Proc *p, NodeType t, ...) {
+ Node *node = node_new_empty(p, t);
+ va_list ap;
+ va_start(ap, t);
+ for (;;) {
+ Node *n = va_arg(ap, Node *);
+ if (!n) break;
+ node_add(p, n, node);
+ }
+ va_end(ap);
+ return node;
+}
+
+#define node_new(...) node_new(__VA_ARGS__, NULL)
+
+void node_print(Node *n) {
+ if (n->type == N_LIT) {
+ printf("\t%d [label=\"%ld\"]\n", n->id, n->lit.i);
+ } else {
+ printf("\t%d [label=\"%s\", shape=record]\n", n->id, node_type_name[n->type]);
+ }
+ if (n->walked) {
+ return;
+ }
+ n->walked = 1;
+ for (int i = 0; i < n->out.len; i++) {
+ if (n->out.data[i]->type == N_LIT) {
+ printf("\t%d -> %d [style=dashed]\n", n->id, n->out.data[i]->id);
+ } else {
+ printf("\t%d -> %d\n", n->id, n->out.data[i]->id);
+ }
+ }
+ for (int i = 0; i < n->out.len; i++) {
+ node_print(n->out.data[i]);
+ }
+}
+
+void proc_print(Proc *p) {
+ if (p->start) {
+ printf("\t\"%.*s\" -> %d\n", (int)p->name.n, p->name.s, p->start->id);
+ node_print(p->start);
+ }
+}
+
+void unit_print(Unit *u) {
+ puts("digraph {");
+ for (int i = 0; i < u->procs.len; i++) {
+ proc_print(&u->procs.data[i]);
+ }
+ puts("}");
+}
+
+Node *node_new_lit_i64(Proc *p, int64_t i) {
+ Node *n = node_new(p, N_LIT, p->start);
+ n->lit.i = i;
+ return n;
+}
+
+/* needs lexer for error reporting */
+Node *node_peephole(Node *n, Proc *p, Lexer *l) {
+ return n;
+ for (int i = 0; i < n->in.len; i++) {
+ if (n->in.data[i]->type != N_LIT) return n;
+ }
+ Node **in = n->in.data;
+ Node *r = n;
+ switch (n->type) {
+ case N_OP_NEG: r = node_new_lit_i64(p, -in[0]->lit.i); break;
+ case N_OP_NOT: r = node_new_lit_i64(p, ~in[0]->lit.i); break;
+ case N_OP_ADD: r = node_new_lit_i64(p, in[0]->lit.i + in[1]->lit.i); break;
+ case N_OP_SUB: r = node_new_lit_i64(p, in[0]->lit.i - in[1]->lit.i); break;
+ case N_OP_MUL: r = node_new_lit_i64(p, in[0]->lit.i * in[1]->lit.i); break;
+ case N_OP_DIV:
+ if (in[1]->lit.i == 0) {
+ lex_error_at(l, n->src_pos, LE_ERROR, S("Division by zero"));
+ }
+ r = node_new_lit_i64(p, in[0]->lit.i / in[1]->lit.i);
+ break;
+ case N_OP_AND: r = node_new_lit_i64(p, in[0]->lit.i & in[1]->lit.i); break;
+ case N_OP_OR: r = node_new_lit_i64(p, in[0]->lit.i | in[1]->lit.i); break;
+ case N_OP_XOR: r = node_new_lit_i64(p, in[0]->lit.i ^ in[1]->lit.i); break;
+ default:
+ return n;
+ break;
+ }
+ r->src_pos = n->src_pos;
+ if (n->out.len == 0) node_kill(n, p);
+ return r;
}
/* parsing */
Node *parse_expr(Lexer *l, Proc *p);
-void parse_return(Lexer *l, Proc *p) {
+Node *parse_return(Lexer *l, Proc *p) {
lex_next(l);
- Node *n = parse_expr(l, p);
- node_add(p, n, p->stop);
+ return node_new(p, N_RETURN, p->start, parse_expr(l, p));
}
-void parse_stmt(Lexer *l, Proc *p) {
+Node *parse_stmt(Lexer *l, Proc *p) {
/* TODO */
(void)l;
switch (l->tok) {
case TOK_RETURN:
- parse_return(l, p);
+ return parse_return(l, p);
break;
default:
lex_expected(l, TM_RBRACE);
+ return NULL;
}
}
-void parse_proc(Lexer *l, Unit *u) {
+Proc *parse_proc(Lexer *l, Unit *u) {
DA_FIT(&u->procs, u->procs.len + 1);
Proc *proc = &u->procs.data[u->procs.len++];
memset(proc, 0, sizeof(Proc));
- proc->start = node_new(proc, N_START);
- proc->stop = node_new(proc, N_STOP);
+ proc->start = node_new_empty(proc, N_START);
lex_expect(l, TM_IDENT);
+ proc->name = l->ident;
lex_expect(l, TM_LBRACE);
lex_next(l);
while (l->tok != TOK_RBRACE) {
lex_expected_not(l, TM_EOF);
- parse_stmt(l, proc);
+ proc->stop = parse_stmt(l, proc);
}
lex_expected(l, TM_RBRACE);
lex_next(l);
+ return proc;
}
Node *parse_term(Lexer *l, Proc *p) {
@@ -120,17 +310,14 @@ Node *parse_term(Lexer *l, Proc *p) {
}
val = (val * 10) + (l->ident.s[i] - '0');
}
- node = node_new(p, N_LIT_INT);
- node_add(p, p->start, node);
- node->v.i = val;
+ node = node_new_lit_i64(p, val);
+ node->src_pos = l->pos;
lex_next(l);
}
if (negate_after) {
- Node *neg = node_new(p, N_OP_NEG);
- node_add(p, node, neg);
- return neg;
+ node = node_new(p, N_OP_NEG, node);
}
- return node;
+ return node_peephole(node, p, l);
}
Node *parse_expr(Lexer *l, Proc *p) {
@@ -148,16 +335,25 @@ Node *parse_expr(Lexer *l, Proc *p) {
puts("args_end");
puts("func_call");
}
- if (TMASK(l->tok) & (TM_PLUS | TM_MINUS | TM_ASTERISK | TM_SLASH)) {
- lex_expected(l, TM_PLUS);
+ if (TMASK(l->tok) & (TM_PLUS | TM_MINUS | TM_ASTERISK | TM_SLASH | TM_NOT | TM_AND | TM_XOR | TM_OR)) {
+ Token t = l->tok;
lex_next(l);
Node *rhs = parse_expr(l, p);
- Node *add = node_new(p, N_OP_ADD);
- node_add(p, lhs, add);
- node_add(p, rhs, add);
- return add;
+ NodeType nt = N_OP_ADD;
+ switch (t) {
+ case TOK_PLUS: nt = N_OP_ADD; break;
+ case TOK_MINUS: nt = N_OP_SUB; break;
+ case TOK_ASTERISK: nt = N_OP_MUL; break;
+ case TOK_SLASH: nt = N_OP_DIV; break;
+ case TOK_NOT: nt = N_OP_NOT; break;
+ case TOK_AND: nt = N_OP_AND; break;
+ case TOK_OR: nt = N_OP_OR; break;
+ case TOK_XOR: nt = N_OP_XOR; break;
+ default: break;
+ }
+ lhs = node_new(p, nt, lhs, rhs);
}
- return lhs;
+ return node_peephole(lhs, p, l);
}
void parse_toplevel(Lexer *l, Unit *u) {
@@ -177,6 +373,7 @@ void parse_unit(Lexer *l) {
while (l->tok != TOK_EOF) {
parse_toplevel(l, &u);
}
+ unit_print(&u);
unit_fini(&u);
}
diff --git a/test.lang b/test.lang
index 44c3cf4..4ce371a 100644
--- a/test.lang
+++ b/test.lang
@@ -7,5 +7,5 @@
*/
proc main {
- return 1
+ return ((1 & 3) * 1 + 4 / (81237 & 37)) - 23 + -3 + 4
}