summaryrefslogtreecommitdiff
path: root/ir.c
diff options
context:
space:
mode:
authorWormHeamer2025-08-03 18:48:20 -0400
committerWormHeamer2025-08-03 18:48:20 -0400
commit62e4b45143e4ff24a1758a2ccd3af5dfe69706ec (patch)
tree2949a720e278da21254fdf72e3faab8bd55688b9 /ir.c
parentc4594891b79e305c661e74d495f78b1d143b4d8a (diff)
factor out node stuff into ir.c & ir.h
Diffstat (limited to 'ir.c')
-rw-r--r--ir.c308
1 files changed, 308 insertions, 0 deletions
diff --git a/ir.c b/ir.c
new file mode 100644
index 0000000..2e3e3b2
--- /dev/null
+++ b/ir.c
@@ -0,0 +1,308 @@
+#include <stdarg.h>
+#include "ir.h"
+
+extern int no_opt;
+
+const char *node_type_name(NodeType t) {
+ const char *names[] = {
+ "start",
+ "return",
+ "keepalive",
+ "literal",
+ "add", "sub", "mul", "div",
+ "and", "or", "xor",
+ "lshift", "rshift",
+ "neg", "not",
+ "value"
+ };
+ return names[t];
+}
+
+void node_die(Node *n, Proc *p) {
+ n->prev_free = p->free_list;
+ p->free_list = n;
+}
+
+void node_del_out(Node *n, Node *p) {
+ for (int i = 0; i < n->out.len; i++) {
+ if (n->out.data[i] == p) {
+ p->refs--;
+ if (i + 1 < n->out.len) {
+ memmove(&n->out.data[i], &n->out.data[i + 1], sizeof(Node*) * (n->out.len - i - 1));
+ }
+ n->out.len--;
+ i--;
+ }
+ }
+}
+
+void node_del_in(Node *n, Node *p) {
+ for (int i = 0; i < n->in.len; i++) {
+ if (n->in.data[i] == p) {
+ p->refs--;
+ if (i + 1 < n->in.len) {
+ memmove(&n->in.data[i], &n->in.data[i + 1], sizeof(Node*) * (n->in.len - i - 1));
+ }
+ n->in.len--;
+ i--;
+ }
+ }
+}
+
+void node_kill(Node *n, Proc *p) {
+ for (int i = 0; i < n->in.len; i++) {
+ node_del_out(n->in.data[i], n);
+ n->in.data[i]->refs--;
+ if (n->in.data[i]->out.len < 1) node_kill(n->in.data[i], p);
+ }
+ for (int i = 0; i < n->out.len; i++) {
+ node_del_in(n->out.data[i], n);
+ n->out.data[i]->refs--;
+ if (n->out.data[i]->refs < 1) node_die(n->out.data[i], p);
+ }
+ n->in.len = 0;
+ n->out.len = 0;
+ node_die(n, p);
+}
+
+void node_add(Proc *p, Node *src, Node *dest) {
+ ZDA_PUSH(&src->out, dest, &p->arena);
+ ZDA_PUSH(&dest->in, src, &p->arena);
+ src->refs++;
+ dest->refs++;
+ if (dest->src_pos.n == 0) dest->src_pos = src->src_pos;
+}
+
+void node_remove(Proc *p, Node *src, Node *dest) {
+ node_del_out(src, dest);
+ node_del_in(dest, src);
+ if (dest->refs < 1) node_die(dest, p);
+ if (src->refs < 1) node_die(src, p);
+ else if (src->out.len < 1) node_kill(src, p);
+}
+
+static int global_node_count = 0;
+
+Node *node_new_empty(Proc *p, NodeType t) {
+ Node *n;
+ if (p->free_list) {
+ n = p->free_list;
+ p->free_list = n->prev_free;
+ memset(n, 0, sizeof(Node));
+ } else {
+ n = new(&p->arena, Node);
+ }
+ n->type = t;
+ n->id = global_node_count++;
+ return n;
+}
+
+Node *node_newv(Proc *p, NodeType t, ...) {
+ Node *node = node_new_empty(p, t);
+ va_list ap;
+ va_start(ap, t);
+ for (;;) {
+ Node *n = va_arg(ap, Node *);
+ if (!n) break;
+ node_add(p, n, node);
+ }
+ va_end(ap);
+ return node;
+}
+
+Node *node_dedup_lit(Proc *p, Value v) {
+ /* TODO: this is probably real inefficient for large procedure graphs,
+ * but does it matter? how many nodes are direct children of the start node?
+ * how many literals even usually occur in a procedure? */
+ for (int i = 0; i < p->start->out.len; i++) {
+ Node *t = p->start->out.data[i];
+ if (t->type == N_LIT && t->val.type == v.type && t->val.i == v.i) {
+ return t;
+ }
+ }
+ return NULL;
+}
+
+Node *node_new_lit_i64(Proc *p, int64_t i) {
+ Value v = (Value) { T_INT, { .i = i } };
+ Node *t = node_dedup_lit(p, v);
+ if (t) return t;
+ Node *n = node_new(p, N_LIT, p->start);
+ n->val = v;
+ return n;
+}
+
+static inline int node_op_communative(NodeType t) {
+ NodeType ops[] = { N_OP_ADD, N_OP_MUL, N_OP_AND, N_OP_XOR, N_OP_OR };
+ for (unsigned i = 0; i < sizeof ops / sizeof *ops; i++) {
+ if (ops[i] == t) return 1;
+ }
+ return 0;
+}
+
+Value node_compute(Node *n, Lexer *l) {
+ Type lit_type = T_BOT;
+ Node **in = n->in.data;
+ for (int i = 0; i < n->in.len; i++) {
+ Node *p = in[i];
+ if (p->type != N_LIT) break;
+ if (p->val.type != lit_type) {
+ if (lit_type == T_BOT) {
+ lit_type = p->val.type;
+ } else {
+ lit_type = T_BOT;
+ break;
+ }
+ }
+ }
+ if (lit_type == T_INT) {
+ Value v = { .type = lit_type };
+ switch (n->type) {
+ case N_OP_NEG: v.i = -in[0]->val.i; break;
+ case N_OP_NOT: v.i = ~in[0]->val.i; break;
+ case N_OP_ADD: v.i = in[0]->val.i + in[1]->val.i; break;
+ case N_OP_SUB: v.i = in[0]->val.i - in[1]->val.i; break;
+ case N_OP_MUL: v.i = in[0]->val.i * in[1]->val.i; break;
+ case N_OP_DIV:
+ if (in[1]->val.i == 0) {
+ lex_error_at(l, in[1]->src_pos, LE_ERROR, S("divisor always evaluates to zero"));
+ }
+ v.i = in[0]->val.i / in[1]->val.i;
+ break;
+ case N_OP_AND: v.i = in[0]->val.i & in[1]->val.i; break;
+ case N_OP_OR: v.i = in[0]->val.i | in[1]->val.i; break;
+ case N_OP_XOR: v.i = in[0]->val.i ^ in[1]->val.i; break;
+ case N_OP_SHL: v.i = in[0]->val.u << in[1]->val.u; break;
+ case N_OP_SHR: v.i = in[0]->val.u >> in[1]->val.u; break;
+ default: return n->val;
+ }
+ return v;
+ }
+ return n->val;
+}
+
+/* needs lexer for error reporting */
+Node *node_peephole(Node *n, Proc *p, Lexer *l) {
+ if (no_opt) return n;
+
+ if (n->type != N_LIT) {
+ Value v = node_compute(n, l);
+ if (v.type > T_CONST) {
+ node_kill(n, p);
+ Node *t = node_dedup_lit(p, v);
+ if (t) return t;
+ Node *r = node_new(p, N_LIT, p->start);
+ r->val = v;
+ r->src_pos = n->src_pos;
+ return r;
+ }
+ }
+
+ Node **in = n->in.data;
+ /* TODO: figure out to do peepholes recursively, without fucking up the graph or having to clone everything */
+ if (node_op_communative(n->type)) {
+ /* transformations to help encourage constant folding */
+ /* the overall trend is to move them rightwards */
+ if (in[0]->type == N_LIT
+ && in[1]->type == n->type
+ && in[1]->in.data[0]->type != N_LIT
+ && in[1]->in.data[1]->type == N_LIT) {
+ /* op(lit, op(X, lit)) -> op(X, op(lit, lit)) */
+ Node *tmp = in[1]->in.data[0];
+ in[1]->in.data[0] = in[0];
+ in[0] = tmp;
+ /* TODO: ...would it break anything at all to just do in[1] = node_peephole(in[1], p, l)?
+ * probably not, right? */
+ } else if (in[0]->type == n->type
+ && in[0]->in.data[0]->type != N_LIT
+ && in[0]->in.data[1]->type == N_LIT
+ && in[1]->type != N_LIT) {
+ /* op(op(X, lit), Y) -> op(op(X, Y), lit) */
+ Node *tmp = in[0]->in.data[1];
+ in[0]->in.data[1] = in[1];
+ in[1] = tmp;
+ } else if (in[0]->type == N_LIT && in[1]->type != N_LIT) {
+ /* op(lit, X) -> op(X, lit) */
+ Node *tmp = in[0];
+ in[0] = in[1];
+ in[1] = tmp;
+ }
+ }
+
+ return n;
+}
+
+/* procedures */
+
+void proc_init(Proc *proc, Str name) {
+ memset(proc, 0, sizeof(Proc));
+ proc->start = node_new_empty(proc, N_START);
+ proc->keepalive = node_new_empty(proc, N_KEEPALIVE);
+ proc->name = name;
+}
+
+void proc_free(Proc *proc) {
+ arena_free(&proc->arena);
+}
+
+/* scope */
+
+NameBinding *scope_find(Scope *scope, Str name) {
+ for (ScopeFrame *f = scope->tail; f; f = f->prev) {
+ for (NameBinding *b = f->latest; b; b = b->prev) {
+ if (str_eql(b->name, name)) {
+ return b;
+ }
+ }
+ }
+ return NULL;
+}
+
+ScopeFrame *scope_push(Scope *scope, Proc *proc) {
+ ScopeFrame *f;
+ if (scope->free_scope) {
+ f = scope->free_scope;
+ *f = (ScopeFrame) { 0 };
+ scope->free_scope = f->prev;
+ } else {
+ f = new(&proc->arena, ScopeFrame);
+ }
+ f->prev = scope->tail;
+ scope->tail = f;
+ return f;
+}
+
+ScopeFrame *scope_pop(Scope *scope, Proc *proc) {
+ ScopeFrame *f = scope->tail;
+ scope->tail = f->prev;
+ f->prev = scope->free_scope;
+ scope->free_scope = f;
+ for (NameBinding *b = f->latest; b; ) {
+ NameBinding *p = b->prev;
+ b->prev = scope->free_bind;
+ scope->free_bind = b;
+ node_remove(proc, b->node, proc->keepalive);
+ b = p;
+ }
+ return scope->tail;
+}
+
+/* returns previous value */
+NameBinding *scope_bind(Scope *scope, Str name, Node *value, LexSpan pos, Proc *proc) {
+ NameBinding *prev = scope_find(scope, name);
+ NameBinding *b;
+ if (scope->free_bind) {
+ b = scope->free_bind;
+ *b = (NameBinding) { 0 };
+ scope->free_bind = b->prev;
+ } else {
+ b = new(&proc->arena, NameBinding);
+ }
+ b->name = name;
+ b->prev = scope->tail->latest;
+ scope->tail->latest = b;
+ b->node = value;
+ b->src_pos = pos;
+ node_add(proc, value, proc->keepalive);
+ return prev;
+}