4 files changed, 217 insertions, 38 deletions
diff --git a/ir.c b/ir.c
index ff61c7a..8201724 100644
--- a/ir.c
+++ b/ir.c
@@ -21,6 +21,11 @@ int type_base_eql(Type *a, Type *b) {
 	return a->next ? type_base_eql(a->next, b->next) : 1;
 }
 
+int value_eql(Value *a, Value *b) {
+	if (!type_eql(&a->type, &b->type)) return 0;
+	return a->i == b->i;
+}
+
 Str type_desc(Type *t, Arena *arena) {
 	(void)arena;
 	switch (t->t) {
@@ -108,6 +113,7 @@ void node_del_out(Node *n, Node *p) {
 			}
 			n->out.len--;
 			i--;
+			break;
 		}
 	}
 }
@@ -121,6 +127,7 @@ void node_del_in(Node *n, Node *p) {
 			}
 			n->in.len--;
 			i--;
+			break;
 		}
 	}
 }
@@ -156,6 +163,11 @@ void node_add(Proc *p, Node *src, Node *dest) {
 	node_add_out(p, src, dest);
 	node_add_in(p, dest, src);
 	if (dest->src_pos.n == 0) dest->src_pos = src->src_pos;
+	else if (src->src_pos.n != 0) {
+		int lo = dest->src_pos.ofs < src->src_pos.ofs ? dest->src_pos.ofs : src->src_pos.ofs;
+		int hi = dest->src_pos.ofs + dest->src_pos.n > src->src_pos.ofs + src->src_pos.n ? dest->src_pos.ofs + dest->src_pos.n : src->src_pos.ofs + src->src_pos.n;
+		dest->src_pos = (LexSpan) { lo, hi - lo };
+	}
 }
 
 void node_remove(Proc *p, Node *src, Node *dest) {
@@ -197,7 +209,6 @@ Node *node_newv(Proc *p, NodeType t, ...) {
 }
 
 Node *node_dedup_lit(Proc *p, Value v) {
-	return NULL;
 	/* TODO: this is probably real inefficient for large procedure graphs,
 	 * but does it matter? how many nodes are direct children of the start node?
 	 * how many literals even usually occur in a procedure? */
@@ -227,13 +238,44 @@ Node *node_new_lit_bool(Proc *p, int b) {
 }
 
 static inline int node_op_communative(NodeType t) {
-	NodeType ops[] = { N_OP_ADD, N_OP_MUL, N_OP_AND, N_OP_XOR, N_OP_OR };
+	NodeType ops[] = { N_OP_ADD, N_OP_MUL, N_OP_AND, N_OP_XOR, N_OP_OR, N_CMP_EQL, N_CMP_NEQ };
 	for (unsigned i = 0; i < sizeof ops / sizeof *ops; i++) {
 		if (ops[i] == t) return 1;
 	}
 	return 0;
 }
 
+/* when applied to the same input, at least one must be true */
+static inline int node_cmp_opposite(NodeType a, NodeType b) {
+	struct { NodeType l, r; } pairs[] = {
+		{ N_CMP_EQL, N_CMP_NEQ },
+		{ N_CMP_LES, N_CMP_GTE },
+		{ N_CMP_GTR, N_CMP_LTE },
+	};
+	for (unsigned i = 0; i < sizeof pairs / sizeof *pairs; i++) {
+		if ((pairs[i].l == a && pairs[i].r == b) || (pairs[i].l == b && pairs[i].r == a)) {
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/* when applied to the same input, at least one must be false */
+static inline int node_cmp_incompat(NodeType a, NodeType b) {
+	struct { NodeType l, r; } pairs[] = {
+		{ N_CMP_EQL, N_CMP_NEQ },
+		{ N_CMP_LES, N_CMP_GTE },
+		{ N_CMP_GTR, N_CMP_LTE },
+		{ N_CMP_LES, N_CMP_GTR },
+	};
+	for (unsigned i = 0; i < sizeof pairs / sizeof *pairs; i++) {
+		if ((pairs[i].l == a && pairs[i].r == b) || (pairs[i].l == b && pairs[i].r == a)) {
+			return 1;
+		}
+	}
+	return 0;
+}
+
 Value node_compute(Node *n, Lexer *l) {
 	Type lit_type = { .lvl = T_BOT };
 	Node **in = n->in.data;
@@ -265,11 +307,11 @@ Value node_compute(Node *n, Lexer *l) {
 		case N_OP_SUB: v.i = in[0]->val.i - in[1]->val.i; break;
 		case N_OP_MUL: v.i = in[0]->val.i * in[1]->val.i; break;
 		case N_OP_DIV:
-			       if (in[1]->val.i == 0) {
-				       lex_error_at(l, in[1]->src_pos, LE_ERROR, S("divisor always evaluates to zero"));
-			       }
-			       v.i = in[0]->val.i / in[1]->val.i;
-			       break;
+		       if (in[1]->val.i == 0) {
+			       lex_error_at(l, in[1]->src_pos, LE_ERROR, S("divisor always evaluates to zero"));
+		       }
+		       v.i = in[0]->val.i / in[1]->val.i;
+		       break;
 		case N_OP_AND: v.i = in[0]->val.i & in[1]->val.i; break;
 		case N_OP_OR: v.i = in[0]->val.i | in[1]->val.i; break;
 		case N_OP_XOR: v.i = in[0]->val.i ^ in[1]->val.i; break;
@@ -318,11 +360,61 @@ void node_peephole_in(Node *n, int idx, Proc *p, Lexer *l) {
 #define CADR(n) CDR(CAR(n))
 #define CDAR(n) CAR(CDR(n))
 #define CDDR(n) CDR(CDR(n))
+#define CAAAR(n) CAR(CAAR(n))
+#define CAADR(n) CDR(CAAR(n))
+#define CADAR(n) CAR(CADR(n))
+#define CADDR(n) CDR(CADR(n))
+#define CDAAR(n) CAR(CDAR(n))
+#define CDADR(n) CDR(CDAR(n))
+#define CDDAR(n) CAR(CDDR(n))
+#define CDDDR(n) CDR(CDDR(n))
+
+#define T(a,b) ((a)->type == b)
+
+static inline int node_eql_i64(Node *n, int64_t i) {
+	return n->type == N_LIT && n->val.type.t == T_INT && n->val.i == i;
+}
+
+static inline int u64_power_of_2(uint64_t x) {
+	int ldz = 0, tlz = 0;
+	for (int i = 0; i < 64; i++) {
+		if ((x >> i) & 1) break;
+		else tlz++;
+	}
+	for (int i = 0; i < 64; i++) {
+		if ((x << i) & (1UL << 63)) break;
+		else ldz++;
+	}
+	if (ldz + tlz != 63) return 0;
+	return tlz;
+}
+
+/* will this become too slow eventually? */
+static inline int node_equiv(Node *a, Node *b) {
+	if (a == b) return 1;
+	if (a->type != b->type) return 0;
+	if (a->in.len != b->in.len) return 0;
+	if (!value_eql(&a->val, &b->val)) return 0;
+	for (int i = 0; i < a->in.len; i++) {
+		if (!node_equiv(a->in.data[i], b->in.data[i])) return 0;
+	}
+	return 1;
+}
+
+static inline int node_equiv_input(Node *a, Node *b) {
+	if (a->in.len != b->in.len) return 0;
+	for (int i = 0; i < a->in.len; i++) {
+		if (!node_equiv(a->in.data[i], b->in.data[i])) return 0;
+	}
+	return 1;
+}
 
 /* needs lexer for error reporting */
 Node *node_idealize(Node *n, Proc *p, Lexer *l) {
 	if (no_opt) return NULL;
 
+	/* try to compute a literal value */
+
 	if (n->type != N_LIT) {
 		Value v = node_compute(n, l);
 		if (v.type.lvl == T_CONST) {
@@ -335,44 +427,125 @@ Node *node_idealize(Node *n, Proc *p, Lexer *l) {
 		}
 	}
 
-	/* TODO: figure out to do peepholes recursively, without fucking up the graph or having to clone everything */
-	if (node_op_communative(n->type)) {
-		/* transformations to help encourage constant folding */
-		/* the overall trend is to move them rightwards */
+	/* try to trim duplicate inputs from the graph */
 
-		if (CAR(n)->type == N_LIT && CDR(n)->type != N_LIT) {
-			fprintf(stderr, "op(lit, X) -> op(X, lit)\n");
-			return OP(CDR(n), CAR(n));
+	int same = 1, same_ptr = 1;
+	for (int i = 1; i < n->in.len; i++) {
+		if (n->in.data[i] == n->in.data[0]) continue;
+		same_ptr = 0;
+		if (!node_equiv(n->in.data[i], n->in.data[0])) {
+			same = 0;
+			break;
 		}
+	}
 
-		if (CDR(n)->type == n->type && CAR(n)->type != n->type) {
-			fprintf(stderr, "op(X, op(Y, Z)) -> op(op(Y, Z), X)\n");
-			return OP(CDR(n), CAR(n));
+	if (n->in.len > 1 && same && !same_ptr) {
+		Node *r = node_new_empty(p, n->type);
+		for (int i = 0; i < n->in.len; i++) {
+			node_add(p, n->in.data[0], r);
 		}
+		return node_peephole(r, p, l);
+	}
 
-		if (CAR(n)->type == n->type
-				&& CDR(n)->type == n->type
-				&& CDDR(n)->type == N_LIT) {
-			fprintf(stderr, "op(op(X, Y), op(Z, lit)) -> op(op(X, op(Y, Z)), lit)\n");
-			return OP(OP(CAAR(n), OP(CADR(n), CDAR(n))), CDDR(n));
+	/* optimize based on situations where the input is partly known (e.g.
+	 * one constant input and one not, or identical inputs) */
+
+	switch (n->type) {
+	case N_OP_NOT:
+	case N_OP_NEG:
+		if (T(CAR(n), n->type)) return CAAR(n);
+		break;
+	case N_OP_ADD:
+		if (same) return NODE(N_OP_MUL, CAR(n), node_new_lit_i64(p, 2));
+		goto zero_no_effect;
+	case N_OP_SUB:
+		if (same) return node_new_lit_i64(p, 0);
+		if (node_eql_i64(CAR(n), 0)) return NODE(N_OP_NEG, CDR(n));
+		if (node_eql_i64(CDR(n), 0)) return CAR(n);
+		break;
+	case N_OP_MUL:
+		if (node_eql_i64(CDR(n), 0)) return CDR(n);
+		if (node_eql_i64(CDR(n), 1)) return CAR(n);
+		{
+			int po2;
+			if (T(CDR(n), N_LIT) && CDR(n)->val.type.t == T_INT && (po2 = u64_power_of_2(CDR(n)->val.u))) {;
+				return NODE(N_OP_SHL, CAR(n), node_new_lit_i64(p, po2));
+			}
+		}
+		break;
+	case N_OP_DIV:
+		if (node_eql_i64(CDR(n), 0)) {
+		       lex_error_at(l, CDR(n)->src_pos, LE_ERROR, S("divisor always evaluates to zero"));
+		}
+		break;
+	case N_OP_OR:
+		if (same) return CAR(n);
+		if (node_cmp_opposite(CAR(n)->type, CDR(n)->type) && node_equiv_input(CAR(n), CDR(n))) {
+			return node_new_lit_bool(p, 1);
+		}
+		goto zero_no_effect;
+	case N_OP_AND:
+		if (same) return CAR(n);
+		if (node_eql_i64(CAR(n), 0)) return node_new_lit_i64(p, 0);
+		if (node_eql_i64(CDR(n), 0)) return node_new_lit_i64(p, 0);
+		if (node_cmp_incompat(CAR(n)->type, CDR(n)->type) && node_equiv_input(CAR(n), CDR(n))) {
+			return node_new_lit_bool(p, 0);
 		}
+		break;
+	case N_OP_XOR:
+		if (same) return node_new_lit_i64(p, 0);
+		if (node_cmp_opposite(CAR(n)->type, CDR(n)->type) && node_equiv_input(CAR(n), CDR(n))) {
+			return node_new_lit_bool(p, 1);
+		}
+zero_no_effect:	if (node_eql_i64(CAR(n), 0)) return CDR(n);
+		if (node_eql_i64(CDR(n), 0)) return CAR(n);
+		break;
+	case N_CMP_EQL:
+		if (same) return node_new_lit_bool(p, 1);
+		break;
+	case N_CMP_NEQ:
+		if (same) return node_new_lit_bool(p, 0);
+		break;
+	case N_CMP_LES:
+		if (same) return node_new_lit_bool(p, 0);
+		break;
+	case N_CMP_GTR:
+		if (same) return node_new_lit_bool(p, 0);
+		break;
+	case N_CMP_LTE:
+		if (same) return node_new_lit_bool(p, 1);
+		break;
+	case N_CMP_GTE:
+		if (same) return node_new_lit_bool(p, 1);
+		break;
+	default:
+		break;
+	}
+
+	/* transformations to help encourage constant folding */
+	/* the overall trend is to move them rightwards */
 
-		if (CDR(n)->type == N_LIT
-				&& CAR(n)->type == n->type
-				&& CAAR(n)->type != N_LIT
-				&& CADR(n)->type == N_LIT) {
+	if (node_op_communative(n->type)) {
+		/* op(lit, X) -> op(X, lit) */
+		if (T(CAR(n), N_LIT) && !T(CDR(n), N_LIT)) return OP(CDR(n), CAR(n));
 
-			fprintf(stderr, "op(op(X, lit), lit) -> op(X, op(lit, lit))\n");
+		/* op(X, op(Y,Z)) -> op(op(Y,Z), X) */
+		if (!T(CAR(n), n->type) && T(CDR(n), n->type)) return OP(CDR(n), CAR(n));
 
+		/* op(op(X,Y), op(Z, lit)) -> op(op(X, op(Y, Z)), lit) */
+		if (T(CAR(n), n->type) && T(CDR(n), n->type) && T(CDDR(n), N_LIT)) {
+			return OP(OP(CAAR(n), OP(CADR(n), CDAR(n))), CDDR(n));
+		}
+
+		/* op(op(X, lit), lit) -> op(X, op(lit, lit)) */
+		if (T(CDR(n), N_LIT) && T(CAR(n), n->type)
+				&& !T(CAAR(n), N_LIT) && T(CADR(n), N_LIT)) {
 			return OP(CAAR(n), OP(CADR(n), CDR(n)));
 		}
 
 		/* op(op(X, lit), Y) -> op(op(X, Y), lit) */
-		if (CAR(n)->type == n->type
-				&& CAAR(n)->type != N_LIT
-				&& CADR(n)->type == N_LIT
-				&& CDR(n)->type != N_LIT) {
-			fprintf(stderr, "op(op(X, lit), Y) -> op(op(X, Y), lit)\n");
+		if (T(CAR(n), n->type) && !T(CAAR(n), N_LIT)
+				&& T(CADR(n), N_LIT) && !T(CDR(n), N_LIT)) {
 			return OP(OP(CAAR(n), CDR(n)), CADR(n));
 		}
 	}
@@ -383,14 +556,18 @@ Node *node_idealize(Node *n, Proc *p, Lexer *l) {
 Node *node_peephole(Node *n, Proc *p, Lexer *l) {
 	Node *r = node_idealize(n, p, l);
 	if (r) {
+		r->src_pos = n->src_pos;
 		/* make sure r doesn't get deleted even if connected to n */
 		node_add_out(p, r, p->keepalive);
 		node_kill(n, p);
 		node_del_out(r, p->keepalive);
-		return r;
-	} else {
-		return n;
+		n = r;
+	}
+	/* FIXME: figure out why this shows the wrong position when in an assignment */
+	if (!type_check(n)) {
+		type_err(n, l);
 	}
+	return n;
 }
 
 /* procedures */
diff --git a/ir.h b/ir.h
index 0a8a9c2..1cb3def 100644
--- a/ir.h
+++ b/ir.h
@@ -39,6 +39,7 @@ typedef struct Value {
 struct Node;
 int type_eql(Type *a, Type *b);
 int type_base_eql(Type *a, Type *b);
+int value_eql(Value *a, Value *b);
 int type_check(struct Node *n);
 Str type_desc(Type *t, Arena *arena);
 void type_err(struct Node *n, Lexer *l);
diff --git a/main.c b/main.c
index d15559c..ce8981b 100644
--- a/main.c
+++ b/main.c
@@ -180,6 +180,8 @@ Node *parse_term(Lexer *l, Proc *p) {
 		node = parse_expr(l, p);
 		lex_expected(l, TM_RPAREN);
 		lex_next(l);
+		node->src_pos.ofs--;
+		node->src_pos.n += 2;
 	} else if (l->tok == TOK_IDENT) {
 		NameBinding *b = scope_find(&p->scope, l->ident);
 		if (b) {
diff --git a/test.lang b/test.lang
index 7148cbe..af8927d 100644
--- a/test.lang
+++ b/test.lang
@@ -9,7 +9,6 @@
 // also single-line now
 
 proc main(a i64, b i64) {
-	a := a + 1
-	b := b + 2
-	return a * b
+	let c = a + b
+	return (a - b) = (((a + b) - b) - b)
 }