thinking about known-relations

author: WormHeamer 2025-08-10 02:23:53 -0400
committer: WormHeamer 2025-08-10 02:23:53 -0400
commit: 1c4efc8009292b6f8d6079f87645e8eb65e85f3e (patch)
tree: 7da28a2ed7c3a66993e4f0b527f3ceda9b7a9ae9
parent: 6368f56ec5e2ab8c67b44fc341daffde3da4f5af (diff)
1 files changed, 39 insertions, 6 deletions
diff --git a/peephole.c b/peephole.c
index 7313e68..58a7612 100644
--- a/peephole.c
+++ b/peephole.c
@@ -1,5 +1,6 @@
 #include <stdarg.h>
 #include <assert.h>
+#include <stdio.h>
 
 #include "peephole.h"
 #include "strio.h"
@@ -10,6 +11,10 @@ extern int no_opt;
 #define NM_ASSOCIATIVE (NM_OP_ADD | NM_OP_MUL | NM_OP_AND | NM_OP_XOR | NM_OP_OR)
 #define NM_COMPARISON (NM_CMP_EQL | NM_CMP_NEQ | NM_CMP_LES | NM_CMP_GTR | NM_CMP_LTE | NM_CMP_GTE)
 
+#define C(a, b) type_base_eql(&(a)->type, &(b)->type)
+#define INT_EQ(n, v) ((n)->type.lvl == T_CONST && (n)->type.t == T_INT && (n)->val.i == v)
+#define BOOL_EQ(n, v) ((n)->type.lvl == T_CONST && (n)->type.t == T_BOOL && (n)->val.i == v)
+
 /* when applied to the same input, at least one must be true */
 static inline NodeType node_cmp_opposite(NodeType a) {
 	switch (a) {
@@ -114,8 +119,6 @@ Value node_compute(Node *n, Lexer *l) {
 	return n->val;
 }
 
-#include <stdio.h>
-
 #define NODE(t, ...) node_peephole(node_new(p, t, CTRL(n), __VA_ARGS__), p, l)
 #define OP(...) NODE(n->op, __VA_ARGS__)
 
@@ -137,6 +140,40 @@ static inline int u64_power_of_2(uint64_t x) {
 	return tlz;
 }
 
+/* TODO:
+ *
+ * Implement something like
+ * int node_is(Node *a, Relation r, Node *b);
+ * so you can do node_is(a, LES, b).
+ *
+ * In theory, there are six different relations nodes can have:
+ * - a = b
+ * - a <> b
+ * - a < b
+ * - a > b
+ * - a <= b
+ * - a >= b
+ *
+ * To identify any of them is necessarily kinda recursive --- we need to walk
+ * the tree of known relations.  First check to see if `a < b` is stored as an
+ * axiomatically true thing in the knowledge table -- if not, we need to scan
+ * through their relations, looking to see if there's any value x where a < x
+ * < b --- or a < x < y < b, or a < x < y < z < b, and so on.
+ *
+ * Because there are six node-relations, they can be packed into a single byte.
+ * They can also all be reordered (a < b = b > a), so we don't need to store a
+ * different flags byte for each node --- the order of which node should be on
+ * what side of the comparison is determined by their ID, low to high.
+ *
+ * The simplest way to store them is a 2d array, NxN where N is the number of
+ * nodes.  But this quickly gets expensive:  with 1024 nodes we're already up
+ * to a megabyte of space.  So instead it's probably gonna be a hash table or
+ * trie, with both 32-bit node IDs mixed into one 64-bit key.  One problem,
+ * though:  how do we efficiently _scan_ that space, to find those recursive
+ * values?  We need to track, for each node, a list of other nodes it has
+ * known-relations with.
+ */
+
 /* functions to query the compile-time equivalence of nodes */
 /* fairly conservative of necessity */
 
@@ -257,7 +294,6 @@ Node *node_idealize(Node *n, Proc *p, Lexer *l) {
 	/* the overall trend is to move them rightwards */
 
 	/* need to check for type compatibility */
-	#define C(a, b) type_base_eql(&(a)->type, &(b)->type)
 	if (NMASK(n->op) & NM_COMMUNATIVE) {
 		/* op(lit, X) -> op(X, lit) */
 		if (T(CAR(n), N_LIT) && !T(CDR(n), N_LIT)) return OP(CDR(n), CAR(n));
@@ -304,9 +340,6 @@ Node *node_idealize(Node *n, Proc *p, Lexer *l) {
 	/* optimize based on situations where the input is partly known (e.g.
 	 * one constant input and one not, or identical inputs) */
 
-#define INT_EQ(n, v) ((n)->type.lvl == T_CONST && (n)->type.t == T_INT && (n)->val.i == v)
-#define BOOL_EQ(n, v) ((n)->type.lvl == T_CONST && (n)->type.t == T_BOOL && (n)->val.i == v)
-
 	switch (n->op) {
 	case N_OP_NOT:
 		{
author	WormHeamer	2025-08-10 02:23:53 -0400
committer	WormHeamer	2025-08-10 02:23:53 -0400
commit	1c4efc8009292b6f8d6079f87645e8eb65e85f3e (patch)
tree	7da28a2ed7c3a66993e4f0b527f3ceda9b7a9ae9
parent	6368f56ec5e2ab8c67b44fc341daffde3da4f5af (diff)