summary refs log tree commit diff
diff options
context:
space:
mode:
author: WormHeamer, 2025-03-09 18:11:05 -0400
committer: WormHeamer, 2025-03-09 18:11:05 -0400
commit: f30af4d2a2e313a53a447b6dd6918b43635caf3d (patch)
tree: c335e9692d81b9cd8445afc566e79d314b335882
parent: d0f41dfcd7b69d02d0ca03087097a67eb9454bb3 (diff)
construct list of blocks and lines first, instead of a single pass
-rw-r--r-- main.c 309
1 files changed, 176 insertions, 133 deletions
diff --git a/main.c b/main.c
index fa12a70..70029f8 100644
--- a/main.c
+++ b/main.c
@@ -79,6 +79,42 @@ void str_cat_html(Str *s, Str uri, Arena *a) {
 	}
 }
 
+typedef struct Doc Doc;
+struct Doc { /* one converted document, as filled in by wdoc() */
+	Str html; /* rendered HTML; wdoc() appends every block to it */
+	Str title; /* text of the first "#" heading block, left empty if there is none */
+	Doc *prev, *next; /* links to other Docs; NOTE(review): not assigned by wdoc() in this revision */
+};
+
+typedef enum { /* kind of a source line, as decided by classify_line() */
+	LN_PAR, /* fallback: any line matching no other marker */
+	LN_CODE, /* line starting with "```" */
+	LN_LINK, /* line starting with "=>" */
+	LN_BQUOT, /* line starting with ">" */
+	LN_ULIST, /* line starting with "* " */
+	LN_OLIST, /* line accepted by is_ol_item() */
+	LN_HDR1, /* line starting with "#" */
+	LN_HDR2, /* line starting with "##" */
+	LN_HDR3, /* line starting with "###" */
+	LN_NONE /* empty line */
+} LineType;
+
+typedef struct Line Line;
+struct Line { /* one source line inside a Block */
+	Str txt; /* the line text, markers included (str_cat_blk() skips them) */
+	Line *next; /* following line of the same block, NULL at the end */
+};
+
+typedef struct { /* a run of consecutive lines sharing one LineType */
+	LineType type; /* type common to every line of the block */
+	Line *lines; /* head of the singly linked list of lines */
+} Block;
+
+typedef struct { /* growable array of blocks, built by blk_gather() */
+	Block *data; /* block storage, NULL while cap is 0 */
+	size_t len, cap; /* blocks in use / blocks allocated */
+} BlockList;
+
 int is_ol_item(Str s) {
 	Str h = str_cut(s, '.').head;
 	if (h.n < 1) return 0;
@@ -88,74 +124,142 @@ int is_ol_item(Str s) {
 	return 1;
 }
 
-typedef enum {
-	LINE_BLANK, LINE_PARA,
-	LINE_LINK, LINE_FIGURE,
-	LINE_UL, LINE_OL,
-	LINE_HDR1, LINE_HDR2, LINE_HDR3, LINE_CODE,
-	LINE_BQUOT,
-} LineMode;
-
-LineMode lm_chg(LineMode from, LineMode to, Str *out, Arena *a) {
-#undef S
-#define S(s) {s,sizeof(s)-1}
-	static Str op[] = {
-		[LINE_BLANK] = S(""),
-		[LINE_PARA] = S("<p>"),
-		[LINE_LINK] = S("<ul>\n<li>"),
-		[LINE_FIGURE] = S("<figure>"),
-		[LINE_UL] = S("<ul>\n<li>"),
-		[LINE_OL] = S("<ol>\n<li>"),
-		[LINE_HDR1] = S("<h1>"),
-		[LINE_HDR2] = S("<h2>"),
-		[LINE_HDR3] = S("<h3>"),
-		[LINE_CODE] = S("<pre><code>"),
-		[LINE_BQUOT] = S("<blockquote>"),
-	};
-	static Str cl[] = {
-		[LINE_BLANK] = S(""),
-		[LINE_PARA] = S("</p>"),
-		[LINE_LINK] = S("</li>\n</ul>"),
-		[LINE_FIGURE] = S("</figure>"),
-		[LINE_UL] = S("</li>\n</ul>"),
-		[LINE_OL] = S("</li>\n</ol>"),
-		[LINE_HDR1] = S("</h1>"),
-		[LINE_HDR2] = S("</h2>"),
-		[LINE_HDR3] = S("</h3>"),
-		[LINE_CODE] = S("</code></pre>"),
-		[LINE_BQUOT] = S("</blockquote>"),
-	};
-	static Str cont[] = {
-		[LINE_BLANK] = S(""),
-		[LINE_PARA] = S("<br>\n"),
-		[LINE_FIGURE] = S("</figure>\n<figure>"),
-		[LINE_LINK] = S("</li>\n<li>"),
-		[LINE_UL] = S("</li>\n<li>"),
-		[LINE_OL] = S("</li>\n<li>"),
-		[LINE_HDR1] = S("</h1>\n<h1>"),
-		[LINE_HDR2] = S("</h2>\n<h2>"),
-		[LINE_HDR3] = S("</h3>\n<h3>"),
-		[LINE_CODE] = S("\n"),
-		[LINE_BQUOT] = S("<br>\n"),
-	};
-#undef S
-#define S(s) (Str){s,sizeof(s)-1}
-	if (from == to) {
-		str_cat(out, cont[from], a);
+LineType classify_line(Str line, LineType prev) { /* Map one source line to a LineType from its leading marker. */
+	if (line.n == 0) {
+		return LN_NONE;
+	} else if (str_starts(line, S("###"))) { /* longest heading marker first: "###" also starts with "#" */
+		return LN_HDR3;
+	} else if (str_starts(line, S("##"))) {
+		return LN_HDR2;
+	} else if (str_starts(line, S("#"))) {
+		return LN_HDR1;
+	} else if (str_starts(line, S("=>"))) { /* must precede the ">" test below only by convention; "=>" does not start with ">" */
+		return LN_LINK;
+	} else if (str_starts(line, S(">"))) {
+		return LN_BQUOT;
+	} else if (str_starts(line, S("* "))
+			|| (prev == LN_ULIST /* prev is consulted only here: a two-space indent continues a list */
+			&& str_starts(line, S("  ")))) { /* NOTE(review): blk_gather() passes only LN_CODE/LN_NONE as prev, so this arm looks unreachable */
+		return LN_ULIST;
+	} else if (is_ol_item(line)) {
+		return LN_OLIST;
+	} else if (str_starts(line, S("```"))) { /* fence marker; callers treat it as a toggle */
+		return LN_CODE;
 	} else {
-		str_cat(out, cl[from], a);
-		str_catc(out, '\n', a);
-		str_cat(out, op[to], a);
+		return LN_PAR; /* anything unmarked is paragraph text */
 	}
-	return to;
 }
 
-typedef struct Doc Doc;
-struct Doc {
-	Str html;
-	Str title;
-	Doc *prev, *next;
-};
+BlockList blk_gather(Str src, Arena *perm) {
+	/* Split src into blocks (runs of consecutive lines of one LineType), allocating
+	 * from perm. Text inside ``` fences becomes LN_CODE; blank lines only separate. */
+	Str line;
+	LineType last = LN_NONE; /* LN_CODE while inside a fence, otherwise LN_NONE */
+	BlockList blk = { 0 };
+	Line *lptr = NULL; /* tail of the current block's line list */
+	int fresh = 1; /* the next stored line must open a new block */
+	while (next_line(&src, &line)) {
+		LineType t = classify_line(line, last);
+		if (t == LN_CODE) { /* fence line: toggle code mode, never stored */
+			last = (last == LN_CODE) ? LN_NONE : LN_CODE;
+			fresh = 1; /* text before and after a fence must not merge */
+			continue;
+		}
+		if (last == LN_CODE) t = LN_CODE; /* fenced text is code whatever it looks like */
+		if (fresh || blk.len < 1 || t != blk.data[blk.len-1].type) {
+			if (blk.cap <= blk.len) { /* grow by doubling, starting at 16 */
+				size_t c = blk.cap;
+				if (!c) c = 16;
+				while (c <= blk.len) c <<= 1;
+				blk.data = resize(perm, blk.data, blk.cap, c);
+				blk.cap = c;
+			}
+			Block *b = &blk.data[blk.len++];
+			b->type = t;
+			b->lines = NULL;
+			lptr = NULL;
+			fresh = 0;
+		}
+		Line *l = new(perm, Line);
+		l->txt = line; /* not copied: shares storage with src */
+		l->next = NULL; /* str_cat_blk() walks the list until NULL */
+		if (lptr) lptr->next = l;
+		else blk.data[blk.len-1].lines = l; /* first line becomes the block's head */
+		lptr = l;
+	}
+	size_t w = 0; /* drop blank-line blocks by compacting in place (no overlapping copy) */
+	for (size_t i = 0; i < blk.len; i++) {
+		if (blk.data[i].type != LN_NONE) blk.data[w++] = blk.data[i];
+	}
+	blk.len = w;
+	return blk;
+}
+
+#define O(s) str_cat_html(out, s, perm) /* append Str s through str_cat_html(); needs `out` and `perm` in scope */
+#define Os(s) str_cat(out, S(s), perm) /* append string literal s through str_cat(); needs `out` and `perm` in scope */
+#define Ot(a, s, b) Os(a), O(s), Os(b) /* literal a, then Str s, then literal b, as one comma expression */
+#define Otl(a, f, b) for (Line *l = blk->lines; l; l = l->next) Ot(a, f, b) /* Ot() once per line of `blk`; f may refer to l */
+
+void str_cat_blk(Str *out, Block *blk, Arena *perm) { /* Append blk rendered as HTML, plus a newline, to *out (allocating from perm). */
+	switch (blk->type) {
+	case LN_CODE:
+		Os("<pre><code>");
+		for (Line *l = blk->lines; l; l = l->next) {
+			O(l->txt);
+			Os("\n");
+		}
+		Os("</code></pre>");
+		break;
+	case LN_LINK:
+		Os("<ul>\n");
+		for (Line *l = blk->lines; l; l = l->next) {
+			Cut c = str_cut(str_trim(str_skip(l->txt, 2)), ' '); /* drop "=>", split URL from label at the first space */
+			Str url = c.head, txt = c.tail.n > 0 ? c.tail : c.head; /* no label: show the URL itself */
+			Os("<li><a href=");
+			str_cat_uri(out, url, perm);
+			Ot(">", txt, "</a></li>\n");
+		}
+		Os("</ul>");
+		break;
+	case LN_BQUOT:
+		Os("<blockquote>");
+		for (Line *l = blk->lines; l; l = l->next) {
+			O(str_trim(str_skip(l->txt, 1))); /* drop the leading ">" */
+			if (l->next) Os("<br>\n"); /* separator between lines, none after the last */
+		}
+		Os("</blockquote>"); /* was "<blockquote>": the element was never closed */
+		break;
+	case LN_ULIST:
+		Os("<ul>\n");
+		Otl("<li>", str_skip(l->txt, 2), "</li>\n"); /* drop the two-character "* " marker */
+		Os("</ul>"); /* was "<ul>": the list was never closed */
+		break;
+	case LN_OLIST:
+		Os("<ol>\n");
+		Otl("<li>", str_trim(str_cut(l->txt, '.').tail), "</li>\n"); /* keep the text after the first '.' */
+		Os("</ol>"); /* was "<ol>": the list was never closed */
+		break;
+	case LN_HDR1:
+		Otl("<h1>", str_trim(str_skip(l->txt,1)), "</h1>"); /* one heading element per line of the block */
+		break;
+	case LN_HDR2:
+		Otl("<h2>", str_trim(str_skip(l->txt,2)), "</h2>");
+		break;
+	case LN_HDR3:
+		Otl("<h3>", str_trim(str_skip(l->txt,3)), "</h3>");
+		break;
+	default: /* any other type is rendered like a paragraph */
+	case LN_PAR:
+		Os("<p>");
+		for (Line *l = blk->lines; l; l = l->next) {
+			O(l->txt);
+			if (l->next) Os("<br>\n");
+		}
+		Os("</p>");
+		break;
+	}
+	Os("\n");
+}
 
 int has_image_ext(Str url) {
 	return str_ends(url, S(".png"))
@@ -172,78 +276,17 @@ Str str_replace_end(Str s, Str a, Str b, Arena *m) {
 	return (Str) { p, s.n + b.n - a.n };
 }
 
-int wdoc(FILE *f, Doc **dp, Arena *a, Arena *scratch) {
-	Str buf, line, out = {0}, title = {0};
+int wdoc(FILE *f, Doc **dp, Arena *perm, Arena *scratch) { /* Read f, render it into a new Doc stored in *dp; returns -1 if read_all() fails, else 0. */
+	Str buf; /* whole input, read into scratch */
 	if (read_all(f, &buf, scratch)) return -1;
-	LineMode lm = LINE_BLANK;
-	while (next_line(&buf, &line)) {
-		if (str_starts(line, S("```"))) {
-			lm = lm_chg(lm, lm == LINE_CODE ? LINE_BLANK : LINE_CODE, &out, a);
-			continue;
-		} else if (lm == LINE_CODE) {
-			lm = lm_chg(lm, LINE_CODE, &out, a);
-			str_cat(&out, line, a);
-			continue;
-		} else if (line.n == 0) {
-			lm = lm_chg(lm, LINE_BLANK, &out, a);
-		} else if (str_starts(line, S("=>"))) {
-			line = str_trim(str_skip(line, 2));
-			isize i = 0;
-			while (i < line.n && !is_space(line.s[i])) i++;
-			Str url = { line.s, i };
-			line = str_trim(str_skip(line, i));
-			if (!str_starts(url, S("gemini://"))) {
-				url = str_replace_end(url, S(".gmi"), S(".html"), scratch);
-			}
-			if (has_image_ext(url)) {
-				lm = lm_chg(lm, LINE_FIGURE, &out, a);
-				str_cat(&out, S("<img src="), a);
-				str_cat_uri(&out, url, a);
-				str_catc(&out, '>', a);
-				if (line.n > 0) {
-					str_cat(&out, S("<figcaption>"), a);
-					str_cat_html(&out, line, a);
-					str_cat(&out, S("</figcaption>"), a);
-				}
-			} else {
-				Str display = line.n > 0 ? line : url;
-				lm = lm_chg(lm, LINE_LINK, &out, a);
-				str_cat(&out, S("<a href="), a);
-				str_cat_uri(&out, url, a);
-				str_catc(&out, '>', a);
-				str_cat_html(&out, display, a);
-				str_cat(&out, S("</a>"), a);
-			}
-		} else if (str_starts(line, S("*"))) {
-			lm = lm_chg(lm, LINE_UL, &out, a);
-			str_cat_html(&out, str_trim(str_skip(line, 1)), a);
-		} else if (is_ol_item(line)) {
-			lm = lm_chg(lm, LINE_OL, &out, a);
-			str_cat_html(&out, str_trim(str_cut(line, '.').tail), a);
-		} else if (str_starts(line, S("###"))) {
-			lm = lm_chg(lm, LINE_HDR3, &out, a);
-			str_cat_html(&out, str_trim(str_skip(line, 3)), a);
-		} else if (str_starts(line, S("##"))) {
-			lm = lm_chg(lm, LINE_HDR2, &out, a);
-			str_cat_html(&out, str_trim(str_skip(line, 2)), a);
-		} else if (str_starts(line, S("#"))) {
-			lm = lm_chg(lm, LINE_HDR1, &out, a);
-			title = str_trim(str_skip(line, 1));
-			str_cat_html(&out, title, a);
-		} else if (str_starts(line, S(">"))) {
-			lm = lm_chg(lm, LINE_BQUOT, &out, a);
-			str_cat_html(&out, str_trim(str_skip(line, 1)), a);
-		} else {
-			lm = lm_chg(lm, LINE_PARA, &out, a);
-			str_cat_html(&out, line, a);
+	Doc *d = new(perm, Doc); /* NOTE(review): d->prev/next are no longer linked to the old *dp here; confirm no caller relies on it */
+	BlockList blk = blk_gather(buf, scratch); /* the block list is temporary, so it goes in scratch */
+	for (size_t i = 0; i < blk.len; i++) {
+		if (blk.data[i].type == LN_HDR1 && !d->title.s) { /* only the first level-1 heading sets the title */
+			d->title = str_dup(str_trim(str_skip(blk.data[i].lines->txt, 1)), perm); /* copy: the line text lives in the scratch-backed buf */
 		}
+		str_cat_blk(&d->html, &blk.data[i], perm);
 	}
-	lm = lm_chg(lm, LINE_BLANK, &out, a);
-	Doc *d = new(a, Doc);
-	if (title.s) d->title = str_dup(title, a);
-	d->html = out;
-	d->prev = (*dp);
-	if (*dp) (*dp)->next = d;
 	*dp = d;
 	return 0;
 }