summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- main.c 309
1 files changed, 176 insertions, 133 deletions
diff --git a/main.c b/main.c
index fa12a70..70029f8 100644
--- a/main.c
+++ b/main.c
@@ -79,6 +79,42 @@ void str_cat_html(Str *s, Str uri, Arena *a) {
}
}
+typedef struct Doc Doc;
+struct Doc { /* one converted document, as filled in by wdoc() */
+ Str html; /* rendered HTML, built block by block */
+ Str title; /* text of the first level-1 heading; unset if there is none */
+ Doc *prev, *next; /* sibling documents */
+};
+
+typedef enum { /* kind of a source line, and of the block built from such lines */
+ LN_PAR, /* anything not matched below: plain paragraph text */
+ LN_CODE, /* line starting with "```" (code fence) */
+ LN_LINK, /* line starting with "=>" */
+ LN_BQUOT, /* line starting with ">" */
+ LN_ULIST, /* line starting with "* ", or a continuation of such a line */
+ LN_OLIST, /* line accepted by is_ol_item() */
+ LN_HDR1, /* line starting with "#" */
+ LN_HDR2, /* line starting with "##" */
+ LN_HDR3, /* line starting with "###" */
+ LN_NONE /* empty line */
+} LineType;
+
+typedef struct Line Line;
+struct Line { /* one source line inside a block, singly linked */
+ Str txt; /* the line's text, markup prefix included */
+ Line *next; /* following line of the same block, or NULL */
+};
+
+typedef struct { /* a run of consecutive lines sharing one type */
+ LineType type; /* type common to every line in the block */
+ Line *lines; /* head of the block's line list */
+} Block;
+
+typedef struct { /* growable array of blocks */
+ Block *data; /* block storage; NULL while cap is 0 */
+ size_t len, cap; /* blocks in use / blocks allocated */
+} BlockList;
+
int is_ol_item(Str s) {
Str h = str_cut(s, '.').head;
if (h.n < 1) return 0;
@@ -88,74 +124,142 @@ int is_ol_item(Str s) {
return 1;
}
-typedef enum {
- LINE_BLANK, LINE_PARA,
- LINE_LINK, LINE_FIGURE,
- LINE_UL, LINE_OL,
- LINE_HDR1, LINE_HDR2, LINE_HDR3, LINE_CODE,
- LINE_BQUOT,
-} LineMode;
-
-LineMode lm_chg(LineMode from, LineMode to, Str *out, Arena *a) {
-#undef S
-#define S(s) {s,sizeof(s)-1}
- static Str op[] = {
- [LINE_BLANK] = S(""),
- [LINE_PARA] = S("<p>"),
- [LINE_LINK] = S("<ul>\n<li>"),
- [LINE_FIGURE] = S("<figure>"),
- [LINE_UL] = S("<ul>\n<li>"),
- [LINE_OL] = S("<ol>\n<li>"),
- [LINE_HDR1] = S("<h1>"),
- [LINE_HDR2] = S("<h2>"),
- [LINE_HDR3] = S("<h3>"),
- [LINE_CODE] = S("<pre><code>"),
- [LINE_BQUOT] = S("<blockquote>"),
- };
- static Str cl[] = {
- [LINE_BLANK] = S(""),
- [LINE_PARA] = S("</p>"),
- [LINE_LINK] = S("</li>\n</ul>"),
- [LINE_FIGURE] = S("</figure>"),
- [LINE_UL] = S("</li>\n</ul>"),
- [LINE_OL] = S("</li>\n</ol>"),
- [LINE_HDR1] = S("</h1>"),
- [LINE_HDR2] = S("</h2>"),
- [LINE_HDR3] = S("</h3>"),
- [LINE_CODE] = S("</code></pre>"),
- [LINE_BQUOT] = S("</blockquote>"),
- };
- static Str cont[] = {
- [LINE_BLANK] = S(""),
- [LINE_PARA] = S("<br>\n"),
- [LINE_FIGURE] = S("</figure>\n<figure>"),
- [LINE_LINK] = S("</li>\n<li>"),
- [LINE_UL] = S("</li>\n<li>"),
- [LINE_OL] = S("</li>\n<li>"),
- [LINE_HDR1] = S("</h1>\n<h1>"),
- [LINE_HDR2] = S("</h2>\n<h2>"),
- [LINE_HDR3] = S("</h3>\n<h3>"),
- [LINE_CODE] = S("\n"),
- [LINE_BQUOT] = S("<br>\n"),
- };
-#undef S
-#define S(s) (Str){s,sizeof(s)-1}
- if (from == to) {
- str_cat(out, cont[from], a);
+LineType classify_line(Str line, LineType prev) { /* map one source line to its LineType; prev is the type the caller tracked for the preceding line */
+ if (line.n == 0) {
+ return LN_NONE;
+ } else if (str_starts(line, S("###"))) { /* longest heading prefix is tested first */
+ return LN_HDR3;
+ } else if (str_starts(line, S("##"))) {
+ return LN_HDR2;
+ } else if (str_starts(line, S("#"))) {
+ return LN_HDR1;
+ } else if (str_starts(line, S("=>"))) { /* must precede the ">" test below only by convention; "=>" never starts with ">" */
+ return LN_LINK;
+ } else if (str_starts(line, S(">"))) {
+ return LN_BQUOT;
+ } else if (str_starts(line, S("* "))
+ || (prev == LN_ULIST
+ && str_starts(line, S(" ")))) { /* an indented line directly after a list line stays in the list */
+ return LN_ULIST;
+ } else if (is_ol_item(line)) {
+ return LN_OLIST;
+ } else if (str_starts(line, S("```"))) { /* fence marker; the caller decides whether it opens or closes */
+ return LN_CODE;
} else {
- str_cat(out, cl[from], a);
- str_catc(out, '\n', a);
- str_cat(out, op[to], a);
+ return LN_PAR;
}
- return to;
}
-typedef struct Doc Doc;
-struct Doc {
- Str html;
- Str title;
- Doc *prev, *next;
-};
+/* Split src into blocks of consecutive same-type lines. Lines between "```"
+ * fences form LN_CODE blocks (the fences are dropped); blank lines only separate
+ * blocks. Line nodes and the array come from perm; line text still points into src. */
+BlockList blk_gather(Str src, Arena *perm) {
+    Str line;
+    LineType last = LN_NONE; /* LN_CODE while inside a fence, else the previous line's type */
+    BlockList blk = { 0 };
+    Line *lptr = NULL; /* tail of the current block's line list */
+    int fresh = 0; /* set by a fence: the next stored line must open a new block */
+    while (next_line(&src, &line)) {
+        LineType t = classify_line(line, last);
+        if (t == LN_CODE) { /* a fence toggles code mode and is never stored itself */
+            last = (last == LN_CODE) ? LN_NONE : LN_CODE;
+            fresh = 1;
+            continue;
+        }
+        if (last == LN_CODE) t = LN_CODE; /* inside a fence every line is code, whatever it looks like */
+        else last = t; /* lets classify_line see list continuations */
+        if (fresh || blk.len < 1 || t != blk.data[blk.len-1].type) {
+            if (blk.cap <= blk.len) {
+                size_t c = blk.cap ? blk.cap : 16;
+                while (c <= blk.len) c <<= 1;
+                blk.data = resize(perm, blk.data, blk.cap, c);
+                blk.cap = c;
+            }
+            Block *b = &blk.data[blk.len++];
+            b->type = t;
+            b->lines = NULL;
+            lptr = NULL;
+            fresh = 0;
+        }
+        Line *l = new(perm, Line);
+        l->txt = line;
+        if (lptr) lptr->next = l;
+        else blk.data[blk.len-1].lines = l; /* first line of the block becomes its head */
+        lptr = l;
+    }
+    size_t n = 0; /* compact in place: drop the LN_NONE separator blocks */
+    for (size_t i = 0; i < blk.len; i++) {
+        if (blk.data[i].type != LN_NONE) blk.data[n++] = blk.data[i];
+    }
+    blk.len = n;
+    return blk;
+}
+
+#define O(s) str_cat_html(out, s, perm) /* append a Str through str_cat_html */
+#define Os(s) str_cat(out, S(s), perm) /* append a string literal as-is */
+#define Ot(a, s, b) Os(a), O(s), Os(b) /* literal a, then Str s, then literal b */
+#define Otl(a, f, b) for (Line *l = blk->lines; l; l = l->next) Ot(a, f, b) /* Ot once per line l of blk */
+/* Append the HTML for one block, followed by a newline, to *out (allocating from perm). */
+void str_cat_blk(Str *out, Block *blk, Arena *perm) {
+    switch (blk->type) {
+    case LN_CODE:
+        Os("<pre><code>");
+        for (Line *l = blk->lines; l; l = l->next) {
+            O(l->txt);
+            Os("\n");
+        }
+        Os("</code></pre>");
+        break;
+    case LN_LINK: /* "=> url [label]": the label falls back to the url itself */
+        Os("<ul>\n");
+        for (Line *l = blk->lines; l; l = l->next) {
+            Cut c = str_cut(str_trim(str_skip(l->txt, 2)), ' ');
+            Str url = c.head, txt = c.tail.n > 0 ? c.tail : c.head;
+            Os("<li><a href=");
+            str_cat_uri(out, url, perm);
+            Ot(">", txt, "</a></li>\n");
+        }
+        Os("</ul>");
+        break;
+    case LN_BQUOT:
+        Os("<blockquote>");
+        for (Line *l = blk->lines; l; l = l->next) {
+            O(str_trim(str_skip(l->txt, 1)));
+            if (l->next) Os("<br>\n"); /* line break between lines, none after the last */
+        }
+        Os("</blockquote>"); /* was emitted as a second opening tag */
+        break;
+    case LN_ULIST:
+        Os("<ul>\n");
+        Otl("<li>", str_skip(l->txt, 2), "</li>\n");
+        Os("</ul>"); /* was emitted as a second opening tag */
+        break;
+    case LN_OLIST:
+        Os("<ol>\n");
+        Otl("<li>", str_trim(str_cut(l->txt, '.').tail), "</li>\n");
+        Os("</ol>"); /* was emitted as a second opening tag */
+        break;
+    case LN_HDR1: /* headings: skip as many bytes as the marker has '#' */
+        Otl("<h1>", str_trim(str_skip(l->txt,1)), "</h1>");
+        break;
+    case LN_HDR2:
+        Otl("<h2>", str_trim(str_skip(l->txt,2)), "</h2>");
+        break;
+    case LN_HDR3:
+        Otl("<h3>", str_trim(str_skip(l->txt,3)), "</h3>");
+        break;
+    default: /* any unhandled type is rendered like a paragraph */
+    case LN_PAR:
+        Os("<p>");
+        for (Line *l = blk->lines; l; l = l->next) {
+            O(l->txt);
+            if (l->next) Os("<br>\n");
+        }
+        Os("</p>");
+        break;
+    }
+    Os("\n");
+}
int has_image_ext(Str url) {
return str_ends(url, S(".png"))
@@ -172,78 +276,17 @@ Str str_replace_end(Str s, Str a, Str b, Arena *m) {
return (Str) { p, s.n + b.n - a.n };
}
-int wdoc(FILE *f, Doc **dp, Arena *a, Arena *scratch) {
- Str buf, line, out = {0}, title = {0};
+int wdoc(FILE *f, Doc **dp, Arena *perm, Arena *scratch) { /* read f, render it to HTML, store the new Doc in *dp; 0 on success, -1 if reading fails */
+ Str buf;
if (read_all(f, &buf, scratch)) return -1;
- LineMode lm = LINE_BLANK;
- while (next_line(&buf, &line)) {
- if (str_starts(line, S("```"))) {
- lm = lm_chg(lm, lm == LINE_CODE ? LINE_BLANK : LINE_CODE, &out, a);
- continue;
- } else if (lm == LINE_CODE) {
- lm = lm_chg(lm, LINE_CODE, &out, a);
- str_cat(&out, line, a);
- continue;
- } else if (line.n == 0) {
- lm = lm_chg(lm, LINE_BLANK, &out, a);
- } else if (str_starts(line, S("=>"))) {
- line = str_trim(str_skip(line, 2));
- isize i = 0;
- while (i < line.n && !is_space(line.s[i])) i++;
- Str url = { line.s, i };
- line = str_trim(str_skip(line, i));
- if (!str_starts(url, S("gemini://"))) {
- url = str_replace_end(url, S(".gmi"), S(".html"), scratch);
- }
- if (has_image_ext(url)) {
- lm = lm_chg(lm, LINE_FIGURE, &out, a);
- str_cat(&out, S("<img src="), a);
- str_cat_uri(&out, url, a);
- str_catc(&out, '>', a);
- if (line.n > 0) {
- str_cat(&out, S("<figcaption>"), a);
- str_cat_html(&out, line, a);
- str_cat(&out, S("</figcaption>"), a);
- }
- } else {
- Str display = line.n > 0 ? line : url;
- lm = lm_chg(lm, LINE_LINK, &out, a);
- str_cat(&out, S("<a href="), a);
- str_cat_uri(&out, url, a);
- str_catc(&out, '>', a);
- str_cat_html(&out, display, a);
- str_cat(&out, S("</a>"), a);
- }
- } else if (str_starts(line, S("*"))) {
- lm = lm_chg(lm, LINE_UL, &out, a);
- str_cat_html(&out, str_trim(str_skip(line, 1)), a);
- } else if (is_ol_item(line)) {
- lm = lm_chg(lm, LINE_OL, &out, a);
- str_cat_html(&out, str_trim(str_cut(line, '.').tail), a);
- } else if (str_starts(line, S("###"))) {
- lm = lm_chg(lm, LINE_HDR3, &out, a);
- str_cat_html(&out, str_trim(str_skip(line, 3)), a);
- } else if (str_starts(line, S("##"))) {
- lm = lm_chg(lm, LINE_HDR2, &out, a);
- str_cat_html(&out, str_trim(str_skip(line, 2)), a);
- } else if (str_starts(line, S("#"))) {
- lm = lm_chg(lm, LINE_HDR1, &out, a);
- title = str_trim(str_skip(line, 1));
- str_cat_html(&out, title, a);
- } else if (str_starts(line, S(">"))) {
- lm = lm_chg(lm, LINE_BQUOT, &out, a);
- str_cat_html(&out, str_trim(str_skip(line, 1)), a);
- } else {
- lm = lm_chg(lm, LINE_PARA, &out, a);
- str_cat_html(&out, line, a);
+ Doc *d = new(perm, Doc); /* NOTE(review): d->prev/next are no longer linked to *dp here; confirm the caller does it */
+ BlockList blk = blk_gather(buf, scratch); /* the block list is only needed during rendering */
+ for (size_t i = 0; i < blk.len; i++) {
+ if (blk.data[i].type == LN_HDR1 && !d->title.s) { /* first level-1 heading wins */
+ d->title = str_dup(str_trim(str_skip(blk.data[i].lines->txt, 1)), perm); /* copy: the slice points into buf, which was read into scratch */
}
+ str_cat_blk(&d->html, &blk.data[i], perm);
}
- lm = lm_chg(lm, LINE_BLANK, &out, a);
- Doc *d = new(a, Doc);
- if (title.s) d->title = str_dup(title, a);
- d->html = out;
- d->prev = (*dp);
- if (*dp) (*dp)->next = d;
*dp = d;
return 0;
}