diff options
author | WormHeamer | 2025-03-09 18:11:05 -0400 |
---|---|---|
committer | WormHeamer | 2025-03-09 18:11:05 -0400 |
commit | f30af4d2a2e313a53a447b6dd6918b43635caf3d (patch) | |
tree | c335e9692d81b9cd8445afc566e79d314b335882 | |
parent | d0f41dfcd7b69d02d0ca03087097a67eb9454bb3 (diff) |
construct list of blocks and lines first, instead of a single pass
-rw-r--r-- | main.c | 309 |
1 files changed, 176 insertions, 133 deletions
diff --git a/main.c b/main.c index fa12a70..70029f8 100644 --- a/main.c +++ b/main.c @@ -79,6 +79,42 @@ void str_cat_html(Str *s, Str uri, Arena *a) { } } +typedef struct Doc Doc; +struct Doc { + Str html; + Str title; + Doc *prev, *next; +}; + +typedef enum { + LN_PAR, + LN_CODE, + LN_LINK, + LN_BQUOT, + LN_ULIST, + LN_OLIST, + LN_HDR1, + LN_HDR2, + LN_HDR3, + LN_NONE +} LineType; + +typedef struct Line Line; +struct Line { + Str txt; + Line *next; +}; + +typedef struct { + LineType type; + Line *lines; +} Block; + +typedef struct { + Block *data; + size_t len, cap; +} BlockList; + int is_ol_item(Str s) { Str h = str_cut(s, '.').head; if (h.n < 1) return 0; @@ -88,74 +124,142 @@ int is_ol_item(Str s) { return 1; } -typedef enum { - LINE_BLANK, LINE_PARA, - LINE_LINK, LINE_FIGURE, - LINE_UL, LINE_OL, - LINE_HDR1, LINE_HDR2, LINE_HDR3, LINE_CODE, - LINE_BQUOT, -} LineMode; - -LineMode lm_chg(LineMode from, LineMode to, Str *out, Arena *a) { -#undef S -#define S(s) {s,sizeof(s)-1} - static Str op[] = { - [LINE_BLANK] = S(""), - [LINE_PARA] = S("<p>"), - [LINE_LINK] = S("<ul>\n<li>"), - [LINE_FIGURE] = S("<figure>"), - [LINE_UL] = S("<ul>\n<li>"), - [LINE_OL] = S("<ol>\n<li>"), - [LINE_HDR1] = S("<h1>"), - [LINE_HDR2] = S("<h2>"), - [LINE_HDR3] = S("<h3>"), - [LINE_CODE] = S("<pre><code>"), - [LINE_BQUOT] = S("<blockquote>"), - }; - static Str cl[] = { - [LINE_BLANK] = S(""), - [LINE_PARA] = S("</p>"), - [LINE_LINK] = S("</li>\n</ul>"), - [LINE_FIGURE] = S("</figure>"), - [LINE_UL] = S("</li>\n</ul>"), - [LINE_OL] = S("</li>\n</ol>"), - [LINE_HDR1] = S("</h1>"), - [LINE_HDR2] = S("</h2>"), - [LINE_HDR3] = S("</h3>"), - [LINE_CODE] = S("</code></pre>"), - [LINE_BQUOT] = S("</blockquote>"), - }; - static Str cont[] = { - [LINE_BLANK] = S(""), - [LINE_PARA] = S("<br>\n"), - [LINE_FIGURE] = S("</figure>\n<figure>"), - [LINE_LINK] = S("</li>\n<li>"), - [LINE_UL] = S("</li>\n<li>"), - [LINE_OL] = S("</li>\n<li>"), - [LINE_HDR1] = S("</h1>\n<h1>"), - [LINE_HDR2] = S("</h2>\n<h2>"), - [LINE_HDR3] = S("</h3>\n<h3>"), - [LINE_CODE] = S("\n"), - [LINE_BQUOT] = S("<br>\n"), - }; -#undef S -#define S(s) (Str){s,sizeof(s)-1} - if (from == to) { - str_cat(out, cont[from], a); +LineType classify_line(Str line, LineType prev) { + if (line.n == 0) { + return LN_NONE; + } else if (str_starts(line, S("###"))) { + return LN_HDR3; + } else if (str_starts(line, S("##"))) { + return LN_HDR2; + } else if (str_starts(line, S("#"))) { + return LN_HDR1; + } else if (str_starts(line, S("=>"))) { + return LN_LINK; + } else if (str_starts(line, S(">"))) { + return LN_BQUOT; + } else if (str_starts(line, S("* ")) + || (prev == LN_ULIST + && str_starts(line, S(" ")))) { + return LN_ULIST; + } else if (is_ol_item(line)) { + return LN_OLIST; + } else if (str_starts(line, S("```"))) { + return LN_CODE; } else { - str_cat(out, cl[from], a); - str_catc(out, '\n', a); - str_cat(out, op[to], a); + return LN_PAR; } - return to; } -typedef struct Doc Doc; -struct Doc { - Str html; - Str title; - Doc *prev, *next; -}; +BlockList blk_gather(Str src, Arena *perm) { + Str line; + LineType last = LN_NONE; + BlockList blk = { 0 }; + Line *lptr = NULL; + while (next_line(&src, &line)) { + LineType t = classify_line(line, last); + if (last == LN_CODE) { + if (t == LN_CODE) last = LN_NONE; + } else if (t == LN_CODE) { + last = LN_CODE; + } else { + if (blk.len < 1 || t != blk.data[blk.len-1].type) { + if (blk.cap <= blk.len) { + size_t c = blk.cap; + if (!c) c = 16; + while (c <= blk.len) c <<= 1; + blk.data = resize(perm, blk.data, + blk.cap, c); + blk.cap = c; + } + Block *b = &blk.data[blk.len++]; + b->type = t; + b->lines = NULL; + lptr = NULL; + } + Line *l = new(perm, Line); + l->txt = line; + if (lptr) lptr->next = l; + lptr = l; + Block *b = &blk.data[blk.len-1]; + if (!b->lines) b->lines = lptr; + } + } + for (size_t i = 0; i < blk.len; i++) { + if (blk.data[i].type == LN_NONE) { + blk.len--; + memcpy(&blk.data[i], &blk.data[i+1], (blk.len - i) * + sizeof(*blk.data)); + i--; + } + } + return blk; +} + +#define O(s) str_cat_html(out, s, perm) +#define Os(s) str_cat(out, S(s), perm) +#define Ot(a, s, b) Os(a), O(s), Os(b) +#define Otl(a, f, b) for (Line *l = blk->lines; l; l = l->next) Ot(a, f, b) + +void str_cat_blk(Str *out, Block *blk, Arena *perm) { + switch (blk->type) { + case LN_CODE: + Os("<pre><code>"); + for (Line *l = blk->lines; l; l = l->next) { + O(l->txt); + Os("\n"); + } + Os("</code></pre>"); + break; + case LN_LINK: + Os("<ul>\n"); + for (Line *l = blk->lines; l; l = l->next) { + Cut c = str_cut(str_trim(str_skip(l->txt, 2)), ' '); + Str url = c.head, txt = c.tail.n > 0 ? c.tail : c.head; + Os("<li><a href="); + str_cat_uri(out, url, perm); + Ot(">", txt, "</a></li>\n"); + } + Os("</ul>"); + break; + case LN_BQUOT: + Os("<blockquote>"); + for (Line *l = blk->lines; l; l = l->next) { + O(str_trim(str_skip(l->txt, 1))); + if (l->next) Os("<br>\n"); + } + Os("<blockquote>"); + break; + case LN_ULIST: + Os("<ul>\n"); + Otl("<li>", str_skip(l->txt, 2), "</li>\n"); + Os("<ul>"); + break; + case LN_OLIST: + Os("<ol>\n"); + Otl("<li>", str_trim(str_cut(l->txt, '.').tail), "</li>\n"); + Os("<ol>"); + break; + case LN_HDR1: + Otl("<h1>", str_trim(str_skip(l->txt,1)), "</h1>"); + break; + case LN_HDR2: + Otl("<h2>", str_trim(str_skip(l->txt,2)), "</h2>"); + break; + case LN_HDR3: + Otl("<h3>", str_trim(str_skip(l->txt,3)), "</h3>"); + break; + default: + case LN_PAR: + Os("<p>"); + for (Line *l = blk->lines; l; l = l->next) { + O(l->txt); + if (l->next) Os("<br>\n"); + } + Os("</p>"); + break; + } + Os("\n"); +} int has_image_ext(Str url) { return str_ends(url, S(".png")) @@ -172,78 +276,17 @@ Str str_replace_end(Str s, Str a, Str b, Arena *m) { return (Str) { p, s.n + b.n - a.n }; } -int wdoc(FILE *f, Doc **dp, Arena *a, Arena *scratch) { - Str buf, line, out = {0}, title = {0}; +int wdoc(FILE *f, Doc **dp, Arena *perm, Arena *scratch) { + Str buf; if (read_all(f, &buf, scratch)) return -1; - LineMode lm = LINE_BLANK; - while (next_line(&buf, &line)) { - if (str_starts(line, S("```"))) { - lm = lm_chg(lm, lm == LINE_CODE ? LINE_BLANK : LINE_CODE, &out, a); - continue; - } else if (lm == LINE_CODE) { - lm = lm_chg(lm, LINE_CODE, &out, a); - str_cat(&out, line, a); - continue; - } else if (line.n == 0) { - lm = lm_chg(lm, LINE_BLANK, &out, a); - } else if (str_starts(line, S("=>"))) { - line = str_trim(str_skip(line, 2)); - isize i = 0; - while (i < line.n && !is_space(line.s[i])) i++; - Str url = { line.s, i }; - line = str_trim(str_skip(line, i)); - if (!str_starts(url, S("gemini://"))) { - url = str_replace_end(url, S(".gmi"), S(".html"), scratch); - } - if (has_image_ext(url)) { - lm = lm_chg(lm, LINE_FIGURE, &out, a); - str_cat(&out, S("<img src="), a); - str_cat_uri(&out, url, a); - str_catc(&out, '>', a); - if (line.n > 0) { - str_cat(&out, S("<figcaption>"), a); - str_cat_html(&out, line, a); - str_cat(&out, S("</figcaption>"), a); - } - } else { - Str display = line.n > 0 ? line : url; - lm = lm_chg(lm, LINE_LINK, &out, a); - str_cat(&out, S("<a href="), a); - str_cat_uri(&out, url, a); - str_catc(&out, '>', a); - str_cat_html(&out, display, a); - str_cat(&out, S("</a>"), a); - } - } else if (str_starts(line, S("*"))) { - lm = lm_chg(lm, LINE_UL, &out, a); - str_cat_html(&out, str_trim(str_skip(line, 1)), a); - } else if (is_ol_item(line)) { - lm = lm_chg(lm, LINE_OL, &out, a); - str_cat_html(&out, str_trim(str_cut(line, '.').tail), a); - } else if (str_starts(line, S("###"))) { - lm = lm_chg(lm, LINE_HDR3, &out, a); - str_cat_html(&out, str_trim(str_skip(line, 3)), a); - } else if (str_starts(line, S("##"))) { - lm = lm_chg(lm, LINE_HDR2, &out, a); - str_cat_html(&out, str_trim(str_skip(line, 2)), a); - } else if (str_starts(line, S("#"))) { - lm = lm_chg(lm, LINE_HDR1, &out, a); - title = str_trim(str_skip(line, 1)); - str_cat_html(&out, title, a); - } else if (str_starts(line, S(">"))) { - lm = lm_chg(lm, LINE_BQUOT, &out, a); - str_cat_html(&out, str_trim(str_skip(line, 1)), a); - } else { - lm = lm_chg(lm, LINE_PARA, &out, a); - str_cat_html(&out, line, a); + Doc *d = new(perm, Doc); + BlockList blk = blk_gather(buf, scratch); + for (size_t i = 0; i < blk.len; i++) { + if (blk.data[i].type == LN_HDR1 && !d->title.s) { + d->title = str_trim(str_skip(blk.data[i].lines->txt, 1)); } + str_cat_blk(&d->html, &blk.data[i], perm); } - lm = lm_chg(lm, LINE_BLANK, &out, a); - Doc *d = new(a, Doc); - if (title.s) d->title = str_dup(title, a); - d->html = out; - d->prev = (*dp); - if (*dp) (*dp)->next = d; *dp = d; return 0; } |