From f30af4d2a2e313a53a447b6dd6918b43635caf3d Mon Sep 17 00:00:00 2001 From: WormHeamer Date: Sun, 9 Mar 2025 18:11:05 -0400 Subject: construct list of blocks and lines first, instead of a single pass --- main.c | 309 +++++++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 176 insertions(+), 133 deletions(-) diff --git a/main.c b/main.c index fa12a70..70029f8 100644 --- a/main.c +++ b/main.c @@ -79,6 +79,42 @@ void str_cat_html(Str *s, Str uri, Arena *a) { } } +typedef struct Doc Doc; +struct Doc { + Str html; + Str title; + Doc *prev, *next; +}; + +typedef enum { + LN_PAR, + LN_CODE, + LN_LINK, + LN_BQUOT, + LN_ULIST, + LN_OLIST, + LN_HDR1, + LN_HDR2, + LN_HDR3, + LN_NONE +} LineType; + +typedef struct Line Line; +struct Line { + Str txt; + Line *next; +}; + +typedef struct { + LineType type; + Line *lines; +} Block; + +typedef struct { + Block *data; + size_t len, cap; +} BlockList; + int is_ol_item(Str s) { Str h = str_cut(s, '.').head; if (h.n < 1) return 0; @@ -88,74 +124,142 @@ int is_ol_item(Str s) { return 1; } -typedef enum { - LINE_BLANK, LINE_PARA, - LINE_LINK, LINE_FIGURE, - LINE_UL, LINE_OL, - LINE_HDR1, LINE_HDR2, LINE_HDR3, LINE_CODE, - LINE_BQUOT, -} LineMode; - -LineMode lm_chg(LineMode from, LineMode to, Str *out, Arena *a) { -#undef S -#define S(s) {s,sizeof(s)-1} - static Str op[] = { - [LINE_BLANK] = S(""), - [LINE_PARA] = S("

"), - [LINE_LINK] = S("

"), - [LINE_OL] = S("\n"), - [LINE_HDR1] = S(""), - [LINE_HDR2] = S(""), - [LINE_HDR3] = S(""), - [LINE_CODE] = S(""), - [LINE_BQUOT] = S(""), - }; - static Str cont[] = { - [LINE_BLANK] = S(""), - [LINE_PARA] = S("
\n"), - [LINE_FIGURE] = S("\n
"), - [LINE_LINK] = S("\n
  • "), - [LINE_UL] = S("
  • \n
  • "), - [LINE_OL] = S("
  • \n
  • "), - [LINE_HDR1] = S("\n

    "), - [LINE_HDR2] = S("

    \n

    "), - [LINE_HDR3] = S("

    \n

    "), - [LINE_CODE] = S("\n"), - [LINE_BQUOT] = S("
    \n"), - }; -#undef S -#define S(s) (Str){s,sizeof(s)-1} - if (from == to) { - str_cat(out, cont[from], a); +LineType classify_line(Str line, LineType prev) { + if (line.n == 0) { + return LN_NONE; + } else if (str_starts(line, S("###"))) { + return LN_HDR3; + } else if (str_starts(line, S("##"))) { + return LN_HDR2; + } else if (str_starts(line, S("#"))) { + return LN_HDR1; + } else if (str_starts(line, S("=>"))) { + return LN_LINK; + } else if (str_starts(line, S(">"))) { + return LN_BQUOT; + } else if (str_starts(line, S("* ")) + || (prev == LN_ULIST + && str_starts(line, S(" ")))) { + return LN_ULIST; + } else if (is_ol_item(line)) { + return LN_OLIST; + } else if (str_starts(line, S("```"))) { + return LN_CODE; } else { - str_cat(out, cl[from], a); - str_catc(out, '\n', a); - str_cat(out, op[to], a); + return LN_PAR; } - return to; } -typedef struct Doc Doc; -struct Doc { - Str html; - Str title; - Doc *prev, *next; -}; +BlockList blk_gather(Str src, Arena *perm) { + Str line; + LineType last = LN_NONE; + BlockList blk = { 0 }; + Line *lptr = NULL; + while (next_line(&src, &line)) { + LineType t = classify_line(line, last); + if (last == LN_CODE) { + if (t == LN_CODE) last = LN_NONE; + } else if (t == LN_CODE) { + last = LN_CODE; + } else { + if (blk.len < 1 || t != blk.data[blk.len-1].type) { + if (blk.cap <= blk.len) { + size_t c = blk.cap; + if (!c) c = 16; + while (c <= blk.len) c <<= 1; + blk.data = resize(perm, blk.data, + blk.cap, c); + blk.cap = c; + } + Block *b = &blk.data[blk.len++]; + b->type = t; + b->lines = NULL; + lptr = NULL; + } + Line *l = new(perm, Line); + l->txt = line; + if (lptr) lptr->next = l; + lptr = l; + Block *b = &blk.data[blk.len-1]; + if (!b->lines) b->lines = lptr; + } + } + for (size_t i = 0; i < blk.len; i++) { + if (blk.data[i].type == LN_NONE) { + blk.len--; + memcpy(&blk.data[i], &blk.data[i+1], (blk.len - i) * + sizeof(*blk.data)); + i--; + } + } + return blk; +} + +#define O(s) str_cat_html(out, s, perm) +#define Os(s) str_cat(out, S(s), perm) +#define Ot(a, s, b) Os(a), O(s), Os(b) +#define Otl(a, f, b) for (Line *l = blk->lines; l; l = l->next) Ot(a, f, b) + +void str_cat_blk(Str *out, Block *blk, Arena *perm) { + switch (blk->type) { + case LN_CODE: + Os("
    ");
    +		for (Line *l = blk->lines; l; l = l->next) {
    +			O(l->txt);
    +			Os("\n");
    +		}
    +		Os("
    "); + break; + case LN_LINK: + Os("
      \n"); + for (Line *l = blk->lines; l; l = l->next) { + Cut c = str_cut(str_trim(str_skip(l->txt, 2)), ' '); + Str url = c.head, txt = c.tail.n > 0 ? c.tail : c.head; + Os("
    • ", txt, "
    • \n"); + } + Os("
    "); + break; + case LN_BQUOT: + Os("
    "); + for (Line *l = blk->lines; l; l = l->next) { + O(str_trim(str_skip(l->txt, 1))); + if (l->next) Os("
    \n"); + } + Os("
    "); + break; + case LN_ULIST: + Os("