diff options
-rw-r--r-- | Makefile | 30 | ||||
-rw-r--r-- | arena.h | 73 | ||||
-rw-r--r-- | args.h | 162 | ||||
-rw-r--r-- | main.c | 482 | ||||
-rw-r--r-- | str.h | 122 | ||||
-rw-r--r-- | typ.h | 10 |
6 files changed, 879 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..50fd1dc
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,30 @@
+EXE = wdoc
+RUNARGS =
+
+CFLAGS = -std=c17 -Wall -Wextra -Wpedantic -Os ${CFLAGS_${DEBUG}}
+LDFLAGS = -flto ${LDFLAGS_${DEBUG}}
+PREFIX = /usr/local/
+LDLIBS =
+
+DEBUG = 0
+GDB != which gf2 2> /dev/null || which gdb
+
+CFLAGS_1 = -g -fsanitize=undefined
+LDFLAGS_1 = -g -fsanitize=undefined
+LDFLAGS_0 = -s
+
+OBJ != find . -type f -name '*.c' | sed 's/\.c$$/.o/'
+
+.PHONY: all run debug clean
+
+all: ${EXE}
+run: ${EXE}
+	./${EXE} ${RUNARGS}
+debug: ${EXE}
+	${GDB} -ex start --args ./${EXE} ${RUNARGS}
+
+clean:
+	rm -fv ${EXE} ${OBJ}
+
+${EXE}: ${OBJ}
+	${CC} ${LDFLAGS} ${OBJ} -o ${EXE} ${LDLIBS}
diff --git a/arena.h b/arena.h
new file mode 100644
index 0000000..2058559
--- /dev/null
+++ b/arena.h
@@ -0,0 +1,73 @@
+#ifndef ARENA_H
+#define ARENA_H
+
+#include "typ.h"
+
+/* Bump allocator over a caller-provided buffer: [beg, end) is free space. */
+typedef struct {
+    char *beg, *end;
+} Arena;
+
+/* Allocate one zero-initialised object of type t. */
+#define new(a, t)\
+    zeroed(alloc(a, sizeof(t), _Alignof(t)), sizeof(t))
+
+/* Allocate an uninitialised array of n objects of type t. */
+#define new_arr(a, t, n)\
+    alloc(a, sizeof(t) * (n), _Alignof(t))
+
+/* Change the element count of array p from old to new (see re_alloc). */
+#define resize(a, p, old, new)\
+    re_alloc(a, p, (old) * sizeof(*(p)), (new) * sizeof(*(p)),\
+        _Alignof(__typeof__(*(p))))
+
+void *alloc(Arena *a, isize n, isize align);
+void *re_alloc(Arena *a, void *ptr, isize old, isize new, isize align);
+void *zeroed(void *p, usize n);
+
+#ifdef ARENA_IMPL
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Return n bytes aligned to align (which must be a power of two) taken
+ * from the front of the arena.  Aborts when the request does not fit. */
+void *alloc(Arena *a, isize n, isize align) {
+    isize avail = a->end - a->beg;
+    isize pad = (isize)(-(uintptr_t)a->beg & (uintptr_t)(align - 1));
+    /* Compare sizes rather than pointers: forming beg + pad + n beyond
+     * the end of the buffer would itself be undefined behaviour. */
+    if (n < 0 || pad > avail || n > avail - pad) {
+        fprintf(stderr, "out of arena memory!\n");
+        abort();
+    }
+    char *p = a->beg + pad;
+    a->beg = p + n;
+    return p;
+}
+
+/* Zero n bytes at p and return p. */
+void *zeroed(void *p, usize n) {
+    memset(p, 0, n);
+    return p;
+}
+
+/* Resize the block ptr from old to new bytes.  The most recent allocation
+ * is resized in place; anything else (including ptr == NULL) gets a fresh
+ * block and the old contents are copied over. */
+void *re_alloc(Arena *a, void *ptr, isize old, isize new, isize align) {
+    if (ptr && a->beg - old == (char *)ptr && new - old <= a->end - a->beg) {
+        a->beg = a->beg - old + new;
+        return ptr;
+    } else {
+        void *p = alloc(a, new, align);
+        /* Never copy more than the new block can hold. */
+        if (ptr) memcpy(p, ptr, (usize)(old < new ? old : new));
+        return p;
+    }
+}
+
+#endif
+#endif
diff --git a/args.h b/args.h
new file mode 100644
index 0000000..35c5283
--- /dev/null
+++ b/args.h
@@ -0,0 +1,162 @@
+#ifndef ARGS_H
+#define ARGS_H
+
+#include "str.h"
+
+/* Cursor over argv.  opt_end marks the end of the argument whose bundled
+ * short options ("-abc") are currently being worked through. */
+typedef struct {
+    const char **arg;
+    const char *opt_end;
+} ArgsState;
+
+/* Results other than an option's own value. */
+typedef enum {
+    ARG_OK = 0,     /* positional argument, stored in *arg */
+    ARG_END = -1,   /* no arguments left */
+    ARG_BAD = -2,   /* unknown option, its name is stored in *arg */
+    ARG_EMPTY = -3  /* option lacks its parameter, name stored in *arg */
+} ArgResult;
+
+ArgsState args_begin(const char **argv);
+ArgResult arg_getv(ArgsState *a, const char *fmt, Str *arg, ...);
+/* arg_get(a, fmt, arg, "long", value, ...): fmt lists the short options
+ * getopt-style; it is followed by (long name, result) pairs, where a
+ * leading ':' on the name means the option takes a parameter.  The
+ * sentinel is cast because a bare NULL may be a plain int 0, which a
+ * variadic callee must not read back as a pointer. */
+#define arg_get(a, fmt, arg, ...)\
+    arg_getv(a, fmt, arg __VA_OPT__(,) __VA_ARGS__, (const char *)NULL)
+
+#ifdef ARGS_IMPL
+#include <stdarg.h>
+
+/* Start parsing at argv[1], skipping the program name. */
+ArgsState args_begin(const char **argv) {
+    return (ArgsState) { argv + 1, NULL };
+}
+
+/* Index of the long option named key in the NULL-terminated list opts
+ * (the ':' parameter marker is ignored), or -1 if there is none. */
+static int arg_opt_find(const char **opts, Str key) {
+    for (int i = 0; opts[i]; i++) {
+        const char *o = opts[i];
+        if (*o == ':') o++;
+        if (str_eql(str_from_cstr(o), key)) return i;
+    }
+    return -1;
+}
+
+/* Fetch the parameter of option name: the remainder of the current
+ * argument (rem) if non-empty, otherwise the following argument. */
+static ArgResult arg_param(ArgsState *a, Str name, Str rem, Str *arg) {
+    if (rem.n > 0) {
+        *arg = rem;
+        return ARG_OK;
+    } else if (a->arg[1]) {
+        *arg = str_from_cstr(*++a->arg);
+        return ARG_OK;
+    } else {
+        *arg = name;
+        return ARG_EMPTY;
+    }
+}
+
+/* Handle a "--name" or "--name=value" argument; returns the value paired
+ * with the option, or a negative ArgResult. */
+static int arg_got_long(ArgsState *a, const char **opts, int *optv, Str *arg) {
+    Cut key = str_cut(str_from_cstr(*a->arg + 2), '=');
+    if (opts && optv) {
+        int o = arg_opt_find(opts, key.head);
+        if (o < 0) {
+            *arg = key.head;
+            return ARG_BAD;
+        }
+        if (opts[o][0] == ':') {
+            int x = arg_param(a, key.head, key.tail, arg);
+            if (x < 0) return x;
+        }
+        a->arg++;
+        return optv[o];
+    }
+    *arg = key.head;
+    return ARG_BAD;
+}
+
+/* Handle the short option at the front of the current argument; returns
+ * the option character, or a negative ArgResult. */
+static ArgResult arg_got_short(ArgsState *a, const char *fmt, Str *arg) {
+    Str opt = { (char*)*a->arg, 1 };
+    Str rem = str_from_cstr(*a->arg + 1);
+    for (const char *f = fmt; *f; f++) {
+        if (*f == ':') continue;
+        if (*f == **a->arg) {
+            if (f[1] == ':') {
+                int x = arg_param(a, opt, rem, arg);
+                if (x < 0) return x;
+                a->arg++;
+            } else {
+                (*a->arg)++;
+            }
+            return *f;
+        }
+    }
+    *arg = opt;
+    return ARG_BAD;
+}
+
+/* Classify the current argument (positional, long option or short
+ * options) and handle it accordingly. */
+static ArgResult arg_get_long(ArgsState *a, const char *fmt, const char **opts, int *optv, Str *arg) {
+    /* Skip an empty argument, e.g. one whose short options are used up. */
+    if (*a->arg && !**a->arg) a->arg++;
+    if (!*a->arg) return ARG_END;
+    const char *arg_end = *a->arg + strlen(*a->arg);
+    if (a->opt_end != arg_end) {
+        if (a->arg[0][0] != '-' || a->arg[0][1] == '\0') {
+            *arg = str_from_cstr(*a->arg++);
+            return ARG_OK;
+        }
+        if (a->arg[0][1] == '-') {
+            return arg_got_long(a, opts, optv, arg);
+        }
+        (*a->arg)++;
+        a->opt_end = arg_end;
+    }
+    return arg_got_short(a, fmt, arg);
+}
+
+/* Variadic backend of arg_get(): gathers the (name, value) pairs that
+ * follow arg, up to the NULL sentinel, then parses the next argument. */
+ArgResult arg_getv(ArgsState *a, const char *fmt, Str *arg, ...) {
+    /* I think this is a legitimate use case for VLAs --- they're not
+     * safe if N depends on user input, but here it very much doesn't!
+     * Just on the number of arguments passed, which is a compile time
+     * constant. */
+    va_list ap;
+    int n = 0;
+    va_start(ap, arg);
+    while (va_arg(ap, const char *)) {
+        n++;
+        (void)va_arg(ap, int);
+    }
+    va_end(ap);
+    if (n > 0) {
+        /* One spare slot: arg_opt_find() scans until a NULL entry. */
+        const char *opt[n + 1];
+        int optv[n];
+        va_start(ap, arg);
+        for (int i = 0; i < n; i++) {
+            opt[i] = va_arg(ap, const char *);
+            optv[i] = va_arg(ap, int);
+        }
+        va_end(ap);
+        opt[n] = NULL;
+        return arg_get_long(a, fmt, opt, optv, arg);
+    } else {
+        return arg_get_long(a, fmt, NULL, NULL, arg);
+    }
+}
+
+#endif
+#endif
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..8dc8a6e
--- /dev/null
+++ b/main.c
@@ -0,0 +1,482 @@
+#define _POSIX_C_SOURCE 200809L
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/mman.h>
+
+#define ARENA_IMPL
+#include "str.h"
+#include "arena.h"
+#define ARGS_IMPL
+#include "args.h"
+
+/* Read the rest of f into arena a and describe it with *buf.  Seekable
+ * files are sized up front; terminals and pipes are read until EOF into
+ * whatever space the arena has left.  Returns 0 on success, -1 on error. */
+int read_all(FILE *f, Str *buf, Arena *a) {
+    if (!f) return -1;
+    long size = -1;
+    if (!isatty(fileno(f)) && fseek(f, 0, SEEK_END) == 0) {
+        size = ftell(f);
+        if (size < 0 || fseek(f, 0, SEEK_SET) != 0) return -1;
+    }
+    if (size < 0) {
+        /* Not seekable: the length is unknown until EOF. */
+        isize cap = a->end - a->beg;
+        buf->s = a->beg;
+        buf->n = (isize)fread(a->beg, 1, (usize)cap, f);
+        a->beg += buf->n;
+        /* A full arena with input still pending means truncation. */
+        if (buf->n == cap && fgetc(f) != EOF) {
+            errno = ENOMEM;
+            return -1;
+        }
+    } else {
+        buf->n = size;
+        buf->s = new_arr(a, char, buf->n);
+        if ((isize)fread(buf->s, 1, (usize)buf->n, f) != buf->n) return -1;
+    }
+    return ferror(f) ? -1 : 0;
+}
+
+/* Pop the next line (without its '\n') off the front of *src into *line.
+ * Returns 0 once src is exhausted, 1 otherwise. */
+int next_line(Str *src, Str *line) {
+    if (src->n < 1) return 0;
+    char *newln = memchr(src->s, '\n', src->n);
+    line->s = src->s;
+    line->n = newln ? newln - src->s : src->n;
+    /* Step over the newline only when there is one, so src->s never
+     * moves past the end of the buffer on an unterminated last line. */
+    isize used = line->n + (newln ? 1 : 0);
+    src->s += used;
+    src->n -= used;
+    return 1;
+}
+
+/* Write s to stream f. */
+void str_putf(Str s, FILE *f) {
+    fwrite(s.s, 1, s.n, f);
+}
+
+/* Write s to stdout. */
+void str_put(Str s) {
+    str_putf(s, stdout);
+}
+
+/* Upper-case hexadecimal digit for x in 0..15. */
+char to_xdigit(int x) {
+    if (x > 9) {
+        return 'A' + (x - 10);
+    } else {
+        return '0' + x;
+    }
+}
+
+/* Append uri to *s as a single-quoted attribute value, percent-encoding
+ * the quote character itself and '%'. */
+void str_cat_uri(Str *s, Str uri, Arena *a) {
+    str_catc(s, '\'', a);
+    for (isize i = 0; i < uri.n; i++) {
+        char c = uri.s[i];
+        if (c == '\'' || c == '%') {
+            str_catc(s, '%', a);
+            str_catc(s, to_xdigit((c & 0xff) >> 4), a);
+            str_catc(s, to_xdigit(c & 0xf), a);
+        } else {
+            str_catc(s, c, a);
+        }
+    }
+    str_catc(s, '\'', a);
+}
+
+/* Append uri (any text, despite the name) to *s with the HTML-special
+ * characters '&', '<' and '>' replaced by character references. */
+void str_cat_html(Str *s, Str uri, Arena *a) {
+    for (isize i = 0; i < uri.n; i++) {
+        char c = uri.s[i];
+        switch (c) {
+        case '&': str_cat(s, S("&amp;"), a); break;
+        case '<': str_cat(s, S("&lt;"), a); break;
+        case '>': str_cat(s, S("&gt;"), a); break;
+        default: str_catc(s, c, a); break;
+        }
+    }
+}
+
+/* Non-zero when s starts with one or more digits followed by '.', i.e.
+ * looks like an ordered-list item ("1. text"). */
+int is_ol_item(Str s) {
+    char *dot = s.n > 0 ? memchr(s.s, '.', s.n) : NULL;
+    /* Without a dot the line is just text that may begin with digits. */
+    if (!dot || dot == s.s) return 0;
+    for (char *p = s.s; p < dot; p++) {
+        if (!(*p >= '0' && *p <= '9')) return 0;
+    }
+    return 1;
+}
+
+/* Kind of block the converter is currently inside. */
+typedef enum {
+    LINE_BLANK, LINE_PARA,
+    LINE_LINK, LINE_FIGURE,
+    LINE_UL, LINE_OL,
+    LINE_HDR1, LINE_HDR2, LINE_HDR3, LINE_CODE,
+    LINE_BQUOT,
+} LineMode;
+
+/* Emit the markup needed to go from block kind from to block kind to:
+ * a separator when the kind stays the same, otherwise the closing tag
+ * of from, a newline and the opening tag of to.  Returns to. */
+LineMode lm_chg(LineMode from, LineMode to, Str *out, Arena *a) {
+    static const Str op[] = {
+        [LINE_BLANK] = S(""),
+        [LINE_PARA] = S("<p>"),
+        [LINE_LINK] = S("<ul>\n<li>"),
+        [LINE_FIGURE] = S("<figure>"),
+        [LINE_UL] = S("<ul>\n<li>"),
+        [LINE_OL] = S("<ol>\n<li>"),
+        [LINE_HDR1] = S("<h1>"),
+        [LINE_HDR2] = S("<h2>"),
+        [LINE_HDR3] = S("<h3>"),
+        [LINE_CODE] = S("<pre><code>"),
+        [LINE_BQUOT] = S("<blockquote>"),
+    };
+    static const Str cl[] = {
+        [LINE_BLANK] = S(""),
+        [LINE_PARA] = S("</p>"),
+        [LINE_LINK] = S("</li>\n</ul>"),
+        [LINE_FIGURE] = S("</figure>"),
+        [LINE_UL] = S("</li>\n</ul>"),
+        [LINE_OL] = S("</li>\n</ol>"),
+        [LINE_HDR1] = S("</h1>"),
+        [LINE_HDR2] = S("</h2>"),
+        [LINE_HDR3] = S("</h3>"),
+        [LINE_CODE] = S("</code></pre>"),
+        [LINE_BQUOT] = S("</blockquote>"),
+    };
+    static const Str cont[] = {
+        [LINE_BLANK] = S(""),
+        [LINE_PARA] = S("<br>\n"),
+        [LINE_FIGURE] = S("</figure>\n<figure>"),
+        [LINE_LINK] = S("</li>\n<li>"),
+        [LINE_UL] = S("</li>\n<li>"),
+        [LINE_OL] = S("</li>\n<li>"),
+        [LINE_HDR1] = S("</h1>\n<h1>"),
+        [LINE_HDR2] = S("</h2>\n<h2>"),
+        [LINE_HDR3] = S("</h3>\n<h3>"),
+        [LINE_CODE] = S("\n"),
+        [LINE_BQUOT] = S("<br>\n"),
+    };
+    if (from == to) {
+        str_cat(out, cont[from], a);
+    } else {
+        str_cat(out, cl[from], a);
+        str_catc(out, '\n', a);
+        str_cat(out, op[to], a);
+    }
+    return to;
+}
+
+/* One converted input file; documents form a doubly linked list. */
+typedef struct Doc Doc;
+struct Doc {
+    Str html;
+    Str title;
+    Doc *prev, *next;
+};
+
+/* Non-zero when url ends in a known image file extension. */
+int has_image_ext(Str url) {
+    return str_ends(url, S(".png"))
+        || str_ends(url, S(".jpg"))
+        || str_ends(url, S(".jpeg"))
+        || str_ends(url, S(".webp"));
+}
+
+/* Return s with the suffix a replaced by b (allocated in m), or s itself
+ * when it does not end in a. */
+Str str_replace_end(Str s, Str a, Str b, Arena *m) {
+    if (!str_ends(s, a)) return s;
+    char *p = new_arr(m, char, s.n + b.n - a.n);
+    memcpy(p, s.s, s.n - a.n);
+    memcpy(p + s.n - a.n, b.s, b.n);
+    return (Str) { p, s.n + b.n - a.n };
+}
+
+/* Convert the document read from f to HTML and append it to the list
+ * *dp.  The output lives in arena a; scratch holds the raw input and
+ * temporaries.  Returns 0 on success, -1 when f cannot be read. */
+int wdoc(FILE *f, Doc **dp, Arena *a, Arena *scratch) {
+    Str buf, line, out = {0}, title = {0};
+    if (read_all(f, &buf, scratch)) return -1;
+    LineMode lm = LINE_BLANK;
+    while (next_line(&buf, &line)) {
+        if (str_starts(line, S("```"))) {
+            lm = lm_chg(lm, lm == LINE_CODE ? LINE_BLANK : LINE_CODE, &out, a);
+            continue;
+        } else if (lm == LINE_CODE) {
+            lm = lm_chg(lm, LINE_CODE, &out, a);
+            /* Preformatted text still has to be escaped for HTML. */
+            str_cat_html(&out, line, a);
+            continue;
+        } else if (line.n == 0) {
+            lm = lm_chg(lm, LINE_BLANK, &out, a);
+        } else if (str_starts(line, S("=>"))) {
+            /* Link line: "=> url [label]". */
+            line = str_trim(str_skip(line, 2));
+            isize i = 0;
+            while (i < line.n && !is_space(line.s[i])) i++;
+            Str url = { line.s, i };
+            line = str_trim(str_skip(line, i));
+            if (!str_starts(url, S("gemini://"))) {
+                url = str_replace_end(url, S(".gmi"), S(".html"), scratch);
+            }
+            if (has_image_ext(url)) {
+                lm = lm_chg(lm, LINE_FIGURE, &out, a);
+                str_cat(&out, S("<img src="), a);
+                str_cat_uri(&out, url, a);
+                str_catc(&out, '>', a);
+                if (line.n > 0) {
+                    str_cat(&out, S("<figcaption>"), a);
+                    str_cat_html(&out, line, a);
+                    str_cat(&out, S("</figcaption>"), a);
+                }
+            } else {
+                Str display = line.n > 0 ? line : url;
+                lm = lm_chg(lm, LINE_LINK, &out, a);
+                str_cat(&out, S("<a href="), a);
+                str_cat_uri(&out, url, a);
+                str_catc(&out, '>', a);
+                str_cat_html(&out, display, a);
+                str_cat(&out, S("</a>"), a);
+            }
+        } else if (str_starts(line, S("*"))) {
+            lm = lm_chg(lm, LINE_UL, &out, a);
+            str_cat_html(&out, str_trim(str_skip(line, 1)), a);
+        } else if (is_ol_item(line)) {
+            lm = lm_chg(lm, LINE_OL, &out, a);
+            str_cat_html(&out, str_trim(str_cut(line, '.').tail), a);
+        } else if (str_starts(line, S("###"))) {
+            lm = lm_chg(lm, LINE_HDR3, &out, a);
+            str_cat_html(&out, str_trim(str_skip(line, 3)), a);
+        } else if (str_starts(line, S("##"))) {
+            lm = lm_chg(lm, LINE_HDR2, &out, a);
+            str_cat_html(&out, str_trim(str_skip(line, 2)), a);
+        } else if (str_starts(line, S("#"))) {
+            lm = lm_chg(lm, LINE_HDR1, &out, a);
+            title = str_trim(str_skip(line, 1));
+            str_cat_html(&out, title, a);
+        } else if (str_starts(line, S(">"))) {
+            lm = lm_chg(lm, LINE_BQUOT, &out, a);
+            str_cat_html(&out, str_trim(str_skip(line, 1)), a);
+        } else {
+            lm = lm_chg(lm, LINE_PARA, &out, a);
+            str_cat_html(&out, line, a);
+        }
+    }
+    lm = lm_chg(lm, LINE_BLANK, &out, a);
+    Doc *d = new(a, Doc);
+    if (title.s) d->title = str_dup(title, a);
+    d->html = out;
+    d->prev = (*dp);
+    if (*dp) (*dp)->next = d;
+    *dp = d;
+    return 0;
+}
+
+/* Declare Arena n backed by a static array of sz bytes.  The empty asm
+ * statements make beg/end opaque to the optimiser (presumably so that it
+ * cannot reason about them as pointers into the array -- TODO confirm). */
+#define ARENA(n, sz) Arena n; { static char arena_backarr[sz];\
+    n.beg = arena_backarr; n.end = arena_backarr + sizeof(arena_backarr);\
+    __asm("":"+r"(n.beg)); __asm("":"+r"(n.end)); }
+
+/* 64-bit FNV-1a hash of the bytes of s. */
+uint64_t str_hash(Str s) {
+    uint64_t h = UINT64_C(14695981039346656037);
+    for (isize i = 0; i < s.n; i++) {
+        h = (h ^ (uint64_t)(s.s[i] & 0xff)) * UINT64_C(1099511628211);
+    }
+    return h;
+}
+
+/* Pick one value out of a "--hvar" parameter such as
+ * bgcolor:'#fcc,#cfc,#ccf,#cff,#ffc,#fcf': *name receives the part before
+ * ':' and *val one of the comma-separated values, selected by a hash of
+ * filename so a given name always yields the same choice.  Returns 0 on
+ * success, 1 when the parameter lists no values. */
+int hvar_calc(Str param, Str *name, Str *val, Str filename) {
+    Cut c = str_cut(param, ':');
+    *name = c.head;
+    usize n = 0;
+    for (Str h = c.tail; h.n > 0; h = str_cut(h, ',').tail) n++;
+    /* Nothing to choose from; also avoids a modulo by zero below. */
+    if (n == 0) return 1;
+    srand((unsigned)str_hash(filename));
+    usize j = (usize)rand() % n;
+    usize i = 0;
+    for (Str h = c.tail; h.n > 0; h = str_cut(h, ',').tail) {
+        if (i == j) {
+            *val = str_cut(h, ',').head;
+            return 0;
+        }
+        i++;
+    }
+    return 1;
+}
+
+/* Number of elements in the array x. */
+#define countof(x) (sizeof(x) / sizeof(*(x)))
+
+/* wdoc [-s] [-c|--css FILE] [-h|--hvar NAME:V1,V2,...] [FILE...]
+ * Converts each FILE (or stdin) to HTML on stdout; -s adds a page header. */
+int main(int argc, char **argv) {
+    (void)argc;
+
+    ARENA(perm, 1 << 20)
+    ARENA(scratch, 1 << 20)
+
+    Doc *doc = 0;
+    struct {
+        int standalone;
+        int from_stdin;
+        Str stylesheet;
+        Str hvarv[1024];
+        int hvarc;
+    } opts = { 0 };
+    int r;
+
+    /* args.h works on const strings; it only moves the argv pointers. */
+    ArgsState a = args_begin((const char **)argv);
+    Str param = { 0 };
+    opts.from_stdin = 1;
+
+    while ((r = arg_get(&a, "sc:h:", &param, ":css", 'c', ":hvar", 'h')) >= ARG_OK) {
+        Arena reset = scratch;
+        FILE *f;
+        switch (r) {
+        case 's':
+            opts.standalone = 1;
+            break;
+        case 'c':
+            opts.stylesheet = param;
+            break;
+        case 'h':
+            if ((usize)opts.hvarc == countof(opts.hvarv)) {
+                fprintf(stderr, "too many hash variables!\n");
+                return 1;
+            }
+            opts.hvarv[opts.hvarc++] = param;
+            break;
+        default:
+            /* Positional argument: an input file. */
+            opts.from_stdin = 0;
+            f = fopen(str_to_cstr(param, &scratch), "r");
+            if (wdoc(f, &doc, &perm, &scratch)) {
+                fwrite(param.s, 1, param.n, stderr);
+                fprintf(stderr, ": %s\n", strerror(errno));
+                return 1;
+            }
+            fclose(f);
+            break;
+        }
+        /* Everything wdoc() kept lives in perm; recycle the scratch space. */
+        scratch = reset;
+    }
+
+    switch (r) {
+    case ARG_BAD:
+        fprintf(stderr, "unknown option '");
+        fwrite(param.s, 1, param.n, stderr);
+        fprintf(stderr, "'\n");
+        return 1;
+    case ARG_EMPTY:
+        fprintf(stderr, "'");
+        fwrite(param.s, 1, param.n, stderr);
+        fprintf(stderr, "' option expected an argument\n");
+        return 1;
+    default:
+        break;
+    }
+
+    /* With no input files given, convert standard input. */
+    if (opts.from_stdin && wdoc(stdin, &doc, &perm, &scratch)) {
+        fprintf(stderr, "stdin: %s\n", strerror(errno));
+        return 1;
+    }
+
+    /* wdoc() leaves doc at the newest document.  Rewind to the first one
+     * so that every document is printed, keeping the title of the
+     * earliest document that has one. */
+    Str title = { 0 };
+    if (doc) {
+        title = doc->title;
+        while (doc->prev) {
+            doc = doc->prev;
+            if (doc->title.s) title = doc->title;
+        }
+    }
+
+    if (doc && opts.standalone) {
+        Str thtml = S("<!DOCTYPE html>"
+            "<meta charset=utf-8>"
+            "<meta name=viewport content='width=device-width,initial-scale=1'>");
+        if (title.s) {
+            str_cat(&thtml, S("<title>"), &scratch);
+            str_cat_html(&thtml, title, &scratch);
+            str_cat(&thtml, S("</title>"), &scratch);
+        }
+
+        if (opts.stylesheet.s) {
+            FILE *f = fopen(str_to_cstr(opts.stylesheet, &scratch), "r");
+            if (!f) {
+                str_putf(opts.stylesheet, stderr);
+                fprintf(stderr, ": %s\n", strerror(errno));
+                return 1;
+            }
+            Str css;
+            /* The stylesheet is only borrowed from perm until it has been
+             * copied into thtml. */
+            Arena p = perm;
+            if (read_all(f, &css, &perm)) {
+                fprintf(stderr, "failed to read stylesheet: %s\n", strerror(errno));
+                return 1;
+            }
+            fclose(f);
+            str_cat(&thtml, S("<style>"), &scratch);
+            if (opts.hvarc > 0) {
+                str_cat(&thtml, S(":root{"), &scratch);
+                for (int i = 0; i < opts.hvarc; i++) {
+                    Str name, val;
+                    if (hvar_calc(opts.hvarv[i], &name, &val, title)) {
+                        fprintf(stderr, "failed to calculate hashvar!\n");
+                        return 1;
+                    }
+                    str_cat(&thtml, S("--"), &scratch);
+                    str_cat(&thtml, name, &scratch);
+                    str_catc(&thtml, ':', &scratch);
+                    str_cat(&thtml, val, &scratch);
+                    str_catc(&thtml, ';', &scratch);
+                }
+                str_catc(&thtml, '}', &scratch);
+            }
+            str_cat(&thtml, css, &scratch);
+            str_cat(&thtml, S("</style>"), &scratch);
+            perm = p;
+        }
+
+        str_put(thtml);
+    }
+
+    while (doc) {
+        str_put(doc->html);
+        doc = doc->next;
+    }
+
+    return 0;
+}
diff --git a/str.h b/str.h
new file mode 100644
index 0000000..22a0d31
--- /dev/null
+++ b/str.h
@@ -0,0 +1,122 @@
+#ifndef STR_H
+#define STR_H
+
+#include <string.h>
+#include <stddef.h>
+
+#include "typ.h"
+#include "arena.h"
+
+/* Counted string: n bytes starting at s, not NUL-terminated. */
+typedef struct {
+    char *s;
+    isize n;
+} Str;
+
+/* Wrap a string literal as a Str (the terminating NUL is not counted). */
+#define S(s) (Str){s,sizeof(s)-1}
+
+/* allocating */
+
+/* Copy a into arena m.  static inline like every other function here, so
+ * the header can be included from more than one translation unit. */
+static inline Str str_dup(Str a, Arena *m) {
+    char *s = new_arr(m, char, a.n);
+    if (a.n > 0) memcpy(s, a.s, a.n);
+    a.s = s;
+    return a;
+}
+
+/* Append b to *a, growing *a inside arena m. */
+static inline void str_cat(Str *a, Str b, Arena *m) {
+    a->s = resize(m, a->s, a->n, a->n + b.n);
+    /* b.s may be NULL for an empty Str; memcpy must not be given that. */
+    if (b.n > 0) memcpy(&a->s[a->n], b.s, b.n);
+    a->n += b.n;
+}
+
+/* conversions */
+
+/* Return a NUL-terminated copy of s allocated in arena a. */
+static inline char *str_to_cstr(Str s, Arena *a) {
+    char *r = new_arr(a, char, s.n + 1);
+    if (s.n > 0) memcpy(r, s.s, s.n);
+    r[s.n] = 0;
+    return r;
+}
+
+/* View a NUL-terminated string as a Str without copying it. */
+static inline Str str_from_cstr(const char *s) {
+    return (Str) { (char*)s, strlen(s) };
+}
+
+/* pure functions */
+
+/* Non-zero when a and b hold the same bytes. */
+static inline int str_eql(Str a, Str b) {
+    return a.n == b.n && (b.n == 0 || !memcmp(a.s, b.s, b.n));
+}
+
+/* Non-zero when a begins with b. */
+static inline int str_starts(Str a, Str b) {
+    return a.n >= b.n && (b.n == 0 || !memcmp(a.s, b.s, b.n));
+}
+
+/* Non-zero when a ends with b. */
+static inline int str_ends(Str a, Str b) {
+    return a.n >= b.n && (b.n == 0 || !memcmp(&a.s[a.n - b.n], b.s, b.n));
+}
+
+/* Append the single byte b to *a, growing *a inside arena m. */
+static inline void str_catc(Str *a, char b, Arena *m) {
+    a->s = resize(m, a->s, a->n, a->n + 1);
+    a->s[a->n++] = b;
+}
+
+/* Drop the first n bytes of a.  No bounds check: n must not exceed a.n. */
+static inline Str str_skip(Str a, isize n) {
+    return (Str) { a.s + n, a.n - n };
+}
+
+/* Non-zero for space, tab, newline and carriage return. */
+static inline int is_space(char c) {
+    return c == ' ' || c == '\t' || c == '\n' || c == '\r';
+}
+
+/* Strip leading whitespace. */
+static inline Str str_trim_left(Str a) {
+    while (a.n > 0 && is_space(a.s[0])) a.s++, a.n--;
+    return a;
+}
+
+/* Strip trailing whitespace. */
+static inline Str str_trim_right(Str a) {
+    while (a.n > 0 && is_space(a.s[a.n - 1])) a.n--;
+    return a;
+}
+
+/* Strip whitespace from both ends. */
+static inline Str str_trim(Str a) {
+    return str_trim_left(str_trim_right(a));
+}
+
+/* Result of str_cut: the text before and after the separator. */
+typedef struct {
+    Str head, tail;
+} Cut;
+
+/* Split s at the first occurrence of c.  Without a separator the head is
+ * all of s and the tail is empty. */
+static inline Cut str_cut(Str s, char c) {
+    char *p = s.n > 0 ? memchr(s.s, c, s.n) : NULL;
+    if (!p) {
+        return (Cut) { s, { &s.s[s.n], 0 } };
+    } else {
+        return (Cut) {
+            { s.s, p - s.s },
+            { p + 1, &s.s[s.n] - (p + 1) }
+        };
+    }
+}
+
+#endif
diff --git a/typ.h b/typ.h
new file mode 100644
index 0000000..9a5d2b5
--- /dev/null
+++ b/typ.h
@@ -0,0 +1,10 @@
+#ifndef TYP_H
+#define TYP_H
+
+#include <stddef.h>
+
+/* Short names for the size and pointer-difference types. */
+typedef size_t usize;
+typedef ptrdiff_t isize;
+
+#endif