From 4af753d591e61a7380735e03a8658fb8949e0448 Mon Sep 17 00:00:00 2001
From: WormHeamer
Date: Sat, 8 Mar 2025 16:45:17 -0500
Subject: initial commit
---
Makefile | 30 +++++
arena.h | 56 +++++++++
args.h | 138 +++++++++++++++++++++
main.c | 409 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
str.h | 102 ++++++++++++++++
typ.h | 7 ++
6 files changed, 742 insertions(+)
create mode 100644 Makefile
create mode 100644 arena.h
create mode 100644 args.h
create mode 100644 main.c
create mode 100644 str.h
create mode 100644 typ.h
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..50fd1dc
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,30 @@
+# Build rules for wdoc.
+# Relies on the `!=` shell-assignment operator (BSD make, GNU make >= 4.0).
+EXE = wdoc
+RUNARGS =
+
+# ${CFLAGS_${DEBUG}} / ${LDFLAGS_${DEBUG}} pick the per-mode flag sets
+# defined further down (CFLAGS_1, LDFLAGS_0, LDFLAGS_1).
+CFLAGS = -std=c17 -Wall -Wextra -Wpedantic -Os ${CFLAGS_${DEBUG}}
+LDFLAGS = -flto ${LDFLAGS_${DEBUG}}
+PREFIX = /usr/local/
+LDLIBS =
+
+# DEBUG=1 builds with debug info and UBSan; DEBUG=0 strips the binary.
+DEBUG = 0
+# Prefer the gf2 frontend when it is installed, otherwise plain gdb.
+GDB != which gf2 2> /dev/null || which gdb
+
+CFLAGS_1 = -g -fsanitize=undefined
+LDFLAGS_1 = -g -fsanitize=undefined
+LDFLAGS_0 = -s
+
+# One object per C source file below the current directory. The explicit
+# `.` is required by POSIX find; only GNU find defaults the path.
+OBJ != find . -type f -name '*.c' | sed 's/\.c$$/.o/'
+
+# None of these targets produce a file of the same name.
+.PHONY: all run debug clean
+
+all: ${EXE}
+run: ${EXE}
+	./${EXE} ${RUNARGS}
+debug: ${EXE}
+	${GDB} -ex start --args ./${EXE} ${RUNARGS}
+
+clean:
+	rm -fv ${EXE} ${OBJ}
+
+${EXE}: ${OBJ}
+	${CC} ${LDFLAGS} ${OBJ} -o ${EXE} ${LDLIBS}
diff --git a/arena.h b/arena.h
new file mode 100644
index 0000000..2058559
--- /dev/null
+++ b/arena.h
@@ -0,0 +1,56 @@
+#ifndef ARENA_H
+#define ARENA_H
+
+#include "typ.h"
+
+/* Bump-allocator state. alloc() hands out memory starting at `beg` and
+ * moves `beg` forward; `end` is the limit that allocations are checked
+ * against. */
+typedef struct {
+ char *beg, *end;
+} Arena;
+
+/* Allocate one object of type t from arena a and zero-fill it. */
+#define new(a, t)\
+	zeroed(alloc(a, sizeof(t), _Alignof(t)), sizeof(t))
+
+/* Allocate room for n objects of type t from arena a. The memory is not
+ * zeroed. n is parenthesized so that an argument such as `len + 1` is
+ * multiplied as a whole rather than as `sizeof(t) * len + 1`. */
+#define new_arr(a, t, n)\
+	alloc(a, sizeof(t) * (n), _Alignof(t))
+
+/* Resize the array p from `old` to `new` elements via re_alloc(); the
+ * element size and alignment are taken from the type of *p. Evaluates
+ * p more than once. */
+#define resize(a, p, old, new)\
+	re_alloc(a, p, (old) * sizeof(*(p)), (new) * sizeof(*(p)),\
+		_Alignof(__typeof__(*(p))))
+
+/* Return n bytes from arena a, aligned to `align`; prints a message and
+ * aborts when the arena cannot satisfy the request. */
+void *alloc(Arena *a, isize n, isize align);
+/* Resize an allocation of `old` bytes to `new` bytes. Adjusts the arena
+ * in place when ptr is the most recent allocation and the new size fits,
+ * otherwise allocates a fresh region and copies the contents over. A NULL
+ * ptr behaves like a plain alloc(). */
+void *re_alloc(Arena *a, void *ptr, isize old, isize new, isize align);
+/* Fill the n bytes at p with zero and return p. */
+void *zeroed(void *p, usize n);
+
+#ifdef ARENA_IMPL
+
+#include
+#include
+
+/* Carve n bytes out of arena a, aligned to `align`.
+ *
+ * The alignment mask below assumes `align` is a power of two. On
+ * exhaustion (or a negative n) a message is written to stderr and the
+ * process aborts, so the function never returns NULL. The request must
+ * be strictly smaller than the remaining space, so the last byte of the
+ * arena is never handed out. */
+void *alloc(Arena *a, isize n, isize align) {
+	/* Bytes needed to round a->beg up to the next multiple of align. */
+	isize pad = (isize)(-(uintptr_t)a->beg & (uintptr_t)(align - 1));
+	isize avail = a->end - a->beg;
+	/* Compare sizes rather than forming `beg + pad + n`: that pointer is
+	 * undefined once it leaves the arena, and a negative or huge n
+	 * could slip past a pointer comparison. */
+	if (n < 0 || pad > avail || n >= avail - pad) {
+		fprintf(stderr, "out of arena memory!\n");
+		abort();
+	}
+	char *p = a->beg + pad;
+	a->beg = p + n;
+	return p;
+}
+
+/* Zero-fill the n bytes starting at p and hand p back, so the call can
+ * wrap an allocation expression. */
+void *zeroed(void *p, usize n) {
+	/* memset returns its first argument, i.e. p. */
+	return memset(p, 0, n);
+}
+
+/* Resize the allocation at ptr from `old` bytes to `new` bytes.
+ *
+ * If ptr is the most recent allocation in the arena and the new size
+ * fits, the arena cursor is simply moved and ptr is returned unchanged.
+ * Otherwise a fresh region is allocated (alloc() aborts on exhaustion)
+ * and the old contents are copied into it. A NULL ptr makes this a plain
+ * allocation of `new` bytes. */
+void *re_alloc(Arena *a, void *ptr, isize old, isize new, isize align) {
+	char *cur = ptr;
+	/* In-place path: the block ends exactly at a->beg. Sizes are compared
+	 * instead of forming a pointer that might lie past the arena. */
+	if (cur && a->beg - cur == old && new < a->end - cur) {
+		a->beg = cur + new;
+		return ptr;
+	}
+	void *p = alloc(a, new, align);
+	/* When shrinking, only `new` bytes were reserved; copying all `old`
+	 * bytes would write past the fresh allocation. */
+	if (ptr) memcpy(p, ptr, old < new ? old : new);
+	return p;
+}
+
+#endif
+#endif
diff --git a/args.h b/args.h
new file mode 100644
index 0000000..35c5283
--- /dev/null
+++ b/args.h
@@ -0,0 +1,138 @@
+#ifndef ARGS_H
+#define ARGS_H
+
+#include "str.h"
+
+/* Cursor over an argv-style, NULL-terminated argument vector. */
+typedef struct {
+ /* Current argument; args_begin() starts it at argv + 1. The parser
+  * also advances the pointed-to string while consuming short options. */
+ const char **arg;
+ /* End (terminating NUL) of the argument whose short options are being
+  * consumed; NULL before any short option has been seen. */
+ const char *opt_end;
+} ArgsState;
+
+/* Status codes of the argument parser. A recognized option is reported
+ * with its own value instead (the option character for short options, the
+ * paired int for long options), so all error codes here are negative. */
+typedef enum {
+ /* A non-option argument was stored in *arg. */
+ ARG_OK = 0,
+ /* No arguments are left. */
+ ARG_END = -1,
+ /* Unknown option; *arg holds its name. */
+ ARG_BAD = -2,
+ /* Option needs a parameter but none was given; *arg holds its name. */
+ ARG_EMPTY = -3
+} ArgResult;
+
+/* Start parsing at argv[1]. */
+ArgsState args_begin(const char **argv);
+/* Fetch the next argument or option.
+ *
+ * fmt lists the accepted short option characters; a character followed
+ * by ':' takes a parameter. The variadic tail is a sequence of
+ * (const char *name, int value) pairs describing long options, closed by
+ * a null `const char *`; a name starting with ':' takes a parameter.
+ * Returns the option character (short), the paired value (long), or an
+ * ArgResult code; parameters and positional arguments land in *arg. */
+ArgResult arg_getv(ArgsState *a, const char *fmt, Str *arg, ...);
+/* Convenience wrapper that appends the terminating sentinel. The cast is
+ * needed because the sentinel is read back with
+ * va_arg(ap, const char *), and a bare NULL may expand to a plain
+ * integer 0 of a different size. */
+#define arg_get(a, fmt, arg, ...)\
+	arg_getv(a, fmt, arg __VA_OPT__(,) __VA_ARGS__, (const char *)NULL)
+
+#ifdef ARGS_IMPL
+#include
+
+/* Build a parser state positioned on argv[1], with no short-option
+ * cluster in progress. */
+ArgsState args_begin(const char **argv) {
+	ArgsState st;
+	st.arg = argv + 1;
+	st.opt_end = NULL;
+	return st;
+}
+
+/* Look up key in the NULL-terminated list of long option names.
+ * A leading ':' on a name (the "takes a parameter" marker) is ignored
+ * for the comparison. Returns the index of the match, or -1. */
+static int arg_opt_find(const char **opts, Str key) {
+	int idx = 0;
+	while (opts[idx]) {
+		const char *name = opts[idx];
+		if (name[0] == ':') name++;
+		if (str_eql(str_from_cstr(name), key)) return idx;
+		idx++;
+	}
+	return -1;
+}
+
+/* Resolve the parameter of an option called `name`.
+ * Uses `rem` (text attached to the option) when it is non-empty,
+ * otherwise consumes the following argument. With neither available,
+ * stores the option name in *arg and reports ARG_EMPTY. */
+static ArgResult arg_param(ArgsState *a, Str name, Str rem, Str *arg) {
+	if (rem.n > 0) {
+		*arg = rem;
+		return ARG_OK;
+	}
+	if (a->arg[1]) {
+		/* Step onto the next argument and take it as the value. */
+		a->arg++;
+		*arg = str_from_cstr(*a->arg);
+		return ARG_OK;
+	}
+	*arg = name;
+	return ARG_EMPTY;
+}
+
+/* Handle an argument that starts with "--".
+ * The text after the dashes is split at '=' with str_cut(); the head is
+ * looked up in opts and the tail serves as an attached parameter.
+ * Returns optv[] for the matched option, ARG_BAD (with *arg set to the
+ * name) for an unknown option or when no long options were supplied, or
+ * the error from arg_param(). */
+static int arg_got_long(ArgsState *a, const char **opts, int *optv, Str *arg) {
+	Cut key = str_cut(str_from_cstr(*a->arg + 2), '=');
+	/* Without both tables nothing can match. */
+	int o = (opts && optv) ? arg_opt_find(opts, key.head) : -1;
+	if (o < 0) {
+		*arg = key.head;
+		return ARG_BAD;
+	}
+	if (opts[o][0] == ':') {
+		int x = arg_param(a, key.head, key.tail, arg);
+		if (x < 0) return x;
+	}
+	a->arg++;
+	return optv[o];
+}
+
+/* Handle the short option character that *a->arg currently points at.
+ * fmt lists the valid characters; one followed by ':' takes a parameter,
+ * which is either the rest of the current argument or the next one.
+ * Returns the option character, ARG_BAD (with *arg set to the
+ * one-character name) when it is not in fmt, or the error from
+ * arg_param(). */
+static ArgResult arg_got_short(ArgsState *a, const char *fmt, Str *arg) {
+	const char *cur = *a->arg;
+	Str opt = { (char*)cur, 1 };
+	Str rem = str_from_cstr(cur + 1);
+	for (const char *f = fmt; *f; f++) {
+		/* ':' is only a marker, never an option itself. */
+		if (*f == ':' || *f != *cur) continue;
+		if (f[1] != ':') {
+			/* Flag without parameter: step to the next character of
+			 * the same argument. */
+			(*a->arg)++;
+			return *f;
+		}
+		int x = arg_param(a, opt, rem, arg);
+		if (x < 0) return x;
+		a->arg++;
+		return *f;
+	}
+	*arg = opt;
+	return ARG_BAD;
+}
+
+/* Classify the current argument and dispatch it.
+ * Returns ARG_END when the vector is exhausted, ARG_OK with *arg set for
+ * a positional argument (anything not starting with '-', or a lone "-"),
+ * and otherwise whatever arg_got_long() / arg_got_short() report. */
+static ArgResult arg_get_long(ArgsState *a, const char *fmt, const char **opts, int *optv, Str *arg) {
+ /* An argument whose text has been fully consumed is skipped. */
+ if (*a->arg && !**a->arg) a->arg++;
+ if (!*a->arg) return ARG_END;
+ const char *arg_end = *a->arg + strlen(*a->arg);
+ /* A differing end pointer means this is not the argument whose short
+  * options are already being consumed, so it is examined from scratch. */
+ if (a->opt_end != arg_end) {
+ if (a->arg[0][0] != '-' || a->arg[0][1] == '\0') {
+ *arg = str_from_cstr(*a->arg++);
+ return ARG_OK;
+ }
+ if (a->arg[0][1] == '-') {
+ return arg_got_long(a, opts, optv, arg);
+ }
+ /* Skip the leading '-' and remember where this argument ends. */
+ (*a->arg)++;
+ a->opt_end = arg_end;
+ }
+ return arg_got_short(a, fmt, arg);
+}
+
+/* Variadic entry point of the parser.
+ * After `arg` come (const char *name, int value) pairs describing the
+ * long options, closed by a null `const char *`. The pairs are collected
+ * into two parallel arrays and handed to arg_get_long(). */
+ArgResult arg_getv(ArgsState *a, const char *fmt, Str *arg, ...) {
+	/* I think this is a legitimate usecase for VLAs --- they're not
+	 * safe if N depends on user input, but here it very much doesn't!
+	 * Just on the number of arguments passed, which is a compile time
+	 * constant. */
+	va_list ap;
+	int n = 0;
+	/* First pass: count the pairs up to the sentinel. */
+	va_start(ap, arg);
+	while (va_arg(ap, const char *)) {
+		n++;
+		(void)va_arg(ap, int);
+	}
+	va_end(ap);
+	if (n == 0) {
+		return arg_get_long(a, fmt, NULL, NULL, arg);
+	}
+	/* One extra slot: arg_opt_find() walks the names until it meets a
+	 * NULL entry, so the list has to be terminated or an unknown option
+	 * makes it read past the array. */
+	const char *opt[n + 1];
+	int optv[n];
+	/* Second pass: copy names and values. */
+	va_start(ap, arg);
+	for (int i = 0; i < n; i++) {
+		opt[i] = va_arg(ap, const char *);
+		optv[i] = va_arg(ap, int);
+	}
+	va_end(ap);
+	opt[n] = NULL;
+	return arg_get_long(a, fmt, opt, optv, arg);
+}
+
+#endif
+#endif
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..8dc8a6e
--- /dev/null
+++ b/main.c
@@ -0,0 +1,409 @@
+#define _POSIX_C_SOURCE 200809L
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define ARENA_IMPL
+#include "str.h"
+#include "arena.h"
+#define ARGS_IMPL
+#include "args.h"
+
+/* Read the remaining contents of f into memory taken from arena a and
+ * describe them in *buf.
+ *
+ * Seekable files are measured first and read into an exactly sized
+ * allocation. Terminals and other non-seekable streams (pipes, for
+ * example) are read straight into the free part of the arena, up to one
+ * byte short of its end; longer input is cut off at that point.
+ * Returns 0 on success and -1 on a NULL stream, a failed size query, a
+ * short read or a stream error. */
+int read_all(FILE *f, Str *buf, Arena *a) {
+	if (!f) return -1;
+	long size = -1;
+	/* fseek fails on pipes and FIFOs; treat that like a terminal instead
+	 * of trusting the -1 that ftell would return. */
+	if (!isatty(fileno(f)) && fseek(f, 0, SEEK_END) == 0) {
+		size = ftell(f);
+		if (size < 0 || fseek(f, 0, SEEK_SET) != 0) return -1;
+	}
+	if (size < 0) {
+		/* Unknown length: fill whatever the arena has left. */
+		isize room = a->end - a->beg - 1;
+		if (room < 0) room = 0;
+		buf->s = a->beg;
+		buf->n = fread(a->beg, 1, room, f);
+		a->beg += buf->n;
+	} else {
+		buf->n = size;
+		buf->s = new_arr(a, char, buf->n);
+		if ((isize)fread(buf->s, 1, buf->n, f) != buf->n) return -1;
+	}
+	return ferror(f) ? -1 : 0;
+}
+
+/* Split the next line off the front of *src.
+ * On success *line covers the text up to (not including) the next '\n'
+ * or the end of the input, *src is advanced past the line and its
+ * newline, and 1 is returned. Returns 0 once *src is empty. */
+int next_line(Str *src, Str *line) {
+	if (src->n < 1) return 0;
+	line->s = src->s;
+	char *newln = memchr(src->s, '\n', src->n);
+	line->n = newln ? newln - src->s : src->n;
+	/* Only skip a newline that is actually there; otherwise the cursor
+	 * would move past the end of the buffer and the length would drop
+	 * to -1 on an unterminated last line. */
+	isize used = line->n + (newln ? 1 : 0);
+	src->s += used;
+	src->n -= used;
+	return 1;
+}
+
+/* Write the s.n bytes of s to stream f. The fwrite result is not
+ * checked. */
+void str_putf(Str s, FILE *f) {
+ fwrite(s.s, 1, s.n, f);
+}
+
+/* Write s to stdout via str_putf(). */
+void str_put(Str s) {
+ str_putf(s, stdout);
+}
+
+/* Map a value to its hexadecimal digit: '0' + x for values up to 9,
+ * 'A' + (x - 10) above that. Callers pass a nibble (0..15). */
+char to_xdigit(int x) {
+	return x > 9 ? 'A' + (x - 10) : '0' + x;
+}
+
+/* Append uri to *s wrapped in single quotes. The characters ' and % are
+ * written as %XX (uppercase hex) so the quoting cannot be broken; every
+ * other byte is copied unchanged. */
+void str_cat_uri(Str *s, Str uri, Arena *a) {
+	str_catc(s, '\'', a);
+	for (isize k = 0; k < uri.n; k++) {
+		char ch = uri.s[k];
+		if (ch != '\'' && ch != '%') {
+			str_catc(s, ch, a);
+			continue;
+		}
+		/* Mask to a byte so a negative char yields two valid nibbles. */
+		int byte = ch & 0xff;
+		str_catc(s, '%', a);
+		str_catc(s, to_xdigit(byte >> 4), a);
+		str_catc(s, to_xdigit(byte & 0xf), a);
+	}
+	str_catc(s, '\'', a);
+}
+
+/* Append `uri` to *s with the HTML-special characters &, < and >
+ * replaced by their entities, so the text cannot open a tag or start an
+ * entity of its own. All other bytes are copied unchanged. (Despite the
+ * parameter name, the input is treated as arbitrary text.) */
+void str_cat_html(Str *s, Str uri, Arena *a) {
+	for (isize i = 0; i < uri.n; i++) {
+		char c = uri.s[i];
+		switch (c) {
+		case '&': str_cat(s, S("&amp;"), a); break;
+		case '<': str_cat(s, S("&lt;"), a); break;
+		case '>': str_cat(s, S("&gt;"), a); break;
+		default: str_catc(s, c, a); break;
+		}
+	}
+}
+
+/* Report whether s starts like an ordered-list item: the head returned
+ * by str_cut(s, '.') must be non-empty and consist only of the ASCII
+ * digits 0-9. Returns 1 if so, 0 otherwise. */
+int is_ol_item(Str s) {
+	Str num = str_cut(s, '.').head;
+	if (num.n < 1) return 0;
+	isize i = 0;
+	/* Advance over the leading run of digits. */
+	while (i < num.n && num.s[i] >= '0' && num.s[i] <= '9') i++;
+	/* Every character was a digit iff the run covers the whole head. */
+	return i == num.n;
+}
+
+/* Kind of a source line. The values are used as indices into the
+ * per-mode string tables in lm_chg(), so they must stay small and
+ * contiguous. */
+typedef enum {
+ LINE_BLANK, LINE_PARA,
+ LINE_LINK, LINE_FIGURE,
+ LINE_UL, LINE_OL,
+ LINE_HDR1, LINE_HDR2, LINE_HDR3, LINE_CODE,
+ LINE_BQUOT,
+} LineMode;
+
+LineMode lm_chg(LineMode from, LineMode to, Str *out, Arena *a) {
+ static Str op[] = {
+ [LINE_BLANK] = S(""),
+ [LINE_PARA] = S(""),
+ [LINE_LINK] = S("
\n- "),
+ [LINE_FIGURE] = S(""),
+ [LINE_UL] = S("
\n- "),
+ [LINE_OL] = S("
\n- "),
+ [LINE_HDR1] = S("
"),
+ [LINE_HDR2] = S(""),
+ [LINE_HDR3] = S(""),
+ [LINE_CODE] = S(""),
+ [LINE_BQUOT] = S(""),
+ };
+ static Str cl[] = {
+ [LINE_BLANK] = S(""),
+ [LINE_PARA] = S("
"),
+ [LINE_LINK] = S("\n"),
+ [LINE_FIGURE] = S(""),
+ [LINE_UL] = S("\n"),
+ [LINE_OL] = S("\n"),
+ [LINE_HDR1] = S(""),
+ [LINE_HDR2] = S(""),
+ [LINE_HDR3] = S(""),
+ [LINE_CODE] = S(""),
+ [LINE_BQUOT] = S(""),
+ };
+ static Str cont[] = {
+ [LINE_BLANK] = S(""),
+ [LINE_PARA] = S("
\n"),
+ [LINE_FIGURE] = S("\n"),
+ [LINE_LINK] = S("\n"),
+ [LINE_UL] = S("\n"),
+ [LINE_OL] = S("\n"),
+ [LINE_HDR1] = S("\n