From 837a9067c0ef2e0b3affbf7035788cd0e80ce7ba Mon Sep 17 00:00:00 2001 From: wrmr Date: Tue, 5 Nov 2024 23:56:09 -0500 Subject: add rudimentary gophermap parsing --- parse.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 97 insertions(+), 15 deletions(-) (limited to 'parse.c') diff --git a/parse.c b/parse.c index 22e7edf..601c6b1 100644 --- a/parse.c +++ b/parse.c @@ -1,25 +1,107 @@ #include "parse.h" #include "err.h" +int parse_plain(struct doc *d, const buf_t *b) { + doc_init(d); + for (size_t i = 0; i < b->sz; i++) { + char c = b->buf[i]; + if (c == '\n') { + doc_new_line(d); + } else { + doc_add_textn(d, &c, 1); + } + } + return 0; +} + +struct str_slice { + const char *s; + size_t n; +}; + +static struct str_slice gmbit(size_t *i, const char *s, size_t n) { + struct str_slice ss = { + &s[*i], + 0 + }; + while (*i < n && s[*i] != '\t') { + *i += 1; + ss.n++; + } + *i += 1; + return ss; +} + +size_t scatss(char *buf, size_t i, size_t n, struct str_slice ss) { + size_t si = 0; + while (i < n && si < ss.n) { + buf[i++] = ss.s[si++]; + } + return i; +} + +int parse_gophermap_line(struct doc *d, const char *s, size_t n) { + char url[512] = "gopher://"; + size_t urln = 9; + struct { + char item_type; + struct str_slice dstr; + struct str_slice sel; + struct str_slice host; + struct str_slice port; + } bits; + size_t i = 0; + bits.item_type = s[i++]; + bits.dstr = gmbit(&i, s, n); + bits.sel = gmbit(&i, s, n); + bits.host = gmbit(&i, s, n); + bits.port = gmbit(&i, s, n); + switch (bits.item_type) { + case '.': + if (n == 1) return 1; + default: + urln = scatss(url, urln, sizeof url, bits.host); + if (urln < sizeof url) url[urln++] = ':'; + urln = scatss(url, urln, sizeof url, bits.port); + if (urln < sizeof url) url[urln++] = '/'; + if (urln < sizeof url) url[urln++] = bits.item_type; + urln = scatss(url, urln, sizeof url, bits.sel); + url[urln] = 0; + doc_set_link(d, doc_add_link(d, url)); + case 'i': + doc_add_textn(d, bits.dstr.s, bits.dstr.n); + doc_new_line(d); + break; + } + return 0; +} + +int parse_gophermap(struct doc *d, const buf_t *b) { + doc_init(d); + size_t ln_start = 0; + for (size_t i = 0; i < b->sz; i++) { + if (b->buf[i] == '\r') continue; + if (b->buf[i] == '\n') { + char *ln_str = &b->buf[ln_start]; + size_t ln_len = i - ln_start; + if (i > 0 && b->buf[i - 1] == '\r') ln_len--; + if (parse_gophermap_line(d, ln_str, ln_len)) { + break; + } + ln_start = i + 1; + } + } + return 0; +} + int parse_doc(enum doc_type type, struct doc *d, const buf_t *b) { switch (type) { case DOC_PLAIN: - doc_init(d); - for (size_t i = 0; i < b->sz; i++) { - char c = b->buf[i]; - if (c == '\n') { - doc_new_line(d); - } else { - doc_add_textn(d, &c, 1); - } - } - goto ok; + return parse_plain(d, b); + case DOC_GOPHERMAP: + return parse_gophermap(d, b); default: perr("unsupported doctype"); - goto err; + return -1; } -ok: - return 0; -err: - return -1; } -- cgit 1.4.1-2-gfad0