diff options
author | wrmr | 2024-11-05 23:56:09 -0500 |
---|---|---|
committer | wrmr | 2024-11-05 23:56:09 -0500 |
commit | 837a9067c0ef2e0b3affbf7035788cd0e80ce7ba (patch) | |
tree | 683b41aed4496d623490419dcbdc00ca926da9eb /parse.c | |
parent | 9c932e49a7516570b6c1ff9863595d495e3a3212 (diff) |
add rudimentary gophermap parsing
Diffstat (limited to 'parse.c')
-rw-r--r-- | parse.c | 112 |
1 files changed, 97 insertions, 15 deletions
diff --git a/parse.c b/parse.c index 22e7edf..601c6b1 100644 --- a/parse.c +++ b/parse.c @@ -1,25 +1,107 @@ #include "parse.h" #include "err.h" +int parse_plain(struct doc *d, const buf_t *b) { + doc_init(d); + for (size_t i = 0; i < b->sz; i++) { + char c = b->buf[i]; + if (c == '\n') { + doc_new_line(d); + } else { + doc_add_textn(d, &c, 1); + } + } + return 0; +} + +struct str_slice { + const char *s; + size_t n; +}; + +static struct str_slice gmbit(size_t *i, const char *s, size_t n) { + struct str_slice ss = { + &s[*i], + 0 + }; + while (*i < n && s[*i] != '\t') { + *i += 1; + ss.n++; + } + *i += 1; + return ss; +} + +size_t scatss(char *buf, size_t i, size_t n, struct str_slice ss) { + size_t si = 0; + while (i < n && si < ss.n) { + buf[i++] = ss.s[si++]; + } + return i; +} + +int parse_gophermap_line(struct doc *d, const char *s, size_t n) { + char url[512] = "gopher://"; + size_t urln = 9; + struct { + char item_type; + struct str_slice dstr; + struct str_slice sel; + struct str_slice host; + struct str_slice port; + } bits; + size_t i = 0; + bits.item_type = s[i++]; + bits.dstr = gmbit(&i, s, n); + bits.sel = gmbit(&i, s, n); + bits.host = gmbit(&i, s, n); + bits.port = gmbit(&i, s, n); + switch (bits.item_type) { + case '.': + if (n == 1) return 1; + default: + urln = scatss(url, urln, sizeof url, bits.host); + if (urln < sizeof url) url[urln++] = ':'; + urln = scatss(url, urln, sizeof url, bits.port); + if (urln < sizeof url) url[urln++] = '/'; + if (urln < sizeof url) url[urln++] = bits.item_type; + urln = scatss(url, urln, sizeof url, bits.sel); + url[urln] = 0; + doc_set_link(d, doc_add_link(d, url)); + case 'i': + doc_add_textn(d, bits.dstr.s, bits.dstr.n); + doc_new_line(d); + break; + } + return 0; +} + +int parse_gophermap(struct doc *d, const buf_t *b) { + doc_init(d); + size_t ln_start = 0; + for (size_t i = 0; i < b->sz; i++) { + if (b->buf[i] == '\r') continue; + if (b->buf[i] == '\n') { + char *ln_str = &b->buf[ln_start]; + size_t ln_len = i - ln_start; + if (i > 0 && b->buf[i - 1] == '\r') ln_len--; + if (parse_gophermap_line(d, ln_str, ln_len)) { + break; + } + ln_start = i + 1; + } + } + return 0; +} + int parse_doc(enum doc_type type, struct doc *d, const buf_t *b) { switch (type) { case DOC_PLAIN: - doc_init(d); - for (size_t i = 0; i < b->sz; i++) { - char c = b->buf[i]; - if (c == '\n') { - doc_new_line(d); - } else { - doc_add_textn(d, &c, 1); - } - } - goto ok; + return parse_plain(d, b); + case DOC_GOPHERMAP: + return parse_gophermap(d, b); default: perr("unsupported doctype"); - goto err; + return -1; } -ok: - return 0; -err: - return -1; } |