#include #include #include "parse.h" #include "strv.h" #include "err.h" int isurlch(char c) { return isalpha(c) || isdigit(c) || c == '-' || c == '.' || c == '_' || c == '~' || c == '!' || c == '$' || c == '\'' || c == '(' || c == ')' || c == '*' || c == '+' || c == ',' || c == ';' || c == '=' || c == '%' || c == '@' || c == ':' || c == '/'; } void parse_plain_url(struct doc *d, struct doc_line *l, size_t i) { char url[l->len + 1]; size_t start = i - 1; while (start > 0 && isalpha(l->txt[start])) start--; if (!isalpha(l->txt[start])) start++; size_t end = i + 3; while (end < l->len && isurlch(l->txt[end])) end++; if (end == i + 3) return; size_t urln = end - start; memcpy(url, &l->txt[start], urln); url[urln] = 0; l->link = doc_add_link(d, url); } int parse_plain(struct doc *d, const buf_t *b) { doc_init(d); strv_t buf = (strv_t) { b->buf, b->sz }; size_t i = 0; while (i < buf.n) { strv_t ln = strv_head(buf, '\n', &i); struct doc_line *l = doc_line_at(d, d->latest); for (size_t j = 1; j + 2 < ln.n; j++) { if (ln.s[j] == ':' && ln.s[j + 1] == '/' && ln.s[j + 2] == '/') { parse_plain_url(d, l, j); break; } } doc_add_line(d, ln); } return 0; } size_t scatss(char *buf, size_t i, size_t n, strv_t ss) { size_t si = 0; while (i < n && si < ss.n) { buf[i++] = ss.s[si++]; } return i; } int parse_gophermap_line(struct doc *d, strv_t ln) { char url[512] = "gopher://"; size_t urln = 9; struct { char item_type; strv_t dstr; strv_t sel; strv_t host; strv_t port; } bits; size_t i = 0; bits.item_type = ln.s[i++]; bits.dstr = strv_head(ln, '\t', &i); bits.sel = strv_head(ln, '\t', &i); bits.host = strv_head(ln, '\t', &i); bits.port = strv_head(ln, '\t', &i); switch (bits.item_type) { case '.': if (ln.n == 1) return 1; default: urln = scatss(url, urln, sizeof url, bits.host); if (urln < sizeof url) url[urln++] = ':'; urln = scatss(url, urln, sizeof url, bits.port); if (urln < sizeof url) url[urln++] = '/'; if (urln < sizeof url) url[urln++] = bits.item_type; urln = scatss(url, urln, sizeof url, bits.sel); url[urln] = 0; doc_set_link(d, doc_add_link(d, url)); case 'i': doc_add_text(d, bits.dstr); doc_new_line(d); break; } return 0; } int parse_gophermap(struct doc *d, const buf_t *b) { doc_init(d); size_t i = 0; strv_t bufss = { b->buf, b->sz }; while (i < b->sz) { strv_t ln = strv_head(bufss, '\n', &i); if (ln.n > 0 && ln.s[ln.n - 1] == '\r') ln.n--; if (parse_gophermap_line(d, ln)) { break; } } return 0; } int parse_doc(enum doc_type type, struct doc *d, const buf_t *b) { switch (type) { case DOC_PLAIN: return parse_plain(d, b); case DOC_GOPHERMAP: return parse_gophermap(d, b); default: perr("unsupported doctype"); return -1; } }