#include #include #include "parse.h" #include "str.h" #include "err.h" int isurlch(char c) { return isalpha(c) || isdigit(c) || c == '-' || c == '.' || c == '_' || c == '~' || c == '!' || c == '$' || c == '\'' || c == '(' || c == ')' || c == '*' || c == '+' || c == ',' || c == ';' || c == '=' || c == '%' || c == '@' || c == ':' || c == '/'; } void parse_plain_url(struct doc *d, struct doc_line *l, size_t i) { size_t start = i - 1; while (start > 0 && isalpha(l->txt[start])) start--; if (!isalpha(l->txt[start])) start++; size_t end = i + 3; while (end < l->len && isurlch(l->txt[end])) end++; if (end == i + 3) return; strv_t url = { &l->txt[start], end - start }; l->link = doc_add_link(d, url); } int parse_plain(struct doc *d, const str_t *b) { doc_init(d); strv_t ln, buf = (strv_t) { b->buf, b->sz }; while (strv_split(&buf, '\n', &ln)) { struct doc_line *l = doc_line_at(d, d->latest); for (size_t j = 1; j + 2 < ln.n; j++) { if (ln.s[j] == ':' && ln.s[j + 1] == '/' && ln.s[j + 2] == '/') { parse_plain_url(d, l, j); break; } } doc_add_line(d, ln); } return 0; } size_t scatss(char *buf, size_t i, size_t n, strv_t ss) { size_t si = 0; while (i < n && si < ss.n) { buf[i++] = ss.s[si++]; } return i; } int parse_gophermap_line(struct doc *d, strv_t ln) { if (ln.n == 1 && ln.s[0] == '.') return 1; char url[512] = "gopher://"; size_t urln = 9; struct { char item_type; strv_t dstr; strv_t sel; strv_t host; strv_t port; } bits; bits.item_type = *(ln.s++); ln.n++; strv_split(&ln, '\t', &bits.dstr); strv_split(&ln, '\t', &bits.sel); strv_split(&ln, '\t', &bits.host); strv_split(&ln, '\t', &bits.port); switch (bits.item_type) { default: urln = scatss(url, urln, sizeof url, bits.host); if (urln < sizeof url) url[urln++] = ':'; urln = scatss(url, urln, sizeof url, bits.port); if (urln < sizeof url) url[urln++] = '/'; if (urln < sizeof url) url[urln++] = bits.item_type; urln = scatss(url, urln, sizeof url, bits.sel); doc_set_link(d, doc_add_link(d, (strv_t) { url, urln })); case 'i': doc_add_text(d, bits.dstr); doc_new_line(d); break; } return 0; } int parse_gophermap(struct doc *d, const str_t *b) { doc_init(d); strv_t ln, buf = { b->buf, b->sz }; while (strv_split(&buf, '\n', &ln)) { if (ln.n > 0 && ln.s[ln.n - 1] == '\r') ln.n--; if (parse_gophermap_line(d, ln)) break; } return 0; } int parse_doc(enum doc_type type, struct doc *d, const str_t *b) { switch (type) { case DOC_PLAIN: return parse_plain(d, b); case DOC_GOPHERMAP: return parse_gophermap(d, b); default: perr("unsupported doctype"); return -1; } }