#include #include #include "parse.h" #include "str.h" #include "err.h" int isurlch(char c) { return isalpha(c) || isdigit(c) || c == '-' || c == '.' || c == '_' || c == '~' || c == '!' || c == '$' || c == '\'' || c == '(' || c == ')' || c == '*' || c == '+' || c == ',' || c == ';' || c == '=' || c == '%' || c == '@' || c == ':' || c == '/'; } void parse_plain_url(struct doc *d, strv_t ln, size_t i) { size_t start = i - 1; while (start > 0 && isalpha(ln.s[start])) start--; if (!isalpha(ln.s[start])) start++; size_t end = i + 3; while (end < ln.n && isurlch(ln.s[end])) end++; if (end == i + 3) return; strv_t url = { &ln.s[start], end - start }; doc_line_at(d, d->latest)->link = doc_add_link(d, url); } int parse_plain(struct doc *d, const str_t *b) { doc_init(d); strv_t ln, buf = strv(b); while (strv_split(&buf, '\n', &ln)) { if (ln.n > 0 && ln.s[ln.n - 1] == '\r') ln.n--; for (size_t j = 1; j + 2 < ln.n; j++) { if (ln.s[j] == ':' && ln.s[j + 1] == '/' && ln.s[j + 2] == '/') { parse_plain_url(d, ln, j); } } doc_add_line(d, ln); } return 0; } int parse_gophermap_line(struct doc *d, strv_t ln) { return 0; } int parse_gophermap(struct doc *d, const str_t *b) { str_t url; struct { char item_type; strv_t dstr; strv_t sel; strv_t host; strv_t port; } bits; doc_init(d); str_init(&url, 64); strv_t ln, buf = { b->buf, b->sz }; while (strv_split(&buf, '\n', &ln)) { if (ln.n > 0 && ln.s[ln.n - 1] == '\r') ln.n--; if (ln.n == 1 && ln.s[0] == '.') break; bits.item_type = strv_next(&ln); strv_split(&ln, '\t', &bits.dstr); strv_split(&ln, '\t', &bits.sel); strv_split(&ln, '\t', &bits.host); strv_split(&ln, '\t', &bits.port); switch (bits.item_type) { default: str_fmt(&url, "%s:%s/%c%s", bits.host, bits.port, bits.item_type, bits.sel); doc_set_link(d, doc_add_link(d, strv(url))); case 'i': doc_add_text(d, bits.dstr); doc_new_line(d); break; } } str_free(&url); return 0; } int parse_doc(enum doc_type type, struct doc *d, const str_t *b) { switch (type) { case DOC_PLAIN: return parse_plain(d, b); case DOC_GOPHERMAP: return parse_gophermap(d, b); default: perr("unsupported doctype"); return -1; } }