#include #include #include "parse.h" #include "err.h" struct str_slice { const char *s; size_t n; }; int isurlch(char c) { return isalpha(c) || isdigit(c) || c == '-' || c == '.' || c == '_' || c == '~' || c == '!' || c == '$' || c == '\'' || c == '(' || c == ')' || c == '*' || c == '+' || c == ',' || c == ';' || c == '=' || c == '%' || c == '@' || c == ':' || c == '/'; } void parse_plain_url(struct doc *d, struct doc_line *l, size_t i) { char url[l->len + 1]; size_t start = i - 1; while (start > 0 && isalpha(l->txt[start])) start--; if (!isalpha(l->txt[start])) start++; size_t end = i + 3; while (end < l->len && isurlch(l->txt[end])) end++; if (end == i + 3) return; size_t urln = end - start; memcpy(url, &l->txt[start], urln); url[urln] = 0; l->link = doc_add_link(d, url); } int parse_plain(struct doc *d, const buf_t *b) { doc_init(d); for (size_t i = 0; i < b->sz; i++) { char c = b->buf[i]; if (c == '\n') { struct doc_line *l = doc_line_at(d, d->latest); for (size_t i = 1; i + 2 < l->len; i++) { if (l->txt[i] == ':' && l->txt[i + 1] == '/' && l->txt[i + 2] == '/') { parse_plain_url(d, l, i); break; } } doc_new_line(d); } else { doc_add_textn(d, &c, 1); } } return 0; } static struct str_slice gmbit(size_t *i, const char *s, size_t n) { struct str_slice ss = { &s[*i], 0 }; while (*i < n && s[*i] != '\t') { *i += 1; ss.n++; } *i += 1; return ss; } size_t scatss(char *buf, size_t i, size_t n, struct str_slice ss) { size_t si = 0; while (i < n && si < ss.n) { buf[i++] = ss.s[si++]; } return i; } int parse_gophermap_line(struct doc *d, const char *s, size_t n) { char url[512] = "gopher://"; size_t urln = 9; struct { char item_type; struct str_slice dstr; struct str_slice sel; struct str_slice host; struct str_slice port; } bits; size_t i = 0; bits.item_type = s[i++]; bits.dstr = gmbit(&i, s, n); bits.sel = gmbit(&i, s, n); bits.host = gmbit(&i, s, n); bits.port = gmbit(&i, s, n); switch (bits.item_type) { case '.': if (n == 1) return 1; default: urln = scatss(url, urln, sizeof url, bits.host); if (urln < sizeof url) url[urln++] = ':'; urln = scatss(url, urln, sizeof url, bits.port); if (urln < sizeof url) url[urln++] = '/'; if (urln < sizeof url) url[urln++] = bits.item_type; urln = scatss(url, urln, sizeof url, bits.sel); url[urln] = 0; doc_set_link(d, doc_add_link(d, url)); case 'i': doc_add_textn(d, bits.dstr.s, bits.dstr.n); doc_new_line(d); break; } return 0; } int parse_gophermap(struct doc *d, const buf_t *b) { doc_init(d); size_t ln_start = 0; for (size_t i = 0; i < b->sz; i++) { if (b->buf[i] == '\r') continue; if (b->buf[i] == '\n') { char *ln_str = &b->buf[ln_start]; size_t ln_len = i - ln_start; if (i > 0 && b->buf[i - 1] == '\r') ln_len--; if (parse_gophermap_line(d, ln_str, ln_len)) { break; } ln_start = i + 1; } } return 0; } int parse_doc(enum doc_type type, struct doc *d, const buf_t *b) { switch (type) { case DOC_PLAIN: return parse_plain(d, b); case DOC_GOPHERMAP: return parse_gophermap(d, b); default: perr("unsupported doctype"); return -1; } }