From b977942932bf1cd625f8d7a6f310b0d3a537f72e Mon Sep 17 00:00:00 2001
From: wrmr
Date: Wed, 6 Nov 2024 17:22:46 -0500
Subject: add plaintext url parsing

---
 parse.c | 38 +++++++++++++++++++++++++++++++++-----
 1 file changed, 33 insertions(+), 5 deletions(-)

(limited to 'parse.c')

diff --git a/parse.c b/parse.c
index 601c6b1..a57c1b3 100644
--- a/parse.c
+++ b/parse.c
@@ -1,11 +1,44 @@
+#include <ctype.h>
+#include <string.h>
+
 #include "parse.h"
 #include "err.h"
 
+struct str_slice {
+	const char *s;
+	size_t n;
+};
+
+int isurlch(char c) {
+	return isalpha(c) || isdigit(c) || c == '-' || c == '.' || c == '_' || c == '~' || c == '!' || c == '$' || c == '\'' || c == '(' || c == ')' || c == '*' || c == '+' || c == ',' || c == ';' || c == '=' || c == '%' || c == '@' || c == ':' || c == '/';
+}
+
+void parse_plain_url(struct doc *d, struct doc_line *l, size_t i) {
+	char url[l->len + 1];
+	size_t start = i - 1;
+	while (start > 0 && isalpha(l->txt[start])) start--;
+	if (!isalpha(l->txt[start])) start++;
+	size_t end = i + 3;
+	while (end < l->len && isurlch(l->txt[end])) end++;
+	if (end == i + 3) return;
+	size_t urln = end - start;
+	memcpy(url, &l->txt[start], urln);
+	url[urln] = 0;
+	l->link = doc_add_link(d, url);
+}
+
 int parse_plain(struct doc *d, const buf_t *b) {
 	doc_init(d);
 	for (size_t i = 0; i < b->sz; i++) {
 		char c = b->buf[i];
 		if (c == '\n') {
+			struct doc_line *l = doc_line_at(d, d->latest);
+			for (size_t i = 1; i + 2 < l->len; i++) {
+				if (l->txt[i] == ':' && l->txt[i + 1] == '/' && l->txt[i + 2] == '/') {
+					parse_plain_url(d, l, i);
+					break;
+				}
+			}
 			doc_new_line(d);
 		} else {
 			doc_add_textn(d, &c, 1);
@@ -14,11 +47,6 @@ int parse_plain(struct doc *d, const buf_t *b) {
 	return 0;
 }
 
-struct str_slice {
-	const char *s;
-	size_t n;
-};
-
 static struct str_slice gmbit(size_t *i, const char *s, size_t n) {
 	struct str_slice ss = {
 		&s[*i],
-- 
cgit 1.4.1-2-gfad0