summary refs log tree commit diff
path: root/parse.c
blob: a57c1b3506aa5870a385924d0c7b60dc5e779ae8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#include <ctype.h>
#include <string.h>

#include "parse.h"
#include "err.h"

/*
 * A borrowed view of `n` bytes starting at `s`.
 * The bytes are not NUL-terminated; always use `n` for the length.
 */
struct str_slice {
	const char *s; /* first byte of the slice */
	size_t n;      /* number of bytes in the slice */
};

/*
 * Return 1 if `c` may appear in the part of a URL that follows "://",
 * 0 otherwise.
 *
 * Accepted: ASCII letters and digits (per the current locale's isalnum),
 * the RFC 3986 unreserved marks and sub-delims, '%' for percent-escapes,
 * and the delimiters '@', ':', '/', '?' and '#', so that user-info, ports,
 * paths, queries and fragments are all kept.
 */
int isurlch(char c) {
	/* <ctype.h> functions are undefined for negative values other than EOF,
	 * so bytes >= 0x80 must be converted to unsigned char first. */
	if (isalnum((unsigned char)c)) return 1;
	switch (c) {
	case '-': case '.': case '_': case '~':             /* unreserved */
	case '!': case '$': case '&': case '\'': case '(':  /* sub-delims */
	case ')': case '*': case '+': case ',': case ';':
	case '=':
	case '%':                                           /* pct-encoded */
	case '@': case ':': case '/': case '?': case '#':   /* delimiters */
		return 1;
	default:
		return 0;
	}
}

/*
 * Extract the URL around a "://" separator and attach it to the line.
 *
 * d: document that receives the link (via doc_add_link).
 * l: line being scanned; l->txt holds l->len bytes.
 * i: index of the ':' of a "://" sequence inside l->txt. The caller must
 *    guarantee i + 2 < l->len.
 *
 * The scheme is the run of letters immediately before the ':'; the rest of
 * the URL is the run of isurlch() characters after "://". Nothing is linked
 * when either part is empty.
 */
void parse_plain_url(struct doc *d, struct doc_line *l, size_t i) {
	/* Walk back over the scheme letters. Testing txt[start - 1] keeps the
	 * index from underflowing and needs no fix-up step afterwards. */
	size_t start = i;
	while (start > 0 && isalpha((unsigned char)l->txt[start - 1])) start--;
	if (start == i) return; /* "://" without a scheme is not a URL */

	size_t end = i + 3;
	while (end < l->len && isurlch(l->txt[end])) end++;
	if (end == i + 3) return; /* nothing after "://" */

	/* Size the temporary to the URL itself rather than the whole line.
	 * NOTE(review): still a VLA; doc_add_link presumably copies the string
	 * since it is handed a stack buffer - confirm. */
	size_t urln = end - start;
	char url[urln + 1];
	memcpy(url, &l->txt[start], urln);
	url[urln] = 0;
	l->link = doc_add_link(d, url);
}

/* Link the first "scheme://..." occurrence found on line `l`, if any. */
static void plain_link_line(struct doc *d, struct doc_line *l) {
	/* Start at 1: a URL needs at least one scheme character before ':'. */
	for (size_t j = 1; j + 2 < l->len; j++) {
		if (l->txt[j] == ':' && l->txt[j + 1] == '/' && l->txt[j + 2] == '/') {
			parse_plain_url(d, l, j);
			break;
		}
	}
}

/*
 * Parse a plain-text buffer into document `d`.
 *
 * Every byte other than '\n' is appended to the current line; each '\n'
 * finishes the line. A finished line is scanned once for a URL, and at most
 * one link is attached per line. A final line that lacks a trailing newline
 * is scanned as well.
 *
 * Always returns 0.
 */
int parse_plain(struct doc *d, const buf_t *b) {
	doc_init(d);
	int pending = 0; /* non-zero while the current line has unscanned text */
	for (size_t i = 0; i < b->sz; i++) {
		char c = b->buf[i];
		if (c == '\n') {
			plain_link_line(d, doc_line_at(d, d->latest));
			doc_new_line(d);
			pending = 0;
		} else {
			doc_add_textn(d, &c, 1);
			pending = 1;
		}
	}
	/* The buffer may end without a newline; don't lose that line's link. */
	if (pending) plain_link_line(d, doc_line_at(d, d->latest));
	return 0;
}

/*
 * Read one tab-delimited field of a gophermap line.
 *
 * i: in/out cursor into `s`. On return it points just past the field's
 *    terminating tab, or equals `n` when the input ran out.
 * s: line text; n: number of valid bytes in `s`.
 *
 * Returns a slice covering the field (without the tab). The slice is empty
 * when the cursor is already at the end of the line.
 */
static struct str_slice gmbit(size_t *i, const char *s, size_t n) {
	/* Never let the cursor (and hence the slice pointer) leave [s, s + n]. */
	if (*i > n) *i = n;
	struct str_slice ss = {
		&s[*i],
		0
	};
	while (*i < n && s[*i] != '\t') {
		*i += 1;
		ss.n++;
	}
	/* Skip the delimiter only if one was actually found. */
	if (*i < n) *i += 1;
	return ss;
}

/*
 * Append the bytes of slice `ss` to `buf` starting at offset `i`, never
 * writing at or beyond offset `n`. No terminator is written.
 *
 * Returns the offset just past the last byte written (unchanged `i` when
 * nothing fit or the slice was empty).
 */
size_t scatss(char *buf, size_t i, size_t n, struct str_slice ss) {
	for (size_t k = 0; k < ss.n; k++) {
		if (i >= n) break; /* destination full: drop the remainder */
		buf[i] = ss.s[k];
		i++;
	}
	return i;
}

/*
 * Parse one gophermap line (without its line terminator) into document `d`.
 *
 * Expected layout: <type><display>\t<selector>\t<host>\t<port>
 *
 * - A line consisting of a single '.' marks the end of the menu.
 * - 'i' lines are added as text only.
 * - Every other type is added as text linked to
 *   gopher://<host>:<port>/<type><selector>.
 * - An empty line is added as an empty, unlinked line.
 *
 * URLs longer than the internal buffer are truncated.
 *
 * Returns 1 when the end-of-menu marker was seen, 0 otherwise.
 */
int parse_gophermap_line(struct doc *d, const char *s, size_t n) {
	char url[512] = "gopher://";
	/* Keep the last byte free so the terminating NUL always fits. */
	const size_t cap = sizeof url - 1;
	size_t urln = strlen(url);
	struct {
		char item_type;
		struct str_slice dstr;
		struct str_slice sel;
		struct str_slice host;
		struct str_slice port;
	} bits;
	size_t i = 0;

	/* An empty line has no item type; don't read s[0] or build a link. */
	if (n == 0) {
		doc_new_line(d);
		return 0;
	}

	bits.item_type = s[i++];
	bits.dstr = gmbit(&i, s, n);
	bits.sel = gmbit(&i, s, n);
	bits.host = gmbit(&i, s, n);
	bits.port = gmbit(&i, s, n);
	switch (bits.item_type) {
	case '.':
		if (n == 1) return 1;
		/* fallthrough: a longer line starting with '.' is a normal item */
	default:
		urln = scatss(url, urln, cap, bits.host);
		if (urln < cap) url[urln++] = ':';
		urln = scatss(url, urln, cap, bits.port);
		if (urln < cap) url[urln++] = '/';
		if (urln < cap) url[urln++] = bits.item_type;
		urln = scatss(url, urln, cap, bits.sel);
		url[urln] = 0; /* urln <= cap, so this stays inside url[] */
		doc_set_link(d, doc_add_link(d, url));
		/* fallthrough: linked items also emit their display string */
	case 'i':
		doc_add_textn(d, bits.dstr.s, bits.dstr.n);
		doc_new_line(d);
		break;
	}
	return 0;
}

/*
 * Parse a gophermap (Gopher menu) buffer into document `d`.
 *
 * The buffer is split on '\n'; a '\r' directly before the newline is
 * stripped. Each line is handed to parse_gophermap_line(), and parsing
 * stops as soon as that function reports the end-of-menu marker. A final
 * line that lacks a trailing newline is parsed too.
 *
 * Always returns 0.
 */
int parse_gophermap(struct doc *d, const buf_t *b) {
	doc_init(d);
	size_t ln_start = 0;
	while (ln_start < b->sz) {
		const char *ln_str = &b->buf[ln_start];
		size_t remaining = b->sz - ln_start;
		const char *nl = memchr(ln_str, '\n', remaining);
		size_t ln_len = nl ? (size_t)(nl - ln_str) : remaining;
		/* Compute the next start before trimming the carriage return. */
		size_t next = ln_start + ln_len + (nl ? 1 : 0);
		if (ln_len > 0 && ln_str[ln_len - 1] == '\r') ln_len--;
		/* A trailing lone "\r" with no newline carries no content. */
		if (!nl && ln_len == 0) break;
		if (parse_gophermap_line(d, ln_str, ln_len)) {
			break;
		}
		ln_start = next;
	}
	return 0;
}

/*
 * Dispatch to the parser matching `type` and fill document `d` from
 * buffer `b`.
 *
 * Returns the selected parser's result, or -1 (after reporting through
 * perr) when the document type has no parser.
 */
int parse_doc(enum doc_type type, struct doc *d, const buf_t *b) {
	if (type == DOC_PLAIN) {
		return parse_plain(d, b);
	}
	if (type == DOC_GOPHERMAP) {
		return parse_gophermap(d, b);
	}
	perr("unsupported doctype");
	return -1;
}