summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--arena.h149
-rw-r--r--str.h152
-rw-r--r--strio.h217
-rw-r--r--utf8.h64
4 files changed, 582 insertions, 0 deletions
diff --git a/arena.h b/arena.h
new file mode 100644
index 0000000..045cde0
--- /dev/null
+++ b/arena.h
@@ -0,0 +1,149 @@
+#ifndef ARENA_H
+#define ARENA_H
+
+#include <stdint.h>
+#include <stddef.h>
+#include <string.h>
+
+/* One page of arena storage; pages form a doubly linked list. */
+typedef struct ArenaPg {
+	/* neighbouring pages; prev is NULL on the first page, next on the last */
+	struct ArenaPg *prev, *next;
+	/* beg: next free byte in this page; end: one past the page's last byte */
+	char *beg, *end;
+	/* allocation storage, laid out directly after the header */
+	char data[];
+} ArenaPg;
+
+/* Snapshot of an arena's position, filled by arena_save() and
+ * consumed by arena_load(). */
+typedef struct {
+	/* page that was current when the mark was taken (NULL for an empty arena) */
+	ArenaPg *pg;
+	/* that page's beg pointer at the time of the mark */
+	char *beg;
+} ArenaMark;
+
+/* Arena handle. With both members NULL the arena is empty; the first
+ * arena_reserve()/arena_alloc() call creates its first page. */
+typedef struct {
+	/* cur: page allocations are currently served from; tail: last page of the list */
+	ArenaPg *cur, *tail;
+} Arena;
+
+/* Allocate one object of type t from arena a and zero-fill it.
+ * Evaluates to a t* pointing at the new object. */
+#define new(a, t)\
+	(t*)arena_zeroed(arena_alloc(a, sizeof(t), _Alignof(t)), sizeof(t))
+
+/* Allocate room for n objects of type t from arena a; the memory is
+ * not explicitly zeroed. Evaluates to the void* from arena_alloc().
+ * n is parenthesized so that arguments such as `len + 1` are scaled
+ * by sizeof(t) as a whole. */
+#define new_arr(a, t, n)\
+	arena_alloc(a, sizeof(t) * (n), _Alignof(t))
+
+/* Resize the array p from `old` to `new` elements via arena_realloc().
+ * Element size and alignment are taken from *p, so p must be a typed
+ * pointer (not void*). p is evaluated more than once. */
+#define resize(a, p, old, new)\
+	arena_realloc(a, p, (old) * sizeof(*(p)), (new) * sizeof(*(p)),\
+			_Alignof(__typeof__(*(p))))
+
+void arena_free(Arena *a);
+
+void arena_save(Arena *a, ArenaMark *m);
+void arena_load(Arena *a, ArenaMark *m);
+
+void arena_reset(Arena *a);
+void arena_reserve(Arena *a, ptrdiff_t n);
+
+void *arena_alloc(Arena *a, ptrdiff_t n, ptrdiff_t align);
+void *arena_realloc(Arena *a, void *ptr, ptrdiff_t old, ptrdiff_t new, ptrdiff_t align);
+void *arena_zeroed(void *p, size_t n);
+
+#define ARENA_BACKEND_MALLOC 0
+#define ARENA_BACKEND_MMAP 1
+
+#ifndef ARENA_BACKEND
+#if defined(__linux__)
+#	define ARENA_BACKEND ARENA_BACKEND_MMAP
+#else
+#	define ARENA_BACKEND ARENA_BACKEND_MALLOC
+#endif
+#endif
+
+#ifdef ARENA_IMPL
+
+#include <stdio.h>
+#include <stdlib.h>
+/* Report a page allocation failure on stderr and abort the process. */
+static void arena_pg_alloc_fail(void) {
+	fputs("failed to allocate arena page\n", stderr);
+	abort();
+}
+
+#if ARENA_BACKEND == ARENA_BACKEND_MMAP
+#include <sys/mman.h>
+#include <unistd.h>
+#define ARENA_PG_SIZE sysconf(_SC_PAGESIZE)
+/* mmap backend: map n bytes of private anonymous read/write memory.
+ * Returns NULL when mmap() fails. */
+static inline void *arena_pg_alloc(ptrdiff_t n) {
+	const int prot = PROT_READ | PROT_WRITE;
+	const int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+	void *mem = mmap(NULL, n, prot, flags, -1, 0);
+	if (mem == MAP_FAILED) return NULL;
+	return mem;
+}
+/* mmap backend: unmap the n-byte mapping that starts at ptr. */
+static inline void arena_pg_free(void *ptr, ptrdiff_t n) {
+	munmap(ptr, n);
+}
+#elif ARENA_BACKEND == ARENA_BACKEND_MALLOC
+#define ARENA_PG_SIZE 8192
+/* malloc backend: obtain n bytes from malloc(); NULL on failure. */
+static inline void *arena_pg_alloc(ptrdiff_t n) {
+	void *mem = malloc(n);
+	return mem;
+}
+/* malloc backend: release a page; the size is not needed by free(). */
+static inline void arena_pg_free(void *ptr, ptrdiff_t n) {
+	(void)n;
+	free(ptr);
+}
+#endif
+
+/* Hand every page back to the backend, walking from the last page to
+ * the first. Afterwards both cur and tail are NULL. */
+void arena_free(Arena *a) {
+	while (a->tail) {
+		ArenaPg *victim = a->tail;
+		ArenaPg *older = victim->prev;
+		a->cur = older;
+		/* a page spans from its header up to its end pointer */
+		arena_pg_free(victim, (uintptr_t)(victim->end - (char*)victim));
+		a->tail = older;
+	}
+}
+
+/* Make a->cur a page with at least n free bytes.
+ *
+ * Walks forward from the current page; when no existing page has room,
+ * a new page is appended whose size is n plus the page header, rounded
+ * up to a multiple of ARENA_PG_SIZE (assumed to be a power of two).
+ * Calls arena_pg_alloc_fail() if the backend cannot provide the page. */
+void arena_reserve(Arena *a, ptrdiff_t n) {
+	/* Compare byte counts instead of forming beg + n, which may point
+	 * far past the page. An exact fit is accepted, so a page that was
+	 * sized for a request can serve the same request again after a
+	 * reset. */
+	while (a->cur && n > a->cur->end - a->cur->beg) a->cur = a->cur->next;
+	if (a->cur) return;
+	ptrdiff_t cap = n + (ptrdiff_t)sizeof(ArenaPg);
+	cap += (uintptr_t)-cap & (ARENA_PG_SIZE - 1);
+	ArenaPg *p = arena_pg_alloc(cap);
+	if (!p) arena_pg_alloc_fail();
+	p->next = NULL;
+	p->prev = a->tail;
+	p->beg = p->data;
+	p->end = (char*)p + cap;
+	if (a->tail) a->tail->next = p;
+	a->cur = (a->tail = p);
+}
+
+/* Allocate n bytes aligned to `align` (a power of two) from the arena.
+ * Reserves n plus worst-case padding first, so the aligned block always
+ * fits in the page that arena_reserve() selects. */
+void *arena_alloc(Arena *a, ptrdiff_t n, ptrdiff_t align) {
+	const ptrdiff_t slack = align - 1;
+	arena_reserve(a, n + slack);
+	ArenaPg *pg = a->cur;
+	/* bytes needed to round beg up to the next multiple of align */
+	uintptr_t pad = -(uintptr_t)pg->beg & slack;
+	char *ptr = pg->beg + pad;
+	pg->beg = ptr + n;
+	return ptr;
+}
+
+/* Resize the block at ptr from `old` to `new` bytes.
+ *
+ * If ptr is the most recent allocation of the current page and the new
+ * size still fits in that page, the block is resized in place.
+ * Otherwise a fresh block is allocated and the contents are copied;
+ * the old block is not reclaimed. ptr may be NULL (with old == 0). */
+void *arena_realloc(Arena *a, void *ptr, ptrdiff_t old, ptrdiff_t new, ptrdiff_t align) {
+	ArenaPg *pg = a->cur;
+	/* the fit test compares sizes so no out-of-range pointer is formed */
+	if (pg && ptr == pg->beg - old && new <= pg->end - (char*)ptr) {
+		pg->beg += new - old;
+		return ptr;
+	}
+	void *p = arena_alloc(a, new, align);
+	/* copy only what fits: when shrinking, `old` exceeds the new block */
+	if (ptr) memcpy(p, ptr, old < new ? old : new);
+	return p;
+}
+
+/* Zero-fill n bytes at p and return p. */
+void *arena_zeroed(void *p, size_t n) {
+	return memset(p, 0, n);
+}
+
+/* Mark the current page and every page before it as empty, then make
+ * the first page current. Pages stay allocated for reuse. */
+void arena_reset(Arena *a) {
+	ArenaPg *pg = a->cur;
+	if (!pg) return;
+	for (;;) {
+		pg->beg = pg->data;
+		if (!pg->prev) break;
+		pg = pg->prev;
+	}
+	a->cur = pg;
+}
+
+/* Record the arena's current position in *m for a later arena_load().
+ * For an empty arena both fields are set to NULL so the mark never
+ * carries an indeterminate pointer. */
+void arena_save(Arena *a, ArenaMark *m) {
+	m->pg = a->cur;
+	m->beg = a->cur ? a->cur->beg : NULL;
+}
+
+/* Roll the arena back to the position recorded in *m.
+ *
+ * Pages newer than the marked page are emptied and the marked page's
+ * beg pointer is restored. If the mark's page is not found while
+ * walking back (for example, the mark was taken while the arena was
+ * still empty), the walk stops at the first page, which is left empty
+ * and current, so existing pages are reused instead of orphaned. */
+void arena_load(Arena *a, ArenaMark *m) {
+	ArenaPg *pg = a->cur;
+	if (!pg) return;
+	while (pg != m->pg) {
+		pg->beg = pg->data;
+		if (!pg->prev) {
+			/* first page reached without meeting the mark */
+			a->cur = pg;
+			return;
+		}
+		pg = pg->prev;
+	}
+	pg->beg = m->beg;
+	a->cur = pg;
+}
+
+#endif
+#endif
diff --git a/str.h b/str.h
new file mode 100644
index 0000000..b10bc64
--- /dev/null
+++ b/str.h
@@ -0,0 +1,152 @@
+#ifndef STR_H
+#define STR_H
+
+#include <string.h>
+#include <stddef.h>
+
+#include "arena.h"
+
+/* Length-counted string view. The functions in this header never rely
+ * on a NUL terminator; use str_to_cstr() to obtain one. */
+typedef struct {
+	/* first byte of the string */
+	char *s;
+	/* length in bytes */
+	ptrdiff_t n;
+} Str;
+
+/* Result of str_cut(): the text before the separator and the text
+ * after it. */
+typedef struct {
+	Str head, tail;
+} Cut;
+
+/* Build a Str from a string literal (or char array); the length comes
+ * from sizeof, so the argument must not be a plain char pointer. */
+#define S(s) (Str){s,sizeof(s)-1}
+
+char *str_to_cstr(Str s, Arena *a);
+Str str_from_cstr(const char *s);
+int str_eql(Str a, Str b);
+int str_starts(Str a, Str b);
+int str_ends(Str a, Str b);
+void str_catc(Str *a, char b, Arena *m);
+Str str_skip(Str a, ptrdiff_t n);
+int is_space(char c);
+Str str_trim_left(Str a);
+Str str_trim_right(Str a);
+Str str_trim(Str a);
+Cut str_cut(Str s, char c);
+Str str_findc(Str s, char c);
+Str str_find(Str haystack, Str needle);
+int str_contains(Str a, Str b);
+Str str_dup(Str a, Arena *m);
+void str_cat(Str *a, Str b, Arena *m);
+Str str_replace_end(Str s, Str a, Str b, Arena *m);
+
+#ifdef STR_IMPL
+
+/* conversions */
+
+/* Copy s into arena a and append a NUL terminator.
+ * Returns the arena-allocated C string. */
+char *str_to_cstr(Str s, Arena *a) {
+	char *out = new_arr(a, char, s.n + 1);
+	char *end = out + s.n;
+	memcpy(out, s.s, s.n);
+	*end = '\0';
+	return out;
+}
+
+/* Wrap a NUL-terminated string as a Str without copying.
+ * The result aliases s; s must not be NULL. */
+Str str_from_cstr(const char *s) {
+	Str view;
+	view.s = (char*)s;
+	view.n = strlen(s);
+	return view;
+}
+
+/* pure functions */
+
+/* Return 1 when a and b have the same length and bytes, else 0. */
+int str_eql(Str a, Str b) {
+	if (a.n != b.n) return 0;
+	return memcmp(a.s, b.s, b.n) == 0;
+}
+
+/* Return 1 when a begins with b, else 0. */
+int str_starts(Str a, Str b) {
+	if (a.n < b.n) return 0;
+	return memcmp(a.s, b.s, b.n) == 0;
+}
+
+/* Return 1 when a ends with b, else 0. */
+int str_ends(Str a, Str b) {
+	if (a.n < b.n) return 0;
+	const char *suffix = a.s + (a.n - b.n);
+	return memcmp(suffix, b.s, b.n) == 0;
+}
+
+/* Append the single byte b to *a, growing its storage in arena m.
+ * a->s may change when the storage has to move. */
+void str_catc(Str *a, char b, Arena *m) {
+	ptrdiff_t len = a->n;
+	a->s = resize(m, a->s, len, len + 1);
+	a->s[len] = b;
+	a->n = len + 1;
+}
+
+/* Drop the first n bytes of a. No bounds check is made: the caller
+ * must ensure n does not exceed a.n. */
+Str str_skip(Str a, ptrdiff_t n) {
+	a.s += n;
+	a.n -= n;
+	return a;
+}
+
+/* Return 1 for space, tab, line feed and carriage return, else 0. */
+int is_space(char c) {
+	switch (c) {
+	case ' ':
+	case '\t':
+	case '\n':
+	case '\r':
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/* Return a without its leading whitespace (as defined by is_space). */
+Str str_trim_left(Str a) {
+	while (a.n > 0 && is_space(*a.s)) {
+		a.s++;
+		a.n--;
+	}
+	return a;
+}
+
+/* Return a without its trailing whitespace (as defined by is_space). */
+Str str_trim_right(Str a) {
+	for (; a.n > 0; a.n--) {
+		if (!is_space(a.s[a.n - 1])) break;
+	}
+	return a;
+}
+
+/* Return a with whitespace removed from both ends. */
+Str str_trim(Str a) {
+	Str right_trimmed = str_trim_right(a);
+	return str_trim_left(right_trimmed);
+}
+
+/* splitting, searching */
+
+/* Split s at the first occurrence of c.
+ * head is the text before c and tail the text after it. When c does
+ * not occur, head is all of s and tail is an empty Str at the end of
+ * s. */
+Cut str_cut(Str s, char c) {
+	char *end = s.s + s.n;
+	char *hit = memchr(s.s, c, s.n);
+	Cut r;
+	if (hit) {
+		r.head = (Str) { s.s, hit - s.s };
+		r.tail = (Str) { hit + 1, end - (hit + 1) };
+	} else {
+		r.head = s;
+		r.tail = (Str) { end, 0 };
+	}
+	return r;
+}
+
+/* Return the suffix of s starting at the first occurrence of c, or an
+ * empty Str positioned at the end of s when c does not occur. */
+Str str_findc(Str s, char c) {
+	char *hit = memchr(s.s, c, s.n);
+	if (!hit) return (Str) { s.s + s.n, 0 };
+	return (Str) { hit, s.n - (hit - s.s) };
+}
+
+/* Return the suffix of haystack that starts at the first occurrence of
+ * needle, or an empty Str when needle does not occur. An empty needle
+ * yields haystack unchanged. */
+Str str_find(Str haystack, Str needle) {
+	if (needle.n < 1) return haystack;
+	Str rest = haystack;
+	for (;;) {
+		if (rest.n <= 0) return rest;
+		/* jump to the next candidate: a byte equal to needle's first byte */
+		rest = str_findc(rest, needle.s[0]);
+		if (str_starts(rest, needle)) return rest;
+		if (rest.n > 0) rest = str_skip(rest, 1);
+	}
+}
+
+/* Return 1 when str_find(a, b) yields a non-empty result, else 0.
+ * Note that an empty b inside an empty a therefore reports 0. */
+int str_contains(Str a, Str b) {
+	Str hit = str_find(a, b);
+	return hit.n > 0;
+}
+
+/* allocating */
+
+/* Copy the bytes of a into arena m and return a Str for the copy. */
+Str str_dup(Str a, Arena *m) {
+	Str copy = { new_arr(m, char, a.n), a.n };
+	memcpy(copy.s, a.s, a.n);
+	return copy;
+}
+
+/* Append b to *a, growing a's storage in arena m.
+ * a->s may change when the storage has to move. */
+void str_cat(Str *a, Str b, Arena *m) {
+	ptrdiff_t old_n = a->n;
+	ptrdiff_t new_n = old_n + b.n;
+	a->s = resize(m, a->s, old_n, new_n);
+	memcpy(a->s + old_n, b.s, b.n);
+	a->n = new_n;
+}
+
+/* If s ends with a, return a new arena-allocated string in which that
+ * suffix is replaced by b; otherwise return s itself (no copy). */
+Str str_replace_end(Str s, Str a, Str b, Arena *m) {
+	if (!str_ends(s, a)) return s;
+	ptrdiff_t keep = s.n - a.n;
+	ptrdiff_t total = keep + b.n;
+	char *out = new_arr(m, char, total);
+	memcpy(out, s.s, keep);
+	memcpy(out + keep, b.s, b.n);
+	return (Str) { out, total };
+}
+
+#endif
+#endif
diff --git a/strio.h b/strio.h
new file mode 100644
index 0000000..c8bb21f
--- /dev/null
+++ b/strio.h
@@ -0,0 +1,217 @@
+#ifndef STRIO_H
+#define STRIO_H
+
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "str.h"
+#include "arena.h"
+
+int read_all(FILE *f, Str *buf, Arena *a);
+int next_line(Str *src, Str *line);
+void str_putf(Str s, FILE *f);
+void str_put(Str s);
+
+int str_to_u64(Str s, uint64_t *out);
+void str_cat_i64(Str *out, int64_t c, char pad_char, int min_width, Arena *a);
+void str_cat_u64(Str *out, uint64_t c, char pad_char, int min_width, Arena *a);
+void str_cat_fmtv(Str *out, Arena *arena, const char *fmt, va_list ap);
+void str_cat_fmt(Str *out, Arena *arena, const char *fmt, ...);
+Str str_fmtv(Arena *arena, const char *fmt, va_list ap);
+Str str_fmt(Arena *arena, const char *fmt, ...);
+const char *cstr_fmt(Arena *arena, const char *fmt, ...);
+
+#ifdef STRIO_IMPL
+
+#include <stdio.h>
+#include <stdint.h>
+
+/* Return the size of f as reported by ftell() at end of file, then
+ * rewind f. Returns -1 when the size is zero or cannot be determined. */
+static inline long read_all_file_size(FILE *f) {
+	fseek(f, 0, SEEK_END);
+	long end = ftell(f);
+	fseek(f, 0, SEEK_SET);
+	if (end <= 0) return -1;
+	return end;
+}
+
+/* Read the remaining contents of f into an arena-allocated buffer.
+ *
+ * When the file size is known the buffer is allocated once and a short
+ * read counts as failure. Otherwise the data is read in chunks into a
+ * buffer that starts at 4096 bytes and doubles when it fills up.
+ * Returns 0 on success and -1 on failure (NULL f, short read, or a
+ * stream error). */
+int read_all(FILE *f, Str *buf, Arena *a) {
+	if (!f) return -1;
+	long known = read_all_file_size(f);
+	if (known >= 1) {
+		buf->n = known;
+		buf->s = new_arr(a, char, known);
+		size_t got = fread(buf->s, 1, buf->n, f);
+		if (got < (size_t)buf->n) return -1;
+		return ferror(f) ? -1 : 0;
+	}
+	ptrdiff_t cap = 4096;
+	buf->s = new_arr(a, char, cap);
+	buf->n = 0;
+	while (!feof(f)) {
+		size_t got = fread(buf->s + buf->n, 1, cap - buf->n, f);
+		if (got < 1) break;
+		buf->n += got;
+		if (buf->n < cap) continue;
+		/* buffer is full: double the capacity before the next read */
+		size_t old_cap = cap;
+		while (buf->n >= cap) cap <<= 1;
+		buf->s = resize(a, buf->s, old_cap, cap);
+	}
+	return ferror(f) ? -1 : 0;
+}
+
+/* Pop the next line off the front of *src into *line.
+ *
+ * The line excludes its terminating '\n' and a '\r' directly before
+ * it. Returns 1 when a line was produced and 0 once *src is empty.
+ * A final line without a trailing newline consumes exactly the
+ * remaining bytes, so *src ends as a valid empty Str rather than with
+ * a negative length and a pointer past the end of the data. */
+int next_line(Str *src, Str *line) {
+	if (src->n < 1) return 0;
+	char *newln = memchr(src->s, '\n', src->n);
+	ptrdiff_t len = newln ? newln - src->s : src->n;
+	/* skip the newline only when there actually is one */
+	ptrdiff_t used = newln ? len + 1 : len;
+	line->s = src->s;
+	line->n = len;
+	src->s += used;
+	src->n -= used;
+	if (line->n > 0 && line->s[line->n-1] == '\r') line->n--;
+	return 1;
+}
+
+/* Write the bytes of s to stream f. The result of fwrite() is not
+ * checked. */
+void str_putf(Str s, FILE *f) {
+	const char *bytes = s.s;
+	fwrite(bytes, 1, s.n, f);
+}
+
+/* Write the bytes of s to stdout. */
+void str_put(Str s) {
+	FILE *dest = stdout;
+	str_putf(s, dest);
+}
+
+/* formatted conversion */
+
+/* Parse s as an unsigned decimal number.
+ *
+ * Returns 0 and stores the value in *out on success. Returns -1,
+ * leaving *out untouched, when s is empty, contains a byte that is
+ * not a decimal digit, or does not fit in 64 bits. */
+int str_to_u64(Str s, uint64_t *out) {
+	if (s.n < 1) return -1;
+	uint64_t acc = 0;
+	for (ptrdiff_t i = 0; i < s.n; i++) {
+		char c = s.s[i];
+		if (c < '0' || c > '9') return -1;
+		uint64_t digit = (uint64_t)(c - '0');
+		/* reject instead of silently wrapping around */
+		if (acc > (UINT64_MAX - digit) / 10) return -1;
+		acc = acc * 10 + digit;
+	}
+	*out = acc;
+	return 0;
+}
+
+/* Write the decimal digits of c right-aligned into buf, so the last
+ * digit lands in buf[31], and store the digit count in *n. No NUL
+ * terminator is left in buf. */
+static void str_cat_u64_(char buf[32], int *n, uint64_t c) {
+	int count = 0;
+	char *p = buf + 32;
+	do {
+		*--p = (char)('0' + c % 10);
+		c /= 10;
+		count++;
+	} while (c != 0);
+	*n = count;
+}
+
+/* Append c in decimal to *out, left-padded with pad_char up to
+ * min_width characters. The width is capped at the 32-byte scratch
+ * buffer; every byte that is appended has been written first. */
+void str_cat_u64(Str *out, uint64_t c, char pad_char, int min_width, Arena *a) {
+	int n;
+	/* more than enough for the largest 64-bit number
+	 * log_10(1 << 64) ~= 19.3 digits max */
+	char buf[32];
+	str_cat_u64_(buf, &n, c);
+	/* bump n and fill the slot in one step so buf[0] is also written
+	 * when the padding reaches the full buffer width */
+	while (n < min_width && n < (int)sizeof(buf)) buf[sizeof(buf) - ++n] = pad_char;
+	str_cat(out, (Str) { &buf[sizeof(buf) - n], n }, a);
+}
+
+/* Append c in decimal to *out, left-padded with pad_char up to
+ * min_width characters (capped at the 32-byte scratch buffer).
+ *
+ * With '0' padding the sign is placed before the zeros ("-005");
+ * with any other pad character the padding precedes the sign
+ * ("  -5"). INT64_MIN is handled by negating in unsigned arithmetic. */
+void str_cat_i64(Str *out, int64_t c, char pad_char, int min_width, Arena *a) {
+	/* more than enough for the largest 64-bit number
+	 * log_10(1 << 64) ~= 19.3 digits max */
+	char buf[32];
+	int n;
+	const int cap = (int)sizeof(buf);
+	const int neg = c < 0;
+	/* -c would overflow for INT64_MIN; unsigned negation is well defined */
+	uint64_t mag = neg ? 0 - (uint64_t)c : (uint64_t)c;
+	str_cat_u64_(buf, &n, mag);
+	if (neg && pad_char == '0') {
+		/* keep one column free for the sign, then put it in front */
+		while (n + 1 < min_width && n + 1 < cap) buf[cap - ++n] = '0';
+		buf[cap - ++n] = '-';
+	} else {
+		if (neg) buf[cap - ++n] = '-';
+		while (n < min_width && n < cap) buf[cap - ++n] = pad_char;
+	}
+	str_cat(out, (Str) { &buf[cap - n], n }, a);
+}
+
+/* Append text formatted according to fmt to *out.
+ *
+ * IMPORTANT: this is not and will not be printf() compatible
+ *
+ * %s - c string
+ * %S - Str
+ * %i - int32
+ * %I - int64
+ * %u - uint32
+ * %U - uint64
+ *
+ * Between '%' and the conversion character an optional '0' flag and a
+ * decimal minimum width may appear; they only affect the integer
+ * conversions. Any other character after '%' is copied to the output
+ * as is (so "%%" produces '%'). A '%' at the very end of fmt, or one
+ * followed only by flag/width digits, produces no output.
+ **/
+void str_cat_fmtv(Str *out, Arena *arena, const char *fmt, va_list ap) {
+	size_t n = strlen(fmt);
+	for (size_t i = 0; i < n; i++) {
+		/* copy the literal text up to the next '%' in one piece */
+		const char *mch = memchr(&fmt[i], '%', n - i);
+		if (!mch) {
+			str_cat(out, (Str) { (char*)&fmt[i], n - i }, arena);
+			break;
+		}
+		size_t skip = mch - &fmt[i];
+		if (mch != &fmt[i]) {
+			str_cat(out, (Str) { (char*)&fmt[i], skip }, arena);
+			i += skip;
+		}
+		/* fmt[i] is now '%'; parse the directive if anything follows it */
+		if (i + 1 < n) {
+			int zero_pad = 0, min_width = 0;
+			i++;
+			if (fmt[i] == '0') {
+				zero_pad = 1;
+				i++;
+			}
+			while (i < n && fmt[i] >= '0' && fmt[i] <= '9') {
+				min_width = min_width * 10 + (fmt[i] - '0');
+				i++;
+			}
+			/* format string ended in the middle of a directive */
+			if (i >= n) break;
+			switch (fmt[i]) {
+			case 's':
+				str_cat(out, str_from_cstr(va_arg(ap, const char *)), arena);
+				break;
+			case 'S':
+				str_cat(out, va_arg(ap, Str), arena);
+				break;
+			case 'i':
+				str_cat_i64(out, va_arg(ap, int32_t), zero_pad?'0':' ', min_width, arena);
+				break;
+			case 'I':
+				str_cat_i64(out, va_arg(ap, int64_t), zero_pad?'0':' ', min_width, arena);
+				break;
+			case 'u':
+				str_cat_u64(out, va_arg(ap, uint32_t), zero_pad?'0':' ', min_width, arena);
+				break;
+			case 'U':
+				str_cat_u64(out, va_arg(ap, uint64_t), zero_pad?'0':' ', min_width, arena);
+				break;
+			default:
+				/* unknown conversion: emit the character itself */
+				str_catc(out, fmt[i], arena);
+				break;
+			}
+		}
+	}
+}
+
+/* Variadic front end for str_cat_fmtv(): append formatted text to
+ * *out. */
+void str_cat_fmt(Str *out, Arena *arena, const char *fmt, ...) {
+	va_list args;
+	va_start(args, fmt);
+	str_cat_fmtv(out, arena, fmt, args);
+	va_end(args);
+}
+
+/* Format into a fresh string that starts out empty and return it. */
+Str str_fmtv(Arena *arena, const char *fmt, va_list ap) {
+	Str result = { NULL, 0 };
+	str_cat_fmtv(&result, arena, fmt, ap);
+	return result;
+}
+
+/* Variadic form of str_fmtv(): format into a fresh string and return
+ * it. */
+Str str_fmt(Arena *arena, const char *fmt, ...) {
+	Str result = { NULL, 0 };
+	va_list args;
+	va_start(args, fmt);
+	str_cat_fmtv(&result, arena, fmt, args);
+	va_end(args);
+	return result;
+}
+
+/* Format into a fresh arena-allocated, NUL-terminated C string. */
+const char *cstr_fmt(Arena *arena, const char *fmt, ...) {
+	Str result = { NULL, 0 };
+	va_list args;
+	va_start(args, fmt);
+	str_cat_fmtv(&result, arena, fmt, args);
+	va_end(args);
+	str_catc(&result, '\0', arena);
+	return result.s;
+}
+
+#endif
+#endif
diff --git a/utf8.h b/utf8.h
new file mode 100644
index 0000000..10a4f2b
--- /dev/null
+++ b/utf8.h
@@ -0,0 +1,64 @@
+#ifndef UTF8_H
+#define UTF8_H
+
+#include "str.h"
+
+/* Returned by utf8_next() for malformed input. Parenthesized so the
+ * cast cannot combine with operators around the macro's use site. */
+#define UTF8_INVALID ((unsigned)-1)
+
+int utf8_len(unsigned cp);
+unsigned utf8_next(Str *s);
+void utf8_to_buf(unsigned cp, char *buf, int n);
+
+#ifdef UTF8_IMPL
+
+#include <stdbit.h>
+#include <stdint.h>
+
+/* Decode the code point at the front of *s and advance past it.
+ *
+ * Returns 0 when *s is empty. Returns UTF8_INVALID, without advancing,
+ * for a stray continuation byte, an invalid lead byte (0xF8..0xFF), a
+ * sequence cut short by the end of *s, or a continuation byte that
+ * does not have the form 10xxxxxx. Overlong encodings are not
+ * rejected. */
+unsigned utf8_next(Str *s) {
+	if (s->n < 1) return 0;
+	/* sequence class by high nibble of the first byte:
+	 * 0 = ASCII, 1 = continuation byte, 2..4 = lead byte of that length */
+	static const uint8_t niblen[16] = { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,3,4 };
+	/* payload mask of the first byte, indexed by len (0..4) */
+	static const uint8_t cpmask[5] = { 0x7f, 0x3f, 0x1f, 0x0f, 0x07 };
+	int len = niblen[(uint8_t)*s->s >> 4];
+	if (!len) { s->n--; return *s->s++; }
+	/* a continuation byte cannot start a sequence */
+	if (len == 1) return UTF8_INVALID;
+	if (s->n < len || (s->s[0] & (0x80 >> len))) return UTF8_INVALID;
+	unsigned cp = (uint8_t)*s->s & cpmask[len];
+	for (int i = 1; i < len; i++) {
+		if ((s->s[i] & 0xc0) != 0x80) return UTF8_INVALID;
+		cp = (cp << 6) | (s->s[i] & 0x3f);
+	}
+	s->s += len, s->n -= len;
+	return cp;
+}
+
+/* Decode the code point at the front of *s and advance past it,
+ * without validating the input.
+ *
+ * Returns 0 when *s is empty. The caller must guarantee well-formed
+ * UTF-8: nothing checks that the whole sequence lies within s->n, so
+ * a truncated sequence is read past the end of the string. */
+unsigned utf8_next_unchecked(Str *s) {
+	if (s->n < 1) return 0;
+	static const uint8_t niblen[16] = { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,3,4 };
+	/* payload mask of the first byte, indexed by len (0..4); the
+	 * entry for len 4 is required for 4-byte sequences */
+	static const uint8_t cpmask[5] = { 0x7f, 0x3f, 0x1f, 0x0f, 0x07 };
+	int len = niblen[(uint8_t)*s->s >> 4];
+	if (!len) { s->n--; return *s->s++; }
+	unsigned cp = (uint8_t)*s->s & cpmask[len];
+	for (int i = 1; i < len; i++) cp = (cp << 6) | (s->s[i] & 0x3f);
+	s->s += len, s->n -= len;
+	return cp;
+}
+
+/* Return the number of bytes (1..6) the original UTF-8 scheme needs
+ * to encode cp, determined by the position of its highest set bit. */
+int utf8_len(unsigned cp) {
+	if (cp <= 0x7fu) return 1;      /* up to  7 bits */
+	if (cp <= 0x7ffu) return 2;     /* up to 11 bits */
+	if (cp <= 0xffffu) return 3;    /* up to 16 bits */
+	if (cp <= 0x1fffffu) return 4;  /* up to 21 bits */
+	if (cp <= 0x3ffffffu) return 5; /* up to 26 bits */
+	return 6;
+}
+
+/* Encode cp as an n-byte UTF-8 sequence into buf.
+ *
+ * n is the sequence length, normally obtained from utf8_len(cp); buf
+ * must have room for n bytes and no terminator is written. Lengths
+ * outside 1..6 are ignored and leave buf untouched. */
+void utf8_to_buf(unsigned cp, char *buf, int n) {
+	if (n < 1 || n > 6) return;
+	if (n == 1) {
+		*buf = (char)cp;
+		return;
+	}
+	/* lead-byte prefix for sequences of 2..6 bytes */
+	static const uint8_t lead[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
+	/* fill continuation bytes from the last one backwards, six bits each */
+	for (int i = n - 1; i > 0; i--) {
+		buf[i] = (char)(0x80 | (cp & 0x3f));
+		cp >>= 6;
+	}
+	buf[0] = (char)(lead[n - 2] | cp);
+}
+
+#endif
+#endif