diff options
author | wrmr | 2025-06-23 00:08:13 -0400 |
---|---|---|
committer | wrmr | 2025-06-23 00:08:13 -0400 |
commit | 77dbfeab0b777f7e063c7d10d91572f423c5dd18 (patch) | |
tree | ec0ca40e2e28dbb82be42467785642f1464057ca |
Initial commit
-rw-r--r-- | arena.h | 149 | ||||
-rw-r--r-- | str.h | 152 | ||||
-rw-r--r-- | strio.h | 217 | ||||
-rw-r--r-- | utf8.h | 64 |
4 files changed, 582 insertions, 0 deletions
diff --git a/arena.h b/arena.h new file mode 100644 index 0000000..045cde0 --- /dev/null +++ b/arena.h @@ -0,0 +1,149 @@ +#ifndef ARENA_H +#define ARENA_H + +#include <stdint.h> +#include <stddef.h> +#include <string.h> + +typedef struct ArenaPg { + struct ArenaPg *prev, *next; + char *beg, *end; + char data[]; +} ArenaPg; + +typedef struct { + ArenaPg *pg; + char *beg; +} ArenaMark; + +typedef struct { + ArenaPg *cur, *tail; +} Arena; + +#define new(a, t)\ + (t*)arena_zeroed(arena_alloc(a, sizeof(t), _Alignof(t)), sizeof(t)) + +#define new_arr(a, t, n)\ + arena_alloc(a, sizeof(t) * n, _Alignof(t)) + +#define resize(a, p, old, new)\ + arena_realloc(a, p, (old) * sizeof(*(p)), (new) * sizeof(*(p)),\ + _Alignof(__typeof__(*(p)))) + +void arena_free(Arena *a); + +void arena_save(Arena *a, ArenaMark *m); +void arena_load(Arena *a, ArenaMark *m); + +void arena_reset(Arena *a); +void arena_reserve(Arena *a, ptrdiff_t n); + +void *arena_alloc(Arena *a, ptrdiff_t n, ptrdiff_t align); +void *arena_realloc(Arena *a, void *ptr, ptrdiff_t old, ptrdiff_t new, ptrdiff_t align); +void *arena_zeroed(void *p, size_t n); + +#define ARENA_BACKEND_MALLOC 0 +#define ARENA_BACKEND_MMAP 1 + +#ifndef ARENA_BACKEND +#if defined(__linux__) +# define ARENA_BACKEND ARENA_BACKEND_MMAP +#else +# define ARENA_BACKEND ARENA_BACKEND_MALLOC +#endif +#endif + +#ifdef ARENA_IMPL + +#include <stdio.h> +#include <stdlib.h> +static void arena_pg_alloc_fail(void) { + fprintf(stderr, "failed to allocate arena page\n"); + abort(); +} + +#if ARENA_BACKEND == ARENA_BACKEND_MMAP +#include <sys/mman.h> +#include <unistd.h> +#define ARENA_PG_SIZE sysconf(_SC_PAGESIZE) +static inline void *arena_pg_alloc(ptrdiff_t n) { + void *p = mmap(NULL, n, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + return p == MAP_FAILED ? NULL : p; +} +static inline void arena_pg_free(void *ptr, ptrdiff_t n) { munmap(ptr, n); } +#elif ARENA_BACKEND == ARENA_BACKEND_MALLOC +#define ARENA_PG_SIZE 8192 +static inline void *arena_pg_alloc(ptrdiff_t n) { return malloc(n); } +static inline void arena_pg_free(void *ptr, ptrdiff_t n) { free(ptr); (void)n; } +#endif + +void arena_free(Arena *a) { + while (a->tail) { + a->cur = a->tail->prev; + arena_pg_free(a->tail, (uintptr_t)(a->tail->end - (char*)a->tail)); + a->tail = a->cur; + } +} + +void arena_reserve(Arena *a, ptrdiff_t n) { + while (a->cur && a->cur->beg + n >= a->cur->end) a->cur = a->cur->next; + if (a->cur) return; + ptrdiff_t cap = n + sizeof(ArenaPg); + cap += (uintptr_t)-cap & (ARENA_PG_SIZE - 1); + ArenaPg *p = arena_pg_alloc(cap); + if (!p) arena_pg_alloc_fail(); + p->next = NULL; + p->prev = a->tail; + p->beg = p->data; + p->end = (char*)p + cap; + if (a->tail) a->tail->next = p; + a->cur = (a->tail = p); +} + +void *arena_alloc(Arena *a, ptrdiff_t n, ptrdiff_t align) { + arena_reserve(a, n + (align - 1)); + char *ptr = a->cur->beg + (-(uintptr_t)a->cur->beg & (align - 1)); + a->cur->beg = ptr + n; + return ptr; +} + +void *arena_realloc(Arena *a, void *ptr, ptrdiff_t old, ptrdiff_t new, ptrdiff_t align) { + if (a->cur && ptr == a->cur->beg - old && (char*)ptr + new < a->cur->end) { + a->cur->beg += new - old; + return ptr; + } else { + void *p = arena_alloc(a, new, align); + if (ptr) memcpy(p, ptr, old); + return p; + } +} + +void *arena_zeroed(void *p, size_t n) { + memset(p, 0, n); + return p; +} + +void arena_reset(Arena *a) { + if (!a->cur) return; + while (a->cur->prev) { + a->cur->beg = a->cur->data; + a->cur = a->cur->prev; + } + a->cur->beg = a->cur->data; +} + +void arena_save(Arena *a, ArenaMark *m) { + m->pg = a->cur; + if (a->cur) m->beg = a->cur->beg; +} + +void arena_load(Arena *a, ArenaMark *m) { + while (a->cur && a->cur != m->pg) { + a->cur->beg = a->cur->data; + a->cur = a->cur->prev; + } + if (a->cur) a->cur->beg = m->beg; +} + +#endif +#endif diff --git a/str.h b/str.h new file mode 100644 index 0000000..b10bc64 --- /dev/null +++ b/str.h @@ -0,0 +1,152 @@ +#ifndef STR_H +#define STR_H + +#include <string.h> +#include <stddef.h> + +#include "arena.h" + +typedef struct { + char *s; + ptrdiff_t n; +} Str; + +typedef struct { + Str head, tail; +} Cut; + +#define S(s) (Str){s,sizeof(s)-1} + +char *str_to_cstr(Str s, Arena *a); +Str str_from_cstr(const char *s); +int str_eql(Str a, Str b); +int str_starts(Str a, Str b); +int str_ends(Str a, Str b); +void str_catc(Str *a, char b, Arena *m); +Str str_skip(Str a, ptrdiff_t n); +int is_space(char c); +Str str_trim_left(Str a); +Str str_trim_right(Str a); +Str str_trim(Str a); +Cut str_cut(Str s, char c); +Str str_findc(Str s, char c); +Str str_find(Str haystack, Str needle); +int str_contains(Str a, Str b); +Str str_dup(Str a, Arena *m); +void str_cat(Str *a, Str b, Arena *m); +Str str_replace_end(Str s, Str a, Str b, Arena *m); + +#ifdef STR_IMPL + +/* conversions */ + +char *str_to_cstr(Str s, Arena *a) { + char *r = new_arr(a, char, s.n + 1); + memcpy(r, s.s, s.n); + r[s.n] = 0; + return r; +} + +Str str_from_cstr(const char *s) { + return (Str) { (char*)s, strlen(s) }; +} + +/* pure functions */ + +int str_eql(Str a, Str b) { + return a.n == b.n && !memcmp(a.s, b.s, b.n); +} + +int str_starts(Str a, Str b) { + return a.n >= b.n && !memcmp(a.s, b.s, b.n); +} + +int str_ends(Str a, Str b) { + return a.n >= b.n && !memcmp(&a.s[a.n - b.n], b.s, b.n); +} + +void str_catc(Str *a, char b, Arena *m) { + a->s = resize(m, a->s, a->n, a->n + 1); + a->s[a->n++] = b; +} + +Str str_skip(Str a, ptrdiff_t n) { + return (Str) { a.s + n, a.n - n }; +} + +int is_space(char c) { + return c == ' ' || c == '\t' || c == '\n' || c == '\r'; +} + +Str str_trim_left(Str a) { + while (a.n > 0 && is_space(a.s[0])) a.s++, a.n--; + return a; +} + +Str str_trim_right(Str a) { + while (a.n > 0 && is_space(a.s[a.n - 1])) a.n--; + return a; +} + +Str str_trim(Str a) { + return str_trim_left(str_trim_right(a)); +} + +/* splitting, searching */ + +Cut str_cut(Str s, char c) { + char *p = memchr(s.s, c, s.n); + if (!p) { + return (Cut) { s, { &s.s[s.n], 0 } }; + } else { + return (Cut) { + { s.s, p - s.s }, + { p + 1, &s.s[s.n] - (p + 1) } + }; + } +} + +Str str_findc(Str s, char c) { + char *p = memchr(s.s, c, s.n); + return p ? (Str) { p, s.n - (p - s.s) } : (Str) { &s.s[s.n], 0 }; +} + +Str str_find(Str haystack, Str needle) { + if (needle.n < 1) return haystack; + while (haystack.n > 0) { + haystack = str_findc(haystack, needle.s[0]); + if (str_starts(haystack, needle)) break; + if (haystack.n > 0) haystack = str_skip(haystack, 1); + } + return haystack; +} + +int str_contains(Str a, Str b) { + return str_find(a, b).n > 0; +} + +/* allocating */ + +Str str_dup(Str a, Arena *m) { + char *s = new_arr(m, char, a.n); + memcpy(s, a.s, a.n); + a.s = s; + return a; +} + +void str_cat(Str *a, Str b, Arena *m) { + a->s = resize(m, a->s, a->n, a->n + b.n); + memcpy(&a->s[a->n], b.s, b.n); + a->n += b.n; +} + +Str str_replace_end(Str s, Str a, Str b, Arena *m) { + if (!str_ends(s, a)) return s; + char *p = new_arr(m, char, s.n + b.n - a.n); + memcpy(p, s.s, s.n - a.n); + memcpy(p + s.n - a.n, b.s, b.n); + return (Str) { p, s.n + b.n - a.n }; +} + +#endif +#endif diff --git a/strio.h b/strio.h new file mode 100644 index 0000000..c8bb21f --- /dev/null +++ b/strio.h @@ -0,0 +1,217 @@ +#ifndef STRIO_H +#define STRIO_H + +#include "str.h" +#include "arena.h" + +int read_all(FILE *f, Str *buf, Arena *a); +int next_line(Str *src, Str *line); +void str_putf(Str s, FILE *f); +void str_put(Str s); + +int str_to_u64(Str s, uint64_t *out); +void str_cat_i64(Str *out, int64_t c, char pad_char, int min_width, Arena *a); +void str_cat_u64(Str *out, uint64_t c, char pad_char, int min_width, Arena *a); +void str_cat_fmtv(Str *out, Arena *arena, const char *fmt, va_list ap); +void str_cat_fmt(Str *out, Arena *arena, const char *fmt, ...); +Str str_fmtv(Arena *arena, const char *fmt, va_list ap); +Str str_fmt(Arena *arena, const char *fmt, ...); +const char *cstr_fmt(Arena *arena, const char *fmt, ...); + +#ifdef STRIO_IMPL + +#include <stdio.h> +#include <stdint.h> + +static inline long read_all_file_size(FILE *f) { + fseek(f, 0, SEEK_END); + long t = ftell(f); + fseek(f, 0, SEEK_SET); + return t > 0 ? t : -1; +} + +int read_all(FILE *f, Str *buf, Arena *a) { + if (!f) return -1; + long sz = read_all_file_size(f); + if (sz < 1) { + ptrdiff_t cap = 4096; + buf->s = new_arr(a, char, cap); + buf->n = 0; + while (!feof(f)) { + size_t n = fread(&buf->s[buf->n], 1, cap - buf->n, f); + if (n < 1) break; + buf->n += n; + if (buf->n >= cap) { + size_t c = cap; + while (buf->n >= cap) cap <<= 1; + buf->s = resize(a, buf->s, c, cap); + } + } + } else { + buf->n = sz; + buf->s = new_arr(a, char, sz); + size_t sz = fread(buf->s, 1, buf->n, f); + if (sz < (size_t)buf->n) return -1; + } + return ferror(f) ? -1 : 0; +} + +int next_line(Str *src, Str *line) { + if (src->n < 1) return 0; + line->s = src->s; + char *newln = memchr(src->s, '\n', src->n); + line->n = newln ? newln - src->s : src->n; + src->s += line->n + 1; + src->n -= line->n + 1; + if (line->n > 0 && line->s[line->n-1] == '\r') line->n--; + return 1; +} + +void str_putf(Str s, FILE *f) { + fwrite(s.s, 1, s.n, f); +} + +void str_put(Str s) { + str_putf(s, stdout); +} + +/* formatted conversion */ + +int str_to_u64(Str s, uint64_t *out) { + if (s.n < 1) return -1; + uint64_t acc = 0; + for (int i = 0; i < s.n; i++) { + char c = s.s[i]; + if (!(c >= '0' && c <= '9')) return -1; + acc = (acc * 10) + (c - '0'); + } + *out = acc; + return 0; +} + +static void str_cat_u64_(char buf[32], int *n, uint64_t c) { + int i = 0; + buf[31] = '\0'; + do { + buf[32 - ++i] = (c % 10) + '0'; + c /= 10; + } while (c); + *n = i; +} + +void str_cat_u64(Str *out, uint64_t c, char pad_char, int min_width, Arena *a) { + int n; + /* more than enough for the largest 64-bit number + * log_10(1 << 64) ~= 19.3 digits max */ + char buf[32]; + str_cat_u64_(buf, &n, c); + while (n < min_width && ++n < 32) buf[32-n] = pad_char; + str_cat(out, (Str) { &buf[sizeof(buf) - n], n }, a); +} + +void str_cat_i64(Str *out, int64_t c, char pad_char, int min_width, Arena *a) { + /* more than enough for the largest 64-bit number + * log_10(1 << 64) ~= 19.3 digits max */ + int n, neg = 0; + char buf[32]; + if (c < 0) neg = 1, c = -c; + str_cat_u64_(buf, &n, c); + if (neg) buf[sizeof(buf) - ++n] = '-'; + while (n < min_width && ++n < 32) buf[32-n] = pad_char; + str_cat(out, (Str) { &buf[sizeof(buf) - n], n }, a); +} + +/* IMPORTANT: this is not and will not be printf() compatible + * + * %s - c string + * %S - Str + * %i - int32 + * %I - int64 + * %u - uint32 + * %U - uin64 + * + **/ +void str_cat_fmtv(Str *out, Arena *arena, const char *fmt, va_list ap) { + size_t n = strlen(fmt); + for (size_t i = 0; i < n; i++) { + const char *mch = memchr(&fmt[i], '%', n - i); + if (!mch) { + str_cat(out, (Str) { (char*)&fmt[i], n - i }, arena); + break; + } + size_t skip = mch - &fmt[i]; + if (mch != &fmt[i]) { + str_cat(out, (Str) { (char*)&fmt[i], skip }, arena); + i += skip; + } + if (i + 1 < n) { + int zero_pad = 0, min_width = 0; + i++; + if (fmt[i] == '0') { + zero_pad = 1; + i++; + } + while (i < n && fmt[i] >= '0' && fmt[i] <= '9') { + min_width = min_width * 10 + (fmt[i] - '0'); + i++; + } + if (i >= n) break; + switch (fmt[i]) { + case 's': + str_cat(out, str_from_cstr(va_arg(ap, const char *)), arena); + break; + case 'S': + str_cat(out, va_arg(ap, Str), arena); + break; + case 'i': + str_cat_i64(out, va_arg(ap, int32_t), zero_pad?'0':' ', min_width, arena); + break; + case 'I': + str_cat_i64(out, va_arg(ap, int64_t), zero_pad?'0':' ', min_width, arena); + break; + case 'u': + str_cat_u64(out, va_arg(ap, uint32_t), zero_pad?'0':' ', min_width, arena); + break; + case 'U': + str_cat_u64(out, va_arg(ap, uint64_t), zero_pad?'0':' ', min_width, arena); + break; + default: + str_catc(out, fmt[i], arena); + break; + } + } + } +} + +void str_cat_fmt(Str *out, Arena *arena, const char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + str_cat_fmtv(out, arena, fmt, ap); + va_end(ap); +} + +Str str_fmtv(Arena *arena, const char *fmt, va_list ap) { + Str s = { 0 }; + str_cat_fmtv(&s, arena, fmt, ap); + return s; +} + +Str str_fmt(Arena *arena, const char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + Str r = str_fmtv(arena, fmt, ap); + va_end(ap); + return r; +} + +const char *cstr_fmt(Arena *arena, const char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + Str r = str_fmtv(arena, fmt, ap); + str_catc(&r, '\0', arena); + va_end(ap); + return r.s; +} + +#endif +#endif diff --git a/utf8.h b/utf8.h new file mode 100644 index 0000000..10a4f2b --- /dev/null +++ b/utf8.h @@ -0,0 +1,64 @@ +#ifndef UTF8_H +#define UTF8_H + +#include "str.h" + +#define UTF8_INVALID (unsigned)-1 + +int utf8_len(unsigned cp); +unsigned utf8_next(Str *s); +void utf8_to_buf(unsigned cp, char *buf, int n); + +#ifdef UTF8_IMPL + +#include <stdbit.h> +#include <stdint.h> + +unsigned utf8_next(Str *s) { + if (s->n < 1) return 0; + static const uint8_t niblen[16] = { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,3,4 }; + static const uint8_t cpmask[4] = { 0x7f, 0x3f, 0x1f, 0xf }; + int len = niblen[(uint8_t)*s->s >> 4]; + if (!len) { s->n--; return *s->s++; } + if (s->n < len || (s->s[0] & (0x80 >> len))) return UTF8_INVALID; + unsigned cp = (unsigned)*s->s & cpmask[len]; + for (int i = 1; i < len; i++) { + if ((s->s[i] & 0xc0) != 0x80) return UTF8_INVALID; + cp = (cp << 6) | (s->s[i] & 0x3f); + } + s->s += len, s->n -= len; + return cp; +} + +unsigned utf8_next_unchecked(Str *s) { + if (s->n < 1) return 0; + static const uint8_t niblen[16] = { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,3,4 }; + static const uint8_t cpmask[4] = { 0x7f, 0x3f, 0x1f, 0xf }; + int len = niblen[(uint8_t)*s->s >> 4]; + if (!len) { s->n--; return *s->s++; } + unsigned cp = (unsigned)*s->s & cpmask[len]; + for (int i = 1; i < len; i++) cp = (cp << 6) | (s->s[i] & 0x3f); + s->s += len, s->n -= len; + return cp; +} + +int utf8_len(unsigned cp) { + static const uint8_t tbl[33] = { + 6,6,6,6,6,6, 5,5,5,5,5, 4,4,4,4,4, + 3,3,3,3,3, 2,2,2,2, 1,1,1,1,1,1,1,1, + }; + return tbl[stdc_leading_zeros(cp)]; +} + +void utf8_to_buf(unsigned cp, char *buf, int n) { + if (n == 1) { + *buf = cp; + return; + } + static const uint8_t tbl[5] = { 0b11000000, 0b11100000, 0b11110000, 0b11111000, 0b11111100 }; + for (int i = n; --i;) buf[i] = 0x80 | (cp & 0x3f), cp >>= 6; + buf[0] = tbl[n - 2] | cp; +} + +#endif +#endif |