From bf4535008a78ed84fe3e76a9ae262646b9a5f150 Mon Sep 17 00:00:00 2001 From: WormHeamer Date: Sat, 27 Dec 2025 23:26:28 -0500 Subject: basic piece table implementation --- .gitignore | 3 ++ Makefile | 41 +++++++++++++++++++++ arena.h | 81 ++++++++++++++++++++++++++++++++++++++++ dynarr.h | 57 +++++++++++++++++++++++++++++ main.c | 27 ++++++++++++++ txt.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ txt.h | 43 ++++++++++++++++++++++ wrmr.h | 99 +++++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 473 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 arena.h create mode 100644 dynarr.h create mode 100644 main.c create mode 100644 txt.c create mode 100644 txt.h create mode 100644 wrmr.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..083996d --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +a.out +*.o +test.txt diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..36549b0 --- /dev/null +++ b/Makefile @@ -0,0 +1,41 @@ +EXE = a.out +RUNARGS = + +CFLAGS = -std=c23 -Wall -Wextra -Wpedantic ${CFLAGS_${DEBUG}} +LDFLAGS = ${LDFLAGS_${DEBUG}} +LDLIBS = + +OBJ != find -type f -name '*.c' | sed 's/\.c$$/.o/' + +DEBUG = 0 + +CFLAGS_0 = -O2 -DNDEBUG +CFLAGS_1 = -O0 -g3 # -fsanitize=address,undefined +CFLAGS_2 = -O2 -g3 -DNDEBUG +LDFLAGS_0 = -s +LDFLAGS_1 = -g3 # -fsanitize=address,undefined +LDFLAGS_2 = -O2 -g3 + +PREFIX ?= ${HOME}/.local +BINDIR = ${PREFIX}/bin + +.PHONY: run all clean install uninstall + +all: ${EXE} +run: ${EXE} + ./${EXE} ${RUNARGS} + +debug: ${EXE} + gdb -ex run --args ./${EXE} ${RUNARGS} + +clean: + rm -fv ${EXE} ${OBJ} out.pdf + +${EXE}: ${OBJ} + ${CC} ${LDFLAGS} ${OBJ} -o ${EXE} ${LDLIBS} + +install: ${EXE} + cp ${EXE} ${BINDIR}/${EXE} + +uninstall: + rm ${BINDIR}/${EXE} diff --git a/arena.h b/arena.h new file mode 100644 index 0000000..58c9d38 --- /dev/null +++ b/arena.h @@ -0,0 +1,81 @@ +#ifndef ARENA_H +#define ARENA_H + +#include "wrmr.h" + +#include +#include + +typedef struct { + char *start; /* need to release memory */ + char *beg, *end; +} Arena; + +#define ARENA(name, sz)\ + static char name##_buf[sz];\ + Arena name = { name##_buf, name##_buf + (sz) }; + +void *vmem_alloc(size_t sz); +void vmem_free(void *ptr, size_t sz); + +Arena arena_init(size_t sz); +void arena_free(Arena *a); +void *arena_alloc(Arena *a, size_t n, size_t align); +void *arena_realloc(Arena *a, void *old, size_t oldsz, size_t newsz, size_t align); + +#define new(a, t)\ + arena_alloc(a, sizeof(t), _Alignof(__typeof__(t))) + +#define new_arr(a, t, n)\ + arena_alloc(a, sizeof(t) * (n), _Alignof(__typeof__(t))) + +#define resize(a, x, osz, nsz)\ + arena_realloc(a, x, sizeof(*(x)) * (osz),\ + sizeof(*(x)) * (nsz), _Alignof(__typeof__(*(x)))) + +#ifdef ARENA_IMPL + +/* virtual memory */ + +#include +#include + +void *vmem_alloc(size_t sz) { + void *ptr = mmap(NULL, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (!ptr) FAIL_WITH_MSG("mmap failed"); + return ptr; +} + +void vmem_free(void *ptr, size_t sz) { + munmap(ptr, sz); +} + +/* arenas */ + +#include + +Arena arena_init(size_t sz) { + char *p = vmem_alloc(sz); + return (Arena) { p, p, p + sz }; +} + +void arena_free(Arena *a) { + vmem_free(a->start, a->end - a->start); +} + +void *arena_alloc(Arena *a, size_t sz, size_t align) { + char *p = a->beg + (-(uintptr_t)a->beg & (align - 1)); + if (p >= a->end) FAIL_WITH_MSG("arena out-of-memory"); + a->beg = p + sz; + memset(p, 0, sz); + return p; +} + +void *arena_realloc(Arena *a, void *old, size_t oldsz, size_t newsz, size_t align) { + if (old == a->beg - oldsz) a->beg = old; + else if (old) memcpy(a->beg, old, oldsz); + return arena_alloc(a, newsz, align); +} + +#endif +#endif diff --git a/dynarr.h b/dynarr.h new file mode 100644 index 0000000..ae93462 --- /dev/null +++ b/dynarr.h @@ -0,0 +1,57 @@ +#ifndef DYNARR_H +#define DYNARR_H + +#include +#include +#include + +#include "wrmr.h" + +typedef struct { + u32 n, c; + void *v; +} DynArrHdr; + +#define DYNARR(T) struct { u32 n, c; T *v; } + +#define DA_INIT_CAP 32 +#define DA_ELEM(da, n) ((n) * sizeof(*(da)->v)) + +/* malloc */ + +#define DA_FIT(da, n) do {\ + (da)->c = stdc_bit_ceil(n);\ + (da)->v = realloc((da)->v, DA_ELEM(da, (da)->c));\ + if (!(da)->v) FAIL_WITH_MSG("failed to realloc dynamic array");\ +} while(0) +#define DA_GROW(da, n) DA_FIT(da, (da)->n + (n)) +#define DA_PUSH(da, ...) do {\ + DA_GROW(da, 1);\ + (da)->v[(da)->n++] = (__VA_ARGS__);\ +} while(0) +#define DA_PUSH_MULT(da, o, n) do {\ + DA_GROW(da, n);\ + memcpy((da)->v + (da)->n, (o), (n) * sizeof(*(da)->v));\ + (da)->n += (n);\ +} while(0) + +/* arena */ + +#define DA_AFIT(da, a, n) do {\ + u32 da_fit_c = stdc_bit_ceil(n);\ + (da)->v = arena_realloc(a, (da)->v, DA_ELEM(da, (da)->c),\ + DA_ELEM(da, da_fit_c), _Alignof(TYPEOF(*(da)->v)));\ + (da)->c = da_fit_c;\ +} while(0) +#define DA_AGROW(da, a, n) DA_AFIT(da, a, (da)->n + (n)) +#define DA_APUSH(da, a, ...) do {\ + DA_AGROW(da, a, 1);\ + (da)->v[(da)->n++] = (__VA_ARGS__);\ +} while(0) +#define DA_APUSH_MULT(da, a, o, n) do {\ + DA_AGROW(da, a, n);\ + memcpy((da)->v + (da)->n, (o), (n) * sizeof(*(da)->v));\ + (da)->n += (n);\ +} while(0) + +#endif diff --git a/main.c b/main.c new file mode 100644 index 0000000..e367825 --- /dev/null +++ b/main.c @@ -0,0 +1,27 @@ +#include +#include +#include + +#include +#include +#include + +#define ARENA_IMPL + +#include "wrmr.h" +#include "arena.h" +#include "txt.h" + +int main(void) { + Arena a = arena_init(1L << 20); + Txt txt = { 0 }; + txt_open(&txt, "test.txt"); + txt_insert(&txt, txt.len, "wheeee", 6); + txt_delete(&txt, txt.len, 6); + for (u32 i = 0; i < txt.ptbl.n; i++) { + TxtPiece *p = &txt.ptbl.v[i]; + printf("%u. %.*s\n", i, (int)p->n, txt.buf[p->buf].s + p->ofs); + } + arena_free(&a); + return 0; +} diff --git a/txt.c b/txt.c new file mode 100644 index 0000000..b71bba3 --- /dev/null +++ b/txt.c @@ -0,0 +1,122 @@ +#include +#include + +#include +#include +#include +#include + +#include "wrmr.h" +#include "dynarr.h" +#include "txt.h" + +void txt_insert_piece(Txt *b, u32 pi, TxtBufIdx buf, u32 ofs, u32 n) { + DA_FIT(&b->ptbl, b->ptbl.n + 1); + MOVE(&b->ptbl.v[pi+1], &b->ptbl.v[pi], b->ptbl.n - pi); + b->ptbl.v[pi] = (TxtPiece) { buf, ofs, n }; + b->ptbl.n++; +} + +void txt_remove_piece(Txt *b, u32 pi) { + if (pi + 1 < b->ptbl.n) { + MOVE(&b->ptbl.v[pi], &b->ptbl.v[pi+1], b->ptbl.n - (pi + 1)); + } + b->ptbl.n--; +} + +u32 txt_split_piece(Txt *b, u32 pi, u32 i) { + TxtPiece *p = &b->ptbl.v[pi]; + if (i == 0) return pi; + if (i == p->n) return pi + 1; + txt_insert_piece(b, pi + 1, p->buf, p->ofs + i, p->n - i); + b->ptbl.v[pi].n = i; + return pi + 1; +} + +TxtLoc txt_at(Txt *b, u32 cur) { + for (u32 i = 0; i < b->ptbl.n; i++) { + if (cur <= b->ptbl.v[i].n) { + return (TxtLoc) { i, cur }; + } + cur -= b->ptbl.v[i].n; + } + return (TxtLoc) { 0, 0 }; +} + +void txt_buf_append(Txt *b, TxtBufIdx bi, const char *s, u32 n) { + TxtBuf *buf = &b->buf[bi]; + if (buf->n + n > buf->c) { + buf->c = stdc_bit_ceil(buf->n + n); + buf->s = realloc(buf->s, buf->c); + if (!buf->s) FAIL_WITH_MSG("realloc failure"); + } + memcpy(&buf->s[buf->n], s, n); + buf->n += n; +} + +u32 txt_insert(Txt *b, u32 cur, const char *s, u32 n) { + TxtLoc l = txt_at(b, cur); + if (l.p < b->ptbl.n) { + TxtPiece *p = &b->ptbl.v[l.p]; + int mid = p->ofs + l.i < b->buf[p->buf].n; + if (p->buf == TXT_SRC || mid) { + l.p = txt_split_piece(b, l.p, l.i); + txt_insert_piece(b, l.p, TXT_ADD, b->buf[TXT_ADD].n, 0); + } + } else { + txt_insert_piece(b, l.p, TXT_ADD, b->buf[TXT_ADD].n, 0); + } + TxtPiece *p = &b->ptbl.v[l.p]; + txt_buf_append(b, p->buf, s, n); + p->n += n; + b->len += n; + return cur + n; +} + +u32 txt_insert_c(Txt *b, u32 cur, char ch) { + /* TODO: utf-8 char */ + return txt_insert(b, cur, &ch, 1); +} + +u32 txt_delete(Txt *b, u32 cur, u32 n) { + TxtLoc l = txt_at(b, cur); + txt_split_piece(b, l.p, l.i); + TxtPiece *p = &b->ptbl.v[l.p]; + if (n > cur) n = cur; + cur -= n; + b->len -= n; + while (n > p->n) { + n -= p->n; + if (l.p + 1 < b->ptbl.n) txt_remove_piece(b, l.p); + b->ptbl.n--; + if (!l.p) return cur; + l.p--; + p = &b->ptbl.v[l.p]; + } + p->n -= n; + if (p->n == 0) txt_remove_piece(b, l.p); + return cur; +} + +int txt_open(Txt *b, const char *path) { + struct stat sb; + int fd = open(path, O_RDONLY); + if (fd == -1) return -1; + if (fstat(fd, &sb)) { + close(fd); + return -1; + } + void *m = mmap(0, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (m == MAP_FAILED) { + close(fd); + return -1; + } + memset(b, 0, sizeof(TxtBuf)); + b->buf[TXT_SRC].s = m; + b->buf[TXT_SRC].n = sb.st_size; + b->buf[TXT_SRC].c = sb.st_size; + b->len = sb.st_size; + close(fd); + txt_insert_piece(b, 0, TXT_SRC, 0, b->len); + return 0; +} diff --git a/txt.h b/txt.h new file mode 100644 index 0000000..21712af --- /dev/null +++ b/txt.h @@ -0,0 +1,43 @@ +#ifndef TXT_H +#define TXT_H + +#include "dynarr.h" + +typedef enum : u8 { + TXT_SRC, + TXT_ADD +} TxtBufIdx; + +typedef struct TxtPiece { + TxtBufIdx buf; + u32 ofs, n; +} TxtPiece; + +typedef struct { + char *s; + u32 n, c; +} TxtBuf; + +typedef struct { + u32 p, i; +} TxtLoc; + +typedef struct { + DYNARR(TxtPiece) ptbl; + TxtBuf buf[2]; + u32 len; +} Txt; + +TxtLoc txt_at(Txt *b, u32 cur); + +u32 txt_split_piece(Txt *b, u32 pi, u32 i); +void txt_remove_piece(Txt *b, u32 pi); +void txt_insert_piece(Txt *b, u32 pi, TxtBufIdx buf, u32 ofs, u32 n); +void txt_buf_append(Txt *b, TxtBufIdx bi, const char *s, u32 n); + +u32 txt_insert(Txt *b, u32 cur, const char *s, u32 n); +u32 txt_insert_c(Txt *b, u32 cur, char ch); +u32 txt_delete(Txt *b, u32 cur, u32 n); +int txt_open(Txt *b, const char *path); + +#endif diff --git a/wrmr.h b/wrmr.h new file mode 100644 index 0000000..35776a3 --- /dev/null +++ b/wrmr.h @@ -0,0 +1,99 @@ +/* wrmr.h */ +/* common definitions used on most projects */ + +#ifndef WRMR_H +#define WRMR_H + +#include +#include + +typedef int8_t i8; +typedef int16_t i16; +typedef int32_t i32; +typedef int64_t i64; + +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; + +typedef size_t usize; +typedef ptrdiff_t isize; +typedef intptr_t iptr; +typedef uintptr_t uptr; + +#define BESTRING_(x) #x +#define BESTRING(x) BESTRING_(x) + +#ifdef __has_builtin +#define HAS_BUILTIN(x) __has_builtin(x) +#else +#define HAS_BUILTIN(x) 0 +#endif + +#if defined(__i368__) || defined(__x86_64__) +/* nop so gdb will break on the trap, not the line after */ +# define TRAP() __asm__ volatile ("int3; nop") +#elif HAS_BUILTIN(__builtin_debugtrap) +# define TRAP() __builtin_debugtrap() +#elif HAS_BUILTIN(__builtin_trap) && defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) +# define TRAP() __builtin_trap() +#elif defined(_MSC_VER) +# define TRAP() __debugbreak() +#else +# define TRAP() (*(volatile int *)NULL = 0) +#endif + +#ifndef PUT_ERROR_MSG +# include +# define PUT_ERROR_MSG(msg) fputs(msg, stderr) +#endif + +#define FILE_LINE_MSG(msg) (__FILE__ ":" BESTRING(__LINE__) ": " msg "\n") +#define FAIL_WITH_MSG(msg) do { PUT_ERROR_MSG(FILE_LINE_MSG(msg)); TRAP(); } while(0) + +#ifdef NDEBUG +# define ASSERT(...) do {} while(0) +#else +# define ASSERT(...) do {\ + if (!(__VA_ARGS__)) FAIL_WITH_MSG("Assertion failed: " #__VA_ARGS__);\ + } while(0) +#endif + +#ifndef NDEBUG +# define UNREACHABLE_MSG(msg) FAIL_WITH_MSG(msg) +#else +# if __STDC_VERSION__ >= 202311L +# define UNREACHABLE_MSG(msg) (unreachable()) +# elif HAS_BUILTIN(__builtin_unreachable) +# define UNREACHABLE_MSG(msg) (__builtin_unreachable()) +# elif defined(_MSC_VER) +# define UNREACHABLE_MSG(msg) (__assume(0)) +# else +# define UNREACHABLE_MSG(msg) do {} while(0) +# endif +#endif + +#define UNREACHABLE UNREACHABLE_MSG("unreachable code") + +#ifdef NDEBUG +# if defined(_MSC_VER) +# define ASSUME(...) (__assume(__VA_ARGS__)) +# elif HAS_BUILTIN(__builtin_assume) +# define ASSUME(...) (__builtin_assume(__VA_ARGS__)) +# endif +#else +# define ASSUME(...) do if (!(__VA_ARGS__)) UNREACHABLE_MSG("assumption failed: " #__VA_ARGS__); while(0) +#endif + +#if __STDC_VERSION__ >= 202311L +# define TYPEOF(...) typeof(__VA_ARGS__) +#else +# define TYPEOF(...) __typeof__(__VA_ARGS__) +#endif + +#define COUNTOF(x) (sizeof(x) / sizeof(*(x))) +#define MOVE(a0, a1, n) memmove(1?(a0):(a1), a1, sizeof(*(a0)) * (n)); +#define COPY(a0, a1, n) memmove(1?(a0):(a1), a1, sizeof(*(a0)) * (n)); + +#endif -- cgit v1.2.3