#include #include #include #include #include #include #include "wrmr.h" #include "arena.h" #include "dynarr.h" #include "txt.h" #include "utf8.h" #include "str.h" void txt_replace_piece(Txt *b, u32 pi, TxtBufIdx buf, u32 ofs, u32 n) { b->ptbl.v[pi] = (TxtPiece) { buf, ofs, n }; } void txt_insert_piece(Txt *b, u32 pi, TxtBufIdx buf, u32 ofs, u32 n) { DA_FIT(&b->ptbl, b->ptbl.n + 1); if (pi < b->ptbl.n) { MOVE(&b->ptbl.v[pi+1], &b->ptbl.v[pi], b->ptbl.n - pi); } txt_replace_piece(b, pi, buf, ofs, n); b->ptbl.n++; } void txt_remove_piece(Txt *b, u32 pi) { if (pi + 1 < b->ptbl.n) { MOVE(&b->ptbl.v[pi], &b->ptbl.v[pi+1], b->ptbl.n - (pi + 1)); } b->ptbl.n--; } u32 txt_split_piece(Txt *b, u32 pi, u32 i) { TxtPiece *p = &b->ptbl.v[pi]; if (i == 0) return pi; if (i == p->n) return pi + 1; txt_insert_piece(b, pi + 1, p->buf, p->ofs + i, p->n - i); b->ptbl.v[pi].n = i; return pi + 1; } static void txt_buf_fit(Txt *b, TxtBufIdx bi, u32 sz) { TxtBuf *buf = &b->buf[bi]; if (sz > buf->c) { buf->c = stdc_bit_ceil(sz); buf->s = realloc(buf->s, buf->c); if (!buf->s) FAIL_WITH_MSG("realloc failure"); } } static void txt_buf_append(Txt *b, TxtBufIdx bi, const char *s, u32 n) { TxtBuf *buf = &b->buf[bi]; txt_buf_fit(b, bi, buf->n + n); memcpy(&buf->s[buf->n], s, n); buf->n += n; } static isize txt_buf_dedup(Txt *b, TxtBufIdx bi, const char *s, u32 n) { TxtBuf *buf = &b->buf[bi]; if (buf->n >= n && buf->s && !memcmp(buf->s + (buf->n - n), s, n)) { return buf->n - n; } else { return -1; } } TxtLoc txt_add_piece(Txt *b, TxtLoc l) { if (l.p > 0 && !l.i) { l.p--; l.i = b->ptbl.v[l.p].n; } if (l.p < b->ptbl.n) { TxtPiece *p = &b->ptbl.v[l.p]; int mid = p->ofs + l.i < b->buf[p->buf].n; if (p->buf == TXT_SRC || mid) { l.p = txt_split_piece(b, l.p, l.i); goto insert; } } else { insert: if (l.p < b->ptbl.n && b->ptbl.v[l.p].n == 0) { txt_replace_piece(b, l.p, TXT_ADD, b->buf[TXT_ADD].n, 0); } else { txt_insert_piece(b, l.p, TXT_ADD, b->buf[TXT_ADD].n, 0); } } return l; } static int txt_are_pieces_adjacent(Txt *t, u32 a, u32 b) { TxtPiece *pa = &t->ptbl.v[a]; TxtPiece *pb = &t->ptbl.v[b]; return pa->buf == pb->buf && pa->ofs + pa->n == pb->ofs; } static int txt_trim_empty(TxtLoc *lp) { TxtLoc l = *lp; int deleted = 0; while (l.p > 0 && l.t->ptbl.v[l.p-1].n == 0) { txt_remove_piece(l.t, l.p - 1); l.p--; deleted = 1; } while (l.p + 1 < l.t->ptbl.n && l.t->ptbl.v[l.p+1].n == 0) { txt_remove_piece(l.t, l.p + 1); deleted = 1; } while (l.t->ptbl.n > 1 && l.t->ptbl.v[l.p].n == 0) { txt_remove_piece(l.t, l.p); if (l.p >= l.t->ptbl.n) { l.p = l.t->ptbl.n - 1; l.i = l.t->ptbl.v[l.p].n; } deleted = 1; } *lp = l; return deleted; } static inline TxtLoc resolve_loc(TxtLoc l) { while (l.p + 1 < l.t->ptbl.n && l.i == l.t->ptbl.v[l.p].n) { l.p++; l.i = 0; } ASSERT(txt_valid_loc(l)); return l; } static TxtLoc txt_post_edit(TxtLoc l) { l = resolve_loc(l); loop: if (txt_trim_empty(&l)) { ASSERT(txt_valid_loc(l)); goto loop; } if (l.p > 0 && txt_are_pieces_adjacent(l.t, l.p - 1, l.p)) { l.i = l.t->ptbl.v[l.p - 1].n; l.t->ptbl.v[l.p - 1].n += l.t->ptbl.v[l.p].n; txt_remove_piece(l.t, l.p); l.p--; ASSERT(txt_valid_loc(l)); goto loop; } if (l.p + 1 < l.t->ptbl.n && txt_are_pieces_adjacent(l.t, l.p, l.p + 1)) { l.t->ptbl.v[l.p].n += l.t->ptbl.v[l.p + 1].n; txt_remove_piece(l.t, l.p + 1); ASSERT(txt_valid_loc(l)); goto loop; } ASSERT(txt_valid_loc(l)); return resolve_loc(l); } TxtLoc txt_insert(TxtLoc l, const char *s, u32 n) { l = txt_add_piece(l.t, l); TxtPiece *p = &l.t->ptbl.v[l.p]; isize ext = txt_buf_dedup(l.t, p->buf, s, n); if (p->n > 0 || ext == -1) { txt_buf_append(l.t, p->buf, s, n); } else { p->ofs = ext; } p->n += n; l.t->len += n; l.i = p->n; return txt_post_edit(l); } TxtLoc txt_insert_c(TxtLoc l, u32 ch) { char buf[6]; u32 n = utf8_encode_len(&ch, 1); utf8_encode(buf, &ch, 1); return txt_insert(l, buf, n); } TxtLoc txt_delete(TxtLoc l, u32 n) { u32 pi = txt_split_piece(l.t, l.p, l.i); if (pi > 0) pi--; l.p = pi; l.i = l.t->ptbl.v[l.p].n; for (;;) { if (l.i != l.t->ptbl.v[l.p].n) break; if (l.i >= n) { l.i -= n; l.t->ptbl.v[l.p].n -= n; break; } else { l.t->ptbl.v[l.p].n -= l.i; n -= l.i; l.i = 0; if (l.p == 0) break; l.p = l.p - 1; l.i = l.t->ptbl.v[l.p].n; } while (txt_trim_empty(&l)); } return txt_post_edit(l); } u32 txt_range_len(TxtLoc lo, TxtLoc hi) { u32 n = 0; while (lo.p < hi.p) { n += lo.t->ptbl.v[lo.p].n - lo.i; lo.p++; lo.i = 0; } n += hi.i - lo.i; return n; } TxtLoc txt_delete_range(TxtLoc lo, TxtLoc hi) { /* TODO: figure out nr. of chars, then delete all at once */ if (txt_before(hi, lo)) { TxtLoc t = lo; lo = hi; hi = t; } return txt_delete(hi, txt_range_len(lo, hi)); } TxtLoc txt_delete_c(TxtLoc l) { while (!at_start(l) && (txt_byte(bprev(l)) & 0xc0) == 0x80) l = txt_delete(l, 1); l = txt_delete(l, 1); return l; } int txt_load(Txt *b, const char *path) { struct stat sb; int fd = open(path, O_RDONLY); if (fd == -1) goto empty_file; if (fstat(fd, &sb)) { close(fd); return -1; } if (!sb.st_size) { empty_file: txt_buf_fit(b, TXT_SRC, 8192); goto done; } void *m = mmap(0, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (m == MAP_FAILED) { close(fd); return -1; } txt_buf_fit(b, TXT_SRC, sb.st_size); memcpy(b->buf[TXT_SRC].s, m, sb.st_size); munmap(m, sb.st_size); b->buf[TXT_SRC].n = sb.st_size; close(fd); done: b->ptbl.n = 0; b->buf[TXT_ADD].n = 0; b->len = b->buf[TXT_SRC].n; txt_insert_piece(b, 0, TXT_SRC, 0, b->len); return 0; } int txt_save(Txt *t, const char *path) { FILE *f = fopen(path, "wb"); if (!f) return -1; for (u32 i = 0; i < t->ptbl.n; i++) { TxtPiece *p = &t->ptbl.v[i]; TxtBuf *b = &t->buf[p->buf]; fwrite(b->s + p->ofs, 1, p->n, f); } int e = ferror(f); fclose(f); return e ? -1 : 0; } void txt_free(Txt *t) { for (u32 i = 0; i < COUNTOF(t->buf); i++) { free(t->buf[i].s); } free(t->ptbl.v); } Str txt_collect_range(TxtLoc lo, TxtLoc hi, Arena *a) { DYNARR(char) buf = { 0 }; while (lo.p < hi.p) { TxtPiece *p = &lo.t->ptbl.v[lo.p]; DA_APUSH_MULT(&buf, a, lo.t->buf[p->buf].s + p->ofs + lo.i, p->n - lo.i); lo.p++; lo.i = 0; } ASSERT(lo.p == hi.p); if (hi.i > lo.i) { TxtPiece *p = &lo.t->ptbl.v[lo.p]; DA_AGROW(&buf, a, hi.i - lo.i); memcpy((&buf)->v + (&buf)->n, lo.t->buf[p->buf].s + p->ofs + lo.i, DA_ELEM(&buf, hi.i - lo.i)); (&buf)->n += (hi.i - lo.i); //DA_APUSH_MULT(&buf, a, lo.t->buf[p->buf].s + p->ofs + lo.i, hi.i - lo.i); } return (Str) { buf.v, buf.n }; } u32 txt_read_chunk(TxtLoc *lo, TxtLoc hi, char *buf, u32 sz) { u32 n = 0; TxtLoc l = *lo; while (n < sz && l.p < hi.p) { TxtPiece *p = &l.t->ptbl.v[l.p]; u32 copy_n = p->n - l.i; if (copy_n > sz - n) copy_n = sz - n; memcpy(buf + n, l.t->buf[p->buf].s + p->ofs + l.i, copy_n); n += copy_n; l.i += copy_n; if (l.i >= p->n) { l.p++; l.i = 0; } } if (n < sz && hi.i > l.i) { TxtPiece *p = &l.t->ptbl.v[l.p]; u32 copy_n = hi.i - l.i; if (copy_n > sz - n) copy_n = sz - n; memcpy(buf + n, l.t->buf[p->buf].s + p->ofs + l.i, copy_n); } *lo = l; return n; } /* navigation */ int txt_valid_loc(TxtLoc l) { return l.p < l.t->ptbl.n && (l.i < l.t->ptbl.v[l.p].n || (l.p + 1 == l.t->ptbl.n && l.i <= l.t->ptbl.v[l.p].n)); } TxtLoc txt_at(Txt *b, u32 ofs) { for (u32 i = 0; i < b->ptbl.n; i++) { if (ofs < b->ptbl.v[i].n) { return (TxtLoc) { b, i, ofs }; } ofs -= b->ptbl.v[i].n; } return (TxtLoc) { b, b->ptbl.n - 1, b->ptbl.v[b->ptbl.n - 1].n }; } u32 txt_ofs(TxtLoc l) { u32 r = 0; for (u32 i = 0; i < l.p; i++) { r += l.t->ptbl.v[i].n; } return r + l.i; } int txt_before(TxtLoc a, TxtLoc b) { ASSERT(txt_valid_loc(a)); ASSERT(txt_valid_loc(b)); return a.p < b.p || (a.p == b.p && a.i < b.i); } int txt_after(TxtLoc a, TxtLoc b) { ASSERT(txt_valid_loc(a)); ASSERT(txt_valid_loc(b)); return a.p > b.p || (a.p == b.p && a.i > b.i); } TxtLoc txt_start(Txt *t) { return (TxtLoc) { t, 0, 0 }; } TxtLoc txt_end(Txt *t) { return (TxtLoc) { t, t->ptbl.n-1, t->ptbl.v[t->ptbl.n-1].n }; } TxtLoc bnext(TxtLoc l) { TxtPiece *p = &l.t->ptbl.v[l.p]; if (l.p + 1 < l.t->ptbl.n) { if (l.i + 1 < p->n) l.i++; else l.p++, l.i = 0; } else { l.i++; if (l.i > p->n) l.i = p->n; } return l; } TxtLoc bprev(TxtLoc l) { if (l.i > 0) { return (TxtLoc) { l.t, l.p, l.i - 1 }; } else if (l.p > 0 && l.t->ptbl.v[l.p - 1].n > 0) { return (TxtLoc) { l.t, l.p - 1, l.t->ptbl.v[l.p - 1].n - 1 }; } else { return (TxtLoc) { l.t, 0, 0 }; } } TxtLoc cnext(TxtLoc l) { l = bnext(l); while ((txt_byte(l) & 0xc0) == 0x80) l = bnext(l); return l; } TxtLoc cprev(TxtLoc l) { l = bprev(l); while ((txt_byte(l) & 0xc0) == 0x80) l = bprev(l); return l; } int at_start(TxtLoc l) { return l.p == 0 && l.i == 0; } int at_end(TxtLoc l) { return l.p + 1 == l.t->ptbl.n && l.i == l.t->ptbl.v[l.p].n; } TxtLoc next_newline(TxtLoc l) { do l = cnext(l); while (!at_end(l) && txt_byte(l) != '\n'); return l; } TxtLoc prev_newline(TxtLoc l) { do l = cprev(l); while (!at_start(l) && txt_byte(l) != '\n'); return l; } TxtLoc start_of_line(TxtLoc l) { l = prev_newline(l); if (!at_start(l) && txt_byte(l) == '\n') l = cnext(l); return l; } TxtLoc end_of_line(TxtLoc l) { TxtLoc start = start_of_line(l); return txt_byte(start) == '\n' ? start : next_newline(start); } u32 get_col(TxtLoc l) { u32 n = 0; for (TxtLoc tmp = start_of_line(l); txt_before(tmp, l); txt_chr_next(&tmp)) n++; return n; } TxtLoc at_col(TxtLoc l, u32 col) { l = start_of_line(l); while (col--) { if (txt_chr_next(&l) == '\n') { l = bprev(l); break; } } return l; } TxtLoc prev_line(TxtLoc l) { TxtLoc start = start_of_line(l); return at_col(cprev(start), get_col(l)); } TxtLoc next_line(TxtLoc l) { TxtLoc end = end_of_line(l); if (at_end(end)) return end; return at_col(cnext(end), get_col(l)); } /* reading chars */ u32 txt_chr(TxtLoc l) { TxtPiece *p = &l.t->ptbl.v[l.p]; TxtBuf *b = &l.t->buf[p->buf]; return utf8_decode_at(b->s, p->ofs + l.i, p->ofs + p->n); } u8 txt_byte(TxtLoc l) { TxtPiece *p = &l.t->ptbl.v[l.p]; TxtBuf *b = &l.t->buf[p->buf]; if (l.i >= p->n) return 0; return b->s[p->ofs + l.i]; } u32 txt_chr_next(TxtLoc *l) { if (at_end(*l)) return 0; u32 c = txt_chr(*l); u32 n = UTF8_CP_LEN(c); for (u32 i = 0; i < n; i++) *l = bnext(*l); return c; }