#include #include #include #include #include #include #include "wrmr.h" #include "arena.h" #include "dynarr.h" #include "txt.h" #include "utf8.h" #include "str.h" void txt_replace_piece(Txt *b, u32 pi, TxtBufIdx buf, u32 ofs, u32 n) { b->ptbl.v[pi] = (TxtPiece) { buf, ofs, n }; } void txt_insert_piece(Txt *b, u32 pi, TxtBufIdx buf, u32 ofs, u32 n) { DA_FIT(&b->ptbl, b->ptbl.n + 1); if (pi < b->ptbl.n) { MOVE(&b->ptbl.v[pi+1], &b->ptbl.v[pi], b->ptbl.n - pi); } txt_replace_piece(b, pi, buf, ofs, n); b->ptbl.n++; } void txt_remove_piece(Txt *b, u32 pi) { if (pi + 1 < b->ptbl.n) { MOVE(&b->ptbl.v[pi], &b->ptbl.v[pi+1], b->ptbl.n - (pi + 1)); } b->ptbl.n--; } /* ensures that the returned TxtLoc is at the end of its respective piece, * one past the final character. note that this is an invalid position for * normal navigation, so it must be moved back into place with resolve_loc() * or txt_post_edit(). */ TxtLoc txt_split_piece(TxtLoc l) { TxtPiece *p = &l.t->ptbl.v[l.p]; if (l.i < p->n) { if (l.i == 0 && l.p > 0) { l.p--; l.i = l.t->ptbl.v[l.p].n; } else { txt_insert_piece(l.t, l.p + 1, p->buf, p->ofs + l.i, p->n - l.i); l.t->ptbl.v[l.p].n = l.i; } } ASSERT(l.p < l.t->ptbl.n); ASSERT(l.i == l.t->ptbl.v[l.p].n); return l; } TxtLoc txt_add_piece(TxtLoc l) { l = txt_split_piece(l); TxtPiece *p = &l.t->ptbl.v[l.p]; if (p->buf != TXT_ADD || p->ofs + p->n != l.t->buf[TXT_ADD].n) { if (l.i > 0) { txt_insert_piece(l.t, l.p + 1, TXT_ADD, l.t->buf[TXT_ADD].n, 0); l.p++; l.i = 0; } else { txt_replace_piece(l.t, l.p, TXT_ADD, l.t->buf[TXT_ADD].n, 0); } } return l; } static void txt_buf_fit(Txt *b, TxtBufIdx bi, u32 sz) { TxtBuf *buf = &b->buf[bi]; if (sz > buf->c) { buf->c = stdc_bit_ceil(sz); buf->s = realloc(buf->s, buf->c); if (!buf->s) FAIL_WITH_MSG("realloc failure"); } } static void txt_buf_append(Txt *b, TxtBufIdx bi, const char *s, u32 n) { TxtBuf *buf = &b->buf[bi]; txt_buf_fit(b, bi, buf->n + n); memcpy(&buf->s[buf->n], s, n); buf->n += n; } static isize txt_buf_dedup(Txt *b, TxtBufIdx bi, const char *s, u32 n) { TxtBuf *buf = &b->buf[bi]; if (buf->n >= n && buf->s && !memcmp(buf->s + (buf->n - n), s, n)) { return buf->n - n; } else { return -1; } } static int txt_are_pieces_adjacent(Txt *t, u32 a, u32 b) { TxtPiece *pa = &t->ptbl.v[a]; TxtPiece *pb = &t->ptbl.v[b]; return pa->buf == pb->buf && pa->ofs + pa->n == pb->ofs; } static inline TxtLoc resolve_loc(TxtLoc l) { while (l.p + 1 < l.t->ptbl.n && l.i == l.t->ptbl.v[l.p].n) { l.p++; l.i = 0; } ASSERT(txt_valid_loc(l)); return l; } static TxtLoc txt_post_edit(TxtLoc l) { l = resolve_loc(l); loop: if (l.p > 0 && txt_are_pieces_adjacent(l.t, l.p - 1, l.p)) { l.i = l.t->ptbl.v[l.p - 1].n; l.t->ptbl.v[l.p - 1].n += l.t->ptbl.v[l.p].n; txt_remove_piece(l.t, l.p); l.p--; ASSERT(txt_valid_loc(l)); goto loop; } if (l.p + 1 < l.t->ptbl.n && txt_are_pieces_adjacent(l.t, l.p, l.p + 1)) { l.t->ptbl.v[l.p].n += l.t->ptbl.v[l.p + 1].n; txt_remove_piece(l.t, l.p + 1); ASSERT(txt_valid_loc(l)); goto loop; } ASSERT(txt_valid_loc(l)); l.t->ptbl.dirty = 1; return resolve_loc(l); } TxtLoc txt_insert(TxtLoc l, const char *s, u32 n) { if (l.t->readonly) return l; l = txt_add_piece(l); TxtPiece *p = &l.t->ptbl.v[l.p]; isize ext = txt_buf_dedup(l.t, p->buf, s, n); if (p->n > 0 || ext == -1) { txt_buf_append(l.t, p->buf, s, n); } else { p->ofs = ext; } p->n += n; l.t->len += n; l.i = p->n; return txt_post_edit(l); } TxtLoc txt_delete(TxtLoc l, u32 n) { if (l.t->readonly) return l; l = txt_split_piece(l); while (n > 0) { TxtPiece *p = &l.t->ptbl.v[l.p]; u32 sz = p->n > n ? n : p->n; p->n -= sz; l.i -= sz; n -= sz; if (!p->n && l.t->ptbl.n > 1) txt_remove_piece(l.t, l.p); if (l.p == 0) break; if (l.i == 0) { l.p--; l.i = l.t->ptbl.v[l.p].n; } } return txt_post_edit(l); } TxtLoc txt_insert_c(TxtLoc l, u32 ch) { char buf[6]; u32 n = utf8_encode_len(&ch, 1); utf8_encode(buf, &ch, 1); return txt_insert(l, buf, n); } u32 txt_range_len(TxtLoc lo, TxtLoc hi) { u32 n = 0; while (lo.p < hi.p) { n += lo.t->ptbl.v[lo.p].n - lo.i; lo.p++; lo.i = 0; } n += hi.i - lo.i; return n; } TxtLoc txt_delete_range(TxtLoc lo, TxtLoc hi) { /* TODO: figure out nr. of chars, then delete all at once */ if (txt_before(hi, lo)) { TxtLoc t = lo; lo = hi; hi = t; } return txt_delete(hi, txt_range_len(lo, hi)); } TxtLoc txt_delete_c(TxtLoc l) { if (l.t->readonly) return l; while (!at_start(l) && (txt_byte(bprev(l)) & 0xc0) == 0x80) l = txt_delete(l, 1); l = txt_delete(l, 1); return l; } int txt_load(Txt *b, const char *path) { struct stat sb; memset(b, 0, sizeof(Txt)); int fd = open(path, O_RDONLY); if (fd == -1) goto empty_file; if (fstat(fd, &sb)) { close(fd); return -1; } if (!sb.st_size) { empty_file: txt_buf_fit(b, TXT_SRC, 8192); goto done; } void *m = mmap(0, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (m == MAP_FAILED) { close(fd); return -1; } txt_buf_fit(b, TXT_SRC, sb.st_size); memcpy(b->buf[TXT_SRC].s, m, sb.st_size); munmap(m, sb.st_size); b->buf[TXT_SRC].n = sb.st_size; close(fd); done: b->buf[TXT_ADD].n = 0; b->len = b->buf[TXT_SRC].n; txt_insert_piece(b, 0, TXT_SRC, 0, b->len); return 0; } void txt_load_empty(Txt *b) { memset(b, 0, sizeof(Txt)); txt_buf_fit(b, TXT_SRC, 32); txt_buf_fit(b, TXT_ADD, 8192); txt_insert_piece(b, 0, TXT_ADD, 0, b->len); } static inline int txt_hist_unchanged(Txt *t) { if (t->hist.n < 1) return 0; TxtHistNode *pt = &t->hist.v[t->hist.i]; if (t->ptbl.n != pt->n) return 0; if (memcmp(pt->v, t->ptbl.v, sizeof(TxtPiece) * pt->n)) return 0; return 1; } int txt_save(Txt *t, const char *path) { FILE *f = fopen(path, "wb"); if (!f) return -1; for (u32 i = 0; i < t->ptbl.n; i++) { TxtPiece *p = &t->ptbl.v[i]; TxtBuf *b = &t->buf[p->buf]; fwrite(b->s + p->ofs, 1, p->n, f); } int e = ferror(f); fclose(f); t->ptbl.dirty = 0; for (u32 i = 0; i < t->hist.n; i++) t->hist.v[i].dirty = 1; if (txt_hist_unchanged(t)) t->hist.v[t->hist.i].dirty = 0; return e ? -1 : 0; } void txt_free(Txt *t) { for (u32 i = 0; i < COUNTOF(t->buf); i++) free(t->buf[i].s); for (u32 i = 0; i < TXT_HIST_MAX; i++) free(t->hist.v[i].v); free(t->ptbl.v); } void txt_hist_push(Txt *t, TxtLoc cur) { if (txt_hist_unchanged(t)) { t->hist.v[t->hist.i].cur = cur; return; } if (t->hist.i + 1 < t->hist.n) t->hist.n = t->hist.i + 1; if (t->hist.n) t->hist.i++; if (t->hist.i == TXT_HIST_MAX) { free(t->hist.v[0].v); MOVE(&t->hist.v[0], &t->hist.v[1], TXT_HIST_MAX - 1); memset(&t->hist.v[TXT_HIST_MAX - 1], 0, sizeof(*t->hist.v)); t->hist.i--; t->hist.n--; } DA_FIT(&t->hist.v[t->hist.i], t->ptbl.n); memcpy(t->hist.v[t->hist.i].v, t->ptbl.v, sizeof(TxtPiece) * t->ptbl.n); t->hist.v[t->hist.i].n = t->ptbl.n; t->hist.v[t->hist.i].dirty = t->ptbl.dirty; t->hist.v[t->hist.i].cur = cur; t->hist.n++; ASSERT(t->hist.n == t->hist.i + 1); } int txt_hist_set(Txt *t, TxtLoc *cur) { t->ptbl.n = t->hist.v[t->hist.i].n; DA_FIT(&t->ptbl, t->ptbl.n); memcpy(t->ptbl.v, t->hist.v[t->hist.i].v, sizeof(TxtPiece) * t->ptbl.n); t->ptbl.dirty = t->hist.v[t->hist.i].dirty; *cur = t->hist.v[t->hist.i].cur; return 1; } int txt_hist_fwd(Txt *t, TxtLoc *cur) { if (t->hist.i + 1 >= t->hist.n) return 0; t->hist.i++; return txt_hist_set(t, cur); } int txt_hist_back(Txt *t, TxtLoc *cur) { if (t->hist.i == 0) return 0; t->hist.i--; return txt_hist_set(t, cur); } Str txt_collect_range(TxtLoc lo, TxtLoc hi, Arena *a) { DYNARR(char) buf = { 0 }; while (lo.p < hi.p) { TxtPiece *p = &lo.t->ptbl.v[lo.p]; DA_APUSH_MULT(&buf, a, lo.t->buf[p->buf].s + p->ofs + lo.i, p->n - lo.i); lo.p++; lo.i = 0; } ASSERT(lo.p == hi.p); if (hi.i > lo.i) { TxtPiece *p = &lo.t->ptbl.v[lo.p]; DA_AGROW(&buf, a, hi.i - lo.i); memcpy((&buf)->v + (&buf)->n, lo.t->buf[p->buf].s + p->ofs + lo.i, DA_ELEM(&buf, hi.i - lo.i)); (&buf)->n += (hi.i - lo.i); //DA_APUSH_MULT(&buf, a, lo.t->buf[p->buf].s + p->ofs + lo.i, hi.i - lo.i); } return (Str) { buf.v, buf.n }; } u32 txt_read_chunk(TxtLoc *lo, TxtLoc hi, char *buf, u32 sz) { u32 n = 0; TxtLoc l = *lo; while (n < sz && l.p < hi.p) { TxtPiece *p = &l.t->ptbl.v[l.p]; u32 copy_n = p->n - l.i; if (copy_n > sz - n) copy_n = sz - n; memcpy(buf + n, l.t->buf[p->buf].s + p->ofs + l.i, copy_n); n += copy_n; l.i += copy_n; if (l.i >= p->n) { l.p++; l.i = 0; } } if (n < sz && hi.i > l.i) { TxtPiece *p = &l.t->ptbl.v[l.p]; u32 copy_n = hi.i - l.i; if (copy_n > sz - n) copy_n = sz - n; memcpy(buf + n, l.t->buf[p->buf].s + p->ofs + l.i, copy_n); } *lo = l; return n; } Str txt_next_chunk(TxtLoc *l) { TxtPiece *p = &l->t->ptbl.v[l->p]; Str s = { l->t->buf[p->buf].s + p->ofs + l->i, p->n - l->i }; if (l->p + 1 < l->t->ptbl.n) { l->p++; l->i = 0; } else { l->i = p->n; } return s; } Str txt_prev_chunk(TxtLoc *l) { while (!l->i && l->p > 0) { l->p--; l->i = l->t->ptbl.v[l->p].n; } TxtPiece *p = &l->t->ptbl.v[l->p]; Str s = { l->t->buf[p->buf].s + p->ofs, l->i }; l->i = 0; return s; } int txt_range_equal(TxtLoc lo, TxtLoc hi, Str cmp) { u32 i = 0; while (txt_before(lo, hi)) { Str s = txt_next_chunk(&lo); u32 n = cmp.n - i; if (s.n < n) n = s.n; if (s.n > 0 && memcmp(s.s, cmp.s + i, n)) return 0; i += s.n; } return 1; } /* navigation */ int txt_valid_loc(TxtLoc l) { return l.p < l.t->ptbl.n && (l.i < l.t->ptbl.v[l.p].n || (l.p + 1 == l.t->ptbl.n && l.i <= l.t->ptbl.v[l.p].n)); } TxtLoc txt_at(Txt *b, u32 ofs) { for (u32 i = 0; i < b->ptbl.n; i++) { if (ofs < b->ptbl.v[i].n) { return (TxtLoc) { b, i, ofs }; } ofs -= b->ptbl.v[i].n; } return (TxtLoc) { b, b->ptbl.n - 1, b->ptbl.v[b->ptbl.n - 1].n }; } u32 txt_ofs(TxtLoc l) { u32 r = 0; for (u32 i = 0; i < l.p; i++) { r += l.t->ptbl.v[i].n; } return r + l.i; } int txt_before(TxtLoc a, TxtLoc b) { ASSERT(txt_valid_loc(a)); ASSERT(txt_valid_loc(b)); return a.p < b.p || (a.p == b.p && a.i < b.i); } int txt_after(TxtLoc a, TxtLoc b) { ASSERT(txt_valid_loc(a)); ASSERT(txt_valid_loc(b)); return a.p > b.p || (a.p == b.p && a.i > b.i); } TxtLoc txt_start(Txt *t) { return (TxtLoc) { t, 0, 0 }; } TxtLoc txt_end(Txt *t) { return (TxtLoc) { t, t->ptbl.n-1, t->ptbl.v[t->ptbl.n-1].n }; } /* TODO: make these use memchr() or equivalent */ TxtLoc next_newline(TxtLoc l) { do l = cnext(l); while (!at_end(l) && txt_byte(l) != '\n'); return l; } TxtLoc prev_newline(TxtLoc l) { do l = cprev(l); while (!at_start(l) && txt_byte(l) != '\n'); return l; } TxtLoc start_of_line(TxtLoc l) { if (at_start(l)) return l; l = prev_newline(l); if (txt_byte(l) == '\n') l = cnext(l); return l; } TxtLoc end_of_line(TxtLoc l) { TxtLoc start = start_of_line(l); return txt_byte(start) == '\n' ? start : next_newline(start); } u32 get_col(TxtLoc l) { u32 n = 0; for (TxtLoc tmp = start_of_line(l); txt_before(tmp, l); txt_chr_next(&tmp)) n++; return n; } TxtLoc at_col(TxtLoc l, u32 col) { l = start_of_line(l); while (col-- && txt_byte(l) != '\n') l = cnext(l); return l; } TxtLoc next_line_start(TxtLoc l) { if (txt_byte(l) == '\n') return cnext(l); return cnext(next_newline(l)); } TxtLoc prev_line_start(TxtLoc l) { return start_of_line(prev_newline(l)); } TxtLoc prev_line(TxtLoc l) { return at_col(prev_line_start(l), get_col(l)); } TxtLoc next_line(TxtLoc l) { return at_col(next_line_start(l), get_col(l)); } /* reading chars */ u32 txt_chr(TxtLoc l) { TxtPiece *p = &l.t->ptbl.v[l.p]; TxtBuf *b = &l.t->buf[p->buf]; return utf8_decode_at(b->s, p->ofs + l.i, p->ofs + p->n); } u8 txt_byte(TxtLoc l) { TxtPiece *p = &l.t->ptbl.v[l.p]; TxtBuf *b = &l.t->buf[p->buf]; if (l.i >= p->n) return 0; return b->s[p->ofs + l.i]; } u32 txt_chr_next(TxtLoc *l) { if (at_end(*l)) return 0; u32 c = txt_chr(*l); u32 n = UTF8_CP_LEN(c); for (u32 i = 0; i < n; i++) *l = bnext(*l); return c; }