#include #include #include #include #include #include #include "wrmr.h" #include "dynarr.h" #include "txt.h" #include "utf8.h" void txt_replace_piece(Txt *b, u32 pi, TxtBufIdx buf, u32 ofs, u32 n) { b->ptbl.v[pi] = (TxtPiece) { buf, ofs, n }; } void txt_insert_piece(Txt *b, u32 pi, TxtBufIdx buf, u32 ofs, u32 n) { DA_FIT(&b->ptbl, b->ptbl.n + 1); if (pi < b->ptbl.n) { MOVE(&b->ptbl.v[pi+1], &b->ptbl.v[pi], b->ptbl.n - pi); } txt_replace_piece(b, pi, buf, ofs, n); b->ptbl.n++; } void txt_remove_piece(Txt *b, u32 pi) { if (pi + 1 < b->ptbl.n) { MOVE(&b->ptbl.v[pi], &b->ptbl.v[pi+1], b->ptbl.n - (pi + 1)); } b->ptbl.n--; } u32 txt_split_piece(Txt *b, u32 pi, u32 i) { TxtPiece *p = &b->ptbl.v[pi]; if (i == 0) return pi; if (i == p->n) return pi + 1; txt_insert_piece(b, pi + 1, p->buf, p->ofs + i, p->n - i); b->ptbl.v[pi].n = i; return pi + 1; } static void txt_buf_fit(Txt *b, TxtBufIdx bi, u32 sz) { TxtBuf *buf = &b->buf[bi]; if (sz > buf->c) { buf->c = stdc_bit_ceil(sz); buf->s = realloc(buf->s, buf->c); if (!buf->s) FAIL_WITH_MSG("realloc failure"); } } static void txt_buf_append(Txt *b, TxtBufIdx bi, const char *s, u32 n) { TxtBuf *buf = &b->buf[bi]; txt_buf_fit(b, bi, buf->n + n); memcpy(&buf->s[buf->n], s, n); buf->n += n; } TxtLoc txt_add_piece(Txt *b, TxtLoc l) { if (l.p > 0 && !l.i) { l.p--; l.i = b->ptbl.v[l.p].n; } if (l.p < b->ptbl.n) { TxtPiece *p = &b->ptbl.v[l.p]; int mid = p->ofs + l.i < b->buf[p->buf].n; if (p->buf == TXT_SRC || mid) { l.p = txt_split_piece(b, l.p, l.i); goto insert; } } else { insert: if (l.p < b->ptbl.n && b->ptbl.v[l.p].n == 0) { txt_replace_piece(b, l.p, TXT_ADD, b->buf[TXT_ADD].n, 0); } else { txt_insert_piece(b, l.p, TXT_ADD, b->buf[TXT_ADD].n, 0); } } return l; } TxtLoc txt_insert(TxtLoc l, const char *s, u32 n) { l = txt_add_piece(l.t, l); TxtPiece *p = &l.t->ptbl.v[l.p]; txt_buf_append(l.t, p->buf, s, n); p->n += n; l.t->len += n; l.i = p->n; return l; } TxtLoc txt_insert_c(TxtLoc l, u32 ch) { char buf[6]; u32 n = utf8_encode_len(&ch, 1); utf8_encode(buf, &ch, 1); return txt_insert(l, buf, n); } static int txt_are_pieces_adjacent(Txt *t, u32 a, u32 b) { TxtPiece *pa = &t->ptbl.v[a]; TxtPiece *pb = &t->ptbl.v[b]; return pa->buf == pb->buf && pa->ofs + pa->n == pb->ofs; } static TxtLoc txt_join_or_kill(TxtLoc l) { Txt *b = l.t; if (l.t->ptbl.v[l.p].n == 0 && !(l.p == 0 && b->ptbl.n == 1)) { txt_remove_piece(b, l.p); } if (l.p + 1 < b->ptbl.n && txt_are_pieces_adjacent(b, l.p, l.p + 1)) { b->ptbl.v[l.p].n += b->ptbl.v[l.p+1].n; txt_remove_piece(b, l.p + 1); } return l; } TxtLoc txt_delete(TxtLoc l, u32 n) { u32 pi = txt_split_piece(l.t, l.p, l.i); if (pi > 0) pi--; l.p = pi; l.i = l.t->ptbl.v[l.p].n; for (;;) { ASSERT(l.i == l.t->ptbl.v[l.p].n); if (l.i >= n) { l.i -= n; l.t->ptbl.v[l.p].n -= n; break; } else { l.t->ptbl.v[l.p].n -= l.i; n -= l.i; l.i = 0; if (l.p == 0) break; l.p = l.p - 1; l.i = l.t->ptbl.v[l.p].n; } } return txt_join_or_kill(l); } TxtLoc txt_delete_c(TxtLoc l) { while (!at_start(l) && (txt_byte(l) & 0xc0) == 0x80) l = txt_delete(l, 1); l = txt_delete(l, 1); return l; } int txt_load(Txt *b, const char *path) { struct stat sb; int fd = open(path, O_RDONLY); if (fd == -1) goto empty_file; if (fstat(fd, &sb)) { close(fd); return -1; } if (!sb.st_size) { empty_file: txt_buf_fit(b, TXT_SRC, 8192); goto done; } void *m = mmap(0, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (m == MAP_FAILED) { close(fd); return -1; } txt_buf_fit(b, TXT_SRC, sb.st_size); memcpy(b->buf[TXT_SRC].s, m, sb.st_size); munmap(m, sb.st_size); b->buf[TXT_SRC].n = sb.st_size; close(fd); done: b->ptbl.n = 0; b->buf[TXT_ADD].n = 0; b->len = b->buf[TXT_SRC].n; txt_insert_piece(b, 0, TXT_SRC, 0, b->len); return 0; } int txt_save(Txt *t, const char *path) { FILE *f = fopen(path, "wb"); if (!f) return -1; for (u32 i = 0; i < t->ptbl.n; i++) { TxtPiece *p = &t->ptbl.v[i]; TxtBuf *b = &t->buf[p->buf]; fwrite(b->s + p->ofs, 1, p->n, f); } int e = ferror(f); fclose(f); return e ? -1 : 0; } void txt_free(Txt *t) { for (u32 i = 0; i < COUNTOF(t->buf); i++) { free(t->buf[i].s); } free(t->ptbl.v); } /* navigation */ TxtLoc txt_at(Txt *b, u32 ofs) { for (u32 i = 0; i < b->ptbl.n; i++) { if (ofs < b->ptbl.v[i].n) { return (TxtLoc) { b, i, ofs }; } ofs -= b->ptbl.v[i].n; } return (TxtLoc) { b, b->ptbl.n - 1, b->ptbl.v[b->ptbl.n - 1].n }; } u32 txt_ofs(TxtLoc l) { u32 r = 0; for (u32 i = 0; i < l.p; i++) { r += l.t->ptbl.v[i].n; } return r + l.i; } int txt_before(TxtLoc a, TxtLoc b) { return a.p < b.p || (a.p == b.p && a.i < b.i); } int txt_after(TxtLoc a, TxtLoc b) { return a.p > b.p || (a.p == b.p && a.i > b.i); } TxtLoc bnext(TxtLoc l) { TxtPiece *p = &l.t->ptbl.v[l.p]; if (l.p + 1 < l.t->ptbl.n) { if (l.i + 1 < p->n) l.i++; else l.p++, l.i = 0; } else { l.i++; if (l.i > p->n) l.i = p->n; } return l; } TxtLoc bprev(TxtLoc l) { if (l.i > 0) { return (TxtLoc) { l.t, l.p, l.i - 1 }; } else if (l.p > 0 && l.t->ptbl.v[l.p - 1].n > 0) { return (TxtLoc) { l.t, l.p - 1, l.t->ptbl.v[l.p - 1].n - 1 }; } else { return (TxtLoc) { l.t, 0, 0 }; } } TxtLoc cnext(TxtLoc l) { l = bnext(l); while ((txt_byte(l) & 0xc0) == 0x80) l = bnext(l); return l; } TxtLoc cprev(TxtLoc l) { l = bprev(l); while ((txt_byte(l) & 0xc0) == 0x80) l = bprev(l); return l; } int at_start(TxtLoc l) { return l.p == 0 && l.i == 0; } int at_end(TxtLoc l) { return l.p + 1 == l.t->ptbl.n && l.i == l.t->ptbl.v[l.p].n; } TxtLoc next_newline(TxtLoc l) { do l = cnext(l); while (!at_end(l) && txt_byte(l) != '\n'); return l; } TxtLoc prev_newline(TxtLoc l) { do l = cprev(l); while (!at_start(l) && txt_byte(l) != '\n'); return l; } TxtLoc start_of_line(TxtLoc l) { l = prev_newline(l); if (!at_start(l) && txt_byte(l) == '\n') l = cnext(l); return l; } TxtLoc end_of_line(TxtLoc l) { TxtLoc start = start_of_line(l); return txt_byte(start) == '\n' ? start : next_newline(start); } u32 get_col(TxtLoc l) { u32 n = 0; for (TxtLoc tmp = start_of_line(l); txt_before(tmp, l); txt_chr_next(&tmp)) n++; return n; } TxtLoc at_col(TxtLoc l, u32 col) { l = start_of_line(l); while (col--) { if (txt_chr_next(&l) == '\n') { l = bprev(l); break; } } return l; } TxtLoc prev_line(TxtLoc l) { TxtLoc start = start_of_line(l); return at_col(cprev(start), get_col(l)); } TxtLoc next_line(TxtLoc l) { TxtLoc end = end_of_line(l); if (at_end(end)) return end; return at_col(cnext(end), get_col(l)); } /* reading chars */ u32 txt_chr(TxtLoc l) { TxtPiece *p = &l.t->ptbl.v[l.p]; TxtBuf *b = &l.t->buf[p->buf]; return utf8_decode_at(b->s, p->ofs + l.i, p->ofs + p->n); } u8 txt_byte(TxtLoc l) { TxtPiece *p = &l.t->ptbl.v[l.p]; TxtBuf *b = &l.t->buf[p->buf]; if (l.i >= p->n) return 0; return b->s[p->ofs + l.i]; } u32 txt_chr_next(TxtLoc *l) { if (at_end(*l)) return 0; u32 c = txt_chr(*l); u32 n = UTF8_CP_LEN(c); for (u32 i = 0; i < n; i++) *l = bnext(*l); return c; }