summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--vui.c146
1 files changed, 76 insertions, 70 deletions
diff --git a/vui.c b/vui.c
index 0434334..258b7f1 100644
--- a/vui.c
+++ b/vui.c
@@ -62,8 +62,12 @@ static inline void vui_outsn(const char *s, unsigned n);
static inline void vui_outs(const char *s);
static inline void vui_out_flush(void);
-static inline const char *utf8_next(u32 *out, const char *src);
-static void utf8_decode(uint32_t *dst, const char *src, unsigned n);
+/* unicode */
+
+static u32 utf8_decode_len(const char *src, u32 n); /* n = bytes in src; returns codepoint count */
+static u32 utf8_encode_len(const u32 *src, u32 n); /* n = codepoints in src; returns utf8 byte count */
+static void utf8_decode(u32 *dst, u32 n, const char *src); /* n = codepoints to write to dst */
+static void utf8_encode(char *dst, const u32 *src, u32 n); /* n = codepoints to read from src */
/* globals */
@@ -303,24 +307,21 @@ static void vui_outf(const char *fmt, ...) {
* 6 sixes, 5 fives, 5 fours, 5 threes, four twos, and eight ones.
* no obvious pattern.
*
- * to avoid the subtraction we reverse:
- * { 1,1,1,1,1,1,1,1,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,6,6,6,6,6,6, }
- *
- * subtract one from each:
- * { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,5,5,5,5,5,5, }
+ * no unicode codepoints above 0x10FFFF, so never more than 4 bytes
+ * (one entry per bit length of the codepoint, 21 down to 0):
+ * 4444433333222211111111
*
- * clamp to <=3 (no unicode codepoints are actually that long):
- * { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, }
+ * subtract one from each, so each entry fits in 2 bits:
+ * 3333322222111100000000
*
- * concatenate bits:
- * 0b0000000000000001010101101010101011111111111111111111111111111111
+ * pack into a u64:
+ * 0b11111111111010101010010101010000000000000000
*
* convert to hex, and now:
- * len(cp) = 1 + (0x156AAFFFFFFFF >> (2 * clz(cp))) & 3
+ * len(cp) = 1 + ((0xFFEAA550000 >> (2 * (32 - clz(cp)))) & 3)
*/
-#define UTF8_CP_LEN_BITS ((uint64_t)0x156AAFFFFFFFF)
-#define UTF8_CP_SHIFT(cp) (stdc_leading_zeros((uint32_t)cp) << 1)
+#define UTF8_CP_LEN_BITS ((uint64_t)0xFFEAA550000)
+#define UTF8_CP_SHIFT(cp) ((32 - stdc_leading_zeros((uint32_t)(cp))) << 1)
#define UTF8_CP_LEN(cp) (1 + ((UTF8_CP_LEN_BITS >> UTF8_CP_SHIFT(cp)) & 3))
static inline void vui_outc(char c) {
@@ -332,29 +333,14 @@ static inline void vui_outc(char c) {
/* it doesn't make sense to do so, and assuming non-zero lets us dodge a branch
* in stdc_leading_zeros() */
static inline void vui_outvc(VuiChar c) {
- //ASSUME(c > 0 && c <= 0x110000);
- ASSUME(c > 0);
- ASSUME(c <= 0x110000);
- uint8_t len = UTF8_CP_LEN(c);
- vui_out_fit(vui_outn + len);
- ASSUME(len > 0 && len < 5);
-
- if (len == 1) {
- vui_out[vui_outn++] = c;
- return;
- }
-
- for (unsigned i = len; --i;) {
- vui_out[vui_outn + i] = 0x80 | (c & 0x3f);
- c >>= 6;
- }
-
- vui_out[vui_outn] = (0xf0 << (4 - len)) | c;
- vui_outn += len;
+ vui_outvcn(&c, 1); /* a single codepoint is emitted as a batch of one */
}
static inline void vui_outvcn(VuiChar *c, size_t n) {
- while (n--) vui_outvc(*c++);
+ u32 len = utf8_encode_len(c, n); /* utf8 byte count for all n codepoints */
+ vui_out_fit(vui_outn + len); /* presumably grows vui_out to hold vui_outn + len bytes; confirm against vui_out_fit */
+ utf8_encode(&vui_out[vui_outn], c, n); /* encode in place at offset vui_outn */
+ vui_outn += len; /* advance the offset past the bytes just written */
}
static inline void vui_outsn(const char *s, unsigned n) {
@@ -617,40 +603,6 @@ void vui_chr(int x, int y, VuiChar c) {
vui_chra(x, y, c, ATTR_DEFAULT);
}
-/*
-static inline u32 utf8_next(u32 *p, const char *s, u32 n) {
- u32 i = *p;
- u8 c = s[i++];
- usize bits = stdc_leading_ones(c);
- ASSUME(bits < 5);
- u32 cp = c & ((1 << (7-bits)) - 1);
- while (bits-- > 1) {
- c = s[i++];
- cp = (cp << 6) | (c & 0x3F);
- }
- *p = i;
- return cp;
-}
-*/
-
-static inline const char *utf8_next(u32 *out, const char *src) {
- u8 c = *src++;
- usize bits = stdc_leading_ones(c);
- ASSUME(bits < 5);
- u32 cp = c & (-1 >> bits);
- while (bits-- > 1) {
- c = *src++;
- cp = (cp << 6) | (c & 0x3F);
- }
- *out = cp;
- return src;
-}
-
-static void utf8_decode(uint32_t *dst, const char *src, unsigned n) {
- const char *end = src + n;
- while (src < end) src = utf8_next(dst++, src);
-}
-
static void truncate_span(int *x, unsigned *nptr) {
int n = (int)*nptr;
if (*x < 0) {
@@ -670,10 +622,11 @@ static void truncate_span(int *x, unsigned *nptr) {
*nptr = n;
}
-void vui_putsna(int x, int y, const char *s, unsigned n, VuiAttr a) {
+void vui_putsna(int x, int y, const char *s, unsigned srcn, VuiAttr a) {
+ u32 n = utf8_decode_len(s, srcn); /* srcn counts bytes of s, n counts codepoints */
truncate_span(&x, &n); /* NOTE(review): s is not passed, so if this clips on the left, decoding still starts at s[0]; confirm intended */
if (n < 1 || y < 0 || y >= (int)LINES) return;
- utf8_decode(&CHR(x, y), s, n);
+ utf8_decode(&CHR(x, y), n, s); /* writes exactly n codepoints starting at CHR(x, y) */
for (uint16_t *pa = &ATTR(x, y); n--;) *pa++ = a;
}
@@ -877,3 +830,56 @@ VuiKey vui_key(void) {
return c & 0x80 ? getk_utf8(c) : c;
}
}
+
+/* utf8 */
+
+/* Count the codepoints encoded in the first n bytes of src.
+ *
+ * Only lead bytes are inspected: the number of leading one bits gives the
+ * sequence length, and continuation bytes are skipped without validation.
+ * A byte with no leading ones (ascii) or exactly one (a stray continuation
+ * byte) advances by a single byte.
+ *
+ * A sequence whose declared length runs past src + n is NOT counted.
+ * utf8_decode() is given a codepoint count rather than a byte length, so
+ * counting a truncated tail would make it read beyond the caller's buffer.
+ *
+ * returns: the number of complete sequences found in src[0..n).
+ */
+static u32 utf8_decode_len(const char *src, u32 n) {
+	u32 i = 0;
+	u32 len = 0;
+	while (i < n) {
+		u8 c = (u8)src[i];
+		/* 0xxxxxxx has zero leading ones but still occupies one byte */
+		u32 step = stdc_leading_ones(c) + ((~c & 0x80) >> 7);
+		/* incomplete trailing sequence: stop before it */
+		if (step > n - i) break;
+		i += step;
+		len++;
+	}
+	return len;
+}
+
+/* Decode n codepoints from the utf8 bytes at src into dst[0..n).
+ *
+ * The caller must guarantee that src holds n complete sequences and that
+ * no lead byte has five or more leading one bits (see the ASSUME below);
+ * nothing here bounds the reads from src.
+ */
+static void utf8_decode(u32 *dst, u32 n, const char *src) {
+	const u8 *p = (const u8 *)src;
+	for (u32 k = 0; k < n; k++) {
+		u8 lead = *p++;
+		u32 ones = stdc_leading_ones(lead);
+		ASSUME(ones < 5);
+		/* payload bits of the lead byte sit below its length prefix */
+		u32 cp = lead & (0xff >> ones);
+		/* ones - 1 continuation bytes follow, six payload bits each */
+		for (u32 tail = ones; tail > 1; tail--) {
+			cp = (cp << 6) | (*p++ & 0x3F);
+		}
+		dst[k] = cp;
+	}
+}
+
+/* Total number of utf8 bytes needed to encode the n codepoints in src. */
+static u32 utf8_encode_len(const u32 *src, u32 n) {
+	u32 total = 0;
+	for (u32 k = 0; k < n; k++) {
+		u32 cp = src[k];
+		total += UTF8_CP_LEN(cp);
+	}
+	return total;
+}
+
+/* Encode the n codepoints in src as utf8 into dst.
+ *
+ * dst must have room for utf8_encode_len(src, n) bytes; no terminator is
+ * written. Each codepoint is assumed to be non-zero and at most 0x110000.
+ */
+static void utf8_encode(char *dst, const u32 *src, u32 n) {
+	for (const u32 *end = src + n; src != end; src++) {
+		u32 cp = *src;
+		ASSUME(cp > 0 && cp <= 0x110000);
+
+		u32 nbytes = UTF8_CP_LEN(cp);
+		ASSUME(nbytes > 0 && nbytes < 5);
+
+		/* single byte: the codepoint is its own encoding */
+		if (nbytes == 1) {
+			*dst++ = cp;
+			continue;
+		}
+
+		/* continuation bytes are filled last to first, six bits apiece */
+		for (u32 k = nbytes - 1; k > 0; k--) {
+			dst[k] = 0x80 | (cp & 0x3f);
+			cp >>= 6;
+		}
+		/* lead byte: length prefix (0xC0/0xE0/0xF0 after truncation) plus the remaining bits */
+		dst[0] = (0xf0 << (4 - nbytes)) | cp;
+		dst += nbytes;
+	}
+}