From 615601fb355709d611d18f878f77c993c312f6aa Mon Sep 17 00:00:00 2001 From: WormHeamer Date: Sun, 28 Dec 2025 04:19:59 -0500 Subject: lots of features & bugfixes both --- utf8.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'utf8.h') diff --git a/utf8.h b/utf8.h index 01c3336..164279b 100644 --- a/utf8.h +++ b/utf8.h @@ -6,6 +6,9 @@ #include "wrmr.h" #define UTF8_INVALID 0xFFFD /* replacement character */ +#define UTF8_CP_LEN_BITS ((uint64_t)0xFFEAA550000) +#define UTF8_CP_SHIFT(cp) ((32 - stdc_leading_zeros((uint32_t)(cp))) << 1) +#define UTF8_CP_LEN(cp) (1 + ((UTF8_CP_LEN_BITS >> UTF8_CP_SHIFT(cp)) & 3)) u32 utf8_decode_len(const char *src, u32 ch_count); u32 utf8_encode_len(const u32 *src, u32 cp_count); @@ -18,9 +21,6 @@ u32 utf8_decode_at(const char *s, u32 i, u32 n); #include <stdbit.h> /* packed array of 2-bit lengths for codepoints 0..10FFFF */ -#define UTF8_CP_LEN_BITS ((uint64_t)0xFFEAA550000) -#define UTF8_CP_SHIFT(cp) ((32 - stdc_leading_zeros((uint32_t)(cp))) << 1) -#define UTF8_CP_LEN(cp) (1 + ((UTF8_CP_LEN_BITS >> UTF8_CP_SHIFT(cp)) & 3)) u32 utf8_encode_len(const u32 *src, u32 cp_count) { u32 len = 0; @@ -72,14 +72,14 @@ void utf8_decode(u32 *dst, const char *src, u32 cp_count) { u32 utf8_decode_at(const char *s, u32 i, u32 n) { if (i >= n) return 0; - u32 cp = (u8)s[i++]; + u32 cp = (u8)s[i]; u32 b = stdc_leading_ones((u8)cp); if (!b) return cp; u32 end = i + b - 1; if (end >= n) return 0; cp &= 0xff >> b; - while (i < end) { - u8 c = s[i++]; + while (++i <= end) { + u8 c = s[i]; cp = (cp << 6) | (c & 0x3f); } return cp; -- cgit v1.2.3