diff options
| author | WormHeamer | 2025-12-28 04:19:59 -0500 |
|---|---|---|
| committer | WormHeamer | 2025-12-28 04:19:59 -0500 |
| commit | 615601fb355709d611d18f878f77c993c312f6aa (patch) | |
| tree | 5fb32e00c201944f9c77308da1ab9bee443bdf7c /utf8.h | |
| parent | 9f4310c24ca39284ad768a82d368e749b18fd76c (diff) | |
lots of features & bugfixes both
Diffstat (limited to 'utf8.h')
| -rw-r--r-- | utf8.h | 12 |
1 files changed, 6 insertions, 6 deletions
```diff
@@ -6,6 +6,9 @@
 #include "wrmr.h"
 
 #define UTF8_INVALID 0xFFFD /* replacement character */
+#define UTF8_CP_LEN_BITS ((uint64_t)0xFFEAA550000)
+#define UTF8_CP_SHIFT(cp) ((32 - stdc_leading_zeros((uint32_t)(cp))) << 1)
+#define UTF8_CP_LEN(cp) (1 + ((UTF8_CP_LEN_BITS >> UTF8_CP_SHIFT(cp)) & 3))
 
 u32 utf8_decode_len(const char *src, u32 ch_count);
 u32 utf8_encode_len(const u32 *src, u32 cp_count);
@@ -18,9 +21,6 @@ u32 utf8_decode_at(const char *s, u32 i, u32 n);
 #include <stdbit.h>
 
 /* packed array of 2-bit lengths for codepoints 0..10FFFF */
-#define UTF8_CP_LEN_BITS ((uint64_t)0xFFEAA550000)
-#define UTF8_CP_SHIFT(cp) ((32 - stdc_leading_zeros((uint32_t)(cp))) << 1)
-#define UTF8_CP_LEN(cp) (1 + ((UTF8_CP_LEN_BITS >> UTF8_CP_SHIFT(cp)) & 3))
 
 u32 utf8_encode_len(const u32 *src, u32 cp_count) {
     u32 len = 0;
@@ -72,14 +72,14 @@ void utf8_decode(u32 *dst, const char *src, u32 cp_count) {
 
 u32 utf8_decode_at(const char *s, u32 i, u32 n) {
     if (i >= n) return 0;
-    u32 cp = (u8)s[i++];
+    u32 cp = (u8)s[i];
     u32 b = stdc_leading_ones((u8)cp);
     if (!b) return cp;
     u32 end = i + b - 1;
     if (end >= n) return 0;
     cp &= 0xff >> b;
-    while (i < end) {
-        u8 c = s[i++];
+    while (++i <= end) {
+        u8 c = s[i];
         cp = (cp << 6) | (c & 0x3f);
     }
     return cp;
```
