From 07b3782804272abce9ada300e96fa141e359b359 Mon Sep 17 00:00:00 2001
From: WormHeamer
Date: Fri, 2 Jan 2026 03:00:29 -0500
Subject: convert non-utf-8 files to a readonly hex dump

---
 utf8.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'utf8.h')

diff --git a/utf8.h b/utf8.h
index 164279b..db98e18 100644
--- a/utf8.h
+++ b/utf8.h
@@ -15,6 +15,7 @@ u32 utf8_encode_len(const u32 *src, u32 cp_count);
 void utf8_decode(u32 *dst, const char *src, u32 cp_count);
 void utf8_encode(char *dst, const u32 *src, u32 cp_count);
 u32 utf8_decode_at(const char *s, u32 i, u32 n);
+int utf8_validate(const char *src, u32 n);
 
 #ifdef UTF8_IMPL
 
@@ -85,5 +86,15 @@ u32 utf8_decode_at(const char *s, u32 i, u32 n) {
 	return cp;
 }
 
+int utf8_validate(const char *src, u32 n) {
+	/* TODO: rewrite this to be faster */
+	for (u32 i = 0; i < n; i++) {
+		if (utf8_decode_at(src, i, n) == 0) {
+			return 0;
+		}
+	}
+	return 1;
+}
+
 #endif
 #endif
-- 
cgit v1.2.3