summaryrefslogtreecommitdiff
path: root/utf8.h
diff options
context:
space:
mode:
author WormHeamer 2026-01-02 03:00:29 -0500
committer WormHeamer 2026-01-02 03:00:29 -0500
commit 07b3782804272abce9ada300e96fa141e359b359 (patch)
tree 018d1cb14ea771ff9d7858bea5c91f5ac50c9c4d /utf8.h
parent 52d4c6e59f5a5034988ddcf6f4cf2800df6ea7c8 (diff)
convert non-utf-8 files to a readonly hex dump
Diffstat (limited to 'utf8.h')
-rw-r--r-- utf8.h 11
1 files changed, 11 insertions, 0 deletions
diff --git a/utf8.h b/utf8.h
index 164279b..db98e18 100644
--- a/utf8.h
+++ b/utf8.h
@@ -15,6 +15,7 @@ u32 utf8_encode_len(const u32 *src, u32 cp_count);
void utf8_decode(u32 *dst, const char *src, u32 cp_count); /* NOTE(review): presumably decodes cp_count code points from UTF-8 src into dst; body not visible here -- confirm */
void utf8_encode(char *dst, const u32 *src, u32 cp_count); /* NOTE(review): presumably the inverse, writing UTF-8 for cp_count code points into dst -- confirm */
u32 utf8_decode_at(const char *s, u32 i, u32 n); /* utf8_validate() calls this as (src, offset, n) and treats a 0 result as failure */
+int utf8_validate(const char *src, u32 n); /* returns 1 if utf8_decode_at() is nonzero at every offset in [0, n), otherwise 0 */
#ifdef UTF8_IMPL
@@ -85,5 +86,15 @@ u32 utf8_decode_at(const char *s, u32 i, u32 n) {
return cp;
}
+/*
+ * Check the n bytes starting at src by calling utf8_decode_at() once for
+ * every offset 0 .. n-1, in increasing order.
+ *
+ * Returns 0 as soon as one of those calls yields 0, and 1 if none does
+ * (which includes the n == 0 case, where no call is made).
+ *
+ * NOTE(review): this presumably relies on utf8_decode_at() returning 0 for
+ * input it cannot decode; its body is not visible here -- confirm, and
+ * check what it yields for an embedded NUL byte and for offsets that land
+ * in the middle of a multi-byte sequence.
+ */
+int utf8_validate(const char *src, u32 n) {
+	/* TODO: rewrite this to be faster */
+	u32 pos = 0;
+	while (pos < n) {
+		if (!utf8_decode_at(src, pos, n)) {
+			return 0;
+		}
+		pos++;
+	}
+	return 1;
+}
+
#endif
#endif