[v4,2/3] libcpp: add a function to determine UTF-8 validity of a C string
Checks
Commit Message
This provides a simpler interface for callers that only need a "yes" or
"no" answer about whether a string is valid UTF-8.
libcpp/
* charset.cc: Add `_cpp_valid_utf8_str` which determines whether
a C string is valid UTF-8 or not.
* internal.h: Add prototype for `_cpp_valid_utf8_str`.
Signed-off-by: Ben Boeckel <ben.boeckel@kitware.com>
---
libcpp/charset.cc | 20 ++++++++++++++++++++
libcpp/internal.h | 2 ++
2 files changed, 22 insertions(+)
@@ -1868,6 +1868,26 @@ _cpp_valid_utf8 (cpp_reader *pfile,
return true;
}
+/* Return true if the NUL-terminated string STR decodes cleanly with
+   one_utf8_to_cppchar from start to end, false at the first sequence it
+   rejects.  Decoded code points are discarded.  STR must not be NULL.  */
+
+bool
+_cpp_valid_utf8_str (const char *str)
+{
+  const uchar *in = (const uchar *) str;
+  size_t len = strlen (str);
+  cppchar_t cp;
+
+  while (*in)
+    {
+      if (one_utf8_to_cppchar (&in, &len, &cp))
+        return false;
+    }
+
+  return true;
+}
+
/* Subroutine of convert_hex and convert_oct. N is the representation
in the execution character set of a numeric escape; write it into the
string buffer TBUF and update the end-of-string pointer therein. WIDE
@@ -834,6 +834,8 @@ extern bool _cpp_valid_utf8 (cpp_reader *pfile,
struct normalize_state *nst,
cppchar_t *cp);
+extern bool _cpp_valid_utf8_str (const char *str);
+
extern void _cpp_destroy_iconv (cpp_reader *);
extern unsigned char *_cpp_convert_input (cpp_reader *, const char *,
unsigned char *, size_t, size_t,