[1/3,v2] C: Implement C2X N2653 char8_t and UTF-8 string literal changes
Commit Message
This patch implements the core language and compiler-dependent library
changes adopted for C2X via WG14 N2653. The changes include:
- Change of type for UTF-8 string literals from array of char to
array of char8_t (unsigned char).
- A new atomic_char8_t typedef.
- A new ATOMIC_CHAR8_T_LOCK_FREE macro defined in terms of the existing
__GCC_ATOMIC_CHAR8_T_LOCK_FREE predefined macro.
gcc/ChangeLog:
* ginclude/stdatomic.h (atomic_char8_t,
ATOMIC_CHAR8_T_LOCK_FREE): New typedef and macro.
gcc/c/ChangeLog:
* c-parser.cc (c_parser_string_literal): Use char8_t as the type
of CPP_UTF8STRING when char8_t support is enabled.
* c-typeck.cc (digest_init): Allow initialization of an array
of character type by a string literal with type array of
char8_t.
gcc/c-family/ChangeLog:
* c-lex.cc (lex_string, lex_charconst): Use char8_t as the type
of CPP_UTF8CHAR and CPP_UTF8STRING when char8_t support is
enabled.
* c-opts.cc (c_common_post_options): Set flag_char8_t if
targeting C2X.
---
gcc/c-family/c-lex.cc | 13 +++++++++----
gcc/c-family/c-opts.cc | 4 ++--
gcc/c/c-parser.cc | 16 ++++++++++++++--
gcc/c/c-typeck.cc | 2 +-
gcc/ginclude/stdatomic.h | 6 ++++++
5 files changed, 32 insertions(+), 9 deletions(-)
@@ -1352,7 +1352,14 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
default:
case CPP_STRING:
case CPP_UTF8STRING:
- value = build_string (1, "");
+ if (type == CPP_UTF8STRING && flag_char8_t)
+ {
+ value = build_string (TYPE_PRECISION (char8_type_node)
+ / TYPE_PRECISION (char_type_node),
+ ""); /* char8_t is 8 bits */
+ }
+ else
+ value = build_string (1, "");
break;
case CPP_STRING16:
value = build_string (TYPE_PRECISION (char16_type_node)
@@ -1425,9 +1432,7 @@ lex_charconst (const cpp_token *token)
type = char16_type_node;
else if (token->type == CPP_UTF8CHAR)
{
- if (!c_dialect_cxx ())
- type = unsigned_char_type_node;
- else if (flag_char8_t)
+ if (flag_char8_t)
type = char8_type_node;
else
type = char_type_node;
@@ -1059,9 +1059,9 @@ c_common_post_options (const char **pfilename)
if (flag_sized_deallocation == -1)
flag_sized_deallocation = (cxx_dialect >= cxx14);
- /* char8_t support is new in C++20. */
+ /* char8_t support is implicitly enabled in C++20 and C2X. */
if (flag_char8_t == -1)
- flag_char8_t = (cxx_dialect >= cxx20);
+ flag_char8_t = (cxx_dialect >= cxx20) || flag_isoc2x;
if (flag_extern_tls_init)
{
@@ -7447,7 +7447,14 @@ c_parser_string_literal (c_parser *parser, bool translate, bool wide_ok)
default:
case CPP_STRING:
case CPP_UTF8STRING:
- value = build_string (1, "");
+ if (type == CPP_UTF8STRING && flag_char8_t)
+ {
+ value = build_string (TYPE_PRECISION (char8_type_node)
+ / TYPE_PRECISION (char_type_node),
+ ""); /* char8_t is 8 bits */
+ }
+ else
+ value = build_string (1, "");
break;
case CPP_STRING16:
value = build_string (TYPE_PRECISION (char16_type_node)
@@ -7472,9 +7479,14 @@ c_parser_string_literal (c_parser *parser, bool translate, bool wide_ok)
{
default:
case CPP_STRING:
- case CPP_UTF8STRING:
TREE_TYPE (value) = char_array_type_node;
break;
+ case CPP_UTF8STRING:
+ if (flag_char8_t)
+ TREE_TYPE (value) = char8_array_type_node;
+ else
+ TREE_TYPE (value) = char_array_type_node;
+ break;
case CPP_STRING16:
TREE_TYPE (value) = char16_array_type_node;
break;
@@ -8045,7 +8045,7 @@ digest_init (location_t init_loc, tree type, tree init, tree origtype,
if (char_array)
{
- if (typ2 != char_type_node)
+ if (typ2 != char_type_node && typ2 != char8_type_node)
incompat_string_cst = true;
}
else if (!comptypes (typ1, typ2))
@@ -49,6 +49,9 @@ typedef _Atomic long atomic_long;
typedef _Atomic unsigned long atomic_ulong;
typedef _Atomic long long atomic_llong;
typedef _Atomic unsigned long long atomic_ullong;
+#ifdef __CHAR8_TYPE__
+typedef _Atomic __CHAR8_TYPE__ atomic_char8_t;
+#endif
typedef _Atomic __CHAR16_TYPE__ atomic_char16_t;
typedef _Atomic __CHAR32_TYPE__ atomic_char32_t;
typedef _Atomic __WCHAR_TYPE__ atomic_wchar_t;
@@ -97,6 +100,9 @@ extern void atomic_signal_fence (memory_order);
#define ATOMIC_BOOL_LOCK_FREE __GCC_ATOMIC_BOOL_LOCK_FREE
#define ATOMIC_CHAR_LOCK_FREE __GCC_ATOMIC_CHAR_LOCK_FREE
+#ifdef __GCC_ATOMIC_CHAR8_T_LOCK_FREE
+#define ATOMIC_CHAR8_T_LOCK_FREE __GCC_ATOMIC_CHAR8_T_LOCK_FREE
+#endif
#define ATOMIC_CHAR16_T_LOCK_FREE __GCC_ATOMIC_CHAR16_T_LOCK_FREE
#define ATOMIC_CHAR32_T_LOCK_FREE __GCC_ATOMIC_CHAR32_T_LOCK_FREE
#define ATOMIC_WCHAR_T_LOCK_FREE __GCC_ATOMIC_WCHAR_T_LOCK_FREE