[3/3] c++/106426: Treat u8 character literals as unsigned in char8_t modes.
Commit Message
This patch corrects handling of UTF-8 character literals in preprocessing
directives so that they are treated as unsigned types in char8_t enabled
C++ modes (C++17 with -fchar8_t or C++20 without -fno-char8_t). Previously,
UTF-8 character literals were always treated as having the same type as
ordinary character literals (signed or unsigned dependent on target or use
of the -fsigned-char or -funsigned-char options).
Fixes https://gcc.gnu.org/PR106426.
gcc/c-family/ChangeLog:
* c-opts.cc (c_common_post_options): Assign cpp_opts->unsigned_utf8char
subject to -fchar8_t, -fsigned-char, and/or -funsigned-char.
gcc/testsuite/ChangeLog:
* g++.dg/ext/char8_t-char-literal-1.C: Check signedness of u8 literals.
* g++.dg/ext/char8_t-char-literal-2.C: Check signedness of u8 literals.
libcpp/ChangeLog:
* charset.cc (narrow_str_to_charconst): Set signedness of CPP_UTF8CHAR
literals based on unsigned_utf8char.
* include/cpplib.h (cpp_options): Add unsigned_utf8char.
* init.cc (cpp_create_reader): Initialize unsigned_utf8char.
---
gcc/c-family/c-opts.cc | 1 +
gcc/testsuite/g++.dg/ext/char8_t-char-literal-1.C | 6 +++++-
gcc/testsuite/g++.dg/ext/char8_t-char-literal-2.C | 4 ++++
libcpp/charset.cc | 4 ++--
libcpp/include/cpplib.h | 4 ++--
libcpp/init.cc | 1 +
6 files changed, 15 insertions(+), 5 deletions(-)
Comments
On Mon, Jul 25, 2022 at 11:01 AM Tom Honermann via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> This patch corrects handling of UTF-8 character literals in preprocessing
> directives so that they are treated as unsigned types in char8_t enabled
> C++ modes (C++17 with -fchar8_t or C++20 without -fno-char8_t). Previously,
> UTF-8 character literals were always treated as having the same type as
> ordinary character literals (signed or unsigned dependent on target or use
> of the -fsigned-char or -funsigned-char options).
>
> Fixes https://gcc.gnu.org/PR106426.
The above mention of the PR # should just be:
preprocessor/106426
And then when this patch gets committed, it will be recorded in bugzilla also.
Thanks,
Andrew Pinski
>
> gcc/c-family/ChangeLog:
> * c-opts.cc (c_common_post_options): Assign cpp_opts->unsigned_utf8char
> subject to -fchar8_t, -fsigned-char, and/or -funsigned-char.
>
> gcc/testsuite/ChangeLog:
> * g++.dg/ext/char8_t-char-literal-1.C: Check signedness of u8 literals.
> * g++.dg/ext/char8_t-char-literal-2.C: Check signedness of u8 literals.
>
> libcpp/ChangeLog:
> * charset.cc (narrow_str_to_charconst): Set signedness of CPP_UTF8CHAR
> literals based on unsigned_utf8char.
> * include/cpplib.h (cpp_options): Add unsigned_utf8char.
> * init.cc (cpp_create_reader): Initialize unsigned_utf8char.
> ---
> gcc/c-family/c-opts.cc | 1 +
> gcc/testsuite/g++.dg/ext/char8_t-char-literal-1.C | 6 +++++-
> gcc/testsuite/g++.dg/ext/char8_t-char-literal-2.C | 4 ++++
> libcpp/charset.cc | 4 ++--
> libcpp/include/cpplib.h | 4 ++--
> libcpp/init.cc | 1 +
> 6 files changed, 15 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
> index 108adc5caf8..02ce1e86cdb 100644
> --- a/gcc/c-family/c-opts.cc
> +++ b/gcc/c-family/c-opts.cc
> @@ -1062,6 +1062,7 @@ c_common_post_options (const char **pfilename)
> /* char8_t support is implicitly enabled in C++20 and C2X. */
> if (flag_char8_t == -1)
> flag_char8_t = (cxx_dialect >= cxx20) || flag_isoc2x;
> + cpp_opts->unsigned_utf8char = flag_char8_t ? 1 : cpp_opts->unsigned_char;
>
> if (flag_extern_tls_init)
> {
> diff --git a/gcc/testsuite/g++.dg/ext/char8_t-char-literal-1.C b/gcc/testsuite/g++.dg/ext/char8_t-char-literal-1.C
> index 8ed85ccfdcd..2994dd38516 100644
> --- a/gcc/testsuite/g++.dg/ext/char8_t-char-literal-1.C
> +++ b/gcc/testsuite/g++.dg/ext/char8_t-char-literal-1.C
> @@ -1,6 +1,6 @@
> // Test that UTF-8 character literals have type char if -fchar8_t is not enabled.
> // { dg-do compile }
> -// { dg-options "-std=c++17 -fno-char8_t" }
> +// { dg-options "-std=c++17 -fsigned-char -fno-char8_t" }
>
> template<typename T1, typename T2>
> struct is_same
> @@ -10,3 +10,7 @@ template<typename T>
> { static const bool value = true; };
>
> static_assert(is_same<decltype(u8'x'), char>::value, "Error");
> +
> +#if u8'\0' - 1 > 0
> +#error "UTF-8 character literals not signed in preprocessor"
> +#endif
> diff --git a/gcc/testsuite/g++.dg/ext/char8_t-char-literal-2.C b/gcc/testsuite/g++.dg/ext/char8_t-char-literal-2.C
> index 7861736689c..db4fe70046d 100644
> --- a/gcc/testsuite/g++.dg/ext/char8_t-char-literal-2.C
> +++ b/gcc/testsuite/g++.dg/ext/char8_t-char-literal-2.C
> @@ -10,3 +10,7 @@ template<typename T>
> { static const bool value = true; };
>
> static_assert(is_same<decltype(u8'x'), char8_t>::value, "Error");
> +
> +#if u8'\0' - 1 < 0
> +#error "UTF-8 character literals not unsigned in preprocessor"
> +#endif
> diff --git a/libcpp/charset.cc b/libcpp/charset.cc
> index ca8b7cf7aa5..12e31632228 100644
> --- a/libcpp/charset.cc
> +++ b/libcpp/charset.cc
> @@ -1960,8 +1960,8 @@ narrow_str_to_charconst (cpp_reader *pfile, cpp_string str,
> /* Multichar constants are of type int and therefore signed. */
> if (i > 1)
> unsigned_p = 0;
> - else if (type == CPP_UTF8CHAR && !CPP_OPTION (pfile, cplusplus))
> - unsigned_p = 1;
> + else if (type == CPP_UTF8CHAR)
> + unsigned_p = CPP_OPTION (pfile, unsigned_utf8char);
> else
> unsigned_p = CPP_OPTION (pfile, unsigned_char);
>
> diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
> index 3eba6f74b57..f9c042db034 100644
> --- a/libcpp/include/cpplib.h
> +++ b/libcpp/include/cpplib.h
> @@ -581,8 +581,8 @@ struct cpp_options
> ints and target wide characters, respectively. */
> size_t precision, char_precision, int_precision, wchar_precision;
>
> - /* True means chars (wide chars) are unsigned. */
> - bool unsigned_char, unsigned_wchar;
> + /* True means chars (wide chars, UTF-8 chars) are unsigned. */
> + bool unsigned_char, unsigned_wchar, unsigned_utf8char;
>
> /* True if the most significant byte in a word has the lowest
> address in memory. */
> diff --git a/libcpp/init.cc b/libcpp/init.cc
> index f4ab83d2145..0242da5f55c 100644
> --- a/libcpp/init.cc
> +++ b/libcpp/init.cc
> @@ -231,6 +231,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table,
> CPP_OPTION (pfile, int_precision) = CHAR_BIT * sizeof (int);
> CPP_OPTION (pfile, unsigned_char) = 0;
> CPP_OPTION (pfile, unsigned_wchar) = 1;
> + CPP_OPTION (pfile, unsigned_utf8char) = 1;
> CPP_OPTION (pfile, bytes_big_endian) = 1; /* does not matter */
>
> /* Default to no charset conversion. */
> --
> 2.32.0
>
This patch corrects handling of UTF-8 character literals in preprocessing
directives so that they are treated as unsigned types in char8_t enabled
C++ modes (C++17 with -fchar8_t or C++20 without -fno-char8_t). Previously,
UTF-8 character literals were always treated as having the same type as
ordinary character literals (signed or unsigned dependent on target or use
of the -fsigned-char or -funsigned-char options).
PR preprocessor/106426
gcc/c-family/ChangeLog:
* c-opts.cc (c_common_post_options): Assign cpp_opts->unsigned_utf8char
subject to -fchar8_t, -fsigned-char, and/or -funsigned-char.
gcc/testsuite/ChangeLog:
* g++.dg/ext/char8_t-char-literal-1.C: Check signedness of u8 literals.
* g++.dg/ext/char8_t-char-literal-2.C: Check signedness of u8 literals.
libcpp/ChangeLog:
* charset.cc (narrow_str_to_charconst): Set signedness of CPP_UTF8CHAR
literals based on unsigned_utf8char.
* include/cpplib.h (cpp_options): Add unsigned_utf8char.
* init.cc (cpp_create_reader): Initialize unsigned_utf8char.
---
gcc/c-family/c-opts.cc | 1 +
gcc/testsuite/g++.dg/ext/char8_t-char-literal-1.C | 6 +++++-
gcc/testsuite/g++.dg/ext/char8_t-char-literal-2.C | 4 ++++
libcpp/charset.cc | 4 ++--
libcpp/include/cpplib.h | 4 ++--
libcpp/init.cc | 1 +
6 files changed, 15 insertions(+), 5 deletions(-)
diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
index 108adc5caf8..02ce1e86cdb 100644
--- a/gcc/c-family/c-opts.cc
+++ b/gcc/c-family/c-opts.cc
@@ -1062,6 +1062,7 @@ c_common_post_options (const char **pfilename)
/* char8_t support is implicitly enabled in C++20 and C2X. */
if (flag_char8_t == -1)
flag_char8_t = (cxx_dialect >= cxx20) || flag_isoc2x;
+ cpp_opts->unsigned_utf8char = flag_char8_t ? 1 : cpp_opts->unsigned_char;
if (flag_extern_tls_init)
{
diff --git a/gcc/testsuite/g++.dg/ext/char8_t-char-literal-1.C b/gcc/testsuite/g++.dg/ext/char8_t-char-literal-1.C
index 8ed85ccfdcd..2994dd38516 100644
--- a/gcc/testsuite/g++.dg/ext/char8_t-char-literal-1.C
+++ b/gcc/testsuite/g++.dg/ext/char8_t-char-literal-1.C
@@ -1,6 +1,6 @@
// Test that UTF-8 character literals have type char if -fchar8_t is not enabled.
// { dg-do compile }
-// { dg-options "-std=c++17 -fno-char8_t" }
+// { dg-options "-std=c++17 -fsigned-char -fno-char8_t" }
template<typename T1, typename T2>
struct is_same
@@ -10,3 +10,7 @@ template<typename T>
{ static const bool value = true; };
static_assert(is_same<decltype(u8'x'), char>::value, "Error");
+
+#if u8'\0' - 1 > 0
+#error "UTF-8 character literals not signed in preprocessor"
+#endif
diff --git a/gcc/testsuite/g++.dg/ext/char8_t-char-literal-2.C b/gcc/testsuite/g++.dg/ext/char8_t-char-literal-2.C
index 7861736689c..db4fe70046d 100644
--- a/gcc/testsuite/g++.dg/ext/char8_t-char-literal-2.C
+++ b/gcc/testsuite/g++.dg/ext/char8_t-char-literal-2.C
@@ -10,3 +10,7 @@ template<typename T>
{ static const bool value = true; };
static_assert(is_same<decltype(u8'x'), char8_t>::value, "Error");
+
+#if u8'\0' - 1 < 0
+#error "UTF-8 character literals not unsigned in preprocessor"
+#endif
diff --git a/libcpp/charset.cc b/libcpp/charset.cc
index ca8b7cf7aa5..12e31632228 100644
--- a/libcpp/charset.cc
+++ b/libcpp/charset.cc
@@ -1960,8 +1960,8 @@ narrow_str_to_charconst (cpp_reader *pfile, cpp_string str,
/* Multichar constants are of type int and therefore signed. */
if (i > 1)
unsigned_p = 0;
- else if (type == CPP_UTF8CHAR && !CPP_OPTION (pfile, cplusplus))
- unsigned_p = 1;
+ else if (type == CPP_UTF8CHAR)
+ unsigned_p = CPP_OPTION (pfile, unsigned_utf8char);
else
unsigned_p = CPP_OPTION (pfile, unsigned_char);
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index 3eba6f74b57..f9c042db034 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -581,8 +581,8 @@ struct cpp_options
ints and target wide characters, respectively. */
size_t precision, char_precision, int_precision, wchar_precision;
- /* True means chars (wide chars) are unsigned. */
- bool unsigned_char, unsigned_wchar;
+ /* True means chars (wide chars, UTF-8 chars) are unsigned. */
+ bool unsigned_char, unsigned_wchar, unsigned_utf8char;
/* True if the most significant byte in a word has the lowest
address in memory. */
diff --git a/libcpp/init.cc b/libcpp/init.cc
index f4ab83d2145..0242da5f55c 100644
--- a/libcpp/init.cc
+++ b/libcpp/init.cc
@@ -231,6 +231,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table,
CPP_OPTION (pfile, int_precision) = CHAR_BIT * sizeof (int);
CPP_OPTION (pfile, unsigned_char) = 0;
CPP_OPTION (pfile, unsigned_wchar) = 1;
+ CPP_OPTION (pfile, unsigned_utf8char) = 1;
CPP_OPTION (pfile, bytes_big_endian) = 1; /* does not matter */
/* Default to no charset conversion. */
On 7/25/22 2:05 PM, Andrew Pinski wrote:
> On Mon, Jul 25, 2022 at 11:01 AM Tom Honermann via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
>> This patch corrects handling of UTF-8 character literals in preprocessing
>> directives so that they are treated as unsigned types in char8_t enabled
>> C++ modes (C++17 with -fchar8_t or C++20 without -fno-char8_t). Previously,
>> UTF-8 character literals were always treated as having the same type as
>> ordinary character literals (signed or unsigned dependent on target or use
>> of the -fsigned-char or -funsigned-char options).
>>
>> Fixes https://gcc.gnu.org/PR106426.
> The above mention of the PR # should just be:
> preprocessor/106426
>
> And then when this patch gets committed, it will be recorded in bugzilla also.
Thank you. I re-sent the patch with a revised subject line and commit
message to reflect the component change in Bugzilla.
Tom.
>
> Thanks,
> Andrew Pinski
>
@@ -1062,6 +1062,7 @@ c_common_post_options (const char **pfilename)
/* char8_t support is implicitly enabled in C++20 and C2X. */
if (flag_char8_t == -1)
flag_char8_t = (cxx_dialect >= cxx20) || flag_isoc2x;
+ cpp_opts->unsigned_utf8char = flag_char8_t ? 1 : cpp_opts->unsigned_char;
if (flag_extern_tls_init)
{
@@ -1,6 +1,6 @@
// Test that UTF-8 character literals have type char if -fchar8_t is not enabled.
// { dg-do compile }
-// { dg-options "-std=c++17 -fno-char8_t" }
+// { dg-options "-std=c++17 -fsigned-char -fno-char8_t" }
template<typename T1, typename T2>
struct is_same
@@ -10,3 +10,7 @@ template<typename T>
{ static const bool value = true; };
static_assert(is_same<decltype(u8'x'), char>::value, "Error");
+
+#if u8'\0' - 1 > 0
+#error "UTF-8 character literals not signed in preprocessor"
+#endif
@@ -10,3 +10,7 @@ template<typename T>
{ static const bool value = true; };
static_assert(is_same<decltype(u8'x'), char8_t>::value, "Error");
+
+#if u8'\0' - 1 < 0
+#error "UTF-8 character literals not unsigned in preprocessor"
+#endif
@@ -1960,8 +1960,8 @@ narrow_str_to_charconst (cpp_reader *pfile, cpp_string str,
/* Multichar constants are of type int and therefore signed. */
if (i > 1)
unsigned_p = 0;
- else if (type == CPP_UTF8CHAR && !CPP_OPTION (pfile, cplusplus))
- unsigned_p = 1;
+ else if (type == CPP_UTF8CHAR)
+ unsigned_p = CPP_OPTION (pfile, unsigned_utf8char);
else
unsigned_p = CPP_OPTION (pfile, unsigned_char);
@@ -581,8 +581,8 @@ struct cpp_options
ints and target wide characters, respectively. */
size_t precision, char_precision, int_precision, wchar_precision;
- /* True means chars (wide chars) are unsigned. */
- bool unsigned_char, unsigned_wchar;
+ /* True means chars (wide chars, UTF-8 chars) are unsigned. */
+ bool unsigned_char, unsigned_wchar, unsigned_utf8char;
/* True if the most significant byte in a word has the lowest
address in memory. */
@@ -231,6 +231,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table,
CPP_OPTION (pfile, int_precision) = CHAR_BIT * sizeof (int);
CPP_OPTION (pfile, unsigned_char) = 0;
CPP_OPTION (pfile, unsigned_wchar) = 1;
+ CPP_OPTION (pfile, unsigned_utf8char) = 1;
CPP_OPTION (pfile, bytes_big_endian) = 1; /* does not matter */
/* Default to no charset conversion. */