c-family: char8_t and aliasing in C vs C++ [PR111884]
Checks
Commit Message
Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
-- >8 --
In the PR, Joseph points out that in C, char8_t is not a distinct type, so
we should treat it as able to alias anything, just like ordinary char.
In C, unsigned_char_type_node == char8_type_node, so with this patch
c_common_get_alias_set returns 0 for it instead of -1. The comment on the
check that follows currently says:
/* The C standard guarantees that any object may be accessed via an
lvalue that has narrow character type (except char8_t). */
if (t == char_type_node
|| t == signed_char_type_node
|| t == unsigned_char_type_node)
return 0;
The "(except char8_t)" part of that comment appears to be wrong, so I'm adjusting it as well.
PR c/111884
gcc/c-family/ChangeLog:
* c-common.cc (c_common_get_alias_set): Return -1 for char8_t only
in C++.
gcc/testsuite/ChangeLog:
* c-c++-common/alias-1.c: New test.
---
gcc/c-family/c-common.cc | 7 ++++---
gcc/testsuite/c-c++-common/alias-1.c | 23 +++++++++++++++++++++++
2 files changed, 27 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/c-c++-common/alias-1.c
base-commit: eb15fad3190a8b33e3e451b964ff1ecf08bbb113
Comments
On 10/20/23 12:31, Marek Polacek wrote:
> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
OK.
> -- >8 --
> In the PR, Joseph says that in C char8_t is not a distinct type. So
> we should behave as if it can alias anything, like ordinary char.
> In C, unsigned_char_type_node == char8_type_node, so with this patch
> we return 0 instead of -1. And the following comment says:
>
> /* The C standard guarantees that any object may be accessed via an
> lvalue that has narrow character type (except char8_t). */
> if (t == char_type_node
> || t == signed_char_type_node
> || t == unsigned_char_type_node)
> return 0;
>
> Which appears to be wrong, so I'm adjusting that as well.
>
> PR c/111884
>
> gcc/c-family/ChangeLog:
>
> * c-common.cc (c_common_get_alias_set): Return -1 for char8_t only
> in C++.
>
> gcc/testsuite/ChangeLog:
>
> * c-c++-common/alias-1.c: New test.
> ---
> gcc/c-family/c-common.cc | 7 ++++---
> gcc/testsuite/c-c++-common/alias-1.c | 23 +++++++++++++++++++++++
> 2 files changed, 27 insertions(+), 3 deletions(-)
> create mode 100644 gcc/testsuite/c-c++-common/alias-1.c
>
> diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
> index f044db5b797..0efdc677217 100644
> --- a/gcc/c-family/c-common.cc
> +++ b/gcc/c-family/c-common.cc
> @@ -3828,12 +3828,13 @@ c_common_get_alias_set (tree t)
> if (!TYPE_P (t))
> return -1;
>
> - /* Unlike char, char8_t doesn't alias. */
> - if (flag_char8_t && t == char8_type_node)
> + /* Unlike char, char8_t doesn't alias in C++. (In C, char8_t is not
> + a distinct type.) */
> + if (flag_char8_t && t == char8_type_node && c_dialect_cxx ())
> return -1;
>
> /* The C standard guarantees that any object may be accessed via an
> - lvalue that has narrow character type (except char8_t). */
> + lvalue that has narrow character type. */
> if (t == char_type_node
> || t == signed_char_type_node
> || t == unsigned_char_type_node)
> diff --git a/gcc/testsuite/c-c++-common/alias-1.c b/gcc/testsuite/c-c++-common/alias-1.c
> new file mode 100644
> index 00000000000..d72fec47f76
> --- /dev/null
> +++ b/gcc/testsuite/c-c++-common/alias-1.c
> @@ -0,0 +1,23 @@
> +/* PR c/111884 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -Wall" } */
> +/* { dg-additional-options "-std=c++20" { target c++ } } */
> +/* { dg-additional-options "-std=c2x" { target c } } */
> +
> +int f(int i)
> +{
> + int f = 1;
> + return i[(unsigned char *)&f];
> +}
> +
> +int g(int i)
> +{
> + int f = 1;
> + return i[(signed char *)&f];
> +}
> +
> +int h(int i)
> +{
> + int f = 1;
> + return i[(char *)&f];
> +}
>
> base-commit: eb15fad3190a8b33e3e451b964ff1ecf08bbb113
@@ -3828,12 +3828,13 @@ c_common_get_alias_set (tree t)
if (!TYPE_P (t))
return -1;
- /* Unlike char, char8_t doesn't alias. */
- if (flag_char8_t && t == char8_type_node)
+ /* Unlike char, char8_t doesn't alias in C++. (In C, char8_t is not
+ a distinct type.) */
+ if (flag_char8_t && t == char8_type_node && c_dialect_cxx ())
return -1;
/* The C standard guarantees that any object may be accessed via an
- lvalue that has narrow character type (except char8_t). */
+ lvalue that has narrow character type. */
if (t == char_type_node
|| t == signed_char_type_node
|| t == unsigned_char_type_node)
new file mode 100644
@@ -0,0 +1,23 @@
+/* PR c/111884 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -Wall" } */
+/* { dg-additional-options "-std=c++20" { target c++ } } */
+/* { dg-additional-options "-std=c2x" { target c } } */
+
+int f(int i)
+{
+ int f = 1;
+ return i[(unsigned char *)&f];
+}
+
+int g(int i)
+{
+ int f = 1;
+ return i[(signed char *)&f];
+}
+
+int h(int i)
+{
+ int f = 1;
+ return i[(char *)&f];
+}