i386: Fix ICE on __builtin_ia32_pabsd128 without lhs [PR112962]
Checks
Commit Message
Hi!
The following patch fixes an ICE on the testcase in a similar way to how
other folded builtins are handled in ix86_gimple_fold_builtin when
they don't have a lhs; these builtins are const or pure, so normally
DCE would remove them later, but with -O0 that isn't guaranteed to
happen, and if they are marked TREE_SIDE_EFFECTS, expansion might
still be attempted on them.
This removes them right away during the folding.
Initially I wanted to also change the last argument of all gsi_replace calls
in that function to true, but Andrew pointed to PR107209, so I've kept them as is.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2023-12-13 Jakub Jelinek <jakub@redhat.com>
PR target/112962
* config/i386/i386.cc (ix86_gimple_fold_builtin): For shifts
and abs without lhs replace with nop.
* gcc.target/i386/pr112962.c: New test.
Jakub
Comments
On Wed, Dec 13, 2023 at 4:44 PM Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> The following patch fixes ICE on the testcase in similar way to how
> other folded builtins are handled in ix86_gimple_fold_builtin when
> they don't have a lhs; these builtins are const or pure, so normally
> DCE would remove them later, but with -O0 that isn't guaranteed to
> happen, and during expansion if they are marked TREE_SIDE_EFFECTS
> it might still be attempted to be expanded.
> This removes them right away during the folding.
>
> Initially I wanted to also change all gsi_replace last args in that function
> to true, but Andrew pointed to PR107209, so I've kept them as is.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Ok.
>
> 2023-12-13 Jakub Jelinek <jakub@redhat.com>
>
> PR target/112962
> * config/i386/i386.cc (ix86_gimple_fold_builtin): For shifts
> and abs without lhs replace with nop.
>
> * gcc.target/i386/pr112962.c: New test.
>
> --- gcc/config/i386/i386.cc.jj 2023-12-12 13:06:05.864509295 +0100
> +++ gcc/config/i386/i386.cc 2023-12-13 00:02:28.543600557 +0100
> @@ -19377,7 +19377,10 @@ ix86_gimple_fold_builtin (gimple_stmt_it
> do_shift:
> gcc_assert (n_args >= 2);
> if (!gimple_call_lhs (stmt))
> - break;
> + {
> + gsi_replace (gsi, gimple_build_nop (), false);
> + return true;
> + }
> arg0 = gimple_call_arg (stmt, 0);
> arg1 = gimple_call_arg (stmt, 1);
> elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
> @@ -19523,7 +19526,10 @@ ix86_gimple_fold_builtin (gimple_stmt_it
> case IX86_BUILTIN_PABSD256_MASK:
> gcc_assert (n_args >= 1);
> if (!gimple_call_lhs (stmt))
> - break;
> + {
> + gsi_replace (gsi, gimple_build_nop (), false);
> + return true;
> + }
> arg0 = gimple_call_arg (stmt, 0);
> elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
> /* For masked ABS, only optimize if the mask is all ones. */
> --- gcc/testsuite/gcc.target/i386/pr112962.c.jj 2023-12-12 11:56:56.735917531 +0100
> +++ gcc/testsuite/gcc.target/i386/pr112962.c 2023-12-12 11:56:39.406157222 +0100
> @@ -0,0 +1,11 @@
> +/* PR target/112962 */
> +/* { dg-do compile } */
> +/* { dg-options "-fexceptions -mssse3" } */
> +
> +typedef int __attribute__((__vector_size__ (16))) V;
> +
> +void
> +foo (void)
> +{
> + __builtin_ia32_pabsd128 ((V) {});
> +}
>
> Jakub
>
@@ -19377,7 +19377,10 @@ ix86_gimple_fold_builtin (gimple_stmt_it
do_shift:
gcc_assert (n_args >= 2);
if (!gimple_call_lhs (stmt))
- break;
+ {
+ gsi_replace (gsi, gimple_build_nop (), false);
+ return true;
+ }
arg0 = gimple_call_arg (stmt, 0);
arg1 = gimple_call_arg (stmt, 1);
elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
@@ -19523,7 +19526,10 @@ ix86_gimple_fold_builtin (gimple_stmt_it
case IX86_BUILTIN_PABSD256_MASK:
gcc_assert (n_args >= 1);
if (!gimple_call_lhs (stmt))
- break;
+ {
+ gsi_replace (gsi, gimple_build_nop (), false);
+ return true;
+ }
arg0 = gimple_call_arg (stmt, 0);
elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
/* For masked ABS, only optimize if the mask is all ones. */
@@ -0,0 +1,11 @@
+/* PR target/112962 */
+/* { dg-do compile } */
+/* { dg-options "-fexceptions -mssse3" } */
+
+typedef int __attribute__((__vector_size__ (16))) V;
+
+void
+foo (void)
+{
+ __builtin_ia32_pabsd128 ((V) {});
+}