MATCH: Sink convert for vec_cond

Message ID 20230817013733.3093010-1-apinski@marvell.com
State Accepted
Headers
Series MATCH: Sink convert for vec_cond |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Andrew Pinski Aug. 17, 2023, 1:37 a.m. UTC
  A convert can be sunk into a vec_cond if both sides
fold. Unlike other unary operations, we need to check that the type of
this vec_cond's first operand is the same as the new truth type.

I tried a few different versions of this patch:
using a view_convert to the new truth_type, but that does not work as we do not always support all vec_conds
afterwards;
and using expand_vec_cond_expr_p, but that would allow too much.

I also tried to see if view_convert can be handled here but we end up with:
  _3 = VEC_COND_EXPR <_2, {  Nan(-1),  Nan(-1),  Nan(-1),  Nan(-1) }, { 0.0, 0.0, 0.0, 0.0 }>;
Which isel does not know how to handle as just being a view_convert from `vector(4) <signed-boolean:32>`
to `vector(4) float` and causes a regression with `g++.target/i386/pr88152.C`

Note, in the case of the SVE testcase, we will sink negate after the convert and be able
to remove a few extra instructions in the end.
Also with this change gcc.target/aarch64/sve/cond_unary_5.c will now pass.

OK? Bootstrapped and tested on x86_64-linux-gnu and aarch64-linux-gnu.

gcc/ChangeLog:

	PR tree-optimization/111006
	PR tree-optimization/110986
	* match.pd: (op(vec_cond(a,b,c))): Handle convert for op.

gcc/testsuite/ChangeLog:

	PR tree-optimization/111006
	* gcc.target/aarch64/sve/cond_convert_7.c: New test.
---
 gcc/match.pd                                  |  9 ++++++++
 .../gcc.target/aarch64/sve/cond_convert_7.c   | 23 +++++++++++++++++++
 2 files changed, 32 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_convert_7.c
  

Comments

Richard Biener Aug. 18, 2023, 9:31 a.m. UTC | #1
On Thu, Aug 17, 2023 at 3:38 AM Andrew Pinski via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Convert be sinked into a vec_cond if both sides
> fold. Unlike other unary operations, we need to check that we still can handle
> this vec_cond's first operand is the same as the new truth type.
>
> I tried a few different versions of this patch:
> view_convert to the new truth_type but that does not work as we always support all vec_cond
> afterwards.
> using expand_vec_cond_expr_p; but that would allow too much.
>
> I also tried to see if view_convert can be handled here but we end up with:
>   _3 = VEC_COND_EXPR <_2, {  Nan(-1),  Nan(-1),  Nan(-1),  Nan(-1) }, { 0.0, 0.0, 0.0, 0.0 }>;
> Which isel does not know how to handle as just being a view_convert from `vector(4) <signed-boolean:32>`
> to `vector(4) float` and causes a regression with `g++.target/i386/pr88152.C`
>
> Note, in the case of the SVE testcase, we will sink negate after the convert and be able
> to remove a few extra instructions in the end.
> Also with this change gcc.target/aarch64/sve/cond_unary_5.c will now pass.
>
> OK? Bootstrapped and tested on x86_64-linux-gnu and aarch64-linux-gnu.
>
> gcc/ChangeLog:
>
>         PR tree-optimization/111006
>         PR tree-optimization/110986
>         * match.pd: (op(vec_cond(a,b,c))): Handle convert for op.
>
> gcc/testsuite/ChangeLog:
>
>         PR tree-optimization/111006
>         * gcc.target/aarch64/sve/cond_convert_7.c: New test.
> ---
>  gcc/match.pd                                  |  9 ++++++++
>  .../gcc.target/aarch64/sve/cond_convert_7.c   | 23 +++++++++++++++++++
>  2 files changed, 32 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_convert_7.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index acd2a964917..ca5ab6f289d 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -4704,6 +4704,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>    (op (vec_cond:s @0 @1 @2))
>    (vec_cond @0 (op! @1) (op! @2))))
>
> +/* Sink unary conversions to branches, but only if we do fold both
> +   and the target's truth type is the same as we already have.  */
> +(for op (convert)

This (for ..) looks unneeded?

Otherwise looks OK.

Thanks,
Richard.

> + (simplify
> +  (op (vec_cond:s @0 @1 @2))
> +  (if (VECTOR_TYPE_P (type)
> +       && types_match (TREE_TYPE (@0), truth_type_for (type)))
> +   (vec_cond @0 (op! @1) (op! @2)))))
> +
>  /* Sink binary operation to branches, but only if we can fold it.  */
>  (for op (tcc_comparison plus minus mult bit_and bit_ior bit_xor
>          lshift rshift rdiv trunc_div ceil_div floor_div round_div
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_7.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_7.c
> new file mode 100644
> index 00000000000..4bb95b92195
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_7.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=256 -fdump-tree-optimized" } */
> +
> +/* This is a modified reduced version of cond_unary_5.c */
> +
> +void __attribute__ ((noipa))
> +f0 (unsigned short *__restrict r,
> +   int *__restrict a,
> +   int *__restrict pred)
> +{
> +  for (int i = 0; i < 1024; ++i)
> +  {
> +    int p = pred[i]?-1:0;
> +    r[i] = p ;
> +  }
> +}
> +
> +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]+/z, #-1} 1 } } */
> +/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.[hs], p[0-7]+/z, #1} } } */
> +
> +/* { dg-final { scan-tree-dump-not "VIEW_CONVERT_EXPR " "optimized" } } */
> +/* { dg-final { scan-tree-dump-not " = -" "optimized" } } */
> +/* { dg-final { scan-tree-dump-not " = \\\(vector" "optimized" } } */
> --
> 2.31.1
>
  

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index acd2a964917..ca5ab6f289d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4704,6 +4704,15 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (op (vec_cond:s @0 @1 @2))
   (vec_cond @0 (op! @1) (op! @2))))
 
+/* Sink unary conversions to branches, but only if we do fold both
+   and the target's truth type is the same as we already have.  */
+(for op (convert)
+ (simplify
+  (op (vec_cond:s @0 @1 @2))
+  (if (VECTOR_TYPE_P (type)
+       && types_match (TREE_TYPE (@0), truth_type_for (type)))
+   (vec_cond @0 (op! @1) (op! @2)))))
+
 /* Sink binary operation to branches, but only if we can fold it.  */
 (for op (tcc_comparison plus minus mult bit_and bit_ior bit_xor
 	 lshift rshift rdiv trunc_div ceil_div floor_div round_div
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_7.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_7.c
new file mode 100644
index 00000000000..4bb95b92195
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_7.c
@@ -0,0 +1,23 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=256 -fdump-tree-optimized" } */
+
+/* This is a modified reduced version of cond_unary_5.c */
+
+void __attribute__ ((noipa))
+f0 (unsigned short *__restrict r,
+   int *__restrict a,
+   int *__restrict pred)
+{
+  for (int i = 0; i < 1024; ++i)
+  {
+    int p = pred[i]?-1:0;
+    r[i] = p ;
+  }
+}
+
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]+/z, #-1} 1 } } */
+/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.[hs], p[0-7]+/z, #1} } } */
+
+/* { dg-final { scan-tree-dump-not "VIEW_CONVERT_EXPR " "optimized" } } */
+/* { dg-final { scan-tree-dump-not " = -" "optimized" } } */
+/* { dg-final { scan-tree-dump-not " = \\\(vector" "optimized" } } */