MATCH: [PR111002] Sink view_convert for vec_cond

Message ID 20230821032123.3332286-1-apinski@marvell.com
State Unresolved
Headers
Series MATCH: [PR111002] Sink view_convert for vec_cond |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Andrew Pinski Aug. 21, 2023, 3:21 a.m. UTC
  Like convert, we can sink view_convert into vec_cond, but we can
only do it if the conversion between the element types is a nop
conversion (tree_nop_conversion_p).  This limits the transformation
to conversions such as those between signed and unsigned types, rather
than between integer and float types, which would mess up the vec_cond
so that isel no longer understands that `a?-1:0` is still of that form.

OK? Bootstrapped and tested on x86_64-linux-gnu and aarch64-linux-gnu.

	PR tree-optimization/111002

gcc/ChangeLog:

	* match.pd (view_convert(vec_cond(a,b,c))): New pattern.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/sve/cond_convert_8.c: New test.
---
 gcc/match.pd                                  |  9 ++++++++
 .../gcc.target/aarch64/sve/cond_convert_8.c   | 22 +++++++++++++++++++
 2 files changed, 31 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_convert_8.c
  

Comments

Richard Biener Aug. 21, 2023, 7:37 a.m. UTC | #1
On Mon, Aug 21, 2023 at 5:22 AM Andrew Pinski via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Like convert we can sink view_convert into vec_cond but
> we can only do it if the element types are nop_conversions.
> This is to allow conversion between signed and unsigned types only.
> Rather than between integer and float types which mess up the vec_cond
> so that isel does not understand `a?-1:0` is still that.
>
> OK? Bootstrapped and tested on x86_64-linux-gnu and aarch64-linux-gnu.

OK.

>         PR tree-optimization/111002
>
> gcc/ChangeLog:
>
>         * match.pd (view_convert(vec_cond(a,b,c))): New pattern.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/aarch64/sve/cond_convert_8.c: New test.
> ---
>  gcc/match.pd                                  |  9 ++++++++
>  .../gcc.target/aarch64/sve/cond_convert_8.c   | 22 +++++++++++++++++++
>  2 files changed, 31 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_convert_8.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 851f1af6eac..81666f28465 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -4718,6 +4718,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>        && types_match (TREE_TYPE (@0), truth_type_for (type)))
>    (vec_cond @0 (convert! @1) (convert! @2))))
>
> +/* Likewise for view_convert of nop_conversions. */
> +(simplify
> + (view_convert (vec_cond:s @0 @1 @2))
> + (if (VECTOR_TYPE_P (type) && VECTOR_TYPE_P (TREE_TYPE (@1))
> +      && known_eq (TYPE_VECTOR_SUBPARTS (type),
> +                  TYPE_VECTOR_SUBPARTS (TREE_TYPE (@1)))
> +      && tree_nop_conversion_p (TREE_TYPE (type), TREE_TYPE (TREE_TYPE (@1))))
> +  (vec_cond @0 (view_convert! @1) (view_convert! @2))))
> +
>  /* Sink binary operation to branches, but only if we can fold it.  */
>  (for op (tcc_comparison plus minus mult bit_and bit_ior bit_xor
>          lshift rshift rdiv trunc_div ceil_div floor_div round_div
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_8.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_8.c
> new file mode 100644
> index 00000000000..d8b96e5fcfb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_8.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=256 -fdump-tree-optimized" } */
> +/* PR tree-optimization/111002 */
> +
> +/* We should be able to remove the neg. */
> +
> +void __attribute__ ((noipa))
> +f (int *__restrict r,
> +   int *__restrict a,
> +   short *__restrict pred)
> +{
> +  for (int i = 0; i < 1024; ++i)
> +    r[i] = pred[i] != 0 ? -1 : 0;
> +}
> +
> +
> +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]+/z, #-1} 1 } } */
> +/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.[hs], p[0-7]+/z, #1} } } */
> +
> +/* { dg-final { scan-tree-dump-not "VIEW_CONVERT_EXPR " "optimized" } } */
> +/* { dg-final { scan-tree-dump-not " = -" "optimized" } } */
> +/* { dg-final { scan-tree-dump-not " = \\\(vector" "optimized" } } */
> --
> 2.31.1
>
  

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index 851f1af6eac..81666f28465 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4718,6 +4718,15 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
       && types_match (TREE_TYPE (@0), truth_type_for (type)))
   (vec_cond @0 (convert! @1) (convert! @2))))
 
+/* Likewise for view_convert of nop_conversions. */
+(simplify
+ (view_convert (vec_cond:s @0 @1 @2))
+ (if (VECTOR_TYPE_P (type) && VECTOR_TYPE_P (TREE_TYPE (@1))
+      && known_eq (TYPE_VECTOR_SUBPARTS (type),
+		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (@1)))
+      && tree_nop_conversion_p (TREE_TYPE (type), TREE_TYPE (TREE_TYPE (@1))))
+  (vec_cond @0 (view_convert! @1) (view_convert! @2))))
+
 /* Sink binary operation to branches, but only if we can fold it.  */
 (for op (tcc_comparison plus minus mult bit_and bit_ior bit_xor
 	 lshift rshift rdiv trunc_div ceil_div floor_div round_div
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_8.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_8.c
new file mode 100644
index 00000000000..d8b96e5fcfb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_8.c
@@ -0,0 +1,22 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=256 -fdump-tree-optimized" } */
+/* PR tree-optimization/111002 */
+
+/* We should be able to remove the neg. */
+
+void __attribute__ ((noipa))
+f (int *__restrict r,
+   int *__restrict a,
+   short *__restrict pred)
+{
+  for (int i = 0; i < 1024; ++i)
+    r[i] = pred[i] != 0 ? -1 : 0;
+}
+
+
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]+/z, #-1} 1 } } */
+/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.[hs], p[0-7]+/z, #1} } } */
+
+/* { dg-final { scan-tree-dump-not "VIEW_CONVERT_EXPR " "optimized" } } */
+/* { dg-final { scan-tree-dump-not " = -" "optimized" } } */
+/* { dg-final { scan-tree-dump-not " = \\\(vector" "optimized" } } */