[18/21] AArch64: Add optimization for vector != cbranch fed into compare with 0 for Advanced SIMD

Message ID ZUiY17ZhkfUlc4tp@arm.com
State Unresolved

Commit Message

Tamar Christina Nov. 6, 2023, 7:42 a.m. UTC
  Hi All,

Advanced SIMD lacks a vector != comparison, and unlike the compare-with-0
case we can't rewrite it to a cmtst.

This operation is, however, fairly common, especially now that we support
early break vectorization.

As such, this patch adds a pattern that recognizes the negated "any"
comparison and transforms it into an "all", i.e. any(~x) => all(x),
inverting the branch.

For example:

void f1 (int x)
{
  for (int i = 0; i < N; i++)
    {
      b[i] += a[i];
      if (a[i] != x)
	break;
    }
}

We currently generate:

	cmeq	v31.4s, v30.4s, v29.4s
	not	v31.16b, v31.16b
	umaxp	v31.4s, v31.4s, v31.4s
	fmov	x5, d31
	cbnz	x5, .L2

and after this patch:

	cmeq	v31.4s, v30.4s, v29.4s
	uminp	v31.4s, v31.4s, v31.4s
	fmov	x5, d31
	cbz	x5, .L2
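
To see why this holds, note that cmeq produces lanes that are either
all-zeros or all-ones.  On such masks "some lane of ~x is nonzero" is
exactly "some lane of x is zero", so a umax reduction over ~x and a
umin reduction over x test the same condition with opposite branch
polarity.  Below is a scalar model of the two reductions; a minimal
sketch for illustration only, not part of the patch, and the function
names are made up:

#include <assert.h>
#include <stdint.h>

/* Old sequence: not + umaxp + cbnz, i.e. branch if any lane of ~x is
   nonzero.  */
static int
branch_taken_old (const uint32_t x[4])
{
  uint32_t max = 0;
  for (int i = 0; i < 4; i++)
    max = ~x[i] > max ? ~x[i] : max;
  return max != 0;
}

/* New sequence: uminp + cbz, i.e. branch if the smallest lane of x is
   zero.  */
static int
branch_taken_new (const uint32_t x[4])
{
  uint32_t min = UINT32_MAX;
  for (int i = 0; i < 4; i++)
    min = x[i] < min ? x[i] : min;
  return min == 0;
}

int
main (void)
{
  uint32_t all_eq[4] = { ~0u, ~0u, ~0u, ~0u };  /* every lane matched */
  uint32_t one_ne[4] = { ~0u, 0u, ~0u, ~0u };   /* one lane differed */
  assert (branch_taken_old (all_eq) == branch_taken_new (all_eq));
  assert (branch_taken_old (one_ne) == branch_taken_new (one_ne));
  return 0;
}

Note that the two tests can disagree on lane values other than 0 and -1,
which is why the rewrite is only valid on the comparison masks that the
cbranch expander operates on.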

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md (*cbranchnev4si): New.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/vect-early-break-cbranch_2.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index cd5ec35c3f53028f14828bd70a92924f62524c15..b1a2c617d7d4106ab725d53a5d0b5c2fb61a0c78 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3870,6 +3870,37 @@ (define_expand "cbranch<mode>4"
   DONE;
 })
 
+;; Advanced SIMD lacks a vector != comparison, but this is quite a common
+;; operation.  To avoid paying the penalty of inverting ==, we can map our
+;; any comparisons to all, i.e. any(~x) => all(x).
+(define_insn_and_split "*cbranchnev4si"
+  [(set (pc)
+    (if_then_else
+      (ne (subreg:DI
+	    (unspec:V4SI
+	      [(not:V4SI (match_operand:V4SI 0 "register_operand" "w"))
+	       (not:V4SI (match_dup 0))]
+		UNSPEC_UMAXV) 0)
+	   (const_int 0))
+	(label_ref (match_operand 1 ""))
+	(pc)))
+    (clobber (match_scratch:V4SI 2 "=w"))]
+  "TARGET_SIMD"
+  "#"
+  "&& true"
+  [(set (match_dup 2)
+	(unspec:V4SI [(match_dup 0) (match_dup 0)] UNSPEC_UMINV))
+   (set (pc)
+    (if_then_else
+      (eq (subreg:DI (match_dup 2) 0)
+	  (const_int 0))
+	(label_ref (match_dup 1))
+	(pc)))]
+{
+  if (can_create_pseudo_p ())
+    operands[2] = gen_reg_rtx (V4SImode);
+})
+
 ;; Patterns comparing two vectors to produce a mask.
 
 (define_expand "vec_cmp<mode><mode>"
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..e81027bb50138be627f4dfdffb1557893a5a7723
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#define N 640
+int a[N] = {0};
+int b[N] = {0};
+
+
+/*
+** f1:
+**	...
+**	cmeq	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+**	uminp	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+**	fmov	x[0-9]+, d[0-9]+
+**	cbz	x[0-9]+, \.L[0-9]+
+**	...
+*/
+void f1 (int x)
+{
+  for (int i = 0; i < N; i++)
+    {
+      b[i] += a[i];
+      if (a[i] != x)
+	break;
+    }
+}
  
