[v3,2/2] middle-end match.pd: optimize fneg (fabs (x)) to copysign (x, -1) [PR109154]

Message ID ZUi94WJQcd9fq5vi@arm.com
State Unresolved
Headers
Series [v3,1/2] middle-end: expand copysign handling from lockstep to nested iters |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Tamar Christina Nov. 6, 2023, 10:20 a.m. UTC
  Hi All,

This patch transforms fneg (fabs (x)) into copysign (x, -1) which is more
canonical and allows a target to expand this sequence efficiently.  Such
sequences are common in scientific code working with gradients.

There is an existing canonicalization of copysign (x, -1) to fneg (fabs (x))
which I remove since this is a less efficient form.  The testsuite is also
updated in light of this.

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	PR tree-optimization/109154
	* match.pd: Add new neg+abs rule, remove inverse copysign rule.

gcc/testsuite/ChangeLog:

	PR tree-optimization/109154
	* gcc.dg/fold-copysign-1.c: Updated.
	* gcc.dg/pr55152-2.c: Updated.
	* gcc.dg/tree-ssa/abs-4.c: Updated.
	* gcc.dg/tree-ssa/backprop-6.c: Updated.
	* gcc.dg/tree-ssa/copy-sign-2.c: Updated.
	* gcc.dg/tree-ssa/mult-abs-2.c: Updated.
	* gcc.target/aarch64/fneg-abs_1.c: New test.
	* gcc.target/aarch64/fneg-abs_2.c: New test.
	* gcc.target/aarch64/fneg-abs_3.c: New test.
	* gcc.target/aarch64/fneg-abs_4.c: New test.
	* gcc.target/aarch64/sve/fneg-abs_1.c: New test.
	* gcc.target/aarch64/sve/fneg-abs_2.c: New test.
	* gcc.target/aarch64/sve/fneg-abs_3.c: New test.
	* gcc.target/aarch64/sve/fneg-abs_4.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/match.pd b/gcc/match.pd
index db95931df0672cf4ef08cca36085c3aa6831519e..7a023d510c283c43a87b1795a74761b8af979b53 100644




--
diff --git a/gcc/match.pd b/gcc/match.pd
index db95931df0672cf4ef08cca36085c3aa6831519e..7a023d510c283c43a87b1795a74761b8af979b53 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1106,13 +1106,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    (hypots @0 (copysigns @1 @2))
    (hypots @0 @1))))
 
-/* copysign(x, CST) -> [-]abs (x).  */
-(for copysigns (COPYSIGN_ALL)
- (simplify
-  (copysigns @0 REAL_CST@1)
-  (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
-   (negate (abs @0))
-   (abs @0))))
+/* Transform fneg (fabs (X)) -> copysign (X, -1).  */
+
+(simplify
+ (negate (abs @0))
+ (IFN_COPYSIGN @0 { build_minus_one_cst (type); }))
 
 /* copysign(copysign(x, y), z) -> copysign(x, z).  */
 (for copysigns (COPYSIGN_ALL)
diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c b/gcc/testsuite/gcc.dg/fold-copysign-1.c
index f17d65c24ee4dca9867827d040fe0a404c515e7b..f9cafd14ab05f5e8ab2f6f68e62801d21c2df6a6 100644
--- a/gcc/testsuite/gcc.dg/fold-copysign-1.c
+++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c
@@ -12,5 +12,5 @@ double bar (double x)
   return __builtin_copysign (x, minuszero);
 }
 
-/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" } } */
-/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" } } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" } } */
diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c
index 54db0f2062da105a829d6690ac8ed9891fe2b588..605f202ed6bc7aa8fe921457b02ff0b88cc63ce6 100644
--- a/gcc/testsuite/gcc.dg/pr55152-2.c
+++ b/gcc/testsuite/gcc.dg/pr55152-2.c
@@ -10,4 +10,5 @@ int f(int a)
   return (a<-a)?a:-a;
 }
 
-/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
index 6197519faf7b55aed7bc162cd0a14dd2145210ca..e1b825f37f69ac3c4666b3a52d733368805ad31d 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
@@ -9,5 +9,6 @@ long double abs_ld(long double x) { return __builtin_signbit(x) ? x : -x; }
 
 /* __builtin_signbit(x) ? x : -x. Should be convert into - ABS_EXP<x> */
 /* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= -" 3 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
index 31f05716f1498dc709cac95fa20fb5796642c77e..c3a138642d6ff7be984e91fa1343cb2718db7ae1 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
@@ -26,5 +26,6 @@ TEST_FUNCTION (float, f)
 TEST_FUNCTION (double, )
 TEST_FUNCTION (long double, l)
 
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" } } */
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
index de52c5f7c8062958353d91f5031193defc9f3f91..e5d565c4b9832c00106588ef411fbd8c292a5cad 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
@@ -10,4 +10,5 @@ float f1(float x)
   float t = __builtin_copysignf (1.0f, -x);
   return x * t;
 }
-/* { dg-final { scan-tree-dump-times "ABS" 2 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
index a41f1baf25669a4fd301a586a49ba5e3c5b966ab..a22896b21c8b5a4d5d8e28bd8ae0db896e63ade0 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
@@ -34,4 +34,5 @@ float i1(float x)
 {
   return x * (x <= 0.f ? 1.f : -1.f);
 }
-/* { dg-final { scan-tree-dump-times "ABS" 8 "gimple"} } */
+/* { dg-final { scan-tree-dump-times "ABS" 4 "gimple"} } */
+/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 4 "gimple"} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..f823013c3ddf6b3a266c3abfcbf2642fc2a75fa6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+
+/*
+** t1:
+**	orr	v[0-9]+.2s, #128, lsl #24
+**	ret
+*/
+float32x2_t t1 (float32x2_t a)
+{
+  return vneg_f32 (vabs_f32 (a));
+}
+
+/*
+** t2:
+**	orr	v[0-9]+.4s, #128, lsl #24
+**	ret
+*/
+float32x4_t t2 (float32x4_t a)
+{
+  return vnegq_f32 (vabsq_f32 (a));
+}
+
+/*
+** t3:
+**	adrp	x0, .LC[0-9]+
+**	ldr	q[0-9]+, \[x0, #:lo12:.LC0\]
+**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**	ret
+*/
+float64x2_t t3 (float64x2_t a)
+{
+  return vnegq_f64 (vabsq_f64 (a));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..141121176b309e4b2aa413dc55271a6e3c93d5e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**	movi	v[0-9]+.2s, 0x80, lsl 24
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float32_t f1 (float32_t a)
+{
+  return -fabsf (a);
+}
+
+/*
+** f2:
+**	mov	x0, -9223372036854775808
+**	fmov	d[0-9]+, x0
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float64_t f2 (float64_t a)
+{
+  return -fabs (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..b4652173a95d104ddfa70c497f0627a61ea89d3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**	...
+**	ldr	q[0-9]+, \[x0\]
+**	orr	v[0-9]+.4s, #128, lsl #24
+**	str	q[0-9]+, \[x0\], 16
+**	...
+*/
+void f1 (float32_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabsf (a[i]);
+}
+
+/*
+** f2:
+**	...
+**	ldr	q[0-9]+, \[x0\]
+**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**	str	q[0-9]+, \[x0\], 16
+**	...
+*/
+void f2 (float64_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabs (a[i]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..10879dea74462d34b26160eeb0bd54ead063166b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <string.h>
+
+/*
+** negabs:
+**	mov	x0, -9223372036854775808
+**	fmov	d[0-9]+, x0
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+double negabs (double x)
+{
+   unsigned long long y;
+   memcpy (&y, &x, sizeof(double));
+   y = y | (1UL << 63);
+   memcpy (&x, &y, sizeof(double));
+   return x;
+}
+
+/*
+** negabsf:
+**	movi	v[0-9]+.2s, 0x80, lsl 24
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float negabsf (float x)
+{
+   unsigned int y;
+   memcpy (&y, &x, sizeof(float));
+   y = y | (1U << 31);
+   memcpy (&x, &y, sizeof(float));
+   return x;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..0c7664e6de77a497682952653ffd417453854d52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+
+/*
+** t1:
+**	orr	v[0-9]+.2s, #128, lsl #24
+**	ret
+*/
+float32x2_t t1 (float32x2_t a)
+{
+  return vneg_f32 (vabs_f32 (a));
+}
+
+/*
+** t2:
+**	orr	v[0-9]+.4s, #128, lsl #24
+**	ret
+*/
+float32x4_t t2 (float32x4_t a)
+{
+  return vnegq_f32 (vabsq_f32 (a));
+}
+
+/*
+** t3:
+**	adrp	x0, .LC[0-9]+
+**	ldr	q[0-9]+, \[x0, #:lo12:.LC0\]
+**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**	ret
+*/
+float64x2_t t3 (float64x2_t a)
+{
+  return vnegq_f64 (vabsq_f64 (a));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..a60cd31b9294af2dac69eed1c93f899bd5c78fca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**	movi	v[0-9]+.2s, 0x80, lsl 24
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float32_t f1 (float32_t a)
+{
+  return -fabsf (a);
+}
+
+/*
+** f2:
+**	mov	x0, -9223372036854775808
+**	fmov	d[0-9]+, x0
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float64_t f2 (float64_t a)
+{
+  return -fabs (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..1bf34328d8841de8e6b0a5458562a9f00e31c275
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**	...
+**	ld1w	z[0-9]+.s, p[0-9]+/z, \[x0, x2, lsl 2\]
+**	orr	z[0-9]+.s, z[0-9]+.s, #0x80000000
+**	st1w	z[0-9]+.s, p[0-9]+, \[x0, x2, lsl 2\]
+**	...
+*/
+void f1 (float32_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabsf (a[i]);
+}
+
+/*
+** f2:
+**	...
+**	ld1d	z[0-9]+.d, p[0-9]+/z, \[x0, x2, lsl 3\]
+**	orr	z[0-9]+.d, z[0-9]+.d, #0x8000000000000000
+**	st1d	z[0-9]+.d, p[0-9]+, \[x0, x2, lsl 3\]
+**	...
+*/
+void f2 (float64_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabs (a[i]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..21f2a8da2a5d44e3d01f6604ca7be87e3744d494
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <string.h>
+
+/*
+** negabs:
+**	mov	x0, -9223372036854775808
+**	fmov	d[0-9]+, x0
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+double negabs (double x)
+{
+   unsigned long long y;
+   memcpy (&y, &x, sizeof(double));
+   y = y | (1UL << 63);
+   memcpy (&x, &y, sizeof(double));
+   return x;
+}
+
+/*
+** negabsf:
+**	movi	v[0-9]+.2s, 0x80, lsl 24
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float negabsf (float x)
+{
+   unsigned int y;
+   memcpy (&y, &x, sizeof(float));
+   y = y | (1U << 31);
+   memcpy (&x, &y, sizeof(float));
+   return x;
+}
+
  

Comments

Richard Biener Nov. 6, 2023, 1:04 p.m. UTC | #1
On Mon, 6 Nov 2023, Tamar Christina wrote:

> Hi All,
> 
> This patch transforms fneg (fabs (x)) into copysign (x, -1) which is more
> canonical and allows a target to expand this sequence efficiently.  Such
> sequences are common in scientific code working with gradients.
> 
> There is an existing canonicalization of copysign (x, -1) to fneg (fabs (x))
> which I remove since this is a less efficient form.  The testsuite is also
> updated in light of this.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	PR tree-optimization/109154
> 	* match.pd: Add new neg+abs rule, remove inverse copysign rule.
> 
> gcc/testsuite/ChangeLog:
> 
> 	PR tree-optimization/109154
> 	* gcc.dg/fold-copysign-1.c: Updated.
> 	* gcc.dg/pr55152-2.c: Updated.
> 	* gcc.dg/tree-ssa/abs-4.c: Updated.
> 	* gcc.dg/tree-ssa/backprop-6.c: Updated.
> 	* gcc.dg/tree-ssa/copy-sign-2.c: Updated.
> 	* gcc.dg/tree-ssa/mult-abs-2.c: Updated.
> 	* gcc.target/aarch64/fneg-abs_1.c: New test.
> 	* gcc.target/aarch64/fneg-abs_2.c: New test.
> 	* gcc.target/aarch64/fneg-abs_3.c: New test.
> 	* gcc.target/aarch64/fneg-abs_4.c: New test.
> 	* gcc.target/aarch64/sve/fneg-abs_1.c: New test.
> 	* gcc.target/aarch64/sve/fneg-abs_2.c: New test.
> 	* gcc.target/aarch64/sve/fneg-abs_3.c: New test.
> 	* gcc.target/aarch64/sve/fneg-abs_4.c: New test.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index db95931df0672cf4ef08cca36085c3aa6831519e..7a023d510c283c43a87b1795a74761b8af979b53 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -1106,13 +1106,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>     (hypots @0 (copysigns @1 @2))
>     (hypots @0 @1))))
>  
> -/* copysign(x, CST) -> [-]abs (x).  */
> -(for copysigns (COPYSIGN_ALL)
> - (simplify
> -  (copysigns @0 REAL_CST@1)
> -  (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
> -   (negate (abs @0))
> -   (abs @0))))
> +/* Transform fneg (fabs (X)) -> copysign (X, -1).  */
> +
> +(simplify
> + (negate (abs @0))
> + (IFN_COPYSIGN @0 { build_minus_one_cst (type); }))

You also drop copysign (x, CST) -> abs (x) when CST is not
negative - I think that's still worthwhile as it has one less
argument?

Keeping that might also need less testsuite adjustments?

Richard.

>  /* copysign(copysign(x, y), z) -> copysign(x, z).  */
>  (for copysigns (COPYSIGN_ALL)
> diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c b/gcc/testsuite/gcc.dg/fold-copysign-1.c
> index f17d65c24ee4dca9867827d040fe0a404c515e7b..f9cafd14ab05f5e8ab2f6f68e62801d21c2df6a6 100644
> --- a/gcc/testsuite/gcc.dg/fold-copysign-1.c
> +++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c
> @@ -12,5 +12,5 @@ double bar (double x)
>    return __builtin_copysign (x, minuszero);
>  }
>  
> -/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" } } */
> -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" } } */
> +/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" } } */
> +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" } } */
> diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c
> index 54db0f2062da105a829d6690ac8ed9891fe2b588..605f202ed6bc7aa8fe921457b02ff0b88cc63ce6 100644
> --- a/gcc/testsuite/gcc.dg/pr55152-2.c
> +++ b/gcc/testsuite/gcc.dg/pr55152-2.c
> @@ -10,4 +10,5 @@ int f(int a)
>    return (a<-a)?a:-a;
>  }
>  
> -/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> index 6197519faf7b55aed7bc162cd0a14dd2145210ca..e1b825f37f69ac3c4666b3a52d733368805ad31d 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> @@ -9,5 +9,6 @@ long double abs_ld(long double x) { return __builtin_signbit(x) ? x : -x; }
>  
>  /* __builtin_signbit(x) ? x : -x. Should be convert into - ABS_EXP<x> */
>  /* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */
> -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized"} } */
> -/* { dg-final { scan-tree-dump-times "= -" 3 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized"} } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> index 31f05716f1498dc709cac95fa20fb5796642c77e..c3a138642d6ff7be984e91fa1343cb2718db7ae1 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> @@ -26,5 +26,6 @@ TEST_FUNCTION (float, f)
>  TEST_FUNCTION (double, )
>  TEST_FUNCTION (long double, l)
>  
> -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" } } */
> -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> index de52c5f7c8062958353d91f5031193defc9f3f91..e5d565c4b9832c00106588ef411fbd8c292a5cad 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> @@ -10,4 +10,5 @@ float f1(float x)
>    float t = __builtin_copysignf (1.0f, -x);
>    return x * t;
>  }
> -/* { dg-final { scan-tree-dump-times "ABS" 2 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized"} } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> index a41f1baf25669a4fd301a586a49ba5e3c5b966ab..a22896b21c8b5a4d5d8e28bd8ae0db896e63ade0 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> @@ -34,4 +34,5 @@ float i1(float x)
>  {
>    return x * (x <= 0.f ? 1.f : -1.f);
>  }
> -/* { dg-final { scan-tree-dump-times "ABS" 8 "gimple"} } */
> +/* { dg-final { scan-tree-dump-times "ABS" 4 "gimple"} } */
> +/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 4 "gimple"} } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..f823013c3ddf6b3a266c3abfcbf2642fc2a75fa6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
> @@ -0,0 +1,39 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#pragma GCC target "+nosve"
> +
> +#include <arm_neon.h>
> +
> +/*
> +** t1:
> +**	orr	v[0-9]+.2s, #128, lsl #24
> +**	ret
> +*/
> +float32x2_t t1 (float32x2_t a)
> +{
> +  return vneg_f32 (vabs_f32 (a));
> +}
> +
> +/*
> +** t2:
> +**	orr	v[0-9]+.4s, #128, lsl #24
> +**	ret
> +*/
> +float32x4_t t2 (float32x4_t a)
> +{
> +  return vnegq_f32 (vabsq_f32 (a));
> +}
> +
> +/*
> +** t3:
> +**	adrp	x0, .LC[0-9]+
> +**	ldr	q[0-9]+, \[x0, #:lo12:.LC0\]
> +**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> +**	ret
> +*/
> +float64x2_t t3 (float64x2_t a)
> +{
> +  return vnegq_f64 (vabsq_f64 (a));
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..141121176b309e4b2aa413dc55271a6e3c93d5e1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#pragma GCC target "+nosve"
> +
> +#include <arm_neon.h>
> +#include <math.h>
> +
> +/*
> +** f1:
> +**	movi	v[0-9]+.2s, 0x80, lsl 24
> +**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**	ret
> +*/
> +float32_t f1 (float32_t a)
> +{
> +  return -fabsf (a);
> +}
> +
> +/*
> +** f2:
> +**	mov	x0, -9223372036854775808
> +**	fmov	d[0-9]+, x0
> +**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**	ret
> +*/
> +float64_t f2 (float64_t a)
> +{
> +  return -fabs (a);
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..b4652173a95d104ddfa70c497f0627a61ea89d3b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
> @@ -0,0 +1,36 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#pragma GCC target "+nosve"
> +
> +#include <arm_neon.h>
> +#include <math.h>
> +
> +/*
> +** f1:
> +**	...
> +**	ldr	q[0-9]+, \[x0\]
> +**	orr	v[0-9]+.4s, #128, lsl #24
> +**	str	q[0-9]+, \[x0\], 16
> +**	...
> +*/
> +void f1 (float32_t *a, int n)
> +{
> +  for (int i = 0; i < (n & -8); i++)
> +   a[i] = -fabsf (a[i]);
> +}
> +
> +/*
> +** f2:
> +**	...
> +**	ldr	q[0-9]+, \[x0\]
> +**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> +**	str	q[0-9]+, \[x0\], 16
> +**	...
> +*/
> +void f2 (float64_t *a, int n)
> +{
> +  for (int i = 0; i < (n & -8); i++)
> +   a[i] = -fabs (a[i]);
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..10879dea74462d34b26160eeb0bd54ead063166b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
> @@ -0,0 +1,39 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#pragma GCC target "+nosve"
> +
> +#include <string.h>
> +
> +/*
> +** negabs:
> +**	mov	x0, -9223372036854775808
> +**	fmov	d[0-9]+, x0
> +**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**	ret
> +*/
> +double negabs (double x)
> +{
> +   unsigned long long y;
> +   memcpy (&y, &x, sizeof(double));
> +   y = y | (1UL << 63);
> +   memcpy (&x, &y, sizeof(double));
> +   return x;
> +}
> +
> +/*
> +** negabsf:
> +**	movi	v[0-9]+.2s, 0x80, lsl 24
> +**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**	ret
> +*/
> +float negabsf (float x)
> +{
> +   unsigned int y;
> +   memcpy (&y, &x, sizeof(float));
> +   y = y | (1U << 31);
> +   memcpy (&x, &y, sizeof(float));
> +   return x;
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..0c7664e6de77a497682952653ffd417453854d52
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
> @@ -0,0 +1,37 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#include <arm_neon.h>
> +
> +/*
> +** t1:
> +**	orr	v[0-9]+.2s, #128, lsl #24
> +**	ret
> +*/
> +float32x2_t t1 (float32x2_t a)
> +{
> +  return vneg_f32 (vabs_f32 (a));
> +}
> +
> +/*
> +** t2:
> +**	orr	v[0-9]+.4s, #128, lsl #24
> +**	ret
> +*/
> +float32x4_t t2 (float32x4_t a)
> +{
> +  return vnegq_f32 (vabsq_f32 (a));
> +}
> +
> +/*
> +** t3:
> +**	adrp	x0, .LC[0-9]+
> +**	ldr	q[0-9]+, \[x0, #:lo12:.LC0\]
> +**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> +**	ret
> +*/
> +float64x2_t t3 (float64x2_t a)
> +{
> +  return vnegq_f64 (vabsq_f64 (a));
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..a60cd31b9294af2dac69eed1c93f899bd5c78fca
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#include <arm_neon.h>
> +#include <math.h>
> +
> +/*
> +** f1:
> +**	movi	v[0-9]+.2s, 0x80, lsl 24
> +**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**	ret
> +*/
> +float32_t f1 (float32_t a)
> +{
> +  return -fabsf (a);
> +}
> +
> +/*
> +** f2:
> +**	mov	x0, -9223372036854775808
> +**	fmov	d[0-9]+, x0
> +**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**	ret
> +*/
> +float64_t f2 (float64_t a)
> +{
> +  return -fabs (a);
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..1bf34328d8841de8e6b0a5458562a9f00e31c275
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
> @@ -0,0 +1,34 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#include <arm_neon.h>
> +#include <math.h>
> +
> +/*
> +** f1:
> +**	...
> +**	ld1w	z[0-9]+.s, p[0-9]+/z, \[x0, x2, lsl 2\]
> +**	orr	z[0-9]+.s, z[0-9]+.s, #0x80000000
> +**	st1w	z[0-9]+.s, p[0-9]+, \[x0, x2, lsl 2\]
> +**	...
> +*/
> +void f1 (float32_t *a, int n)
> +{
> +  for (int i = 0; i < (n & -8); i++)
> +   a[i] = -fabsf (a[i]);
> +}
> +
> +/*
> +** f2:
> +**	...
> +**	ld1d	z[0-9]+.d, p[0-9]+/z, \[x0, x2, lsl 3\]
> +**	orr	z[0-9]+.d, z[0-9]+.d, #0x8000000000000000
> +**	st1d	z[0-9]+.d, p[0-9]+, \[x0, x2, lsl 3\]
> +**	...
> +*/
> +void f2 (float64_t *a, int n)
> +{
> +  for (int i = 0; i < (n & -8); i++)
> +   a[i] = -fabs (a[i]);
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..21f2a8da2a5d44e3d01f6604ca7be87e3744d494
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
> @@ -0,0 +1,37 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#include <string.h>
> +
> +/*
> +** negabs:
> +**	mov	x0, -9223372036854775808
> +**	fmov	d[0-9]+, x0
> +**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**	ret
> +*/
> +double negabs (double x)
> +{
> +   unsigned long long y;
> +   memcpy (&y, &x, sizeof(double));
> +   y = y | (1UL << 63);
> +   memcpy (&x, &y, sizeof(double));
> +   return x;
> +}
> +
> +/*
> +** negabsf:
> +**	movi	v[0-9]+.2s, 0x80, lsl 24
> +**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**	ret
> +*/
> +float negabsf (float x)
> +{
> +   unsigned int y;
> +   memcpy (&y, &x, sizeof(float));
> +   y = y | (1U << 31);
> +   memcpy (&x, &y, sizeof(float));
> +   return x;
> +}
> +
> 
> 
> 
> 
>
  
Tamar Christina Nov. 7, 2023, 9:25 a.m. UTC | #2
> 
> You also drop copysign (x, CST) -> abs (x) when CST is not negative - I think that's
> still worthwhile as it has one less argument?
> 
> Keeping that might also need less testsuite adjustments?

Done, but the testsuite updates were still needed.

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	PR tree-optimization/109154
	* match.pd: Add new neg+abs rule, remove inverse copysign rule for
	negative constants.

gcc/testsuite/ChangeLog:

	PR tree-optimization/109154
	* gcc.dg/fold-copysign-1.c: Updated.
	* gcc.dg/pr55152-2.c: Updated.
	* gcc.dg/tree-ssa/abs-4.c: Updated.
	* gcc.dg/tree-ssa/backprop-6.c: Updated.
	* gcc.dg/tree-ssa/copy-sign-2.c: Updated.
	* gcc.dg/tree-ssa/mult-abs-2.c: Updated.
	* gcc.target/aarch64/fneg-abs_1.c: New test.
	* gcc.target/aarch64/fneg-abs_2.c: New test.
	* gcc.target/aarch64/fneg-abs_3.c: New test.
	* gcc.target/aarch64/fneg-abs_4.c: New test.
	* gcc.target/aarch64/sve/fneg-abs_1.c: New test.
	* gcc.target/aarch64/sve/fneg-abs_2.c: New test.
	* gcc.target/aarch64/sve/fneg-abs_3.c: New test.
	* gcc.target/aarch64/sve/fneg-abs_4.c: New test.

--- inline copy of the patch ---

diff --git a/gcc/match.pd b/gcc/match.pd
index 68a1587ea2465a890875448993e140425ef6a68f..5928acbb14e2a18addff38000bf30dd273d4b1a6 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1118,14 +1118,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    (hypots @0 (copysigns @1 @2))
    (hypots @0 @1))))
 
-/* copysign(x, CST) -> [-]abs (x).  */
+/* copysign(x, CST) -> abs (x).  */
 (for copysigns (COPYSIGN_ALL)
  (simplify
   (copysigns @0 REAL_CST@1)
-  (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
-   (negate (abs @0))
+  (if (!REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
    (abs @0))))
 
+/* Transform fneg (fabs (X)) -> copysign (X, -1).  */
+(simplify
+ (negate (abs @0))
+ (IFN_COPYSIGN @0 { build_minus_one_cst (type); }))
+
 /* copysign(copysign(x, y), z) -> copysign(x, z).  */
 (for copysigns (COPYSIGN_ALL)
  (simplify
diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c b/gcc/testsuite/gcc.dg/fold-copysign-1.c
index f17d65c24ee4dca9867827d040fe0a404c515e7b..f9cafd14ab05f5e8ab2f6f68e62801d21c2df6a6 100644
--- a/gcc/testsuite/gcc.dg/fold-copysign-1.c
+++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c
@@ -12,5 +12,5 @@ double bar (double x)
   return __builtin_copysign (x, minuszero);
 }
 
-/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" } } */
-/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" } } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" } } */
diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c
index 54db0f2062da105a829d6690ac8ed9891fe2b588..605f202ed6bc7aa8fe921457b02ff0b88cc63ce6 100644
--- a/gcc/testsuite/gcc.dg/pr55152-2.c
+++ b/gcc/testsuite/gcc.dg/pr55152-2.c
@@ -10,4 +10,5 @@ int f(int a)
   return (a<-a)?a:-a;
 }
 
-/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
index 6197519faf7b55aed7bc162cd0a14dd2145210ca..e1b825f37f69ac3c4666b3a52d733368805ad31d 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
@@ -9,5 +9,6 @@ long double abs_ld(long double x) { return __builtin_signbit(x) ? x : -x; }
 
 /* __builtin_signbit(x) ? x : -x. Should be convert into - ABS_EXP<x> */
 /* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= -" 3 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
index 31f05716f1498dc709cac95fa20fb5796642c77e..c3a138642d6ff7be984e91fa1343cb2718db7ae1 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
@@ -26,5 +26,6 @@ TEST_FUNCTION (float, f)
 TEST_FUNCTION (double, )
 TEST_FUNCTION (long double, l)
 
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" } } */
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
index de52c5f7c8062958353d91f5031193defc9f3f91..e5d565c4b9832c00106588ef411fbd8c292a5cad 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
@@ -10,4 +10,5 @@ float f1(float x)
   float t = __builtin_copysignf (1.0f, -x);
   return x * t;
 }
-/* { dg-final { scan-tree-dump-times "ABS" 2 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
index a41f1baf25669a4fd301a586a49ba5e3c5b966ab..a22896b21c8b5a4d5d8e28bd8ae0db896e63ade0 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
@@ -34,4 +34,5 @@ float i1(float x)
 {
   return x * (x <= 0.f ? 1.f : -1.f);
 }
-/* { dg-final { scan-tree-dump-times "ABS" 8 "gimple"} } */
+/* { dg-final { scan-tree-dump-times "ABS" 4 "gimple"} } */
+/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 4 "gimple"} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..f823013c3ddf6b3a266c3abfcbf2642fc2a75fa6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+
+/*
+** t1:
+**	orr	v[0-9]+.2s, #128, lsl #24
+**	ret
+*/
+float32x2_t t1 (float32x2_t a)
+{
+  return vneg_f32 (vabs_f32 (a));
+}
+
+/*
+** t2:
+**	orr	v[0-9]+.4s, #128, lsl #24
+**	ret
+*/
+float32x4_t t2 (float32x4_t a)
+{
+  return vnegq_f32 (vabsq_f32 (a));
+}
+
+/*
+** t3:
+**	adrp	x0, .LC[0-9]+
+**	ldr	q[0-9]+, \[x0, #:lo12:.LC0\]
+**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**	ret
+*/
+float64x2_t t3 (float64x2_t a)
+{
+  return vnegq_f64 (vabsq_f64 (a));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..141121176b309e4b2aa413dc55271a6e3c93d5e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**	movi	v[0-9]+.2s, 0x80, lsl 24
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float32_t f1 (float32_t a)
+{
+  return -fabsf (a);
+}
+
+/*
+** f2:
+**	mov	x0, -9223372036854775808
+**	fmov	d[0-9]+, x0
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float64_t f2 (float64_t a)
+{
+  return -fabs (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..b4652173a95d104ddfa70c497f0627a61ea89d3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**	...
+**	ldr	q[0-9]+, \[x0\]
+**	orr	v[0-9]+.4s, #128, lsl #24
+**	str	q[0-9]+, \[x0\], 16
+**	...
+*/
+void f1 (float32_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabsf (a[i]);
+}
+
+/*
+** f2:
+**	...
+**	ldr	q[0-9]+, \[x0\]
+**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**	str	q[0-9]+, \[x0\], 16
+**	...
+*/
+void f2 (float64_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabs (a[i]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..10879dea74462d34b26160eeb0bd54ead063166b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <string.h>
+
+/*
+** negabs:
+**	mov	x0, -9223372036854775808
+**	fmov	d[0-9]+, x0
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+double negabs (double x)
+{
+   unsigned long long y;
+   memcpy (&y, &x, sizeof(double));
+   y = y | (1UL << 63);
+   memcpy (&x, &y, sizeof(double));
+   return x;
+}
+
+/*
+** negabsf:
+**	movi	v[0-9]+.2s, 0x80, lsl 24
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float negabsf (float x)
+{
+   unsigned int y;
+   memcpy (&y, &x, sizeof(float));
+   y = y | (1U << 31);
+   memcpy (&x, &y, sizeof(float));
+   return x;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..0c7664e6de77a497682952653ffd417453854d52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+
+/*
+** t1:
+**	orr	v[0-9]+.2s, #128, lsl #24
+**	ret
+*/
+float32x2_t t1 (float32x2_t a)
+{
+  return vneg_f32 (vabs_f32 (a));
+}
+
+/*
+** t2:
+**	orr	v[0-9]+.4s, #128, lsl #24
+**	ret
+*/
+float32x4_t t2 (float32x4_t a)
+{
+  return vnegq_f32 (vabsq_f32 (a));
+}
+
+/*
+** t3:
+**	adrp	x0, .LC[0-9]+
+**	ldr	q[0-9]+, \[x0, #:lo12:.LC0\]
+**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**	ret
+*/
+float64x2_t t3 (float64x2_t a)
+{
+  return vnegq_f64 (vabsq_f64 (a));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..a60cd31b9294af2dac69eed1c93f899bd5c78fca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**	movi	v[0-9]+.2s, 0x80, lsl 24
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float32_t f1 (float32_t a)
+{
+  return -fabsf (a);
+}
+
+/*
+** f2:
+**	mov	x0, -9223372036854775808
+**	fmov	d[0-9]+, x0
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float64_t f2 (float64_t a)
+{
+  return -fabs (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..1bf34328d8841de8e6b0a5458562a9f00e31c275
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**	...
+**	ld1w	z[0-9]+.s, p[0-9]+/z, \[x0, x2, lsl 2\]
+**	orr	z[0-9]+.s, z[0-9]+.s, #0x80000000
+**	st1w	z[0-9]+.s, p[0-9]+, \[x0, x2, lsl 2\]
+**	...
+*/
+void f1 (float32_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabsf (a[i]);
+}
+
+/*
+** f2:
+**	...
+**	ld1d	z[0-9]+.d, p[0-9]+/z, \[x0, x2, lsl 3\]
+**	orr	z[0-9]+.d, z[0-9]+.d, #0x8000000000000000
+**	st1d	z[0-9]+.d, p[0-9]+, \[x0, x2, lsl 3\]
+**	...
+*/
+void f2 (float64_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabs (a[i]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..21f2a8da2a5d44e3d01f6604ca7be87e3744d494
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <string.h>
+
+/*
+** negabs:
+**	mov	x0, -9223372036854775808
+**	fmov	d[0-9]+, x0
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+double negabs (double x)
+{
+   unsigned long long y;
+   memcpy (&y, &x, sizeof(double));
+   y = y | (1UL << 63);
+   memcpy (&x, &y, sizeof(double));
+   return x;
+}
+
+/*
+** negabsf:
+**	movi	v[0-9]+.2s, 0x80, lsl 24
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float negabsf (float x)
+{
+   unsigned int y;
+   memcpy (&y, &x, sizeof(float));
+   y = y | (1U << 31);
+   memcpy (&x, &y, sizeof(float));
+   return x;
+}
+
  
Richard Biener Nov. 7, 2023, 9:29 a.m. UTC | #3
On Tue, 7 Nov 2023, Tamar Christina wrote:

> > 
> > You also drop copysign (x, CST) -> abs (x) when CST is not negative - I think that's
> > still worthwhile as it has one less argument?
> > 
> > Keeping that might also need less testsuite adjustments?
> 
> Done but still needed the testsuite updates.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?

OK.

Richard.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	PR tree-optimization/109154
> 	* match.pd: Add new neg+abs rule, remove inverse copysign rule.
> 
> gcc/testsuite/ChangeLog:
> 
> 	PR tree-optimization/109154
> 	* gcc.dg/fold-copysign-1.c: Updated.
> 	* gcc.dg/pr55152-2.c: Updated.
> 	* gcc.dg/tree-ssa/abs-4.c: Updated.
> 	* gcc.dg/tree-ssa/backprop-6.c: Updated.
> 	* gcc.dg/tree-ssa/copy-sign-2.c: Updated.
> 	* gcc.dg/tree-ssa/mult-abs-2.c: Updated.
> 	* gcc.target/aarch64/fneg-abs_1.c: New test.
> 	* gcc.target/aarch64/fneg-abs_2.c: New test.
> 	* gcc.target/aarch64/fneg-abs_3.c: New test.
> 	* gcc.target/aarch64/fneg-abs_4.c: New test.
> 	* gcc.target/aarch64/sve/fneg-abs_1.c: New test.
> 	* gcc.target/aarch64/sve/fneg-abs_2.c: New test.
> 	* gcc.target/aarch64/sve/fneg-abs_3.c: New test.
> 	* gcc.target/aarch64/sve/fneg-abs_4.c: New test.
> 
> --- inline copy of the patch ---
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 68a1587ea2465a890875448993e140425ef6a68f..5928acbb14e2a18addff38000bf30dd273d4b1a6 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -1118,14 +1118,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>     (hypots @0 (copysigns @1 @2))
>     (hypots @0 @1))))
>  
> -/* copysign(x, CST) -> [-]abs (x).  */
> +/* copysign(x, CST) -> abs (x).  */
>  (for copysigns (COPYSIGN_ALL)
>   (simplify
>    (copysigns @0 REAL_CST@1)
> -  (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
> -   (negate (abs @0))
> +  (if (!REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
>     (abs @0))))
>  
> +/* Transform fneg (fabs (X)) -> copysign (X, -1).  */
> +(simplify
> + (negate (abs @0))
> + (IFN_COPYSIGN @0 { build_minus_one_cst (type); }))
> +
>  /* copysign(copysign(x, y), z) -> copysign(x, z).  */
>  (for copysigns (COPYSIGN_ALL)
>   (simplify
> diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c b/gcc/testsuite/gcc.dg/fold-copysign-1.c
> index f17d65c24ee4dca9867827d040fe0a404c515e7b..f9cafd14ab05f5e8ab2f6f68e62801d21c2df6a6 100644
> --- a/gcc/testsuite/gcc.dg/fold-copysign-1.c
> +++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c
> @@ -12,5 +12,5 @@ double bar (double x)
>    return __builtin_copysign (x, minuszero);
>  }
>  
> -/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" } } */
> -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" } } */
> +/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" } } */
> +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" } } */
> diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c
> index 54db0f2062da105a829d6690ac8ed9891fe2b588..605f202ed6bc7aa8fe921457b02ff0b88cc63ce6 100644
> --- a/gcc/testsuite/gcc.dg/pr55152-2.c
> +++ b/gcc/testsuite/gcc.dg/pr55152-2.c
> @@ -10,4 +10,5 @@ int f(int a)
>    return (a<-a)?a:-a;
>  }
>  
> -/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> index 6197519faf7b55aed7bc162cd0a14dd2145210ca..e1b825f37f69ac3c4666b3a52d733368805ad31d 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> @@ -9,5 +9,6 @@ long double abs_ld(long double x) { return __builtin_signbit(x) ? x : -x; }
>  
>  /* __builtin_signbit(x) ? x : -x. Should be convert into - ABS_EXP<x> */
>  /* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */
> -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized"} } */
> -/* { dg-final { scan-tree-dump-times "= -" 3 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized"} } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> index 31f05716f1498dc709cac95fa20fb5796642c77e..c3a138642d6ff7be984e91fa1343cb2718db7ae1 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> @@ -26,5 +26,6 @@ TEST_FUNCTION (float, f)
>  TEST_FUNCTION (double, )
>  TEST_FUNCTION (long double, l)
>  
> -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" } } */
> -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> index de52c5f7c8062958353d91f5031193defc9f3f91..e5d565c4b9832c00106588ef411fbd8c292a5cad 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> @@ -10,4 +10,5 @@ float f1(float x)
>    float t = __builtin_copysignf (1.0f, -x);
>    return x * t;
>  }
> -/* { dg-final { scan-tree-dump-times "ABS" 2 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized"} } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> index a41f1baf25669a4fd301a586a49ba5e3c5b966ab..a22896b21c8b5a4d5d8e28bd8ae0db896e63ade0 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> @@ -34,4 +34,5 @@ float i1(float x)
>  {
>    return x * (x <= 0.f ? 1.f : -1.f);
>  }
> -/* { dg-final { scan-tree-dump-times "ABS" 8 "gimple"} } */
> +/* { dg-final { scan-tree-dump-times "ABS" 4 "gimple"} } */
> +/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 4 "gimple"} } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..f823013c3ddf6b3a266c3abfcbf2642fc2a75fa6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
> @@ -0,0 +1,39 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#pragma GCC target "+nosve"
> +
> +#include <arm_neon.h>
> +
> +/*
> +** t1:
> +**	orr	v[0-9]+.2s, #128, lsl #24
> +**	ret
> +*/
> +float32x2_t t1 (float32x2_t a)
> +{
> +  return vneg_f32 (vabs_f32 (a));
> +}
> +
> +/*
> +** t2:
> +**	orr	v[0-9]+.4s, #128, lsl #24
> +**	ret
> +*/
> +float32x4_t t2 (float32x4_t a)
> +{
> +  return vnegq_f32 (vabsq_f32 (a));
> +}
> +
> +/*
> +** t3:
> +**	adrp	x0, .LC[0-9]+
> +**	ldr	q[0-9]+, \[x0, #:lo12:.LC0\]
> +**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> +**	ret
> +*/
> +float64x2_t t3 (float64x2_t a)
> +{
> +  return vnegq_f64 (vabsq_f64 (a));
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..141121176b309e4b2aa413dc55271a6e3c93d5e1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#pragma GCC target "+nosve"
> +
> +#include <arm_neon.h>
> +#include <math.h>
> +
> +/*
> +** f1:
> +**	movi	v[0-9]+.2s, 0x80, lsl 24
> +**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**	ret
> +*/
> +float32_t f1 (float32_t a)
> +{
> +  return -fabsf (a);
> +}
> +
> +/*
> +** f2:
> +**	mov	x0, -9223372036854775808
> +**	fmov	d[0-9]+, x0
> +**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**	ret
> +*/
> +float64_t f2 (float64_t a)
> +{
> +  return -fabs (a);
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..b4652173a95d104ddfa70c497f0627a61ea89d3b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
> @@ -0,0 +1,36 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#pragma GCC target "+nosve"
> +
> +#include <arm_neon.h>
> +#include <math.h>
> +
> +/*
> +** f1:
> +**	...
> +**	ldr	q[0-9]+, \[x0\]
> +**	orr	v[0-9]+.4s, #128, lsl #24
> +**	str	q[0-9]+, \[x0\], 16
> +**	...
> +*/
> +void f1 (float32_t *a, int n)
> +{
> +  for (int i = 0; i < (n & -8); i++)
> +   a[i] = -fabsf (a[i]);
> +}
> +
> +/*
> +** f2:
> +**	...
> +**	ldr	q[0-9]+, \[x0\]
> +**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> +**	str	q[0-9]+, \[x0\], 16
> +**	...
> +*/
> +void f2 (float64_t *a, int n)
> +{
> +  for (int i = 0; i < (n & -8); i++)
> +   a[i] = -fabs (a[i]);
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..10879dea74462d34b26160eeb0bd54ead063166b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
> @@ -0,0 +1,39 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#pragma GCC target "+nosve"
> +
> +#include <string.h>
> +
> +/*
> +** negabs:
> +**	mov	x0, -9223372036854775808
> +**	fmov	d[0-9]+, x0
> +**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**	ret
> +*/
> +double negabs (double x)
> +{
> +   unsigned long long y;
> +   memcpy (&y, &x, sizeof(double));
> +   y = y | (1UL << 63);
> +   memcpy (&x, &y, sizeof(double));
> +   return x;
> +}
> +
> +/*
> +** negabsf:
> +**	movi	v[0-9]+.2s, 0x80, lsl 24
> +**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**	ret
> +*/
> +float negabsf (float x)
> +{
> +   unsigned int y;
> +   memcpy (&y, &x, sizeof(float));
> +   y = y | (1U << 31);
> +   memcpy (&x, &y, sizeof(float));
> +   return x;
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..0c7664e6de77a497682952653ffd417453854d52
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
> @@ -0,0 +1,37 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#include <arm_neon.h>
> +
> +/*
> +** t1:
> +**	orr	v[0-9]+.2s, #128, lsl #24
> +**	ret
> +*/
> +float32x2_t t1 (float32x2_t a)
> +{
> +  return vneg_f32 (vabs_f32 (a));
> +}
> +
> +/*
> +** t2:
> +**	orr	v[0-9]+.4s, #128, lsl #24
> +**	ret
> +*/
> +float32x4_t t2 (float32x4_t a)
> +{
> +  return vnegq_f32 (vabsq_f32 (a));
> +}
> +
> +/*
> +** t3:
> +**	adrp	x0, .LC[0-9]+
> +**	ldr	q[0-9]+, \[x0, #:lo12:.LC0\]
> +**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> +**	ret
> +*/
> +float64x2_t t3 (float64x2_t a)
> +{
> +  return vnegq_f64 (vabsq_f64 (a));
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..a60cd31b9294af2dac69eed1c93f899bd5c78fca
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#include <arm_neon.h>
> +#include <math.h>
> +
> +/*
> +** f1:
> +**	movi	v[0-9]+.2s, 0x80, lsl 24
> +**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**	ret
> +*/
> +float32_t f1 (float32_t a)
> +{
> +  return -fabsf (a);
> +}
> +
> +/*
> +** f2:
> +**	mov	x0, -9223372036854775808
> +**	fmov	d[0-9]+, x0
> +**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**	ret
> +*/
> +float64_t f2 (float64_t a)
> +{
> +  return -fabs (a);
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..1bf34328d8841de8e6b0a5458562a9f00e31c275
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
> @@ -0,0 +1,34 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#include <arm_neon.h>
> +#include <math.h>
> +
> +/*
> +** f1:
> +**	...
> +**	ld1w	z[0-9]+.s, p[0-9]+/z, \[x0, x2, lsl 2\]
> +**	orr	z[0-9]+.s, z[0-9]+.s, #0x80000000
> +**	st1w	z[0-9]+.s, p[0-9]+, \[x0, x2, lsl 2\]
> +**	...
> +*/
> +void f1 (float32_t *a, int n)
> +{
> +  for (int i = 0; i < (n & -8); i++)
> +   a[i] = -fabsf (a[i]);
> +}
> +
> +/*
> +** f2:
> +**	...
> +**	ld1d	z[0-9]+.d, p[0-9]+/z, \[x0, x2, lsl 3\]
> +**	orr	z[0-9]+.d, z[0-9]+.d, #0x8000000000000000
> +**	st1d	z[0-9]+.d, p[0-9]+, \[x0, x2, lsl 3\]
> +**	...
> +*/
> +void f2 (float64_t *a, int n)
> +{
> +  for (int i = 0; i < (n & -8); i++)
> +   a[i] = -fabs (a[i]);
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..21f2a8da2a5d44e3d01f6604ca7be87e3744d494
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
> @@ -0,0 +1,37 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#include <string.h>
> +
> +/*
> +** negabs:
> +**	mov	x0, -9223372036854775808
> +**	fmov	d[0-9]+, x0
> +**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**	ret
> +*/
> +double negabs (double x)
> +{
> +   unsigned long long y;
> +   memcpy (&y, &x, sizeof(double));
> +   y = y | (1UL << 63);
> +   memcpy (&x, &y, sizeof(double));
> +   return x;
> +}
> +
> +/*
> +** negabsf:
> +**	movi	v[0-9]+.2s, 0x80, lsl 24
> +**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**	ret
> +*/
> +float negabsf (float x)
> +{
> +   unsigned int y;
> +   memcpy (&y, &x, sizeof(float));
> +   y = y | (1U << 31);
> +   memcpy (&x, &y, sizeof(float));
> +   return x;
> +}
> +
>
  
Prathamesh Kulkarni Nov. 10, 2023, 12:24 p.m. UTC | #4
On Mon, 6 Nov 2023 at 15:50, Tamar Christina <tamar.christina@arm.com> wrote:
>
> Hi All,
>
> This patch transforms fneg (fabs (x)) into copysign (x, -1) which is more
> canonical and allows a target to expand this sequence efficiently.  Such
> sequences are common in scientific code working with gradients.
>
> There is an existing canonicalization of copysign (x, -1) to fneg (fabs (x))
> which I remove since this is a less efficient form.  The testsuite is also
> updated in light of this.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
Hi Tamar,
It seems the patch caused the following regressions on arm:

Running gcc:gcc.dg/dg.exp ...
FAIL: gcc.dg/pr55152-2.c scan-tree-dump-times optimized ".COPYSIGN" 1
FAIL: gcc.dg/pr55152-2.c scan-tree-dump-times optimized "ABS_EXPR" 1

Running gcc:gcc.dg/tree-ssa/tree-ssa.exp ...
FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= -" 1
FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= .COPYSIGN" 2
FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= ABS_EXPR" 1
FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
"Deleting[^\\n]* = -" 4
FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
"Deleting[^\\n]* = ABS_EXPR <" 1
FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
"Deleting[^\\n]* = \\.COPYSIGN" 2
FAIL: gcc.dg/tree-ssa/copy-sign-2.c scan-tree-dump-times optimized ".COPYSIGN" 1
FAIL: gcc.dg/tree-ssa/copy-sign-2.c scan-tree-dump-times optimized "ABS" 1
FAIL: gcc.dg/tree-ssa/mult-abs-2.c scan-tree-dump-times gimple ".COPYSIGN" 4
FAIL: gcc.dg/tree-ssa/mult-abs-2.c scan-tree-dump-times gimple "ABS" 4
FAIL: gcc.dg/tree-ssa/phi-opt-24.c scan-tree-dump-not phiopt2 "if"
Link to log files:
https://ci.linaro.org/job/tcwg_gcc_check--master-arm-build/1240/artifact/artifacts/00-sumfiles/

Even for the following test case:
double g (double a)
{
  double t1 = fabs (a);
  double t2 = -t1;
  return t2;
}

It seems the pattern gets applied on arm, but the statement is not
eventually simplified to copysign (a, -1).
The forwprop dump on arm shows:
Applying pattern match.pd:1131, gimple-match-4.cc:4134
double g (double a)
{
  double t2;
  double t1;

  <bb 2> :
  t1_2 = ABS_EXPR <a_1(D)>;
  t2_3 = -t1_2;
  return t2_3;

}

while on x86_64:
Applying pattern match.pd:1131, gimple-match-4.cc:4134
gimple_simplified to t2_3 = .COPYSIGN (a_1(D), -1.0e+0);
Removing dead stmt:t1_2 = ABS_EXPR <a_1(D)>;
double g (double a)
{
  double t2;
  double t1;

  <bb 2> :
  t2_3 = .COPYSIGN (a_1(D), -1.0e+0);
  return t2_3;

}

Thanks,
Prathamesh


>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>         PR tree-optimization/109154
>         * match.pd: Add new neg+abs rule, remove inverse copysign rule.
>
> gcc/testsuite/ChangeLog:
>
>         PR tree-optimization/109154
>         * gcc.dg/fold-copysign-1.c: Updated.
>         * gcc.dg/pr55152-2.c: Updated.
>         * gcc.dg/tree-ssa/abs-4.c: Updated.
>         * gcc.dg/tree-ssa/backprop-6.c: Updated.
>         * gcc.dg/tree-ssa/copy-sign-2.c: Updated.
>         * gcc.dg/tree-ssa/mult-abs-2.c: Updated.
>         * gcc.target/aarch64/fneg-abs_1.c: New test.
>         * gcc.target/aarch64/fneg-abs_2.c: New test.
>         * gcc.target/aarch64/fneg-abs_3.c: New test.
>         * gcc.target/aarch64/fneg-abs_4.c: New test.
>         * gcc.target/aarch64/sve/fneg-abs_1.c: New test.
>         * gcc.target/aarch64/sve/fneg-abs_2.c: New test.
>         * gcc.target/aarch64/sve/fneg-abs_3.c: New test.
>         * gcc.target/aarch64/sve/fneg-abs_4.c: New test.
>
> --- inline copy of patch --
> diff --git a/gcc/match.pd b/gcc/match.pd
> index db95931df0672cf4ef08cca36085c3aa6831519e..7a023d510c283c43a87b1795a74761b8af979b53 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -1106,13 +1106,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>     (hypots @0 (copysigns @1 @2))
>     (hypots @0 @1))))
>
> -/* copysign(x, CST) -> [-]abs (x).  */
> -(for copysigns (COPYSIGN_ALL)
> - (simplify
> -  (copysigns @0 REAL_CST@1)
> -  (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
> -   (negate (abs @0))
> -   (abs @0))))
> +/* Transform fneg (fabs (X)) -> copysign (X, -1).  */
> +
> +(simplify
> + (negate (abs @0))
> + (IFN_COPYSIGN @0 { build_minus_one_cst (type); }))
>
>  /* copysign(copysign(x, y), z) -> copysign(x, z).  */
>  (for copysigns (COPYSIGN_ALL)
> diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c b/gcc/testsuite/gcc.dg/fold-copysign-1.c
> index f17d65c24ee4dca9867827d040fe0a404c515e7b..f9cafd14ab05f5e8ab2f6f68e62801d21c2df6a6 100644
> --- a/gcc/testsuite/gcc.dg/fold-copysign-1.c
> +++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c
> @@ -12,5 +12,5 @@ double bar (double x)
>    return __builtin_copysign (x, minuszero);
>  }
>
> -/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" } } */
> -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" } } */
> +/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" } } */
> +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" } } */
> diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c
> index 54db0f2062da105a829d6690ac8ed9891fe2b588..605f202ed6bc7aa8fe921457b02ff0b88cc63ce6 100644
> --- a/gcc/testsuite/gcc.dg/pr55152-2.c
> +++ b/gcc/testsuite/gcc.dg/pr55152-2.c
> @@ -10,4 +10,5 @@ int f(int a)
>    return (a<-a)?a:-a;
>  }
>
> -/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> index 6197519faf7b55aed7bc162cd0a14dd2145210ca..e1b825f37f69ac3c4666b3a52d733368805ad31d 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> @@ -9,5 +9,6 @@ long double abs_ld(long double x) { return __builtin_signbit(x) ? x : -x; }
>
>  /* __builtin_signbit(x) ? x : -x. Should be convert into - ABS_EXP<x> */
>  /* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */
> -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized"} } */
> -/* { dg-final { scan-tree-dump-times "= -" 3 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized"} } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> index 31f05716f1498dc709cac95fa20fb5796642c77e..c3a138642d6ff7be984e91fa1343cb2718db7ae1 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> @@ -26,5 +26,6 @@ TEST_FUNCTION (float, f)
>  TEST_FUNCTION (double, )
>  TEST_FUNCTION (long double, l)
>
> -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" } } */
> -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> index de52c5f7c8062958353d91f5031193defc9f3f91..e5d565c4b9832c00106588ef411fbd8c292a5cad 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> @@ -10,4 +10,5 @@ float f1(float x)
>    float t = __builtin_copysignf (1.0f, -x);
>    return x * t;
>  }
> -/* { dg-final { scan-tree-dump-times "ABS" 2 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized"} } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> index a41f1baf25669a4fd301a586a49ba5e3c5b966ab..a22896b21c8b5a4d5d8e28bd8ae0db896e63ade0 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> @@ -34,4 +34,5 @@ float i1(float x)
>  {
>    return x * (x <= 0.f ? 1.f : -1.f);
>  }
> -/* { dg-final { scan-tree-dump-times "ABS" 8 "gimple"} } */
> +/* { dg-final { scan-tree-dump-times "ABS" 4 "gimple"} } */
> +/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 4 "gimple"} } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..f823013c3ddf6b3a266c3abfcbf2642fc2a75fa6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
> @@ -0,0 +1,39 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#pragma GCC target "+nosve"
> +
> +#include <arm_neon.h>
> +
> +/*
> +** t1:
> +**     orr     v[0-9]+.2s, #128, lsl #24
> +**     ret
> +*/
> +float32x2_t t1 (float32x2_t a)
> +{
> +  return vneg_f32 (vabs_f32 (a));
> +}
> +
> +/*
> +** t2:
> +**     orr     v[0-9]+.4s, #128, lsl #24
> +**     ret
> +*/
> +float32x4_t t2 (float32x4_t a)
> +{
> +  return vnegq_f32 (vabsq_f32 (a));
> +}
> +
> +/*
> +** t3:
> +**     adrp    x0, .LC[0-9]+
> +**     ldr     q[0-9]+, \[x0, #:lo12:.LC0\]
> +**     orr     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> +**     ret
> +*/
> +float64x2_t t3 (float64x2_t a)
> +{
> +  return vnegq_f64 (vabsq_f64 (a));
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..141121176b309e4b2aa413dc55271a6e3c93d5e1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#pragma GCC target "+nosve"
> +
> +#include <arm_neon.h>
> +#include <math.h>
> +
> +/*
> +** f1:
> +**     movi    v[0-9]+.2s, 0x80, lsl 24
> +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**     ret
> +*/
> +float32_t f1 (float32_t a)
> +{
> +  return -fabsf (a);
> +}
> +
> +/*
> +** f2:
> +**     mov     x0, -9223372036854775808
> +**     fmov    d[0-9]+, x0
> +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**     ret
> +*/
> +float64_t f2 (float64_t a)
> +{
> +  return -fabs (a);
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..b4652173a95d104ddfa70c497f0627a61ea89d3b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
> @@ -0,0 +1,36 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#pragma GCC target "+nosve"
> +
> +#include <arm_neon.h>
> +#include <math.h>
> +
> +/*
> +** f1:
> +**     ...
> +**     ldr     q[0-9]+, \[x0\]
> +**     orr     v[0-9]+.4s, #128, lsl #24
> +**     str     q[0-9]+, \[x0\], 16
> +**     ...
> +*/
> +void f1 (float32_t *a, int n)
> +{
> +  for (int i = 0; i < (n & -8); i++)
> +   a[i] = -fabsf (a[i]);
> +}
> +
> +/*
> +** f2:
> +**     ...
> +**     ldr     q[0-9]+, \[x0\]
> +**     orr     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> +**     str     q[0-9]+, \[x0\], 16
> +**     ...
> +*/
> +void f2 (float64_t *a, int n)
> +{
> +  for (int i = 0; i < (n & -8); i++)
> +   a[i] = -fabs (a[i]);
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..10879dea74462d34b26160eeb0bd54ead063166b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
> @@ -0,0 +1,39 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#pragma GCC target "+nosve"
> +
> +#include <string.h>
> +
> +/*
> +** negabs:
> +**     mov     x0, -9223372036854775808
> +**     fmov    d[0-9]+, x0
> +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**     ret
> +*/
> +double negabs (double x)
> +{
> +   unsigned long long y;
> +   memcpy (&y, &x, sizeof(double));
> +   y = y | (1UL << 63);
> +   memcpy (&x, &y, sizeof(double));
> +   return x;
> +}
> +
> +/*
> +** negabsf:
> +**     movi    v[0-9]+.2s, 0x80, lsl 24
> +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**     ret
> +*/
> +float negabsf (float x)
> +{
> +   unsigned int y;
> +   memcpy (&y, &x, sizeof(float));
> +   y = y | (1U << 31);
> +   memcpy (&x, &y, sizeof(float));
> +   return x;
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..0c7664e6de77a497682952653ffd417453854d52
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
> @@ -0,0 +1,37 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#include <arm_neon.h>
> +
> +/*
> +** t1:
> +**     orr     v[0-9]+.2s, #128, lsl #24
> +**     ret
> +*/
> +float32x2_t t1 (float32x2_t a)
> +{
> +  return vneg_f32 (vabs_f32 (a));
> +}
> +
> +/*
> +** t2:
> +**     orr     v[0-9]+.4s, #128, lsl #24
> +**     ret
> +*/
> +float32x4_t t2 (float32x4_t a)
> +{
> +  return vnegq_f32 (vabsq_f32 (a));
> +}
> +
> +/*
> +** t3:
> +**     adrp    x0, .LC[0-9]+
> +**     ldr     q[0-9]+, \[x0, #:lo12:.LC0\]
> +**     orr     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> +**     ret
> +*/
> +float64x2_t t3 (float64x2_t a)
> +{
> +  return vnegq_f64 (vabsq_f64 (a));
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..a60cd31b9294af2dac69eed1c93f899bd5c78fca
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#include <arm_neon.h>
> +#include <math.h>
> +
> +/*
> +** f1:
> +**     movi    v[0-9]+.2s, 0x80, lsl 24
> +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**     ret
> +*/
> +float32_t f1 (float32_t a)
> +{
> +  return -fabsf (a);
> +}
> +
> +/*
> +** f2:
> +**     mov     x0, -9223372036854775808
> +**     fmov    d[0-9]+, x0
> +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**     ret
> +*/
> +float64_t f2 (float64_t a)
> +{
> +  return -fabs (a);
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..1bf34328d8841de8e6b0a5458562a9f00e31c275
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
> @@ -0,0 +1,34 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#include <arm_neon.h>
> +#include <math.h>
> +
> +/*
> +** f1:
> +**     ...
> +**     ld1w    z[0-9]+.s, p[0-9]+/z, \[x0, x2, lsl 2\]
> +**     orr     z[0-9]+.s, z[0-9]+.s, #0x80000000
> +**     st1w    z[0-9]+.s, p[0-9]+, \[x0, x2, lsl 2\]
> +**     ...
> +*/
> +void f1 (float32_t *a, int n)
> +{
> +  for (int i = 0; i < (n & -8); i++)
> +   a[i] = -fabsf (a[i]);
> +}
> +
> +/*
> +** f2:
> +**     ...
> +**     ld1d    z[0-9]+.d, p[0-9]+/z, \[x0, x2, lsl 3\]
> +**     orr     z[0-9]+.d, z[0-9]+.d, #0x8000000000000000
> +**     st1d    z[0-9]+.d, p[0-9]+, \[x0, x2, lsl 3\]
> +**     ...
> +*/
> +void f2 (float64_t *a, int n)
> +{
> +  for (int i = 0; i < (n & -8); i++)
> +   a[i] = -fabs (a[i]);
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..21f2a8da2a5d44e3d01f6604ca7be87e3744d494
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
> @@ -0,0 +1,37 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#include <string.h>
> +
> +/*
> +** negabs:
> +**     mov     x0, -9223372036854775808
> +**     fmov    d[0-9]+, x0
> +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**     ret
> +*/
> +double negabs (double x)
> +{
> +   unsigned long long y;
> +   memcpy (&y, &x, sizeof(double));
> +   y = y | (1UL << 63);
> +   memcpy (&x, &y, sizeof(double));
> +   return x;
> +}
> +
> +/*
> +** negabsf:
> +**     movi    v[0-9]+.2s, 0x80, lsl 24
> +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> +**     ret
> +*/
> +float negabsf (float x)
> +{
> +   unsigned int y;
> +   memcpy (&y, &x, sizeof(float));
> +   y = y | (1U << 31);
> +   memcpy (&x, &y, sizeof(float));
> +   return x;
> +}
> +
>
>
>
>
> --
  
Tamar Christina Nov. 10, 2023, 12:44 p.m. UTC | #5
Hi Prathamesh,

Yes, Arm requires SIMD for copysign. The testcases fail because they don't turn on Neon.

I'll update them.

Regards,
Tamar
  
Richard Biener Nov. 10, 2023, 1:12 p.m. UTC | #6
On Fri, 10 Nov 2023, Tamar Christina wrote:

> 
> Hi Prathamesh,
> 
> Yes Arm requires SIMD for copysign. The testcases fail because they don't turn on Neon.
> 
> I'll update them.

On x86_64 with -m32 I see

FAIL: gcc.dg/pr55152-2.c scan-tree-dump-times optimized ".COPYSIGN" 1
FAIL: gcc.dg/pr55152-2.c scan-tree-dump-times optimized "ABS_EXPR" 1
FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= ABS_EXPR" 1
FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= -" 1
FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= .COPYSIGN" 2
FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop "Deleting[^\\\\n]* = -" 4
FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop "Deleting[^\\\\n]* = \\\\.COPYSIGN" 2
FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop "Deleting[^\\\\n]* = ABS_EXPR <" 1
FAIL: gcc.dg/tree-ssa/phi-opt-24.c scan-tree-dump-not phiopt2 "if"

maybe add a copysign effective target?

> Regards,
> Tamar
> ________________________________
> From: Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
> Sent: Friday, November 10, 2023 12:24 PM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>; nd <nd@arm.com>; rguenther@suse.de <rguenther@suse.de>; jlaw@ventanamicro.com <jlaw@ventanamicro.com>
> Subject: Re: [PATCH v3 2/2]middle-end match.pd: optimize fneg (fabs (x)) to copysign (x, -1) [PR109154]
> 
> On Mon, 6 Nov 2023 at 15:50, Tamar Christina <tamar.christina@arm.com> wrote:
> >
> > Hi All,
> >
> > This patch transforms fneg (fabs (x)) into copysign (x, -1) which is more
> > canonical and allows a target to expand this sequence efficiently.  Such
> > sequences are common in scientific code working with gradients.
> >
> > There is an existing canonicalization of copysign (x, -1) to fneg (fabs (x))
> > which I remove since this is a less efficient form.  The testsuite is also
> > updated in light of this.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> Hi Tamar,
> It seems the patch caused following regressions on arm:
> 
> Running gcc:gcc.dg/dg.exp ...
> FAIL: gcc.dg/pr55152-2.c scan-tree-dump-times optimized ".COPYSIGN" 1
> FAIL: gcc.dg/pr55152-2.c scan-tree-dump-times optimized "ABS_EXPR" 1
> 
> Running gcc:gcc.dg/tree-ssa/tree-ssa.exp ...
> FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= -" 1
> FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= .COPYSIGN" 2
> FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= ABS_EXPR" 1
> FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
> "Deleting[^\\n]* = -" 4
> FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
> "Deleting[^\\n]* = ABS_EXPR <" 1
> FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
> "Deleting[^\\n]* = \\.COPYSIGN" 2
> FAIL: gcc.dg/tree-ssa/copy-sign-2.c scan-tree-dump-times optimized ".COPYSIGN" 1
> FAIL: gcc.dg/tree-ssa/copy-sign-2.c scan-tree-dump-times optimized "ABS" 1
> FAIL: gcc.dg/tree-ssa/mult-abs-2.c scan-tree-dump-times gimple ".COPYSIGN" 4
> FAIL: gcc.dg/tree-ssa/mult-abs-2.c scan-tree-dump-times gimple "ABS" 4
> FAIL: gcc.dg/tree-ssa/phi-opt-24.c scan-tree-dump-not phiopt2 "if"
> Link to log files:
> https://ci.linaro.org/job/tcwg_gcc_check--master-arm-build/1240/artifact/artifacts/00-sumfiles/
> 
> Even for following test-case:
> double g (double a)
> {
>   double t1 = fabs (a);
>   double t2 = -t1;
>   return t2;
> }
> 
> It seems, the pattern gets applied but doesn't get eventually
> simplified to copysign(a, -1).
> forwprop dump shows:
> Applying pattern match.pd:1131, gimple-match-4.cc:4134
> double g (double a)
> {
>   double t2;
>   double t1;
> 
>   <bb 2> :
>   t1_2 = ABS_EXPR <a_1(D)>;
>   t2_3 = -t1_2;
>   return t2_3;
> 
> }
> 
> while on x86_64:
> Applying pattern match.pd:1131, gimple-match-4.cc:4134
> gimple_simplified to t2_3 = .COPYSIGN (a_1(D), -1.0e+0);
> Removing dead stmt:t1_2 = ABS_EXPR <a_1(D)>;
> double g (double a)
> {
>   double t2;
>   double t1;
> 
>   <bb 2> :
>   t2_3 = .COPYSIGN (a_1(D), -1.0e+0);
>   return t2_3;
> 
> }
> 
> Thanks,
> Prathamesh
> 
> 
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> >         PR tree-optimization/109154
> >         * match.pd: Add new neg+abs rule, remove inverse copysign rule.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         PR tree-optimization/109154
> >         * gcc.dg/fold-copysign-1.c: Updated.
> >         * gcc.dg/pr55152-2.c: Updated.
> >         * gcc.dg/tree-ssa/abs-4.c: Updated.
> >         * gcc.dg/tree-ssa/backprop-6.c: Updated.
> >         * gcc.dg/tree-ssa/copy-sign-2.c: Updated.
> >         * gcc.dg/tree-ssa/mult-abs-2.c: Updated.
> >         * gcc.target/aarch64/fneg-abs_1.c: New test.
> >         * gcc.target/aarch64/fneg-abs_2.c: New test.
> >         * gcc.target/aarch64/fneg-abs_3.c: New test.
> >         * gcc.target/aarch64/fneg-abs_4.c: New test.
> >         * gcc.target/aarch64/sve/fneg-abs_1.c: New test.
> >         * gcc.target/aarch64/sve/fneg-abs_2.c: New test.
> >         * gcc.target/aarch64/sve/fneg-abs_3.c: New test.
> >         * gcc.target/aarch64/sve/fneg-abs_4.c: New test.
> >
> > --- inline copy of patch --
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index db95931df0672cf4ef08cca36085c3aa6831519e..7a023d510c283c43a87b1795a74761b8af979b53 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -1106,13 +1106,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >     (hypots @0 (copysigns @1 @2))
> >     (hypots @0 @1))))
> >
> > -/* copysign(x, CST) -> [-]abs (x).  */
> > -(for copysigns (COPYSIGN_ALL)
> > - (simplify
> > -  (copysigns @0 REAL_CST@1)
> > -  (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
> > -   (negate (abs @0))
> > -   (abs @0))))
> > +/* Transform fneg (fabs (X)) -> copysign (X, -1).  */
> > +
> > +(simplify
> > + (negate (abs @0))
> > + (IFN_COPYSIGN @0 { build_minus_one_cst (type); }))
> >
> >  /* copysign(copysign(x, y), z) -> copysign(x, z).  */
> >  (for copysigns (COPYSIGN_ALL)
> > diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c b/gcc/testsuite/gcc.dg/fold-copysign-1.c
> > index f17d65c24ee4dca9867827d040fe0a404c515e7b..f9cafd14ab05f5e8ab2f6f68e62801d21c2df6a6 100644
> > --- a/gcc/testsuite/gcc.dg/fold-copysign-1.c
> > +++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c
> > @@ -12,5 +12,5 @@ double bar (double x)
> >    return __builtin_copysign (x, minuszero);
> >  }
> >
> > -/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" } } */
> > -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" } } */
> > +/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" } } */
> > +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" } } */
> > diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c
> > index 54db0f2062da105a829d6690ac8ed9891fe2b588..605f202ed6bc7aa8fe921457b02ff0b88cc63ce6 100644
> > --- a/gcc/testsuite/gcc.dg/pr55152-2.c
> > +++ b/gcc/testsuite/gcc.dg/pr55152-2.c
> > @@ -10,4 +10,5 @@ int f(int a)
> >    return (a<-a)?a:-a;
> >  }
> >
> > -/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" } } */
> > +/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" } } */
> > +/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" } } */
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> > index 6197519faf7b55aed7bc162cd0a14dd2145210ca..e1b825f37f69ac3c4666b3a52d733368805ad31d 100644
> > --- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> > @@ -9,5 +9,6 @@ long double abs_ld(long double x) { return __builtin_signbit(x) ? x : -x; }
> >
> >  /* __builtin_signbit(x) ? x : -x. Should be convert into - ABS_EXP<x> */
> >  /* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */
> > -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized"} } */
> > -/* { dg-final { scan-tree-dump-times "= -" 3 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized"} } */
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> > index 31f05716f1498dc709cac95fa20fb5796642c77e..c3a138642d6ff7be984e91fa1343cb2718db7ae1 100644
> > --- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> > @@ -26,5 +26,6 @@ TEST_FUNCTION (float, f)
> >  TEST_FUNCTION (double, )
> >  TEST_FUNCTION (long double, l)
> >
> > -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" } } */
> > -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" } } */
> > +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" } } */
> > +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" } } */
> > +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" } } */
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> > index de52c5f7c8062958353d91f5031193defc9f3f91..e5d565c4b9832c00106588ef411fbd8c292a5cad 100644
> > --- a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> > @@ -10,4 +10,5 @@ float f1(float x)
> >    float t = __builtin_copysignf (1.0f, -x);
> >    return x * t;
> >  }
> > -/* { dg-final { scan-tree-dump-times "ABS" 2 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized"} } */
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> > index a41f1baf25669a4fd301a586a49ba5e3c5b966ab..a22896b21c8b5a4d5d8e28bd8ae0db896e63ade0 100644
> > --- a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> > @@ -34,4 +34,5 @@ float i1(float x)
> >  {
> >    return x * (x <= 0.f ? 1.f : -1.f);
> >  }
> > -/* { dg-final { scan-tree-dump-times "ABS" 8 "gimple"} } */
> > +/* { dg-final { scan-tree-dump-times "ABS" 4 "gimple"} } */
> > +/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 4 "gimple"} } */
> > diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..f823013c3ddf6b3a266c3abfcbf2642fc2a75fa6
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
> > @@ -0,0 +1,39 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3" } */
> > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > +
> > +#pragma GCC target "+nosve"
> > +
> > +#include <arm_neon.h>
> > +
> > +/*
> > +** t1:
> > +**     orr     v[0-9]+.2s, #128, lsl #24
> > +**     ret
> > +*/
> > +float32x2_t t1 (float32x2_t a)
> > +{
> > +  return vneg_f32 (vabs_f32 (a));
> > +}
> > +
> > +/*
> > +** t2:
> > +**     orr     v[0-9]+.4s, #128, lsl #24
> > +**     ret
> > +*/
> > +float32x4_t t2 (float32x4_t a)
> > +{
> > +  return vnegq_f32 (vabsq_f32 (a));
> > +}
> > +
> > +/*
> > +** t3:
> > +**     adrp    x0, .LC[0-9]+
> > +**     ldr     q[0-9]+, \[x0, #:lo12:.LC0\]
> > +**     orr     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> > +**     ret
> > +*/
> > +float64x2_t t3 (float64x2_t a)
> > +{
> > +  return vnegq_f64 (vabsq_f64 (a));
> > +}
> > diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..141121176b309e4b2aa413dc55271a6e3c93d5e1
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3" } */
> > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > +
> > +#pragma GCC target "+nosve"
> > +
> > +#include <arm_neon.h>
> > +#include <math.h>
> > +
> > +/*
> > +** f1:
> > +**     movi    v[0-9]+.2s, 0x80, lsl 24
> > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > +**     ret
> > +*/
> > +float32_t f1 (float32_t a)
> > +{
> > +  return -fabsf (a);
> > +}
> > +
> > +/*
> > +** f2:
> > +**     mov     x0, -9223372036854775808
> > +**     fmov    d[0-9]+, x0
> > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > +**     ret
> > +*/
> > +float64_t f2 (float64_t a)
> > +{
> > +  return -fabs (a);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..b4652173a95d104ddfa70c497f0627a61ea89d3b
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
> > @@ -0,0 +1,36 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3" } */
> > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > +
> > +#pragma GCC target "+nosve"
> > +
> > +#include <arm_neon.h>
> > +#include <math.h>
> > +
> > +/*
> > +** f1:
> > +**     ...
> > +**     ldr     q[0-9]+, \[x0\]
> > +**     orr     v[0-9]+.4s, #128, lsl #24
> > +**     str     q[0-9]+, \[x0\], 16
> > +**     ...
> > +*/
> > +void f1 (float32_t *a, int n)
> > +{
> > +  for (int i = 0; i < (n & -8); i++)
> > +   a[i] = -fabsf (a[i]);
> > +}
> > +
> > +/*
> > +** f2:
> > +**     ...
> > +**     ldr     q[0-9]+, \[x0\]
> > +**     orr     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> > +**     str     q[0-9]+, \[x0\], 16
> > +**     ...
> > +*/
> > +void f2 (float64_t *a, int n)
> > +{
> > +  for (int i = 0; i < (n & -8); i++)
> > +   a[i] = -fabs (a[i]);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..10879dea74462d34b26160eeb0bd54ead063166b
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
> > @@ -0,0 +1,39 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3" } */
> > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > +
> > +#pragma GCC target "+nosve"
> > +
> > +#include <string.h>
> > +
> > +/*
> > +** negabs:
> > +**     mov     x0, -9223372036854775808
> > +**     fmov    d[0-9]+, x0
> > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > +**     ret
> > +*/
> > +double negabs (double x)
> > +{
> > +   unsigned long long y;
> > +   memcpy (&y, &x, sizeof(double));
> > +   y = y | (1UL << 63);
> > +   memcpy (&x, &y, sizeof(double));
> > +   return x;
> > +}
> > +
> > +/*
> > +** negabsf:
> > +**     movi    v[0-9]+.2s, 0x80, lsl 24
> > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > +**     ret
> > +*/
> > +float negabsf (float x)
> > +{
> > +   unsigned int y;
> > +   memcpy (&y, &x, sizeof(float));
> > +   y = y | (1U << 31);
> > +   memcpy (&x, &y, sizeof(float));
> > +   return x;
> > +}
> > +
> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..0c7664e6de77a497682952653ffd417453854d52
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
> > @@ -0,0 +1,37 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3" } */
> > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > +
> > +#include <arm_neon.h>
> > +
> > +/*
> > +** t1:
> > +**     orr     v[0-9]+.2s, #128, lsl #24
> > +**     ret
> > +*/
> > +float32x2_t t1 (float32x2_t a)
> > +{
> > +  return vneg_f32 (vabs_f32 (a));
> > +}
> > +
> > +/*
> > +** t2:
> > +**     orr     v[0-9]+.4s, #128, lsl #24
> > +**     ret
> > +*/
> > +float32x4_t t2 (float32x4_t a)
> > +{
> > +  return vnegq_f32 (vabsq_f32 (a));
> > +}
> > +
> > +/*
> > +** t3:
> > +**     adrp    x0, .LC[0-9]+
> > +**     ldr     q[0-9]+, \[x0, #:lo12:.LC0\]
> > +**     orr     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> > +**     ret
> > +*/
> > +float64x2_t t3 (float64x2_t a)
> > +{
> > +  return vnegq_f64 (vabsq_f64 (a));
> > +}
> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..a60cd31b9294af2dac69eed1c93f899bd5c78fca
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
> > @@ -0,0 +1,29 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3" } */
> > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > +
> > +#include <arm_neon.h>
> > +#include <math.h>
> > +
> > +/*
> > +** f1:
> > +**     movi    v[0-9]+.2s, 0x80, lsl 24
> > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > +**     ret
> > +*/
> > +float32_t f1 (float32_t a)
> > +{
> > +  return -fabsf (a);
> > +}
> > +
> > +/*
> > +** f2:
> > +**     mov     x0, -9223372036854775808
> > +**     fmov    d[0-9]+, x0
> > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > +**     ret
> > +*/
> > +float64_t f2 (float64_t a)
> > +{
> > +  return -fabs (a);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..1bf34328d8841de8e6b0a5458562a9f00e31c275
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
> > @@ -0,0 +1,34 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3" } */
> > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > +
> > +#include <arm_neon.h>
> > +#include <math.h>
> > +
> > +/*
> > +** f1:
> > +**     ...
> > +**     ld1w    z[0-9]+.s, p[0-9]+/z, \[x0, x2, lsl 2\]
> > +**     orr     z[0-9]+.s, z[0-9]+.s, #0x80000000
> > +**     st1w    z[0-9]+.s, p[0-9]+, \[x0, x2, lsl 2\]
> > +**     ...
> > +*/
> > +void f1 (float32_t *a, int n)
> > +{
> > +  for (int i = 0; i < (n & -8); i++)
> > +   a[i] = -fabsf (a[i]);
> > +}
> > +
> > +/*
> > +** f2:
> > +**     ...
> > +**     ld1d    z[0-9]+.d, p[0-9]+/z, \[x0, x2, lsl 3\]
> > +**     orr     z[0-9]+.d, z[0-9]+.d, #0x8000000000000000
> > +**     st1d    z[0-9]+.d, p[0-9]+, \[x0, x2, lsl 3\]
> > +**     ...
> > +*/
> > +void f2 (float64_t *a, int n)
> > +{
> > +  for (int i = 0; i < (n & -8); i++)
> > +   a[i] = -fabs (a[i]);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..21f2a8da2a5d44e3d01f6604ca7be87e3744d494
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
> > @@ -0,0 +1,37 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3" } */
> > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > +
> > +#include <string.h>
> > +
> > +/*
> > +** negabs:
> > +**     mov     x0, -9223372036854775808
> > +**     fmov    d[0-9]+, x0
> > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > +**     ret
> > +*/
> > +double negabs (double x)
> > +{
> > +   unsigned long long y;
> > +   memcpy (&y, &x, sizeof(double));
> > +   y = y | (1UL << 63);
> > +   memcpy (&x, &y, sizeof(double));
> > +   return x;
> > +}
> > +
> > +/*
> > +** negabsf:
> > +**     movi    v[0-9]+.2s, 0x80, lsl 24
> > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > +**     ret
> > +*/
> > +float negabsf (float x)
> > +{
> > +   unsigned int y;
> > +   memcpy (&y, &x, sizeof(float));
> > +   y = y | (1U << 31);
> > +   memcpy (&x, &y, sizeof(float));
> > +   return x;
> > +}
> > +
> >
> >
> >
> >
> > --
>
  
Andrew Pinski Nov. 10, 2023, 9:22 p.m. UTC | #7
On Fri, Nov 10, 2023 at 5:12 AM Richard Biener <rguenther@suse.de> wrote:
>
> On Fri, 10 Nov 2023, Tamar Christina wrote:
>
> >
> > Hi Prathamesh,
> >
> > Yes Arm requires SIMD for copysign. The testcases fail because they don't turn on Neon.
> >
> > I'll update them.
>
> On x86_64 with -m32 I see
>
> FAIL: gcc.dg/pr55152-2.c scan-tree-dump-times optimized ".COPYSIGN" 1
> FAIL: gcc.dg/pr55152-2.c scan-tree-dump-times optimized "ABS_EXPR" 1
> FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= ABS_EXPR"
> 1
> FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= -" 1
> FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= .COPYSIGN"
> 2
> FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
> "Deleting[^\\\\n]* = -" 4
> FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
> "Deleting[^\\\\n]* = \\\\.COPYSIGN" 2
> FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
> "Deleting[^\\\\n]* = ABS_EXPR <" 1
> FAIL: gcc.dg/tree-ssa/phi-opt-24.c scan-tree-dump-not phiopt2 "if"
>
> maybe add a copysign effective target?

I get the feeling that the internal function for copysign should not
be a direct internal function for scalar modes, and should instead
call expand_copysign when expanding.
This will fix some, if not all, of the issues where COPYSIGN is now
trying to show up.

By the way, this is most likely PR 88786 (and PR 112468 and a few
others), and PR 58797.

Thanks,
Andrew



>
> > Regards,
> > Tamar
> > ________________________________
> > From: Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
> > Sent: Friday, November 10, 2023 12:24 PM
> > To: Tamar Christina <Tamar.Christina@arm.com>
> > Cc: gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>; nd <nd@arm.com>; rguenther@suse.de <rguenther@suse.de>; jlaw@ventanamicro.com <jlaw@ventanamicro.com>
> > Subject: Re: [PATCH v3 2/2]middle-end match.pd: optimize fneg (fabs (x)) to copysign (x, -1) [PR109154]
> >
> > On Mon, 6 Nov 2023 at 15:50, Tamar Christina <tamar.christina@arm.com> wrote:
> > >
> > > Hi All,
> > >
> > > This patch transforms fneg (fabs (x)) into copysign (x, -1) which is more
> > > canonical and allows a target to expand this sequence efficiently.  Such
> > > sequences are common in scientific code working with gradients.
> > >
> > > There is an existing canonicalization of copysign (x, -1) to fneg (fabs (x))
> > > which I remove since this is a less efficient form.  The testsuite is also
> > > updated in light of this.
> > >
> > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> > Hi Tamar,
> > It seems the patch caused following regressions on arm:
> >
> > Running gcc:gcc.dg/dg.exp ...
> > FAIL: gcc.dg/pr55152-2.c scan-tree-dump-times optimized ".COPYSIGN" 1
> > FAIL: gcc.dg/pr55152-2.c scan-tree-dump-times optimized "ABS_EXPR" 1
> >
> > Running gcc:gcc.dg/tree-ssa/tree-ssa.exp ...
> > FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= -" 1
> > FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= .COPYSIGN" 2
> > FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= ABS_EXPR" 1
> > FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
> > "Deleting[^\\n]* = -" 4
> > FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
> > "Deleting[^\\n]* = ABS_EXPR <" 1
> > FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
> > "Deleting[^\\n]* = \\.COPYSIGN" 2
> > FAIL: gcc.dg/tree-ssa/copy-sign-2.c scan-tree-dump-times optimized ".COPYSIGN" 1
> > FAIL: gcc.dg/tree-ssa/copy-sign-2.c scan-tree-dump-times optimized "ABS" 1
> > FAIL: gcc.dg/tree-ssa/mult-abs-2.c scan-tree-dump-times gimple ".COPYSIGN" 4
> > FAIL: gcc.dg/tree-ssa/mult-abs-2.c scan-tree-dump-times gimple "ABS" 4
> > FAIL: gcc.dg/tree-ssa/phi-opt-24.c scan-tree-dump-not phiopt2 "if"
> > Link to log files:
> > https://ci.linaro.org/job/tcwg_gcc_check--master-arm-build/1240/artifact/artifacts/00-sumfiles/
> >
> > Even for following test-case:
> > double g (double a)
> > {
> >   double t1 = fabs (a);
> >   double t2 = -t1;
> >   return t2;
> > }
> >
> > It seems, the pattern gets applied but doesn't get eventually
> > simplified to copysign(a, -1).
> > forwprop dump shows:
> > Applying pattern match.pd:1131, gimple-match-4.cc:4134
> > double g (double a)
> > {
> >   double t2;
> >   double t1;
> >
> >   <bb 2> :
> >   t1_2 = ABS_EXPR <a_1(D)>;
> >   t2_3 = -t1_2;
> >   return t2_3;
> >
> > }
> >
> > while on x86_64:
> > Applying pattern match.pd:1131, gimple-match-4.cc:4134
> > gimple_simplified to t2_3 = .COPYSIGN (a_1(D), -1.0e+0);
> > Removing dead stmt:t1_2 = ABS_EXPR <a_1(D)>;
> > double g (double a)
> > {
> >   double t2;
> >   double t1;
> >
> >   <bb 2> :
> >   t2_3 = .COPYSIGN (a_1(D), -1.0e+0);
> >   return t2_3;
> >
> > }
> >
> > Thanks,
> > Prathamesh
> >
> >
> > >
> > > Ok for master?
> > >
> > > Thanks,
> > > Tamar
> > >
> > > gcc/ChangeLog:
> > >
> > >         PR tree-optimization/109154
> > >         * match.pd: Add new neg+abs rule, remove inverse copysign rule.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >         PR tree-optimization/109154
> > >         * gcc.dg/fold-copysign-1.c: Updated.
> > >         * gcc.dg/pr55152-2.c: Updated.
> > >         * gcc.dg/tree-ssa/abs-4.c: Updated.
> > >         * gcc.dg/tree-ssa/backprop-6.c: Updated.
> > >         * gcc.dg/tree-ssa/copy-sign-2.c: Updated.
> > >         * gcc.dg/tree-ssa/mult-abs-2.c: Updated.
> > >         * gcc.target/aarch64/fneg-abs_1.c: New test.
> > >         * gcc.target/aarch64/fneg-abs_2.c: New test.
> > >         * gcc.target/aarch64/fneg-abs_3.c: New test.
> > >         * gcc.target/aarch64/fneg-abs_4.c: New test.
> > >         * gcc.target/aarch64/sve/fneg-abs_1.c: New test.
> > >         * gcc.target/aarch64/sve/fneg-abs_2.c: New test.
> > >         * gcc.target/aarch64/sve/fneg-abs_3.c: New test.
> > >         * gcc.target/aarch64/sve/fneg-abs_4.c: New test.
> > >
> > > --- inline copy of patch --
> > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > index db95931df0672cf4ef08cca36085c3aa6831519e..7a023d510c283c43a87b1795a74761b8af979b53 100644
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -1106,13 +1106,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > >     (hypots @0 (copysigns @1 @2))
> > >     (hypots @0 @1))))
> > >
> > > -/* copysign(x, CST) -> [-]abs (x).  */
> > > -(for copysigns (COPYSIGN_ALL)
> > > - (simplify
> > > -  (copysigns @0 REAL_CST@1)
> > > -  (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
> > > -   (negate (abs @0))
> > > -   (abs @0))))
> > > +/* Transform fneg (fabs (X)) -> copysign (X, -1).  */
> > > +
> > > +(simplify
> > > + (negate (abs @0))
> > > + (IFN_COPYSIGN @0 { build_minus_one_cst (type); }))
> > >
> > >  /* copysign(copysign(x, y), z) -> copysign(x, z).  */
> > >  (for copysigns (COPYSIGN_ALL)
> > > diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c b/gcc/testsuite/gcc.dg/fold-copysign-1.c
> > > index f17d65c24ee4dca9867827d040fe0a404c515e7b..f9cafd14ab05f5e8ab2f6f68e62801d21c2df6a6 100644
> > > --- a/gcc/testsuite/gcc.dg/fold-copysign-1.c
> > > +++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c
> > > @@ -12,5 +12,5 @@ double bar (double x)
> > >    return __builtin_copysign (x, minuszero);
> > >  }
> > >
> > > -/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" } } */
> > > -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" } } */
> > > +/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" } } */
> > > +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" } } */
> > > diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c
> > > index 54db0f2062da105a829d6690ac8ed9891fe2b588..605f202ed6bc7aa8fe921457b02ff0b88cc63ce6 100644
> > > --- a/gcc/testsuite/gcc.dg/pr55152-2.c
> > > +++ b/gcc/testsuite/gcc.dg/pr55152-2.c
> > > @@ -10,4 +10,5 @@ int f(int a)
> > >    return (a<-a)?a:-a;
> > >  }
> > >
> > > -/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" } } */
> > > +/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" } } */
> > > +/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" } } */
> > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> > > index 6197519faf7b55aed7bc162cd0a14dd2145210ca..e1b825f37f69ac3c4666b3a52d733368805ad31d 100644
> > > --- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> > > @@ -9,5 +9,6 @@ long double abs_ld(long double x) { return __builtin_signbit(x) ? x : -x; }
> > >
> > >  /* __builtin_signbit(x) ? x : -x. Should be convert into - ABS_EXP<x> */
> > >  /* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */
> > > -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized"} } */
> > > -/* { dg-final { scan-tree-dump-times "= -" 3 "optimized"} } */
> > > +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized"} } */
> > > +/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */
> > > +/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized"} } */
> > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> > > index 31f05716f1498dc709cac95fa20fb5796642c77e..c3a138642d6ff7be984e91fa1343cb2718db7ae1 100644
> > > --- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> > > @@ -26,5 +26,6 @@ TEST_FUNCTION (float, f)
> > >  TEST_FUNCTION (double, )
> > >  TEST_FUNCTION (long double, l)
> > >
> > > -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" } } */
> > > -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" } } */
> > > +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" } } */
> > > +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" } } */
> > > +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" } } */
> > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> > > index de52c5f7c8062958353d91f5031193defc9f3f91..e5d565c4b9832c00106588ef411fbd8c292a5cad 100644
> > > --- a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> > > @@ -10,4 +10,5 @@ float f1(float x)
> > >    float t = __builtin_copysignf (1.0f, -x);
> > >    return x * t;
> > >  }
> > > -/* { dg-final { scan-tree-dump-times "ABS" 2 "optimized"} } */
> > > +/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized"} } */
> > > +/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized"} } */
> > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> > > index a41f1baf25669a4fd301a586a49ba5e3c5b966ab..a22896b21c8b5a4d5d8e28bd8ae0db896e63ade0 100644
> > > --- a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> > > @@ -34,4 +34,5 @@ float i1(float x)
> > >  {
> > >    return x * (x <= 0.f ? 1.f : -1.f);
> > >  }
> > > -/* { dg-final { scan-tree-dump-times "ABS" 8 "gimple"} } */
> > > +/* { dg-final { scan-tree-dump-times "ABS" 4 "gimple"} } */
> > > +/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 4 "gimple"} } */
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
> > > new file mode 100644
> > > index 0000000000000000000000000000000000000000..f823013c3ddf6b3a266c3abfcbf2642fc2a75fa6
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
> > > @@ -0,0 +1,39 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O3" } */
> > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > > +
> > > +#pragma GCC target "+nosve"
> > > +
> > > +#include <arm_neon.h>
> > > +
> > > +/*
> > > +** t1:
> > > +**     orr     v[0-9]+.2s, #128, lsl #24
> > > +**     ret
> > > +*/
> > > +float32x2_t t1 (float32x2_t a)
> > > +{
> > > +  return vneg_f32 (vabs_f32 (a));
> > > +}
> > > +
> > > +/*
> > > +** t2:
> > > +**     orr     v[0-9]+.4s, #128, lsl #24
> > > +**     ret
> > > +*/
> > > +float32x4_t t2 (float32x4_t a)
> > > +{
> > > +  return vnegq_f32 (vabsq_f32 (a));
> > > +}
> > > +
> > > +/*
> > > +** t3:
> > > +**     adrp    x0, .LC[0-9]+
> > > +**     ldr     q[0-9]+, \[x0, #:lo12:.LC0\]
> > > +**     orr     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> > > +**     ret
> > > +*/
> > > +float64x2_t t3 (float64x2_t a)
> > > +{
> > > +  return vnegq_f64 (vabsq_f64 (a));
> > > +}
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
> > > new file mode 100644
> > > index 0000000000000000000000000000000000000000..141121176b309e4b2aa413dc55271a6e3c93d5e1
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
> > > @@ -0,0 +1,31 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O3" } */
> > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > > +
> > > +#pragma GCC target "+nosve"
> > > +
> > > +#include <arm_neon.h>
> > > +#include <math.h>
> > > +
> > > +/*
> > > +** f1:
> > > +**     movi    v[0-9]+.2s, 0x80, lsl 24
> > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > +**     ret
> > > +*/
> > > +float32_t f1 (float32_t a)
> > > +{
> > > +  return -fabsf (a);
> > > +}
> > > +
> > > +/*
> > > +** f2:
> > > +**     mov     x0, -9223372036854775808
> > > +**     fmov    d[0-9]+, x0
> > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > +**     ret
> > > +*/
> > > +float64_t f2 (float64_t a)
> > > +{
> > > +  return -fabs (a);
> > > +}
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
> > > new file mode 100644
> > > index 0000000000000000000000000000000000000000..b4652173a95d104ddfa70c497f0627a61ea89d3b
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
> > > @@ -0,0 +1,36 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O3" } */
> > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > > +
> > > +#pragma GCC target "+nosve"
> > > +
> > > +#include <arm_neon.h>
> > > +#include <math.h>
> > > +
> > > +/*
> > > +** f1:
> > > +**     ...
> > > +**     ldr     q[0-9]+, \[x0\]
> > > +**     orr     v[0-9]+.4s, #128, lsl #24
> > > +**     str     q[0-9]+, \[x0\], 16
> > > +**     ...
> > > +*/
> > > +void f1 (float32_t *a, int n)
> > > +{
> > > +  for (int i = 0; i < (n & -8); i++)
> > > +   a[i] = -fabsf (a[i]);
> > > +}
> > > +
> > > +/*
> > > +** f2:
> > > +**     ...
> > > +**     ldr     q[0-9]+, \[x0\]
> > > +**     orr     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> > > +**     str     q[0-9]+, \[x0\], 16
> > > +**     ...
> > > +*/
> > > +void f2 (float64_t *a, int n)
> > > +{
> > > +  for (int i = 0; i < (n & -8); i++)
> > > +   a[i] = -fabs (a[i]);
> > > +}
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
> > > new file mode 100644
> > > index 0000000000000000000000000000000000000000..10879dea74462d34b26160eeb0bd54ead063166b
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
> > > @@ -0,0 +1,39 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O3" } */
> > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > > +
> > > +#pragma GCC target "+nosve"
> > > +
> > > +#include <string.h>
> > > +
> > > +/*
> > > +** negabs:
> > > +**     mov     x0, -9223372036854775808
> > > +**     fmov    d[0-9]+, x0
> > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > +**     ret
> > > +*/
> > > +double negabs (double x)
> > > +{
> > > +   unsigned long long y;
> > > +   memcpy (&y, &x, sizeof(double));
> > > +   y = y | (1UL << 63);
> > > +   memcpy (&x, &y, sizeof(double));
> > > +   return x;
> > > +}
> > > +
> > > +/*
> > > +** negabsf:
> > > +**     movi    v[0-9]+.2s, 0x80, lsl 24
> > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > +**     ret
> > > +*/
> > > +float negabsf (float x)
> > > +{
> > > +   unsigned int y;
> > > +   memcpy (&y, &x, sizeof(float));
> > > +   y = y | (1U << 31);
> > > +   memcpy (&x, &y, sizeof(float));
> > > +   return x;
> > > +}
> > > +
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
> > > new file mode 100644
> > > index 0000000000000000000000000000000000000000..0c7664e6de77a497682952653ffd417453854d52
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
> > > @@ -0,0 +1,37 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O3" } */
> > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > > +
> > > +#include <arm_neon.h>
> > > +
> > > +/*
> > > +** t1:
> > > +**     orr     v[0-9]+.2s, #128, lsl #24
> > > +**     ret
> > > +*/
> > > +float32x2_t t1 (float32x2_t a)
> > > +{
> > > +  return vneg_f32 (vabs_f32 (a));
> > > +}
> > > +
> > > +/*
> > > +** t2:
> > > +**     orr     v[0-9]+.4s, #128, lsl #24
> > > +**     ret
> > > +*/
> > > +float32x4_t t2 (float32x4_t a)
> > > +{
> > > +  return vnegq_f32 (vabsq_f32 (a));
> > > +}
> > > +
> > > +/*
> > > +** t3:
> > > +**     adrp    x0, .LC[0-9]+
> > > +**     ldr     q[0-9]+, \[x0, #:lo12:.LC0\]
> > > +**     orr     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> > > +**     ret
> > > +*/
> > > +float64x2_t t3 (float64x2_t a)
> > > +{
> > > +  return vnegq_f64 (vabsq_f64 (a));
> > > +}
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
> > > new file mode 100644
> > > index 0000000000000000000000000000000000000000..a60cd31b9294af2dac69eed1c93f899bd5c78fca
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
> > > @@ -0,0 +1,29 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O3" } */
> > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > > +
> > > +#include <arm_neon.h>
> > > +#include <math.h>
> > > +
> > > +/*
> > > +** f1:
> > > +**     movi    v[0-9]+.2s, 0x80, lsl 24
> > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > +**     ret
> > > +*/
> > > +float32_t f1 (float32_t a)
> > > +{
> > > +  return -fabsf (a);
> > > +}
> > > +
> > > +/*
> > > +** f2:
> > > +**     mov     x0, -9223372036854775808
> > > +**     fmov    d[0-9]+, x0
> > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > +**     ret
> > > +*/
> > > +float64_t f2 (float64_t a)
> > > +{
> > > +  return -fabs (a);
> > > +}
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
> > > new file mode 100644
> > > index 0000000000000000000000000000000000000000..1bf34328d8841de8e6b0a5458562a9f00e31c275
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
> > > @@ -0,0 +1,34 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O3" } */
> > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > > +
> > > +#include <arm_neon.h>
> > > +#include <math.h>
> > > +
> > > +/*
> > > +** f1:
> > > +**     ...
> > > +**     ld1w    z[0-9]+.s, p[0-9]+/z, \[x0, x2, lsl 2\]
> > > +**     orr     z[0-9]+.s, z[0-9]+.s, #0x80000000
> > > +**     st1w    z[0-9]+.s, p[0-9]+, \[x0, x2, lsl 2\]
> > > +**     ...
> > > +*/
> > > +void f1 (float32_t *a, int n)
> > > +{
> > > +  for (int i = 0; i < (n & -8); i++)
> > > +   a[i] = -fabsf (a[i]);
> > > +}
> > > +
> > > +/*
> > > +** f2:
> > > +**     ...
> > > +**     ld1d    z[0-9]+.d, p[0-9]+/z, \[x0, x2, lsl 3\]
> > > +**     orr     z[0-9]+.d, z[0-9]+.d, #0x8000000000000000
> > > +**     st1d    z[0-9]+.d, p[0-9]+, \[x0, x2, lsl 3\]
> > > +**     ...
> > > +*/
> > > +void f2 (float64_t *a, int n)
> > > +{
> > > +  for (int i = 0; i < (n & -8); i++)
> > > +   a[i] = -fabs (a[i]);
> > > +}
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
> > > new file mode 100644
> > > index 0000000000000000000000000000000000000000..21f2a8da2a5d44e3d01f6604ca7be87e3744d494
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
> > > @@ -0,0 +1,37 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O3" } */
> > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > > +
> > > +#include <string.h>
> > > +
> > > +/*
> > > +** negabs:
> > > +**     mov     x0, -9223372036854775808
> > > +**     fmov    d[0-9]+, x0
> > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > +**     ret
> > > +*/
> > > +double negabs (double x)
> > > +{
> > > +   unsigned long long y;
> > > +   memcpy (&y, &x, sizeof(double));
> > > +   y = y | (1UL << 63);
> > > +   memcpy (&x, &y, sizeof(double));
> > > +   return x;
> > > +}
> > > +
> > > +/*
> > > +** negabsf:
> > > +**     movi    v[0-9]+.2s, 0x80, lsl 24
> > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > +**     ret
> > > +*/
> > > +float negabsf (float x)
> > > +{
> > > +   unsigned int y;
> > > +   memcpy (&y, &x, sizeof(float));
> > > +   y = y | (1U << 31);
> > > +   memcpy (&x, &y, sizeof(float));
> > > +   return x;
> > > +}
> > > +
> > >
> > >
> > >
> > >
> > > --
> >
>
> --
> Richard Biener <rguenther@suse.de>
> SUSE Software Solutions Germany GmbH,
> Frankenstrasse 146, 90461 Nuernberg, Germany;
> GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
  
Richard Biener Nov. 13, 2023, 7:08 a.m. UTC | #8
On Fri, 10 Nov 2023, Andrew Pinski wrote:

> On Fri, Nov 10, 2023 at 5:12 AM Richard Biener <rguenther@suse.de> wrote:
> >
> > On Fri, 10 Nov 2023, Tamar Christina wrote:
> >
> > >
> > > Hi Prathamesh,
> > >
> > > Yes Arm requires SIMD for copysign. The testcases fail because they don't turn on Neon.
> > >
> > > I'll update them.
> >
> > On x86_64 with -m32 I see
> >
> > FAIL: gcc.dg/pr55152-2.c scan-tree-dump-times optimized ".COPYSIGN" 1
> > FAIL: gcc.dg/pr55152-2.c scan-tree-dump-times optimized "ABS_EXPR" 1
> > FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= ABS_EXPR"
> > 1
> > FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= -" 1
> > FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= .COPYSIGN"
> > 2
> > FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
> > "Deleting[^\\\\n]* = -" 4
> > FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
> > "Deleting[^\\\\n]* = \\\\.COPYSIGN" 2
> > FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
> > "Deleting[^\\\\n]* = ABS_EXPR <" 1
> > FAIL: gcc.dg/tree-ssa/phi-opt-24.c scan-tree-dump-not phiopt2 "if"
> >
> > maybe add a copysign effective target?
> 
> I get the feeling that the internal function for copysign should not
> be a direct internal function for scalar modes and call
> expand_copysign instead when expanding.
> This will fix some if not all of the issues where COPYSIGN is now
> trying to show up.

But then I'd rather have a COPYSIGN_EXPR tree code, leaving internal-fns
to optab mappings.  We've discussed this and discarded any of this as
too much work right now.

But yes, the situation is a bit messy (as also discussed).

Richard.
 
> By the way, this is most likely PR 88786 (and PR 112468 and a few
> others), and PR 58797.
>
> Thanks,
> Andrew
> 
> 
> 
> >
> > > Regards,
> > > Tamar
> > > ________________________________
> > > From: Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
> > > Sent: Friday, November 10, 2023 12:24 PM
> > > To: Tamar Christina <Tamar.Christina@arm.com>
> > > Cc: gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>; nd <nd@arm.com>; rguenther@suse.de <rguenther@suse.de>; jlaw@ventanamicro.com <jlaw@ventanamicro.com>
> > > Subject: Re: [PATCH v3 2/2]middle-end match.pd: optimize fneg (fabs (x)) to copysign (x, -1) [PR109154]
> > >
> > > On Mon, 6 Nov 2023 at 15:50, Tamar Christina <tamar.christina@arm.com> wrote:
> > > >
> > > > Hi All,
> > > >
> > > > This patch transforms fneg (fabs (x)) into copysign (x, -1) which is more
> > > > canonical and allows a target to expand this sequence efficiently.  Such
> > > > sequences are common in scientific code working with gradients.
> > > >
> > > > There is an existing canonicalization of copysign (x, -1) to fneg (fabs (x))
> > > > which I remove since this is a less efficient form.  The testsuite is also
> > > > updated in light of this.
> > > >
> > > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> > > Hi Tamar,
> > > It seems the patch caused following regressions on arm:
> > >
> > > Running gcc:gcc.dg/dg.exp ...
> > > FAIL: gcc.dg/pr55152-2.c scan-tree-dump-times optimized ".COPYSIGN" 1
> > > FAIL: gcc.dg/pr55152-2.c scan-tree-dump-times optimized "ABS_EXPR" 1
> > >
> > > Running gcc:gcc.dg/tree-ssa/tree-ssa.exp ...
> > > FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= -" 1
> > > FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= .COPYSIGN" 2
> > > FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= ABS_EXPR" 1
> > > FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
> > > "Deleting[^\\n]* = -" 4
> > > FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
> > > "Deleting[^\\n]* = ABS_EXPR <" 1
> > > FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
> > > "Deleting[^\\n]* = \\.COPYSIGN" 2
> > > FAIL: gcc.dg/tree-ssa/copy-sign-2.c scan-tree-dump-times optimized ".COPYSIGN" 1
> > > FAIL: gcc.dg/tree-ssa/copy-sign-2.c scan-tree-dump-times optimized "ABS" 1
> > > FAIL: gcc.dg/tree-ssa/mult-abs-2.c scan-tree-dump-times gimple ".COPYSIGN" 4
> > > FAIL: gcc.dg/tree-ssa/mult-abs-2.c scan-tree-dump-times gimple "ABS" 4
> > > FAIL: gcc.dg/tree-ssa/phi-opt-24.c scan-tree-dump-not phiopt2 "if"
> > > Link to log files:
> > > https://ci.linaro.org/job/tcwg_gcc_check--master-arm-build/1240/artifact/artifacts/00-sumfiles/
> > >
> > > Even for following test-case:
> > > double g (double a)
> > > {
> > >   double t1 = fabs (a);
> > >   double t2 = -t1;
> > >   return t2;
> > > }
> > >
> > > It seems, the pattern gets applied but doesn't get eventually
> > > simplified to copysign(a, -1).
> > > forwprop dump shows:
> > > Applying pattern match.pd:1131, gimple-match-4.cc:4134
> > > double g (double a)
> > > {
> > >   double t2;
> > >   double t1;
> > >
> > >   <bb 2> :
> > >   t1_2 = ABS_EXPR <a_1(D)>;
> > >   t2_3 = -t1_2;
> > >   return t2_3;
> > >
> > > }
> > >
> > > while on x86_64:
> > > Applying pattern match.pd:1131, gimple-match-4.cc:4134
> > > gimple_simplified to t2_3 = .COPYSIGN (a_1(D), -1.0e+0);
> > > Removing dead stmt:t1_2 = ABS_EXPR <a_1(D)>;
> > > double g (double a)
> > > {
> > >   double t2;
> > >   double t1;
> > >
> > >   <bb 2> :
> > >   t2_3 = .COPYSIGN (a_1(D), -1.0e+0);
> > >   return t2_3;
> > >
> > > }
> > >
> > > Thanks,
> > > Prathamesh
> > >
> > >
> > > >
> > > > Ok for master?
> > > >
> > > > Thanks,
> > > > Tamar
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > >         PR tree-optimization/109154
> > > >         * match.pd: Add new neg+abs rule, remove inverse copysign rule.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > >         PR tree-optimization/109154
> > > >         * gcc.dg/fold-copysign-1.c: Updated.
> > > >         * gcc.dg/pr55152-2.c: Updated.
> > > >         * gcc.dg/tree-ssa/abs-4.c: Updated.
> > > >         * gcc.dg/tree-ssa/backprop-6.c: Updated.
> > > >         * gcc.dg/tree-ssa/copy-sign-2.c: Updated.
> > > >         * gcc.dg/tree-ssa/mult-abs-2.c: Updated.
> > > >         * gcc.target/aarch64/fneg-abs_1.c: New test.
> > > >         * gcc.target/aarch64/fneg-abs_2.c: New test.
> > > >         * gcc.target/aarch64/fneg-abs_3.c: New test.
> > > >         * gcc.target/aarch64/fneg-abs_4.c: New test.
> > > >         * gcc.target/aarch64/sve/fneg-abs_1.c: New test.
> > > >         * gcc.target/aarch64/sve/fneg-abs_2.c: New test.
> > > >         * gcc.target/aarch64/sve/fneg-abs_3.c: New test.
> > > >         * gcc.target/aarch64/sve/fneg-abs_4.c: New test.
> > > >
> > > > --- inline copy of patch --
> > > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > > index db95931df0672cf4ef08cca36085c3aa6831519e..7a023d510c283c43a87b1795a74761b8af979b53 100644
> > > > --- a/gcc/match.pd
> > > > +++ b/gcc/match.pd
> > > > @@ -1106,13 +1106,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > > >     (hypots @0 (copysigns @1 @2))
> > > >     (hypots @0 @1))))
> > > >
> > > > -/* copysign(x, CST) -> [-]abs (x).  */
> > > > -(for copysigns (COPYSIGN_ALL)
> > > > - (simplify
> > > > -  (copysigns @0 REAL_CST@1)
> > > > -  (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
> > > > -   (negate (abs @0))
> > > > -   (abs @0))))
> > > > +/* Transform fneg (fabs (X)) -> copysign (X, -1).  */
> > > > +
> > > > +(simplify
> > > > + (negate (abs @0))
> > > > + (IFN_COPYSIGN @0 { build_minus_one_cst (type); }))
> > > >
> > > >  /* copysign(copysign(x, y), z) -> copysign(x, z).  */
> > > >  (for copysigns (COPYSIGN_ALL)
> > > > diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c b/gcc/testsuite/gcc.dg/fold-copysign-1.c
> > > > index f17d65c24ee4dca9867827d040fe0a404c515e7b..f9cafd14ab05f5e8ab2f6f68e62801d21c2df6a6 100644
> > > > --- a/gcc/testsuite/gcc.dg/fold-copysign-1.c
> > > > +++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c
> > > > @@ -12,5 +12,5 @@ double bar (double x)
> > > >    return __builtin_copysign (x, minuszero);
> > > >  }
> > > >
> > > > -/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" } } */
> > > > -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" } } */
> > > > +/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" } } */
> > > > +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" } } */
> > > > diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c
> > > > index 54db0f2062da105a829d6690ac8ed9891fe2b588..605f202ed6bc7aa8fe921457b02ff0b88cc63ce6 100644
> > > > --- a/gcc/testsuite/gcc.dg/pr55152-2.c
> > > > +++ b/gcc/testsuite/gcc.dg/pr55152-2.c
> > > > @@ -10,4 +10,5 @@ int f(int a)
> > > >    return (a<-a)?a:-a;
> > > >  }
> > > >
> > > > -/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" } } */
> > > > +/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" } } */
> > > > +/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" } } */
> > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> > > > index 6197519faf7b55aed7bc162cd0a14dd2145210ca..e1b825f37f69ac3c4666b3a52d733368805ad31d 100644
> > > > --- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> > > > @@ -9,5 +9,6 @@ long double abs_ld(long double x) { return __builtin_signbit(x) ? x : -x; }
> > > >
> > > >  /* __builtin_signbit(x) ? x : -x. Should be convert into - ABS_EXP<x> */
> > > >  /* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */
> > > > -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized"} } */
> > > > -/* { dg-final { scan-tree-dump-times "= -" 3 "optimized"} } */
> > > > +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized"} } */
> > > > +/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */
> > > > +/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized"} } */
> > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> > > > index 31f05716f1498dc709cac95fa20fb5796642c77e..c3a138642d6ff7be984e91fa1343cb2718db7ae1 100644
> > > > --- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> > > > @@ -26,5 +26,6 @@ TEST_FUNCTION (float, f)
> > > >  TEST_FUNCTION (double, )
> > > >  TEST_FUNCTION (long double, l)
> > > >
> > > > -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" } } */
> > > > -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" } } */
> > > > +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" } } */
> > > > +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" } } */
> > > > +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" } } */
> > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> > > > index de52c5f7c8062958353d91f5031193defc9f3f91..e5d565c4b9832c00106588ef411fbd8c292a5cad 100644
> > > > --- a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> > > > @@ -10,4 +10,5 @@ float f1(float x)
> > > >    float t = __builtin_copysignf (1.0f, -x);
> > > >    return x * t;
> > > >  }
> > > > -/* { dg-final { scan-tree-dump-times "ABS" 2 "optimized"} } */
> > > > +/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized"} } */
> > > > +/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized"} } */
> > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> > > > index a41f1baf25669a4fd301a586a49ba5e3c5b966ab..a22896b21c8b5a4d5d8e28bd8ae0db896e63ade0 100644
> > > > --- a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> > > > @@ -34,4 +34,5 @@ float i1(float x)
> > > >  {
> > > >    return x * (x <= 0.f ? 1.f : -1.f);
> > > >  }
> > > > -/* { dg-final { scan-tree-dump-times "ABS" 8 "gimple"} } */
> > > > +/* { dg-final { scan-tree-dump-times "ABS" 4 "gimple"} } */
> > > > +/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 4 "gimple"} } */
> > > > diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
> > > > new file mode 100644
> > > > index 0000000000000000000000000000000000000000..f823013c3ddf6b3a266c3abfcbf2642fc2a75fa6
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
> > > > @@ -0,0 +1,39 @@
> > > > +/* { dg-do compile } */
> > > > +/* { dg-options "-O3" } */
> > > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > > > +
> > > > +#pragma GCC target "+nosve"
> > > > +
> > > > +#include <arm_neon.h>
> > > > +
> > > > +/*
> > > > +** t1:
> > > > +**     orr     v[0-9]+.2s, #128, lsl #24
> > > > +**     ret
> > > > +*/
> > > > +float32x2_t t1 (float32x2_t a)
> > > > +{
> > > > +  return vneg_f32 (vabs_f32 (a));
> > > > +}
> > > > +
> > > > +/*
> > > > +** t2:
> > > > +**     orr     v[0-9]+.4s, #128, lsl #24
> > > > +**     ret
> > > > +*/
> > > > +float32x4_t t2 (float32x4_t a)
> > > > +{
> > > > +  return vnegq_f32 (vabsq_f32 (a));
> > > > +}
> > > > +
> > > > +/*
> > > > +** t3:
> > > > +**     adrp    x0, .LC[0-9]+
> > > > +**     ldr     q[0-9]+, \[x0, #:lo12:.LC0\]
> > > > +**     orr     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> > > > +**     ret
> > > > +*/
> > > > +float64x2_t t3 (float64x2_t a)
> > > > +{
> > > > +  return vnegq_f64 (vabsq_f64 (a));
> > > > +}
> > > > diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
> > > > new file mode 100644
> > > > index 0000000000000000000000000000000000000000..141121176b309e4b2aa413dc55271a6e3c93d5e1
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
> > > > @@ -0,0 +1,31 @@
> > > > +/* { dg-do compile } */
> > > > +/* { dg-options "-O3" } */
> > > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > > > +
> > > > +#pragma GCC target "+nosve"
> > > > +
> > > > +#include <arm_neon.h>
> > > > +#include <math.h>
> > > > +
> > > > +/*
> > > > +** f1:
> > > > +**     movi    v[0-9]+.2s, 0x80, lsl 24
> > > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > > +**     ret
> > > > +*/
> > > > +float32_t f1 (float32_t a)
> > > > +{
> > > > +  return -fabsf (a);
> > > > +}
> > > > +
> > > > +/*
> > > > +** f2:
> > > > +**     mov     x0, -9223372036854775808
> > > > +**     fmov    d[0-9]+, x0
> > > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > > +**     ret
> > > > +*/
> > > > +float64_t f2 (float64_t a)
> > > > +{
> > > > +  return -fabs (a);
> > > > +}
> > > > diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
> > > > new file mode 100644
> > > > index 0000000000000000000000000000000000000000..b4652173a95d104ddfa70c497f0627a61ea89d3b
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
> > > > @@ -0,0 +1,36 @@
> > > > +/* { dg-do compile } */
> > > > +/* { dg-options "-O3" } */
> > > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > > > +
> > > > +#pragma GCC target "+nosve"
> > > > +
> > > > +#include <arm_neon.h>
> > > > +#include <math.h>
> > > > +
> > > > +/*
> > > > +** f1:
> > > > +**     ...
> > > > +**     ldr     q[0-9]+, \[x0\]
> > > > +**     orr     v[0-9]+.4s, #128, lsl #24
> > > > +**     str     q[0-9]+, \[x0\], 16
> > > > +**     ...
> > > > +*/
> > > > +void f1 (float32_t *a, int n)
> > > > +{
> > > > +  for (int i = 0; i < (n & -8); i++)
> > > > +   a[i] = -fabsf (a[i]);
> > > > +}
> > > > +
> > > > +/*
> > > > +** f2:
> > > > +**     ...
> > > > +**     ldr     q[0-9]+, \[x0\]
> > > > +**     orr     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> > > > +**     str     q[0-9]+, \[x0\], 16
> > > > +**     ...
> > > > +*/
> > > > +void f2 (float64_t *a, int n)
> > > > +{
> > > > +  for (int i = 0; i < (n & -8); i++)
> > > > +   a[i] = -fabs (a[i]);
> > > > +}
> > > > diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
> > > > new file mode 100644
> > > > index 0000000000000000000000000000000000000000..10879dea74462d34b26160eeb0bd54ead063166b
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
> > > > @@ -0,0 +1,39 @@
> > > > +/* { dg-do compile } */
> > > > +/* { dg-options "-O3" } */
> > > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > > > +
> > > > +#pragma GCC target "+nosve"
> > > > +
> > > > +#include <string.h>
> > > > +
> > > > +/*
> > > > +** negabs:
> > > > +**     mov     x0, -9223372036854775808
> > > > +**     fmov    d[0-9]+, x0
> > > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > > +**     ret
> > > > +*/
> > > > +double negabs (double x)
> > > > +{
> > > > +   unsigned long long y;
> > > > +   memcpy (&y, &x, sizeof(double));
> > > > +   y = y | (1UL << 63);
> > > > +   memcpy (&x, &y, sizeof(double));
> > > > +   return x;
> > > > +}
> > > > +
> > > > +/*
> > > > +** negabsf:
> > > > +**     movi    v[0-9]+.2s, 0x80, lsl 24
> > > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > > +**     ret
> > > > +*/
> > > > +float negabsf (float x)
> > > > +{
> > > > +   unsigned int y;
> > > > +   memcpy (&y, &x, sizeof(float));
> > > > +   y = y | (1U << 31);
> > > > +   memcpy (&x, &y, sizeof(float));
> > > > +   return x;
> > > > +}
> > > > +
> > > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
> > > > new file mode 100644
> > > > index 0000000000000000000000000000000000000000..0c7664e6de77a497682952653ffd417453854d52
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
> > > > @@ -0,0 +1,37 @@
> > > > +/* { dg-do compile } */
> > > > +/* { dg-options "-O3" } */
> > > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > > > +
> > > > +#include <arm_neon.h>
> > > > +
> > > > +/*
> > > > +** t1:
> > > > +**     orr     v[0-9]+.2s, #128, lsl #24
> > > > +**     ret
> > > > +*/
> > > > +float32x2_t t1 (float32x2_t a)
> > > > +{
> > > > +  return vneg_f32 (vabs_f32 (a));
> > > > +}
> > > > +
> > > > +/*
> > > > +** t2:
> > > > +**     orr     v[0-9]+.4s, #128, lsl #24
> > > > +**     ret
> > > > +*/
> > > > +float32x4_t t2 (float32x4_t a)
> > > > +{
> > > > +  return vnegq_f32 (vabsq_f32 (a));
> > > > +}
> > > > +
> > > > +/*
> > > > +** t3:
> > > > +**     adrp    x0, .LC[0-9]+
> > > > +**     ldr     q[0-9]+, \[x0, #:lo12:.LC0\]
> > > > +**     orr     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> > > > +**     ret
> > > > +*/
> > > > +float64x2_t t3 (float64x2_t a)
> > > > +{
> > > > +  return vnegq_f64 (vabsq_f64 (a));
> > > > +}
> > > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
> > > > new file mode 100644
> > > > index 0000000000000000000000000000000000000000..a60cd31b9294af2dac69eed1c93f899bd5c78fca
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
> > > > @@ -0,0 +1,29 @@
> > > > +/* { dg-do compile } */
> > > > +/* { dg-options "-O3" } */
> > > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > > > +
> > > > +#include <arm_neon.h>
> > > > +#include <math.h>
> > > > +
> > > > +/*
> > > > +** f1:
> > > > +**     movi    v[0-9]+.2s, 0x80, lsl 24
> > > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > > +**     ret
> > > > +*/
> > > > +float32_t f1 (float32_t a)
> > > > +{
> > > > +  return -fabsf (a);
> > > > +}
> > > > +
> > > > +/*
> > > > +** f2:
> > > > +**     mov     x0, -9223372036854775808
> > > > +**     fmov    d[0-9]+, x0
> > > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > > +**     ret
> > > > +*/
> > > > +float64_t f2 (float64_t a)
> > > > +{
> > > > +  return -fabs (a);
> > > > +}
> > > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
> > > > new file mode 100644
> > > > index 0000000000000000000000000000000000000000..1bf34328d8841de8e6b0a5458562a9f00e31c275
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
> > > > @@ -0,0 +1,34 @@
> > > > +/* { dg-do compile } */
> > > > +/* { dg-options "-O3" } */
> > > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > > > +
> > > > +#include <arm_neon.h>
> > > > +#include <math.h>
> > > > +
> > > > +/*
> > > > +** f1:
> > > > +**     ...
> > > > +**     ld1w    z[0-9]+.s, p[0-9]+/z, \[x0, x2, lsl 2\]
> > > > +**     orr     z[0-9]+.s, z[0-9]+.s, #0x80000000
> > > > +**     st1w    z[0-9]+.s, p[0-9]+, \[x0, x2, lsl 2\]
> > > > +**     ...
> > > > +*/
> > > > +void f1 (float32_t *a, int n)
> > > > +{
> > > > +  for (int i = 0; i < (n & -8); i++)
> > > > +   a[i] = -fabsf (a[i]);
> > > > +}
> > > > +
> > > > +/*
> > > > +** f2:
> > > > +**     ...
> > > > +**     ld1d    z[0-9]+.d, p[0-9]+/z, \[x0, x2, lsl 3\]
> > > > +**     orr     z[0-9]+.d, z[0-9]+.d, #0x8000000000000000
> > > > +**     st1d    z[0-9]+.d, p[0-9]+, \[x0, x2, lsl 3\]
> > > > +**     ...
> > > > +*/
> > > > +void f2 (float64_t *a, int n)
> > > > +{
> > > > +  for (int i = 0; i < (n & -8); i++)
> > > > +   a[i] = -fabs (a[i]);
> > > > +}
> > > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
> > > > new file mode 100644
> > > > index 0000000000000000000000000000000000000000..21f2a8da2a5d44e3d01f6604ca7be87e3744d494
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
> > > > @@ -0,0 +1,37 @@
> > > > +/* { dg-do compile } */
> > > > +/* { dg-options "-O3" } */
> > > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > > > +
> > > > +#include <string.h>
> > > > +
> > > > +/*
> > > > +** negabs:
> > > > +**     mov     x0, -9223372036854775808
> > > > +**     fmov    d[0-9]+, x0
> > > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > > +**     ret
> > > > +*/
> > > > +double negabs (double x)
> > > > +{
> > > > +   unsigned long long y;
> > > > +   memcpy (&y, &x, sizeof(double));
> > > > +   y = y | (1UL << 63);
> > > > +   memcpy (&x, &y, sizeof(double));
> > > > +   return x;
> > > > +}
> > > > +
> > > > +/*
> > > > +** negabsf:
> > > > +**     movi    v[0-9]+.2s, 0x80, lsl 24
> > > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > > +**     ret
> > > > +*/
> > > > +float negabsf (float x)
> > > > +{
> > > > +   unsigned int y;
> > > > +   memcpy (&y, &x, sizeof(float));
> > > > +   y = y | (1U << 31);
> > > > +   memcpy (&x, &y, sizeof(float));
> > > > +   return x;
> > > > +}
> > > > +
> > > >
> > > >
> > > >
> > > >
> > > > --
> > >
> >
> > --
> > Richard Biener <rguenther@suse.de>
> > SUSE Software Solutions Germany GmbH,
> > Frankenstrasse 146, 90461 Nuernberg, Germany;
> > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
>
  
Tamar Christina Nov. 13, 2023, 7:13 a.m. UTC | #9
> -----Original Message-----
> From: Richard Biener <rguenther@suse.de>
> Sent: Monday, November 13, 2023 7:09 AM
> To: Andrew Pinski <pinskia@gmail.com>
> Cc: Tamar Christina <Tamar.Christina@arm.com>; Prathamesh Kulkarni
> <prathamesh.kulkarni@linaro.org>; gcc-patches@gcc.gnu.org; nd
> <nd@arm.com>; jlaw@ventanamicro.com
> Subject: Re: [PATCH v3 2/2]middle-end match.pd: optimize fneg (fabs (x)) to
> copysign (x, -1) [PR109154]
> 
> On Fri, 10 Nov 2023, Andrew Pinski wrote:
> 
> > > On Fri, Nov 10, 2023 at 5:12 AM Richard Biener <rguenther@suse.de>
> wrote:
> > >
> > > On Fri, 10 Nov 2023, Tamar Christina wrote:
> > >
> > > >
> > > > Hi Prathamesh,
> > > >
> > > > Yes Arm requires SIMD for copysign. The testcases fail because they don't
> turn on Neon.
> > > >
> > > > I'll update them.
> > >
> > > On x86_64 with -m32 I see
> > >
> > > FAIL: gcc.dg/pr55152-2.c scan-tree-dump-times optimized ".COPYSIGN"
> > > 1
> > > FAIL: gcc.dg/pr55152-2.c scan-tree-dump-times optimized "ABS_EXPR" 1
> > > FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "=
> ABS_EXPR"
> > > 1
> > > FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= -" 1
> > > FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "=
> .COPYSIGN"
> > > 2
> > > FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
> > > "Deleting[^\\\\n]* = -" 4
> > > FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
> > > "Deleting[^\\\\n]* = \\\\.COPYSIGN" 2
> > > FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
> > > "Deleting[^\\\\n]* = ABS_EXPR <" 1
> > > FAIL: gcc.dg/tree-ssa/phi-opt-24.c scan-tree-dump-not phiopt2 "if"
> > >
> > > maybe add a copysign effective target?
> >
> > I get the feeling that the internal function for copysign should not
> > be a direct internal function for scalar modes, and should instead
> > call expand_copysign when expanding.
> > This will fix some if not all of the issues where COPYSIGN is now
> > trying to show up.
> 
> But then I'd rather have a COPYSIGN_EXPR tree code, leaving internal-fns to
> optab mappings.  We've discussed this and discarded any of this as too much
> work right now.

I have a patch mostly written for next stage1 that will allow IFNs to have a fallback
target hook as an option.  This would then allow us to remove things like XORSIGN
as well.  At the moment it's just a prototype and I don't have time to finish it before
stage1 ends, but it cleans things up a lot in targets that don't use the copysign RTX code.

Regards,
Tamar
> 
> But yes, the situation is a bit messy (as also discussed).
> 
> Richard.
> 
> > By the way, this is most likely PR 88786 (and PR 112468 and a few
> > others), and PR 58797.
> >
> > Thanks,
> > Andrew
> >
> >
> >
> > >
> > > > Regards,
> > > > Tamar
> > > > ________________________________
> > > > From: Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
> > > > Sent: Friday, November 10, 2023 12:24 PM
> > > > To: Tamar Christina <Tamar.Christina@arm.com>
> > > > Cc: gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>; nd
> > > > <nd@arm.com>; rguenther@suse.de <rguenther@suse.de>;
> > > > jlaw@ventanamicro.com <jlaw@ventanamicro.com>
> > > > Subject: Re: [PATCH v3 2/2]middle-end match.pd: optimize fneg
> > > > (fabs (x)) to copysign (x, -1) [PR109154]
> > > >
> > > > On Mon, 6 Nov 2023 at 15:50, Tamar Christina
> <tamar.christina@arm.com> wrote:
> > > > >
> > > > > Hi All,
> > > > >
> > > > > This patch transforms fneg (fabs (x)) into copysign (x, -1)
> > > > > which is more canonical and allows a target to expand this
> > > > > sequence efficiently.  Such sequences are common in scientific code
> working with gradients.
> > > > >
> > > > > There is an existing canonicalization of copysign (x, -1) to
> > > > > fneg (fabs (x)) which I remove since this is a less efficient
> > > > > form.  The testsuite is also updated in light of this.
> > > > >
> > > > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> > > > Hi Tamar,
> > > > It seems the patch caused following regressions on arm:
> > > >
> > > > Running gcc:gcc.dg/dg.exp ...
> > > > FAIL: gcc.dg/pr55152-2.c scan-tree-dump-times optimized
> > > > ".COPYSIGN" 1
> > > > FAIL: gcc.dg/pr55152-2.c scan-tree-dump-times optimized "ABS_EXPR"
> > > > 1
> > > >
> > > > Running gcc:gcc.dg/tree-ssa/tree-ssa.exp ...
> > > > FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "= -"
> > > > 1
> > > > FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "=
> > > > .COPYSIGN" 2
> > > > FAIL: gcc.dg/tree-ssa/abs-4.c scan-tree-dump-times optimized "=
> > > > ABS_EXPR" 1
> > > > FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
> > > > "Deleting[^\\n]* = -" 4
> > > > FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
> > > > "Deleting[^\\n]* = ABS_EXPR <" 1
> > > > FAIL: gcc.dg/tree-ssa/backprop-6.c scan-tree-dump-times backprop
> > > > "Deleting[^\\n]* = \\.COPYSIGN" 2
> > > > FAIL: gcc.dg/tree-ssa/copy-sign-2.c scan-tree-dump-times optimized
> > > > ".COPYSIGN" 1
> > > > FAIL: gcc.dg/tree-ssa/copy-sign-2.c scan-tree-dump-times optimized
> > > > "ABS" 1
> > > > FAIL: gcc.dg/tree-ssa/mult-abs-2.c scan-tree-dump-times gimple
> > > > ".COPYSIGN" 4
> > > > FAIL: gcc.dg/tree-ssa/mult-abs-2.c scan-tree-dump-times gimple
> > > > "ABS" 4
> > > > FAIL: gcc.dg/tree-ssa/phi-opt-24.c scan-tree-dump-not phiopt2 "if"
> > > > Link to log files:
> > > > https://ci.linaro.org/job/tcwg_gcc_check--master-arm-build/1240/ar
> > > > tifact/artifacts/00-sumfiles/
> > > >
> > > > Even for following test-case:
> > > > double g (double a)
> > > > {
> > > >   double t1 = fabs (a);
> > > >   double t2 = -t1;
> > > >   return t2;
> > > > }
> > > >
> > > > It seems, the pattern gets applied but doesn't get eventually
> > > > simplified to copysign(a, -1).
> > > > forwprop dump shows:
> > > > Applying pattern match.pd:1131, gimple-match-4.cc:4134 double g
> > > > (double a) {
> > > >   double t2;
> > > >   double t1;
> > > >
> > > >   <bb 2> :
> > > >   t1_2 = ABS_EXPR <a_1(D)>;
> > > >   t2_3 = -t1_2;
> > > >   return t2_3;
> > > >
> > > > }
> > > >
> > > > while on x86_64:
> > > > Applying pattern match.pd:1131, gimple-match-4.cc:4134
> > > > gimple_simplified to t2_3 = .COPYSIGN (a_1(D), -1.0e+0); Removing
> > > > dead stmt:t1_2 = ABS_EXPR <a_1(D)>; double g (double a) {
> > > >   double t2;
> > > >   double t1;
> > > >
> > > >   <bb 2> :
> > > >   t2_3 = .COPYSIGN (a_1(D), -1.0e+0);
> > > >   return t2_3;
> > > >
> > > > }
> > > >
> > > > Thanks,
> > > > Prathamesh
> > > >
> > > >
> > > > >
> > > > > Ok for master?
> > > > >
> > > > > Thanks,
> > > > > Tamar
> > > > >
> > > > > gcc/ChangeLog:
> > > > >
> > > > >         PR tree-optimization/109154
> > > > >         * match.pd: Add new neg+abs rule, remove inverse copysign rule.
> > > > >
> > > > > gcc/testsuite/ChangeLog:
> > > > >
> > > > >         PR tree-optimization/109154
> > > > >         * gcc.dg/fold-copysign-1.c: Updated.
> > > > >         * gcc.dg/pr55152-2.c: Updated.
> > > > >         * gcc.dg/tree-ssa/abs-4.c: Updated.
> > > > >         * gcc.dg/tree-ssa/backprop-6.c: Updated.
> > > > >         * gcc.dg/tree-ssa/copy-sign-2.c: Updated.
> > > > >         * gcc.dg/tree-ssa/mult-abs-2.c: Updated.
> > > > >         * gcc.target/aarch64/fneg-abs_1.c: New test.
> > > > >         * gcc.target/aarch64/fneg-abs_2.c: New test.
> > > > >         * gcc.target/aarch64/fneg-abs_3.c: New test.
> > > > >         * gcc.target/aarch64/fneg-abs_4.c: New test.
> > > > >         * gcc.target/aarch64/sve/fneg-abs_1.c: New test.
> > > > >         * gcc.target/aarch64/sve/fneg-abs_2.c: New test.
> > > > >         * gcc.target/aarch64/sve/fneg-abs_3.c: New test.
> > > > >         * gcc.target/aarch64/sve/fneg-abs_4.c: New test.
> > > > >
> > > > > --- inline copy of patch --
> > > > > diff --git a/gcc/match.pd b/gcc/match.pd index
> > > > >
> db95931df0672cf4ef08cca36085c3aa6831519e..7a023d510c283c43a87b1
> 7
> > > > > 95a74761b8af979b53 100644
> > > > > --- a/gcc/match.pd
> > > > > +++ b/gcc/match.pd
> > > > > @@ -1106,13 +1106,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN
> (RINT)
> > > > >     (hypots @0 (copysigns @1 @2))
> > > > >     (hypots @0 @1))))
> > > > >
> > > > > -/* copysign(x, CST) -> [-]abs (x).  */ -(for copysigns
> > > > > (COPYSIGN_ALL)
> > > > > - (simplify
> > > > > -  (copysigns @0 REAL_CST@1)
> > > > > -  (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
> > > > > -   (negate (abs @0))
> > > > > -   (abs @0))))
> > > > > +/* Transform fneg (fabs (X)) -> copysign (X, -1).  */
> > > > > +
> > > > > +(simplify
> > > > > + (negate (abs @0))
> > > > > + (IFN_COPYSIGN @0 { build_minus_one_cst (type); }))
> > > > >
> > > > >  /* copysign(copysign(x, y), z) -> copysign(x, z).  */  (for
> > > > > copysigns (COPYSIGN_ALL) diff --git
> > > > > a/gcc/testsuite/gcc.dg/fold-copysign-1.c
> > > > > b/gcc/testsuite/gcc.dg/fold-copysign-1.c
> > > > > index
> > > > >
> f17d65c24ee4dca9867827d040fe0a404c515e7b..f9cafd14ab05f5e8ab2f6f
> > > > > 68e62801d21c2df6a6 100644
> > > > > --- a/gcc/testsuite/gcc.dg/fold-copysign-1.c
> > > > > +++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c
> > > > > @@ -12,5 +12,5 @@ double bar (double x)
> > > > >    return __builtin_copysign (x, minuszero);  }
> > > > >
> > > > > -/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" } } */
> > > > > -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" }
> > > > > } */
> > > > > +/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1
> > > > > +"cddce1" } } */
> > > > > +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" }
> > > > > +} */
> > > > > diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c
> > > > > b/gcc/testsuite/gcc.dg/pr55152-2.c
> > > > > index
> > > > >
> 54db0f2062da105a829d6690ac8ed9891fe2b588..605f202ed6bc7aa8fe921
> 4
> > > > > 57b02ff0b88cc63ce6 100644
> > > > > --- a/gcc/testsuite/gcc.dg/pr55152-2.c
> > > > > +++ b/gcc/testsuite/gcc.dg/pr55152-2.c
> > > > > @@ -10,4 +10,5 @@ int f(int a)
> > > > >    return (a<-a)?a:-a;
> > > > >  }
> > > > >
> > > > > -/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" }
> > > > > } */
> > > > > +/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized"
> > > > > +} } */
> > > > > +/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" }
> > > > > +} */
> > > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> > > > > b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> > > > > index
> > > > >
> 6197519faf7b55aed7bc162cd0a14dd2145210ca..e1b825f37f69ac3c4666b
> 3
> > > > > a52d733368805ad31d 100644
> > > > > --- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> > > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> > > > > @@ -9,5 +9,6 @@ long double abs_ld(long double x) { return
> > > > > __builtin_signbit(x) ? x : -x; }
> > > > >
> > > > >  /* __builtin_signbit(x) ? x : -x. Should be convert into -
> > > > > ABS_EXP<x> */
> > > > >  /* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */
> > > > > -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3
> > > > > "optimized"} } */
> > > > > -/* { dg-final { scan-tree-dump-times "= -" 3 "optimized"} } */
> > > > > +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1
> > > > > +"optimized"} } */
> > > > > +/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */
> > > > > +/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2
> > > > > +"optimized"} } */
> > > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> > > > > b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> > > > > index
> > > > >
> 31f05716f1498dc709cac95fa20fb5796642c77e..c3a138642d6ff7be984e91
> > > > > fa1343cb2718db7ae1 100644
> > > > > --- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> > > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> > > > > @@ -26,5 +26,6 @@ TEST_FUNCTION (float, f)  TEST_FUNCTION
> > > > > (double, )  TEST_FUNCTION (long double, l)
> > > > >
> > > > > -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6
> > > > > "backprop" } } */
> > > > > -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR
> > > > > <} 3 "backprop" } } */
> > > > > +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4
> > > > > +"backprop" } } */
> > > > > +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* =
> > > > > +\.COPYSIGN} 2 "backprop" } } */
> > > > > +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR
> > > > > +<} 1 "backprop" } } */
> > > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> > > > > b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> > > > > index
> > > > >
> de52c5f7c8062958353d91f5031193defc9f3f91..e5d565c4b9832c0010658
> 8
> > > > > ef411fbd8c292a5cad 100644
> > > > > --- a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> > > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> > > > > @@ -10,4 +10,5 @@ float f1(float x)
> > > > >    float t = __builtin_copysignf (1.0f, -x);
> > > > >    return x * t;
> > > > >  }
> > > > > -/* { dg-final { scan-tree-dump-times "ABS" 2 "optimized"} } */
> > > > > +/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized"} } */
> > > > > +/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized"}
> > > > > +} */
> > > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> > > > > b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> > > > > index
> > > > >
> a41f1baf25669a4fd301a586a49ba5e3c5b966ab..a22896b21c8b5a4d5d8e
> 28
> > > > > bd8ae0db896e63ade0 100644
> > > > > --- a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> > > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> > > > > @@ -34,4 +34,5 @@ float i1(float x)  {
> > > > >    return x * (x <= 0.f ? 1.f : -1.f);  }
> > > > > -/* { dg-final { scan-tree-dump-times "ABS" 8 "gimple"} } */
> > > > > +/* { dg-final { scan-tree-dump-times "ABS" 4 "gimple"} } */
> > > > > +/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 4 "gimple"} }
> > > > > +*/
> > > > > diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
> > > > > b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
> > > > > new file mode 100644
> > > > > index
> > > > >
> 0000000000000000000000000000000000000000..f823013c3ddf6b3a266
> c3a
> > > > > bfcbf2642fc2a75fa6
> > > > > --- /dev/null
> > > > > +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
> > > > > @@ -0,0 +1,39 @@
> > > > > +/* { dg-do compile } */
> > > > > +/* { dg-options "-O3" } */
> > > > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64
> > > > > +} } } */
> > > > > +
> > > > > +#pragma GCC target "+nosve"
> > > > > +
> > > > > +#include <arm_neon.h>
> > > > > +
> > > > > +/*
> > > > > +** t1:
> > > > > +**     orr     v[0-9]+.2s, #128, lsl #24
> > > > > +**     ret
> > > > > +*/
> > > > > +float32x2_t t1 (float32x2_t a)
> > > > > +{
> > > > > +  return vneg_f32 (vabs_f32 (a)); }
> > > > > +
> > > > > +/*
> > > > > +** t2:
> > > > > +**     orr     v[0-9]+.4s, #128, lsl #24
> > > > > +**     ret
> > > > > +*/
> > > > > +float32x4_t t2 (float32x4_t a)
> > > > > +{
> > > > > +  return vnegq_f32 (vabsq_f32 (a)); }
> > > > > +
> > > > > +/*
> > > > > +** t3:
> > > > > +**     adrp    x0, .LC[0-9]+
> > > > > +**     ldr     q[0-9]+, \[x0, #:lo12:.LC0\]
> > > > > +**     orr     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> > > > > +**     ret
> > > > > +*/
> > > > > +float64x2_t t3 (float64x2_t a)
> > > > > +{
> > > > > +  return vnegq_f64 (vabsq_f64 (a)); }
> > > > > diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
> > > > > b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
> > > > > new file mode 100644
> > > > > index
> > > > >
> 0000000000000000000000000000000000000000..141121176b309e4b2a
> a413
> > > > > dc55271a6e3c93d5e1
> > > > > --- /dev/null
> > > > > +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
> > > > > @@ -0,0 +1,31 @@
> > > > > +/* { dg-do compile } */
> > > > > +/* { dg-options "-O3" } */
> > > > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64
> > > > > +} } } */
> > > > > +
> > > > > +#pragma GCC target "+nosve"
> > > > > +
> > > > > +#include <arm_neon.h>
> > > > > +#include <math.h>
> > > > > +
> > > > > +/*
> > > > > +** f1:
> > > > > +**     movi    v[0-9]+.2s, 0x80, lsl 24
> > > > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > > > +**     ret
> > > > > +*/
> > > > > +float32_t f1 (float32_t a)
> > > > > +{
> > > > > +  return -fabsf (a);
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > +** f2:
> > > > > +**     mov     x0, -9223372036854775808
> > > > > +**     fmov    d[0-9]+, x0
> > > > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > > > +**     ret
> > > > > +*/
> > > > > +float64_t f2 (float64_t a)
> > > > > +{
> > > > > +  return -fabs (a);
> > > > > +}
> > > > > > diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
> > > > > > new file mode 100644
> > > > > > index 0000000000000000000000000000000000000000..b4652173a95d104ddfa70c497f0627a61ea89d3b
> > > > > --- /dev/null
> > > > > +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
> > > > > @@ -0,0 +1,36 @@
> > > > > +/* { dg-do compile } */
> > > > > +/* { dg-options "-O3" } */
> > > > > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > > > > +
> > > > > +#pragma GCC target "+nosve"
> > > > > +
> > > > > +#include <arm_neon.h>
> > > > > +#include <math.h>
> > > > > +
> > > > > +/*
> > > > > +** f1:
> > > > > +**     ...
> > > > > +**     ldr     q[0-9]+, \[x0\]
> > > > > +**     orr     v[0-9]+.4s, #128, lsl #24
> > > > > +**     str     q[0-9]+, \[x0\], 16
> > > > > +**     ...
> > > > > +*/
> > > > > +void f1 (float32_t *a, int n)
> > > > > +{
> > > > > +  for (int i = 0; i < (n & -8); i++)
> > > > > +   a[i] = -fabsf (a[i]);
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > +** f2:
> > > > > +**     ...
> > > > > +**     ldr     q[0-9]+, \[x0\]
> > > > > +**     orr     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> > > > > +**     str     q[0-9]+, \[x0\], 16
> > > > > +**     ...
> > > > > +*/
> > > > > +void f2 (float64_t *a, int n)
> > > > > +{
> > > > > +  for (int i = 0; i < (n & -8); i++)
> > > > > +   a[i] = -fabs (a[i]);
> > > > > +}
> > > > > > diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
> > > > > > new file mode 100644
> > > > > > index 0000000000000000000000000000000000000000..10879dea74462d34b26160eeb0bd54ead063166b
> > > > > --- /dev/null
> > > > > +++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
> > > > > @@ -0,0 +1,39 @@
> > > > > +/* { dg-do compile } */
> > > > > +/* { dg-options "-O3" } */
> > > > > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > > > > +
> > > > > +#pragma GCC target "+nosve"
> > > > > +
> > > > > +#include <string.h>
> > > > > +
> > > > > +/*
> > > > > +** negabs:
> > > > > +**     mov     x0, -9223372036854775808
> > > > > +**     fmov    d[0-9]+, x0
> > > > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > > > +**     ret
> > > > > +*/
> > > > > +double negabs (double x)
> > > > > +{
> > > > > +   unsigned long long y;
> > > > > +   memcpy (&y, &x, sizeof(double));
> > > > > +   y = y | (1UL << 63);
> > > > > +   memcpy (&x, &y, sizeof(double));
> > > > > +   return x;
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > +** negabsf:
> > > > > +**     movi    v[0-9]+.2s, 0x80, lsl 24
> > > > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > > > +**     ret
> > > > > +*/
> > > > > +float negabsf (float x)
> > > > > +{
> > > > > +   unsigned int y;
> > > > > +   memcpy (&y, &x, sizeof(float));
> > > > > +   y = y | (1U << 31);
> > > > > +   memcpy (&x, &y, sizeof(float));
> > > > > +   return x;
> > > > > +}
> > > > > +
> > > > > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
> > > > > > new file mode 100644
> > > > > > index 0000000000000000000000000000000000000000..0c7664e6de77a497682952653ffd417453854d52
> > > > > --- /dev/null
> > > > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
> > > > > @@ -0,0 +1,37 @@
> > > > > +/* { dg-do compile } */
> > > > > +/* { dg-options "-O3" } */
> > > > > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > > > > +
> > > > > +#include <arm_neon.h>
> > > > > +
> > > > > +/*
> > > > > +** t1:
> > > > > +**     orr     v[0-9]+.2s, #128, lsl #24
> > > > > +**     ret
> > > > > +*/
> > > > > +float32x2_t t1 (float32x2_t a)
> > > > > +{
> > > > > > +  return vneg_f32 (vabs_f32 (a));
> > > > > > +}
> > > > > +
> > > > > +/*
> > > > > +** t2:
> > > > > +**     orr     v[0-9]+.4s, #128, lsl #24
> > > > > +**     ret
> > > > > +*/
> > > > > +float32x4_t t2 (float32x4_t a)
> > > > > +{
> > > > > > +  return vnegq_f32 (vabsq_f32 (a));
> > > > > > +}
> > > > > +
> > > > > +/*
> > > > > +** t3:
> > > > > +**     adrp    x0, .LC[0-9]+
> > > > > +**     ldr     q[0-9]+, \[x0, #:lo12:.LC0\]
> > > > > +**     orr     v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
> > > > > +**     ret
> > > > > +*/
> > > > > +float64x2_t t3 (float64x2_t a)
> > > > > +{
> > > > > > +  return vnegq_f64 (vabsq_f64 (a));
> > > > > > +}
> > > > > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
> > > > > > new file mode 100644
> > > > > > index 0000000000000000000000000000000000000000..a60cd31b9294af2dac69eed1c93f899bd5c78fca
> > > > > --- /dev/null
> > > > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
> > > > > @@ -0,0 +1,29 @@
> > > > > +/* { dg-do compile } */
> > > > > +/* { dg-options "-O3" } */
> > > > > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > > > > +
> > > > > +#include <arm_neon.h>
> > > > > +#include <math.h>
> > > > > +
> > > > > +/*
> > > > > +** f1:
> > > > > +**     movi    v[0-9]+.2s, 0x80, lsl 24
> > > > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > > > +**     ret
> > > > > +*/
> > > > > +float32_t f1 (float32_t a)
> > > > > +{
> > > > > +  return -fabsf (a);
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > +** f2:
> > > > > +**     mov     x0, -9223372036854775808
> > > > > +**     fmov    d[0-9]+, x0
> > > > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > > > +**     ret
> > > > > +*/
> > > > > +float64_t f2 (float64_t a)
> > > > > +{
> > > > > +  return -fabs (a);
> > > > > +}
> > > > > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
> > > > > > new file mode 100644
> > > > > > index 0000000000000000000000000000000000000000..1bf34328d8841de8e6b0a5458562a9f00e31c275
> > > > > --- /dev/null
> > > > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
> > > > > @@ -0,0 +1,34 @@
> > > > > +/* { dg-do compile } */
> > > > > +/* { dg-options "-O3" } */
> > > > > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > > > > +
> > > > > +#include <arm_neon.h>
> > > > > +#include <math.h>
> > > > > +
> > > > > +/*
> > > > > +** f1:
> > > > > +**     ...
> > > > > +**     ld1w    z[0-9]+.s, p[0-9]+/z, \[x0, x2, lsl 2\]
> > > > > +**     orr     z[0-9]+.s, z[0-9]+.s, #0x80000000
> > > > > +**     st1w    z[0-9]+.s, p[0-9]+, \[x0, x2, lsl 2\]
> > > > > +**     ...
> > > > > +*/
> > > > > +void f1 (float32_t *a, int n)
> > > > > +{
> > > > > +  for (int i = 0; i < (n & -8); i++)
> > > > > +   a[i] = -fabsf (a[i]);
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > +** f2:
> > > > > +**     ...
> > > > > +**     ld1d    z[0-9]+.d, p[0-9]+/z, \[x0, x2, lsl 3\]
> > > > > +**     orr     z[0-9]+.d, z[0-9]+.d, #0x8000000000000000
> > > > > +**     st1d    z[0-9]+.d, p[0-9]+, \[x0, x2, lsl 3\]
> > > > > +**     ...
> > > > > +*/
> > > > > +void f2 (float64_t *a, int n)
> > > > > +{
> > > > > +  for (int i = 0; i < (n & -8); i++)
> > > > > +   a[i] = -fabs (a[i]);
> > > > > +}
> > > > > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
> > > > > > new file mode 100644
> > > > > > index 0000000000000000000000000000000000000000..21f2a8da2a5d44e3d01f6604ca7be87e3744d494
> > > > > --- /dev/null
> > > > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
> > > > > @@ -0,0 +1,37 @@
> > > > > +/* { dg-do compile } */
> > > > > +/* { dg-options "-O3" } */
> > > > > > +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> > > > > +
> > > > > +#include <string.h>
> > > > > +
> > > > > +/*
> > > > > +** negabs:
> > > > > +**     mov     x0, -9223372036854775808
> > > > > +**     fmov    d[0-9]+, x0
> > > > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > > > +**     ret
> > > > > +*/
> > > > > +double negabs (double x)
> > > > > +{
> > > > > +   unsigned long long y;
> > > > > +   memcpy (&y, &x, sizeof(double));
> > > > > +   y = y | (1UL << 63);
> > > > > +   memcpy (&x, &y, sizeof(double));
> > > > > +   return x;
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > +** negabsf:
> > > > > +**     movi    v[0-9]+.2s, 0x80, lsl 24
> > > > > +**     orr     v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
> > > > > +**     ret
> > > > > +*/
> > > > > +float negabsf (float x)
> > > > > +{
> > > > > +   unsigned int y;
> > > > > +   memcpy (&y, &x, sizeof(float));
> > > > > +   y = y | (1U << 31);
> > > > > +   memcpy (&x, &y, sizeof(float));
> > > > > +   return x;
> > > > > +}
> > > > > +
> > > > >
> > > > >
> > > > >
> > > > >
> > > > > --
> > > >
> > >
> > > --
> > > Richard Biener <rguenther@suse.de>
> > > SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461
> > > Nuernberg, Germany;
> > > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG
> > > Nuernberg)
> >
> 
> --
> Richard Biener <rguenther@suse.de>
> SUSE Software Solutions Germany GmbH,
> Frankenstrasse 146, 90461 Nuernberg, Germany;
> GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG
> Nuernberg)
  

Patch

--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1106,13 +1106,11 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    (hypots @0 (copysigns @1 @2))
    (hypots @0 @1))))
 
-/* copysign(x, CST) -> [-]abs (x).  */
-(for copysigns (COPYSIGN_ALL)
- (simplify
-  (copysigns @0 REAL_CST@1)
-  (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
-   (negate (abs @0))
-   (abs @0))))
+/* Transform fneg (fabs (X)) -> copysign (X, -1).  */
+
+(simplify
+ (negate (abs @0))
+ (IFN_COPYSIGN @0 { build_minus_one_cst (type); }))
 
 /* copysign(copysign(x, y), z) -> copysign(x, z).  */
 (for copysigns (COPYSIGN_ALL)
diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c b/gcc/testsuite/gcc.dg/fold-copysign-1.c
index f17d65c24ee4dca9867827d040fe0a404c515e7b..f9cafd14ab05f5e8ab2f6f68e62801d21c2df6a6 100644
--- a/gcc/testsuite/gcc.dg/fold-copysign-1.c
+++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c
@@ -12,5 +12,5 @@  double bar (double x)
   return __builtin_copysign (x, minuszero);
 }
 
-/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" } } */
-/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" } } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" } } */
diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c
index 54db0f2062da105a829d6690ac8ed9891fe2b588..605f202ed6bc7aa8fe921457b02ff0b88cc63ce6 100644
--- a/gcc/testsuite/gcc.dg/pr55152-2.c
+++ b/gcc/testsuite/gcc.dg/pr55152-2.c
@@ -10,4 +10,5 @@  int f(int a)
   return (a<-a)?a:-a;
 }
 
-/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
index 6197519faf7b55aed7bc162cd0a14dd2145210ca..e1b825f37f69ac3c4666b3a52d733368805ad31d 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
@@ -9,5 +9,6 @@  long double abs_ld(long double x) { return __builtin_signbit(x) ? x : -x; }
 
 /* __builtin_signbit(x) ? x : -x. Should be convert into - ABS_EXP<x> */
 /* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= -" 3 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
index 31f05716f1498dc709cac95fa20fb5796642c77e..c3a138642d6ff7be984e91fa1343cb2718db7ae1 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
@@ -26,5 +26,6 @@  TEST_FUNCTION (float, f)
 TEST_FUNCTION (double, )
 TEST_FUNCTION (long double, l)
 
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" } } */
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
index de52c5f7c8062958353d91f5031193defc9f3f91..e5d565c4b9832c00106588ef411fbd8c292a5cad 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
@@ -10,4 +10,5 @@  float f1(float x)
   float t = __builtin_copysignf (1.0f, -x);
   return x * t;
 }
-/* { dg-final { scan-tree-dump-times "ABS" 2 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
index a41f1baf25669a4fd301a586a49ba5e3c5b966ab..a22896b21c8b5a4d5d8e28bd8ae0db896e63ade0 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
@@ -34,4 +34,5 @@  float i1(float x)
 {
   return x * (x <= 0.f ? 1.f : -1.f);
 }
-/* { dg-final { scan-tree-dump-times "ABS" 8 "gimple"} } */
+/* { dg-final { scan-tree-dump-times "ABS" 4 "gimple"} } */
+/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 4 "gimple"} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..f823013c3ddf6b3a266c3abfcbf2642fc2a75fa6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_1.c
@@ -0,0 +1,39 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+
+/*
+** t1:
+**	orr	v[0-9]+.2s, #128, lsl #24
+**	ret
+*/
+float32x2_t t1 (float32x2_t a)
+{
+  return vneg_f32 (vabs_f32 (a));
+}
+
+/*
+** t2:
+**	orr	v[0-9]+.4s, #128, lsl #24
+**	ret
+*/
+float32x4_t t2 (float32x4_t a)
+{
+  return vnegq_f32 (vabsq_f32 (a));
+}
+
+/*
+** t3:
+**	adrp	x0, .LC[0-9]+
+**	ldr	q[0-9]+, \[x0, #:lo12:.LC0\]
+**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**	ret
+*/
+float64x2_t t3 (float64x2_t a)
+{
+  return vnegq_f64 (vabsq_f64 (a));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..141121176b309e4b2aa413dc55271a6e3c93d5e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
@@ -0,0 +1,31 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**	movi	v[0-9]+.2s, 0x80, lsl 24
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float32_t f1 (float32_t a)
+{
+  return -fabsf (a);
+}
+
+/*
+** f2:
+**	mov	x0, -9223372036854775808
+**	fmov	d[0-9]+, x0
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float64_t f2 (float64_t a)
+{
+  return -fabs (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..b4652173a95d104ddfa70c497f0627a61ea89d3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_3.c
@@ -0,0 +1,36 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**	...
+**	ldr	q[0-9]+, \[x0\]
+**	orr	v[0-9]+.4s, #128, lsl #24
+**	str	q[0-9]+, \[x0\], 16
+**	...
+*/
+void f1 (float32_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabsf (a[i]);
+}
+
+/*
+** f2:
+**	...
+**	ldr	q[0-9]+, \[x0\]
+**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**	str	q[0-9]+, \[x0\], 16
+**	...
+*/
+void f2 (float64_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabs (a[i]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..10879dea74462d34b26160eeb0bd54ead063166b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_4.c
@@ -0,0 +1,39 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#pragma GCC target "+nosve"
+
+#include <string.h>
+
+/*
+** negabs:
+**	mov	x0, -9223372036854775808
+**	fmov	d[0-9]+, x0
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+double negabs (double x)
+{
+   unsigned long long y;
+   memcpy (&y, &x, sizeof(double));
+   y = y | (1UL << 63);
+   memcpy (&x, &y, sizeof(double));
+   return x;
+}
+
+/*
+** negabsf:
+**	movi	v[0-9]+.2s, 0x80, lsl 24
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float negabsf (float x)
+{
+   unsigned int y;
+   memcpy (&y, &x, sizeof(float));
+   y = y | (1U << 31);
+   memcpy (&x, &y, sizeof(float));
+   return x;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..0c7664e6de77a497682952653ffd417453854d52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_1.c
@@ -0,0 +1,37 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+
+/*
+** t1:
+**	orr	v[0-9]+.2s, #128, lsl #24
+**	ret
+*/
+float32x2_t t1 (float32x2_t a)
+{
+  return vneg_f32 (vabs_f32 (a));
+}
+
+/*
+** t2:
+**	orr	v[0-9]+.4s, #128, lsl #24
+**	ret
+*/
+float32x4_t t2 (float32x4_t a)
+{
+  return vnegq_f32 (vabsq_f32 (a));
+}
+
+/*
+** t3:
+**	adrp	x0, .LC[0-9]+
+**	ldr	q[0-9]+, \[x0, #:lo12:.LC0\]
+**	orr	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**	ret
+*/
+float64x2_t t3 (float64x2_t a)
+{
+  return vnegq_f64 (vabsq_f64 (a));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..a60cd31b9294af2dac69eed1c93f899bd5c78fca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
@@ -0,0 +1,29 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**	movi	v[0-9]+.2s, 0x80, lsl 24
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float32_t f1 (float32_t a)
+{
+  return -fabsf (a);
+}
+
+/*
+** f2:
+**	mov	x0, -9223372036854775808
+**	fmov	d[0-9]+, x0
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float64_t f2 (float64_t a)
+{
+  return -fabs (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..1bf34328d8841de8e6b0a5458562a9f00e31c275
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_3.c
@@ -0,0 +1,34 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+**	...
+**	ld1w	z[0-9]+.s, p[0-9]+/z, \[x0, x2, lsl 2\]
+**	orr	z[0-9]+.s, z[0-9]+.s, #0x80000000
+**	st1w	z[0-9]+.s, p[0-9]+, \[x0, x2, lsl 2\]
+**	...
+*/
+void f1 (float32_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabsf (a[i]);
+}
+
+/*
+** f2:
+**	...
+**	ld1d	z[0-9]+.d, p[0-9]+/z, \[x0, x2, lsl 3\]
+**	orr	z[0-9]+.d, z[0-9]+.d, #0x8000000000000000
+**	st1d	z[0-9]+.d, p[0-9]+, \[x0, x2, lsl 3\]
+**	...
+*/
+void f2 (float64_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+   a[i] = -fabs (a[i]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..21f2a8da2a5d44e3d01f6604ca7be87e3744d494
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_4.c
@@ -0,0 +1,37 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <string.h>
+
+/*
+** negabs:
+**	mov	x0, -9223372036854775808
+**	fmov	d[0-9]+, x0
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+double negabs (double x)
+{
+   unsigned long long y;
+   memcpy (&y, &x, sizeof(double));
+   y = y | (1UL << 63);
+   memcpy (&x, &y, sizeof(double));
+   return x;
+}
+
+/*
+** negabsf:
+**	movi	v[0-9]+.2s, 0x80, lsl 24
+**	orr	v[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b
+**	ret
+*/
+float negabsf (float x)
+{
+   unsigned int y;
+   memcpy (&y, &x, sizeof(float));
+   y = y | (1U << 31);
+   memcpy (&x, &y, sizeof(float));
+   return x;
+}
+