MATCH: Move `(X & ~Y) | (~X & Y)` over to use bitwise_inverted_equal_p

Message ID 20230825162122.3599370-1-apinski@marvell.com
State Accepted
Headers
Series MATCH: Move `(X & ~Y) | (~X & Y)` over to use bitwise_inverted_equal_p |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Andrew Pinski Aug. 25, 2023, 4:21 p.m. UTC
  This moves the pattern `(X & ~Y) | (~X & Y)` over to use bitwise_inverted_equal_p
so that the case where X and Y are defined by comparisons is simplified earlier.
We were already able to optimize this to (!X)^(!Y) in the end, due to the pattern added in
r14-3110-g7fb65f102851248bafa0815 and the older pattern r13-4620-g4d9db4bdd458.
But folding it earlier is better.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

Note pr87009.c now gets `return x ^ s;` in one case where the test had been expecting
`return s ^ x;`. Both are valid and are expected to be equivalent; we now just choose a slightly
different order of simplification, which causes the order of the operands to be different.

gcc/ChangeLog:

	* match.pd (`(X & ~Y) | (~X & Y)`): Use bitwise_inverted_equal_p
	instead of specifically checking for ~X.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/cmpbit-3.c: New test.
	* gcc.dg/pr87009.c: Update test.
---
 gcc/match.pd                             | 13 +++++-----
 gcc/testsuite/gcc.dg/pr87009.c           |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/cmpbit-3.c | 33 ++++++++++++++++++++++++
 3 files changed, 41 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cmpbit-3.c
  

Comments

Richard Biener Aug. 28, 2023, 7:04 a.m. UTC | #1
On Fri, Aug 25, 2023 at 6:22 PM Andrew Pinski via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> This moves the pattern `(X & ~Y) | (~X & Y)` to use bitwise_inverted_equal_p
> so we can simplify earlier the case where X and Y are defined by comparisons.
> We were able to optimize to (!X)^(!Y) in the end due to the pattern added in
> r14-3110-g7fb65f102851248bafa0815 and the older pattern r13-4620-g4d9db4bdd458 .
> But folding it earlier is better.
>
> OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

OK.

> Note pr87009.c now gets `return x ^ s; in one case where the test had been expecting
> `return s ^ x;` both are valid and would be expectly the same; just we now chose a slightly
> different order of simplification which causes the order of the operands to be different.
>
> gcc/ChangeLog:
>
>         * match.pd (`(X & ~Y) | (~X & Y)`): Use bitwise_inverted_equal_p
>         instead of specifically checking for ~X.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/tree-ssa/cmpbit-3.c: New test.
>         * gcc.dg/pr87009.c: Update test.
> ---
>  gcc/match.pd                             | 13 +++++-----
>  gcc/testsuite/gcc.dg/pr87009.c           |  2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/cmpbit-3.c | 33 ++++++++++++++++++++++++
>  3 files changed, 41 insertions(+), 7 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cmpbit-3.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 70884bd48eb..e41403664d0 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -1228,12 +1228,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  /* Simplify (X & ~Y) |^+ (~X & Y) -> X ^ Y.  */
>  (for op (bit_ior bit_xor plus)
>   (simplify
> -  (op (bit_and:c @0 (bit_not @1)) (bit_and:c (bit_not @0) @1))
> -   (bit_xor @0 @1))
> - (simplify
> -  (op:c (bit_and @0 INTEGER_CST@2) (bit_and (bit_not @0) INTEGER_CST@1))
> -  (if (~wi::to_wide (@2) == wi::to_wide (@1))
> -   (bit_xor @0 @1))))
> +  (op (bit_and:c @0 @2) (bit_and:c @3 @1))
> +  (with { bool wascmp0, wascmp1; }
> +   (if (bitwise_inverted_equal_p (@2, @1, wascmp0)
> +        && bitwise_inverted_equal_p (@0, @3, wascmp1)
> +       && ((!wascmp0 && !wascmp1)
> +           || element_precision (type) == 1))
> +   (bit_xor @0 @1)))))
>
>  /* PR53979: Transform ((a ^ b) | a) -> (a | b) */
>  (simplify
> diff --git a/gcc/testsuite/gcc.dg/pr87009.c b/gcc/testsuite/gcc.dg/pr87009.c
> index eb8a4ecd920..6f0341d17cc 100644
> --- a/gcc/testsuite/gcc.dg/pr87009.c
> +++ b/gcc/testsuite/gcc.dg/pr87009.c
> @@ -1,6 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O -fdump-tree-original" } */
> -/* { dg-final { scan-tree-dump-times "return s \\^ x;" 4 "original" } } */
> +/* { dg-final { scan-tree-dump-times "return s \\^ x;|return x \\^ s;" 4 "original" } } */
>
>  int f1 (int x, int s)
>  {
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-3.c b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-3.c
> new file mode 100644
> index 00000000000..936c0934a10
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-3.c
> @@ -0,0 +1,33 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized-raw -fdump-tree-dse1-raw -fdump-tree-forwprop1" } */
> +
> +_Bool f(int a, int b)
> +{
> +  _Bool X = a==1, Y = b == 2;
> +return (X & !Y) | (!X & Y);
> +}
> +
> +
> +_Bool f1(int a, int b)
> +{
> +  _Bool X = a==1, Y = b == 2;
> +  _Bool c = (X & !Y);
> +  _Bool d = (!X & Y);
> +  return c | d;
> +}
> +
> +/* Both of these should be optimized to (a==1) ^ (b==2) or (a != 1) ^ (b != 2) */
> +/* { dg-final { scan-tree-dump-not "gimple_cond " "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "gimple_phi " "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "ne_expr|eq_expr, " 4 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "bit_xor_expr, " 2 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "gimple_assign " 6 "optimized" } } */
> +
> +/* Both of these should be optimized early in the pipeline after forwprop1 */
> +/* { dg-final { scan-tree-dump-times "ne_expr|eq_expr, " 4 "forwprop1" { xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump-times "bit_xor_expr, " 2 "forwprop1" { xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump-times "gimple_assign " 6 "forwprop1" { xfail *-*-* } } } */
> +/* Note forwprop1 does not remove all unused statements sometimes so test dse1 also. */
> +/* { dg-final { scan-tree-dump-times "ne_expr|eq_expr, " 4 "dse1" } } */
> +/* { dg-final { scan-tree-dump-times "bit_xor_expr, " 2 "dse1" } } */
> +/* { dg-final { scan-tree-dump-times "gimple_assign " 6 "dse1" } } */
> --
> 2.31.1
>
  

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index 70884bd48eb..e41403664d0 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1228,12 +1228,13 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* Simplify (X & ~Y) |^+ (~X & Y) -> X ^ Y.  */
 (for op (bit_ior bit_xor plus)
  (simplify
-  (op (bit_and:c @0 (bit_not @1)) (bit_and:c (bit_not @0) @1))
-   (bit_xor @0 @1))
- (simplify
-  (op:c (bit_and @0 INTEGER_CST@2) (bit_and (bit_not @0) INTEGER_CST@1))
-  (if (~wi::to_wide (@2) == wi::to_wide (@1))
-   (bit_xor @0 @1))))
+  (op (bit_and:c @0 @2) (bit_and:c @3 @1))
+  (with { bool wascmp0, wascmp1; }
+   (if (bitwise_inverted_equal_p (@2, @1, wascmp0)
+        && bitwise_inverted_equal_p (@0, @3, wascmp1)
+	&& ((!wascmp0 && !wascmp1)
+	    || element_precision (type) == 1))
+   (bit_xor @0 @1)))))
 
 /* PR53979: Transform ((a ^ b) | a) -> (a | b) */
 (simplify
diff --git a/gcc/testsuite/gcc.dg/pr87009.c b/gcc/testsuite/gcc.dg/pr87009.c
index eb8a4ecd920..6f0341d17cc 100644
--- a/gcc/testsuite/gcc.dg/pr87009.c
+++ b/gcc/testsuite/gcc.dg/pr87009.c
@@ -1,6 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-options "-O -fdump-tree-original" } */
-/* { dg-final { scan-tree-dump-times "return s \\^ x;" 4 "original" } } */
+/* { dg-final { scan-tree-dump-times "return s \\^ x;|return x \\^ s;" 4 "original" } } */
 
 int f1 (int x, int s)
 {
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-3.c b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-3.c
new file mode 100644
index 00000000000..936c0934a10
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-3.c
@@ -0,0 +1,33 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized-raw -fdump-tree-dse1-raw -fdump-tree-forwprop1" } */
+
+_Bool f(int a, int b)
+{
+  _Bool X = a==1, Y = b == 2;
+return (X & !Y) | (!X & Y);
+}
+
+
+_Bool f1(int a, int b)
+{
+  _Bool X = a==1, Y = b == 2;
+  _Bool c = (X & !Y);
+  _Bool d = (!X & Y);
+  return c | d;
+}
+
+/* Both of these should be optimized to (a==1) ^ (b==2) or (a != 1) ^ (b != 2) */
+/* { dg-final { scan-tree-dump-not "gimple_cond " "optimized" } } */
+/* { dg-final { scan-tree-dump-not "gimple_phi " "optimized" } } */
+/* { dg-final { scan-tree-dump-times "ne_expr|eq_expr, " 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "bit_xor_expr, " 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "gimple_assign " 6 "optimized" } } */
+
+/* Both of these should be optimized early in the pipeline after forwprop1 */
+/* { dg-final { scan-tree-dump-times "ne_expr|eq_expr, " 4 "forwprop1" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "bit_xor_expr, " 2 "forwprop1" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "gimple_assign " 6 "forwprop1" { xfail *-*-* } } } */
+/* Note forwprop1 does not remove all unused statements sometimes so test dse1 also. */
+/* { dg-final { scan-tree-dump-times "ne_expr|eq_expr, " 4 "dse1" } } */
+/* { dg-final { scan-tree-dump-times "bit_xor_expr, " 2 "dse1" } } */
+/* { dg-final { scan-tree-dump-times "gimple_assign " 6 "dse1" } } */