MATCH: Add `(X & ~Y) & Y` and `(X | ~Y) | Y`

Message ID 20230923194921.2101021-1-apinski@marvell.com
State Accepted
Headers
Series MATCH: Add `(X & ~Y) & Y` and `(X | ~Y) | Y` |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Andrew Pinski Sept. 23, 2023, 7:49 p.m. UTC
  Even though this gets optimized by reassociation, catching it more often
will always be better.

Note the reason why I didn't add `(X ^ ~Y) ^ Y` is that it gets caught
by preferring `~(X ^ Y)` to `(X ^ ~Y)`, which is then caught by
the pattern for `(X ^ Y) ^ Y` already.

	PR tree-optimization/111543

gcc/ChangeLog:

	* match.pd (`(X & ~Y) & Y`, `(X | ~Y) | Y`): New patterns.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/bitops-4.c: New test.
---
 gcc/match.pd                             | 17 +++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/bitops-4.c | 18 ++++++++++++++++++
 2 files changed, 35 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitops-4.c
  

Comments

Richard Biener Sept. 24, 2023, 6:35 a.m. UTC | #1
On Sat, Sep 23, 2023 at 9:50 PM Andrew Pinski <apinski@marvell.com> wrote:
>
> Even though this gets optimized by reassociation, catching it more often
> will always be better.
>
> Note the reason why I didn't add `(X ^ ~Y) ^ Y` is that it gets caught
> by preferring `~(X ^ Y)` to `(X ^ ~Y)`, which is then caught by
> the pattern for `(X ^ Y) ^ Y` already.

OK

>         PR tree-optimization/111543
>
> gcc/ChangeLog:
>
>         * match.pd (`(X & ~Y) & Y`, `(X | ~Y) | Y`): New patterns.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/tree-ssa/bitops-4.c: New test.
> ---
>  gcc/match.pd                             | 17 +++++++++++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/bitops-4.c | 18 ++++++++++++++++++
>  2 files changed, 35 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitops-4.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 0aa815f4118..a17778fbaa6 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -1806,6 +1806,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  (simplify
>   (bit_xor:c (convert1? (bit_xor:c @0 @@1)) (convert2? @1))
>   (convert @0))
> +
> +/* (X & ~Y) & Y -> 0 */
> +(simplify
> + (bit_and:c (bit_and @0 @1) @2)
> + (with { bool wascmp; }
> +  (if (bitwise_inverted_equal_p (@0, @2, wascmp)
> +       || bitwise_inverted_equal_p (@1, @2, wascmp))
> +   { wascmp ? constant_boolean_node (false, type) : build_zero_cst (type); })))
> +/* (X | ~Y) | Y -> -1 */
> +(simplify
> + (bit_ior:c (bit_ior @0 @1) @2)
> + (with { bool wascmp; }
> +  (if ((bitwise_inverted_equal_p (@0, @2, wascmp)
> +        || bitwise_inverted_equal_p (@1, @2, wascmp))
> +       && (!wascmp || element_precision (type) == 1))
> +   { build_all_ones_cst (TREE_TYPE (@0)); })))
> +
>  /* (X & Y) & (X & Z) -> (X & Y) & Z
>     (X | Y) | (X | Z) -> (X | Y) | Z  */
>  (for op (bit_and bit_ior)
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-4.c b/gcc/testsuite/gcc.dg/tree-ssa/bitops-4.c
> new file mode 100644
> index 00000000000..73c8f39d28f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-4.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fdump-tree-optimized -fdump-tree-ccp1" } */
> +/* PR tree-optimization/111543 */
> +
> +void f_or(int a, int b, int *por)
> +{
> +        int c = ~a;
> +        *por = (c | b) | a;
> +}
> +void f_and(int a, int b, int *pand)
> +{
> +        int c = ~a;
> +        *pand = (c & b) & a;
> +}
> +/* { dg-final { scan-tree-dump-times "pand_\[0-9\]+.D. = 0" 1 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "por_\[0-9\]+.D. = -1" 1 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "pand_\[0-9\]+.D. = 0" 1 "ccp1" } } */
> +/* { dg-final { scan-tree-dump-times "por_\[0-9\]+.D. = -1" 1 "ccp1" } } */
> --
> 2.31.1
>
  

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index 0aa815f4118..a17778fbaa6 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1806,6 +1806,23 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (simplify
  (bit_xor:c (convert1? (bit_xor:c @0 @@1)) (convert2? @1))
  (convert @0))
+
+/* (X & ~Y) & Y -> 0 */
+(simplify
+ (bit_and:c (bit_and @0 @1) @2)
+ (with { bool wascmp; }
+  (if (bitwise_inverted_equal_p (@0, @2, wascmp)
+       || bitwise_inverted_equal_p (@1, @2, wascmp))
+   { wascmp ? constant_boolean_node (false, type) : build_zero_cst (type); })))
+/* (X | ~Y) | Y -> -1 */
+(simplify
+ (bit_ior:c (bit_ior @0 @1) @2)
+ (with { bool wascmp; }
+  (if ((bitwise_inverted_equal_p (@0, @2, wascmp)
+        || bitwise_inverted_equal_p (@1, @2, wascmp))
+       && (!wascmp || element_precision (type) == 1))
+   { build_all_ones_cst (TREE_TYPE (@0)); })))
+
 /* (X & Y) & (X & Z) -> (X & Y) & Z
    (X | Y) | (X | Z) -> (X | Y) | Z  */
 (for op (bit_and bit_ior)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-4.c b/gcc/testsuite/gcc.dg/tree-ssa/bitops-4.c
new file mode 100644
index 00000000000..73c8f39d28f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-4.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized -fdump-tree-ccp1" } */
+/* PR tree-optimization/111543 */
+
+void f_or(int a, int b, int *por)
+{
+        int c = ~a;
+        *por = (c | b) | a;
+}
+void f_and(int a, int b, int *pand)
+{
+        int c = ~a;
+        *pand = (c & b) & a;
+}
+/* { dg-final { scan-tree-dump-times "pand_\[0-9\]+.D. = 0" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "por_\[0-9\]+.D. = -1" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "pand_\[0-9\]+.D. = 0" 1 "ccp1" } } */
+/* { dg-final { scan-tree-dump-times "por_\[0-9\]+.D. = -1" 1 "ccp1" } } */