Fix 100864: `(a&!b) | b` is not optimized to `a | b` for comparisons

Message ID 20230722232649.1617746-1-apinski@marvell.com
State Accepted
Headers
Series Fix 100864: `(a&!b) | b` is not optimized to `a | b` for comparisons |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Andrew Pinski July 22, 2023, 11:26 p.m. UTC
  This adds a special case of the `(a&~b) | b` pattern where
`b` and `~b` are comparisons.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

	PR tree-optimization/100864
	* match.pd ((~x & y) | x -> x | y): Add comparison variant.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/bitops-3.c: New test.
---
 gcc/match.pd                             | 17 +++++-
 gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c | 67 ++++++++++++++++++++++++
 2 files changed, 83 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c
  

Comments

Richard Biener July 23, 2023, 8:38 a.m. UTC | #1
> Am 23.07.2023 um 01:27 schrieb Andrew Pinski via Gcc-patches <gcc-patches@gcc.gnu.org>:
> 
> This adds a special case of the `(a&~b) | b` pattern where
> `b` and `~b` are comparisons.
> 
> OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

Don’t we have an existing match for inversions we could amend?

> gcc/ChangeLog:
> 
>    PR tree-optimization/100864
>    * match.pd ((~x & y) | x -> x | y): Add comparison variant.
> 
> gcc/testsuite/ChangeLog:
> 
>    * gcc.dg/tree-ssa/bitops-3.c: New test.
> ---
> gcc/match.pd                             | 17 +++++-
> gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c | 67 ++++++++++++++++++++++++
> 2 files changed, 83 insertions(+), 1 deletion(-)
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c
> 
> diff --git a/gcc/match.pd b/gcc/match.pd
> index bfd15d6cd4a..dd4a2df537d 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -1928,7 +1928,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  /* (~x & y) | x -> x | y */
>  (simplify
>   (bitop:c (rbitop:c (bit_not @0) @1) @0)
> -  (bitop @0 @1)))
> +  (bitop @0 @1))
> + /* Similar but for comparisons which have been inverted already,
> +    Note it is hard to simulate the inverted tcc_comparison due
> +    NaNs; That is == and != are sometimes inversions and sometimes not.
> +    So a double for loop is needed and then compare the inverse code
> +    with the result of invert_tree_comparison is needed.
> +    This works fine for vector compares as -1 and 0 are bitwise
> +    inverses.  */
> + (for cmp (tcc_comparison)
> +  (for icmp (tcc_comparison)
> +   (simplify
> +    (bitop:c (rbitop:c (icmp @0 @1) @2) (cmp@3 @0 @1))
> +     (with { enum tree_code ic = invert_tree_comparison
> +             (cmp, HONOR_NANS (@0)); }
> +      (if (ic == icmp)
> +       (bitop @3 @2)))))))
> 
> /* ((x | y) & z) | x -> (z & y) | x */
> (simplify
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c b/gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c
> new file mode 100644
> index 00000000000..68fff4edce9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c
> @@ -0,0 +1,67 @@
> +/* PR tree-optimization/100864 */
> +
> +/* { dg-do run } */
> +/* { dg-options "-O1 -fdump-tree-optimized-raw" } */
> +
> +#define op_ne !=
> +#define op_eq ==
> +#define op_lt <
> +#define op_le <=
> +#define op_gt >
> +#define op_ge >=
> +
> +#define operators(t) \
> +t(ne) \
> +t(eq) \
> +t(lt) \
> +t(le) \
> +t(gt) \
> +t(ge)
> +
> +#define cmpfunc(v, op) \
> +__attribute__((noipa)) \
> +_Bool func_##op##_##v(v int a, v int b, v _Bool e) \
> +{ \
> +  v _Bool c = (a op_##op b); \
> +  v _Bool d = !c; \
> +  return (e & d) | c; \
> +}
> +
> +#define cmp_funcs(op) \
> +cmpfunc(, op) \
> +cmpfunc(volatile , op)
> +
> +operators(cmp_funcs)
> +
> +#define test(op) \
> +if (func_##op##_ (a, b, e) != func_##op##_volatile (a, b, e)) \
> + __builtin_abort();
> + 
> +int main()
> +{
> +  for(int a = -3; a <= 3; a++)
> +    for(int b = -3; b <= 3; b++)
> +      {
> +    _Bool e = 0;
> +    operators(test)
> +    e = 1;
> +    operators(test)
> +      }
> +  return 0;
> +}
> +
> +/* Check to make sure we optimize `(a&!b) | b` -> `a | b`. */
> +/* There are 6 different comparison operators testing here. */
> +/* bit_not_expr and bit_and_expr should show up for each one (volatile). */
> +/* Each operator should show up twice
> +   (except for `!=` which shows up 2*6 (each tester) + 2 (the 2 loops) extra = 16). */
> +/* bit_ior_expr will show up for each operator twice (non-volatile and volatile). */
> +/* { dg-final { scan-tree-dump-times "ne_expr,"      16 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "eq_expr,"       2 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "lt_expr,"       2 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "le_expr,"       2 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "gt_expr,"       2 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "ge_expr,"       2 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "bit_not_expr,"  6 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "bit_and_expr,"  6 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "bit_ior_expr," 12 "optimized"} } */
> \ No newline at end of file
> -- 
> 2.31.1
>
  
Andrew Pinski July 28, 2023, 6:34 a.m. UTC | #2
On Sun, Jul 23, 2023 at 1:39 AM Richard Biener via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
>
>
> > Am 23.07.2023 um 01:27 schrieb Andrew Pinski via Gcc-patches <gcc-patches@gcc.gnu.org>:
> >
> > This adds a special case of the `(a&~b) | b` pattern where
> > `b` and `~b` are comparisons.
> >
> > OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
>
> Don’t we have an existing match for inversions we could amend?

We don't currently, but I might be able to pattern the function off of
what was similarly done for bitwise_equal_p.
I noticed that even the patch which added bitwise_equal_p could benefit
from a similar thing.

Thanks,
Andrew

>
> > gcc/ChangeLog:
> >
> >    PR tree-optimization/100864
> >    * match.pd ((~x & y) | x -> x | y): Add comparison variant.
> >
> > gcc/testsuite/ChangeLog:
> >
> >    * gcc.dg/tree-ssa/bitops-3.c: New test.
> > ---
> > gcc/match.pd                             | 17 +++++-
> > gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c | 67 ++++++++++++++++++++++++
> > 2 files changed, 83 insertions(+), 1 deletion(-)
> > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index bfd15d6cd4a..dd4a2df537d 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -1928,7 +1928,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >  /* (~x & y) | x -> x | y */
> >  (simplify
> >   (bitop:c (rbitop:c (bit_not @0) @1) @0)
> > -  (bitop @0 @1)))
> > +  (bitop @0 @1))
> > + /* Similar but for comparisons which have been inverted already,
> > +    Note it is hard to simulate the inverted tcc_comparison due
> > +    NaNs; That is == and != are sometimes inversions and sometimes not.
> > +    So a double for loop is needed and then compare the inverse code
> > +    with the result of invert_tree_comparison is needed.
> > +    This works fine for vector compares as -1 and 0 are bitwise
> > +    inverses.  */
> > + (for cmp (tcc_comparison)
> > +  (for icmp (tcc_comparison)
> > +   (simplify
> > +    (bitop:c (rbitop:c (icmp @0 @1) @2) (cmp@3 @0 @1))
> > +     (with { enum tree_code ic = invert_tree_comparison
> > +             (cmp, HONOR_NANS (@0)); }
> > +      (if (ic == icmp)
> > +       (bitop @3 @2)))))))
> >
> > /* ((x | y) & z) | x -> (z & y) | x */
> > (simplify
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c b/gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c
> > new file mode 100644
> > index 00000000000..68fff4edce9
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c
> > @@ -0,0 +1,67 @@
> > +/* PR tree-optimization/100864 */
> > +
> > +/* { dg-do run } */
> > +/* { dg-options "-O1 -fdump-tree-optimized-raw" } */
> > +
> > +#define op_ne !=
> > +#define op_eq ==
> > +#define op_lt <
> > +#define op_le <=
> > +#define op_gt >
> > +#define op_ge >=
> > +
> > +#define operators(t) \
> > +t(ne) \
> > +t(eq) \
> > +t(lt) \
> > +t(le) \
> > +t(gt) \
> > +t(ge)
> > +
> > +#define cmpfunc(v, op) \
> > +__attribute__((noipa)) \
> > +_Bool func_##op##_##v(v int a, v int b, v _Bool e) \
> > +{ \
> > +  v _Bool c = (a op_##op b); \
> > +  v _Bool d = !c; \
> > +  return (e & d) | c; \
> > +}
> > +
> > +#define cmp_funcs(op) \
> > +cmpfunc(, op) \
> > +cmpfunc(volatile , op)
> > +
> > +operators(cmp_funcs)
> > +
> > +#define test(op) \
> > +if (func_##op##_ (a, b, e) != func_##op##_volatile (a, b, e)) \
> > + __builtin_abort();
> > +
> > +int main()
> > +{
> > +  for(int a = -3; a <= 3; a++)
> > +    for(int b = -3; b <= 3; b++)
> > +      {
> > +    _Bool e = 0;
> > +    operators(test)
> > +    e = 1;
> > +    operators(test)
> > +      }
> > +  return 0;
> > +}
> > +
> > +/* Check to make sure we optimize `(a&!b) | b` -> `a | b`. */
> > +/* There are 6 different comparison operators testing here. */
> > +/* bit_not_expr and bit_and_expr should show up for each one (volatile). */
> > +/* Each operator should show up twice
> > +   (except for `!=` which shows up 2*6 (each tester) + 2 (the 2 loops) extra = 16). */
> > +/* bit_ior_expr will show up for each operator twice (non-volatile and volatile). */
> > +/* { dg-final { scan-tree-dump-times "ne_expr,"      16 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-times "eq_expr,"       2 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-times "lt_expr,"       2 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-times "le_expr,"       2 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-times "gt_expr,"       2 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-times "ge_expr,"       2 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-times "bit_not_expr,"  6 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-times "bit_and_expr,"  6 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-times "bit_ior_expr," 12 "optimized"} } */
> > \ No newline at end of file
> > --
> > 2.31.1
> >
  
Richard Biener July 28, 2023, 6:40 a.m. UTC | #3
On Fri, Jul 28, 2023 at 8:34 AM Andrew Pinski <pinskia@gmail.com> wrote:
>
> On Sun, Jul 23, 2023 at 1:39 AM Richard Biener via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> >
> >
> > > Am 23.07.2023 um 01:27 schrieb Andrew Pinski via Gcc-patches <gcc-patches@gcc.gnu.org>:
> > >
> > > This adds a special case of the `(a&~b) | b` pattern where
> > > `b` and `~b` are comparisons.
> > >
> > > OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
> >
> > Don’t we have an existing match for inversions we could amend?
>
> We don't currently but I might be able to pattern the function off of
> what was similarly done for bitwise_equal_p .
> I noticed the patch which added bitwise_equal_p even could benefit
> from this similar thing.

OK, I thought of logical_inverted_value but that isn't a 1:1 match here.

Richard.

> Thanks,
> Andrew
>
> >
> > > gcc/ChangeLog:
> > >
> > >    PR tree-optimization/100864
> > >    * match.pd ((~x & y) | x -> x | y): Add comparison variant.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >    * gcc.dg/tree-ssa/bitops-3.c: New test.
> > > ---
> > > gcc/match.pd                             | 17 +++++-
> > > gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c | 67 ++++++++++++++++++++++++
> > > 2 files changed, 83 insertions(+), 1 deletion(-)
> > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c
> > >
> > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > index bfd15d6cd4a..dd4a2df537d 100644
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -1928,7 +1928,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > >  /* (~x & y) | x -> x | y */
> > >  (simplify
> > >   (bitop:c (rbitop:c (bit_not @0) @1) @0)
> > > -  (bitop @0 @1)))
> > > +  (bitop @0 @1))
> > > + /* Similar but for comparisons which have been inverted already,
> > > +    Note it is hard to simulate the inverted tcc_comparison due
> > > +    NaNs; That is == and != are sometimes inversions and sometimes not.
> > > +    So a double for loop is needed and then compare the inverse code
> > > +    with the result of invert_tree_comparison is needed.
> > > +    This works fine for vector compares as -1 and 0 are bitwise
> > > +    inverses.  */
> > > + (for cmp (tcc_comparison)
> > > +  (for icmp (tcc_comparison)
> > > +   (simplify
> > > +    (bitop:c (rbitop:c (icmp @0 @1) @2) (cmp@3 @0 @1))
> > > +     (with { enum tree_code ic = invert_tree_comparison
> > > +             (cmp, HONOR_NANS (@0)); }
> > > +      (if (ic == icmp)
> > > +       (bitop @3 @2)))))))
> > >
> > > /* ((x | y) & z) | x -> (z & y) | x */
> > > (simplify
> > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c b/gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c
> > > new file mode 100644
> > > index 00000000000..68fff4edce9
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c
> > > @@ -0,0 +1,67 @@
> > > +/* PR tree-optimization/100864 */
> > > +
> > > +/* { dg-do run } */
> > > +/* { dg-options "-O1 -fdump-tree-optimized-raw" } */
> > > +
> > > +#define op_ne !=
> > > +#define op_eq ==
> > > +#define op_lt <
> > > +#define op_le <=
> > > +#define op_gt >
> > > +#define op_ge >=
> > > +
> > > +#define operators(t) \
> > > +t(ne) \
> > > +t(eq) \
> > > +t(lt) \
> > > +t(le) \
> > > +t(gt) \
> > > +t(ge)
> > > +
> > > +#define cmpfunc(v, op) \
> > > +__attribute__((noipa)) \
> > > +_Bool func_##op##_##v(v int a, v int b, v _Bool e) \
> > > +{ \
> > > +  v _Bool c = (a op_##op b); \
> > > +  v _Bool d = !c; \
> > > +  return (e & d) | c; \
> > > +}
> > > +
> > > +#define cmp_funcs(op) \
> > > +cmpfunc(, op) \
> > > +cmpfunc(volatile , op)
> > > +
> > > +operators(cmp_funcs)
> > > +
> > > +#define test(op) \
> > > +if (func_##op##_ (a, b, e) != func_##op##_volatile (a, b, e)) \
> > > + __builtin_abort();
> > > +
> > > +int main()
> > > +{
> > > +  for(int a = -3; a <= 3; a++)
> > > +    for(int b = -3; b <= 3; b++)
> > > +      {
> > > +    _Bool e = 0;
> > > +    operators(test)
> > > +    e = 1;
> > > +    operators(test)
> > > +      }
> > > +  return 0;
> > > +}
> > > +
> > > +/* Check to make sure we optimize `(a&!b) | b` -> `a | b`. */
> > > +/* There are 6 different comparison operators testing here. */
> > > +/* bit_not_expr and bit_and_expr should show up for each one (volatile). */
> > > +/* Each operator should show up twice
> > > +   (except for `!=` which shows up 2*6 (each tester) + 2 (the 2 loops) extra = 16). */
> > > +/* bit_ior_expr will show up for each operator twice (non-volatile and volatile). */
> > > +/* { dg-final { scan-tree-dump-times "ne_expr,"      16 "optimized"} } */
> > > +/* { dg-final { scan-tree-dump-times "eq_expr,"       2 "optimized"} } */
> > > +/* { dg-final { scan-tree-dump-times "lt_expr,"       2 "optimized"} } */
> > > +/* { dg-final { scan-tree-dump-times "le_expr,"       2 "optimized"} } */
> > > +/* { dg-final { scan-tree-dump-times "gt_expr,"       2 "optimized"} } */
> > > +/* { dg-final { scan-tree-dump-times "ge_expr,"       2 "optimized"} } */
> > > +/* { dg-final { scan-tree-dump-times "bit_not_expr,"  6 "optimized"} } */
> > > +/* { dg-final { scan-tree-dump-times "bit_and_expr,"  6 "optimized"} } */
> > > +/* { dg-final { scan-tree-dump-times "bit_ior_expr," 12 "optimized"} } */
> > > \ No newline at end of file
> > > --
> > > 2.31.1
> > >
  

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index bfd15d6cd4a..dd4a2df537d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1928,7 +1928,22 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  /* (~x & y) | x -> x | y */
  (simplify
   (bitop:c (rbitop:c (bit_not @0) @1) @0)
-  (bitop @0 @1)))
+  (bitop @0 @1))
+ /* Similar but for comparisons which have already been inverted.
+    Note it is hard to simulate the inverted tcc_comparison due to
+    NaNs; that is, == and != are sometimes inversions and sometimes not.
+    So a double for loop is needed, and the inverse code is then compared
+    with the result of invert_tree_comparison.
+    This works fine for vector compares as -1 and 0 are bitwise
+    inverses.  */
+ (for cmp (tcc_comparison)
+  (for icmp (tcc_comparison)
+   (simplify
+    (bitop:c (rbitop:c (icmp @0 @1) @2) (cmp@3 @0 @1))
+     (with { enum tree_code ic = invert_tree_comparison
+             (cmp, HONOR_NANS (@0)); }
+      (if (ic == icmp)
+       (bitop @3 @2)))))))
 
 /* ((x | y) & z) | x -> (z & y) | x */
 (simplify
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c b/gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c
new file mode 100644
index 00000000000..68fff4edce9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c
@@ -0,0 +1,67 @@ 
+/* PR tree-optimization/100864 */
+
+/* { dg-do run } */
+/* { dg-options "-O1 -fdump-tree-optimized-raw" } */
+
+#define op_ne !=
+#define op_eq ==
+#define op_lt <
+#define op_le <=
+#define op_gt >
+#define op_ge >=
+
+#define operators(t) \
+t(ne) \
+t(eq) \
+t(lt) \
+t(le) \
+t(gt) \
+t(ge)
+
+#define cmpfunc(v, op) \
+__attribute__((noipa)) \
+_Bool func_##op##_##v(v int a, v int b, v _Bool e) \
+{ \
+  v _Bool c = (a op_##op b); \
+  v _Bool d = !c; \
+  return (e & d) | c; \
+}
+
+#define cmp_funcs(op) \
+cmpfunc(, op) \
+cmpfunc(volatile , op)
+
+operators(cmp_funcs)
+
+#define test(op) \
+if (func_##op##_ (a, b, e) != func_##op##_volatile (a, b, e)) \
+ __builtin_abort();
+ 
+int main()
+{
+  for(int a = -3; a <= 3; a++)
+    for(int b = -3; b <= 3; b++)
+      {
+	_Bool e = 0;
+	operators(test)
+	e = 1;
+	operators(test)
+      }
+  return 0;
+}
+
+/* Check to make sure we optimize `(a&!b) | b` -> `a | b`. */
+/* There are 6 different comparison operators tested here. */
+/* bit_not_expr and bit_and_expr should show up for each one (volatile). */
+/* Each operator should show up twice, except for `!=`, which shows up an extra
+   2*6 (each tester) + 2 (the 2 loops) times, for 16 in total. */
+/* bit_ior_expr will show up for each operator twice (non-volatile and volatile). */
+/* { dg-final { scan-tree-dump-times "ne_expr,"      16 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "eq_expr,"       2 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "lt_expr,"       2 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "le_expr,"       2 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "gt_expr,"       2 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "ge_expr,"       2 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "bit_not_expr,"  6 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "bit_and_expr,"  6 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "bit_ior_expr," 12 "optimized"} } */
\ No newline at end of file