Fix 100864: `(a&!b) | b` is not optimized to `a | b` for comparisons
Checks
Commit Message
This adds a special case of the `(a&~b) | b` pattern where
`b` and `~b` are comparisons.
OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
gcc/ChangeLog:
PR tree-optimization/100864
* match.pd ((~x & y) | x -> x | y): Add comparison variant.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/bitops-3.c: New test.
---
gcc/match.pd | 17 +++++-
gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c | 67 ++++++++++++++++++++++++
2 files changed, 83 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c
Comments
> Am 23.07.2023 um 01:27 schrieb Andrew Pinski via Gcc-patches <gcc-patches@gcc.gnu.org>:
>
> This adds a special case of the `(a&~b) | b` pattern where
> `b` and `~b` are comparisons.
>
> OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
Don’t we have an existing match for inversions we could amend?
> gcc/ChangeLog:
>
> PR tree-optimization/100864
> * match.pd ((~x & y) | x -> x | y): Add comparison variant.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/bitops-3.c: New test.
> ---
> gcc/match.pd | 17 +++++-
> gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c | 67 ++++++++++++++++++++++++
> 2 files changed, 83 insertions(+), 1 deletion(-)
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index bfd15d6cd4a..dd4a2df537d 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -1928,7 +1928,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> /* (~x & y) | x -> x | y */
> (simplify
> (bitop:c (rbitop:c (bit_not @0) @1) @0)
> - (bitop @0 @1)))
> + (bitop @0 @1))
> + /* Similar but for comparisons which have been inverted already,
> + Note it is hard to simulate the inverted tcc_comparison due
> + NaNs; That is == and != are sometimes inversions and sometimes not.
> + So a double for loop is needed and then compare the inverse code
> + with the result of invert_tree_comparison is needed.
> + This works fine for vector compares as -1 and 0 are bitwise
> + inverses. */
> + (for cmp (tcc_comparison)
> + (for icmp (tcc_comparison)
> + (simplify
> + (bitop:c (rbitop:c (icmp @0 @1) @2) (cmp@3 @0 @1))
> + (with { enum tree_code ic = invert_tree_comparison
> + (cmp, HONOR_NANS (@0)); }
> + (if (ic == icmp)
> + (bitop @3 @2)))))))
>
> /* ((x | y) & z) | x -> (z & y) | x */
> (simplify
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c b/gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c
> new file mode 100644
> index 00000000000..68fff4edce9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c
> @@ -0,0 +1,67 @@
> +/* PR tree-optimization/100864 */
> +
> +/* { dg-do run } */
> +/* { dg-options "-O1 -fdump-tree-optimized-raw" } */
> +
> +#define op_ne !=
> +#define op_eq ==
> +#define op_lt <
> +#define op_le <=
> +#define op_gt >
> +#define op_ge >=
> +
> +#define operators(t) \
> +t(ne) \
> +t(eq) \
> +t(lt) \
> +t(le) \
> +t(gt) \
> +t(ge)
> +
> +#define cmpfunc(v, op) \
> +__attribute__((noipa)) \
> +_Bool func_##op##_##v(v int a, v int b, v _Bool e) \
> +{ \
> + v _Bool c = (a op_##op b); \
> + v _Bool d = !c; \
> + return (e & d) | c; \
> +}
> +
> +#define cmp_funcs(op) \
> +cmpfunc(, op) \
> +cmpfunc(volatile , op)
> +
> +operators(cmp_funcs)
> +
> +#define test(op) \
> +if (func_##op##_ (a, b, e) != func_##op##_volatile (a, b, e)) \
> + __builtin_abort();
> +
> +int main()
> +{
> + for(int a = -3; a <= 3; a++)
> + for(int b = -3; b <= 3; b++)
> + {
> + _Bool e = 0;
> + operators(test)
> + e = 1;
> + operators(test)
> + }
> + return 0;
> +}
> +
> +/* Check to make sure we optimize `(a&!b) | b` -> `a | b`. */
> +/* There are 6 different comparison operators testing here. */
> +/* bit_not_expr and bit_and_expr should show up for each one (volatile). */
> +/* Each operator should show up twice
> + (except for `!=` which shows up 2*6 (each tester) + 2 (the 2 loops) extra = 16). */
> +/* bit_ior_expr will show up for each operator twice (non-volatile and volatile). */
> +/* { dg-final { scan-tree-dump-times "ne_expr," 16 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "eq_expr," 2 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "lt_expr," 2 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "le_expr," 2 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "gt_expr," 2 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "ge_expr," 2 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "bit_not_expr," 6 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "bit_and_expr," 6 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "bit_ior_expr," 12 "optimized"} } */
> \ No newline at end of file
> --
> 2.31.1
>
On Sun, Jul 23, 2023 at 1:39 AM Richard Biener via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
>
>
> > Am 23.07.2023 um 01:27 schrieb Andrew Pinski via Gcc-patches <gcc-patches@gcc.gnu.org>:
> >
> > This adds a special case of the `(a&~b) | b` pattern where
> > `b` and `~b` are comparisons.
> >
> > OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
>
> Don’t we have an existing match for inversions we could amend?
We don't currently, but I might be able to pattern the function off of
what was similarly done for bitwise_equal_p.
I noticed that even the patch which added bitwise_equal_p could benefit
from something similar.
Thanks,
Andrew
>
> > gcc/ChangeLog:
> >
> > PR tree-optimization/100864
> > * match.pd ((~x & y) | x -> x | y): Add comparison variant.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.dg/tree-ssa/bitops-3.c: New test.
> > ---
> > gcc/match.pd | 17 +++++-
> > gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c | 67 ++++++++++++++++++++++++
> > 2 files changed, 83 insertions(+), 1 deletion(-)
> > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index bfd15d6cd4a..dd4a2df537d 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -1928,7 +1928,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > /* (~x & y) | x -> x | y */
> > (simplify
> > (bitop:c (rbitop:c (bit_not @0) @1) @0)
> > - (bitop @0 @1)))
> > + (bitop @0 @1))
> > + /* Similar but for comparisons which have been inverted already,
> > + Note it is hard to simulate the inverted tcc_comparison due
> > + NaNs; That is == and != are sometimes inversions and sometimes not.
> > + So a double for loop is needed and then compare the inverse code
> > + with the result of invert_tree_comparison is needed.
> > + This works fine for vector compares as -1 and 0 are bitwise
> > + inverses. */
> > + (for cmp (tcc_comparison)
> > + (for icmp (tcc_comparison)
> > + (simplify
> > + (bitop:c (rbitop:c (icmp @0 @1) @2) (cmp@3 @0 @1))
> > + (with { enum tree_code ic = invert_tree_comparison
> > + (cmp, HONOR_NANS (@0)); }
> > + (if (ic == icmp)
> > + (bitop @3 @2)))))))
> >
> > /* ((x | y) & z) | x -> (z & y) | x */
> > (simplify
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c b/gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c
> > new file mode 100644
> > index 00000000000..68fff4edce9
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c
> > @@ -0,0 +1,67 @@
> > +/* PR tree-optimization/100864 */
> > +
> > +/* { dg-do run } */
> > +/* { dg-options "-O1 -fdump-tree-optimized-raw" } */
> > +
> > +#define op_ne !=
> > +#define op_eq ==
> > +#define op_lt <
> > +#define op_le <=
> > +#define op_gt >
> > +#define op_ge >=
> > +
> > +#define operators(t) \
> > +t(ne) \
> > +t(eq) \
> > +t(lt) \
> > +t(le) \
> > +t(gt) \
> > +t(ge)
> > +
> > +#define cmpfunc(v, op) \
> > +__attribute__((noipa)) \
> > +_Bool func_##op##_##v(v int a, v int b, v _Bool e) \
> > +{ \
> > + v _Bool c = (a op_##op b); \
> > + v _Bool d = !c; \
> > + return (e & d) | c; \
> > +}
> > +
> > +#define cmp_funcs(op) \
> > +cmpfunc(, op) \
> > +cmpfunc(volatile , op)
> > +
> > +operators(cmp_funcs)
> > +
> > +#define test(op) \
> > +if (func_##op##_ (a, b, e) != func_##op##_volatile (a, b, e)) \
> > + __builtin_abort();
> > +
> > +int main()
> > +{
> > + for(int a = -3; a <= 3; a++)
> > + for(int b = -3; b <= 3; b++)
> > + {
> > + _Bool e = 0;
> > + operators(test)
> > + e = 1;
> > + operators(test)
> > + }
> > + return 0;
> > +}
> > +
> > +/* Check to make sure we optimize `(a&!b) | b` -> `a | b`. */
> > +/* There are 6 different comparison operators testing here. */
> > +/* bit_not_expr and bit_and_expr should show up for each one (volatile). */
> > +/* Each operator should show up twice
> > + (except for `!=` which shows up 2*6 (each tester) + 2 (the 2 loops) extra = 16). */
> > +/* bit_ior_expr will show up for each operator twice (non-volatile and volatile). */
> > +/* { dg-final { scan-tree-dump-times "ne_expr," 16 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-times "eq_expr," 2 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-times "lt_expr," 2 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-times "le_expr," 2 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-times "gt_expr," 2 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-times "ge_expr," 2 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-times "bit_not_expr," 6 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-times "bit_and_expr," 6 "optimized"} } */
> > +/* { dg-final { scan-tree-dump-times "bit_ior_expr," 12 "optimized"} } */
> > \ No newline at end of file
> > --
> > 2.31.1
> >
On Fri, Jul 28, 2023 at 8:34 AM Andrew Pinski <pinskia@gmail.com> wrote:
>
> On Sun, Jul 23, 2023 at 1:39 AM Richard Biener via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> >
> >
> > > Am 23.07.2023 um 01:27 schrieb Andrew Pinski via Gcc-patches <gcc-patches@gcc.gnu.org>:
> > >
> > > This adds a special case of the `(a&~b) | b` pattern where
> > > `b` and `~b` are comparisons.
> > >
> > > OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
> >
> > Don’t we have an existing match for inversions we could amend?
>
> We don't currently but I might be able to pattern the function off of
> what was similarly done for bitwise_equal_p .
> I noticed the patch which added bitwise_equal_p even could benefit
> from this similar thing.
OK, I thought of logical_inverted_value but that isn't a 1:1 match here.
Richard.
> Thanks,
> Andrew
>
> >
> > > gcc/ChangeLog:
> > >
> > > PR tree-optimization/100864
> > > * match.pd ((~x & y) | x -> x | y): Add comparison variant.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > * gcc.dg/tree-ssa/bitops-3.c: New test.
> > > ---
> > > gcc/match.pd | 17 +++++-
> > > gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c | 67 ++++++++++++++++++++++++
> > > 2 files changed, 83 insertions(+), 1 deletion(-)
> > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c
> > >
> > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > index bfd15d6cd4a..dd4a2df537d 100644
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -1928,7 +1928,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > > /* (~x & y) | x -> x | y */
> > > (simplify
> > > (bitop:c (rbitop:c (bit_not @0) @1) @0)
> > > - (bitop @0 @1)))
> > > + (bitop @0 @1))
> > > + /* Similar but for comparisons which have been inverted already,
> > > + Note it is hard to simulate the inverted tcc_comparison due
> > > + NaNs; That is == and != are sometimes inversions and sometimes not.
> > > + So a double for loop is needed and then compare the inverse code
> > > + with the result of invert_tree_comparison is needed.
> > > + This works fine for vector compares as -1 and 0 are bitwise
> > > + inverses. */
> > > + (for cmp (tcc_comparison)
> > > + (for icmp (tcc_comparison)
> > > + (simplify
> > > + (bitop:c (rbitop:c (icmp @0 @1) @2) (cmp@3 @0 @1))
> > > + (with { enum tree_code ic = invert_tree_comparison
> > > + (cmp, HONOR_NANS (@0)); }
> > > + (if (ic == icmp)
> > > + (bitop @3 @2)))))))
> > >
> > > /* ((x | y) & z) | x -> (z & y) | x */
> > > (simplify
> > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c b/gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c
> > > new file mode 100644
> > > index 00000000000..68fff4edce9
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-3.c
> > > @@ -0,0 +1,67 @@
> > > +/* PR tree-optimization/100864 */
> > > +
> > > +/* { dg-do run } */
> > > +/* { dg-options "-O1 -fdump-tree-optimized-raw" } */
> > > +
> > > +#define op_ne !=
> > > +#define op_eq ==
> > > +#define op_lt <
> > > +#define op_le <=
> > > +#define op_gt >
> > > +#define op_ge >=
> > > +
> > > +#define operators(t) \
> > > +t(ne) \
> > > +t(eq) \
> > > +t(lt) \
> > > +t(le) \
> > > +t(gt) \
> > > +t(ge)
> > > +
> > > +#define cmpfunc(v, op) \
> > > +__attribute__((noipa)) \
> > > +_Bool func_##op##_##v(v int a, v int b, v _Bool e) \
> > > +{ \
> > > + v _Bool c = (a op_##op b); \
> > > + v _Bool d = !c; \
> > > + return (e & d) | c; \
> > > +}
> > > +
> > > +#define cmp_funcs(op) \
> > > +cmpfunc(, op) \
> > > +cmpfunc(volatile , op)
> > > +
> > > +operators(cmp_funcs)
> > > +
> > > +#define test(op) \
> > > +if (func_##op##_ (a, b, e) != func_##op##_volatile (a, b, e)) \
> > > + __builtin_abort();
> > > +
> > > +int main()
> > > +{
> > > + for(int a = -3; a <= 3; a++)
> > > + for(int b = -3; b <= 3; b++)
> > > + {
> > > + _Bool e = 0;
> > > + operators(test)
> > > + e = 1;
> > > + operators(test)
> > > + }
> > > + return 0;
> > > +}
> > > +
> > > +/* Check to make sure we optimize `(a&!b) | b` -> `a | b`. */
> > > +/* There are 6 different comparison operators testing here. */
> > > +/* bit_not_expr and bit_and_expr should show up for each one (volatile). */
> > > +/* Each operator should show up twice
> > > + (except for `!=` which shows up 2*6 (each tester) + 2 (the 2 loops) extra = 16). */
> > > +/* bit_ior_expr will show up for each operator twice (non-volatile and volatile). */
> > > +/* { dg-final { scan-tree-dump-times "ne_expr," 16 "optimized"} } */
> > > +/* { dg-final { scan-tree-dump-times "eq_expr," 2 "optimized"} } */
> > > +/* { dg-final { scan-tree-dump-times "lt_expr," 2 "optimized"} } */
> > > +/* { dg-final { scan-tree-dump-times "le_expr," 2 "optimized"} } */
> > > +/* { dg-final { scan-tree-dump-times "gt_expr," 2 "optimized"} } */
> > > +/* { dg-final { scan-tree-dump-times "ge_expr," 2 "optimized"} } */
> > > +/* { dg-final { scan-tree-dump-times "bit_not_expr," 6 "optimized"} } */
> > > +/* { dg-final { scan-tree-dump-times "bit_and_expr," 6 "optimized"} } */
> > > +/* { dg-final { scan-tree-dump-times "bit_ior_expr," 12 "optimized"} } */
> > > \ No newline at end of file
> > > --
> > > 2.31.1
> > >
@@ -1928,7 +1928,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
/* (~x & y) | x -> x | y */
(simplify
(bitop:c (rbitop:c (bit_not @0) @1) @0)
- (bitop @0 @1)))
+ (bitop @0 @1))
+ /* Similar, but for comparisons which have already been inverted.
+ Note it is hard to simulate the inverted tcc_comparison due to
+ NaNs; that is, == and != are sometimes inversions and sometimes not.
+ So a double for loop is needed, and the inverse code is then compared
+ with the result of invert_tree_comparison.
+ This works fine for vector compares as -1 and 0 are bitwise
+ inverses. */
+ (for cmp (tcc_comparison)
+ (for icmp (tcc_comparison)
+ (simplify
+ (bitop:c (rbitop:c (icmp @0 @1) @2) (cmp@3 @0 @1))
+ (with { enum tree_code ic = invert_tree_comparison
+ (cmp, HONOR_NANS (@0)); }
+ (if (ic == icmp)
+ (bitop @3 @2)))))))
/* ((x | y) & z) | x -> (z & y) | x */
(simplify
new file mode 100644
@@ -0,0 +1,67 @@
+/* PR tree-optimization/100864 */
+
+/* { dg-do run } */
+/* { dg-options "-O1 -fdump-tree-optimized-raw" } */
+
+#define op_ne !=
+#define op_eq ==
+#define op_lt <
+#define op_le <=
+#define op_gt >
+#define op_ge >=
+
+#define operators(t) \
+t(ne) \
+t(eq) \
+t(lt) \
+t(le) \
+t(gt) \
+t(ge)
+
+#define cmpfunc(v, op) \
+__attribute__((noipa)) \
+_Bool func_##op##_##v(v int a, v int b, v _Bool e) \
+{ \
+ v _Bool c = (a op_##op b); \
+ v _Bool d = !c; \
+ return (e & d) | c; \
+}
+
+#define cmp_funcs(op) \
+cmpfunc(, op) \
+cmpfunc(volatile , op)
+
+operators(cmp_funcs)
+
+#define test(op) \
+if (func_##op##_ (a, b, e) != func_##op##_volatile (a, b, e)) \
+ __builtin_abort();
+
+int main()
+{
+ for(int a = -3; a <= 3; a++)
+ for(int b = -3; b <= 3; b++)
+ {
+ _Bool e = 0;
+ operators(test)
+ e = 1;
+ operators(test)
+ }
+ return 0;
+}
+
+/* Check to make sure we optimize `(a&!b) | b` -> `a | b`. */
+/* There are 6 different comparison operators testing here. */
+/* bit_not_expr and bit_and_expr should show up for each one (volatile). */
+/* Each operator should show up twice
+ (except for `!=` which shows up 2*6 (each tester) + 2 (the 2 loops) extra = 16). */
+/* bit_ior_expr will show up for each operator twice (non-volatile and volatile). */
+/* { dg-final { scan-tree-dump-times "ne_expr," 16 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "eq_expr," 2 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "lt_expr," 2 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "le_expr," 2 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "gt_expr," 2 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "ge_expr," 2 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "bit_not_expr," 6 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "bit_and_expr," 6 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "bit_ior_expr," 12 "optimized"} } */
\ No newline at end of file