MATCH: Simplify (a CMP1 b) ^ (a CMP2 b)
Checks
Commit Message
This adds the missing optimizations here.
Note we don't need to match where CMP1 and CMP2 are complements of each
other as that is already handled elsewhere.
I added a new executable testcase to make sure we optimize it correctly
as I had originally messed up one of the entries for the resulting
comparison to make sure they were 100% correct.
OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
PR tree-optimization/107881
gcc/ChangeLog:
* match.pd (`(a CMP1 b) ^ (a CMP2 b)`): New pattern.
(`(a CMP1 b) == (a CMP2 b)`): New pattern.
gcc/testsuite/ChangeLog:
* gcc.c-torture/execute/pr107881-1.c: New test.
* gcc.dg/tree-ssa/cmpeq-4.c: New test.
* gcc.dg/tree-ssa/cmpxor-1.c: New test.
---
gcc/match.pd | 20 +++
.../gcc.c-torture/execute/pr107881-1.c | 115 ++++++++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/cmpeq-4.c | 51 ++++++++
gcc/testsuite/gcc.dg/tree-ssa/cmpxor-1.c | 51 ++++++++
4 files changed, 237 insertions(+)
create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr107881-1.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cmpeq-4.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cmpxor-1.c
Comments
On Tue, Sep 12, 2023 at 6:22 AM Andrew Pinski via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> This adds the missing optimizations here.
> Note we don't need to match where CMP1 and CMP2 are complements of each
> other as that is already handled elsewhere.
>
> I added a new executable testcase to make sure we optimize it correctly
> as I had originally messed up one of the entries for the resulting
> comparison to make sure they were 100% correct.
>
> OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
OK.
> PR tree-optimization/107881
>
> gcc/ChangeLog:
>
> * match.pd (`(a CMP1 b) ^ (a CMP2 b)`): New pattern.
> (`(a CMP1 b) == (a CMP2 b)`): New pattern.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.c-torture/execute/pr107881-1.c: New test.
> * gcc.dg/tree-ssa/cmpeq-4.c: New test.
> * gcc.dg/tree-ssa/cmpxor-1.c: New test.
> ---
> gcc/match.pd | 20 +++
> .../gcc.c-torture/execute/pr107881-1.c | 115 ++++++++++++++++++
> gcc/testsuite/gcc.dg/tree-ssa/cmpeq-4.c | 51 ++++++++
> gcc/testsuite/gcc.dg/tree-ssa/cmpxor-1.c | 51 ++++++++
> 4 files changed, 237 insertions(+)
> create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr107881-1.c
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cmpeq-4.c
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cmpxor-1.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index e96e385c6fa..39c7ea1088f 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3154,6 +3154,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> { constant_boolean_node (true, type); })
> ))))))
>
> +/* Optimize (a CMP b) ^ (a CMP b) */
> +/* Optimize (a CMP b) != (a CMP b) */
> +(for op (bit_xor ne)
> + (for cmp1 (lt lt lt le le le)
> + cmp2 (gt eq ne ge eq ne)
> + rcmp (ne le gt ne lt ge)
> + (simplify
> + (op:c (cmp1:c @0 @1) (cmp2:c @0 @1))
> + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0)))
> + (rcmp @0 @1)))))
> +
> +/* Optimize (a CMP b) == (a CMP b) */
> +(for cmp1 (lt lt lt le le le)
> + cmp2 (gt eq ne ge eq ne)
> + rcmp (eq gt le eq ge lt)
> + (simplify
> + (eq:c (cmp1:c @0 @1) (cmp2:c @0 @1))
> + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0)))
> + (rcmp @0 @1))))
> +
> /* We can't reassociate at all for saturating types. */
> (if (!TYPE_SATURATING (type))
>
> diff --git a/gcc/testsuite/gcc.c-torture/execute/pr107881-1.c b/gcc/testsuite/gcc.c-torture/execute/pr107881-1.c
> new file mode 100644
> index 00000000000..063ec4c2797
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/pr107881-1.c
> @@ -0,0 +1,115 @@
> +#define func(vol, op1, op2, op3) \
> +_Bool op1##_##op2##_##op3##_##vol (int a, int b) \
> +{ \
> + vol _Bool x = op_##op1(a, b); \
> + vol _Bool y = op_##op2(a, b); \
> + return op_##op3(x, y); \
> +}
> +
> +#define op_lt(a, b) ((a) < (b))
> +#define op_le(a, b) ((a) <= (b))
> +#define op_eq(a, b) ((a) == (b))
> +#define op_ne(a, b) ((a) != (b))
> +#define op_gt(a, b) ((a) > (b))
> +#define op_ge(a, b) ((a) >= (b))
> +#define op_xor(a, b) ((a) ^ (b))
> +
> +
> +#define funcs(a) \
> + a(lt,lt,ne) \
> + a(lt,lt,eq) \
> + a(lt,lt,xor) \
> + a(lt,le,ne) \
> + a(lt,le,eq) \
> + a(lt,le,xor) \
> + a(lt,gt,ne) \
> + a(lt,gt,eq) \
> + a(lt,gt,xor) \
> + a(lt,ge,ne) \
> + a(lt,ge,eq) \
> + a(lt,ge,xor) \
> + a(lt,eq,ne) \
> + a(lt,eq,eq) \
> + a(lt,eq,xor) \
> + a(lt,ne,ne) \
> + a(lt,ne,eq) \
> + a(lt,ne,xor) \
> + \
> + a(le,lt,ne) \
> + a(le,lt,eq) \
> + a(le,lt,xor) \
> + a(le,le,ne) \
> + a(le,le,eq) \
> + a(le,le,xor) \
> + a(le,gt,ne) \
> + a(le,gt,eq) \
> + a(le,gt,xor) \
> + a(le,ge,ne) \
> + a(le,ge,eq) \
> + a(le,ge,xor) \
> + a(le,eq,ne) \
> + a(le,eq,eq) \
> + a(le,eq,xor) \
> + a(le,ne,ne) \
> + a(le,ne,eq) \
> + a(le,ne,xor) \
> + \
> + a(gt,lt,ne) \
> + a(gt,lt,eq) \
> + a(gt,lt,xor) \
> + a(gt,le,ne) \
> + a(gt,le,eq) \
> + a(gt,le,xor) \
> + a(gt,gt,ne) \
> + a(gt,gt,eq) \
> + a(gt,gt,xor) \
> + a(gt,ge,ne) \
> + a(gt,ge,eq) \
> + a(gt,ge,xor) \
> + a(gt,eq,ne) \
> + a(gt,eq,eq) \
> + a(gt,eq,xor) \
> + a(gt,ne,ne) \
> + a(gt,ne,eq) \
> + a(gt,ne,xor) \
> + \
> + a(ge,lt,ne) \
> + a(ge,lt,eq) \
> + a(ge,lt,xor) \
> + a(ge,le,ne) \
> + a(ge,le,eq) \
> + a(ge,le,xor) \
> + a(ge,gt,ne) \
> + a(ge,gt,eq) \
> + a(ge,gt,xor) \
> + a(ge,ge,ne) \
> + a(ge,ge,eq) \
> + a(ge,ge,xor) \
> + a(ge,eq,ne) \
> + a(ge,eq,eq) \
> + a(ge,eq,xor) \
> + a(ge,ne,ne) \
> + a(ge,ne,eq) \
> + a(ge,ne,xor)
> +
> +#define funcs1(a,b,c) \
> +func(,a,b,c) \
> +func(volatile,a,b,c)
> +
> +funcs(funcs1)
> +
> +#define test(op1,op2,op3) \
> +do { \
> + if (op1##_##op2##_##op3##_(x,y) \
> + != op1##_##op2##_##op3##_volatile(x,y)) \
> + __builtin_abort(); \
> +} while(0);
> +
> +int main()
> +{
> + for(int x = -10; x < 10; x++)
> + for(int y = -10; y < 10; y++)
> + {
> + funcs(test)
> + }
> +}
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cmpeq-4.c b/gcc/testsuite/gcc.dg/tree-ssa/cmpeq-4.c
> new file mode 100644
> index 00000000000..868d80fdcca
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/cmpeq-4.c
> @@ -0,0 +1,51 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized -fdump-tree-original" } */
> +/* PR tree-optimization/107881 */
> +
> +_Bool ltgt_eq(int a, int b)
> +{
> + _Bool c = a < b;
> + _Bool d = a > b;
> + return c == d; // a == b
> +}
> +/* { dg-final { scan-tree-dump "a_\[0-9\]+.D. == b_\[0-9\]+.D.|b_\[0-9\]+.D. == a_\[0-9\]+.D." "optimized" } } */
> +
> +_Bool lteq_eq(int x, int y)
> +{
> + _Bool c = x < y;
> + _Bool d = x == y;
> + return c == d; // x > y
> +}
> +/* { dg-final { scan-tree-dump "x_\[0-9\]+.D. > y_\[0-9\]+.D.|y_\[0-9\]+.D. < x_\[0-9\]+.D." "optimized" } } */
> +
> +_Bool ltne_eq(int z, int w)
> +{
> + _Bool c = z < w;
> + _Bool d = z != w;
> + return c == d; // z <= w
> +}
> +/* { dg-final { scan-tree-dump "z_\[0-9\]+.D. <= w_\[0-9\]+.D.|w_\[0-9\]+.D. >= y_\[0-9\]+.D." "optimized" } } */
> +
> +_Bool lege_eq(int i, int j)
> +{
> + _Bool c = i <= j;
> + _Bool d = i >= j;
> + return c == d; // i == j
> +}
> +/* { dg-final { scan-tree-dump "i_\[0-9\]+.D. == j_\[0-9\]+.D.|j_\[0-9\]+.D. == i_\[0-9\]+.D." "optimized" } } */
> +
> +_Bool leeq_eq(int k, int l)
> +{
> + _Bool c = k <= l;
> + _Bool d = k == l;
> + return c == d; // k >= l
> +}
> +/* { dg-final { scan-tree-dump "k_\[0-9\]+.D. >= l_\[0-9\]+.D.|l_\[0-9\]+.D. <= k_\[0-9\]+.D." "optimized" } } */
> +
> +_Bool lene_eq(int m, int n)
> +{
> + _Bool c = m <= n;
> + _Bool d = m != n;
> + return c == d; // m < n
> +}
> +/* { dg-final { scan-tree-dump "m_\[0-9\]+.D. < n_\[0-9\]+.D.|n_\[0-9\]+.D. > m_\[0-9\]+.D." "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cmpxor-1.c b/gcc/testsuite/gcc.dg/tree-ssa/cmpxor-1.c
> new file mode 100644
> index 00000000000..8de2d9d2244
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/cmpxor-1.c
> @@ -0,0 +1,51 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* PR tree-optimization/107881 */
> +
> +_Bool ltgtxor(int a, int b)
> +{
> + _Bool c = a < b;
> + _Bool d = a > b;
> + return c ^ d; // a != b
> +}
> +/* { dg-final { scan-tree-dump "a_\[0-9\]+.D. != b_\[0-9\]+.D.|b_\[0-9\]+.D. != a_\[0-9\]+.D." "optimized" } } */
> +
> +_Bool lteqxor(int x, int y)
> +{
> + _Bool c = x < y;
> + _Bool d = x == y;
> + return c ^ d; // x <= y (basically | here)
> +}
> +/* { dg-final { scan-tree-dump "x_\[0-9\]+.D. <= y_\[0-9\]+.D.|y_\[0-9\]+.D. >= x_\[0-9\]+.D." "optimized" } } */
> +
> +_Bool ltnexor(int z, int w)
> +{
> + _Bool c = z < w;
> + _Bool d = z != w;
> + return c ^ d; // z > w
> +}
> +/* { dg-final { scan-tree-dump "z_\[0-9\]+.D. > w_\[0-9\]+.D.|w_\[0-9\]+.D. < y_\[0-9\]+.D." "optimized" } } */
> +
> +_Bool legexor(int i, int j)
> +{
> + _Bool c = i <= j;
> + _Bool d = i >= j;
> + return c ^ d; // i != j
> +}
> +/* { dg-final { scan-tree-dump "i_\[0-9\]+.D. != j_\[0-9\]+.D.|j_\[0-9\]+.D. != i_\[0-9\]+.D." "optimized" } } */
> +
> +_Bool leeqxor(int k, int l)
> +{
> + _Bool c = k <= l;
> + _Bool d = k == l;
> + return c ^ d; // k < l
> +}
> +/* { dg-final { scan-tree-dump "k_\[0-9\]+.D. < l_\[0-9\]+.D.|l_\[0-9\]+.D. > k_\[0-9\]+.D." "optimized" } } */
> +
> +_Bool lenexor(int m, int n)
> +{
> + _Bool c = m <= n;
> + _Bool d = m != n;
> + return c ^ d; // m >= n
> +}
> +/* { dg-final { scan-tree-dump "m_\[0-9\]+.D. >= n_\[0-9\]+.D.|n_\[0-9\]+.D. <= m_\[0-9\]+.D." "optimized" } } */
> --
> 2.31.1
>
@@ -3154,6 +3154,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
{ constant_boolean_node (true, type); })
))))))
+/* Optimize (a CMP b) ^ (a CMP b) */
+/* Optimize (a CMP b) != (a CMP b) */
+(for op (bit_xor ne)
+ (for cmp1 (lt lt lt le le le)
+ cmp2 (gt eq ne ge eq ne)
+ rcmp (ne le gt ne lt ge)
+ (simplify
+ (op:c (cmp1:c @0 @1) (cmp2:c @0 @1))
+ (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0)))
+ (rcmp @0 @1)))))
+
+/* Optimize (a CMP b) == (a CMP b) */
+(for cmp1 (lt lt lt le le le)
+ cmp2 (gt eq ne ge eq ne)
+ rcmp (eq gt le eq ge lt)
+ (simplify
+ (eq:c (cmp1:c @0 @1) (cmp2:c @0 @1))
+ (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0)))
+ (rcmp @0 @1))))
+
/* We can't reassociate at all for saturating types. */
(if (!TYPE_SATURATING (type))
new file mode 100644
@@ -0,0 +1,115 @@
+#define func(vol, op1, op2, op3) \
+_Bool op1##_##op2##_##op3##_##vol (int a, int b) \
+{ \
+ vol _Bool x = op_##op1(a, b); \
+ vol _Bool y = op_##op2(a, b); \
+ return op_##op3(x, y); \
+}
+
+#define op_lt(a, b) ((a) < (b))
+#define op_le(a, b) ((a) <= (b))
+#define op_eq(a, b) ((a) == (b))
+#define op_ne(a, b) ((a) != (b))
+#define op_gt(a, b) ((a) > (b))
+#define op_ge(a, b) ((a) >= (b))
+#define op_xor(a, b) ((a) ^ (b))
+
+
+#define funcs(a) \
+ a(lt,lt,ne) \
+ a(lt,lt,eq) \
+ a(lt,lt,xor) \
+ a(lt,le,ne) \
+ a(lt,le,eq) \
+ a(lt,le,xor) \
+ a(lt,gt,ne) \
+ a(lt,gt,eq) \
+ a(lt,gt,xor) \
+ a(lt,ge,ne) \
+ a(lt,ge,eq) \
+ a(lt,ge,xor) \
+ a(lt,eq,ne) \
+ a(lt,eq,eq) \
+ a(lt,eq,xor) \
+ a(lt,ne,ne) \
+ a(lt,ne,eq) \
+ a(lt,ne,xor) \
+ \
+ a(le,lt,ne) \
+ a(le,lt,eq) \
+ a(le,lt,xor) \
+ a(le,le,ne) \
+ a(le,le,eq) \
+ a(le,le,xor) \
+ a(le,gt,ne) \
+ a(le,gt,eq) \
+ a(le,gt,xor) \
+ a(le,ge,ne) \
+ a(le,ge,eq) \
+ a(le,ge,xor) \
+ a(le,eq,ne) \
+ a(le,eq,eq) \
+ a(le,eq,xor) \
+ a(le,ne,ne) \
+ a(le,ne,eq) \
+ a(le,ne,xor) \
+ \
+ a(gt,lt,ne) \
+ a(gt,lt,eq) \
+ a(gt,lt,xor) \
+ a(gt,le,ne) \
+ a(gt,le,eq) \
+ a(gt,le,xor) \
+ a(gt,gt,ne) \
+ a(gt,gt,eq) \
+ a(gt,gt,xor) \
+ a(gt,ge,ne) \
+ a(gt,ge,eq) \
+ a(gt,ge,xor) \
+ a(gt,eq,ne) \
+ a(gt,eq,eq) \
+ a(gt,eq,xor) \
+ a(gt,ne,ne) \
+ a(gt,ne,eq) \
+ a(gt,ne,xor) \
+ \
+ a(ge,lt,ne) \
+ a(ge,lt,eq) \
+ a(ge,lt,xor) \
+ a(ge,le,ne) \
+ a(ge,le,eq) \
+ a(ge,le,xor) \
+ a(ge,gt,ne) \
+ a(ge,gt,eq) \
+ a(ge,gt,xor) \
+ a(ge,ge,ne) \
+ a(ge,ge,eq) \
+ a(ge,ge,xor) \
+ a(ge,eq,ne) \
+ a(ge,eq,eq) \
+ a(ge,eq,xor) \
+ a(ge,ne,ne) \
+ a(ge,ne,eq) \
+ a(ge,ne,xor)
+
+#define funcs1(a,b,c) \
+func(,a,b,c) \
+func(volatile,a,b,c)
+
+funcs(funcs1)
+
+#define test(op1,op2,op3) \
+do { \
+ if (op1##_##op2##_##op3##_(x,y) \
+ != op1##_##op2##_##op3##_volatile(x,y)) \
+ __builtin_abort(); \
+} while(0);
+
+int main()
+{
+ for(int x = -10; x < 10; x++)
+ for(int y = -10; y < 10; y++)
+ {
+ funcs(test)
+ }
+}
new file mode 100644
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized -fdump-tree-original" } */
+/* PR tree-optimization/107881 */
+
+_Bool ltgt_eq(int a, int b)
+{
+ _Bool c = a < b;
+ _Bool d = a > b;
+ return c == d; // a == b
+}
+/* { dg-final { scan-tree-dump "a_\[0-9\]+.D. == b_\[0-9\]+.D.|b_\[0-9\]+.D. == a_\[0-9\]+.D." "optimized" } } */
+
+_Bool lteq_eq(int x, int y)
+{
+ _Bool c = x < y;
+ _Bool d = x == y;
+ return c == d; // x > y
+}
+/* { dg-final { scan-tree-dump "x_\[0-9\]+.D. > y_\[0-9\]+.D.|y_\[0-9\]+.D. < x_\[0-9\]+.D." "optimized" } } */
+
+_Bool ltne_eq(int z, int w)
+{
+ _Bool c = z < w;
+ _Bool d = z != w;
+ return c == d; // z <= w
+}
+/* { dg-final { scan-tree-dump "z_\[0-9\]+.D. <= w_\[0-9\]+.D.|w_\[0-9\]+.D. >= y_\[0-9\]+.D." "optimized" } } */
+
+_Bool lege_eq(int i, int j)
+{
+ _Bool c = i <= j;
+ _Bool d = i >= j;
+ return c == d; // i == j
+}
+/* { dg-final { scan-tree-dump "i_\[0-9\]+.D. == j_\[0-9\]+.D.|j_\[0-9\]+.D. == i_\[0-9\]+.D." "optimized" } } */
+
+_Bool leeq_eq(int k, int l)
+{
+ _Bool c = k <= l;
+ _Bool d = k == l;
+ return c == d; // k >= l
+}
+/* { dg-final { scan-tree-dump "k_\[0-9\]+.D. >= l_\[0-9\]+.D.|l_\[0-9\]+.D. <= k_\[0-9\]+.D." "optimized" } } */
+
+_Bool lene_eq(int m, int n)
+{
+ _Bool c = m <= n;
+ _Bool d = m != n;
+ return c == d; // m < n
+}
+/* { dg-final { scan-tree-dump "m_\[0-9\]+.D. < n_\[0-9\]+.D.|n_\[0-9\]+.D. > m_\[0-9\]+.D." "optimized" } } */
new file mode 100644
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* PR tree-optimization/107881 */
+
+_Bool ltgtxor(int a, int b)
+{
+ _Bool c = a < b;
+ _Bool d = a > b;
+ return c ^ d; // a != b
+}
+/* { dg-final { scan-tree-dump "a_\[0-9\]+.D. != b_\[0-9\]+.D.|b_\[0-9\]+.D. != a_\[0-9\]+.D." "optimized" } } */
+
+_Bool lteqxor(int x, int y)
+{
+ _Bool c = x < y;
+ _Bool d = x == y;
+ return c ^ d; // x <= y (basically | here)
+}
+/* { dg-final { scan-tree-dump "x_\[0-9\]+.D. <= y_\[0-9\]+.D.|y_\[0-9\]+.D. >= x_\[0-9\]+.D." "optimized" } } */
+
+_Bool ltnexor(int z, int w)
+{
+ _Bool c = z < w;
+ _Bool d = z != w;
+ return c ^ d; // z > w
+}
+/* { dg-final { scan-tree-dump "z_\[0-9\]+.D. > w_\[0-9\]+.D.|w_\[0-9\]+.D. < y_\[0-9\]+.D." "optimized" } } */
+
+_Bool legexor(int i, int j)
+{
+ _Bool c = i <= j;
+ _Bool d = i >= j;
+ return c ^ d; // i != j
+}
+/* { dg-final { scan-tree-dump "i_\[0-9\]+.D. != j_\[0-9\]+.D.|j_\[0-9\]+.D. != i_\[0-9\]+.D." "optimized" } } */
+
+_Bool leeqxor(int k, int l)
+{
+ _Bool c = k <= l;
+ _Bool d = k == l;
+ return c ^ d; // k < l
+}
+/* { dg-final { scan-tree-dump "k_\[0-9\]+.D. < l_\[0-9\]+.D.|l_\[0-9\]+.D. > k_\[0-9\]+.D." "optimized" } } */
+
+_Bool lenexor(int m, int n)
+{
+ _Bool c = m <= n;
+ _Bool d = m != n;
+ return c ^ d; // m >= n
+}
+/* { dg-final { scan-tree-dump "m_\[0-9\]+.D. >= n_\[0-9\]+.D.|n_\[0-9\]+.D. <= m_\[0-9\]+.D." "optimized" } } */