MATCH: Transform `(nop_convert)-a` into `-(nop_convert)a` if the negate is single use and a is known not to be the signed min value
Checks
Commit Message
This pushes the conversion further down the chain, which allows more
conversions to be optimized away in many cases.
OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
PR tree-optimization/107765
PR tree-optimization/107137
gcc/ChangeLog:
* match.pd (`(nop_convert)-a`): New pattern.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/neg-cast-1.c: New test.
* gcc.dg/tree-ssa/neg-cast-2.c: New test.
* gcc.dg/tree-ssa/neg-cast-3.c: New test.
---
gcc/match.pd | 31 ++++++++++++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/neg-cast-1.c | 17 ++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/neg-cast-2.c | 20 ++++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/neg-cast-3.c | 15 +++++++++++
4 files changed, 83 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/neg-cast-1.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/neg-cast-2.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/neg-cast-3.c
Comments
On Fri, Sep 1, 2023 at 4:27 AM Andrew Pinski via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> This pushes the conversion further down the chain, which allows more
> conversions to be optimized away in many cases.
But when building (T1)(T2)-x it will make simplifying (T1)(T2) more difficult
as we'd need a
(convert (negate (convert ...)))
pattern for that? So I'm not convinced this is the correct approach to the
cases you want to optimize? The testcases are actually of the
form (T1)-(T2)x, so hoisting the other way around would have worked as well
(if the outer convert would have been folded).
Are there any existing cases where we push/pull (nop) conversions around
unary operations?
Should we pay the price and simply have patterns for
(convert (unary (convert ...)))?
[how nice is the RTL world without signedness of operands but
signed/unsigned operation variants ...]
Richard.
> OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
>
> PR tree-optimization/107765
> PR tree-optimization/107137
>
> gcc/ChangeLog:
>
> * match.pd (`(nop_convert)-a`): New pattern.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/neg-cast-1.c: New test.
> * gcc.dg/tree-ssa/neg-cast-2.c: New test.
> * gcc.dg/tree-ssa/neg-cast-3.c: New test.
> ---
> gcc/match.pd | 31 ++++++++++++++++++++++
> gcc/testsuite/gcc.dg/tree-ssa/neg-cast-1.c | 17 ++++++++++++
> gcc/testsuite/gcc.dg/tree-ssa/neg-cast-2.c | 20 ++++++++++++++
> gcc/testsuite/gcc.dg/tree-ssa/neg-cast-3.c | 15 +++++++++++
> 4 files changed, 83 insertions(+)
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/neg-cast-1.c
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/neg-cast-2.c
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/neg-cast-3.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 487a7e38719..3cff9b03d92 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -959,6 +959,37 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> #endif
> ))))
>
> +/* (nop_cast)-var -> -(nop_cast)(var)
> + if -var is known to not overflow; that is does not include
> + the signed integer MIN. */
> +(simplify
> + (convert (negate:s @0))
> + (if (INTEGRAL_TYPE_P (type)
> + && tree_nop_conversion_p (type, TREE_TYPE (@0)))
> + (with {
> + /* If the top is not set, there is no overflow happening. */
> + bool contains_signed_min = !wi::ges_p (tree_nonzero_bits (@0), 0);
> +#if GIMPLE
> + int_range_max vr;
> + if (contains_signed_min
> + && TREE_CODE (@0) == SSA_NAME
> + && get_range_query (cfun)->range_of_expr (vr, @0)
> + && !vr.undefined_p ())
> + {
> + tree stype = signed_type_for (type);
> + auto minvalue = wi::min_value (stype);
> + int_range_max valid_range (TREE_TYPE (@0), minvalue, minvalue);
> + vr.intersect (valid_range);
> + /* If the range does not include min value,
> + then we can do this change around. */
> + if (vr.undefined_p ())
> + contains_signed_min = false;
> + }
> +#endif
> + }
> + (if (!contains_signed_min)
> + (negate (convert @0))))))
> +
> (for op (negate abs)
> /* Simplify cos(-x) and cos(|x|) -> cos(x). Similarly for cosh. */
> (for coss (COS COSH)
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/neg-cast-1.c b/gcc/testsuite/gcc.dg/tree-ssa/neg-cast-1.c
> new file mode 100644
> index 00000000000..7ddf40aca29
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/neg-cast-1.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-evrp" } */
> +/* PR tree-optimization/107765 */
> +
> +#include <limits.h>
> +
> +int a(int input)
> +{
> + if (input == INT_MIN) __builtin_unreachable();
> + unsigned t = input;
> + int tt = -t;
> + return tt == -input;
> +}
> +
> +/* Should be able to optimize this down to just `return 1;` during evrp. */
> +/* { dg-final { scan-tree-dump "return 1;" "evrp" } } */
> +/* { dg-final { scan-tree-dump-not " - " "evrp" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/neg-cast-2.c b/gcc/testsuite/gcc.dg/tree-ssa/neg-cast-2.c
> new file mode 100644
> index 00000000000..ce49079e235
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/neg-cast-2.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fdump-tree-fre3 -fdump-tree-optimized" } */
> +/* part of PR tree-optimization/108397 */
> +
> +long long
> +foo (unsigned char o)
> +{
> + unsigned long long t1 = -(long long) (o == 0);
> + unsigned long long t2 = -(long long) (t1 != 0);
> + unsigned long long t3 = -(long long) (t1 <= t2);
> + return t3;
> +}
> +
> +/* Should be able to optimize this down to just `return -1;` during fre3. */
> +/* { dg-final { scan-tree-dump "return -1;" "fre3" } } */
> +/* FRE does not remove all dead statements */
> +/* { dg-final { scan-tree-dump-not " - " "fre3" { xfail *-*-* } } } */
> +
> +/* { dg-final { scan-tree-dump "return -1;" "optimized" } } */
> +/* { dg-final { scan-tree-dump-not " - " "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/neg-cast-3.c b/gcc/testsuite/gcc.dg/tree-ssa/neg-cast-3.c
> new file mode 100644
> index 00000000000..a26a6051bda
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/neg-cast-3.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fdump-tree-forwprop2 -fdump-tree-optimized" } */
> +/* PR tree-optimization/107137 */
> +
> +unsigned f(_Bool a)
> +{
> + int t = a;
> + t = -t;
> + return t;
> +}
> +
> +/* There should be no cast to int at all. */
> +/* Forwprop2 does not remove all of the statements. */
> +/* { dg-final { scan-tree-dump-not "\\\(int\\\)" "forwprop2" { xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump-not "\\\(int\\\)" "optimized" } } */
> --
> 2.31.1
>
@@ -959,6 +959,37 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
#endif
))))
+/* (nop_cast)-var -> -(nop_cast)(var)
+ if -var is known to not overflow; that is does not include
+ the signed integer MIN. */
+(simplify
+ (convert (negate:s @0))
+ (if (INTEGRAL_TYPE_P (type)
+ && tree_nop_conversion_p (type, TREE_TYPE (@0)))
+ (with {
+ /* If the top is not set, there is no overflow happening. */
+ bool contains_signed_min = !wi::ges_p (tree_nonzero_bits (@0), 0);
+#if GIMPLE
+ int_range_max vr;
+ if (contains_signed_min
+ && TREE_CODE (@0) == SSA_NAME
+ && get_range_query (cfun)->range_of_expr (vr, @0)
+ && !vr.undefined_p ())
+ {
+ tree stype = signed_type_for (type);
+ auto minvalue = wi::min_value (stype);
+ int_range_max valid_range (TREE_TYPE (@0), minvalue, minvalue);
+ vr.intersect (valid_range);
+ /* If the range does not include min value,
+ then we can do this change around. */
+ if (vr.undefined_p ())
+ contains_signed_min = false;
+ }
+#endif
+ }
+ (if (!contains_signed_min)
+ (negate (convert @0))))))
+
(for op (negate abs)
/* Simplify cos(-x) and cos(|x|) -> cos(x). Similarly for cosh. */
(for coss (COS COSH)
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-evrp" } */
+/* PR tree-optimization/107765 */
+
+#include <limits.h>
+
+int a(int input)
+{
+ if (input == INT_MIN) __builtin_unreachable();
+ unsigned t = input;
+ int tt = -t;
+ return tt == -input;
+}
+
+/* Should be able to optimize this down to just `return 1;` during evrp. */
+/* { dg-final { scan-tree-dump "return 1;" "evrp" } } */
+/* { dg-final { scan-tree-dump-not " - " "evrp" } } */
new file mode 100644
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-fre3 -fdump-tree-optimized" } */
+/* part of PR tree-optimization/108397 */
+
+long long
+foo (unsigned char o)
+{
+ unsigned long long t1 = -(long long) (o == 0);
+ unsigned long long t2 = -(long long) (t1 != 0);
+ unsigned long long t3 = -(long long) (t1 <= t2);
+ return t3;
+}
+
+/* Should be able to optimize this down to just `return -1;` during fre3. */
+/* { dg-final { scan-tree-dump "return -1;" "fre3" } } */
+/* FRE does not remove all dead statements */
+/* { dg-final { scan-tree-dump-not " - " "fre3" { xfail *-*-* } } } */
+
+/* { dg-final { scan-tree-dump "return -1;" "optimized" } } */
+/* { dg-final { scan-tree-dump-not " - " "optimized" } } */
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-forwprop2 -fdump-tree-optimized" } */
+/* PR tree-optimization/107137 */
+
+unsigned f(_Bool a)
+{
+ int t = a;
+ t = -t;
+ return t;
+}
+
+/* There should be no cast to int at all. */
+/* Forwprop2 does not remove all of the statements. */
+/* { dg-final { scan-tree-dump-not "\\\(int\\\)" "forwprop2" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-not "\\\(int\\\)" "optimized" } } */