[V6] Optimize '(X - N * M) / N' to 'X / N - M' if valid
Checks
Commit Message
Hi,
Integer expression "(X - N * M) / N" can be optimized to "X / N - M" with
the below conditions:
1. There is no wrap/overflow/underflow.
wrap/overflow/underflow breaks the arithmetic operation.
2. "X - N * M" and "X" are not of opposite sign.
Here, the operation "/" would be "trunc_div", the fractional part is
discarded towards zero. If "X - N * M" and "X" are in different signs,
then trunc_div discards the fractional parts (of /N) in different
directions.
Compare the previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/624801.html
This patch adds comments and update the pattern on "(t + C)" to be more
tight.
Bootstrap & regtest pass on ppc64{,le} and x86_64.
Is this patch ok for trunk?
BR,
Jeff (Jiufu Guo)
PR tree-optimization/108757
gcc/ChangeLog:
* match.pd ((X - N * M) / N): New pattern.
((X + N * M) / N): New pattern.
((X + C) div_rshift N): New pattern.
gcc/testsuite/ChangeLog:
* gcc.dg/pr108757-1.c: New test.
* gcc.dg/pr108757-2.c: New test.
* gcc.dg/pr108757.h: New test.
---
gcc/match.pd | 78 ++++++++++
gcc/testsuite/gcc.dg/pr108757-1.c | 18 +++
gcc/testsuite/gcc.dg/pr108757-2.c | 19 +++
gcc/testsuite/gcc.dg/pr108757.h | 233 ++++++++++++++++++++++++++++++
4 files changed, 348 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/pr108757-1.c
create mode 100644 gcc/testsuite/gcc.dg/pr108757-2.c
create mode 100644 gcc/testsuite/gcc.dg/pr108757.h
Comments
On Fri, 1 Sep 2023, Jiufu Guo wrote:
> Hi,
>
> Integer expression "(X - N * M) / N" can be optimized to "X / N - M" with
> the below conditions:
> 1. There is no wrap/overflow/underflow.
> wrap/overflow/underflow breaks the arithmetic operation.
> 2. "X - N * M" and "X" are not of opposite sign.
> Here, the operation "/" would be "trunc_div", the fractional part is
> discarded towards zero. If "X - N * M" and "X" are in different signs,
> then trunc_div discards the fractional parts (of /N) in different
> directions.
>
> Compare the previous version:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-July/624801.html
> This patch adds comments and update the pattern on "(t + C)" to be more
> tight.
>
> Bootstrap & regtest pass on ppc64{,le} and x86_64.
> Is this patch ok for trunk?
>
> BR,
> Jeff (Jiufu Guo)
>
> PR tree-optimization/108757
>
> gcc/ChangeLog:
>
> * match.pd ((X - N * M) / N): New pattern.
> ((X + N * M) / N): New pattern.
> ((X + C) div_rshift N): New pattern.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/pr108757-1.c: New test.
> * gcc.dg/pr108757-2.c: New test.
> * gcc.dg/pr108757.h: New test.
>
> ---
> gcc/match.pd | 78 ++++++++++
> gcc/testsuite/gcc.dg/pr108757-1.c | 18 +++
> gcc/testsuite/gcc.dg/pr108757-2.c | 19 +++
> gcc/testsuite/gcc.dg/pr108757.h | 233 ++++++++++++++++++++++++++++++
> 4 files changed, 348 insertions(+)
> create mode 100644 gcc/testsuite/gcc.dg/pr108757-1.c
> create mode 100644 gcc/testsuite/gcc.dg/pr108757-2.c
> create mode 100644 gcc/testsuite/gcc.dg/pr108757.h
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index fa598d5ca2e470f9cc3b82469e77d743b12f107e..863bc7299cdefc622a7806a4d32e37268c50d453 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -959,6 +959,84 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> #endif
> ))))
>
> +#if GIMPLE
> +(for div (trunc_div exact_div)
> + /* Simplify (X + M*N) / N -> X / N + M. */
> + (simplify
> + (div (plus:c@4 @0 (mult:c@3 @1 @2)) @2)
> + (with {value_range vr0, vr1, vr2, vr3, vr4;}
> + (if (INTEGRAL_TYPE_P (type)
> + && get_range_query (cfun)->range_of_expr (vr1, @1)
> + && get_range_query (cfun)->range_of_expr (vr2, @2)
> + /* "N*M" doesn't overflow. */
> + && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
> + && get_range_query (cfun)->range_of_expr (vr0, @0)
> + && get_range_query (cfun)->range_of_expr (vr3, @3)
> + /* "X+(N*M)" doesn't overflow. */
> + && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr3)
> + && get_range_query (cfun)->range_of_expr (vr4, @4)
> + /* "X+N*M" is not with opposite sign as "X". */
> + && (TYPE_UNSIGNED (type)
> + || (vr0.nonnegative_p () && vr4.nonnegative_p ())
> + || (vr0.nonpositive_p () && vr4.nonpositive_p ())))
> + (plus (div @0 @2) @1))))
> +
> + /* Simplify (X - M*N) / N -> X / N - M. */
> + (simplify
> + (div (minus@4 @0 (mult:c@3 @1 @2)) @2)
> + (with {value_range vr0, vr1, vr2, vr3, vr4;}
> + (if (INTEGRAL_TYPE_P (type)
> + && get_range_query (cfun)->range_of_expr (vr1, @1)
> + && get_range_query (cfun)->range_of_expr (vr2, @2)
> + /* "N * M" doesn't overflow. */
> + && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
> + && get_range_query (cfun)->range_of_expr (vr0, @0)
> + && get_range_query (cfun)->range_of_expr (vr3, @3)
> + /* "X - (N*M)" doesn't overflow. */
> + && range_op_handler (MINUS_EXPR).overflow_free_p (vr0, vr3)
> + && get_range_query (cfun)->range_of_expr (vr4, @4)
> + /* "X-N*M" is not with opposite sign as "X". */
> + && (TYPE_UNSIGNED (type)
> + || (vr0.nonnegative_p () && vr4.nonnegative_p ())
> + || (vr0.nonpositive_p () && vr4.nonpositive_p ())))
> + (minus (div @0 @2) @1)))))
> +
> +/* Simplify
> + (X + C) / N -> X / N + C / N where C is multiple of N.
> + (X + C) >> N -> X >> N + C>>N if low N bits of C is 0. */
> +(for op (trunc_div exact_div rshift)
> + (simplify
> + (op (plus@3 @0 INTEGER_CST@1) INTEGER_CST@2)
> + (with
> + {
> + wide_int c = wi::to_wide (@1);
> + wide_int n = wi::to_wide (@2);
> + bool shift = op == RSHIFT_EXPR;
> + #define plus_op1(v) (shift ? wi::rshift (v, n, TYPE_SIGN (type)) \
> + : wi::div_trunc (v, n, TYPE_SIGN (type)))
> + #define exact_mod(v) (shift ? wi::ctz (v) >= n.to_shwi () \
> + : wi::multiple_of_p (v, n, TYPE_SIGN (type)))
please indent these full left
> + value_range vr0, vr1, vr3;
> + }
> + (if (INTEGRAL_TYPE_P (type)
> + && get_range_query (cfun)->range_of_expr (vr0, @0))
> + (if (exact_mod (c)
> + && get_range_query (cfun)->range_of_expr (vr1, @1)
> + /* "X+C" doesn't overflow. */
> + && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr1)
> + && get_range_query (cfun)->range_of_expr (vr3, @3)
> + /* "X+C" and "X" are not of opposite sign. */
> + && (TYPE_UNSIGNED (type)
> + || (vr0.nonnegative_p () && vr3.nonnegative_p ())
> + || (vr0.nonpositive_p () && vr3.nonpositive_p ())))
> + (plus (op @0 @2) { wide_int_to_tree (type, plus_op1 (c)); })
> + (if (TYPE_UNSIGNED (type) && c.sign_mask () < 0
> + && exact_mod (-c)
> + /* unsigned "X-(-C)" doesn't underflow. */
> + && wi::geu_p (vr0.lower_bound (), -c))
> + (plus (op @0 @2) { wide_int_to_tree (type, -plus_op1 (-c)); })))))))
and #undef them here again.
OK with that change.
Thanks,
Richard.
> +#endif
> +
> (for op (negate abs)
> /* Simplify cos(-x) and cos(|x|) -> cos(x). Similarly for cosh. */
> (for coss (COS COSH)
> diff --git a/gcc/testsuite/gcc.dg/pr108757-1.c b/gcc/testsuite/gcc.dg/pr108757-1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..7908f4bdcb8e225fe311b668efbe8f6db525b4d5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr108757-1.c
> @@ -0,0 +1,18 @@
> +/* PR tree-optimization/108757 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +#include <limits.h>
> +#define N 5
> +#define M 3
> +#define GAP 0
> +typedef unsigned int UINT;
> +typedef int INT;
> +#define UMAX UINT_MAX
> +#define IMAX INT_MAX
> +#define IMIN INT_MIN
> +#include "pr108757.h"
> +
> +/* { dg-final { scan-tree-dump-not " = x_\[0-9\]+\\(D\\) \\+ " "optimized" } } *
> +/* { dg-final { scan-tree-dump-not " = x_\[0-9\]+\\(D\\) \\- " "optimized" } } */
> +/* { dg-final { scan-tree-dump-not " = b_\[0-9\]+ \\+ " "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/pr108757-2.c b/gcc/testsuite/gcc.dg/pr108757-2.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..82bebd09944f0be2e0690c604723bfe6182acec3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr108757-2.c
> @@ -0,0 +1,19 @@
> +/* PR tree-optimization/108757 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized -fwrapv" } */
> +
> +#include <limits.h>
> +#define N 4
> +#define M 3
> +#define GAP 2
> +typedef unsigned int UINT;
> +typedef int INT;
> +#define UMAX UINT_MAX
> +#define IMAX INT_MAX
> +#define IMIN INT_MIN
> +#include "pr108757.h"
> +
> +/* { dg-final { scan-tree-dump-times " = x_\[0-9\]+\\(D\\) \\+ " 16 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times " = x_\[0-9\]+\\(D\\) \\- " 3 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times " \\+ x_\[0-9\]+\\(D\\)" 3 "optimized" } } */
> +
> diff --git a/gcc/testsuite/gcc.dg/pr108757.h b/gcc/testsuite/gcc.dg/pr108757.h
> new file mode 100644
> index 0000000000000000000000000000000000000000..5550199c1ef952f54eaa83087cec25e992057c34
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr108757.h
> @@ -0,0 +1,233 @@
> +#define NOINLINE __attribute__ ((noinline))
> +UINT NOINLINE
> +opt_u1 (UINT x)
> +{
> + if (x < (M * N) - GAP)
> + return 0;
> + UINT a = x - (M * N);
> + UINT b = a / N;
> + return b + M;
> +}
> +
> +UINT NOINLINE
> +opt_u2 (UINT x)
> +{
> + if (x > (UMAX - (M * N) + GAP))
> + return 0;
> + UINT a = x + (M * N);
> + UINT b = a / N;
> + return b - M;
> +}
> +
> +INT NOINLINE
> +opt_s1 (INT x)
> +{
> + if (x < (M * N) - GAP)
> + return 0;
> + INT a = x - (M * N);
> + INT b = a / N;
> + return b + M;
> +}
> +
> +INT NOINLINE
> +opt_s2 (INT x)
> +{
> + if (x < IMIN + (M * N) - GAP || x > 0)
> + return 0;
> + INT a = x - (M * N);
> + INT b = a / N;
> + return b + M;
> +}
> +
> +INT NOINLINE
> +opt_s3 (INT x)
> +{
> + if (x < (M * N) - GAP)
> + return 0;
> + INT a = x - (M * N);
> + INT b = a / -N;
> + return b + -M;
> +}
> +
> +INT NOINLINE
> +opt_s4 (INT x)
> +{
> + if (x < IMIN + (M * N) - GAP || x > 0)
> + return 0;
> + INT a = x - (M * N);
> + INT b = a / -N;
> + return b + -M;
> +}
> +
> +INT NOINLINE
> +opt_s5 (INT x)
> +{
> + if (x > (-M * N) + GAP)
> + return 0;
> + INT a = x - (-M * N);
> + INT b = a / N;
> + return b + -M;
> +}
> +
> +INT NOINLINE
> +opt_s6 (INT x)
> +{
> + if (x > IMAX - (M * N) + GAP || x < 0)
> + return 0;
> + INT a = x - (-M * N);
> + INT b = a / N;
> + return b + -M;
> +}
> +
> +INT NOINLINE
> +opt_s7 (INT x)
> +{
> + if (x > (M * -N) + GAP)
> + return 0;
> + INT a = x - (M * -N);
> + INT b = a / -N;
> + return b + M;
> +}
> +
> +INT NOINLINE
> +opt_s8 (INT x)
> +{
> + if (x > IMAX - (M * N) + GAP || x < 0)
> + return 0;
> + INT a = x - (M * -N);
> + INT b = a / -N;
> + return b + M;
> +}
> +
> +UINT NOINLINE
> +opt_u3 (UINT x)
> +{
> + if (x < (M << N) - GAP)
> + return 0;
> + UINT a = x - (M << N);
> + UINT b = a >> N;
> + return b + M;
> +}
> +
> +UINT NOINLINE
> +opt_u4 (UINT x)
> +{
> + if (x > (UMAX - (M << N)) + GAP)
> + return 0;
> + UINT a = x + (M << N);
> + UINT b = a >> N;
> + return b - M;
> +}
> +
> +INT NOINLINE
> +opt_s9 (INT x)
> +{
> + if (x < (M << N) - GAP)
> + return 0;
> + INT a = x - (M << N);
> + INT b = a >> N;
> + return b + M;
> +}
> +
> +INT NOINLINE
> +opt_s10 (INT x)
> +{
> + if (x < IMIN + (M << N) - GAP || x > 0)
> + return 0;
> + INT a = x - (M << N);
> + INT b = a >> N;
> + return b + M;
> +}
> +
> +INT NOINLINE
> +opt_s11 (INT x)
> +{
> + if (x > (-M << N) + GAP)
> + return 0;
> + INT a = x - (-M << N);
> + INT b = a >> N;
> + return b + -M;
> +}
> +
> +INT NOINLINE
> +opt_s12 (INT x)
> +{
> + if (x > IMAX - (M << N) + GAP || x < 0)
> + return 0;
> + INT a = x - (-M << N);
> + INT b = a >> N;
> + return b + -M;
> +}
> +
> +UINT NOINLINE
> +opt_u5 (UINT x, UINT n, UINT m)
> +{
> + if (n > N || m > M)
> + return 0;
> + if (x < (M*N) - GAP)
> + return 0;
> + UINT a = x - (m * n);
> + UINT b = a / n;
> + return b + m;
> +}
> +
> +UINT NOINLINE
> +opt_u6 (UINT x, UINT n, UINT m)
> +{
> + if (n > N || m > M)
> + return 0;
> + if (x > (UMAX - M*N) + GAP)
> + return 0;
> + UINT a = x + (m * n);
> + UINT b = a / n;
> + return b - m;
> +}
> +
> +INT NOINLINE
> +opt_s13 (INT x, INT n, INT m)
> +{
> + if (n > N || m > M || n < 0 || m < 0)
> + return 0;
> + if (x < (M*N) - GAP)
> + return 0;
> + INT a = x - (m * n);
> + INT b = a / n;
> + return b + m;
> +}
> +
> +INT NOINLINE
> +opt_s14 (INT x, INT n, INT m)
> +{
> + if (n > N || m > M || n < 0 || m < 0)
> + return 0;
> + if (x > -M*N + GAP)
> + return 0;
> + INT a = x + (m * n);
> + INT b = a / n;
> + return b - m;
> +}
> +
> +INT
> +opt_s15 (INT x, INT n, INT m)
> +{
> + if (n > 0 || m > 0 || n < -N || m < -M)
> + return 0;
> + if (x < (M*N) - GAP)
> + return 0;
> + INT a = x - (m * n);
> + INT b = a / n;
> + return b + m;
> +}
> +
> +INT NOINLINE
> +opt_s16 (INT x, INT n, INT m)
> +{
> + if (n > 0 || m > 0 || n < -N || m < -M)
> + return 0;
> + if (x < 0 || x > (IMAX - M*N) + GAP)
> + return 0;
> + INT a = x + (m * n);
> + INT b = a / n;
> + return b - m;
> +}
> +
>
Hi,
Richard Biener <rguenther@suse.de> writes:
> On Fri, 1 Sep 2023, Jiufu Guo wrote:
>
>> Hi,
>>
>> Integer expression "(X - N * M) / N" can be optimized to "X / N - M" with
>> the below conditions:
>> 1. There is no wrap/overflow/underflow.
>> wrap/overflow/underflow breaks the arithmetic operation.
>> 2. "X - N * M" and "X" are not of opposite sign.
>> Here, the operation "/" would be "trunc_div", the fractional part is
>> discarded towards zero. If "X - N * M" and "X" are in different signs,
>> then trunc_div discards the fractional parts (of /N) in different
>> directions.
>>
>> Compare the previous version:
>> https://gcc.gnu.org/pipermail/gcc-patches/2023-July/624801.html
>> This patch adds comments and update the pattern on "(t + C)" to be more
>> tight.
>>
>> Bootstrap & regtest pass on ppc64{,le} and x86_64.
>> Is this patch ok for trunk?
>>
>> BR,
>> Jeff (Jiufu Guo)
>>
>> PR tree-optimization/108757
>>
>> gcc/ChangeLog:
>>
>> * match.pd ((X - N * M) / N): New pattern.
>> ((X + N * M) / N): New pattern.
>> ((X + C) div_rshift N): New pattern.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gcc.dg/pr108757-1.c: New test.
>> * gcc.dg/pr108757-2.c: New test.
>> * gcc.dg/pr108757.h: New test.
>>
>> ---
>> gcc/match.pd | 78 ++++++++++
>> gcc/testsuite/gcc.dg/pr108757-1.c | 18 +++
>> gcc/testsuite/gcc.dg/pr108757-2.c | 19 +++
>> gcc/testsuite/gcc.dg/pr108757.h | 233 ++++++++++++++++++++++++++++++
>> 4 files changed, 348 insertions(+)
>> create mode 100644 gcc/testsuite/gcc.dg/pr108757-1.c
>> create mode 100644 gcc/testsuite/gcc.dg/pr108757-2.c
>> create mode 100644 gcc/testsuite/gcc.dg/pr108757.h
>>
>> diff --git a/gcc/match.pd b/gcc/match.pd
>> index fa598d5ca2e470f9cc3b82469e77d743b12f107e..863bc7299cdefc622a7806a4d32e37268c50d453 100644
>> --- a/gcc/match.pd
>> +++ b/gcc/match.pd
>> @@ -959,6 +959,84 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>> #endif
>> ))))
>>
>> +#if GIMPLE
>> +(for div (trunc_div exact_div)
>> + /* Simplify (X + M*N) / N -> X / N + M. */
>> + (simplify
>> + (div (plus:c@4 @0 (mult:c@3 @1 @2)) @2)
>> + (with {value_range vr0, vr1, vr2, vr3, vr4;}
>> + (if (INTEGRAL_TYPE_P (type)
>> + && get_range_query (cfun)->range_of_expr (vr1, @1)
>> + && get_range_query (cfun)->range_of_expr (vr2, @2)
>> + /* "N*M" doesn't overflow. */
>> + && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
>> + && get_range_query (cfun)->range_of_expr (vr0, @0)
>> + && get_range_query (cfun)->range_of_expr (vr3, @3)
>> + /* "X+(N*M)" doesn't overflow. */
>> + && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr3)
>> + && get_range_query (cfun)->range_of_expr (vr4, @4)
>> + /* "X+N*M" is not with opposite sign as "X". */
>> + && (TYPE_UNSIGNED (type)
>> + || (vr0.nonnegative_p () && vr4.nonnegative_p ())
>> + || (vr0.nonpositive_p () && vr4.nonpositive_p ())))
>> + (plus (div @0 @2) @1))))
>> +
>> + /* Simplify (X - M*N) / N -> X / N - M. */
>> + (simplify
>> + (div (minus@4 @0 (mult:c@3 @1 @2)) @2)
>> + (with {value_range vr0, vr1, vr2, vr3, vr4;}
>> + (if (INTEGRAL_TYPE_P (type)
>> + && get_range_query (cfun)->range_of_expr (vr1, @1)
>> + && get_range_query (cfun)->range_of_expr (vr2, @2)
>> + /* "N * M" doesn't overflow. */
>> + && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
>> + && get_range_query (cfun)->range_of_expr (vr0, @0)
>> + && get_range_query (cfun)->range_of_expr (vr3, @3)
>> + /* "X - (N*M)" doesn't overflow. */
>> + && range_op_handler (MINUS_EXPR).overflow_free_p (vr0, vr3)
>> + && get_range_query (cfun)->range_of_expr (vr4, @4)
>> + /* "X-N*M" is not with opposite sign as "X". */
>> + && (TYPE_UNSIGNED (type)
>> + || (vr0.nonnegative_p () && vr4.nonnegative_p ())
>> + || (vr0.nonpositive_p () && vr4.nonpositive_p ())))
>> + (minus (div @0 @2) @1)))))
>> +
>> +/* Simplify
>> + (X + C) / N -> X / N + C / N where C is multiple of N.
>> + (X + C) >> N -> X >> N + C>>N if low N bits of C is 0. */
>> +(for op (trunc_div exact_div rshift)
>> + (simplify
>> + (op (plus@3 @0 INTEGER_CST@1) INTEGER_CST@2)
>> + (with
>> + {
>> + wide_int c = wi::to_wide (@1);
>> + wide_int n = wi::to_wide (@2);
>> + bool shift = op == RSHIFT_EXPR;
>> + #define plus_op1(v) (shift ? wi::rshift (v, n, TYPE_SIGN (type)) \
>> + : wi::div_trunc (v, n, TYPE_SIGN (type)))
>> + #define exact_mod(v) (shift ? wi::ctz (v) >= n.to_shwi () \
>> + : wi::multiple_of_p (v, n, TYPE_SIGN (type)))
>
> please indent these full left
>
>> + value_range vr0, vr1, vr3;
>> + }
>> + (if (INTEGRAL_TYPE_P (type)
>> + && get_range_query (cfun)->range_of_expr (vr0, @0))
>> + (if (exact_mod (c)
>> + && get_range_query (cfun)->range_of_expr (vr1, @1)
>> + /* "X+C" doesn't overflow. */
>> + && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr1)
>> + && get_range_query (cfun)->range_of_expr (vr3, @3)
>> + /* "X+C" and "X" are not of opposite sign. */
>> + && (TYPE_UNSIGNED (type)
>> + || (vr0.nonnegative_p () && vr3.nonnegative_p ())
>> + || (vr0.nonpositive_p () && vr3.nonpositive_p ())))
>> + (plus (op @0 @2) { wide_int_to_tree (type, plus_op1 (c)); })
>> + (if (TYPE_UNSIGNED (type) && c.sign_mask () < 0
>> + && exact_mod (-c)
>> + /* unsigned "X-(-C)" doesn't underflow. */
>> + && wi::geu_p (vr0.lower_bound (), -c))
>> + (plus (op @0 @2) { wide_int_to_tree (type, -plus_op1 (-c)); })))))))
>
> and #undef them here again.
>
> OK with that change.
Thanks for your very helpful review!!
Committed to r14-3644.
BR,
Jeff (Jiufu Guo)
>
> Thanks,
> Richard.
>
>> +#endif
>> +
>> (for op (negate abs)
>> /* Simplify cos(-x) and cos(|x|) -> cos(x). Similarly for cosh. */
>> (for coss (COS COSH)
>> diff --git a/gcc/testsuite/gcc.dg/pr108757-1.c b/gcc/testsuite/gcc.dg/pr108757-1.c
>> new file mode 100644
>> index 0000000000000000000000000000000000000000..7908f4bdcb8e225fe311b668efbe8f6db525b4d5
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/pr108757-1.c
>> @@ -0,0 +1,18 @@
>> +/* PR tree-optimization/108757 */
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fdump-tree-optimized" } */
>> +
>> +#include <limits.h>
>> +#define N 5
>> +#define M 3
>> +#define GAP 0
>> +typedef unsigned int UINT;
>> +typedef int INT;
>> +#define UMAX UINT_MAX
>> +#define IMAX INT_MAX
>> +#define IMIN INT_MIN
>> +#include "pr108757.h"
>> +
>> +/* { dg-final { scan-tree-dump-not " = x_\[0-9\]+\\(D\\) \\+ " "optimized" } } *
>> +/* { dg-final { scan-tree-dump-not " = x_\[0-9\]+\\(D\\) \\- " "optimized" } } */
>> +/* { dg-final { scan-tree-dump-not " = b_\[0-9\]+ \\+ " "optimized" } } */
>> diff --git a/gcc/testsuite/gcc.dg/pr108757-2.c b/gcc/testsuite/gcc.dg/pr108757-2.c
>> new file mode 100644
>> index 0000000000000000000000000000000000000000..82bebd09944f0be2e0690c604723bfe6182acec3
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/pr108757-2.c
>> @@ -0,0 +1,19 @@
>> +/* PR tree-optimization/108757 */
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fdump-tree-optimized -fwrapv" } */
>> +
>> +#include <limits.h>
>> +#define N 4
>> +#define M 3
>> +#define GAP 2
>> +typedef unsigned int UINT;
>> +typedef int INT;
>> +#define UMAX UINT_MAX
>> +#define IMAX INT_MAX
>> +#define IMIN INT_MIN
>> +#include "pr108757.h"
>> +
>> +/* { dg-final { scan-tree-dump-times " = x_\[0-9\]+\\(D\\) \\+ " 16 "optimized" } } */
>> +/* { dg-final { scan-tree-dump-times " = x_\[0-9\]+\\(D\\) \\- " 3 "optimized" } } */
>> +/* { dg-final { scan-tree-dump-times " \\+ x_\[0-9\]+\\(D\\)" 3 "optimized" } } */
>> +
>> diff --git a/gcc/testsuite/gcc.dg/pr108757.h b/gcc/testsuite/gcc.dg/pr108757.h
>> new file mode 100644
>> index 0000000000000000000000000000000000000000..5550199c1ef952f54eaa83087cec25e992057c34
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/pr108757.h
>> @@ -0,0 +1,233 @@
>> +#define NOINLINE __attribute__ ((noinline))
>> +UINT NOINLINE
>> +opt_u1 (UINT x)
>> +{
>> + if (x < (M * N) - GAP)
>> + return 0;
>> + UINT a = x - (M * N);
>> + UINT b = a / N;
>> + return b + M;
>> +}
>> +
>> +UINT NOINLINE
>> +opt_u2 (UINT x)
>> +{
>> + if (x > (UMAX - (M * N) + GAP))
>> + return 0;
>> + UINT a = x + (M * N);
>> + UINT b = a / N;
>> + return b - M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s1 (INT x)
>> +{
>> + if (x < (M * N) - GAP)
>> + return 0;
>> + INT a = x - (M * N);
>> + INT b = a / N;
>> + return b + M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s2 (INT x)
>> +{
>> + if (x < IMIN + (M * N) - GAP || x > 0)
>> + return 0;
>> + INT a = x - (M * N);
>> + INT b = a / N;
>> + return b + M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s3 (INT x)
>> +{
>> + if (x < (M * N) - GAP)
>> + return 0;
>> + INT a = x - (M * N);
>> + INT b = a / -N;
>> + return b + -M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s4 (INT x)
>> +{
>> + if (x < IMIN + (M * N) - GAP || x > 0)
>> + return 0;
>> + INT a = x - (M * N);
>> + INT b = a / -N;
>> + return b + -M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s5 (INT x)
>> +{
>> + if (x > (-M * N) + GAP)
>> + return 0;
>> + INT a = x - (-M * N);
>> + INT b = a / N;
>> + return b + -M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s6 (INT x)
>> +{
>> + if (x > IMAX - (M * N) + GAP || x < 0)
>> + return 0;
>> + INT a = x - (-M * N);
>> + INT b = a / N;
>> + return b + -M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s7 (INT x)
>> +{
>> + if (x > (M * -N) + GAP)
>> + return 0;
>> + INT a = x - (M * -N);
>> + INT b = a / -N;
>> + return b + M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s8 (INT x)
>> +{
>> + if (x > IMAX - (M * N) + GAP || x < 0)
>> + return 0;
>> + INT a = x - (M * -N);
>> + INT b = a / -N;
>> + return b + M;
>> +}
>> +
>> +UINT NOINLINE
>> +opt_u3 (UINT x)
>> +{
>> + if (x < (M << N) - GAP)
>> + return 0;
>> + UINT a = x - (M << N);
>> + UINT b = a >> N;
>> + return b + M;
>> +}
>> +
>> +UINT NOINLINE
>> +opt_u4 (UINT x)
>> +{
>> + if (x > (UMAX - (M << N)) + GAP)
>> + return 0;
>> + UINT a = x + (M << N);
>> + UINT b = a >> N;
>> + return b - M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s9 (INT x)
>> +{
>> + if (x < (M << N) - GAP)
>> + return 0;
>> + INT a = x - (M << N);
>> + INT b = a >> N;
>> + return b + M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s10 (INT x)
>> +{
>> + if (x < IMIN + (M << N) - GAP || x > 0)
>> + return 0;
>> + INT a = x - (M << N);
>> + INT b = a >> N;
>> + return b + M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s11 (INT x)
>> +{
>> + if (x > (-M << N) + GAP)
>> + return 0;
>> + INT a = x - (-M << N);
>> + INT b = a >> N;
>> + return b + -M;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s12 (INT x)
>> +{
>> + if (x > IMAX - (M << N) + GAP || x < 0)
>> + return 0;
>> + INT a = x - (-M << N);
>> + INT b = a >> N;
>> + return b + -M;
>> +}
>> +
>> +UINT NOINLINE
>> +opt_u5 (UINT x, UINT n, UINT m)
>> +{
>> + if (n > N || m > M)
>> + return 0;
>> + if (x < (M*N) - GAP)
>> + return 0;
>> + UINT a = x - (m * n);
>> + UINT b = a / n;
>> + return b + m;
>> +}
>> +
>> +UINT NOINLINE
>> +opt_u6 (UINT x, UINT n, UINT m)
>> +{
>> + if (n > N || m > M)
>> + return 0;
>> + if (x > (UMAX - M*N) + GAP)
>> + return 0;
>> + UINT a = x + (m * n);
>> + UINT b = a / n;
>> + return b - m;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s13 (INT x, INT n, INT m)
>> +{
>> + if (n > N || m > M || n < 0 || m < 0)
>> + return 0;
>> + if (x < (M*N) - GAP)
>> + return 0;
>> + INT a = x - (m * n);
>> + INT b = a / n;
>> + return b + m;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s14 (INT x, INT n, INT m)
>> +{
>> + if (n > N || m > M || n < 0 || m < 0)
>> + return 0;
>> + if (x > -M*N + GAP)
>> + return 0;
>> + INT a = x + (m * n);
>> + INT b = a / n;
>> + return b - m;
>> +}
>> +
>> +INT
>> +opt_s15 (INT x, INT n, INT m)
>> +{
>> + if (n > 0 || m > 0 || n < -N || m < -M)
>> + return 0;
>> + if (x < (M*N) - GAP)
>> + return 0;
>> + INT a = x - (m * n);
>> + INT b = a / n;
>> + return b + m;
>> +}
>> +
>> +INT NOINLINE
>> +opt_s16 (INT x, INT n, INT m)
>> +{
>> + if (n > 0 || m > 0 || n < -N || m < -M)
>> + return 0;
>> + if (x < 0 || x > (IMAX - M*N) + GAP)
>> + return 0;
>> + INT a = x + (m * n);
>> + INT b = a / n;
>> + return b - m;
>> +}
>> +
>>
@@ -959,6 +959,84 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
#endif
))))
+#if GIMPLE
+(for div (trunc_div exact_div)
+ /* Simplify (X + M*N) / N -> X / N + M. */
+ (simplify
+ (div (plus:c@4 @0 (mult:c@3 @1 @2)) @2)
+ (with {value_range vr0, vr1, vr2, vr3, vr4;}
+ (if (INTEGRAL_TYPE_P (type)
+ && get_range_query (cfun)->range_of_expr (vr1, @1)
+ && get_range_query (cfun)->range_of_expr (vr2, @2)
+ /* "N*M" doesn't overflow. */
+ && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
+ && get_range_query (cfun)->range_of_expr (vr0, @0)
+ && get_range_query (cfun)->range_of_expr (vr3, @3)
+ /* "X+(N*M)" doesn't overflow. */
+ && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr3)
+ && get_range_query (cfun)->range_of_expr (vr4, @4)
+ /* "X+N*M" is not with opposite sign as "X". */
+ && (TYPE_UNSIGNED (type)
+ || (vr0.nonnegative_p () && vr4.nonnegative_p ())
+ || (vr0.nonpositive_p () && vr4.nonpositive_p ())))
+ (plus (div @0 @2) @1))))
+
+ /* Simplify (X - M*N) / N -> X / N - M. */
+ (simplify
+ (div (minus@4 @0 (mult:c@3 @1 @2)) @2)
+ (with {value_range vr0, vr1, vr2, vr3, vr4;}
+ (if (INTEGRAL_TYPE_P (type)
+ && get_range_query (cfun)->range_of_expr (vr1, @1)
+ && get_range_query (cfun)->range_of_expr (vr2, @2)
+ /* "N * M" doesn't overflow. */
+ && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
+ && get_range_query (cfun)->range_of_expr (vr0, @0)
+ && get_range_query (cfun)->range_of_expr (vr3, @3)
+ /* "X - (N*M)" doesn't overflow. */
+ && range_op_handler (MINUS_EXPR).overflow_free_p (vr0, vr3)
+ && get_range_query (cfun)->range_of_expr (vr4, @4)
+ /* "X-N*M" is not with opposite sign as "X". */
+ && (TYPE_UNSIGNED (type)
+ || (vr0.nonnegative_p () && vr4.nonnegative_p ())
+ || (vr0.nonpositive_p () && vr4.nonpositive_p ())))
+ (minus (div @0 @2) @1)))))
+
+/* Simplify
+ (X + C) / N -> X / N + C / N where C is multiple of N.
+ (X + C) >> N -> X >> N + C>>N if low N bits of C is 0. */
+(for op (trunc_div exact_div rshift)
+ (simplify
+ (op (plus@3 @0 INTEGER_CST@1) INTEGER_CST@2)
+ (with
+ {
+ wide_int c = wi::to_wide (@1);
+ wide_int n = wi::to_wide (@2);
+ bool shift = op == RSHIFT_EXPR;
+ #define plus_op1(v) (shift ? wi::rshift (v, n, TYPE_SIGN (type)) \
+ : wi::div_trunc (v, n, TYPE_SIGN (type)))
+ #define exact_mod(v) (shift ? wi::ctz (v) >= n.to_shwi () \
+ : wi::multiple_of_p (v, n, TYPE_SIGN (type)))
+ value_range vr0, vr1, vr3;
+ }
+ (if (INTEGRAL_TYPE_P (type)
+ && get_range_query (cfun)->range_of_expr (vr0, @0))
+ (if (exact_mod (c)
+ && get_range_query (cfun)->range_of_expr (vr1, @1)
+ /* "X+C" doesn't overflow. */
+ && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr1)
+ && get_range_query (cfun)->range_of_expr (vr3, @3)
+ /* "X+C" and "X" are not of opposite sign. */
+ && (TYPE_UNSIGNED (type)
+ || (vr0.nonnegative_p () && vr3.nonnegative_p ())
+ || (vr0.nonpositive_p () && vr3.nonpositive_p ())))
+ (plus (op @0 @2) { wide_int_to_tree (type, plus_op1 (c)); })
+ (if (TYPE_UNSIGNED (type) && c.sign_mask () < 0
+ && exact_mod (-c)
+ /* unsigned "X-(-C)" doesn't underflow. */
+ && wi::geu_p (vr0.lower_bound (), -c))
+ (plus (op @0 @2) { wide_int_to_tree (type, -plus_op1 (-c)); })))))))
+#endif
+
(for op (negate abs)
/* Simplify cos(-x) and cos(|x|) -> cos(x). Similarly for cosh. */
(for coss (COS COSH)
new file mode 100644
@@ -0,0 +1,18 @@
+/* PR tree-optimization/108757 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+#include <limits.h>
+#define N 5
+#define M 3
+#define GAP 0
+typedef unsigned int UINT;
+typedef int INT;
+#define UMAX UINT_MAX
+#define IMAX INT_MAX
+#define IMIN INT_MIN
+#include "pr108757.h"
+
+/* { dg-final { scan-tree-dump-not " = x_\[0-9\]+\\(D\\) \\+ " "optimized" } } *
+/* { dg-final { scan-tree-dump-not " = x_\[0-9\]+\\(D\\) \\- " "optimized" } } */
+/* { dg-final { scan-tree-dump-not " = b_\[0-9\]+ \\+ " "optimized" } } */
new file mode 100644
@@ -0,0 +1,19 @@
+/* PR tree-optimization/108757 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized -fwrapv" } */
+
+#include <limits.h>
+#define N 4
+#define M 3
+#define GAP 2
+typedef unsigned int UINT;
+typedef int INT;
+#define UMAX UINT_MAX
+#define IMAX INT_MAX
+#define IMIN INT_MIN
+#include "pr108757.h"
+
+/* { dg-final { scan-tree-dump-times " = x_\[0-9\]+\\(D\\) \\+ " 16 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " = x_\[0-9\]+\\(D\\) \\- " 3 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\+ x_\[0-9\]+\\(D\\)" 3 "optimized" } } */
+
new file mode 100644
@@ -0,0 +1,233 @@
+#define NOINLINE __attribute__ ((noinline))
+UINT NOINLINE
+opt_u1 (UINT x)
+{
+ if (x < (M * N) - GAP)
+ return 0;
+ UINT a = x - (M * N);
+ UINT b = a / N;
+ return b + M;
+}
+
+UINT NOINLINE
+opt_u2 (UINT x)
+{
+ if (x > (UMAX - (M * N) + GAP))
+ return 0;
+ UINT a = x + (M * N);
+ UINT b = a / N;
+ return b - M;
+}
+
+INT NOINLINE
+opt_s1 (INT x)
+{
+ if (x < (M * N) - GAP)
+ return 0;
+ INT a = x - (M * N);
+ INT b = a / N;
+ return b + M;
+}
+
+INT NOINLINE
+opt_s2 (INT x)
+{
+ if (x < IMIN + (M * N) - GAP || x > 0)
+ return 0;
+ INT a = x - (M * N);
+ INT b = a / N;
+ return b + M;
+}
+
+INT NOINLINE
+opt_s3 (INT x)
+{
+ if (x < (M * N) - GAP)
+ return 0;
+ INT a = x - (M * N);
+ INT b = a / -N;
+ return b + -M;
+}
+
+INT NOINLINE
+opt_s4 (INT x)
+{
+ if (x < IMIN + (M * N) - GAP || x > 0)
+ return 0;
+ INT a = x - (M * N);
+ INT b = a / -N;
+ return b + -M;
+}
+
+INT NOINLINE
+opt_s5 (INT x)
+{
+ if (x > (-M * N) + GAP)
+ return 0;
+ INT a = x - (-M * N);
+ INT b = a / N;
+ return b + -M;
+}
+
+INT NOINLINE
+opt_s6 (INT x)
+{
+ if (x > IMAX - (M * N) + GAP || x < 0)
+ return 0;
+ INT a = x - (-M * N);
+ INT b = a / N;
+ return b + -M;
+}
+
+INT NOINLINE
+opt_s7 (INT x)
+{
+ if (x > (M * -N) + GAP)
+ return 0;
+ INT a = x - (M * -N);
+ INT b = a / -N;
+ return b + M;
+}
+
+INT NOINLINE
+opt_s8 (INT x)
+{
+ if (x > IMAX - (M * N) + GAP || x < 0)
+ return 0;
+ INT a = x - (M * -N);
+ INT b = a / -N;
+ return b + M;
+}
+
+UINT NOINLINE
+opt_u3 (UINT x)
+{
+ if (x < (M << N) - GAP)
+ return 0;
+ UINT a = x - (M << N);
+ UINT b = a >> N;
+ return b + M;
+}
+
+UINT NOINLINE
+opt_u4 (UINT x)
+{
+ if (x > (UMAX - (M << N)) + GAP)
+ return 0;
+ UINT a = x + (M << N);
+ UINT b = a >> N;
+ return b - M;
+}
+
+INT NOINLINE
+opt_s9 (INT x)
+{
+ if (x < (M << N) - GAP)
+ return 0;
+ INT a = x - (M << N);
+ INT b = a >> N;
+ return b + M;
+}
+
+INT NOINLINE
+opt_s10 (INT x)
+{
+ if (x < IMIN + (M << N) - GAP || x > 0)
+ return 0;
+ INT a = x - (M << N);
+ INT b = a >> N;
+ return b + M;
+}
+
+INT NOINLINE
+opt_s11 (INT x)
+{
+ if (x > (-M << N) + GAP)
+ return 0;
+ INT a = x - (-M << N);
+ INT b = a >> N;
+ return b + -M;
+}
+
+INT NOINLINE
+opt_s12 (INT x)
+{
+ if (x > IMAX - (M << N) + GAP || x < 0)
+ return 0;
+ INT a = x - (-M << N);
+ INT b = a >> N;
+ return b + -M;
+}
+
+UINT NOINLINE
+opt_u5 (UINT x, UINT n, UINT m)
+{
+ if (n > N || m > M)
+ return 0;
+ if (x < (M*N) - GAP)
+ return 0;
+ UINT a = x - (m * n);
+ UINT b = a / n;
+ return b + m;
+}
+
+UINT NOINLINE
+opt_u6 (UINT x, UINT n, UINT m)
+{
+ if (n > N || m > M)
+ return 0;
+ if (x > (UMAX - M*N) + GAP)
+ return 0;
+ UINT a = x + (m * n);
+ UINT b = a / n;
+ return b - m;
+}
+
+INT NOINLINE
+opt_s13 (INT x, INT n, INT m)
+{
+ if (n > N || m > M || n < 0 || m < 0)
+ return 0;
+ if (x < (M*N) - GAP)
+ return 0;
+ INT a = x - (m * n);
+ INT b = a / n;
+ return b + m;
+}
+
+INT NOINLINE
+opt_s14 (INT x, INT n, INT m)
+{
+ if (n > N || m > M || n < 0 || m < 0)
+ return 0;
+ if (x > -M*N + GAP)
+ return 0;
+ INT a = x + (m * n);
+ INT b = a / n;
+ return b - m;
+}
+
+INT
+opt_s15 (INT x, INT n, INT m)
+{
+ if (n > 0 || m > 0 || n < -N || m < -M)
+ return 0;
+ if (x < (M*N) - GAP)
+ return 0;
+ INT a = x - (m * n);
+ INT b = a / n;
+ return b + m;
+}
+
+INT NOINLINE
+opt_s16 (INT x, INT n, INT m)
+{
+ if (n > 0 || m > 0 || n < -N || m < -M)
+ return 0;
+ if (x < 0 || x > (IMAX - M*N) + GAP)
+ return 0;
+ INT a = x + (m * n);
+ INT b = a / n;
+ return b - m;
+}
+