MATCH: Add simplifications for `(a * zero_one) ==/!= CST`

Message ID 20230916054920.1653864-1-apinski@marvell.com
State Accepted
Headers
Series MATCH: Add simplifications for `(a * zero_one) ==/!= CST` |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Andrew Pinski Sept. 16, 2023, 5:49 a.m. UTC
  Transforming `(a * b@[0,1]) != 0` into `((cast)b) & a != 0`
will produce better code as a lot of the time b is defined
by a comparison.
Also since canonicalize `a & -zero_one` into `a * zero_one` we
start to lose information when doing comparisons against 0.
In the case of PR 110992, we lose that `a != 0` on the branch
and then don't do a jump threading like we should.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

	PR tree-optimization/110992

gcc/ChangeLog:

	* match.pd (`a * zero_one !=/== CST`): New pattern.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/vrp116.c: Update test to avoid the
	extra comparison.
	* gcc.c-torture/execute/pr110992-1.c: New test.
	* gcc.dg/tree-ssa/pr110992-1.c: New test.
	* gcc.dg/tree-ssa/pr110992-2.c: New test.
---
 gcc/match.pd                                  | 15 +++++++
 .../gcc.c-torture/execute/pr110992-1.c        | 43 +++++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr110992-1.c    | 21 +++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr110992-2.c    | 17 ++++++++
 gcc/testsuite/gcc.dg/tree-ssa/vrp116.c        |  2 +-
 5 files changed, 97 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr110992-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110992-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110992-2.c
  

Comments

Richard Biener Sept. 18, 2023, 7:08 a.m. UTC | #1
On Sat, Sep 16, 2023 at 7:50 AM Andrew Pinski via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Transforming `(a * b@[0,1]) != 0` into `((cast)b) & a != 0`

that isn't strictly a simplification (one more op), and your
alternate transform is even worse in this regard.

> will produce better code as a lot of the time b is defined
> by a comparison.

what if not?  How does it simplify then?

> Also since canonicalize `a & -zero_one` into `a * zero_one` we
> start to lose information when doing comparisons against 0.
> In the case of PR 110992, we lose that `a != 0` on the branch

How so?  Ranger should be happy with both forms, no?

> and then don't do a jump threading like we should.
>
> OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
>
>         PR tree-optimization/110992
>
> gcc/ChangeLog:
>
>         * match.pd (`a * zero_one !=/== CST`): New pattern.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/tree-ssa/vrp116.c: Update test to avoid the
>         extra comparison.
>         * gcc.c-torture/execute/pr110992-1.c: New test.
>         * gcc.dg/tree-ssa/pr110992-1.c: New test.
>         * gcc.dg/tree-ssa/pr110992-2.c: New test.
> ---
>  gcc/match.pd                                  | 15 +++++++
>  .../gcc.c-torture/execute/pr110992-1.c        | 43 +++++++++++++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/pr110992-1.c    | 21 +++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/pr110992-2.c    | 17 ++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/vrp116.c        |  2 +-
>  5 files changed, 97 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr110992-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110992-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110992-2.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 39c9c81966a..97405e6a5c3 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -2197,6 +2197,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (if (INTEGRAL_TYPE_P (type))
>    (bit_and @0 @1)))
>
> +/* (a * b@[0,1]) == CST
> +     ->
> +       CST == 0 ? (a == CST | b == 0) : (a == CST & b != 0)
> +   (a * b@[0,1]) != CST
> +     ->
> +       CST != 0 ? (a != CST | b == 0) : (a != CST & b != 0)  */
> +(for cmp (ne eq)
> + (simplify
> +  (cmp (mult:cs @0 zero_one_valued_p@1) INTEGER_CST@2)
> +  (if ((cmp == EQ_EXPR) ^ (wi::to_wide (@2) != 0))
> +   (bit_ior
> +    (cmp @0 @2)
> +    (convert (bit_xor @1 { build_one_cst (TREE_TYPE (@1)); })))
> +   (bit_and (cmp @0 @2) (convert @1)))))
> +
>  (for cmp (tcc_comparison)
>       icmp (inverted_tcc_comparison)
>   /* Fold (((a < b) & c) | ((a >= b) & d)) into (a < b ? c : d) & 1.  */
> diff --git a/gcc/testsuite/gcc.c-torture/execute/pr110992-1.c b/gcc/testsuite/gcc.c-torture/execute/pr110992-1.c
> new file mode 100644
> index 00000000000..edb7eb75ef2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/pr110992-1.c
> @@ -0,0 +1,43 @@
> +#define CST 5
> +#define OP !=
> +#define op_eq ==
> +#define op_ne !=
> +
> +#define function(vol,op, cst) \
> +__attribute__((noipa)) \
> +_Bool func_##op##_##cst##_##vol(vol int a, vol _Bool b) \
> +{ \
> +  vol int d = (a * b); \
> +  return d op_##op cst; \
> +}
> +
> +#define funcdefs(op,cst) \
> +function(,op,cst) \
> +function(volatile,op,cst)
> +
> +#define funcs(f) \
> +f(eq,0) \
> +f(eq,1) \
> +f(eq,5) \
> +f(ne,0) \
> +f(ne,1) \
> +f(ne,5)
> +
> +funcs(funcdefs)
> +
> +#define test(op,cst) \
> +do { \
> + if(func_##op##_##cst##_(a,b) != func_##op##_##cst##_volatile(a,b))\
> +   __builtin_abort(); \
> +} while(0);
> +
> +int main(void)
> +{
> +        for(int a = -10; a <= 10; a++)
> +        {
> +                _Bool b = 0;
> +                funcs(test)
> +                b = 1;
> +                funcs(test)
> +        }
> +}
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110992-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110992-1.c
> new file mode 100644
> index 00000000000..825fd63f84c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110992-1.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fdump-tree-optimized" } */
> +static unsigned b;
> +static short c = 4;
> +void foo(void);
> +static short(a)(short d, short g) { return d * g; }
> +void e();
> +static char f() {
> +  b = 0;
> +  return 0;
> +}
> +int main() {
> +  int h = b;
> +  if ((short)(a(c && e, 65535) & h)) {
> +    foo();
> +    h || f();
> +  }
> +}
> +
> +/* There should be no calls to foo left. */
> +/* { dg-final { scan-tree-dump-not " foo " "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110992-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110992-2.c
> new file mode 100644
> index 00000000000..6082949a218
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110992-2.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fdump-tree-optimized" } */
> +static unsigned b;
> +static short c = 4;
> +void foo(void);
> +int main() {
> +  int h = b;
> +  int d = c != 0;
> +  if (h*d) {
> +    foo();
> +    if (!h) b = 20;
> +  }
> +}
> +
> +
> +/* There should be no calls to foo left. */
> +/* { dg-final { scan-tree-dump-not " foo " "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp116.c b/gcc/testsuite/gcc.dg/tree-ssa/vrp116.c
> index 9e68a774aee..16b31e320a0 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/vrp116.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp116.c
> @@ -6,7 +6,7 @@ f (int m1, int m2, int c)
>  {
>    int d = m1 > m2;
>    int e = d * c;
> -  return e ? m1 : m2;
> +  return e;
>  }
>
>  /* { dg-final { scan-tree-dump-times "\\? c_\[0-9\]\\(D\\) : 0" 1 "optimized" } } */
> --
> 2.31.1
>
  
Andrew Pinski Sept. 18, 2023, 11:55 p.m. UTC | #2
On Mon, Sep 18, 2023 at 12:09 AM Richard Biener via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Sat, Sep 16, 2023 at 7:50 AM Andrew Pinski via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Transforming `(a * b@[0,1]) != 0` into `((cast)b) & a != 0`
>
> that isn't strictly a simplification (one more op), and your
> alternate transform is even worse in this regard.

Right, I agree here. I was trying to workaround a ranger issue (see below).

>
> > will produce better code as a lot of the time b is defined
> > by a comparison.
>
> what if not?  How does it simplify then?
>
> > Also since canonicalize `a & -zero_one` into `a * zero_one` we
> > start to lose information when doing comparisons against 0.
> > In the case of PR 110992, we lose that `a != 0` on the branch
>
> How so?  Ranger should be happy with both forms, no?

Ranger does not handle going backwards on the multiply case; only on
the bit_and case.
I tried figuring out how to understand that works but I got lost in
the ranger code.  Maybe Andrew or Aldy could look into figuring out
how to improve ranger here.

Thanks,
Andrew

>
> > and then don't do a jump threading like we should.
> >
> > OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
> >
> >         PR tree-optimization/110992
> >
> > gcc/ChangeLog:
> >
> >         * match.pd (`a * zero_one !=/== CST`): New pattern.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.dg/tree-ssa/vrp116.c: Update test to avoid the
> >         extra comparison.
> >         * gcc.c-torture/execute/pr110992-1.c: New test.
> >         * gcc.dg/tree-ssa/pr110992-1.c: New test.
> >         * gcc.dg/tree-ssa/pr110992-2.c: New test.
> > ---
> >  gcc/match.pd                                  | 15 +++++++
> >  .../gcc.c-torture/execute/pr110992-1.c        | 43 +++++++++++++++++++
> >  gcc/testsuite/gcc.dg/tree-ssa/pr110992-1.c    | 21 +++++++++
> >  gcc/testsuite/gcc.dg/tree-ssa/pr110992-2.c    | 17 ++++++++
> >  gcc/testsuite/gcc.dg/tree-ssa/vrp116.c        |  2 +-
> >  5 files changed, 97 insertions(+), 1 deletion(-)
> >  create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr110992-1.c
> >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110992-1.c
> >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110992-2.c
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 39c9c81966a..97405e6a5c3 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -2197,6 +2197,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >   (if (INTEGRAL_TYPE_P (type))
> >    (bit_and @0 @1)))
> >
> > +/* (a * b@[0,1]) == CST
> > +     ->
> > +       CST == 0 ? (a == CST | b == 0) : (a == CST & b != 0)
> > +   (a * b@[0,1]) != CST
> > +     ->
> > +       CST != 0 ? (a != CST | b == 0) : (a != CST & b != 0)  */
> > +(for cmp (ne eq)
> > + (simplify
> > +  (cmp (mult:cs @0 zero_one_valued_p@1) INTEGER_CST@2)
> > +  (if ((cmp == EQ_EXPR) ^ (wi::to_wide (@2) != 0))
> > +   (bit_ior
> > +    (cmp @0 @2)
> > +    (convert (bit_xor @1 { build_one_cst (TREE_TYPE (@1)); })))
> > +   (bit_and (cmp @0 @2) (convert @1)))))
> > +
> >  (for cmp (tcc_comparison)
> >       icmp (inverted_tcc_comparison)
> >   /* Fold (((a < b) & c) | ((a >= b) & d)) into (a < b ? c : d) & 1.  */
> > diff --git a/gcc/testsuite/gcc.c-torture/execute/pr110992-1.c b/gcc/testsuite/gcc.c-torture/execute/pr110992-1.c
> > new file mode 100644
> > index 00000000000..edb7eb75ef2
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.c-torture/execute/pr110992-1.c
> > @@ -0,0 +1,43 @@
> > +#define CST 5
> > +#define OP !=
> > +#define op_eq ==
> > +#define op_ne !=
> > +
> > +#define function(vol,op, cst) \
> > +__attribute__((noipa)) \
> > +_Bool func_##op##_##cst##_##vol(vol int a, vol _Bool b) \
> > +{ \
> > +  vol int d = (a * b); \
> > +  return d op_##op cst; \
> > +}
> > +
> > +#define funcdefs(op,cst) \
> > +function(,op,cst) \
> > +function(volatile,op,cst)
> > +
> > +#define funcs(f) \
> > +f(eq,0) \
> > +f(eq,1) \
> > +f(eq,5) \
> > +f(ne,0) \
> > +f(ne,1) \
> > +f(ne,5)
> > +
> > +funcs(funcdefs)
> > +
> > +#define test(op,cst) \
> > +do { \
> > + if(func_##op##_##cst##_(a,b) != func_##op##_##cst##_volatile(a,b))\
> > +   __builtin_abort(); \
> > +} while(0);
> > +
> > +int main(void)
> > +{
> > +        for(int a = -10; a <= 10; a++)
> > +        {
> > +                _Bool b = 0;
> > +                funcs(test)
> > +                b = 1;
> > +                funcs(test)
> > +        }
> > +}
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110992-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110992-1.c
> > new file mode 100644
> > index 00000000000..825fd63f84c
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110992-1.c
> > @@ -0,0 +1,21 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3 -fdump-tree-optimized" } */
> > +static unsigned b;
> > +static short c = 4;
> > +void foo(void);
> > +static short(a)(short d, short g) { return d * g; }
> > +void e();
> > +static char f() {
> > +  b = 0;
> > +  return 0;
> > +}
> > +int main() {
> > +  int h = b;
> > +  if ((short)(a(c && e, 65535) & h)) {
> > +    foo();
> > +    h || f();
> > +  }
> > +}
> > +
> > +/* There should be no calls to foo left. */
> > +/* { dg-final { scan-tree-dump-not " foo " "optimized" } } */
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110992-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110992-2.c
> > new file mode 100644
> > index 00000000000..6082949a218
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110992-2.c
> > @@ -0,0 +1,17 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3 -fdump-tree-optimized" } */
> > +static unsigned b;
> > +static short c = 4;
> > +void foo(void);
> > +int main() {
> > +  int h = b;
> > +  int d = c != 0;
> > +  if (h*d) {
> > +    foo();
> > +    if (!h) b = 20;
> > +  }
> > +}
> > +
> > +
> > +/* There should be no calls to foo left. */
> > +/* { dg-final { scan-tree-dump-not " foo " "optimized" } } */
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp116.c b/gcc/testsuite/gcc.dg/tree-ssa/vrp116.c
> > index 9e68a774aee..16b31e320a0 100644
> > --- a/gcc/testsuite/gcc.dg/tree-ssa/vrp116.c
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp116.c
> > @@ -6,7 +6,7 @@ f (int m1, int m2, int c)
> >  {
> >    int d = m1 > m2;
> >    int e = d * c;
> > -  return e ? m1 : m2;
> > +  return e;
> >  }
> >
> >  /* { dg-final { scan-tree-dump-times "\\? c_\[0-9\]\\(D\\) : 0" 1 "optimized" } } */
> > --
> > 2.31.1
> >
  

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index 39c9c81966a..97405e6a5c3 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2197,6 +2197,21 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type))
   (bit_and @0 @1)))
 
+/* (a * b@[0,1]) == CST
+     ->
+       CST == 0 ? (a == CST | b == 0) : (a == CST & b != 0)
+   (a * b@[0,1]) != CST
+     ->
+       CST != 0 ? (a != CST | b == 0) : (a != CST & b != 0)  */
+(for cmp (ne eq)
+ (simplify
+  (cmp (mult:cs @0 zero_one_valued_p@1) INTEGER_CST@2)
+  (if ((cmp == EQ_EXPR) ^ (wi::to_wide (@2) != 0))
+   (bit_ior
+    (cmp @0 @2)
+    (convert (bit_xor @1 { build_one_cst (TREE_TYPE (@1)); })))
+   (bit_and (cmp @0 @2) (convert @1)))))
+
 (for cmp (tcc_comparison)
      icmp (inverted_tcc_comparison)
  /* Fold (((a < b) & c) | ((a >= b) & d)) into (a < b ? c : d) & 1.  */
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr110992-1.c b/gcc/testsuite/gcc.c-torture/execute/pr110992-1.c
new file mode 100644
index 00000000000..edb7eb75ef2
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr110992-1.c
@@ -0,0 +1,43 @@ 
+#define CST 5
+#define OP !=
+#define op_eq ==
+#define op_ne !=
+
+#define function(vol,op, cst) \
+__attribute__((noipa)) \
+_Bool func_##op##_##cst##_##vol(vol int a, vol _Bool b) \
+{ \
+  vol int d = (a * b); \
+  return d op_##op cst; \
+}
+
+#define funcdefs(op,cst) \
+function(,op,cst) \
+function(volatile,op,cst)
+
+#define funcs(f) \
+f(eq,0) \
+f(eq,1) \
+f(eq,5) \
+f(ne,0) \
+f(ne,1) \
+f(ne,5)
+
+funcs(funcdefs)
+
+#define test(op,cst) \
+do { \
+ if(func_##op##_##cst##_(a,b) != func_##op##_##cst##_volatile(a,b))\
+   __builtin_abort(); \
+} while(0);
+
+int main(void)
+{
+        for(int a = -10; a <= 10; a++)
+        {
+                _Bool b = 0;
+                funcs(test)
+                b = 1;
+                funcs(test)
+        }
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110992-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110992-1.c
new file mode 100644
index 00000000000..825fd63f84c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110992-1.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+static unsigned b;
+static short c = 4;
+void foo(void);
+static short(a)(short d, short g) { return d * g; }
+void e();
+static char f() {
+  b = 0;
+  return 0;
+}
+int main() {
+  int h = b;
+  if ((short)(a(c && e, 65535) & h)) {
+    foo();
+    h || f();
+  }
+}
+
+/* There should be no calls to foo left. */
+/* { dg-final { scan-tree-dump-not " foo " "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110992-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110992-2.c
new file mode 100644
index 00000000000..6082949a218
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110992-2.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+static unsigned b;
+static short c = 4;
+void foo(void);
+int main() {
+  int h = b;
+  int d = c != 0;
+  if (h*d) {
+    foo();
+    if (!h) b = 20;
+  }
+}
+
+
+/* There should be no calls to foo left. */
+/* { dg-final { scan-tree-dump-not " foo " "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp116.c b/gcc/testsuite/gcc.dg/tree-ssa/vrp116.c
index 9e68a774aee..16b31e320a0 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/vrp116.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp116.c
@@ -6,7 +6,7 @@  f (int m1, int m2, int c)
 {
   int d = m1 > m2;
   int e = d * c;
-  return e ? m1 : m2;
+  return e;
 }
 
 /* { dg-final { scan-tree-dump-times "\\? c_\[0-9\]\\(D\\) : 0" 1 "optimized" } } */