tree-optimization/88540 - FP x > y ? x : y if-conversion without -ffast-math
Checks
Commit Message
The following makes sure that FP x > y ? x : y style max/min operations
are if-converted at the GIMPLE level. We can match this to neither
MAX_EXPR nor .FMAX, as under IEEE both have semantics that differ from
the ternary ?: operation, but we can make sure to maintain this form
as a COND_EXPR so backends have the chance to match it to instructions
their ISA offers.
The patch does this in phiopt, where we recognize min/max: instead
of giving up when we have to honor NaNs or signed zeros, we alter the
generated code to a COND_EXPR.
This resolves PR88540 and we can then SLP vectorize the min operation
for its testcase. It also resolves part of the regressions observed
with the change matching bit-inserts of bit-field-refs to vec_perm.
Expansion from a COND_EXPR rather than from compare-and-branch regresses
gcc.target/i386/pr54855-9.c by producing extra moves: while the
corresponding min/max operations are now already synthesized by
RTL expansion, register selection isn't optimal. This can also be
provoked without this change by altering the operand order in the source.
I have XFAILed that part of the test.
Bootstrapped and tested on x86_64-unknown-linux-gnu on top of the
patch fixing if-converted RTL expansion when constants are involved.
Comments welcome but I plan to push this once that dependency is acked.
Thanks,
Richard.
PR tree-optimization/88540
* tree-ssa-phiopt.cc (minmax_replacement): Do not give up
with NaNs but handle the simple case by if-converting to a
COND_EXPR.
* gcc.target/i386/pr88540.c: New testcase.
* gcc.target/i386/pr54855-9.c: XFAIL check for redundant moves.
* gcc.target/i386/pr54855-12.c: Adjust.
* gcc.target/i386/pr54855-13.c: Likewise.
* gcc.dg/tree-ssa/split-path-12.c: Likewise.
---
gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c | 4 +++-
gcc/testsuite/gcc.target/i386/pr54855-12.c | 2 +-
gcc/testsuite/gcc.target/i386/pr54855-13.c | 2 +-
gcc/testsuite/gcc.target/i386/pr54855-9.c | 4 ++--
gcc/testsuite/gcc.target/i386/pr88540.c | 10 +++++++++
gcc/tree-ssa-phiopt.cc | 21 ++++++++++++++-----
6 files changed, 33 insertions(+), 10 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr88540.c
@@ -16,4 +16,6 @@ foo(double *d1, double *d2, double *d3, int num, double *ip)
return dmax[0] + dmax[1] + dmax[2];
}
-/* { dg-final { scan-tree-dump "appears to be optimized to a join point for if-convertable half-diamond" "split-paths" } } */
+/* Split-paths shouldn't do anything here, if there's a diamond it would
+ be if-convertible. */
+/* { dg-final { scan-tree-dump-not "Duplicating join block" "split-paths" } } */
@@ -1,6 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512fp16" } */
-/* { dg-final { scan-assembler-times "vmaxsh\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-times "vm\[ai\]\[nx\]sh\[ \\t\]" 1 } } */
/* { dg-final { scan-assembler-not "vcomish\[ \\t\]" } } */
/* { dg-final { scan-assembler-not "vmovsh\[ \\t\]" { target { ! ia32 } } } } */
@@ -1,6 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512fp16" } */
-/* { dg-final { scan-assembler-times "vmaxsh\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-times "vm\[ai\]\[nx\]sh\[ \\t\]" 1 } } */
/* { dg-final { scan-assembler-not "vcomish\[ \\t\]" } } */
/* { dg-final { scan-assembler-not "vmovsh\[ \\t\]" { target { ! ia32 } } } } */
@@ -1,8 +1,8 @@
/* { dg-do compile } */
/* { dg-options "-O2 -msse2 -mfpmath=sse" } */
/* { dg-final { scan-assembler-times "minss" 1 } } */
-/* { dg-final { scan-assembler-not "movaps" } } */
-/* { dg-final { scan-assembler-not "movss" } } */
+/* { dg-final { scan-assembler-not "movaps" { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not "movss" { xfail *-*-* } } } */
typedef float vec __attribute__((vector_size(16)));
new file mode 100644
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+void test(double* __restrict d1, double* __restrict d2, double* __restrict d3)
+{
+ for (int n = 0; n < 2; ++n)
+ d3[n] = d1[n] < d2[n] ? d1[n] : d2[n];
+}
+
+/* { dg-final { scan-assembler "minpd" } } */
@@ -1580,10 +1580,6 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, basic_block alt_
tree type = TREE_TYPE (PHI_RESULT (phi));
- /* The optimization may be unsafe due to NaNs. */
- if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
- return false;
-
gcond *cond = as_a <gcond *> (*gsi_last_bb (cond_bb));
enum tree_code cmp = gimple_cond_code (cond);
tree rhs = gimple_cond_rhs (cond);
@@ -1770,6 +1766,9 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, basic_block alt_
else
return false;
}
+ else if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
+ /* The optimization may be unsafe due to NaNs. */
+ return false;
else if (middle_bb != alt_middle_bb && threeway_p)
{
/* Recognize the following case:
@@ -2103,7 +2102,19 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, basic_block alt_
/* Emit the statement to compute min/max. */
gimple_seq stmts = NULL;
tree phi_result = PHI_RESULT (phi);
- result = gimple_build (&stmts, minmax, TREE_TYPE (phi_result), arg0, arg1);
+
+ /* When we can't use a MIN/MAX_EXPR still make sure the expression
+ stays in a form to be recognized by ISA that map to IEEE x > y ? x : y
+ semantics (that's not IEEE max semantics). */
+ if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
+ {
+ result = gimple_build (&stmts, cmp, boolean_type_node,
+ gimple_cond_lhs (cond), rhs);
+ result = gimple_build (&stmts, COND_EXPR, TREE_TYPE (phi_result),
+ result, arg_true, arg_false);
+ }
+ else
+ result = gimple_build (&stmts, minmax, TREE_TYPE (phi_result), arg0, arg1);
gsi = gsi_last_bb (cond_bb);
gsi_insert_seq_before (&gsi, stmts, GSI_NEW_STMT);