tree-optimization/110991 - unroll size estimate after vectorization

Message ID 20230814132958.A6644385840B@sourceware.org
State Accepted
Series tree-optimization/110991 - unroll size estimate after vectorization

Checks
snail/gcc-patch-check: success (Github commit url)

Commit Message

Richard Biener Aug. 14, 2023, 1:29 p.m. UTC
  The following testcase shows that we are bad at identifying inductions
that will be optimized away after vectorizing them because SCEV doesn't
handle vectorized defs.  The following hand-rolls a simpler
identification of SSA cycles covering a PHI and an assignment with a
binary operator that has a constant second operand.
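
As an aside (not part of the patch; the names 'i', 'bar' and 'out' are
made up), a minimal loop of the shape the new check is meant to
recognize looks like this; the GIMPLE in the comment is only a rough
sketch, not a dump from the testcase below:

/* Illustrative only: the induction 'i' forms a two-statement SSA cycle
   of a loop-header PHI and one assignment using a binary operator with
   a constant second operand, and the value on the preheader edge is a
   constant, so after complete peeling every value in the cycle is
   known.  Roughly, in GIMPLE terms:

     i_1 = PHI <25(preheader), i_2(latch)>
     ...
     i_2 = i_1 - 1;
 */
static unsigned char i;

void
bar (unsigned char *out)
{
  for (i = 25; i > 13; --i)
    out[i - 13] = i << 3;
}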

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Note, I also have a more general approach (will reply to this mail
with an RFC).

Any comments on this particular change?

	PR tree-optimization/110991
	* tree-ssa-loop-ivcanon.cc (constant_after_peeling): Handle
	VIEW_CONVERT_EXPR <op>, handle more simple IV-like SSA cycles
	that will end up constant.

	* gcc.dg/tree-ssa/cunroll-16.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/cunroll-16.c | 17 ++++++++
 gcc/tree-ssa-loop-ivcanon.cc               | 46 +++++++++++++++++++++-
 2 files changed, 62 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cunroll-16.c
  

Patch

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-16.c b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-16.c
new file mode 100644
index 00000000000..9bb66ff8299
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-16.c
@@ -0,0 +1,17 @@ 
+/* PR/110991 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cunroll-details -fdump-tree-optimized" } */
+
+static unsigned char a;
+static signed char b;
+void foo(void);
+int main() {
+  a = 25;
+  for (; a > 13; --a)
+    b = a > 127 ?: a << 3;
+  if (!b)
+    foo();
+}
+
+/* { dg-final { scan-tree-dump "optimized: loop with \[0-9\]\+ iterations completely unrolled" "cunroll" } } */
+/* { dg-final { scan-tree-dump-not "foo" "optimized" } } */
diff --git a/gcc/tree-ssa-loop-ivcanon.cc b/gcc/tree-ssa-loop-ivcanon.cc
index a895e8e65be..99e50ee2efe 100644
--- a/gcc/tree-ssa-loop-ivcanon.cc
+++ b/gcc/tree-ssa-loop-ivcanon.cc
@@ -166,6 +166,11 @@  constant_after_peeling (tree op, gimple *stmt, class loop *loop)
   if (CONSTANT_CLASS_P (op))
     return true;
 
+  /* Get at the actual SSA operand.  */
+  if (handled_component_p (op)
+      && TREE_CODE (TREE_OPERAND (op, 0)) == SSA_NAME)
+    op = TREE_OPERAND (op, 0);
+
   /* We can still fold accesses to constant arrays when index is known.  */
   if (TREE_CODE (op) != SSA_NAME)
     {
@@ -198,7 +203,46 @@  constant_after_peeling (tree op, gimple *stmt, class loop *loop)
   tree ev = analyze_scalar_evolution (loop, op);
   if (chrec_contains_undetermined (ev)
       || chrec_contains_symbols (ev))
-    return false;
+    {
+      if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (op)))
+	{
+	  gassign *ass = nullptr;
+	  gphi *phi = nullptr;
+	  if (is_a <gassign *> (SSA_NAME_DEF_STMT (op)))
+	    {
+	      ass = as_a <gassign *> (SSA_NAME_DEF_STMT (op));
+	      if (TREE_CODE (gimple_assign_rhs1 (ass)) == SSA_NAME)
+		phi = dyn_cast <gphi *>
+			(SSA_NAME_DEF_STMT (gimple_assign_rhs1  (ass)));
+	    }
+	  else if (is_a <gphi *> (SSA_NAME_DEF_STMT (op)))
+	    {
+	      phi = as_a <gphi *> (SSA_NAME_DEF_STMT (op));
+	      if (gimple_bb (phi) == loop->header)
+		{
+		  tree def = gimple_phi_arg_def_from_edge
+		    (phi, loop_latch_edge (loop));
+		  if (TREE_CODE (def) == SSA_NAME
+		      && is_a <gassign *> (SSA_NAME_DEF_STMT (def)))
+		    ass = as_a <gassign *> (SSA_NAME_DEF_STMT (def));
+		}
+	    }
+	  if (ass && phi)
+	    {
+	      tree rhs1 = gimple_assign_rhs1 (ass);
+	      if (gimple_assign_rhs_class (ass) == GIMPLE_BINARY_RHS
+		  && CONSTANT_CLASS_P (gimple_assign_rhs2 (ass))
+		  && rhs1 == gimple_phi_result (phi)
+		  && gimple_bb (phi) == loop->header
+		  && (gimple_phi_arg_def_from_edge (phi, loop_latch_edge (loop))
+		      == gimple_assign_lhs (ass))
+		  && (CONSTANT_CLASS_P (gimple_phi_arg_def_from_edge
+					 (phi, loop_preheader_edge (loop)))))
+		return true;
+	    }
+	}
+      return false;
+    }
   return true;
 }
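
For readers less familiar with the GIMPLE accessors used above, here is
a standalone mock-up of the structural conditions the new hunk tests.
It is not GCC code: phi_sketch, assign_sketch and simple_iv_like_cycle
are toy stand-ins for gphi, gassign and the inline checks added to
constant_after_peeling.

#include <stdio.h>
#include <string.h>

/* Toy stand-ins for the GCC internal types; illustrative only.  */
struct phi_sketch
{
  const char *result;        /* SSA name defined by the PHI.  */
  const char *latch_arg;     /* Value flowing in from the loop latch.  */
  int preheader_arg_const;   /* Initial value on the preheader edge is constant.  */
  int in_loop_header;        /* PHI sits in the loop header block.  */
};

struct assign_sketch
{
  const char *lhs;           /* SSA name defined by the assignment.  */
  const char *rhs1;          /* First operand.  */
  int rhs2_is_constant;      /* Second operand is a constant.  */
  int is_binary;             /* RHS is a binary operation.  */
};

/* Mirror the structural conditions of the new hunk: the PHI and the
   assignment form a two-statement SSA cycle started from a constant,
   so all of its values become constants once the loop is peeled.  */
static int
simple_iv_like_cycle (const struct phi_sketch *phi,
                      const struct assign_sketch *ass)
{
  return ass->is_binary
         && ass->rhs2_is_constant
         && strcmp (ass->rhs1, phi->result) == 0
         && phi->in_loop_header
         && strcmp (phi->latch_arg, ass->lhs) == 0
         && phi->preheader_arg_const;
}

int
main (void)
{
  /* i_1 = PHI <25(preheader), i_2(latch)>;  i_2 = i_1 - 1;  */
  struct phi_sketch phi = { "i_1", "i_2", 1, 1 };
  struct assign_sketch ass = { "i_2", "i_1", 1, 1 };
  printf ("%s\n", simple_iv_like_cycle (&phi, &ass)
                  ? "constant after peeling" : "unknown");
  return 0;
}

The real code additionally strips a wrapping VIEW_CONVERT_EXPR to get
at the SSA operand and only considers integral types.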