PING: [PATCH] Do not count unused scalar use when marking STMT_VINFO_LIVE_P [PR113091]

Message ID LV2PR01MB7839A8288448AC9BB7DA2517F7692@LV2PR01MB7839.prod.exchangelabs.com
State Not Applicable
Headers
Series PING: [PATCH] Do not count unused scalar use when marking STMT_VINFO_LIVE_P [PR113091] |

Checks

Context Check Description
snail/gcc-patch-check fail Git am fail log

Commit Message

Feng Xue OS Jan. 10, 2024, 11:42 p.m. UTC
  Hi, Richard,

  Would you please take a look at this patch?

Thanks,
Feng
  

Patch

diff --git a/gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c b/gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c
new file mode 100644
index 00000000000..ff822e90b4a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c
@@ -0,0 +1,22 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -fdump-tree-slp-details -ftree-slp-vectorize" } */
+
+int test(unsigned array[8]);
+
+int foo(char *a, char *b)
+{
+  unsigned array[8];
+
+  array[0] = (a[0] - b[0]);
+  array[1] = (a[1] - b[1]);
+  array[2] = (a[2] - b[2]);
+  array[3] = (a[3] - b[3]);
+  array[4] = (a[4] - b[4]);
+  array[5] = (a[5] - b[5]);
+  array[6] = (a[6] - b[6]);
+  array[7] = (a[7] - b[7]);
+
+  return test(array);
+}
+
+/* { dg-final { scan-tree-dump-times "Basic block will be vectorized using SLP" 1 "slp2" } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index a82fca45161..d36ff37114e 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -6418,6 +6418,84 @@  vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
   return res;
 }

+/* Given a definition DEF, analyze if it will have any live scalar use after
+   performing SLP vectorization whose information is represented by BB_VINFO,
+   and record result into hash map SCALAR_USE_MAP as cache for later fast
+   check.  */
+
+static bool
+vec_slp_has_scalar_use (bb_vec_info bb_vinfo, tree def,
+                       hash_map<tree, bool> &scalar_use_map)
+{
+  imm_use_iterator use_iter;
+  gimple *use_stmt;
+
+  if (bool *res = scalar_use_map.get (def))
+    return *res;
+
+  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
+    {
+      if (is_gimple_debug (use_stmt))
+       continue;
+
+      stmt_vec_info use_stmt_info = bb_vinfo->lookup_stmt (use_stmt);
+
+      if (!use_stmt_info)
+       break;
+
+      if (PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info)))
+       continue;
+
+      /* Do not step forward when encountering a PHI statement, since it may
+        involve a cyclic reference and cause infinite recursive invocation.  */
+      if (gimple_code (use_stmt) == GIMPLE_PHI)
+       break;
+
+      /* When pattern recognition is involved, a statement whose definition is
+        consumed in some pattern may not be included in the final replacement
+        pattern statements, so would be skipped when building the SLP graph.
+
+        * Original
+         char a_c = *(char *) a;
+         char b_c = *(char *) b;
+         unsigned short a_s = (unsigned short) a_c;
+         int a_i = (int) a_s;
+         int b_i = (int) b_c;
+         int r_i = a_i - b_i;
+
+        * After pattern replacement
+         a_s = (unsigned short) a_c;
+         a_i = (int) a_s;
+
+         patt_b_s = (unsigned short) b_c;    // b_i = (int) b_c
+         patt_b_i = (int) patt_b_s;          // b_i = (int) b_c
+
+         patt_r_s = widen_minus(a_c, b_c);   // r_i = a_i - b_i
+         patt_r_i = (int) patt_r_s;          // r_i = a_i - b_i
+
+        The definitions of a_i (original statement) and b_i (pattern statement)
+        are related to, but actually not part of, the widen_minus pattern.
+        Vectorizing the pattern does not cause these definition statements to
+        be marked as PURE_SLP.  For this case, we need to recursively check
+        whether their uses are all absorbed into vectorized code.  But there
+        is an exception that some use may participate in a vectorized
+        operation via an external SLP node containing that use as an element.
+        The parameter "scalar_use_map" tags such kind of SSA as having scalar
+        use in advance.  */
+      tree lhs = gimple_get_lhs (use_stmt);
+
+      if (!lhs || TREE_CODE (lhs) != SSA_NAME
+         || vec_slp_has_scalar_use (bb_vinfo, lhs, scalar_use_map))
+       break;
+    }
+
+  bool found = !end_imm_use_stmt_p (&use_iter);
+  bool added = scalar_use_map.put (def, found);
+
+  gcc_assert (!added);
+  return found;
+}
+
 /* Mark lanes of NODE that are live outside of the basic-block vectorized
    region and that can be vectorized using vectorizable_live_operation
    with STMT_VINFO_LIVE_P.  Not handled live operations will cause the
@@ -6427,6 +6505,7 @@  static void
 vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
                             slp_instance instance,
                             stmt_vector_for_cost *cost_vec,
+                            hash_map<tree, bool> &scalar_use_map,
                             hash_set<stmt_vec_info> &svisited,
                             hash_set<slp_tree> &visited)
 {
@@ -6451,32 +6530,22 @@  vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
       def_operand_p def_p;
       FOR_EACH_PHI_OR_STMT_DEF (def_p, orig_stmt, op_iter, SSA_OP_DEF)
        {
-         imm_use_iterator use_iter;
-         gimple *use_stmt;
-         stmt_vec_info use_stmt_info;
-         FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
-           if (!is_gimple_debug (use_stmt))
-             {
-               use_stmt_info = bb_vinfo->lookup_stmt (use_stmt);
-               if (!use_stmt_info
-                   || !PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info)))
-                 {
-                   STMT_VINFO_LIVE_P (stmt_info) = true;
-                   if (vectorizable_live_operation (bb_vinfo, stmt_info,
-                                                    node, instance, i,
-                                                    false, cost_vec))
-                     /* ???  So we know we can vectorize the live stmt
-                        from one SLP node.  If we cannot do so from all
-                        or none consistently we'd have to record which
-                        SLP node (and lane) we want to use for the live
-                        operation.  So make sure we can code-generate
-                        from all nodes.  */
-                     mark_visited = false;
-                   else
-                     STMT_VINFO_LIVE_P (stmt_info) = false;
-                   break;
-                 }
-             }
+         if (vec_slp_has_scalar_use (bb_vinfo, DEF_FROM_PTR (def_p),
+                                     scalar_use_map))
+           {
+             STMT_VINFO_LIVE_P (stmt_info) = true;
+             if (vectorizable_live_operation (bb_vinfo, stmt_info, node,
+                                              instance, i, false, cost_vec))
+               /* ???  So we know we can vectorize the live stmt from one SLP
+                  node.  If we cannot do so from all or none consistently
+                  we'd have to record which SLP node (and lane) we want to
+                  use for the live operation.  So make sure we can
+                  code-generate from all nodes.  */
+               mark_visited = false;
+             else
+               STMT_VINFO_LIVE_P (stmt_info) = false;
+           }
+
          /* We have to verify whether we can insert the lane extract
             before all uses.  The following is a conservative approximation.
             We cannot put this into vectorizable_live_operation because
@@ -6495,6 +6564,10 @@  vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
             from the latest stmt in a node.  So we compensate for this
             during code-generation, simply not replacing uses for those
             hopefully rare cases.  */
+         imm_use_iterator use_iter;
+         gimple *use_stmt;
+         stmt_vec_info use_stmt_info;
+
          if (STMT_VINFO_LIVE_P (stmt_info))
            FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
              if (!is_gimple_debug (use_stmt)
@@ -6517,8 +6590,56 @@  vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
   slp_tree child;
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
     if (child && SLP_TREE_DEF_TYPE (child) == vect_internal_def)
-      vect_bb_slp_mark_live_stmts (bb_vinfo, child, instance,
-                                  cost_vec, svisited, visited);
+      vect_bb_slp_mark_live_stmts (bb_vinfo, child, instance, cost_vec,
+                                  scalar_use_map, svisited, visited);
+}
+
+/* Traverse all slp instances of BB_VINFO, and mark lanes of every node that
+   are live outside of the basic-block vectorized region and that can be
+   vectorized using vectorizable_live_operation with STMT_VINFO_LIVE_P.  */
+
+static void
+vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo)
+{
+  if (bb_vinfo->slp_instances.is_empty ())
+    return;
+
+  hash_set<stmt_vec_info> svisited;
+  hash_set<slp_tree> visited;
+  hash_map<tree, bool> scalar_use_map;
+  auto_vec<slp_tree> worklist;
+
+  for (slp_instance instance : bb_vinfo->slp_instances)
+    if (!visited.add (SLP_INSTANCE_TREE (instance)))
+      worklist.safe_push (SLP_INSTANCE_TREE (instance));
+
+  do
+    {
+      slp_tree node = worklist.pop ();
+
+      if (SLP_TREE_DEF_TYPE (node) == vect_external_def)
+       {
+         for (tree op : SLP_TREE_SCALAR_OPS (node))
+           if (TREE_CODE (op) == SSA_NAME)
+             scalar_use_map.put (op, true);
+       }
+      else
+       {
+         for (slp_tree child : SLP_TREE_CHILDREN (node))
+           if (child && !visited.add (child))
+             worklist.safe_push (child);
+       }
+    } while (!worklist.is_empty ());
+
+  visited.empty ();
+
+  for (slp_instance instance : bb_vinfo->slp_instances)
+    {
+      vect_location = instance->location ();
+      vect_bb_slp_mark_live_stmts (bb_vinfo, SLP_INSTANCE_TREE (instance),
+                                  instance, &instance->cost_vec,
+                                  scalar_use_map, svisited, visited);
+    }
 }

 /* Determine whether we can vectorize the reduction epilogue for INSTANCE.  */
@@ -6684,17 +6805,7 @@  vect_slp_analyze_operations (vec_info *vinfo)

   /* Compute vectorizable live stmts.  */
   if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo))
-    {
-      hash_set<stmt_vec_info> svisited;
-      hash_set<slp_tree> visited;
-      for (i = 0; vinfo->slp_instances.iterate (i, &instance); ++i)
-       {
-         vect_location = instance->location ();
-         vect_bb_slp_mark_live_stmts (bb_vinfo, SLP_INSTANCE_TREE (instance),
-                                      instance, &instance->cost_vec, svisited,
-                                      visited);
-       }
-    }
+    vect_bb_slp_mark_live_stmts (bb_vinfo);

   return !vinfo->slp_instances.is_empty ();
 }