new file mode 100644
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -fdump-tree-slp-details -ftree-slp-vectorize" } */
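+
+/* The stores into ARRAY below should be vectorized as basic-block SLP
+ (see the dg-final scan at the end of this file). */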
+
+int test(unsigned array[8]);
+
+int foo(char *a, char *b)
+{
+ unsigned array[8];
+
+ array[0] = (a[0] - b[0]);
+ array[1] = (a[1] - b[1]);
+ array[2] = (a[2] - b[2]);
+ array[3] = (a[3] - b[3]);
+ array[4] = (a[4] - b[4]);
+ array[5] = (a[5] - b[5]);
+ array[6] = (a[6] - b[6]);
+ array[7] = (a[7] - b[7]);
+
+ return test(array);
+}
+
+/* { dg-final { scan-tree-dump-times "Basic block will be vectorized using SLP" 1 "slp2" } } */
@@ -6418,6 +6418,84 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
return res;
}
+/* Given a definition DEF, analyze whether it will have any live scalar use
+ after the SLP vectorization whose information is represented by BB_VINFO,
+ and record the result in hash map SCALAR_USE_MAP as a cache for later
+ fast checks. */
+
+static bool
+vec_slp_has_scalar_use (bb_vec_info bb_vinfo, tree def,
+ hash_map<tree, bool> &scalar_use_map)
+{
+ imm_use_iterator use_iter;
+ gimple *use_stmt;
+
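+ /* Return the cached result if DEF has already been analyzed. */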
+ if (bool *res = scalar_use_map.get (def))
+ return *res;
+
+ FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
+ {
+ if (is_gimple_debug (use_stmt))
+ continue;
+
+ stmt_vec_info use_stmt_info = bb_vinfo->lookup_stmt (use_stmt);
+
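+ /* A use outside the region described by BB_VINFO can only consume the
+ scalar value, so it is a scalar use. */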
+ if (!use_stmt_info)
+ break;
+
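+ /* A use in a statement that is itself part of the SLP graph and will
+ be vectorized is not a scalar use. */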
+ if (PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info)))
+ continue;
+
+ /* Do not step forward when encountering a PHI statement, since it may
+ involve a cyclic reference and cause infinite recursion. */
+ if (gimple_code (use_stmt) == GIMPLE_PHI)
+ break;
+
+ /* When pattern recognition is involved, a statement whose definition is
+ consumed by some pattern may not be included in the final replacement
+ pattern statements, and so would be skipped when building the SLP graph.
+
+ * Original
+ char a_c = *(char *) a;
+ char b_c = *(char *) b;
+ unsigned short a_s = (unsigned short) a_c;
+ int a_i = (int) a_s;
+ int b_i = (int) b_c;
+ int r_i = a_i - b_i;
+
+ * After pattern replacement
+ a_s = (unsigned short) a_c;
+ a_i = (int) a_s;
+
+ patt_b_s = (unsigned short) b_c; // b_i = (int) b_c
+ patt_b_i = (int) patt_b_s; // b_i = (int) b_c
+
+ patt_r_s = widen_minus(a_c, b_c); // r_i = a_i - b_i
+ patt_r_i = (int) patt_r_s; // r_i = a_i - b_i
+
+ The definitions of a_i (original statement) and b_i (pattern statement)
+ are related to, but not actually part of, the widen_minus pattern.
+ Vectorizing the pattern does not cause these definition statements to
+ be marked as PURE_SLP. For this case, we need to recursively check
+ whether their uses are all absorbed into vectorized code. There is
+ one exception: a use may participate in a vectorized operation via an
+ external SLP node that contains it as an element. The parameter
+ SCALAR_USE_MAP tags such SSA names as having a scalar use in advance. */
+ tree lhs = gimple_get_lhs (use_stmt);
+
+ if (!lhs || TREE_CODE (lhs) != SSA_NAME
+ || vec_slp_has_scalar_use (bb_vinfo, lhs, scalar_use_map))
+ break;
+ }
+
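+ /* The loop above stops early iff some scalar use was found. */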
+ bool found = !end_imm_use_stmt_p (&use_iter);
+ bool added = scalar_use_map.put (def, found);
+
+ gcc_assert (!added);
+ return found;
+}
+
/* Mark lanes of NODE that are live outside of the basic-block vectorized
region and that can be vectorized using vectorizable_live_operation
with STMT_VINFO_LIVE_P. Not handled live operations will cause the
@@ -6427,6 +6505,7 @@ static void
vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
slp_instance instance,
stmt_vector_for_cost *cost_vec,
+ hash_map<tree, bool> &scalar_use_map,
hash_set<stmt_vec_info> &svisited,
hash_set<slp_tree> &visited)
{
@@ -6451,32 +6530,22 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
def_operand_p def_p;
FOR_EACH_PHI_OR_STMT_DEF (def_p, orig_stmt, op_iter, SSA_OP_DEF)
{
- imm_use_iterator use_iter;
- gimple *use_stmt;
- stmt_vec_info use_stmt_info;
- FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
- if (!is_gimple_debug (use_stmt))
- {
- use_stmt_info = bb_vinfo->lookup_stmt (use_stmt);
- if (!use_stmt_info
- || !PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info)))
- {
- STMT_VINFO_LIVE_P (stmt_info) = true;
- if (vectorizable_live_operation (bb_vinfo, stmt_info,
- node, instance, i,
- false, cost_vec))
- /* ??? So we know we can vectorize the live stmt
- from one SLP node. If we cannot do so from all
- or none consistently we'd have to record which
- SLP node (and lane) we want to use for the live
- operation. So make sure we can code-generate
- from all nodes. */
- mark_visited = false;
- else
- STMT_VINFO_LIVE_P (stmt_info) = false;
- break;
- }
- }
+ if (vec_slp_has_scalar_use (bb_vinfo, DEF_FROM_PTR (def_p),
+ scalar_use_map))
+ {
+ STMT_VINFO_LIVE_P (stmt_info) = true;
+ if (vectorizable_live_operation (bb_vinfo, stmt_info, node,
+ instance, i, false, cost_vec))
+ /* ??? So we know we can vectorize the live stmt from one SLP
+ node. If we cannot do so from all or none consistently
+ we'd have to record which SLP node (and lane) we want to
+ use for the live operation. So make sure we can
+ code-generate from all nodes. */
+ mark_visited = false;
+ else
+ STMT_VINFO_LIVE_P (stmt_info) = false;
+ }
+
/* We have to verify whether we can insert the lane extract
before all uses. The following is a conservative approximation.
We cannot put this into vectorizable_live_operation because
@@ -6495,6 +6564,10 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
from the latest stmt in a node. So we compensate for this
during code-generation, simply not replacing uses for those
hopefully rare cases. */
+ imm_use_iterator use_iter;
+ gimple *use_stmt;
+ stmt_vec_info use_stmt_info;
+
if (STMT_VINFO_LIVE_P (stmt_info))
FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
if (!is_gimple_debug (use_stmt)
@@ -6517,8 +6590,56 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
slp_tree child;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
if (child && SLP_TREE_DEF_TYPE (child) == vect_internal_def)
- vect_bb_slp_mark_live_stmts (bb_vinfo, child, instance,
- cost_vec, svisited, visited);
+ vect_bb_slp_mark_live_stmts (bb_vinfo, child, instance, cost_vec,
+ scalar_use_map, svisited, visited);
+}
+
+/* Traverse all SLP instances of BB_VINFO, and mark lanes of every node that
+ are live outside of the basic-block vectorized region and that can be
+ vectorized using vectorizable_live_operation with STMT_VINFO_LIVE_P. */
+
+static void
+vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo)
+{
+ if (bb_vinfo->slp_instances.is_empty ())
+ return;
+
+ hash_set<stmt_vec_info> svisited;
+ hash_set<slp_tree> visited;
+ hash_map<tree, bool> scalar_use_map;
+ auto_vec<slp_tree> worklist;
+
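+ /* Pre-mark SSA names that appear as elements of external SLP nodes as
+ having a scalar use, since such uses participate in vectorized
+ operations via those nodes (see vec_slp_has_scalar_use). */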
+ for (slp_instance instance : bb_vinfo->slp_instances)
+ if (!visited.add (SLP_INSTANCE_TREE (instance)))
+ worklist.safe_push (SLP_INSTANCE_TREE (instance));
+
+ do
+ {
+ slp_tree node = worklist.pop ();
+
+ if (SLP_TREE_DEF_TYPE (node) == vect_external_def)
+ {
+ for (tree op : SLP_TREE_SCALAR_OPS (node))
+ if (TREE_CODE (op) == SSA_NAME)
+ scalar_use_map.put (op, true);
+ }
+ else
+ {
+ for (slp_tree child : SLP_TREE_CHILDREN (node))
+ if (child && !visited.add (child))
+ worklist.safe_push (child);
+ }
+ }
+ while (!worklist.is_empty ());
+
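+ /* The visited set is reused by the marking walk below; clear it first. */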
+ visited.empty ();
+
+ for (slp_instance instance : bb_vinfo->slp_instances)
+ {
+ vect_location = instance->location ();
+ vect_bb_slp_mark_live_stmts (bb_vinfo, SLP_INSTANCE_TREE (instance),
+ instance, &instance->cost_vec,
+ scalar_use_map, svisited, visited);
+ }
}
/* Determine whether we can vectorize the reduction epilogue for INSTANCE. */
@@ -6684,17 +6805,7 @@ vect_slp_analyze_operations (vec_info *vinfo)
/* Compute vectorizable live stmts. */
if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo))
- {
- hash_set<stmt_vec_info> svisited;
- hash_set<slp_tree> visited;
- for (i = 0; vinfo->slp_instances.iterate (i, &instance); ++i)
- {
- vect_location = instance->location ();
- vect_bb_slp_mark_live_stmts (bb_vinfo, SLP_INSTANCE_TREE (instance),
- instance, &instance->cost_vec, svisited,
- visited);
- }
- }
+ vect_bb_slp_mark_live_stmts (bb_vinfo);
return !vinfo->slp_instances.is_empty ();
}