Optimize vector codegen for invariant loads, fix SLP support
Checks
Commit Message
The following avoids creating duplicate stmts for invariant loads;
the duplicates were only necessary when the vector stmts were kept in a linked list.
It also fixes SLP support, which did not create the
appropriate number of copies.
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.
* tree-vect-stmts.cc (vectorizable_load): Avoid useless
copies of VMAT_INVARIANT vectorized stmts, fix SLP support.
---
gcc/tree-vect-stmts.cc | 39 +++++++++++++++++++--------------------
1 file changed, 19 insertions(+), 20 deletions(-)
@@ -9612,27 +9612,26 @@ vectorizable_load (vec_info *vinfo,
gimple_set_vuse (new_stmt, vuse);
gsi_insert_on_edge_immediate (pe, new_stmt);
}
- /* These copies are all equivalent, but currently the representation
- requires a separate STMT_VINFO_VEC_STMT for each one. */
- gimple_stmt_iterator gsi2 = *gsi;
- gsi_next (&gsi2);
- for (j = 0; j < ncopies; j++)
+ /* These copies are all equivalent. */
+ if (hoist_p)
+ new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
+ vectype, NULL);
+ else
{
- if (hoist_p)
- new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
- vectype, NULL);
- else
- new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
- vectype, &gsi2);
- gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
- if (slp)
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
- else
- {
- if (j == 0)
- *vec_stmt = new_stmt;
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
- }
+ gimple_stmt_iterator gsi2 = *gsi;
+ gsi_next (&gsi2);
+ new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
+ vectype, &gsi2);
+ }
+ gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
+ if (slp)
+ for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+ else
+ {
+ for (j = 0; j < ncopies; ++j)
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+ *vec_stmt = new_stmt;
}
return true;
}