[3/9] vect: Adjust vectorizable_load costing on VMAT_INVARIANT
Checks
Commit Message
This patch adjusts the cost handling on VMAT_INVARIANT in
function vectorizable_load. We don't call function
vect_model_load_cost for it any more.
To make the costing on VMAT_INVARIANT better, this patch
queries hoist_defs_of_uses for the hoisting decision, and
adds costs for the different "where" locations based on it.
Currently, function hoist_defs_of_uses always hoists the
defs of all SSA uses; adding one argument HOIST_P avoids
the actual hoisting during the costing phase.
gcc/ChangeLog:
* tree-vect-stmts.cc (hoist_defs_of_uses): Add one argument HOIST_P.
(vectorizable_load): Adjust the handling on VMAT_INVARIANT to respect
hoisting decision and without calling vect_model_load_cost.
(vect_model_load_cost): Assert it won't get VMAT_INVARIANT any more
and remove VMAT_INVARIANT related handlings.
---
gcc/tree-vect-stmts.cc | 61 +++++++++++++++++++++++++++---------------
1 file changed, 39 insertions(+), 22 deletions(-)
Comments
On Tue, Jun 13, 2023 at 4:03 AM Kewen Lin <linkw@linux.ibm.com> wrote:
>
> This patch adjusts the cost handling on VMAT_INVARIANT in
> function vectorizable_load. We don't call function
> vect_model_load_cost for it any more.
>
> To make the costing on VMAT_INVARIANT better, this patch is
> to query hoist_defs_of_uses for hoisting decision, and add
> costs for different "where" based on it. Currently function
> hoist_defs_of_uses would always hoist the defs of all SSA
> uses, adding one argument HOIST_P aims to avoid the actual
> hoisting during costing phase.
>
> gcc/ChangeLog:
>
> * tree-vect-stmts.cc (hoist_defs_of_uses): Add one argument HOIST_P.
> (vectorizable_load): Adjust the handling on VMAT_INVARIANT to respect
> hoisting decision and without calling vect_model_load_cost.
> (vect_model_load_cost): Assert it won't get VMAT_INVARIANT any more
> and remove VMAT_INVARIANT related handlings.
> ---
> gcc/tree-vect-stmts.cc | 61 +++++++++++++++++++++++++++---------------
> 1 file changed, 39 insertions(+), 22 deletions(-)
>
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 744cdf40e26..19c61d703c8 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -1135,7 +1135,8 @@ vect_model_load_cost (vec_info *vinfo,
> slp_tree slp_node,
> stmt_vector_for_cost *cost_vec)
> {
> - gcc_assert (memory_access_type != VMAT_GATHER_SCATTER || !gs_info->decl);
> + gcc_assert ((memory_access_type != VMAT_GATHER_SCATTER || !gs_info->decl)
> + && memory_access_type != VMAT_INVARIANT);
>
> unsigned int inside_cost = 0, prologue_cost = 0;
> bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
> @@ -1238,16 +1239,6 @@ vect_model_load_cost (vec_info *vinfo,
> ncopies * assumed_nunits,
> scalar_load, stmt_info, 0, vect_body);
> }
> - else if (memory_access_type == VMAT_INVARIANT)
> - {
> - /* Invariant loads will ideally be hoisted and splat to a vector. */
> - prologue_cost += record_stmt_cost (cost_vec, 1,
> - scalar_load, stmt_info, 0,
> - vect_prologue);
> - prologue_cost += record_stmt_cost (cost_vec, 1,
> - scalar_to_vec, stmt_info, 0,
> - vect_prologue);
> - }
> else
> vect_get_load_cost (vinfo, stmt_info, ncopies,
> alignment_support_scheme, misalignment, first_stmt_p,
> @@ -9121,10 +9112,11 @@ permute_vec_elements (vec_info *vinfo,
> /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
> inserting them on the loops preheader edge. Returns true if we
> were successful in doing so (and thus STMT_INFO can be moved then),
> - otherwise returns false. */
> + otherwise returns false. HOIST_P indicates if we want to hoist the
> + definitions of all SSA uses, it would be false when we are costing. */
>
> static bool
> -hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
> +hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p)
> {
> ssa_op_iter i;
> tree op;
> @@ -9158,6 +9150,9 @@ hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
> if (!any)
> return true;
>
> + if (!hoist_p)
> + return true;
> +
> FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
> {
> gimple *def_stmt = SSA_NAME_DEF_STMT (op);
> @@ -9510,14 +9505,6 @@ vectorizable_load (vec_info *vinfo,
>
> if (memory_access_type == VMAT_INVARIANT)
> {
> - if (costing_p)
> - {
> - vect_model_load_cost (vinfo, stmt_info, ncopies, vf,
> - memory_access_type, alignment_support_scheme,
> - misalignment, &gs_info, slp_node, cost_vec);
> - return true;
> - }
> -
> gcc_assert (!grouped_load && !mask && !bb_vinfo);
> /* If we have versioned for aliasing or the loop doesn't
> have any data dependencies that would preclude this,
> @@ -9525,7 +9512,37 @@ vectorizable_load (vec_info *vinfo,
> thus we can insert it on the preheader edge. */
> bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
> && !nested_in_vect_loop
> - && hoist_defs_of_uses (stmt_info, loop));
> + && hoist_defs_of_uses (stmt_info, loop, !costing_p));
'hoist_defs_of_uses' should ideally be computed once at analysis time
and the result remembered. It's not so easy in this case, so maybe
just add a comment for this here.
> + if (costing_p)
> + {
> + if (hoist_p)
> + {
> + unsigned int prologue_cost;
> + prologue_cost = record_stmt_cost (cost_vec, 1, scalar_load,
> + stmt_info, 0, vect_prologue);
> + prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
> + stmt_info, 0, vect_prologue);
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_NOTE, vect_location,
> + "vect_model_load_cost: inside_cost = 0, "
> + "prologue_cost = %d .\n",
> + prologue_cost);
> + }
> + else
> + {
> + unsigned int inside_cost;
> + inside_cost = record_stmt_cost (cost_vec, 1, scalar_load,
> + stmt_info, 0, vect_body);
> + inside_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
> + stmt_info, 0, vect_body);
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_NOTE, vect_location,
> + "vect_model_load_cost: inside_cost = %d, "
> + "prologue_cost = 0 .\n",
> + inside_cost);
> + }
Please instead do

  enum vect_cost_model_location loc = hoist_p ? vect_prologue : vect_body;

and merge the two branches, which otherwise look identical to me.
> + return true;
> + }
> if (hoist_p)
> {
> gassign *stmt = as_a <gassign *> (stmt_info->stmt);
> --
> 2.31.1
>
@@ -1135,7 +1135,8 @@ vect_model_load_cost (vec_info *vinfo,
slp_tree slp_node,
stmt_vector_for_cost *cost_vec)
{
- gcc_assert (memory_access_type != VMAT_GATHER_SCATTER || !gs_info->decl);
+ gcc_assert ((memory_access_type != VMAT_GATHER_SCATTER || !gs_info->decl)
+ && memory_access_type != VMAT_INVARIANT);
unsigned int inside_cost = 0, prologue_cost = 0;
bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
@@ -1238,16 +1239,6 @@ vect_model_load_cost (vec_info *vinfo,
ncopies * assumed_nunits,
scalar_load, stmt_info, 0, vect_body);
}
- else if (memory_access_type == VMAT_INVARIANT)
- {
- /* Invariant loads will ideally be hoisted and splat to a vector. */
- prologue_cost += record_stmt_cost (cost_vec, 1,
- scalar_load, stmt_info, 0,
- vect_prologue);
- prologue_cost += record_stmt_cost (cost_vec, 1,
- scalar_to_vec, stmt_info, 0,
- vect_prologue);
- }
else
vect_get_load_cost (vinfo, stmt_info, ncopies,
alignment_support_scheme, misalignment, first_stmt_p,
@@ -9121,10 +9112,11 @@ permute_vec_elements (vec_info *vinfo,
/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
inserting them on the loops preheader edge. Returns true if we
were successful in doing so (and thus STMT_INFO can be moved then),
- otherwise returns false. */
+ otherwise returns false. HOIST_P indicates if we want to hoist the
+ definitions of all SSA uses, it would be false when we are costing. */
static bool
-hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
+hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p)
{
ssa_op_iter i;
tree op;
@@ -9158,6 +9150,9 @@ hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
if (!any)
return true;
+ if (!hoist_p)
+ return true;
+
FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
{
gimple *def_stmt = SSA_NAME_DEF_STMT (op);
@@ -9510,14 +9505,6 @@ vectorizable_load (vec_info *vinfo,
if (memory_access_type == VMAT_INVARIANT)
{
- if (costing_p)
- {
- vect_model_load_cost (vinfo, stmt_info, ncopies, vf,
- memory_access_type, alignment_support_scheme,
- misalignment, &gs_info, slp_node, cost_vec);
- return true;
- }
-
gcc_assert (!grouped_load && !mask && !bb_vinfo);
/* If we have versioned for aliasing or the loop doesn't
have any data dependencies that would preclude this,
@@ -9525,7 +9512,37 @@ vectorizable_load (vec_info *vinfo,
thus we can insert it on the preheader edge. */
bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
&& !nested_in_vect_loop
- && hoist_defs_of_uses (stmt_info, loop));
+ && hoist_defs_of_uses (stmt_info, loop, !costing_p));
+ if (costing_p)
+ {
+ if (hoist_p)
+ {
+ unsigned int prologue_cost;
+ prologue_cost = record_stmt_cost (cost_vec, 1, scalar_load,
+ stmt_info, 0, vect_prologue);
+ prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
+ stmt_info, 0, vect_prologue);
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "vect_model_load_cost: inside_cost = 0, "
+ "prologue_cost = %d .\n",
+ prologue_cost);
+ }
+ else
+ {
+ unsigned int inside_cost;
+ inside_cost = record_stmt_cost (cost_vec, 1, scalar_load,
+ stmt_info, 0, vect_body);
+ inside_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
+ stmt_info, 0, vect_body);
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "vect_model_load_cost: inside_cost = %d, "
+ "prologue_cost = 0 .\n",
+ inside_cost);
+ }
+ return true;
+ }
if (hoist_p)
{
gassign *stmt = as_a <gassign *> (stmt_info->stmt);