[10/10] vect: Consider vec_perm costing for VMAT_CONTIGUOUS_REVERSE
Checks
Commit Message
For VMAT_CONTIGUOUS_REVERSE, the transform code in function
vectorizable_store generates a VEC_PERM_EXPR stmt before
storing, but it's never considered in costing.
This patch makes the costing account for that vec_perm.  It
also adjusts the order of the transform code a bit so that
the costing_p path can return early.
gcc/ChangeLog:
* tree-vect-stmts.cc (vectorizable_store): Consider generated
VEC_PERM_EXPR stmt for VMAT_CONTIGUOUS_REVERSE in costing as
vec_perm.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c: New test.
---
.../costmodel/ppc/costmodel-vect-store-2.c | 29 +++++++++
gcc/tree-vect-stmts.cc | 63 +++++++++++--------
2 files changed, 65 insertions(+), 27 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c
Comments
On Thu, Sep 14, 2023 at 5:12 AM Kewen Lin <linkw@linux.ibm.com> wrote:
>
> For VMAT_CONTIGUOUS_REVERSE, the transform code in function
> vectorizable_store generates a VEC_PERM_EXPR stmt before
> storing, but it's never considered in costing.
>
> This patch makes the costing account for that vec_perm.  It
> also adjusts the order of the transform code a bit so that
> the costing_p path can return early.
OK.
> gcc/ChangeLog:
>
> * tree-vect-stmts.cc (vectorizable_store): Consider generated
> VEC_PERM_EXPR stmt for VMAT_CONTIGUOUS_REVERSE in costing as
> vec_perm.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c: New test.
> ---
> .../costmodel/ppc/costmodel-vect-store-2.c | 29 +++++++++
> gcc/tree-vect-stmts.cc | 63 +++++++++++--------
> 2 files changed, 65 insertions(+), 27 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c
>
> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c
> new file mode 100644
> index 00000000000..72b67cf9040
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-additional-options "-mvsx" } */
> +
> +/* Verify we do cost the required vec_perm. */
> +
> +int
> +foo (int *a, int *b, int len)
> +{
> + int i;
> + int *a1 = a;
> + int *a0 = a1 - 4;
> + for (i = 0; i < len; i++)
> + {
> + *b = *a0 + *a1;
> + b--;
> + a0++;
> + a1++;
> + }
> + return 0;
> +}
> +
> +/* The reason why it doesn't check the exact count is that
> + we can get more than 1 vec_perm when it's compiled with
> + partial vector capability like Power10 (retrying for
> + the epilogue) or it's compiled without unaligned vector
> + memory access support (realign). */
> +/* { dg-final { scan-tree-dump {\mvec_perm\M} "vect" } } */
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 3d451c80bca..ce925cc1d53 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -9279,6 +9279,40 @@ vectorizable_store (vec_info *vinfo,
> stmt_vec_info next_stmt_info = first_stmt_info;
> for (i = 0; i < vec_num; i++)
> {
> + if (!costing_p)
> + {
> + if (slp)
> + vec_oprnd = vec_oprnds[i];
> + else if (grouped_store)
> + /* For grouped stores vectorized defs are interleaved in
> + vect_permute_store_chain(). */
> + vec_oprnd = result_chain[i];
> + }
> +
> + if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
> + {
> + if (costing_p)
> + inside_cost += record_stmt_cost (cost_vec, 1, vec_perm,
> + stmt_info, 0, vect_body);
> + else
> + {
> + tree perm_mask = perm_mask_for_reverse (vectype);
> + tree perm_dest = vect_create_destination_var (
> + vect_get_store_rhs (stmt_info), vectype);
> + tree new_temp = make_ssa_name (perm_dest);
> +
> + /* Generate the permute statement. */
> + gimple *perm_stmt
> + = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
> + vec_oprnd, perm_mask);
> + vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
> + gsi);
> +
> + perm_stmt = SSA_NAME_DEF_STMT (new_temp);
> + vec_oprnd = new_temp;
> + }
> + }
> +
> if (costing_p)
> {
> vect_get_store_cost (vinfo, stmt_info, 1,
> @@ -9294,8 +9328,6 @@ vectorizable_store (vec_info *vinfo,
>
> continue;
> }
> - unsigned misalign;
> - unsigned HOST_WIDE_INT align;
>
> tree final_mask = NULL_TREE;
> tree final_len = NULL_TREE;
> @@ -9315,13 +9347,8 @@ vectorizable_store (vec_info *vinfo,
> dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
> stmt_info, bump);
>
> - if (slp)
> - vec_oprnd = vec_oprnds[i];
> - else if (grouped_store)
> - /* For grouped stores vectorized defs are interleaved in
> - vect_permute_store_chain(). */
> - vec_oprnd = result_chain[i];
> -
> + unsigned misalign;
> + unsigned HOST_WIDE_INT align;
> align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
> if (alignment_support_scheme == dr_aligned)
> misalign = 0;
> @@ -9338,24 +9365,6 @@ vectorizable_store (vec_info *vinfo,
> misalign);
> align = least_bit_hwi (misalign | align);
>
> - if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
> - {
> - tree perm_mask = perm_mask_for_reverse (vectype);
> - tree perm_dest
> - = vect_create_destination_var (vect_get_store_rhs (stmt_info),
> - vectype);
> - tree new_temp = make_ssa_name (perm_dest);
> -
> - /* Generate the permute statement. */
> - gimple *perm_stmt
> - = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
> - vec_oprnd, perm_mask);
> - vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
> -
> - perm_stmt = SSA_NAME_DEF_STMT (new_temp);
> - vec_oprnd = new_temp;
> - }
> -
> /* Compute IFN when LOOP_LENS or final_mask valid. */
> machine_mode vmode = TYPE_MODE (vectype);
> machine_mode new_vmode = vmode;
> --
> 2.31.1
>
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-additional-options "-mvsx" } */
+
+/* Verify we do cost the required vec_perm. */
+
+int
+foo (int *a, int *b, int len)
+{
+ int i;
+ int *a1 = a;
+ int *a0 = a1 - 4;
+ for (i = 0; i < len; i++)
+ {
+ *b = *a0 + *a1;
+ b--;
+ a0++;
+ a1++;
+ }
+ return 0;
+}
+
+/* The reason why it doesn't check the exact count is that
+ we can get more than 1 vec_perm when it's compiled with
+ partial vector capability like Power10 (retrying for
+ the epilogue) or it's compiled without unaligned vector
+ memory access support (realign). */
+/* { dg-final { scan-tree-dump {\mvec_perm\M} "vect" } } */
@@ -9279,6 +9279,40 @@ vectorizable_store (vec_info *vinfo,
stmt_vec_info next_stmt_info = first_stmt_info;
for (i = 0; i < vec_num; i++)
{
+ if (!costing_p)
+ {
+ if (slp)
+ vec_oprnd = vec_oprnds[i];
+ else if (grouped_store)
+ /* For grouped stores vectorized defs are interleaved in
+ vect_permute_store_chain(). */
+ vec_oprnd = result_chain[i];
+ }
+
+ if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+ {
+ if (costing_p)
+ inside_cost += record_stmt_cost (cost_vec, 1, vec_perm,
+ stmt_info, 0, vect_body);
+ else
+ {
+ tree perm_mask = perm_mask_for_reverse (vectype);
+ tree perm_dest = vect_create_destination_var (
+ vect_get_store_rhs (stmt_info), vectype);
+ tree new_temp = make_ssa_name (perm_dest);
+
+ /* Generate the permute statement. */
+ gimple *perm_stmt
+ = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
+ vec_oprnd, perm_mask);
+ vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
+ gsi);
+
+ perm_stmt = SSA_NAME_DEF_STMT (new_temp);
+ vec_oprnd = new_temp;
+ }
+ }
+
if (costing_p)
{
vect_get_store_cost (vinfo, stmt_info, 1,
@@ -9294,8 +9328,6 @@ vectorizable_store (vec_info *vinfo,
continue;
}
- unsigned misalign;
- unsigned HOST_WIDE_INT align;
tree final_mask = NULL_TREE;
tree final_len = NULL_TREE;
@@ -9315,13 +9347,8 @@ vectorizable_store (vec_info *vinfo,
dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
stmt_info, bump);
- if (slp)
- vec_oprnd = vec_oprnds[i];
- else if (grouped_store)
- /* For grouped stores vectorized defs are interleaved in
- vect_permute_store_chain(). */
- vec_oprnd = result_chain[i];
-
+ unsigned misalign;
+ unsigned HOST_WIDE_INT align;
align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
if (alignment_support_scheme == dr_aligned)
misalign = 0;
@@ -9338,24 +9365,6 @@ vectorizable_store (vec_info *vinfo,
misalign);
align = least_bit_hwi (misalign | align);
- if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
- {
- tree perm_mask = perm_mask_for_reverse (vectype);
- tree perm_dest
- = vect_create_destination_var (vect_get_store_rhs (stmt_info),
- vectype);
- tree new_temp = make_ssa_name (perm_dest);
-
- /* Generate the permute statement. */
- gimple *perm_stmt
- = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
- vec_oprnd, perm_mask);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
-
- perm_stmt = SSA_NAME_DEF_STMT (new_temp);
- vec_oprnd = new_temp;
- }
-
/* Compute IFN when LOOP_LENS or final_mask valid. */
machine_mode vmode = TYPE_MODE (vectype);
machine_mode new_vmode = vmode;