[10/10] vect: Consider vec_perm costing for VMAT_CONTIGUOUS_REVERSE

Message ID 7514680ad7b9b859a054ca1a59356f58b5ac9089.1694657495.git.linkw@linux.ibm.com
State Unresolved
Headers
Series vect: Move costing next to the transform for vect store |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Kewen.Lin Sept. 14, 2023, 3:11 a.m. UTC
  For VMAT_CONTIGUOUS_REVERSE, the transform code in function
vectorizable_store generates a VEC_PERM_EXPR stmt before
storing, but it's never considered in costing.

This patch makes the costing account for that vec_perm.  It
also reorders the transform code slightly so that the
costing_p path can return early.

gcc/ChangeLog:

	* tree-vect-stmts.cc (vectorizable_store): Consider generated
	VEC_PERM_EXPR stmt for VMAT_CONTIGUOUS_REVERSE in costing as
	vec_perm.

gcc/testsuite/ChangeLog:

	* gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c: New test.
---
 .../costmodel/ppc/costmodel-vect-store-2.c    | 29 +++++++++
 gcc/tree-vect-stmts.cc                        | 63 +++++++++++--------
 2 files changed, 65 insertions(+), 27 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c
  

Comments

Richard Biener Sept. 27, 2023, 11:30 a.m. UTC | #1
On Thu, Sep 14, 2023 at 5:12 AM Kewen Lin <linkw@linux.ibm.com> wrote:
>
> For VMAT_CONTIGUOUS_REVERSE, the transform code in function
> vectorizable_store generates a VEC_PERM_EXPR stmt before
> storing, but it's never considered in costing.
>
> This patch is to make it consider vec_perm in costing, it
> adjusts the order of transform code a bit to make it easy
> to early return for costing_p.

OK.

> gcc/ChangeLog:
>
>         * tree-vect-stmts.cc (vectorizable_store): Consider generated
>         VEC_PERM_EXPR stmt for VMAT_CONTIGUOUS_REVERSE in costing as
>         vec_perm.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c: New test.
> ---
>  .../costmodel/ppc/costmodel-vect-store-2.c    | 29 +++++++++
>  gcc/tree-vect-stmts.cc                        | 63 +++++++++++--------
>  2 files changed, 65 insertions(+), 27 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c
>
> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c
> new file mode 100644
> index 00000000000..72b67cf9040
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-additional-options "-mvsx" } */
> +
> +/* Verify we do cost the required vec_perm.  */
> +
> +int
> +foo (int *a, int *b, int len)
> +{
> +  int i;
> +  int *a1 = a;
> +  int *a0 = a1 - 4;
> +  for (i = 0; i < len; i++)
> +    {
> +      *b = *a0 + *a1;
> +      b--;
> +      a0++;
> +      a1++;
> +    }
> +  return 0;
> +}
> +
> +/* The reason why it doesn't check the exact count is that
> +   we can get more than 1 vec_perm when it's compiled with
> +   partial vector capability like Power10 (retrying for
> +   the epilogue) or it's compiled without unaligned vector
> +   memory access support (realign).  */
> +/* { dg-final { scan-tree-dump {\mvec_perm\M} "vect" } } */
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 3d451c80bca..ce925cc1d53 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -9279,6 +9279,40 @@ vectorizable_store (vec_info *vinfo,
>        stmt_vec_info next_stmt_info = first_stmt_info;
>        for (i = 0; i < vec_num; i++)
>         {
> +         if (!costing_p)
> +           {
> +             if (slp)
> +               vec_oprnd = vec_oprnds[i];
> +             else if (grouped_store)
> +               /* For grouped stores vectorized defs are interleaved in
> +                  vect_permute_store_chain().  */
> +               vec_oprnd = result_chain[i];
> +           }
> +
> +         if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
> +           {
> +             if (costing_p)
> +               inside_cost += record_stmt_cost (cost_vec, 1, vec_perm,
> +                                                stmt_info, 0, vect_body);
> +             else
> +               {
> +                 tree perm_mask = perm_mask_for_reverse (vectype);
> +                 tree perm_dest = vect_create_destination_var (
> +                   vect_get_store_rhs (stmt_info), vectype);
> +                 tree new_temp = make_ssa_name (perm_dest);
> +
> +                 /* Generate the permute statement.  */
> +                 gimple *perm_stmt
> +                   = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
> +                                          vec_oprnd, perm_mask);
> +                 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
> +                                              gsi);
> +
> +                 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
> +                 vec_oprnd = new_temp;
> +               }
> +           }
> +
>           if (costing_p)
>             {
>               vect_get_store_cost (vinfo, stmt_info, 1,
> @@ -9294,8 +9328,6 @@ vectorizable_store (vec_info *vinfo,
>
>               continue;
>             }
> -         unsigned misalign;
> -         unsigned HOST_WIDE_INT align;
>
>           tree final_mask = NULL_TREE;
>           tree final_len = NULL_TREE;
> @@ -9315,13 +9347,8 @@ vectorizable_store (vec_info *vinfo,
>             dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
>                                            stmt_info, bump);
>
> -         if (slp)
> -           vec_oprnd = vec_oprnds[i];
> -         else if (grouped_store)
> -           /* For grouped stores vectorized defs are interleaved in
> -              vect_permute_store_chain().  */
> -           vec_oprnd = result_chain[i];
> -
> +         unsigned misalign;
> +         unsigned HOST_WIDE_INT align;
>           align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
>           if (alignment_support_scheme == dr_aligned)
>             misalign = 0;
> @@ -9338,24 +9365,6 @@ vectorizable_store (vec_info *vinfo,
>                                     misalign);
>           align = least_bit_hwi (misalign | align);
>
> -         if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
> -           {
> -             tree perm_mask = perm_mask_for_reverse (vectype);
> -             tree perm_dest
> -               = vect_create_destination_var (vect_get_store_rhs (stmt_info),
> -                                              vectype);
> -             tree new_temp = make_ssa_name (perm_dest);
> -
> -             /* Generate the permute statement.  */
> -             gimple *perm_stmt
> -               = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
> -                                      vec_oprnd, perm_mask);
> -             vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
> -
> -             perm_stmt = SSA_NAME_DEF_STMT (new_temp);
> -             vec_oprnd = new_temp;
> -           }
> -
>           /* Compute IFN when LOOP_LENS or final_mask valid.  */
>           machine_mode vmode = TYPE_MODE (vectype);
>           machine_mode new_vmode = vmode;
> --
> 2.31.1
>
  

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c
new file mode 100644
index 00000000000..72b67cf9040
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-store-2.c
@@ -0,0 +1,29 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-additional-options "-mvsx" } */
+
+/* Verify we do cost the required vec_perm.  */
+
+int
+foo (int *a, int *b, int len)
+{
+  int i;
+  int *a1 = a;
+  int *a0 = a1 - 4;
+  for (i = 0; i < len; i++)
+    {
+      *b = *a0 + *a1;
+      b--;
+      a0++;
+      a1++;
+    }
+  return 0;
+}
+
+/* The reason why it doesn't check the exact count is that
+   we can get more than 1 vec_perm when it's compiled with
+   partial vector capability like Power10 (retrying for
+   the epilogue) or it's compiled without unaligned vector
+   memory access support (realign).  */
+/* { dg-final { scan-tree-dump {\mvec_perm\M} "vect" } } */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 3d451c80bca..ce925cc1d53 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -9279,6 +9279,40 @@  vectorizable_store (vec_info *vinfo,
       stmt_vec_info next_stmt_info = first_stmt_info;
       for (i = 0; i < vec_num; i++)
 	{
+	  if (!costing_p)
+	    {
+	      if (slp)
+		vec_oprnd = vec_oprnds[i];
+	      else if (grouped_store)
+		/* For grouped stores vectorized defs are interleaved in
+		   vect_permute_store_chain().  */
+		vec_oprnd = result_chain[i];
+	    }
+
+	  if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+	    {
+	      if (costing_p)
+		inside_cost += record_stmt_cost (cost_vec, 1, vec_perm,
+						 stmt_info, 0, vect_body);
+	      else
+		{
+		  tree perm_mask = perm_mask_for_reverse (vectype);
+		  tree perm_dest = vect_create_destination_var (
+		    vect_get_store_rhs (stmt_info), vectype);
+		  tree new_temp = make_ssa_name (perm_dest);
+
+		  /* Generate the permute statement.  */
+		  gimple *perm_stmt
+		    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
+					   vec_oprnd, perm_mask);
+		  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
+					       gsi);
+
+		  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
+		  vec_oprnd = new_temp;
+		}
+	    }
+
 	  if (costing_p)
 	    {
 	      vect_get_store_cost (vinfo, stmt_info, 1,
@@ -9294,8 +9328,6 @@  vectorizable_store (vec_info *vinfo,
 
 	      continue;
 	    }
-	  unsigned misalign;
-	  unsigned HOST_WIDE_INT align;
 
 	  tree final_mask = NULL_TREE;
 	  tree final_len = NULL_TREE;
@@ -9315,13 +9347,8 @@  vectorizable_store (vec_info *vinfo,
 	    dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
 					   stmt_info, bump);
 
-	  if (slp)
-	    vec_oprnd = vec_oprnds[i];
-	  else if (grouped_store)
-	    /* For grouped stores vectorized defs are interleaved in
-	       vect_permute_store_chain().  */
-	    vec_oprnd = result_chain[i];
-
+	  unsigned misalign;
+	  unsigned HOST_WIDE_INT align;
 	  align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
 	  if (alignment_support_scheme == dr_aligned)
 	    misalign = 0;
@@ -9338,24 +9365,6 @@  vectorizable_store (vec_info *vinfo,
 				    misalign);
 	  align = least_bit_hwi (misalign | align);
 
-	  if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
-	    {
-	      tree perm_mask = perm_mask_for_reverse (vectype);
-	      tree perm_dest
-		= vect_create_destination_var (vect_get_store_rhs (stmt_info),
-					       vectype);
-	      tree new_temp = make_ssa_name (perm_dest);
-
-	      /* Generate the permute statement.  */
-	      gimple *perm_stmt
-		= gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
-				       vec_oprnd, perm_mask);
-	      vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
-
-	      perm_stmt = SSA_NAME_DEF_STMT (new_temp);
-	      vec_oprnd = new_temp;
-	    }
-
 	  /* Compute IFN when LOOP_LENS or final_mask valid.  */
 	  machine_mode vmode = TYPE_MODE (vectype);
 	  machine_mode new_vmode = vmode;