tree-optimization/110630 - enhance SLP permute support

Message ID 20230712110100.3B3AD3857726@sourceware.org
State Accepted
Headers
Series tree-optimization/110630 - enhance SLP permute support

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Richard Biener July 12, 2023, 11 a.m. UTC
  The following enhances the existing lowpart extraction support for
SLP VEC_PERM nodes to cover all vector-aligned extractions.  This
allows the existing bb-slp-pr95839.c testcase to be vectorized
with mips -mpaired-single and the new bb-slp-pr95839-3.c testcase
with SSE2.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

	PR tree-optimization/110630
	* tree-vect-slp.cc (vect_add_slp_permutation): New
	offset parameter, honor that for the extract code generation.
	(vectorizable_slp_permutation_1): Handle offsetted identities.

	* gcc.dg/vect/bb-slp-pr95839.c: Make stricter.
	* gcc.dg/vect/bb-slp-pr95839-3.c: New variant testcase.
---
 gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-3.c | 15 +++++++++++++++
 gcc/testsuite/gcc.dg/vect/bb-slp-pr95839.c   |  1 +
 gcc/tree-vect-slp.cc                         | 14 +++++++++-----
 3 files changed, 25 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-3.c
  

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-3.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-3.c
new file mode 100644
index 00000000000..aaee8febf37
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-3.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+/* { dg-additional-options "-w -Wno-psabi" } */
+
+typedef float __attribute__((vector_size(32))) v8f32;
+
+v8f32 f(v8f32 a, v8f32 b)
+{
+  /* Check that we vectorize this CTOR without any loads.  */
+  return (v8f32){a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3],
+		 a[4] + b[4], a[5] + b[5], a[6] + b[6], a[7] + b[7]};
+}
+
+/* { dg-final { scan-tree-dump-not "from scalars" "slp2" } } */
+/* { dg-final { scan-tree-dump "optimized: basic block" "slp2" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839.c
index 931fd46aaaa..d87bbf125c0 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839.c
@@ -10,4 +10,5 @@  v4f32 f(v4f32 a, v4f32 b)
   return (v4f32){a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3]};
 }
 
+/* { dg-final { scan-tree-dump-not "from scalars" "slp2" } } */
 /* { dg-final { scan-tree-dump "optimized: basic block" "slp2" } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 355d078d66e..693621ca990 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -8432,7 +8432,7 @@  vect_transform_slp_perm_load (vec_info *vinfo,
 static void
 vect_add_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
 			  slp_tree node, tree first_def, tree second_def,
-			  tree mask_vec)
+			  tree mask_vec, poly_uint64 identity_offset)
 {
   tree vectype = SLP_TREE_VECTYPE (node);
 
@@ -8470,14 +8470,17 @@  vect_add_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
   else if (!types_compatible_p (TREE_TYPE (first_def), vectype))
     {
       /* For identity permutes we still need to handle the case
-	 of lowpart extracts or concats.  */
+	 of offsetted extracts or concats.  */
       unsigned HOST_WIDE_INT c;
       auto first_def_nunits
 	= TYPE_VECTOR_SUBPARTS (TREE_TYPE (first_def));
       if (known_le (TYPE_VECTOR_SUBPARTS (vectype), first_def_nunits))
 	{
+	  unsigned HOST_WIDE_INT elsz
+	    = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (TREE_TYPE (first_def))));
 	  tree lowpart = build3 (BIT_FIELD_REF, vectype, first_def,
-				 TYPE_SIZE (vectype), bitsize_zero_node);
+				 TYPE_SIZE (vectype),
+				 bitsize_int (identity_offset * elsz));
 	  perm_stmt = gimple_build_assign (perm_dest, lowpart);
 	}
       else if (constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype),
@@ -8709,7 +8712,8 @@  vectorizable_slp_permutation_1 (vec_info *vinfo, gimple_stmt_iterator *gsi,
 	{
 	  indices.new_vector (mask, second_vec.first == -1U ? 1 : 2,
 			      TYPE_VECTOR_SUBPARTS (op_vectype));
-	  bool identity_p = indices.series_p (0, 1, 0, 1);
+	  bool identity_p = (indices.series_p (0, 1, mask[0], 1)
+			     && constant_multiple_p (mask[0], nunits));
 	  machine_mode vmode = TYPE_MODE (vectype);
 	  machine_mode op_vmode = TYPE_MODE (op_vectype);
 	  unsigned HOST_WIDE_INT c;
@@ -8762,7 +8766,7 @@  vectorizable_slp_permutation_1 (vec_info *vinfo, gimple_stmt_iterator *gsi,
 		    = vect_get_slp_vect_def (second_node,
 					     second_vec.second + vi);
 		  vect_add_slp_permutation (vinfo, gsi, node, first_def,
-					    second_def, mask_vec);
+					    second_def, mask_vec, mask[0]);
 		}
 	    }