tree-optimization/112404 - two issues with SLP of .MASK_LOAD

Message ID 20231106130916.CCDC43875DF2@sourceware.org
State Unresolved
Headers
Series tree-optimization/112404 - two issues with SLP of .MASK_LOAD |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Richard Biener Nov. 6, 2023, 1:08 p.m. UTC
  The following fixes an oversight in vect_check_scalar_mask when
the mask is external or constant.  When doing BB vectorization
we need to provide a group_size, best via an overload accepting
the SLP node as argument.

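With that fixed we then run into the issue that the alignment of
the .MASK_LOADs has not been analyzed, because they were not
identified as loads by vect_gather_slp_loads.  This is fixed by
reworking the detection so that nodes with children, like
.MASK_LOAD, are properly identified as loads as well.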

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

Richard.

	PR tree-optimization/112404
	* tree-vectorizer.h (get_mask_type_for_scalar_type): Declare
	overload with SLP node argument.
	* tree-vect-stmts.cc (get_mask_type_for_scalar_type): Implement it.
	(vect_check_scalar_mask): Use it.
	* tree-vect-slp.cc (vect_gather_slp_loads): Properly identify
	loads also for nodes with children, like .MASK_LOAD.
	* tree-vect-loop.cc (vect_analyze_loop_2): Look at the
	representative for load nodes and check whether it is a grouped
	access before looking for load-lanes support.

	* gfortran.dg/pr112404.f90: New testcase.
---
 gcc/testsuite/gfortran.dg/pr112404.f90 | 23 +++++++++++++
 gcc/tree-vect-loop.cc                  | 47 ++++++++++++++------------
 gcc/tree-vect-slp.cc                   | 23 ++++++-------
 gcc/tree-vect-stmts.cc                 | 22 +++++++++++-
 gcc/tree-vectorizer.h                  |  1 +
 5 files changed, 82 insertions(+), 34 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/pr112404.f90
  

Patch

diff --git a/gcc/testsuite/gfortran.dg/pr112404.f90 b/gcc/testsuite/gfortran.dg/pr112404.f90
new file mode 100644
index 00000000000..573fa28164a
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr112404.f90
@@ -0,0 +1,23 @@ 
+! { dg-do compile }
+! { dg-options "-Ofast" }
+! { dg-additional-options "-mavx2" { target avx2 } }
+   SUBROUTINE sfddagd( regime, znt, ite, jte )
+   REAL, DIMENSION( ime, IN) :: regime, znt
+   REAL, DIMENSION( ite, jte) :: wndcor_u 
+   LOGICAL wrf_dm_on_monitor
+   IF( int4 == 1 ) THEN
+     DO j=jts,jtf
+     DO i=itsu,itf
+       reg = regime(i-1,  j) 
+       IF( reg > 10.0 ) THEN
+         znt0 = znt(i-1,  j) + znt(i,  j) 
+         IF( znt0 <= 0.2) THEN
+           wndcor_u(i,j) = 0.2
+         ENDIF
+       ENDIF
+     ENDDO
+     ENDDO
+     IF ( wrf_dm_on_monitor()) THEN
+     ENDIF
+   ENDIF
+   END
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 362856a6507..5213aa0169c 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -2943,17 +2943,19 @@  start_over:
 		   != IFN_LAST)
 	    {
 	      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
-		{
-		  stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
-		      (SLP_TREE_SCALAR_STMTS (load_node)[0]);
-		  /* Use SLP for strided accesses (or if we can't
-		     load-lanes).  */
-		  if (STMT_VINFO_STRIDED_P (stmt_vinfo)
-		      || vect_load_lanes_supported
-			    (STMT_VINFO_VECTYPE (stmt_vinfo),
-			     DR_GROUP_SIZE (stmt_vinfo), false) == IFN_LAST)
-		    break;
-		}
+		if (STMT_VINFO_GROUPED_ACCESS
+		      (SLP_TREE_REPRESENTATIVE (load_node)))
+		  {
+		    stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
+			(SLP_TREE_REPRESENTATIVE (load_node));
+		    /* Use SLP for strided accesses (or if we can't
+		       load-lanes).  */
+		    if (STMT_VINFO_STRIDED_P (stmt_vinfo)
+			|| vect_load_lanes_supported
+			     (STMT_VINFO_VECTYPE (stmt_vinfo),
+			      DR_GROUP_SIZE (stmt_vinfo), false) == IFN_LAST)
+		      break;
+		  }
 
 	      can_use_lanes
 		= can_use_lanes && i == SLP_INSTANCE_LOADS (instance).length ();
@@ -3261,16 +3263,19 @@  again:
 				       "unsupported grouped store\n");
       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node)
 	{
-	  vinfo = SLP_TREE_SCALAR_STMTS (node)[0];
-	  vinfo = DR_GROUP_FIRST_ELEMENT (vinfo);
-	  bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo);
-	  size = DR_GROUP_SIZE (vinfo);
-	  vectype = STMT_VINFO_VECTYPE (vinfo);
-	  if (vect_load_lanes_supported (vectype, size, false) == IFN_LAST
-	      && ! vect_grouped_load_supported (vectype, single_element_p,
-						size))
-	    return opt_result::failure_at (vinfo->stmt,
-					   "unsupported grouped load\n");
+	  vinfo = SLP_TREE_REPRESENTATIVE (node);
+	  if (STMT_VINFO_GROUPED_ACCESS (vinfo))
+	    {
+	      vinfo = DR_GROUP_FIRST_ELEMENT (vinfo);
+	      bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo);
+	      size = DR_GROUP_SIZE (vinfo);
+	      vectype = STMT_VINFO_VECTYPE (vinfo);
+	      if (vect_load_lanes_supported (vectype, size, false) == IFN_LAST
+		  && ! vect_grouped_load_supported (vectype, single_element_p,
+						    size))
+		return opt_result::failure_at (vinfo->stmt,
+					       "unsupported grouped load\n");
+	    }
 	}
     }
 
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 6b8a7b628b6..13137ede8d4 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -2898,22 +2898,21 @@  vect_gather_slp_loads (vec<slp_tree> &loads, slp_tree node,
   if (!node || visited.add (node))
     return;
 
-  if (SLP_TREE_CHILDREN (node).length () == 0)
+  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
+    return;
+
+  if (SLP_TREE_CODE (node) != VEC_PERM_EXPR)
     {
-      if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
-	return;
-      stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
-      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
+      stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
+      if (STMT_VINFO_DATA_REF (stmt_info)
 	  && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
 	loads.safe_push (node);
     }
-  else
-    {
-      unsigned i;
-      slp_tree child;
-      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
-	vect_gather_slp_loads (loads, child, visited);
-    }
+
+  unsigned i;
+  slp_tree child;
+  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+    vect_gather_slp_loads (loads, child, visited);
 }
 
 
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index f895aaf3083..eefb1eec1ef 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2456,7 +2456,8 @@  vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
 
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   if (!mask_vectype)
-    mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
+    mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype),
+						  mask_node_1);
 
   if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
     {
@@ -13386,6 +13387,25 @@  get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
   return truth_type_for (vectype);
 }
 
+/* Function get_mask_type_for_scalar_type.
+
+   Returns the mask type corresponding to a result of comparison
+   of vectors of specified SCALAR_TYPE as supported by target.
+   NODE, if nonnull, is the SLP tree node that will use the returned
+   vector type.  */
+
+tree
+get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
+			       slp_tree node)
+{
+  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node);
+
+  if (!vectype)
+    return NULL;
+
+  return truth_type_for (vectype);
+}
+
 /* Function get_same_sized_vectype
 
    Returns a vector type corresponding to SCALAR_TYPE of size
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 254d172231d..d2ddc2e4ad5 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2207,6 +2207,7 @@  extern tree get_related_vectype_for_scalar_type (machine_mode, tree,
 extern tree get_vectype_for_scalar_type (vec_info *, tree, unsigned int = 0);
 extern tree get_vectype_for_scalar_type (vec_info *, tree, slp_tree);
 extern tree get_mask_type_for_scalar_type (vec_info *, tree, unsigned int = 0);
+extern tree get_mask_type_for_scalar_type (vec_info *, tree, slp_tree);
 extern tree get_same_sized_vectype (tree, tree);
 extern bool vect_chooses_same_modes_p (vec_info *, machine_mode);
 extern bool vect_get_loop_mask_type (loop_vec_info);