diff mbox series

tree-optimization/107451 - SLP load vectorization issue

Message ID	20221222112019.9549E138FD@imap2.suse-dmz.suse.de
State	Repeat Merge
Headers	Received-SPF: pass (google.com: domain of gcc-patches-bounces+ouuuleilei=gmail.com@gcc.gnu.org designates 2620:52:3:1:0:246e:9693:128c as permitted sender) client-ip=2620:52:3:1:0:246e:9693:128c; DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 127763858D1E Date: Thu, 22 Dec 2022 12:20:19 +0100 (CET) To: gcc-patches@gcc.gnu.org Subject: [PATCH] tree-optimization/107451 - SLP load vectorization issue MIME-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Message-Id: <20221222112019.9549E138FD@imap2.suse-dmz.suse.de> Precedence: list From: Richard Biener via Gcc-patches <gcc-patches@gcc.gnu.org> Reply-To: Richard Biener <rguenther@suse.de> Errors-To: gcc-patches-bounces+ouuuleilei=gmail.com@gcc.gnu.org Sender: "Gcc-patches" <gcc-patches-bounces+ouuuleilei=gmail.com@gcc.gnu.org> X-getmail-retrieved-from-mailbox: =?utf-8?q?INBOX?=
Series	tree-optimization/107451 - SLP load vectorization issue \| tree-optimization/107451 - SLP load vectorization issue

Checks

Context	Check	Description
snail/gcc-patch-check	warning	Git am fail log

Commit Message

Richard Biener Dec. 22, 2022, 11:20 a.m. UTC

  When vectorizing SLP loads with permutations we can access excess
elements when the load vector type is bigger than the group size
and the vectorization factor covers fewer groups than necessary
to fill it.  Since we know the code will only access up to
group_size * VF elements in the unpermuted vector we can simply
fill the rest of the vector with whatever we want.  For simplicity
this patch chooses to repeat the last group.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

	PR tree-optimization/107451
	* tree-vect-stmts.cc (vectorizable_load): Avoid loading
	SLP group members from group numbers in excess of the
	vectorization factor.

	* gcc.dg/torture/pr107451.c: New testcase.
---
 gcc/testsuite/gcc.dg/torture/pr107451.c | 27 +++++++++++++++++++++++++
 gcc/tree-vect-stmts.cc                  | 20 ++++++++++++------
 2 files changed, 41 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr107451.c

diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.dg/torture/pr107451.c b/gcc/testsuite/gcc.dg/torture/pr107451.c
new file mode 100644
index 00000000000..a17574c6896
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr107451.c
@@ -0,0 +1,27 @@ 
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-vectorize -fno-vect-cost-model" } */
+/* { dg-additional-options "-mavx2" { target avx2_runtime } } */
+
+double getdot(int n, const double *x, int inc_x, const double *y)
+{
+  int i, ix = 0;
+  double dot[4] = { 0.0, 0.0, 0.0, 0.0 } ;
+
+  for(i = 0; i < n; i++) {
+      dot[0] += x[ix]   * y[ix]   ;
+      dot[1] += x[ix+1] * y[ix+1] ;
+      dot[2] += x[ix]   * y[ix+1] ;
+      dot[3] += x[ix+1] * y[ix]   ;
+      ix += inc_x ;
+  }
+
+  return dot[0] + dot[1] + dot[2] + dot[3];
+}
+
+int main()
+{
+  double x[2] = {0, 0}, y[2] = {0, 0};
+  if (getdot(1, x, 4096*4096, y) != 0.)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 5485da58b38..8f8deaf82bc 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -9235,6 +9235,7 @@  vectorizable_load (vec_info *vinfo,
       unsigned int group_el = 0;
       unsigned HOST_WIDE_INT
 	elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
+      unsigned int n_groups = 0;
       for (j = 0; j < ncopies; j++)
 	{
 	  if (nloads > 1)
@@ -9256,12 +9257,19 @@  vectorizable_load (vec_info *vinfo,
 	      if (! slp
 		  || group_el == group_size)
 		{
-		  tree newoff = copy_ssa_name (running_off);
-		  gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
-						      running_off, stride_step);
-		  vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
-
-		  running_off = newoff;
+		  n_groups++;
+		  /* When doing SLP make sure to not load elements from
+		     the next vector iteration, those will not be accessed
+		     so just use the last element again.  See PR107451.  */
+		  if (!slp || known_lt (n_groups, vf))
+		    {
+		      tree newoff = copy_ssa_name (running_off);
+		      gimple *incr
+			= gimple_build_assign (newoff, POINTER_PLUS_EXPR,
+					       running_off, stride_step);
+		      vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
+		      running_off = newoff;
+		    }
 		  group_el = 0;
 		}
 	    }