[v1] RISC-V: Bugfix for the const vector in single steps

Message ID 20231220023922.1076198-1-pan2.li@intel.com
State Unresolved
Headers
Series [v1] RISC-V: Bugfix for the const vector in single steps |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Li, Pan2 Dec. 20, 2023, 2:39 a.m. UTC
  From: Pan Li <pan2.li@intel.com>

For generating the const vector with single step, we have code
gen similar as below.  We have npatterns = 4.

v1= {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }

v2 (diff) = {3 - 0, 2 - 1, 1 - 2, 0 - 3, 7 - 4, 6 - 5, 5 - 6, 4 - 7...}
          = {3, 1, -1, 3, 3, 1, -1, 3 ...}

v1 = vd + vid.

But this requires the diff is npattern size repeated like {3, 1, -1, 3}
as above. And it cannot take care of single step as below:

{ -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ...

This patch would like to add the restriction to above code gen and
implement one for the general case.

gcc/ChangeLog:

	* config/riscv/riscv-v.cc (expand_const_vector): Add restriction
	for the vid-diff code gen and implement general one.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/bug-7.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/riscv-v.cc                   | 73 +++++++++++++++----
 .../gcc.target/riscv/rvv/autovec/bug-7.c      | 61 ++++++++++++++++
 2 files changed, 119 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
  

Comments

juzhe.zhong@rivai.ai Dec. 20, 2023, 2:50 a.m. UTC | #1
+       if (known_eq (ele_0 - 0, ele_n - v.npatterns ()))


->

for (i = 0; i < v.npatterns (); )
  check each nelt of npatterns is equal to vid.


juzhe.zhong@rivai.ai
 
From: pan2.li
Date: 2023-12-20 10:39
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Bugfix for the const vector in single steps
From: Pan Li <pan2.li@intel.com>
 
For generating the const vector with single step, we have code
gen similar as below.  We have npatterns = 4.
 
v1= {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
 
v2 (diff) = {3 - 0, 2 - 1, 1 - 2, 0 - 3, 7 - 4, 6 - 5, 5 - 6, 4 - 7...}
          = {3, 1, -1, 3, 3, 1, -1, 3 ...}
 
v1 = vd + vid.
 
But this requires the diff is npattern size repeated like {3, 1, -1, 3}
as above. And it cannot take care of single step as below:
 
{ -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ...
 
This patch would like to add the restriction to above code gen and
implement one for the general case.
 
gcc/ChangeLog:
 
* config/riscv/riscv-v.cc (expand_const_vector): Add restriction
for the vid-diff code gen and implement general one.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/bug-7.c: New test.
 
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/riscv-v.cc                   | 73 +++++++++++++++----
.../gcc.target/riscv/rvv/autovec/bug-7.c      | 61 ++++++++++++++++
2 files changed, 119 insertions(+), 15 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
 
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 486f5deb296..946588b7b1f 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1257,24 +1257,67 @@ expand_const_vector (rtx target, rtx src)
  else
    {
      /* Generate the variable-length vector following this rule:
- { a, b, a, b, a + step, b + step, a + step*2, b + step*2, ...}
-    E.g. { 3, 2, 1, 0, 7, 6, 5, 4, ... } */
-       /* Step 2: Generate diff = TARGET - VID:
- { 3-0, 2-1, 1-2, 0-3, 7-4, 6-5, 5-6, 4-7, ... }*/
+ { a, b, a + step, b + step, a + step*2, b + step*2, ... }  */
      rvv_builder v (builder.mode (), builder.npatterns (), 1);
-       for (unsigned int i = 0; i < v.npatterns (); ++i)
+       poly_int64 ele_0 = rtx_to_poly_int64 (builder.elt (0));
+       poly_int64 ele_n
+ = rtx_to_poly_int64 (builder.elt (v.npatterns ()));
+
+       if (known_eq (ele_0 - 0, ele_n - v.npatterns ()))
+ {
+   /* Case 1: For example as below:
+      {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
+      We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is
+      repeated as below after minus vid.
+      {3, 1, -1, -3, 3, 1, -1, -3...}
+      Then we can simplify the diff code gen to at most
+      npatterns().  */
+
+   /* Step 1: Generate diff = TARGET - VID.  */
+   for (unsigned int i = 0; i < v.npatterns (); ++i)
+     {
+      poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
+      v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+     }
+
+   /* Step 2: Generate result = VID + diff.  */
+   rtx vec = v.build ();
+   rtx add_ops[] = {target, vid, vec};
+   emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+    BINARY_OP, add_ops);
+ }
+       else
{
-   /* Calculate the diff between the target sequence and
-      vid sequence.  The elt (i) can be either const_int or
-      const_poly_int. */
-   poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
-   v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+   /* Case 2: For example as below:
+      { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }
+    */
+
+   /* Step 1: Generate { a, b, a, b, ... }  */
+   for (unsigned int i = 0; i < v.npatterns (); ++i)
+     v.quick_push (builder.elt (i));
+   rtx new_base = v.build ();
+
+   /* Step 2: Generate tmp = VID >> LOG2 (NPATTERNS).  */
+   rtx shift_count
+     = gen_int_mode (exact_log2 (builder.npatterns ()),
+     builder.inner_mode ());
+   rtx tmp = expand_simple_binop (builder.mode (), LSHIFTRT,
+ vid, shift_count, NULL_RTX,
+ false, OPTAB_DIRECT);
+
+   /* Step 3: Generate tmp2 = tmp * step.  */
+   rtx tmp2 = gen_reg_rtx (builder.mode ());
+   rtx step
+     = simplify_binary_operation (MINUS, builder.inner_mode (),
+ builder.elt (v.npatterns()),
+ builder.elt (0));
+   expand_vec_series (tmp2, const0_rtx, step, tmp);
+
+   /* Step 4: Generate target = tmp2 + new_base.  */
+   rtx add_ops[] = {target, tmp2, new_base};
+   emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+    BINARY_OP, add_ops);
}
-       /* Step 2: Generate result = VID + diff.  */
-       rtx vec = v.build ();
-       rtx add_ops[] = {target, vid, vec};
-       emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
- BINARY_OP, add_ops);
    }
}
       else if (builder.interleaved_stepped_npatterns_p ())
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
new file mode 100644
index 00000000000..9acac391f65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
@@ -0,0 +1,61 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#define N 4
+struct C { int l, r; };
+struct C a[N], b[N], c[N];
+struct C a1[N], b1[N], c1[N];
+
+void __attribute__((noinline))
+init_data_vec (struct C * __restrict a, struct C * __restrict b,
+        struct C * __restrict c)
+{
+  int i;
+
+  for (i = 0; i < N; ++i)
+    {
+      a[i].l = N - i;
+      a[i].r = i - N;
+
+      b[i].l = i - N;
+      b[i].r = i + N;
+
+      c[i].l = -1 - i;
+      c[i].r = 2 * N - 1 - i;
+    }
+}
+
+int
+main ()
+{
+  int i;
+
+  init_data_vec (a, b, c);
+
+#pragma GCC novector
+  for (i = 0; i < N; ++i)
+    {
+      a1[i].l = N - i;
+      a1[i].r = i - N;
+
+      b1[i].l = i - N;
+      b1[i].r = i + N;
+
+      c1[i].l = -1 - i;
+      c1[i].r = 2 * N - 1 - i;
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      if (a[i].l != a1[i].l || a[i].r != a1[i].r)
+ __builtin_abort ();
+
+      if (b[i].l != b1[i].l || b[i].r != b1[i].r)
+ __builtin_abort ();
+
+      if (c[i].l != c1[i].l || c[i].r != c1[i].r)
+ __builtin_abort ();
+    }
+
+  return 0;
+}
-- 
2.34.1
  
Jeff Law Dec. 20, 2023, 4:02 a.m. UTC | #2
On 12/19/23 19:50, juzhe.zhong@rivai.ai wrote:
> 
> +       if (known_eq (ele_0 - 0, ele_n - v.npatterns ()))
> 
> 
> ->
> 
> for (i = 0; i < v.npatterns (); )
>    check each nelt of npatterns is equal to vid.
Pan -- please indicate what testing was performed.  The standard is to 
test with and without the patch to verify there are no regressions.  You 
don't have to test every multilib or anything like that.  Just pick a 
configuration and test it.

No patch should be committed to the tree without this basic information. 
  We've been lax on that policy, but that really needs to change.

jeff
  
Li, Pan2 Dec. 20, 2023, 4:29 a.m. UTC | #3
Oh, I see. Thanks Jeff for suggestion, will refine the commit log in V2.

Pan

-----Original Message-----
From: Jeff Law <jeffreyalaw@gmail.com> 
Sent: Wednesday, December 20, 2023 12:03 PM
To: juzhe.zhong@rivai.ai; Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng <kito.cheng@gmail.com>
Subject: Re: [PATCH v1] RISC-V: Bugfix for the const vector in single steps



On 12/19/23 19:50, juzhe.zhong@rivai.ai wrote:
> 
> +       if (known_eq (ele_0 - 0, ele_n - v.npatterns ()))
> 
> 
> ->
> 
> for (i = 0; i < v.npatterns (); )
>    check each nelt of npatterns is equal to vid.
Pan -- please indicate what testing was performed.  The standard is to 
test with and without the patch to verify there are no regressions.  You 
don't have to test every multilib or anything like that.  Just pick a 
configuration and test it.

No patch should be committed to the tree without this basic information. 
  We've been lax on that policy, but that really needs to change.

jeff
  

Patch

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 486f5deb296..946588b7b1f 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1257,24 +1257,67 @@  expand_const_vector (rtx target, rtx src)
 	  else
 	    {
 	      /* Generate the variable-length vector following this rule:
-		 { a, b, a, b, a + step, b + step, a + step*2, b + step*2, ...}
-		   E.g. { 3, 2, 1, 0, 7, 6, 5, 4, ... } */
-	      /* Step 2: Generate diff = TARGET - VID:
-		 { 3-0, 2-1, 1-2, 0-3, 7-4, 6-5, 5-6, 4-7, ... }*/
+		{ a, b, a + step, b + step, a + step*2, b + step*2, ... }  */
 	      rvv_builder v (builder.mode (), builder.npatterns (), 1);
-	      for (unsigned int i = 0; i < v.npatterns (); ++i)
+	      poly_int64 ele_0 = rtx_to_poly_int64 (builder.elt (0));
+	      poly_int64 ele_n
+		= rtx_to_poly_int64 (builder.elt (v.npatterns ()));
+
+	      if (known_eq (ele_0 - 0, ele_n - v.npatterns ()))
+		{
+		  /* Case 1: For example as below:
+		     {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
+		     We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is
+		     repeated as below after minus vid.
+		     {3, 1, -1, -3, 3, 1, -1, -3...}
+		     Then we can simplify the diff code gen to at most
+		     npatterns().  */
+
+		  /* Step 1: Generate diff = TARGET - VID.  */
+		  for (unsigned int i = 0; i < v.npatterns (); ++i)
+		    {
+		     poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
+		     v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+		    }
+
+		  /* Step 2: Generate result = VID + diff.  */
+		  rtx vec = v.build ();
+		  rtx add_ops[] = {target, vid, vec};
+		  emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+				   BINARY_OP, add_ops);
+		}
+	      else
 		{
-		  /* Calculate the diff between the target sequence and
-		     vid sequence.  The elt (i) can be either const_int or
-		     const_poly_int. */
-		  poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
-		  v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+		  /* Case 2: For example as below:
+		     { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }
+		   */
+
+		  /* Step 1: Generate { a, b, a, b, ... }  */
+		  for (unsigned int i = 0; i < v.npatterns (); ++i)
+		    v.quick_push (builder.elt (i));
+		  rtx new_base = v.build ();
+
+		  /* Step 2: Generate tmp = VID >> LOG2 (NPATTERNS).  */
+		  rtx shift_count
+		    = gen_int_mode (exact_log2 (builder.npatterns ()),
+				    builder.inner_mode ());
+		  rtx tmp = expand_simple_binop (builder.mode (), LSHIFTRT,
+						 vid, shift_count, NULL_RTX,
+						 false, OPTAB_DIRECT);
+
+		  /* Step 3: Generate tmp2 = tmp * step.  */
+		  rtx tmp2 = gen_reg_rtx (builder.mode ());
+		  rtx step
+		    = simplify_binary_operation (MINUS, builder.inner_mode (),
+						 builder.elt (v.npatterns()),
+						 builder.elt (0));
+		  expand_vec_series (tmp2, const0_rtx, step, tmp);
+
+		  /* Step 4: Generate target = tmp2 + new_base.  */
+		  rtx add_ops[] = {target, tmp2, new_base};
+		  emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+				   BINARY_OP, add_ops);
 		}
-	      /* Step 2: Generate result = VID + diff.  */
-	      rtx vec = v.build ();
-	      rtx add_ops[] = {target, vid, vec};
-	      emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
-				BINARY_OP, add_ops);
 	    }
 	}
       else if (builder.interleaved_stepped_npatterns_p ())
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
new file mode 100644
index 00000000000..9acac391f65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
@@ -0,0 +1,61 @@ 
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#define N 4
+struct C { int l, r; };
+struct C a[N], b[N], c[N];
+struct C a1[N], b1[N], c1[N];
+
+void __attribute__((noinline))
+init_data_vec (struct C * __restrict a, struct C * __restrict b,
+	       struct C * __restrict c)
+{
+  int i;
+
+  for (i = 0; i < N; ++i)
+    {
+      a[i].l = N - i;
+      a[i].r = i - N;
+
+      b[i].l = i - N;
+      b[i].r = i + N;
+
+      c[i].l = -1 - i;
+      c[i].r = 2 * N - 1 - i;
+    }
+}
+
+int
+main ()
+{
+  int i;
+
+  init_data_vec (a, b, c);
+
+#pragma GCC novector
+  for (i = 0; i < N; ++i)
+    {
+      a1[i].l = N - i;
+      a1[i].r = i - N;
+
+      b1[i].l = i - N;
+      b1[i].r = i + N;
+
+      c1[i].l = -1 - i;
+      c1[i].r = 2 * N - 1 - i;
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      if (a[i].l != a1[i].l || a[i].r != a1[i].r)
+	__builtin_abort ();
+
+      if (b[i].l != b1[i].l || b[i].r != b1[i].r)
+	__builtin_abort ();
+
+      if (c[i].l != c1[i].l || c[i].r != c1[i].r)
+	__builtin_abort ();
+    }
+
+  return 0;
+}