[v2] RISC-V: Bugfix for the const vector in single steps
Checks
Commit Message
From: Pan Li <pan2.li@intel.com>
This patch would like to fix the below execution failure.
FAIL: gcc.dg/vect/pr92420.c -flto -ffat-lto-objects execution test
There will be one single-step const vector like { -4, 4, -3, 5, -2, 6, -1, 7, ...}.
For such const vector generation with single step, we will generate vid
+ diff here. For example as below, given npatterns = 4.
v1= {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
v2 (diff) = {3 - 0, 2 - 1, 1 - 2, 0 - 3, 7 - 4, 6 - 5, 5 - 6, 4 - 7...}
= {3, 1, -1, -3, 3, 1, -1, -3 ...}
v1 = v2 + vid.
Unfortunately, that cannot work well for { -4, 4, -3, 5, -2, 6, -1, 7, ...}
because that code gen has one implicit requirement on the diff: the diff
sequence must repeat with a period of npatterns, as the v2 (diff) above does.
The diff between { -4, 4, -3, 5, -2, 6, -1, 7, ...} and vid does not repeat
with npatterns as its period, so we generated wrong code. We implement a new
code gen for sequences like { -4, 4, -3, 5, -2, 6, -1, 7, ...}.
The below tests are passed for this patch.
* The RV64 regression test with rv64gcv configuration.
* The run test gcc.dg/vect/pr92420.c for below configurations.
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
gcc/ChangeLog:
* config/riscv/riscv-v.cc (expand_const_vector): Add restriction
for the vid-diff code gen and implement general one.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/bug-7.c: New test.
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/riscv-v.cc | 84 +++++++++++++++----
.../gcc.target/riscv/rvv/autovec/bug-7.c | 61 ++++++++++++++
2 files changed, 130 insertions(+), 15 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
Comments
+ bool diff_seq_repeated_p = true;
+
+ for (unsigned i = 0; i < v.npatterns (); i++)
+ {
+ poly_int64 diff_0 = rtx_to_poly_int64 (builder.elt (i)) - i;
+ poly_int64 diff_1 = rtx_to_poly_int64 (
+ builder.elt (v.npatterns () + i)) - v.npatterns () - i;
+
+ if (maybe_ne (diff_0, diff_1))
+ {
+ diff_seq_repeated_p = false;
+ break;
+ }
+ }
This code should be wrapped as a helper function inside class rvv_builder
juzhe.zhong@rivai.ai
From: pan2.li
Date: 2023-12-20 14:56
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng; jeffreyalaw
Subject: [PATCH v2] RISC-V: Bugfix for the const vector in single steps
From: Pan Li <pan2.li@intel.com>
This patch would like to fix the below execution failure.
FAIL: gcc.dg/vect/pr92420.c -flto -ffat-lto-objects execution test
There will be one single-step const vector like { -4, 4, -3, 5, -2, 6, -1, 7, ...}.
For such const vector generation with single step, we will generate vid
+ diff here. For example as below, given npatterns = 4.
v1= {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
v2 (diff) = {3 - 0, 2 - 1, 1 - 2, 0 - 3, 7 - 4, 6 - 5, 5 - 6, 4 - 7...}
= {3, 1, -1, -3, 3, 1, -1, -3 ...}
v1 = v2 + vid.
Unfortunately, that cannot work well for { -4, 4, -3, 5, -2, 6, -1, 7, ...}
because that code gen has one implicit requirement on the diff: the diff
sequence must repeat with a period of npatterns, as the v2 (diff) above does.
The diff between { -4, 4, -3, 5, -2, 6, -1, 7, ...} and vid does not repeat
with npatterns as its period, so we generated wrong code. We implement a new
code gen for sequences like { -4, 4, -3, 5, -2, 6, -1, 7, ...}.
The below tests are passed for this patch.
* The RV64 regression test with rv64gcv configuration.
* The run test gcc.dg/vect/pr92420.c for below configurations.
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
gcc/ChangeLog:
* config/riscv/riscv-v.cc (expand_const_vector): Add restriction
for the vid-diff code gen and implement general one.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/bug-7.c: New test.
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/riscv-v.cc | 84 +++++++++++++++----
.../gcc.target/riscv/rvv/autovec/bug-7.c | 61 ++++++++++++++
2 files changed, 130 insertions(+), 15 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 486f5deb296..5a5899e85ae 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1257,24 +1257,78 @@ expand_const_vector (rtx target, rtx src)
else
{
/* Generate the variable-length vector following this rule:
- { a, b, a, b, a + step, b + step, a + step*2, b + step*2, ...}
- E.g. { 3, 2, 1, 0, 7, 6, 5, 4, ... } */
- /* Step 2: Generate diff = TARGET - VID:
- { 3-0, 2-1, 1-2, 0-3, 7-4, 6-5, 5-6, 4-7, ... }*/
+ { a, b, a + step, b + step, a + step*2, b + step*2, ... } */
rvv_builder v (builder.mode (), builder.npatterns (), 1);
- for (unsigned int i = 0; i < v.npatterns (); ++i)
+ bool diff_seq_repeated_p = true;
+
+ for (unsigned i = 0; i < v.npatterns (); i++)
+ {
+ poly_int64 diff_0 = rtx_to_poly_int64 (builder.elt (i)) - i;
+ poly_int64 diff_1 = rtx_to_poly_int64 (
+ builder.elt (v.npatterns () + i)) - v.npatterns () - i;
+
+ if (maybe_ne (diff_0, diff_1))
+ {
+ diff_seq_repeated_p = false;
+ break;
+ }
+ }
+
+ if (diff_seq_repeated_p)
+ {
+ /* Case 1: For example as below:
+ {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
+ We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is
+ repeated as below after minus vid.
+ {3, 1, -1, -3, 3, 1, -1, -3...}
+ Then we can simplify the diff code gen to at most
+ npatterns(). */
+
+ /* Step 1: Generate diff = TARGET - VID. */
+ for (unsigned int i = 0; i < v.npatterns (); ++i)
+ {
+ poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
+ v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+ }
+
+ /* Step 2: Generate result = VID + diff. */
+ rtx vec = v.build ();
+ rtx add_ops[] = {target, vid, vec};
+ emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+ BINARY_OP, add_ops);
+ }
+ else
{
- /* Calculate the diff between the target sequence and
- vid sequence. The elt (i) can be either const_int or
- const_poly_int. */
- poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
- v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+ /* Case 2: For example as below:
+ { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }
+ */
+
+ /* Step 1: Generate { a, b, a, b, ... } */
+ for (unsigned int i = 0; i < v.npatterns (); ++i)
+ v.quick_push (builder.elt (i));
+ rtx new_base = v.build ();
+
+ /* Step 2: Generate tmp = VID >> LOG2 (NPATTERNS). */
+ rtx shift_count
+ = gen_int_mode (exact_log2 (builder.npatterns ()),
+ builder.inner_mode ());
+ rtx tmp = expand_simple_binop (builder.mode (), LSHIFTRT,
+ vid, shift_count, NULL_RTX,
+ false, OPTAB_DIRECT);
+
+ /* Step 3: Generate tmp2 = tmp * step. */
+ rtx tmp2 = gen_reg_rtx (builder.mode ());
+ rtx step
+ = simplify_binary_operation (MINUS, builder.inner_mode (),
+ builder.elt (v.npatterns()),
+ builder.elt (0));
+ expand_vec_series (tmp2, const0_rtx, step, tmp);
+
+ /* Step 4: Generate target = tmp2 + new_base. */
+ rtx add_ops[] = {target, tmp2, new_base};
+ emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+ BINARY_OP, add_ops);
}
- /* Step 2: Generate result = VID + diff. */
- rtx vec = v.build ();
- rtx add_ops[] = {target, vid, vec};
- emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
- BINARY_OP, add_ops);
}
}
else if (builder.interleaved_stepped_npatterns_p ())
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
new file mode 100644
index 00000000000..9acac391f65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
@@ -0,0 +1,61 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#define N 4
+struct C { int l, r; };
+struct C a[N], b[N], c[N];
+struct C a1[N], b1[N], c1[N];
+
+void __attribute__((noinline))
+init_data_vec (struct C * __restrict a, struct C * __restrict b,
+ struct C * __restrict c)
+{
+ int i;
+
+ for (i = 0; i < N; ++i)
+ {
+ a[i].l = N - i;
+ a[i].r = i - N;
+
+ b[i].l = i - N;
+ b[i].r = i + N;
+
+ c[i].l = -1 - i;
+ c[i].r = 2 * N - 1 - i;
+ }
+}
+
+int
+main ()
+{
+ int i;
+
+ init_data_vec (a, b, c);
+
+#pragma GCC novector
+ for (i = 0; i < N; ++i)
+ {
+ a1[i].l = N - i;
+ a1[i].r = i - N;
+
+ b1[i].l = i - N;
+ b1[i].r = i + N;
+
+ c1[i].l = -1 - i;
+ c1[i].r = 2 * N - 1 - i;
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i].l != a1[i].l || a[i].r != a1[i].r)
+ __builtin_abort ();
+
+ if (b[i].l != b1[i].l || b[i].r != b1[i].r)
+ __builtin_abort ();
+
+ if (c[i].l != c1[i].l || c[i].r != c1[i].r)
+ __builtin_abort ();
+ }
+
+ return 0;
+}
--
2.34.1
Thanks for reviewing, sure and will send V3 for it.
Pan
From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai>
Sent: Wednesday, December 20, 2023 2:59 PM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Li, Pan2 <pan2.li@intel.com>; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng <kito.cheng@gmail.com>; jeffreyalaw <jeffreyalaw@gmail.com>
Subject: Re: [PATCH v2] RISC-V: Bugfix for the const vector in single steps
+ bool diff_seq_repeated_p = true;
+
+ for (unsigned i = 0; i < v.npatterns (); i++)
+ {
+ poly_int64 diff_0 = rtx_to_poly_int64 (builder.elt (i)) - i;
+ poly_int64 diff_1 = rtx_to_poly_int64 (
+ builder.elt (v.npatterns () + i)) - v.npatterns () - i;
+
+ if (maybe_ne (diff_0, diff_1))
+ {
+ diff_seq_repeated_p = false;
+ break;
+ }
+ }
This code should be wrapped as a helper function inside class rvv_builder
________________________________
juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>
From: pan2.li<mailto:pan2.li@intel.com>
Date: 2023-12-20 14:56
To: gcc-patches<mailto:gcc-patches@gcc.gnu.org>
CC: juzhe.zhong<mailto:juzhe.zhong@rivai.ai>; pan2.li<mailto:pan2.li@intel.com>; yanzhang.wang<mailto:yanzhang.wang@intel.com>; kito.cheng<mailto:kito.cheng@gmail.com>; jeffreyalaw<mailto:jeffreyalaw@gmail.com>
Subject: [PATCH v2] RISC-V: Bugfix for the const vector in single steps
From: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>
This patch would like to fix the below execution failure.
FAIL: gcc.dg/vect/pr92420.c -flto -ffat-lto-objects execution test
There will be one single-step const vector like { -4, 4, -3, 5, -2, 6, -1, 7, ...}.
For such const vector generation with single step, we will generate vid
+ diff here. For example as below, given npatterns = 4.
v1= {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
v2 (diff) = {3 - 0, 2 - 1, 1 - 2, 0 - 3, 7 - 4, 6 - 5, 5 - 6, 4 - 7...}
= {3, 1, -1, -3, 3, 1, -1, -3 ...}
v1 = v2 + vid.
Unfortunately, that cannot work well for { -4, 4, -3, 5, -2, 6, -1, 7, ...}
because that code gen has one implicit requirement on the diff: the diff
sequence must repeat with a period of npatterns, as the v2 (diff) above does.
The diff between { -4, 4, -3, 5, -2, 6, -1, 7, ...} and vid does not repeat
with npatterns as its period, so we generated wrong code. We implement a new
code gen for sequences like { -4, 4, -3, 5, -2, 6, -1, 7, ...}.
The below tests are passed for this patch.
* The RV64 regression test with rv64gcv configuration.
* The run test gcc.dg/vect/pr92420.c for below configurations.
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax
gcc/ChangeLog:
* config/riscv/riscv-v.cc (expand_const_vector): Add restriction
for the vid-diff code gen and implement general one.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/bug-7.c: New test.
Signed-off-by: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>
---
gcc/config/riscv/riscv-v.cc | 84 +++++++++++++++----
.../gcc.target/riscv/rvv/autovec/bug-7.c | 61 ++++++++++++++
2 files changed, 130 insertions(+), 15 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 486f5deb296..5a5899e85ae 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1257,24 +1257,78 @@ expand_const_vector (rtx target, rtx src)
else
{
/* Generate the variable-length vector following this rule:
- { a, b, a, b, a + step, b + step, a + step*2, b + step*2, ...}
- E.g. { 3, 2, 1, 0, 7, 6, 5, 4, ... } */
- /* Step 2: Generate diff = TARGET - VID:
- { 3-0, 2-1, 1-2, 0-3, 7-4, 6-5, 5-6, 4-7, ... }*/
+ { a, b, a + step, b + step, a + step*2, b + step*2, ... } */
rvv_builder v (builder.mode (), builder.npatterns (), 1);
- for (unsigned int i = 0; i < v.npatterns (); ++i)
+ bool diff_seq_repeated_p = true;
+
+ for (unsigned i = 0; i < v.npatterns (); i++)
+ {
+ poly_int64 diff_0 = rtx_to_poly_int64 (builder.elt (i)) - i;
+ poly_int64 diff_1 = rtx_to_poly_int64 (
+ builder.elt (v.npatterns () + i)) - v.npatterns () - i;
+
+ if (maybe_ne (diff_0, diff_1))
+ {
+ diff_seq_repeated_p = false;
+ break;
+ }
+ }
+
+ if (diff_seq_repeated_p)
+ {
+ /* Case 1: For example as below:
+ {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
+ We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is
+ repeated as below after minus vid.
+ {3, 1, -1, -3, 3, 1, -1, -3...}
+ Then we can simplify the diff code gen to at most
+ npatterns(). */
+
+ /* Step 1: Generate diff = TARGET - VID. */
+ for (unsigned int i = 0; i < v.npatterns (); ++i)
+ {
+ poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
+ v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+ }
+
+ /* Step 2: Generate result = VID + diff. */
+ rtx vec = v.build ();
+ rtx add_ops[] = {target, vid, vec};
+ emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+ BINARY_OP, add_ops);
+ }
+ else
{
- /* Calculate the diff between the target sequence and
- vid sequence. The elt (i) can be either const_int or
- const_poly_int. */
- poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
- v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+ /* Case 2: For example as below:
+ { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }
+ */
+
+ /* Step 1: Generate { a, b, a, b, ... } */
+ for (unsigned int i = 0; i < v.npatterns (); ++i)
+ v.quick_push (builder.elt (i));
+ rtx new_base = v.build ();
+
+ /* Step 2: Generate tmp = VID >> LOG2 (NPATTERNS). */
+ rtx shift_count
+ = gen_int_mode (exact_log2 (builder.npatterns ()),
+ builder.inner_mode ());
+ rtx tmp = expand_simple_binop (builder.mode (), LSHIFTRT,
+ vid, shift_count, NULL_RTX,
+ false, OPTAB_DIRECT);
+
+ /* Step 3: Generate tmp2 = tmp * step. */
+ rtx tmp2 = gen_reg_rtx (builder.mode ());
+ rtx step
+ = simplify_binary_operation (MINUS, builder.inner_mode (),
+ builder.elt (v.npatterns()),
+ builder.elt (0));
+ expand_vec_series (tmp2, const0_rtx, step, tmp);
+
+ /* Step 4: Generate target = tmp2 + new_base. */
+ rtx add_ops[] = {target, tmp2, new_base};
+ emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+ BINARY_OP, add_ops);
}
- /* Step 2: Generate result = VID + diff. */
- rtx vec = v.build ();
- rtx add_ops[] = {target, vid, vec};
- emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
- BINARY_OP, add_ops);
}
}
else if (builder.interleaved_stepped_npatterns_p ())
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
new file mode 100644
index 00000000000..9acac391f65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/bug-7.c
@@ -0,0 +1,61 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#define N 4
+struct C { int l, r; };
+struct C a[N], b[N], c[N];
+struct C a1[N], b1[N], c1[N];
+
+void __attribute__((noinline))
+init_data_vec (struct C * __restrict a, struct C * __restrict b,
+ struct C * __restrict c)
+{
+ int i;
+
+ for (i = 0; i < N; ++i)
+ {
+ a[i].l = N - i;
+ a[i].r = i - N;
+
+ b[i].l = i - N;
+ b[i].r = i + N;
+
+ c[i].l = -1 - i;
+ c[i].r = 2 * N - 1 - i;
+ }
+}
+
+int
+main ()
+{
+ int i;
+
+ init_data_vec (a, b, c);
+
+#pragma GCC novector
+ for (i = 0; i < N; ++i)
+ {
+ a1[i].l = N - i;
+ a1[i].r = i - N;
+
+ b1[i].l = i - N;
+ b1[i].r = i + N;
+
+ c1[i].l = -1 - i;
+ c1[i].r = 2 * N - 1 - i;
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i].l != a1[i].l || a[i].r != a1[i].r)
+ __builtin_abort ();
+
+ if (b[i].l != b1[i].l || b[i].r != b1[i].r)
+ __builtin_abort ();
+
+ if (c[i].l != c1[i].l || c[i].r != c1[i].r)
+ __builtin_abort ();
+ }
+
+ return 0;
+}
--
2.34.1
@@ -1257,24 +1257,78 @@ expand_const_vector (rtx target, rtx src)
else
{
/* Generate the variable-length vector following this rule:
- { a, b, a, b, a + step, b + step, a + step*2, b + step*2, ...}
- E.g. { 3, 2, 1, 0, 7, 6, 5, 4, ... } */
- /* Step 2: Generate diff = TARGET - VID:
- { 3-0, 2-1, 1-2, 0-3, 7-4, 6-5, 5-6, 4-7, ... }*/
+ { a, b, a + step, b + step, a + step*2, b + step*2, ... } */
rvv_builder v (builder.mode (), builder.npatterns (), 1);
- for (unsigned int i = 0; i < v.npatterns (); ++i)
+ bool diff_seq_repeated_p = true;
+
+ for (unsigned i = 0; i < v.npatterns (); i++)
+ {
+ poly_int64 diff_0 = rtx_to_poly_int64 (builder.elt (i)) - i;
+ poly_int64 diff_1 = rtx_to_poly_int64 (
+ builder.elt (v.npatterns () + i)) - v.npatterns () - i;
+
+ if (maybe_ne (diff_0, diff_1))
+ {
+ diff_seq_repeated_p = false;
+ break;
+ }
+ }
+
+ if (diff_seq_repeated_p)
+ {
+ /* Case 1: For example as below:
+ {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
+ We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is
+ repeated as below after minus vid.
+ {3, 1, -1, -3, 3, 1, -1, -3...}
+ Then we can simplify the diff code gen to at most
+ npatterns(). */
+
+ /* Step 1: Generate diff = TARGET - VID. */
+ for (unsigned int i = 0; i < v.npatterns (); ++i)
+ {
+ poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
+ v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+ }
+
+ /* Step 2: Generate result = VID + diff. */
+ rtx vec = v.build ();
+ rtx add_ops[] = {target, vid, vec};
+ emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+ BINARY_OP, add_ops);
+ }
+ else
{
- /* Calculate the diff between the target sequence and
- vid sequence. The elt (i) can be either const_int or
- const_poly_int. */
- poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
- v.quick_push (gen_int_mode (diff, v.inner_mode ()));
+ /* Case 2: For example as below:
+ { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }
+ */
+
+ /* Step 1: Generate { a, b, a, b, ... } */
+ for (unsigned int i = 0; i < v.npatterns (); ++i)
+ v.quick_push (builder.elt (i));
+ rtx new_base = v.build ();
+
+ /* Step 2: Generate tmp = VID >> LOG2 (NPATTERNS). */
+ rtx shift_count
+ = gen_int_mode (exact_log2 (builder.npatterns ()),
+ builder.inner_mode ());
+ rtx tmp = expand_simple_binop (builder.mode (), LSHIFTRT,
+ vid, shift_count, NULL_RTX,
+ false, OPTAB_DIRECT);
+
+ /* Step 3: Generate tmp2 = tmp * step. */
+ rtx tmp2 = gen_reg_rtx (builder.mode ());
+ rtx step
+ = simplify_binary_operation (MINUS, builder.inner_mode (),
+ builder.elt (v.npatterns()),
+ builder.elt (0));
+ expand_vec_series (tmp2, const0_rtx, step, tmp);
+
+ /* Step 4: Generate target = tmp2 + new_base. */
+ rtx add_ops[] = {target, tmp2, new_base};
+ emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
+ BINARY_OP, add_ops);
}
- /* Step 2: Generate result = VID + diff. */
- rtx vec = v.build ();
- rtx add_ops[] = {target, vid, vec};
- emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
- BINARY_OP, add_ops);
}
}
else if (builder.interleaved_stepped_npatterns_p ())
new file mode 100644
@@ -0,0 +1,61 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#define N 4
+struct C { int l, r; };
+struct C a[N], b[N], c[N];
+struct C a1[N], b1[N], c1[N];
+
+void __attribute__((noinline))
+init_data_vec (struct C * __restrict a, struct C * __restrict b,
+ struct C * __restrict c)
+{
+ int i;
+
+ for (i = 0; i < N; ++i)
+ {
+ a[i].l = N - i;
+ a[i].r = i - N;
+
+ b[i].l = i - N;
+ b[i].r = i + N;
+
+ c[i].l = -1 - i;
+ c[i].r = 2 * N - 1 - i;
+ }
+}
+
+int
+main ()
+{
+ int i;
+
+ init_data_vec (a, b, c);
+
+#pragma GCC novector
+ for (i = 0; i < N; ++i)
+ {
+ a1[i].l = N - i;
+ a1[i].r = i - N;
+
+ b1[i].l = i - N;
+ b1[i].r = i + N;
+
+ c1[i].l = -1 - i;
+ c1[i].r = 2 * N - 1 - i;
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i].l != a1[i].l || a[i].r != a1[i].r)
+ __builtin_abort ();
+
+ if (b[i].l != b1[i].l || b[i].r != b1[i].r)
+ __builtin_abort ();
+
+ if (c[i].l != c1[i].l || c[i].r != c1[i].r)
+ __builtin_abort ();
+ }
+
+ return 0;
+}