RISC-V: Fix V_WHOLE && V_FRACT iterator requirement
Checks
Commit Message
From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
This patch fixes the requirement of V_WHOLE and V_FRACT.
E.g. VNx8QI in V_WHOLE has no requirement which is incorrect.
Actually, VNx8QI should be whole(full) mode when TARGET_MIN_VLEN < 128
since when TARGET_MIN_VLEN == 128, VNx8QI is e8mf2 which is fractional
vector.
gcc/ChangeLog:
* config/riscv/vector-iterators.md: Fix requirement.
---
gcc/config/riscv/vector-iterators.md | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
Comments
On 6/9/23 16:32, juzhe.zhong@rivai.ai wrote:
> From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
>
> This patch fixes the requirement of V_WHOLE and V_FRACT.
> E.g. VNx8QI in V_WHOLE has no requirement which is incorrect.
> Actually, VNx8QI should be whole(full) mode when TARGET_MIN_VLEN < 128
> since when TARGET_MIN_VLEN == 128, VNx8QI is e8mf2 which is fractional
> vector.
>
> gcc/ChangeLog:
>
> * config/riscv/vector-iterators.md: Fix requirement.
I actually have the attached already on my local tree (as well as a test),
and wanted to post it with the vec_set patch. I think the alignment helps
a bit with readability.
From 147a459dfbf1fe9d5dd93148f475f42dee3bd94b Mon Sep 17 00:00:00 2001
From: Robin Dapp <rdapp@ventanamicro.com>
Date: Tue, 6 Jun 2023 17:29:26 +0200
Subject: [PATCH] RISC-V: Change V_WHOLE iterator to properly match
instruction.
Currently we emit e.g. an vl1r.v even when loading a mode whose size is
smaller than the hardware vector size. This can happen when reload
decides to switch to another alternative.
This patch fixes the iterator and adds a testcase for the problem.
gcc/ChangeLog:
* config/riscv/vector-iterators.md: Add guards for modes smaller
than the hardware vector size.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c: New test.
---
gcc/config/riscv/vector-iterators.md | 65 ++++++++++++++-----
.../rvv/autovec/vls-vlmax/full-vec-move1.c | 23 +++++++
2 files changed, 72 insertions(+), 16 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index 90743ed76c5..0587325e82c 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -430,32 +430,65 @@ (define_mode_iterator VNX64_QHI [
VNx64QI (VNx64HI "TARGET_MIN_VLEN >= 128")
])
+;; This iterator describes which modes can be moved/loaded/stored by
+;; full-register move instructions (e.g. vl1r.v).
+;; For now we support a maximum vector length of 1024 that can
+;; also be reached by combining multiple hardware registers (mf1, mf2, ...).
+;; This means that e.g. VNx64HI (with a size of 128 bytes) requires
+;; at least a minimum vector length of 128 bits = 16 bytes in order
+;; to be loadable by vl8r.v (mf8).
+;; Apart from that we must make sure that modes smaller than the
+;; vector size are properly guarded so that e.g. VNx4HI is not loaded
+;; by vl1r.v when VL == 128.
(define_mode_iterator V_WHOLE [
- (VNx4QI "TARGET_MIN_VLEN == 32") VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") (VNx128QI "TARGET_MIN_VLEN >= 128")
- (VNx2HI "TARGET_MIN_VLEN == 32") VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") (VNx64HI "TARGET_MIN_VLEN >= 128")
- (VNx1SI "TARGET_MIN_VLEN == 32") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") (VNx32SI "TARGET_MIN_VLEN >= 128")
- (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128") (VNx2DI "TARGET_VECTOR_ELEN_64")
- (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") (VNx16DI "TARGET_MIN_VLEN >= 128")
+ (VNx4QI "TARGET_MIN_VLEN == 32")
+ (VNx8QI "TARGET_MIN_VLEN <= 64")
+ (VNx16QI "TARGET_MIN_VLEN <= 128")
+ (VNx32QI "TARGET_MIN_VLEN <= 256")
+ (VNx64QI "TARGET_MIN_VLEN >= 64 && TARGET_MIN_VLEN <= 512")
+ (VNx128QI "TARGET_MIN_VLEN >= 128 && TARGET_MIN_VLEN <= 1024")
+ (VNx2HI "TARGET_MIN_VLEN == 32")
+ (VNx4HI "TARGET_MIN_VLEN <= 64")
+ (VNx8HI "TARGET_MIN_VLEN <= 128")
+ (VNx16HI "TARGET_MIN_VLEN <= 256")
+ (VNx32HI "TARGET_MIN_VLEN >= 64 && TARGET_MIN_VLEN <= 512")
+ (VNx64HI "TARGET_MIN_VLEN >= 128 && TARGET_MIN_VLEN <= 1024")
+ (VNx1SI "TARGET_MIN_VLEN == 32")
+ (VNx2SI "TARGET_MIN_VLEN <= 64")
+ (VNx4SI "TARGET_MIN_VLEN <= 128")
+ (VNx8SI "TARGET_MIN_VLEN <= 256")
+ (VNx16SI "TARGET_MIN_VLEN >= 64 && TARGET_MIN_VLEN <= 512")
+ (VNx32SI "TARGET_MIN_VLEN >= 128 && TARGET_MIN_VLEN <= 1024")
+ (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN == 64")
+ (VNx2DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN <= 128")
+ (VNx4DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN <= 256")
+ (VNx8DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN <= 512")
+ (VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128
+ && TARGET_MIN_VLEN <= 1024")
(VNx1HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128")
(VNx2HF "TARGET_VECTOR_ELEN_FP_16")
(VNx4HF "TARGET_VECTOR_ELEN_FP_16")
(VNx8HF "TARGET_VECTOR_ELEN_FP_16")
(VNx16HF "TARGET_VECTOR_ELEN_FP_16")
- (VNx32HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32")
+ (VNx32HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 64")
(VNx64HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
(VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN == 32")
- (VNx2SF "TARGET_VECTOR_ELEN_FP_32")
- (VNx4SF "TARGET_VECTOR_ELEN_FP_32")
- (VNx8SF "TARGET_VECTOR_ELEN_FP_32")
- (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
- (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
- (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128")
- (VNx2DF "TARGET_VECTOR_ELEN_FP_64")
- (VNx4DF "TARGET_VECTOR_ELEN_FP_64")
- (VNx8DF "TARGET_VECTOR_ELEN_FP_64")
- (VNx16DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128")
+ (VNx2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN <= 64")
+ (VNx4SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN <= 128")
+ (VNx8SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN <= 256")
+ (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 64
+ && TARGET_MIN_VLEN <= 512")
+ (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128
+ && TARGET_MIN_VLEN <= 1024")
+ (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN <= 64")
+ (VNx2DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN <= 128")
+ (VNx4DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN <= 256")
+ (VNx8DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 64
+ && TARGET_MIN_VLEN <= 512")
+ (VNx16DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128
+ && TARGET_MIN_VLEN <= 1024")
])
(define_mode_iterator V_FRACT [
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
new file mode 100644
index 00000000000..d63f219a1e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
@@ -0,0 +1,23 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -O3 -march=rv64gcv_zvl128b -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+#include <assert.h>
+
+/* This would cause us to emit a vl1r.v for VNx4HImode even when
+ the hardware vector size vl > 64. */
+
+typedef int16_t V __attribute__((vector_size (128)));
+
+int main ()
+{
+ V v;
+ for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++)
+ (v)[i] = i;
+ V res = v;
+ for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++)
+ assert (res[i] == i);
+}
+
+/* { dg-final { scan-assembler-not {vl[1248]r.v} } } */
+/* { dg-final { scan-assembler-times {vl[1248]re16.v} 1 } } */
Ok. If you have done this plz go ahead.
I think it shouldn't be with vec_set patch.
Instead, it obviously should be the separate patch.
juzhe.zhong@rivai.ai
From: Robin Dapp
Date: 2023-06-09 22:37
To: juzhe.zhong; gcc-patches
CC: rdapp.gcc; kito.cheng; palmer; jeffreyalaw
Subject: Re: [PATCH] RISC-V: Fix V_WHOLE && V_FRACT iterator requirement
On 6/9/23 16:32, juzhe.zhong@rivai.ai wrote:
> From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
>
> This patch fixes the requirement of V_WHOLE and V_FRACT.
> E.g. VNx8QI in V_WHOLE has no requirement which is incorrect.
> Actually, VNx8QI should be whole(full) mode when TARGET_MIN_VLEN < 128
> since when TARGET_MIN_VLEN == 128, VNx8QI is e8mf2 which is fractional
> vector.
>
> gcc/ChangeLog:
>
> * config/riscv/vector-iterators.md: Fix requirement.
I actually have the attached already on my local tree (as well as a test),
and wanted to post it with the vec_set patch. I think the alignment helps
a bit with readability.
From 147a459dfbf1fe9d5dd93148f475f42dee3bd94b Mon Sep 17 00:00:00 2001
From: Robin Dapp <rdapp@ventanamicro.com>
Date: Tue, 6 Jun 2023 17:29:26 +0200
Subject: [PATCH] RISC-V: Change V_WHOLE iterator to properly match
instruction.
Currently we emit e.g. an vl1r.v even when loading a mode whose size is
smaller than the hardware vector size. This can happen when reload
decides to switch to another alternative.
This patch fixes the iterator and adds a testcase for the problem.
gcc/ChangeLog:
* config/riscv/vector-iterators.md: Add guards for modes smaller
than the hardware vector size.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c: New test.
---
gcc/config/riscv/vector-iterators.md | 65 ++++++++++++++-----
.../rvv/autovec/vls-vlmax/full-vec-move1.c | 23 +++++++
2 files changed, 72 insertions(+), 16 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index 90743ed76c5..0587325e82c 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -430,32 +430,65 @@ (define_mode_iterator VNX64_QHI [
VNx64QI (VNx64HI "TARGET_MIN_VLEN >= 128")
])
+;; This iterator describes which modes can be moved/loaded/stored by
+;; full-register move instructions (e.g. vl1r.v).
+;; For now we support a maximum vector length of 1024 that can
+;; also be reached by combining multiple hardware registers (mf1, mf2, ...).
+;; This means that e.g. VNx64HI (with a size of 128 bytes) requires
+;; at least a minimum vector length of 128 bits = 16 bytes in order
+;; to be loadable by vl8r.v (mf8).
+;; Apart from that we must make sure that modes smaller than the
+;; vector size are properly guarded so that e.g. VNx4HI is not loaded
+;; by vl1r.v when VL == 128.
(define_mode_iterator V_WHOLE [
- (VNx4QI "TARGET_MIN_VLEN == 32") VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") (VNx128QI "TARGET_MIN_VLEN >= 128")
- (VNx2HI "TARGET_MIN_VLEN == 32") VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") (VNx64HI "TARGET_MIN_VLEN >= 128")
- (VNx1SI "TARGET_MIN_VLEN == 32") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") (VNx32SI "TARGET_MIN_VLEN >= 128")
- (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128") (VNx2DI "TARGET_VECTOR_ELEN_64")
- (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") (VNx16DI "TARGET_MIN_VLEN >= 128")
+ (VNx4QI "TARGET_MIN_VLEN == 32")
+ (VNx8QI "TARGET_MIN_VLEN <= 64")
+ (VNx16QI "TARGET_MIN_VLEN <= 128")
+ (VNx32QI "TARGET_MIN_VLEN <= 256")
+ (VNx64QI "TARGET_MIN_VLEN >= 64 && TARGET_MIN_VLEN <= 512")
+ (VNx128QI "TARGET_MIN_VLEN >= 128 && TARGET_MIN_VLEN <= 1024")
+ (VNx2HI "TARGET_MIN_VLEN == 32")
+ (VNx4HI "TARGET_MIN_VLEN <= 64")
+ (VNx8HI "TARGET_MIN_VLEN <= 128")
+ (VNx16HI "TARGET_MIN_VLEN <= 256")
+ (VNx32HI "TARGET_MIN_VLEN >= 64 && TARGET_MIN_VLEN <= 512")
+ (VNx64HI "TARGET_MIN_VLEN >= 128 && TARGET_MIN_VLEN <= 1024")
+ (VNx1SI "TARGET_MIN_VLEN == 32")
+ (VNx2SI "TARGET_MIN_VLEN <= 64")
+ (VNx4SI "TARGET_MIN_VLEN <= 128")
+ (VNx8SI "TARGET_MIN_VLEN <= 256")
+ (VNx16SI "TARGET_MIN_VLEN >= 64 && TARGET_MIN_VLEN <= 512")
+ (VNx32SI "TARGET_MIN_VLEN >= 128 && TARGET_MIN_VLEN <= 1024")
+ (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN == 64")
+ (VNx2DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN <= 128")
+ (VNx4DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN <= 256")
+ (VNx8DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN <= 512")
+ (VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128
+ && TARGET_MIN_VLEN <= 1024")
(VNx1HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128")
(VNx2HF "TARGET_VECTOR_ELEN_FP_16")
(VNx4HF "TARGET_VECTOR_ELEN_FP_16")
(VNx8HF "TARGET_VECTOR_ELEN_FP_16")
(VNx16HF "TARGET_VECTOR_ELEN_FP_16")
- (VNx32HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32")
+ (VNx32HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 64")
(VNx64HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
(VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN == 32")
- (VNx2SF "TARGET_VECTOR_ELEN_FP_32")
- (VNx4SF "TARGET_VECTOR_ELEN_FP_32")
- (VNx8SF "TARGET_VECTOR_ELEN_FP_32")
- (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
- (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
- (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128")
- (VNx2DF "TARGET_VECTOR_ELEN_FP_64")
- (VNx4DF "TARGET_VECTOR_ELEN_FP_64")
- (VNx8DF "TARGET_VECTOR_ELEN_FP_64")
- (VNx16DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128")
+ (VNx2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN <= 64")
+ (VNx4SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN <= 128")
+ (VNx8SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN <= 256")
+ (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 64
+ && TARGET_MIN_VLEN <= 512")
+ (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128
+ && TARGET_MIN_VLEN <= 1024")
+ (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN <= 64")
+ (VNx2DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN <= 128")
+ (VNx4DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN <= 256")
+ (VNx8DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 64
+ && TARGET_MIN_VLEN <= 512")
+ (VNx16DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128
+ && TARGET_MIN_VLEN <= 1024")
])
(define_mode_iterator V_FRACT [
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
new file mode 100644
index 00000000000..d63f219a1e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
@@ -0,0 +1,23 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -O3 -march=rv64gcv_zvl128b -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+#include <assert.h>
+
+/* This would cause us to emit a vl1r.v for VNx4HImode even when
+ the hardware vector size vl > 64. */
+
+typedef int16_t V __attribute__((vector_size (128)));
+
+int main ()
+{
+ V v;
+ for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++)
+ (v)[i] = i;
+ V res = v;
+ for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++)
+ assert (res[i] == i);
+}
+
+/* { dg-final { scan-assembler-not {vl[1248]r.v} } } */
+/* { dg-final { scan-assembler-times {vl[1248]re16.v} 1 } } */
--
2.40.1
> I think it shouldn't be with vec_set patch.
> Instead, it obviously should be the separate patch.
Yes, I didn't mean in the actual same patch.
Regards
Robin
On 6/9/23 08:37, Robin Dapp wrote:
> On 6/9/23 16:32, juzhe.zhong@rivai.ai wrote:
>> From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
>>
>> This patch fixes the requirement of V_WHOLE and V_FRACT.
>> E.g. VNx8QI in V_WHOLE has no requirement which is incorrect.
>> Actually, VNx8QI should be whole(full) mode when TARGET_MIN_VLEN < 128
>> since when TARGET_MIN_VLEN == 128, VNx8QI is e8mf2 which is fractional
>> vector.
>>
>> gcc/ChangeLog:
>>
>> * config/riscv/vector-iterators.md: Fix requirement.
>
> I actually have the attached already on my local tree (as well as a test),
> and wanted to post it with the vec_set patch. I think the alignment helps
> a bit with readability.
>
> From 147a459dfbf1fe9d5dd93148f475f42dee3bd94b Mon Sep 17 00:00:00 2001
> From: Robin Dapp <rdapp@ventanamicro.com>
> Date: Tue, 6 Jun 2023 17:29:26 +0200
> Subject: [PATCH] RISC-V: Change V_WHOLE iterator to properly match
> instruction.
>
> Currently we emit e.g. an vl1r.v even when loading a mode whose size is
> smaller than the hardware vector size. This can happen when reload
> decides to switch to another alternative.
>
> This patch fixes the iterator and adds a testcase for the problem.
>
> gcc/ChangeLog:
>
> * config/riscv/vector-iterators.md: Add guards for modes smaller
> than the hardware vector size.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c: New test.
Sounds like Juzhe is OK with this moving independently. So I'll rubber
stamp it. :-)
jeff
+ (VNx16QI "TARGET_MIN_VLEN <= 128")
+ (VNx32QI "TARGET_MIN_VLEN <= 256")
+ (VNx64QI "TARGET_MIN_VLEN >= 64 && TARGET_MIN_VLEN <= 512")
+ (VNx128QI "TARGET_MIN_VLEN >= 128 && TARGET_MIN_VLEN <= 1024")
This not correct, we always use VNx16QI as LMUL = m1 for min_vlen >= 128.
Requirement of TARGET_MIN_VLEN <= 128 is incorrect for VNx16QI.
VNx32QI,...etc likewise.
My patch should be the correct but with the test you append in this patch.
Thanks.
juzhe.zhong@rivai.ai
From: Robin Dapp
Date: 2023-06-09 22:37
To: juzhe.zhong; gcc-patches
CC: rdapp.gcc; kito.cheng; palmer; jeffreyalaw
Subject: Re: [PATCH] RISC-V: Fix V_WHOLE && V_FRACT iterator requirement
On 6/9/23 16:32, juzhe.zhong@rivai.ai wrote:
> From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
>
> This patch fixes the requirement of V_WHOLE and V_FRACT.
> E.g. VNx8QI in V_WHOLE has no requirement which is incorrect.
> Actually, VNx8QI should be whole(full) mode when TARGET_MIN_VLEN < 128
> since when TARGET_MIN_VLEN == 128, VNx8QI is e8mf2 which is fractional
> vector.
>
> gcc/ChangeLog:
>
> * config/riscv/vector-iterators.md: Fix requirement.
I actually have the attached already on my local tree (as well as a test),
and wanted to post it with the vec_set patch. I think the alignment helps
a bit with readability.
From 147a459dfbf1fe9d5dd93148f475f42dee3bd94b Mon Sep 17 00:00:00 2001
From: Robin Dapp <rdapp@ventanamicro.com>
Date: Tue, 6 Jun 2023 17:29:26 +0200
Subject: [PATCH] RISC-V: Change V_WHOLE iterator to properly match
instruction.
Currently we emit e.g. an vl1r.v even when loading a mode whose size is
smaller than the hardware vector size. This can happen when reload
decides to switch to another alternative.
This patch fixes the iterator and adds a testcase for the problem.
gcc/ChangeLog:
* config/riscv/vector-iterators.md: Add guards for modes smaller
than the hardware vector size.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c: New test.
---
gcc/config/riscv/vector-iterators.md | 65 ++++++++++++++-----
.../rvv/autovec/vls-vlmax/full-vec-move1.c | 23 +++++++
2 files changed, 72 insertions(+), 16 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index 90743ed76c5..0587325e82c 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -430,32 +430,65 @@ (define_mode_iterator VNX64_QHI [
VNx64QI (VNx64HI "TARGET_MIN_VLEN >= 128")
])
+;; This iterator describes which modes can be moved/loaded/stored by
+;; full-register move instructions (e.g. vl1r.v).
+;; For now we support a maximum vector length of 1024 that can
+;; also be reached by combining multiple hardware registers (mf1, mf2, ...).
+;; This means that e.g. VNx64HI (with a size of 128 bytes) requires
+;; at least a minimum vector length of 128 bits = 16 bytes in order
+;; to be loadable by vl8r.v (mf8).
+;; Apart from that we must make sure that modes smaller than the
+;; vector size are properly guarded so that e.g. VNx4HI is not loaded
+;; by vl1r.v when VL == 128.
(define_mode_iterator V_WHOLE [
- (VNx4QI "TARGET_MIN_VLEN == 32") VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") (VNx128QI "TARGET_MIN_VLEN >= 128")
- (VNx2HI "TARGET_MIN_VLEN == 32") VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") (VNx64HI "TARGET_MIN_VLEN >= 128")
- (VNx1SI "TARGET_MIN_VLEN == 32") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") (VNx32SI "TARGET_MIN_VLEN >= 128")
- (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128") (VNx2DI "TARGET_VECTOR_ELEN_64")
- (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") (VNx16DI "TARGET_MIN_VLEN >= 128")
+ (VNx4QI "TARGET_MIN_VLEN == 32")
+ (VNx8QI "TARGET_MIN_VLEN <= 64")
+ (VNx16QI "TARGET_MIN_VLEN <= 128")
+ (VNx32QI "TARGET_MIN_VLEN <= 256")
+ (VNx64QI "TARGET_MIN_VLEN >= 64 && TARGET_MIN_VLEN <= 512")
+ (VNx128QI "TARGET_MIN_VLEN >= 128 && TARGET_MIN_VLEN <= 1024")
+ (VNx2HI "TARGET_MIN_VLEN == 32")
+ (VNx4HI "TARGET_MIN_VLEN <= 64")
+ (VNx8HI "TARGET_MIN_VLEN <= 128")
+ (VNx16HI "TARGET_MIN_VLEN <= 256")
+ (VNx32HI "TARGET_MIN_VLEN >= 64 && TARGET_MIN_VLEN <= 512")
+ (VNx64HI "TARGET_MIN_VLEN >= 128 && TARGET_MIN_VLEN <= 1024")
+ (VNx1SI "TARGET_MIN_VLEN == 32")
+ (VNx2SI "TARGET_MIN_VLEN <= 64")
+ (VNx4SI "TARGET_MIN_VLEN <= 128")
+ (VNx8SI "TARGET_MIN_VLEN <= 256")
+ (VNx16SI "TARGET_MIN_VLEN >= 64 && TARGET_MIN_VLEN <= 512")
+ (VNx32SI "TARGET_MIN_VLEN >= 128 && TARGET_MIN_VLEN <= 1024")
+ (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN == 64")
+ (VNx2DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN <= 128")
+ (VNx4DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN <= 256")
+ (VNx8DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN <= 512")
+ (VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128
+ && TARGET_MIN_VLEN <= 1024")
(VNx1HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128")
(VNx2HF "TARGET_VECTOR_ELEN_FP_16")
(VNx4HF "TARGET_VECTOR_ELEN_FP_16")
(VNx8HF "TARGET_VECTOR_ELEN_FP_16")
(VNx16HF "TARGET_VECTOR_ELEN_FP_16")
- (VNx32HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32")
+ (VNx32HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 64")
(VNx64HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
(VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN == 32")
- (VNx2SF "TARGET_VECTOR_ELEN_FP_32")
- (VNx4SF "TARGET_VECTOR_ELEN_FP_32")
- (VNx8SF "TARGET_VECTOR_ELEN_FP_32")
- (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
- (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
- (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128")
- (VNx2DF "TARGET_VECTOR_ELEN_FP_64")
- (VNx4DF "TARGET_VECTOR_ELEN_FP_64")
- (VNx8DF "TARGET_VECTOR_ELEN_FP_64")
- (VNx16DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128")
+ (VNx2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN <= 64")
+ (VNx4SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN <= 128")
+ (VNx8SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN <= 256")
+ (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 64
+ && TARGET_MIN_VLEN <= 512")
+ (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128
+ && TARGET_MIN_VLEN <= 1024")
+ (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN <= 64")
+ (VNx2DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN <= 128")
+ (VNx4DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN <= 256")
+ (VNx8DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 64
+ && TARGET_MIN_VLEN <= 512")
+ (VNx16DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128
+ && TARGET_MIN_VLEN <= 1024")
])
(define_mode_iterator V_FRACT [
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
new file mode 100644
index 00000000000..d63f219a1e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
@@ -0,0 +1,23 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -O3 -march=rv64gcv_zvl128b -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax" } */
+
+#include <stdint-gcc.h>
+#include <assert.h>
+
+/* This would cause us to emit a vl1r.v for VNx4HImode even when
+ the hardware vector size vl > 64. */
+
+typedef int16_t V __attribute__((vector_size (128)));
+
+int main ()
+{
+ V v;
+ for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++)
+ (v)[i] = i;
+ V res = v;
+ for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++)
+ assert (res[i] == i);
+}
+
+/* { dg-final { scan-assembler-not {vl[1248]r.v} } } */
+/* { dg-final { scan-assembler-times {vl[1248]re16.v} 1 } } */
--
2.40.1
> + (VNx16QI "TARGET_MIN_VLEN <= 128")
> + (VNx32QI "TARGET_MIN_VLEN <= 256")
> + (VNx64QI "TARGET_MIN_VLEN >= 64 && TARGET_MIN_VLEN <= 512")
> + (VNx128QI "TARGET_MIN_VLEN >= 128 && TARGET_MIN_VLEN <= 1024")
>
> This not correct, we always use VNx16QI as LMUL = m1 for min_vlen >= 128.
> Requirement of TARGET_MIN_VLEN <= 128 is incorrect for VNx16QI.
> VNx32QI,...etc likewise.
Please elaborate. What happens with a VNx16QI on a target with
min_vlen == 256? Is it a full 256-bit vector with only the first half
populated? If so, this need documentation either here or somewhere
else (but with a reference here).
Either you can pick my testcase and amend your patch (plus
streamline formatting as well adding a proper comment) or I change
mine. Your call.
Regards
Robin
I'd like you to defer to you commit my patch with your test (Jeff has approved my patch, just feel free to commit).
Here is the description:
We have 3 configuration for "-march"
1. zve32* (TARGET_MIN_VLEN == 32), the LMUL = 1 mode will be VNx4QI, VNx2HI, VNx1SI
2. zve64* (TARGET_MIN_VLEN == 64), the LMUL = 1 mode will be VNx8QI, VNx4HI, VNx2SI
3. zve64*_zvl128b (TARGET_MIN_VLEN >= 128), the LMUL = 1 mode will be VNx16QI, VNx8HI, VNx4SI
We dynamically adjust BYTES_PER_VECTOR according to TARGET_MIN_VLEN.
For TARGET_MIN_VLEN = 32 (chunk=32), the LMUL = 1 size = (4,4) bytes.
For TARGET_MIN_VLEN = 64 (chunk=64), the LMUL = 1 size = (8,8) bytes.
For TARGET_MIN_VLEN >= 128 (chunk=128), the LMUL = 1 size = (16*n,16*n) bytes.
I have explained it many times.
https://gcc.gnu.org/pipermail/gcc-patches/2023-March/614935.html
https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612574.html
juzhe.zhong@rivai.ai
From: Robin Dapp
Date: 2023-06-12 17:51
To: 钟居哲; gcc-patches
CC: rdapp.gcc; kito.cheng; palmer; Jeff Law
Subject: Re: [PATCH] RISC-V: Fix V_WHOLE && V_FRACT iterator requirement
> + (VNx16QI "TARGET_MIN_VLEN <= 128")
> + (VNx32QI "TARGET_MIN_VLEN <= 256")
> + (VNx64QI "TARGET_MIN_VLEN >= 64 && TARGET_MIN_VLEN <= 512")
> + (VNx128QI "TARGET_MIN_VLEN >= 128 && TARGET_MIN_VLEN <= 1024")
>
> This not correct, we always use VNx16QI as LMUL = m1 for min_vlen >= 128.
> Requirement of TARGET_MIN_VLEN <= 128 is incorrect for VNx16QI.
> VNx32QI,...etc likewise.
Please elaborate. What happens with a VNx16QI on a target with
min_vlen == 256? Is it a full 256-bit vector with only the first half
populated? If so, this need documentation either here or somewhere
else (but with a reference here).
Either you can pick my testcase and amend your patch (plus
streamline formatting as well adding a proper comment) or I change
mine. Your call.
Regards
Robin
Some more detail here:
https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616051.html
On Mon, Jun 12, 2023 at 5:58 PM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> I'd like you to defer to you commit my patch with your test (Jeff has approved my patch, just feel free to commit).
>
> Here is the description:
> We have 3 configuration for "-march"
> 1. zve32* (TARGET_MIN_VLEN == 32), the LMUL = 1 mode will be VNx4QI, VNx2HI, VNx1SI
> 2. zve64* (TARGET_MIN_VLEN == 64), the LMUL = 1 mode will be VNx8QI, VNx4HI, VNx2SI
> 3. zve64*_zvl128b (TARGET_MIN_VLEN >= 128), the LMUL = 1 mode will be VNx16QI, VNx8HI, VNx4SI
>
> We dynamically adjust BYTES_PER_VECTOR according to TARGET_MIN_VLEN.
> For TARGET_MIN_VLEN = 32 (chunk=32), the LMUL = 1 size = (4,4) bytes.
> For TARGET_MIN_VLEN = 64 (chunk=64), the LMUL = 1 size = (8,8) bytes.
> For TARGET_MIN_VLEN >= 128 (chunk=128), the LMUL = 1 size = (16*n,16*n) bytes.
> I have explained it many times.
> https://gcc.gnu.org/pipermail/gcc-patches/2023-March/614935.html
> https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612574.html
>
>
>
>
> juzhe.zhong@rivai.ai
>
> From: Robin Dapp
> Date: 2023-06-12 17:51
> To: 钟居哲; gcc-patches
> CC: rdapp.gcc; kito.cheng; palmer; Jeff Law
> Subject: Re: [PATCH] RISC-V: Fix V_WHOLE && V_FRACT iterator requirement
> > + (VNx16QI "TARGET_MIN_VLEN <= 128")
> > + (VNx32QI "TARGET_MIN_VLEN <= 256")
> > + (VNx64QI "TARGET_MIN_VLEN >= 64 && TARGET_MIN_VLEN <= 512")
> > + (VNx128QI "TARGET_MIN_VLEN >= 128 && TARGET_MIN_VLEN <= 1024")
> >
> > This not correct, we always use VNx16QI as LMUL = m1 for min_vlen >= 128.
> > Requirement of TARGET_MIN_VLEN <= 128 is incorrect for VNx16QI.
> > VNx32QI,...etc likewise.
>
> Please elaborate. What happens with a VNx16QI on a target with
> min_vlen == 256? Is it a full 256-bit vector with only the first half
> populated? If so, this need documentation either here or somewhere
> else (but with a reference here).
>
> Either you can pick my testcase and amend your patch (plus
> streamline formatting as well adding a proper comment) or I change
> mine. Your call.
>
> Regards
> Robin
>
On 6/12/23 03:58, juzhe.zhong@rivai.ai wrote:
> I'd like you to defer to you commit my patch with your test (Jeff has
> approved my patch, just feel free to commit).
Then let's go with that. Juzhe's patch + Robin's test.
jeff
Committed, thanks Jeff.
Pan
-----Original Message-----
From: Gcc-patches <gcc-patches-bounces+pan2.li=intel.com@gcc.gnu.org> On Behalf Of Jeff Law via Gcc-patches
Sent: Tuesday, June 13, 2023 3:54 AM
To: juzhe.zhong@rivai.ai; Robin Dapp <rdapp.gcc@gmail.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Kito.cheng <kito.cheng@sifive.com>; palmer <palmer@rivosinc.com>
Subject: Re: [PATCH] RISC-V: Fix V_WHOLE && V_FRACT iterator requirement
On 6/12/23 03:58, juzhe.zhong@rivai.ai wrote:
> I'd like you to defer to you commit my patch with your test (Jeff has
> approved my patch, just feel free to commit).
Then let's go with that. Juzhe's patch + Robin's test.
jeff
@@ -447,21 +447,24 @@
])
(define_mode_iterator V_WHOLE [
- (VNx4QI "TARGET_MIN_VLEN == 32") VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") (VNx128QI "TARGET_MIN_VLEN >= 128")
- (VNx2HI "TARGET_MIN_VLEN == 32") VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") (VNx64HI "TARGET_MIN_VLEN >= 128")
- (VNx1SI "TARGET_MIN_VLEN == 32") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") (VNx32SI "TARGET_MIN_VLEN >= 128")
+ (VNx4QI "TARGET_MIN_VLEN == 32") (VNx8QI "TARGET_MIN_VLEN < 128") VNx16QI VNx32QI
+ (VNx64QI "TARGET_MIN_VLEN > 32") (VNx128QI "TARGET_MIN_VLEN >= 128")
+ (VNx2HI "TARGET_MIN_VLEN == 32") (VNx4HI "TARGET_MIN_VLEN < 128") VNx8HI VNx16HI
+ (VNx32HI "TARGET_MIN_VLEN > 32") (VNx64HI "TARGET_MIN_VLEN >= 128")
+ (VNx1SI "TARGET_MIN_VLEN == 32") (VNx2SI "TARGET_MIN_VLEN < 128") VNx4SI VNx8SI
+ (VNx16SI "TARGET_MIN_VLEN > 32") (VNx32SI "TARGET_MIN_VLEN >= 128")
(VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128") (VNx2DI "TARGET_VECTOR_ELEN_64")
(VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") (VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128")
(VNx2HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN == 32")
- (VNx4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN == 64")
+ (VNx4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128")
(VNx8HF "TARGET_VECTOR_ELEN_FP_16")
(VNx16HF "TARGET_VECTOR_ELEN_FP_16")
(VNx32HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32")
(VNx64HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
(VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN == 32")
- (VNx2SF "TARGET_VECTOR_ELEN_FP_32")
+ (VNx2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128")
(VNx4SF "TARGET_VECTOR_ELEN_FP_32")
(VNx8SF "TARGET_VECTOR_ELEN_FP_32")
(VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
@@ -481,8 +484,8 @@
(VNx2HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32")
(VNx4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
- (VNx1SI "TARGET_MIN_VLEN > 32 && TARGET_MIN_VLEN < 128") (VNx2SI "TARGET_MIN_VLEN >= 128")
- (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32 && TARGET_MIN_VLEN < 128")
+ (VNx1SI "TARGET_MIN_VLEN == 64") (VNx2SI "TARGET_MIN_VLEN >= 128")
+ (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN == 64")
(VNx2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
])