[v2] RISC-V: Bugfix for RVV float reduction in ZVE32/64
Checks
Commit Message
From: Pan Li <pan2.li@intel.com>
The rvv float reduction has 3 different patterns for zve128+, zve64
and zve32. They take the same iterator with different attributes.
However, we need the generated function code_for_reduc (code, mode1, mode2).
The implementation of code_for_reduc may look like below.
code_for_reduc (code, mode1, mode2)
{
if (code == max && mode1 == VNx1HF && mode2 == VNx1HF)
return CODE_FOR_pred_reduc_maxvnx1hfvnx16hf; // ZVE128+
if (code == max && mode1 == VNx1HF && mode2 == VNx1HF)
return CODE_FOR_pred_reduc_maxvnx1hfvnx8hf; // ZVE64
if (code == max && mode1 == VNx1HF && mode2 == VNx1HF)
return CODE_FOR_pred_reduc_maxvnx1hfvnx4hf; // ZVE32
}
Thus there will be a problem here. For example zve32, we will have
code_for_reduc (max, VNx1HF, VNx1HF) which will return the code of
the ZVE128+ instead of the ZVE32 logically.
This patch will merge the 3 patterns into one pattern, and pass both the
input_vector and the ret_vector modes to code_for_reduc. For example, ZVE32
will be code_for_reduc (max, VNx1HF, VNx2HF), then the correct code of ZVE32
will be returned as expected.
Please note both GCC 13 and 14 are impacted by this issue.
Signed-off-by: Pan Li <pan2.li@intel.com>
Co-authored-by: Juzhe-Zhong <juzhe.zhong@rivai.ai>
PR target/110277
gcc/ChangeLog:
* config/riscv/riscv-vector-builtins-bases.cc: Adjust expand for
ret_mode.
* config/riscv/vector-iterators.md: Add VHF, VSF, VDF,
VHF_LMUL1, VSF_LMUL1, VDF_LMUL1, and remove unused attr.
* config/riscv/vector.md (@pred_reduc_<reduc><mode><vlmul1>): Removed.
(@pred_reduc_<reduc><mode><vlmul1_zve64>): Ditto.
(@pred_reduc_<reduc><mode><vlmul1_zve32>): Ditto.
(@pred_reduc_plus<order><mode><vlmul1>): Ditto.
(@pred_reduc_plus<order><mode><vlmul1_zve32>): Ditto.
(@pred_reduc_plus<order><mode><vlmul1_zve64>): Ditto.
(@pred_reduc_<reduc><VHF:mode><VHF_LMUL1:mode>): New pattern.
(@pred_reduc_<reduc><VSF:mode><VSF_LMUL1:mode>): Ditto.
(@pred_reduc_<reduc><VDF:mode><VDF_LMUL1:mode>): Ditto.
(@pred_reduc_plus<order><VHF:mode><VHF_LMUL1:mode>): Ditto.
(@pred_reduc_plus<order><VSF:mode><VSF_LMUL1:mode>): Ditto.
(@pred_reduc_plus<order><VDF:mode><VDF_LMUL1:mode>): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/pr110277-1.c: New test.
* gcc.target/riscv/rvv/base/pr110277-1.h: New test.
* gcc.target/riscv/rvv/base/pr110277-2.c: New test.
* gcc.target/riscv/rvv/base/pr110277-2.h: New test.
---
.../riscv/riscv-vector-builtins-bases.cc | 5 +-
gcc/config/riscv/vector-iterators.md | 128 +++---
gcc/config/riscv/vector.md | 363 +++++++++++-------
.../gcc.target/riscv/rvv/base/pr110277-1.c | 9 +
.../gcc.target/riscv/rvv/base/pr110277-1.h | 33 ++
.../gcc.target/riscv/rvv/base/pr110277-2.c | 11 +
.../gcc.target/riscv/rvv/base/pr110277-2.h | 33 ++
7 files changed, 366 insertions(+), 216 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.h
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.h
Comments
Thanks for fixing it for me.
LGTM now.
juzhe.zhong@rivai.ai
From: pan2.li
Date: 2023-06-18 10:57
To: gcc-patches
CC: juzhe.zhong; rdapp.gcc; jeffreyalaw; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v2] RISC-V: Bugfix for RVV float reduction in ZVE32/64
From: Pan Li <pan2.li@intel.com>
The rvv float reduction has 3 different patterns for zve128+, zve64
and zve32. They take the same iterator with different attributes.
However, we need the generated function code_for_reduc (code, mode1, mode2).
The implementation of code_for_reduc may look like below.
code_for_reduc (code, mode1, mode2)
{
if (code == max && mode1 == VNx1HF && mode2 == VNx1HF)
return CODE_FOR_pred_reduc_maxvnx1hfvnx16hf; // ZVE128+
if (code == max && mode1 == VNx1HF && mode2 == VNx1HF)
return CODE_FOR_pred_reduc_maxvnx1hfvnx8hf; // ZVE64
if (code == max && mode1 == VNx1HF && mode2 == VNx1HF)
return CODE_FOR_pred_reduc_maxvnx1hfvnx4hf; // ZVE32
}
Thus there will be a problem here. For example zve32, we will have
code_for_reduc (max, VNx1HF, VNx1HF) which will return the code of
the ZVE128+ instead of the ZVE32 logically.
This patch will merge the 3 patterns into one pattern, and pass both the
input_vector and the ret_vector modes to code_for_reduc. For example, ZVE32
will be code_for_reduc (max, VNx1HF, VNx2HF), then the correct code of ZVE32
will be returned as expected.
Please note both GCC 13 and 14 are impacted by this issue.
Signed-off-by: Pan Li <pan2.li@intel.com>
Co-authored-by: Juzhe-Zhong <juzhe.zhong@rivai.ai>
PR target/110277
gcc/ChangeLog:
* config/riscv/riscv-vector-builtins-bases.cc: Adjust expand for
ret_mode.
* config/riscv/vector-iterators.md: Add VHF, VSF, VDF,
VHF_LMUL1, VSF_LMUL1, VDF_LMUL1, and remove unused attr.
* config/riscv/vector.md (@pred_reduc_<reduc><mode><vlmul1>): Removed.
(@pred_reduc_<reduc><mode><vlmul1_zve64>): Ditto.
(@pred_reduc_<reduc><mode><vlmul1_zve32>): Ditto.
(@pred_reduc_plus<order><mode><vlmul1>): Ditto.
(@pred_reduc_plus<order><mode><vlmul1_zve32>): Ditto.
(@pred_reduc_plus<order><mode><vlmul1_zve64>): Ditto.
(@pred_reduc_<reduc><VHF:mode><VHF_LMUL1:mode>): New pattern.
(@pred_reduc_<reduc><VSF:mode><VSF_LMUL1:mode>): Ditto.
(@pred_reduc_<reduc><VDF:mode><VDF_LMUL1:mode>): Ditto.
(@pred_reduc_plus<order><VHF:mode><VHF_LMUL1:mode>): Ditto.
(@pred_reduc_plus<order><VSF:mode><VSF_LMUL1:mode>): Ditto.
(@pred_reduc_plus<order><VDF:mode><VDF_LMUL1:mode>): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/pr110277-1.c: New test.
* gcc.target/riscv/rvv/base/pr110277-1.h: New test.
* gcc.target/riscv/rvv/base/pr110277-2.c: New test.
* gcc.target/riscv/rvv/base/pr110277-2.h: New test.
---
.../riscv/riscv-vector-builtins-bases.cc | 5 +-
gcc/config/riscv/vector-iterators.md | 128 +++---
gcc/config/riscv/vector.md | 363 +++++++++++-------
.../gcc.target/riscv/rvv/base/pr110277-1.c | 9 +
.../gcc.target/riscv/rvv/base/pr110277-1.h | 33 ++
.../gcc.target/riscv/rvv/base/pr110277-2.c | 11 +
.../gcc.target/riscv/rvv/base/pr110277-2.h | 33 ++
7 files changed, 366 insertions(+), 216 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.h
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.h
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 53bd0ed2534..27545113996 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -1400,8 +1400,7 @@ public:
machine_mode ret_mode = e.ret_mode ();
/* TODO: we will use ret_mode after all types of PR110265 are addressed. */
- if ((GET_MODE_CLASS (MODE) == MODE_VECTOR_FLOAT)
- || GET_MODE_INNER (mode) != GET_MODE_INNER (ret_mode))
+ if (GET_MODE_INNER (mode) != GET_MODE_INNER (ret_mode))
return e.use_exact_insn (
code_for_pred_reduc (CODE, e.vector_mode (), e.vector_mode ()));
else
@@ -1435,7 +1434,7 @@ public:
rtx expand (function_expander &e) const override
{
return e.use_exact_insn (
- code_for_pred_reduc_plus (UNSPEC, e.vector_mode (), e.vector_mode ()));
+ code_for_pred_reduc_plus (UNSPEC, e.vector_mode (), e.ret_mode ()));
}
};
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index e2c8ade98eb..6169116482a 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -967,6 +967,33 @@ (define_mode_iterator VDI [
(VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128")
])
+(define_mode_iterator VHF [
+ (VNx1HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128")
+ (VNx2HF "TARGET_VECTOR_ELEN_FP_16")
+ (VNx4HF "TARGET_VECTOR_ELEN_FP_16")
+ (VNx8HF "TARGET_VECTOR_ELEN_FP_16")
+ (VNx16HF "TARGET_VECTOR_ELEN_FP_16")
+ (VNx32HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32")
+ (VNx64HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
+])
+
+(define_mode_iterator VSF [
+ (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128")
+ (VNx2SF "TARGET_VECTOR_ELEN_FP_32")
+ (VNx4SF "TARGET_VECTOR_ELEN_FP_32")
+ (VNx8SF "TARGET_VECTOR_ELEN_FP_32")
+ (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
+ (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
+])
+
+(define_mode_iterator VDF [
+ (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128")
+ (VNx2DF "TARGET_VECTOR_ELEN_FP_64")
+ (VNx4DF "TARGET_VECTOR_ELEN_FP_64")
+ (VNx8DF "TARGET_VECTOR_ELEN_FP_64")
+ (VNx16DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128")
+])
+
(define_mode_iterator VQI_LMUL1 [
(VNx16QI "TARGET_MIN_VLEN >= 128")
(VNx8QI "TARGET_MIN_VLEN == 64")
@@ -990,6 +1017,23 @@ (define_mode_iterator VDI_LMUL1 [
(VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN == 64")
])
+(define_mode_iterator VHF_LMUL1 [
+ (VNx8HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
+ (VNx4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN == 64")
+ (VNx2HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN == 32")
+])
+
+(define_mode_iterator VSF_LMUL1 [
+ (VNx4SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
+ (VNx2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN == 64")
+ (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN == 32")
+])
+
+(define_mode_iterator VDF_LMUL1 [
+ (VNx2DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128")
+ (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN == 64")
+])
+
(define_mode_attr VLMULX2 [
(VNx1QI "VNx2QI") (VNx2QI "VNx4QI") (VNx4QI "VNx8QI") (VNx8QI "VNx16QI") (VNx16QI "VNx32QI") (VNx32QI "VNx64QI") (VNx64QI "VNx128QI")
(VNx1HI "VNx2HI") (VNx2HI "VNx4HI") (VNx4HI "VNx8HI") (VNx8HI "VNx16HI") (VNx16HI "VNx32HI") (VNx32HI "VNx64HI")
@@ -1348,48 +1392,6 @@ (define_mode_attr VNCONVERT [
(VNx1DF "VNx1SI") (VNx2DF "VNx2SI") (VNx4DF "VNx4SI") (VNx8DF "VNx8SI") (VNx16DF "VNx16SI")
])
-(define_mode_attr VLMUL1 [
- (VNx1QI "VNx16QI") (VNx2QI "VNx16QI") (VNx4QI "VNx16QI")
- (VNx8QI "VNx16QI") (VNx16QI "VNx16QI") (VNx32QI "VNx16QI") (VNx64QI "VNx16QI") (VNx128QI "VNx16QI")
- (VNx1HI "VNx8HI") (VNx2HI "VNx8HI") (VNx4HI "VNx8HI")
- (VNx8HI "VNx8HI") (VNx16HI "VNx8HI") (VNx32HI "VNx8HI") (VNx64HI "VNx8HI")
- (VNx1SI "VNx4SI") (VNx2SI "VNx4SI") (VNx4SI "VNx4SI")
- (VNx8SI "VNx4SI") (VNx16SI "VNx4SI") (VNx32SI "VNx4SI")
- (VNx1DI "VNx2DI") (VNx2DI "VNx2DI")
- (VNx4DI "VNx2DI") (VNx8DI "VNx2DI") (VNx16DI "VNx2DI")
- (VNx1HF "VNx8HF") (VNx2HF "VNx8HF") (VNx4HF "VNx8HF") (VNx8HF "VNx8HF") (VNx16HF "VNx8HF") (VNx32HF "VNx8HF") (VNx64HF "VNx8HF")
- (VNx1SF "VNx4SF") (VNx2SF "VNx4SF")
- (VNx4SF "VNx4SF") (VNx8SF "VNx4SF") (VNx16SF "VNx4SF") (VNx32SF "VNx4SF")
- (VNx1DF "VNx2DF") (VNx2DF "VNx2DF")
- (VNx4DF "VNx2DF") (VNx8DF "VNx2DF") (VNx16DF "VNx2DF")
-])
-
-(define_mode_attr VLMUL1_ZVE64 [
- (VNx1QI "VNx8QI") (VNx2QI "VNx8QI") (VNx4QI "VNx8QI")
- (VNx8QI "VNx8QI") (VNx16QI "VNx8QI") (VNx32QI "VNx8QI") (VNx64QI "VNx8QI")
- (VNx1HI "VNx4HI") (VNx2HI "VNx4HI") (VNx4HI "VNx4HI")
- (VNx8HI "VNx4HI") (VNx16HI "VNx4HI") (VNx32HI "VNx4HI")
- (VNx1SI "VNx2SI") (VNx2SI "VNx2SI") (VNx4SI "VNx2SI")
- (VNx8SI "VNx2SI") (VNx16SI "VNx2SI")
- (VNx1DI "VNx1DI") (VNx2DI "VNx1DI")
- (VNx4DI "VNx1DI") (VNx8DI "VNx1DI")
- (VNx1SF "VNx2SF") (VNx2SF "VNx2SF")
- (VNx4SF "VNx2SF") (VNx8SF "VNx2SF") (VNx16SF "VNx2SF")
- (VNx1DF "VNx1DF") (VNx2DF "VNx1DF")
- (VNx4DF "VNx1DF") (VNx8DF "VNx1DF")
-])
-
-(define_mode_attr VLMUL1_ZVE32 [
- (VNx1QI "VNx4QI") (VNx2QI "VNx4QI") (VNx4QI "VNx4QI")
- (VNx8QI "VNx4QI") (VNx16QI "VNx4QI") (VNx32QI "VNx4QI")
- (VNx1HI "VNx2HI") (VNx2HI "VNx2HI") (VNx4HI "VNx2HI")
- (VNx8HI "VNx2HI") (VNx16HI "VNx2HI")
- (VNx1SI "VNx1SI") (VNx2SI "VNx1SI") (VNx4SI "VNx1SI")
- (VNx8SI "VNx1SI")
- (VNx1SF "VNx2SF") (VNx2SF "VNx2SF")
- (VNx4SF "VNx2SF") (VNx8SF "VNx2SF")
-])
-
(define_mode_attr VWLMUL1 [
(VNx1QI "VNx8HI") (VNx2QI "VNx8HI") (VNx4QI "VNx8HI")
(VNx8QI "VNx8HI") (VNx16QI "VNx8HI") (VNx32QI "VNx8HI") (VNx64QI "VNx8HI") (VNx128QI "VNx8HI")
@@ -1421,48 +1423,6 @@ (define_mode_attr VWLMUL1_ZVE32 [
(VNx8HI "VNx1SI") (VNx16HI "VNx1SI")
])
-(define_mode_attr vlmul1 [
- (VNx1QI "vnx16qi") (VNx2QI "vnx16qi") (VNx4QI "vnx16qi")
- (VNx8QI "vnx16qi") (VNx16QI "vnx16qi") (VNx32QI "vnx16qi") (VNx64QI "vnx16qi") (VNx128QI "vnx16qi")
- (VNx1HI "vnx8hi") (VNx2HI "vnx8hi") (VNx4HI "vnx8hi")
- (VNx8HI "vnx8hi") (VNx16HI "vnx8hi") (VNx32HI "vnx8hi") (VNx64HI "vnx8hi")
- (VNx1SI "vnx4si") (VNx2SI "vnx4si") (VNx4SI "vnx4si")
- (VNx8SI "vnx4si") (VNx16SI "vnx4si") (VNx32SI "vnx4si")
- (VNx1DI "vnx2di") (VNx2DI "vnx2di")
- (VNx4DI "vnx2di") (VNx8DI "vnx2di") (VNx16DI "vnx2di")
- (VNx1HF "vnx8hf") (VNx2HF "vnx8hf") (VNx4HF "vnx8hf") (VNx8HF "vnx8hf") (VNx16HF "vnx8hf") (VNx32HF "vnx8hf") (VNx64HF "vnx8hf")
- (VNx1SF "vnx4sf") (VNx2SF "vnx4sf")
- (VNx4SF "vnx4sf") (VNx8SF "vnx4sf") (VNx16SF "vnx4sf") (VNx32SF "vnx4sf")
- (VNx1DF "vnx2df") (VNx2DF "vnx2df")
- (VNx4DF "vnx2df") (VNx8DF "vnx2df") (VNx16DF "vnx2df")
-])
-
-(define_mode_attr vlmul1_zve64 [
- (VNx1QI "vnx8qi") (VNx2QI "vnx8qi") (VNx4QI "vnx8qi")
- (VNx8QI "vnx8qi") (VNx16QI "vnx8qi") (VNx32QI "vnx8qi") (VNx64QI "vnx8qi")
- (VNx1HI "vnx4hi") (VNx2HI "vnx4hi") (VNx4HI "vnx4hi")
- (VNx8HI "vnx4hi") (VNx16HI "vnx4hi") (VNx32HI "vnx4hi")
- (VNx1SI "vnx2si") (VNx2SI "vnx2si") (VNx4SI "vnx2si")
- (VNx8SI "vnx2si") (VNx16SI "vnx2si")
- (VNx1DI "vnx1di") (VNx2DI "vnx1di")
- (VNx4DI "vnx1di") (VNx8DI "vnx1di")
- (VNx1SF "vnx2sf") (VNx2SF "vnx2sf")
- (VNx4SF "vnx2sf") (VNx8SF "vnx2sf") (VNx16SF "vnx2sf")
- (VNx1DF "vnx1df") (VNx2DF "vnx1df")
- (VNx4DF "vnx1df") (VNx8DF "vnx1df")
-])
-
-(define_mode_attr vlmul1_zve32 [
- (VNx1QI "vnx4qi") (VNx2QI "vnx4qi") (VNx4QI "vnx4qi")
- (VNx8QI "vnx4qi") (VNx16QI "vnx4qi") (VNx32QI "vnx4qi")
- (VNx1HI "vnx2hi") (VNx2HI "vnx2hi") (VNx4HI "vnx2hi")
- (VNx8HI "vnx2hi") (VNx16HI "vnx2hi")
- (VNx1SI "vnx1si") (VNx2SI "vnx1si") (VNx4SI "vnx1si")
- (VNx8SI "vnx1si")
- (VNx1SF "vnx1sf") (VNx2SF "vnx1sf")
- (VNx4SF "vnx1sf") (VNx8SF "vnx1sf")
-])
-
(define_mode_attr vwlmul1 [
(VNx1QI "vnx8hi") (VNx2QI "vnx8hi") (VNx4QI "vnx8hi")
(VNx8QI "vnx8hi") (VNx16QI "vnx8hi") (VNx32QI "vnx8hi") (VNx64QI "vnx8hi") (VNx128QI "vnx8hi")
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index d396e278503..efce992a012 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -7462,152 +7462,257 @@ (define_insn "@pred_widen_reduc_plus<v_su><mode><vwlmul1_zve32>"
[(set_attr "type" "viwred")
(set_attr "mode" "<MODE>")])
-(define_insn "@pred_reduc_<reduc><mode><vlmul1>"
- [(set (match_operand:<VLMUL1> 0 "register_operand" "=vr, vr")
- (unspec:<VLMUL1>
- [(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 5 "vector_length_operand" " rK, rK")
- (match_operand 6 "const_int_operand" " i, i")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+;; Float Reduction for HF
+(define_insn "@pred_reduc_<reduc><VHF:mode><VHF_LMUL1:mode>"
+ [
+ (set
+ (match_operand:VHF_LMUL1 0 "register_operand" "=vr, vr")
+ (unspec:VHF_LMUL1
+ [
+ (unspec:<VHF:VM>
+ [
+ (match_operand:<VHF:VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
- (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (any_freduc:VF
- (vec_duplicate:VF
- (vec_select:<VEL>
- (match_operand:<VLMUL1> 4 "register_operand" " vr, vr")
- (parallel [(const_int 0)])))
- (match_operand:VF 3 "register_operand" " vr, vr"))
- (match_operand:<VLMUL1> 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC))]
- "TARGET_VECTOR && TARGET_MIN_VLEN >= 128"
+ ] UNSPEC_VPREDICATE
+ )
+ (any_reduc:VHF
+ (vec_duplicate:VHF
+ (vec_select:<VEL>
+ (match_operand:VHF_LMUL1 4 "register_operand" " vr, vr")
+ (parallel [(const_int 0)])
+ )
+ )
+ (match_operand:VHF 3 "register_operand" " vr, vr")
+ )
+ (match_operand:VHF_LMUL1 2 "vector_merge_operand" " vu, 0")
+ ] UNSPEC_REDUC
+ )
+ )
+ ]
+ "TARGET_VECTOR"
"vfred<reduc>.vs\t%0,%3,%4%p1"
- [(set_attr "type" "vfredu")
- (set_attr "mode" "<MODE>")])
+ [
+ (set_attr "type" "vfredu")
+ (set_attr "mode" "<VHF:MODE>")
+ ]
+)
-(define_insn "@pred_reduc_<reduc><mode><vlmul1_zve64>"
- [(set (match_operand:<VLMUL1_ZVE64> 0 "register_operand" "=vr, vr")
- (unspec:<VLMUL1_ZVE64>
- [(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 5 "vector_length_operand" " rK, rK")
- (match_operand 6 "const_int_operand" " i, i")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+;; Float Reduction for SF
+(define_insn "@pred_reduc_<reduc><VSF:mode><VSF_LMUL1:mode>"
+ [
+ (set
+ (match_operand:VSF_LMUL1 0 "register_operand" "=vr, vr")
+ (unspec:VSF_LMUL1
+ [
+ (unspec:<VSF:VM>
+ [
+ (match_operand:<VSF:VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
- (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (any_freduc:VF_ZVE64
- (vec_duplicate:VF_ZVE64
- (vec_select:<VEL>
- (match_operand:<VLMUL1_ZVE64> 4 "register_operand" " vr, vr")
- (parallel [(const_int 0)])))
- (match_operand:VF_ZVE64 3 "register_operand" " vr, vr"))
- (match_operand:<VLMUL1_ZVE64> 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC))]
- "TARGET_VECTOR && TARGET_MIN_VLEN == 64"
+ ] UNSPEC_VPREDICATE
+ )
+ (any_reduc:VSF
+ (vec_duplicate:VSF
+ (vec_select:<VEL>
+ (match_operand:VSF_LMUL1 4 "register_operand" " vr, vr")
+ (parallel [(const_int 0)])
+ )
+ )
+ (match_operand:VSF 3 "register_operand" " vr, vr")
+ )
+ (match_operand:VSF_LMUL1 2 "vector_merge_operand" " vu, 0")
+ ] UNSPEC_REDUC
+ )
+ )
+ ]
+ "TARGET_VECTOR"
"vfred<reduc>.vs\t%0,%3,%4%p1"
- [(set_attr "type" "vfredu")
- (set_attr "mode" "<MODE>")])
+ [
+ (set_attr "type" "vfredu")
+ (set_attr "mode" "<VSF:MODE>")
+ ]
+)
-(define_insn "@pred_reduc_<reduc><mode><vlmul1_zve32>"
- [(set (match_operand:<VLMUL1_ZVE32> 0 "register_operand" "=vd, vd, vr, vr")
- (unspec:<VLMUL1_ZVE32>
- [(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
- (match_operand 5 "vector_length_operand" " rK, rK, rK, rK")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
- (match_operand 8 "const_int_operand" " i, i, i, i")
+;; Float Reduction for DF
+(define_insn "@pred_reduc_<reduc><VDF:mode><VDF_LMUL1:mode>"
+ [
+ (set
+ (match_operand:VDF_LMUL1 0 "register_operand" "=vr, vr")
+ (unspec:VDF_LMUL1
+ [
+ (unspec:<VDF:VM>
+ [
+ (match_operand:<VDF:VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
- (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (any_freduc:VF_ZVE32
- (vec_duplicate:VF_ZVE32
- (vec_select:<VEL>
- (match_operand:<VLMUL1_ZVE32> 4 "register_operand" " vr, vr, vr, vr")
- (parallel [(const_int 0)])))
- (match_operand:VF_ZVE32 3 "register_operand" " vr, vr, vr, vr"))
- (match_operand:<VLMUL1_ZVE32> 2 "vector_merge_operand" " vu, 0, vu, 0")] UNSPEC_REDUC))]
- "TARGET_VECTOR && TARGET_MIN_VLEN == 32"
+ ] UNSPEC_VPREDICATE
+ )
+ (any_reduc:VDF
+ (vec_duplicate:VDF
+ (vec_select:<VEL>
+ (match_operand:VDF_LMUL1 4 "register_operand" " vr, vr")
+ (parallel [(const_int 0)])
+ )
+ )
+ (match_operand:VDF 3 "register_operand" " vr, vr")
+ )
+ (match_operand:VDF_LMUL1 2 "vector_merge_operand" " vu, 0")
+ ] UNSPEC_REDUC
+ )
+ )
+ ]
+ "TARGET_VECTOR"
"vfred<reduc>.vs\t%0,%3,%4%p1"
- [(set_attr "type" "vfredu")
- (set_attr "mode" "<MODE>")])
+ [
+ (set_attr "type" "vfredu")
+ (set_attr "mode" "<VDF:MODE>")
+ ]
+)
-(define_insn "@pred_reduc_plus<order><mode><vlmul1>"
- [(set (match_operand:<VLMUL1> 0 "register_operand" "=vr, vr")
- (unspec:<VLMUL1>
- [(unspec:<VLMUL1>
- [(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 5 "vector_length_operand" " rK, rK")
- (match_operand 6 "const_int_operand" " i, i")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
- (reg:SI VL_REGNUM)
- (reg:SI VTYPE_REGNUM)
- (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (plus:VF
- (vec_duplicate:VF
- (vec_select:<VEL>
- (match_operand:<VLMUL1> 4 "register_operand" " vr, vr")
- (parallel [(const_int 0)])))
- (match_operand:VF 3 "register_operand" " vr, vr"))
- (match_operand:<VLMUL1> 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC)] ORDER))]
- "TARGET_VECTOR && TARGET_MIN_VLEN >= 128"
+;; Float Ordered Reduction Sum for HF
+(define_insn "@pred_reduc_plus<order><VHF:mode><VHF_LMUL1:mode>"
+ [
+ (set
+ (match_operand:VHF_LMUL1 0 "register_operand" "=vr,vr")
+ (unspec:VHF_LMUL1
+ [
+ (unspec:VHF_LMUL1
+ [
+ (unspec:<VHF:VM>
+ [
+ (match_operand:<VHF:VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)
+ (reg:SI FRM_REGNUM)
+ ] UNSPEC_VPREDICATE
+ )
+ (plus:VHF
+ (vec_duplicate:VHF
+ (vec_select:<VEL>
+ (match_operand:VHF_LMUL1 4 "register_operand" " vr, vr")
+ (parallel [(const_int 0)])
+ )
+ )
+ (match_operand:VHF 3 "register_operand" " vr, vr")
+ )
+ (match_operand:VHF_LMUL1 2 "vector_merge_operand" " vu, 0")
+ ] UNSPEC_REDUC
+ )
+ ] ORDER
+ )
+ )
+ ]
+ "TARGET_VECTOR"
"vfred<order>sum.vs\t%0,%3,%4%p1"
- [(set_attr "type" "vfred<order>")
- (set_attr "mode" "<MODE>")])
+ [
+ (set_attr "type" "vfred<order>")
+ (set_attr "mode" "<VHF:MODE>")
+ ]
+)
-(define_insn "@pred_reduc_plus<order><mode><vlmul1_zve64>"
- [(set (match_operand:<VLMUL1_ZVE64> 0 "register_operand" "=vr, vr")
- (unspec:<VLMUL1_ZVE64>
- [(unspec:<VLMUL1_ZVE64>
- [(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 5 "vector_length_operand" " rK, rK")
- (match_operand 6 "const_int_operand" " i, i")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
- (reg:SI VL_REGNUM)
- (reg:SI VTYPE_REGNUM)
- (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (plus:VF_ZVE64
- (vec_duplicate:VF_ZVE64
- (vec_select:<VEL>
- (match_operand:<VLMUL1_ZVE64> 4 "register_operand" " vr, vr")
- (parallel [(const_int 0)])))
- (match_operand:VF_ZVE64 3 "register_operand" " vr, vr"))
- (match_operand:<VLMUL1_ZVE64> 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC)] ORDER))]
- "TARGET_VECTOR && TARGET_MIN_VLEN == 64"
+;; Float Ordered Reduction Sum for SF
+(define_insn "@pred_reduc_plus<order><VSF:mode><VSF_LMUL1:mode>"
+ [
+ (set
+ (match_operand:VSF_LMUL1 0 "register_operand" "=vr,vr")
+ (unspec:VSF_LMUL1
+ [
+ (unspec:VSF_LMUL1
+ [
+ (unspec:<VSF:VM>
+ [
+ (match_operand:<VSF:VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)
+ (reg:SI FRM_REGNUM)
+ ] UNSPEC_VPREDICATE
+ )
+ (plus:VSF
+ (vec_duplicate:VSF
+ (vec_select:<VEL>
+ (match_operand:VSF_LMUL1 4 "register_operand" " vr, vr")
+ (parallel [(const_int 0)])
+ )
+ )
+ (match_operand:VSF 3 "register_operand" " vr, vr")
+ )
+ (match_operand:VSF_LMUL1 2 "vector_merge_operand" " vu, 0")
+ ] UNSPEC_REDUC
+ )
+ ] ORDER
+ )
+ )
+ ]
+ "TARGET_VECTOR"
"vfred<order>sum.vs\t%0,%3,%4%p1"
- [(set_attr "type" "vfred<order>")
- (set_attr "mode" "<MODE>")])
+ [
+ (set_attr "type" "vfred<order>")
+ (set_attr "mode" "<VSF:MODE>")
+ ]
+)
-(define_insn "@pred_reduc_plus<order><mode><vlmul1_zve32>"
- [(set (match_operand:<VLMUL1_ZVE32> 0 "register_operand" "=vd, vd, vr, vr")
- (unspec:<VLMUL1_ZVE32>
- [(unspec:<VLMUL1_ZVE32>
- [(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
- (match_operand 5 "vector_length_operand" " rK, rK, rK, rK")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
- (match_operand 8 "const_int_operand" " i, i, i, i")
- (reg:SI VL_REGNUM)
- (reg:SI VTYPE_REGNUM)
- (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (plus:VF_ZVE32
- (vec_duplicate:VF_ZVE32
- (vec_select:<VEL>
- (match_operand:<VLMUL1_ZVE32> 4 "register_operand" " vr, vr, vr, vr")
- (parallel [(const_int 0)])))
- (match_operand:VF_ZVE32 3 "register_operand" " vr, vr, vr, vr"))
- (match_operand:<VLMUL1_ZVE32> 2 "vector_merge_operand" " vu, 0, vu, 0")] UNSPEC_REDUC)] ORDER))]
- "TARGET_VECTOR && TARGET_MIN_VLEN == 32"
+;; Float Ordered Reduction Sum for DF
+(define_insn "@pred_reduc_plus<order><VDF:mode><VDF_LMUL1:mode>"
+ [
+ (set
+ (match_operand:VDF_LMUL1 0 "register_operand" "=vr,vr")
+ (unspec:VDF_LMUL1
+ [
+ (unspec:VDF_LMUL1
+ [
+ (unspec:<VDF:VM>
+ [
+ (match_operand:<VDF:VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)
+ (reg:SI FRM_REGNUM)
+ ] UNSPEC_VPREDICATE
+ )
+ (plus:VDF
+ (vec_duplicate:VDF
+ (vec_select:<VEL>
+ (match_operand:VDF_LMUL1 4 "register_operand" " vr, vr")
+ (parallel [(const_int 0)])
+ )
+ )
+ (match_operand:VDF 3 "register_operand" " vr, vr")
+ )
+ (match_operand:VDF_LMUL1 2 "vector_merge_operand" " vu, 0")
+ ] UNSPEC_REDUC
+ )
+ ] ORDER
+ )
+ )
+ ]
+ "TARGET_VECTOR"
"vfred<order>sum.vs\t%0,%3,%4%p1"
- [(set_attr "type" "vfred<order>")
- (set_attr "mode" "<MODE>")])
+ [
+ (set_attr "type" "vfred<order>")
+ (set_attr "mode" "<VDF:MODE>")
+ ]
+)
(define_insn "@pred_widen_reduc_plus<order><mode><vwlmul1>"
[(set (match_operand:<VWLMUL1> 0 "register_operand" "=&vr, &vr")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.c
new file mode 100644
index 00000000000..24a4ba3b45f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zve32f_zvfh -mabi=ilp32f -O3 -Wno-psabi" } */
+
+#include "pr110277-1.h"
+
+/* { dg-final { scan-assembler-times {vfredmax\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vfredmin\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vfredusum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.h b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.h
new file mode 100644
index 00000000000..67c296c2213
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.h
@@ -0,0 +1,33 @@
+#include "riscv_vector.h"
+
+vfloat16m1_t test_vfredmax_vs_f16mf2_f16m1(vfloat16mf2_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredmax_vs_f16mf2_f16m1(vector, scalar, vl);
+}
+
+vfloat32m1_t test_vfredmax_vs_f32m8_f32m1(vfloat32m8_t vector, vfloat32m1_t scalar, size_t vl) {
+ return __riscv_vfredmax_vs_f32m8_f32m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredmin_vs_f16mf2_f16m1(vfloat16mf2_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredmin_vs_f16mf2_f16m1(vector, scalar, vl);
+}
+
+vfloat32m1_t test_vfredmin_vs_f32m8_f32m1(vfloat32m8_t vector, vfloat32m1_t scalar, size_t vl) {
+ return __riscv_vfredmin_vs_f32m8_f32m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredosum_vs_f16mf2_f16m1(vfloat16mf2_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredosum_vs_f16mf2_f16m1(vector, scalar, vl);
+}
+
+vfloat32m1_t test_vfredosum_vs_f32m8_f32m1(vfloat32m8_t vector, vfloat32m1_t scalar, size_t vl) {
+ return __riscv_vfredosum_vs_f32m8_f32m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredusum_vs_f16mf2_f16m1(vfloat16mf2_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredusum_vs_f16mf2_f16m1(vector, scalar, vl);
+}
+
+vfloat32m1_t test_vfredusum_vs_f32m8_f32m1(vfloat32m8_t vector, vfloat32m1_t scalar, size_t vl) {
+ return __riscv_vfredusum_vs_f32m8_f32m1(vector, scalar, vl);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.c
new file mode 100644
index 00000000000..23d7361488a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zve64d_zvfh -mabi=ilp32d -O3 -Wno-psabi" } */
+
+#include "pr110277-1.h"
+#include "pr110277-2.h"
+
+/* { dg-final { scan-assembler-times {vfredmax\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 4 } } */
+/* { dg-final { scan-assembler-times {vfredmin\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 4 } } */
+/* { dg-final { scan-assembler-times {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 4 } } */
+/* { dg-final { scan-assembler-times {vfredusum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 4 } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.h b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.h
new file mode 100644
index 00000000000..7e5c81aa213
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.h
@@ -0,0 +1,33 @@
+#include "riscv_vector.h"
+
+vfloat16m1_t test_vfredmax_vs_f16mf4_f16m1(vfloat16mf4_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredmax_vs_f16mf4_f16m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredmin_vs_f16mf4_f16m1(vfloat16mf4_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredmin_vs_f16mf4_f16m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredosum_vs_f16mf4_f16m1(vfloat16mf4_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredosum_vs_f16mf4_f16m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredusum_vs_f16mf4_f16m1(vfloat16mf4_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredusum_vs_f16mf4_f16m1(vector, scalar, vl);
+}
+
+vfloat64m1_t test_vfredmax_vs_f64m8_f64m1(vfloat64m8_t vector, vfloat64m1_t scalar, size_t vl) {
+ return __riscv_vfredmax_vs_f64m8_f64m1(vector, scalar, vl);
+}
+
+vfloat64m1_t test_vfredmin_vs_f64m8_f64m1(vfloat64m8_t vector, vfloat64m1_t scalar, size_t vl) {
+ return __riscv_vfredmin_vs_f64m8_f64m1(vector, scalar, vl);
+}
+
+vfloat64m1_t test_vfredosum_vs_f64m8_f64m1(vfloat64m8_t vector, vfloat64m1_t scalar, size_t vl) {
+ return __riscv_vfredosum_vs_f64m8_f64m1(vector, scalar, vl);
+}
+
+vfloat64m1_t test_vfredusum_vs_f64m8_f64m1(vfloat64m8_t vector, vfloat64m1_t scalar, size_t vl) {
+ return __riscv_vfredusum_vs_f64m8_f64m1(vector, scalar, vl);
+}
--
2.34.1
Ok for trunk?
And a reminder to myself that this PATCH should be committed before the RVV widen reduction one.
Pan
From: 钟居哲 <juzhe.zhong@rivai.ai>
Sent: Sunday, June 18, 2023 9:15 PM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: rdapp.gcc <rdapp.gcc@gmail.com>; Jeff Law <jeffreyalaw@gmail.com>; Li, Pan2 <pan2.li@intel.com>; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng <kito.cheng@gmail.com>
Subject: Re: [PATCH v2] RISC-V: Bugfix for RVV float reduction in ZVE32/64
Thanks for fixing it for me.
LGTM now.
________________________________
juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>
From: pan2.li<mailto:pan2.li@intel.com>
Date: 2023-06-18 10:57
To: gcc-patches<mailto:gcc-patches@gcc.gnu.org>
CC: juzhe.zhong<mailto:juzhe.zhong@rivai.ai>; rdapp.gcc<mailto:rdapp.gcc@gmail.com>; jeffreyalaw<mailto:jeffreyalaw@gmail.com>; pan2.li<mailto:pan2.li@intel.com>; yanzhang.wang<mailto:yanzhang.wang@intel.com>; kito.cheng<mailto:kito.cheng@gmail.com>
Subject: [PATCH v2] RISC-V: Bugfix for RVV float reduction in ZVE32/64
From: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>
The rvv float reduction has 3 different patterns for zve128+, zve64
and zve32. They take the same iterator with different attributes.
However, we need the generated function code_for_reduc (code, mode1, mode2).
The implementation of code_for_reduc may look like below.
code_for_reduc (code, mode1, mode2)
{
if (code == max && mode1 == VNx1HF && mode2 == VNx1HF)
return CODE_FOR_pred_reduc_maxvnx1hfvnx16hf; // ZVE128+
if (code == max && mode1 == VNx1HF && mode2 == VNx1HF)
return CODE_FOR_pred_reduc_maxvnx1hfvnx8hf; // ZVE64
if (code == max && mode1 == VNx1HF && mode2 == VNx1HF)
return CODE_FOR_pred_reduc_maxvnx1hfvnx4hf; // ZVE32
}
Thus there will be a problem here. For example, with zve32 we will have
code_for_reduc (max, VNx1HF, VNx1HF), which will logically return the
code for ZVE128+ instead of the code for ZVE32.
This patch will merge the 3 patterns into one pattern, and pass both the
input_vector and the ret_vector modes to code_for_reduc. For example, for
ZVE32 we will have code_for_reduc (max, VNx1HF, VNx2HF), and then the
correct code for ZVE32 will be returned as expected.
Please note both GCC 13 and 14 are impacted by this issue.
Signed-off-by: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>
Co-authored-by: Juzhe-Zhong <juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>>
PR target/110277
gcc/ChangeLog:
* config/riscv/riscv-vector-builtins-bases.cc: Adjust expand for
ret_mode.
* config/riscv/vector-iterators.md: Add VHF, VSF, VDF,
VHF_LMUL1, VSF_LMUL1, VDF_LMUL1, and remove unused attr.
* config/riscv/vector.md (@pred_reduc_<reduc><mode><vlmul1>): Removed.
(@pred_reduc_<reduc><mode><vlmul1_zve64>): Ditto.
(@pred_reduc_<reduc><mode><vlmul1_zve32>): Ditto.
(@pred_reduc_plus<order><mode><vlmul1>): Ditto.
(@pred_reduc_plus<order><mode><vlmul1_zve32>): Ditto.
(@pred_reduc_plus<order><mode><vlmul1_zve64>): Ditto.
(@pred_reduc_<reduc><VHF:mode><VHF_LMUL1:mode>): New pattern.
(@pred_reduc_<reduc><VSF:mode><VSF_LMUL1:mode>): Ditto.
(@pred_reduc_<reduc><VDF:mode><VDF_LMUL1:mode>): Ditto.
(@pred_reduc_plus<order><VHF:mode><VHF_LMUL1:mode>): Ditto.
(@pred_reduc_plus<order><VSF:mode><VSF_LMUL1:mode>): Ditto.
(@pred_reduc_plus<order><VDF:mode><VDF_LMUL1:mode>): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/pr110277-1.c: New test.
* gcc.target/riscv/rvv/base/pr110277-1.h: New test.
* gcc.target/riscv/rvv/base/pr110277-2.c: New test.
* gcc.target/riscv/rvv/base/pr110277-2.h: New test.
---
.../riscv/riscv-vector-builtins-bases.cc | 5 +-
gcc/config/riscv/vector-iterators.md | 128 +++---
gcc/config/riscv/vector.md | 363 +++++++++++-------
.../gcc.target/riscv/rvv/base/pr110277-1.c | 9 +
.../gcc.target/riscv/rvv/base/pr110277-1.h | 33 ++
.../gcc.target/riscv/rvv/base/pr110277-2.c | 11 +
.../gcc.target/riscv/rvv/base/pr110277-2.h | 33 ++
7 files changed, 366 insertions(+), 216 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.h
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.h
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 53bd0ed2534..27545113996 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -1400,8 +1400,7 @@ public:
machine_mode ret_mode = e.ret_mode ();
/* TODO: we will use ret_mode after all types of PR110265 are addressed. */
- if ((GET_MODE_CLASS (MODE) == MODE_VECTOR_FLOAT)
- || GET_MODE_INNER (mode) != GET_MODE_INNER (ret_mode))
+ if (GET_MODE_INNER (mode) != GET_MODE_INNER (ret_mode))
return e.use_exact_insn (
code_for_pred_reduc (CODE, e.vector_mode (), e.vector_mode ()));
else
@@ -1435,7 +1434,7 @@ public:
rtx expand (function_expander &e) const override
{
return e.use_exact_insn (
- code_for_pred_reduc_plus (UNSPEC, e.vector_mode (), e.vector_mode ()));
+ code_for_pred_reduc_plus (UNSPEC, e.vector_mode (), e.ret_mode ()));
}
};
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index e2c8ade98eb..6169116482a 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -967,6 +967,33 @@ (define_mode_iterator VDI [
(VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128")
])
+(define_mode_iterator VHF [
+ (VNx1HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128")
+ (VNx2HF "TARGET_VECTOR_ELEN_FP_16")
+ (VNx4HF "TARGET_VECTOR_ELEN_FP_16")
+ (VNx8HF "TARGET_VECTOR_ELEN_FP_16")
+ (VNx16HF "TARGET_VECTOR_ELEN_FP_16")
+ (VNx32HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32")
+ (VNx64HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
+])
+
+(define_mode_iterator VSF [
+ (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128")
+ (VNx2SF "TARGET_VECTOR_ELEN_FP_32")
+ (VNx4SF "TARGET_VECTOR_ELEN_FP_32")
+ (VNx8SF "TARGET_VECTOR_ELEN_FP_32")
+ (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
+ (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
+])
+
+(define_mode_iterator VDF [
+ (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128")
+ (VNx2DF "TARGET_VECTOR_ELEN_FP_64")
+ (VNx4DF "TARGET_VECTOR_ELEN_FP_64")
+ (VNx8DF "TARGET_VECTOR_ELEN_FP_64")
+ (VNx16DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128")
+])
+
(define_mode_iterator VQI_LMUL1 [
(VNx16QI "TARGET_MIN_VLEN >= 128")
(VNx8QI "TARGET_MIN_VLEN == 64")
@@ -990,6 +1017,23 @@ (define_mode_iterator VDI_LMUL1 [
(VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN == 64")
])
+(define_mode_iterator VHF_LMUL1 [
+ (VNx8HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
+ (VNx4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN == 64")
+ (VNx2HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN == 32")
+])
+
+(define_mode_iterator VSF_LMUL1 [
+ (VNx4SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
+ (VNx2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN == 64")
+ (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN == 32")
+])
+
+(define_mode_iterator VDF_LMUL1 [
+ (VNx2DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128")
+ (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN == 64")
+])
+
(define_mode_attr VLMULX2 [
(VNx1QI "VNx2QI") (VNx2QI "VNx4QI") (VNx4QI "VNx8QI") (VNx8QI "VNx16QI") (VNx16QI "VNx32QI") (VNx32QI "VNx64QI") (VNx64QI "VNx128QI")
(VNx1HI "VNx2HI") (VNx2HI "VNx4HI") (VNx4HI "VNx8HI") (VNx8HI "VNx16HI") (VNx16HI "VNx32HI") (VNx32HI "VNx64HI")
@@ -1348,48 +1392,6 @@ (define_mode_attr VNCONVERT [
(VNx1DF "VNx1SI") (VNx2DF "VNx2SI") (VNx4DF "VNx4SI") (VNx8DF "VNx8SI") (VNx16DF "VNx16SI")
])
-(define_mode_attr VLMUL1 [
- (VNx1QI "VNx16QI") (VNx2QI "VNx16QI") (VNx4QI "VNx16QI")
- (VNx8QI "VNx16QI") (VNx16QI "VNx16QI") (VNx32QI "VNx16QI") (VNx64QI "VNx16QI") (VNx128QI "VNx16QI")
- (VNx1HI "VNx8HI") (VNx2HI "VNx8HI") (VNx4HI "VNx8HI")
- (VNx8HI "VNx8HI") (VNx16HI "VNx8HI") (VNx32HI "VNx8HI") (VNx64HI "VNx8HI")
- (VNx1SI "VNx4SI") (VNx2SI "VNx4SI") (VNx4SI "VNx4SI")
- (VNx8SI "VNx4SI") (VNx16SI "VNx4SI") (VNx32SI "VNx4SI")
- (VNx1DI "VNx2DI") (VNx2DI "VNx2DI")
- (VNx4DI "VNx2DI") (VNx8DI "VNx2DI") (VNx16DI "VNx2DI")
- (VNx1HF "VNx8HF") (VNx2HF "VNx8HF") (VNx4HF "VNx8HF") (VNx8HF "VNx8HF") (VNx16HF "VNx8HF") (VNx32HF "VNx8HF") (VNx64HF "VNx8HF")
- (VNx1SF "VNx4SF") (VNx2SF "VNx4SF")
- (VNx4SF "VNx4SF") (VNx8SF "VNx4SF") (VNx16SF "VNx4SF") (VNx32SF "VNx4SF")
- (VNx1DF "VNx2DF") (VNx2DF "VNx2DF")
- (VNx4DF "VNx2DF") (VNx8DF "VNx2DF") (VNx16DF "VNx2DF")
-])
-
-(define_mode_attr VLMUL1_ZVE64 [
- (VNx1QI "VNx8QI") (VNx2QI "VNx8QI") (VNx4QI "VNx8QI")
- (VNx8QI "VNx8QI") (VNx16QI "VNx8QI") (VNx32QI "VNx8QI") (VNx64QI "VNx8QI")
- (VNx1HI "VNx4HI") (VNx2HI "VNx4HI") (VNx4HI "VNx4HI")
- (VNx8HI "VNx4HI") (VNx16HI "VNx4HI") (VNx32HI "VNx4HI")
- (VNx1SI "VNx2SI") (VNx2SI "VNx2SI") (VNx4SI "VNx2SI")
- (VNx8SI "VNx2SI") (VNx16SI "VNx2SI")
- (VNx1DI "VNx1DI") (VNx2DI "VNx1DI")
- (VNx4DI "VNx1DI") (VNx8DI "VNx1DI")
- (VNx1SF "VNx2SF") (VNx2SF "VNx2SF")
- (VNx4SF "VNx2SF") (VNx8SF "VNx2SF") (VNx16SF "VNx2SF")
- (VNx1DF "VNx1DF") (VNx2DF "VNx1DF")
- (VNx4DF "VNx1DF") (VNx8DF "VNx1DF")
-])
-
-(define_mode_attr VLMUL1_ZVE32 [
- (VNx1QI "VNx4QI") (VNx2QI "VNx4QI") (VNx4QI "VNx4QI")
- (VNx8QI "VNx4QI") (VNx16QI "VNx4QI") (VNx32QI "VNx4QI")
- (VNx1HI "VNx2HI") (VNx2HI "VNx2HI") (VNx4HI "VNx2HI")
- (VNx8HI "VNx2HI") (VNx16HI "VNx2HI")
- (VNx1SI "VNx1SI") (VNx2SI "VNx1SI") (VNx4SI "VNx1SI")
- (VNx8SI "VNx1SI")
- (VNx1SF "VNx2SF") (VNx2SF "VNx2SF")
- (VNx4SF "VNx2SF") (VNx8SF "VNx2SF")
-])
-
(define_mode_attr VWLMUL1 [
(VNx1QI "VNx8HI") (VNx2QI "VNx8HI") (VNx4QI "VNx8HI")
(VNx8QI "VNx8HI") (VNx16QI "VNx8HI") (VNx32QI "VNx8HI") (VNx64QI "VNx8HI") (VNx128QI "VNx8HI")
@@ -1421,48 +1423,6 @@ (define_mode_attr VWLMUL1_ZVE32 [
(VNx8HI "VNx1SI") (VNx16HI "VNx1SI")
])
-(define_mode_attr vlmul1 [
- (VNx1QI "vnx16qi") (VNx2QI "vnx16qi") (VNx4QI "vnx16qi")
- (VNx8QI "vnx16qi") (VNx16QI "vnx16qi") (VNx32QI "vnx16qi") (VNx64QI "vnx16qi") (VNx128QI "vnx16qi")
- (VNx1HI "vnx8hi") (VNx2HI "vnx8hi") (VNx4HI "vnx8hi")
- (VNx8HI "vnx8hi") (VNx16HI "vnx8hi") (VNx32HI "vnx8hi") (VNx64HI "vnx8hi")
- (VNx1SI "vnx4si") (VNx2SI "vnx4si") (VNx4SI "vnx4si")
- (VNx8SI "vnx4si") (VNx16SI "vnx4si") (VNx32SI "vnx4si")
- (VNx1DI "vnx2di") (VNx2DI "vnx2di")
- (VNx4DI "vnx2di") (VNx8DI "vnx2di") (VNx16DI "vnx2di")
- (VNx1HF "vnx8hf") (VNx2HF "vnx8hf") (VNx4HF "vnx8hf") (VNx8HF "vnx8hf") (VNx16HF "vnx8hf") (VNx32HF "vnx8hf") (VNx64HF "vnx8hf")
- (VNx1SF "vnx4sf") (VNx2SF "vnx4sf")
- (VNx4SF "vnx4sf") (VNx8SF "vnx4sf") (VNx16SF "vnx4sf") (VNx32SF "vnx4sf")
- (VNx1DF "vnx2df") (VNx2DF "vnx2df")
- (VNx4DF "vnx2df") (VNx8DF "vnx2df") (VNx16DF "vnx2df")
-])
-
-(define_mode_attr vlmul1_zve64 [
- (VNx1QI "vnx8qi") (VNx2QI "vnx8qi") (VNx4QI "vnx8qi")
- (VNx8QI "vnx8qi") (VNx16QI "vnx8qi") (VNx32QI "vnx8qi") (VNx64QI "vnx8qi")
- (VNx1HI "vnx4hi") (VNx2HI "vnx4hi") (VNx4HI "vnx4hi")
- (VNx8HI "vnx4hi") (VNx16HI "vnx4hi") (VNx32HI "vnx4hi")
- (VNx1SI "vnx2si") (VNx2SI "vnx2si") (VNx4SI "vnx2si")
- (VNx8SI "vnx2si") (VNx16SI "vnx2si")
- (VNx1DI "vnx1di") (VNx2DI "vnx1di")
- (VNx4DI "vnx1di") (VNx8DI "vnx1di")
- (VNx1SF "vnx2sf") (VNx2SF "vnx2sf")
- (VNx4SF "vnx2sf") (VNx8SF "vnx2sf") (VNx16SF "vnx2sf")
- (VNx1DF "vnx1df") (VNx2DF "vnx1df")
- (VNx4DF "vnx1df") (VNx8DF "vnx1df")
-])
-
-(define_mode_attr vlmul1_zve32 [
- (VNx1QI "vnx4qi") (VNx2QI "vnx4qi") (VNx4QI "vnx4qi")
- (VNx8QI "vnx4qi") (VNx16QI "vnx4qi") (VNx32QI "vnx4qi")
- (VNx1HI "vnx2hi") (VNx2HI "vnx2hi") (VNx4HI "vnx2hi")
- (VNx8HI "vnx2hi") (VNx16HI "vnx2hi")
- (VNx1SI "vnx1si") (VNx2SI "vnx1si") (VNx4SI "vnx1si")
- (VNx8SI "vnx1si")
- (VNx1SF "vnx1sf") (VNx2SF "vnx1sf")
- (VNx4SF "vnx1sf") (VNx8SF "vnx1sf")
-])
-
(define_mode_attr vwlmul1 [
(VNx1QI "vnx8hi") (VNx2QI "vnx8hi") (VNx4QI "vnx8hi")
(VNx8QI "vnx8hi") (VNx16QI "vnx8hi") (VNx32QI "vnx8hi") (VNx64QI "vnx8hi") (VNx128QI "vnx8hi")
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index d396e278503..efce992a012 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -7462,152 +7462,257 @@ (define_insn "@pred_widen_reduc_plus<v_su><mode><vwlmul1_zve32>"
[(set_attr "type" "viwred")
(set_attr "mode" "<MODE>")])
-(define_insn "@pred_reduc_<reduc><mode><vlmul1>"
- [(set (match_operand:<VLMUL1> 0 "register_operand" "=vr, vr")
- (unspec:<VLMUL1>
- [(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 5 "vector_length_operand" " rK, rK")
- (match_operand 6 "const_int_operand" " i, i")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+;; Float Reduction for HF
+(define_insn "@pred_reduc_<reduc><VHF:mode><VHF_LMUL1:mode>"
+ [
+ (set
+ (match_operand:VHF_LMUL1 0 "register_operand" "=vr, vr")
+ (unspec:VHF_LMUL1
+ [
+ (unspec:<VHF:VM>
+ [
+ (match_operand:<VHF:VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
- (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (any_freduc:VF
- (vec_duplicate:VF
- (vec_select:<VEL>
- (match_operand:<VLMUL1> 4 "register_operand" " vr, vr")
- (parallel [(const_int 0)])))
- (match_operand:VF 3 "register_operand" " vr, vr"))
- (match_operand:<VLMUL1> 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC))]
- "TARGET_VECTOR && TARGET_MIN_VLEN >= 128"
+ ] UNSPEC_VPREDICATE
+ )
+ (any_reduc:VHF
+ (vec_duplicate:VHF
+ (vec_select:<VEL>
+ (match_operand:VHF_LMUL1 4 "register_operand" " vr, vr")
+ (parallel [(const_int 0)])
+ )
+ )
+ (match_operand:VHF 3 "register_operand" " vr, vr")
+ )
+ (match_operand:VHF_LMUL1 2 "vector_merge_operand" " vu, 0")
+ ] UNSPEC_REDUC
+ )
+ )
+ ]
+ "TARGET_VECTOR"
"vfred<reduc>.vs\t%0,%3,%4%p1"
- [(set_attr "type" "vfredu")
- (set_attr "mode" "<MODE>")])
+ [
+ (set_attr "type" "vfredu")
+ (set_attr "mode" "<VHF:MODE>")
+ ]
+)
-(define_insn "@pred_reduc_<reduc><mode><vlmul1_zve64>"
- [(set (match_operand:<VLMUL1_ZVE64> 0 "register_operand" "=vr, vr")
- (unspec:<VLMUL1_ZVE64>
- [(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 5 "vector_length_operand" " rK, rK")
- (match_operand 6 "const_int_operand" " i, i")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+;; Float Reduction for SF
+(define_insn "@pred_reduc_<reduc><VSF:mode><VSF_LMUL1:mode>"
+ [
+ (set
+ (match_operand:VSF_LMUL1 0 "register_operand" "=vr, vr")
+ (unspec:VSF_LMUL1
+ [
+ (unspec:<VSF:VM>
+ [
+ (match_operand:<VSF:VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
- (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (any_freduc:VF_ZVE64
- (vec_duplicate:VF_ZVE64
- (vec_select:<VEL>
- (match_operand:<VLMUL1_ZVE64> 4 "register_operand" " vr, vr")
- (parallel [(const_int 0)])))
- (match_operand:VF_ZVE64 3 "register_operand" " vr, vr"))
- (match_operand:<VLMUL1_ZVE64> 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC))]
- "TARGET_VECTOR && TARGET_MIN_VLEN == 64"
+ ] UNSPEC_VPREDICATE
+ )
+ (any_reduc:VSF
+ (vec_duplicate:VSF
+ (vec_select:<VEL>
+ (match_operand:VSF_LMUL1 4 "register_operand" " vr, vr")
+ (parallel [(const_int 0)])
+ )
+ )
+ (match_operand:VSF 3 "register_operand" " vr, vr")
+ )
+ (match_operand:VSF_LMUL1 2 "vector_merge_operand" " vu, 0")
+ ] UNSPEC_REDUC
+ )
+ )
+ ]
+ "TARGET_VECTOR"
"vfred<reduc>.vs\t%0,%3,%4%p1"
- [(set_attr "type" "vfredu")
- (set_attr "mode" "<MODE>")])
+ [
+ (set_attr "type" "vfredu")
+ (set_attr "mode" "<VSF:MODE>")
+ ]
+)
-(define_insn "@pred_reduc_<reduc><mode><vlmul1_zve32>"
- [(set (match_operand:<VLMUL1_ZVE32> 0 "register_operand" "=vd, vd, vr, vr")
- (unspec:<VLMUL1_ZVE32>
- [(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
- (match_operand 5 "vector_length_operand" " rK, rK, rK, rK")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
- (match_operand 8 "const_int_operand" " i, i, i, i")
+;; Float Reduction for DF
+(define_insn "@pred_reduc_<reduc><VDF:mode><VDF_LMUL1:mode>"
+ [
+ (set
+ (match_operand:VDF_LMUL1 0 "register_operand" "=vr, vr")
+ (unspec:VDF_LMUL1
+ [
+ (unspec:<VDF:VM>
+ [
+ (match_operand:<VDF:VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
- (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (any_freduc:VF_ZVE32
- (vec_duplicate:VF_ZVE32
- (vec_select:<VEL>
- (match_operand:<VLMUL1_ZVE32> 4 "register_operand" " vr, vr, vr, vr")
- (parallel [(const_int 0)])))
- (match_operand:VF_ZVE32 3 "register_operand" " vr, vr, vr, vr"))
- (match_operand:<VLMUL1_ZVE32> 2 "vector_merge_operand" " vu, 0, vu, 0")] UNSPEC_REDUC))]
- "TARGET_VECTOR && TARGET_MIN_VLEN == 32"
+ ] UNSPEC_VPREDICATE
+ )
+ (any_reduc:VDF
+ (vec_duplicate:VDF
+ (vec_select:<VEL>
+ (match_operand:VDF_LMUL1 4 "register_operand" " vr, vr")
+ (parallel [(const_int 0)])
+ )
+ )
+ (match_operand:VDF 3 "register_operand" " vr, vr")
+ )
+ (match_operand:VDF_LMUL1 2 "vector_merge_operand" " vu, 0")
+ ] UNSPEC_REDUC
+ )
+ )
+ ]
+ "TARGET_VECTOR"
"vfred<reduc>.vs\t%0,%3,%4%p1"
- [(set_attr "type" "vfredu")
- (set_attr "mode" "<MODE>")])
+ [
+ (set_attr "type" "vfredu")
+ (set_attr "mode" "<VDF:MODE>")
+ ]
+)
-(define_insn "@pred_reduc_plus<order><mode><vlmul1>"
- [(set (match_operand:<VLMUL1> 0 "register_operand" "=vr, vr")
- (unspec:<VLMUL1>
- [(unspec:<VLMUL1>
- [(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 5 "vector_length_operand" " rK, rK")
- (match_operand 6 "const_int_operand" " i, i")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
- (reg:SI VL_REGNUM)
- (reg:SI VTYPE_REGNUM)
- (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (plus:VF
- (vec_duplicate:VF
- (vec_select:<VEL>
- (match_operand:<VLMUL1> 4 "register_operand" " vr, vr")
- (parallel [(const_int 0)])))
- (match_operand:VF 3 "register_operand" " vr, vr"))
- (match_operand:<VLMUL1> 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC)] ORDER))]
- "TARGET_VECTOR && TARGET_MIN_VLEN >= 128"
+;; Float Ordered Reduction Sum for HF
+(define_insn "@pred_reduc_plus<order><VHF:mode><VHF_LMUL1:mode>"
+ [
+ (set
+ (match_operand:VHF_LMUL1 0 "register_operand" "=vr,vr")
+ (unspec:VHF_LMUL1
+ [
+ (unspec:VHF_LMUL1
+ [
+ (unspec:<VHF:VM>
+ [
+ (match_operand:<VHF:VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)
+ (reg:SI FRM_REGNUM)
+ ] UNSPEC_VPREDICATE
+ )
+ (plus:VHF
+ (vec_duplicate:VHF
+ (vec_select:<VEL>
+ (match_operand:VHF_LMUL1 4 "register_operand" " vr, vr")
+ (parallel [(const_int 0)])
+ )
+ )
+ (match_operand:VHF 3 "register_operand" " vr, vr")
+ )
+ (match_operand:VHF_LMUL1 2 "vector_merge_operand" " vu, 0")
+ ] UNSPEC_REDUC
+ )
+ ] ORDER
+ )
+ )
+ ]
+ "TARGET_VECTOR"
"vfred<order>sum.vs\t%0,%3,%4%p1"
- [(set_attr "type" "vfred<order>")
- (set_attr "mode" "<MODE>")])
+ [
+ (set_attr "type" "vfred<order>")
+ (set_attr "mode" "<VHF:MODE>")
+ ]
+)
-(define_insn "@pred_reduc_plus<order><mode><vlmul1_zve64>"
- [(set (match_operand:<VLMUL1_ZVE64> 0 "register_operand" "=vr, vr")
- (unspec:<VLMUL1_ZVE64>
- [(unspec:<VLMUL1_ZVE64>
- [(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 5 "vector_length_operand" " rK, rK")
- (match_operand 6 "const_int_operand" " i, i")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
- (reg:SI VL_REGNUM)
- (reg:SI VTYPE_REGNUM)
- (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (plus:VF_ZVE64
- (vec_duplicate:VF_ZVE64
- (vec_select:<VEL>
- (match_operand:<VLMUL1_ZVE64> 4 "register_operand" " vr, vr")
- (parallel [(const_int 0)])))
- (match_operand:VF_ZVE64 3 "register_operand" " vr, vr"))
- (match_operand:<VLMUL1_ZVE64> 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC)] ORDER))]
- "TARGET_VECTOR && TARGET_MIN_VLEN == 64"
+;; Float Ordered Reduction Sum for SF
+(define_insn "@pred_reduc_plus<order><VSF:mode><VSF_LMUL1:mode>"
+ [
+ (set
+ (match_operand:VSF_LMUL1 0 "register_operand" "=vr,vr")
+ (unspec:VSF_LMUL1
+ [
+ (unspec:VSF_LMUL1
+ [
+ (unspec:<VSF:VM>
+ [
+ (match_operand:<VSF:VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)
+ (reg:SI FRM_REGNUM)
+ ] UNSPEC_VPREDICATE
+ )
+ (plus:VSF
+ (vec_duplicate:VSF
+ (vec_select:<VEL>
+ (match_operand:VSF_LMUL1 4 "register_operand" " vr, vr")
+ (parallel [(const_int 0)])
+ )
+ )
+ (match_operand:VSF 3 "register_operand" " vr, vr")
+ )
+ (match_operand:VSF_LMUL1 2 "vector_merge_operand" " vu, 0")
+ ] UNSPEC_REDUC
+ )
+ ] ORDER
+ )
+ )
+ ]
+ "TARGET_VECTOR"
"vfred<order>sum.vs\t%0,%3,%4%p1"
- [(set_attr "type" "vfred<order>")
- (set_attr "mode" "<MODE>")])
+ [
+ (set_attr "type" "vfred<order>")
+ (set_attr "mode" "<VSF:MODE>")
+ ]
+)
-(define_insn "@pred_reduc_plus<order><mode><vlmul1_zve32>"
- [(set (match_operand:<VLMUL1_ZVE32> 0 "register_operand" "=vd, vd, vr, vr")
- (unspec:<VLMUL1_ZVE32>
- [(unspec:<VLMUL1_ZVE32>
- [(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
- (match_operand 5 "vector_length_operand" " rK, rK, rK, rK")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
- (match_operand 8 "const_int_operand" " i, i, i, i")
- (reg:SI VL_REGNUM)
- (reg:SI VTYPE_REGNUM)
- (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (plus:VF_ZVE32
- (vec_duplicate:VF_ZVE32
- (vec_select:<VEL>
- (match_operand:<VLMUL1_ZVE32> 4 "register_operand" " vr, vr, vr, vr")
- (parallel [(const_int 0)])))
- (match_operand:VF_ZVE32 3 "register_operand" " vr, vr, vr, vr"))
- (match_operand:<VLMUL1_ZVE32> 2 "vector_merge_operand" " vu, 0, vu, 0")] UNSPEC_REDUC)] ORDER))]
- "TARGET_VECTOR && TARGET_MIN_VLEN == 32"
+;; Float Ordered Reduction Sum for DF
+(define_insn "@pred_reduc_plus<order><VDF:mode><VDF_LMUL1:mode>"
+ [
+ (set
+ (match_operand:VDF_LMUL1 0 "register_operand" "=vr,vr")
+ (unspec:VDF_LMUL1
+ [
+ (unspec:VDF_LMUL1
+ [
+ (unspec:<VDF:VM>
+ [
+ (match_operand:<VDF:VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)
+ (reg:SI FRM_REGNUM)
+ ] UNSPEC_VPREDICATE
+ )
+ (plus:VDF
+ (vec_duplicate:VDF
+ (vec_select:<VEL>
+ (match_operand:VDF_LMUL1 4 "register_operand" " vr, vr")
+ (parallel [(const_int 0)])
+ )
+ )
+ (match_operand:VDF 3 "register_operand" " vr, vr")
+ )
+ (match_operand:VDF_LMUL1 2 "vector_merge_operand" " vu, 0")
+ ] UNSPEC_REDUC
+ )
+ ] ORDER
+ )
+ )
+ ]
+ "TARGET_VECTOR"
"vfred<order>sum.vs\t%0,%3,%4%p1"
- [(set_attr "type" "vfred<order>")
- (set_attr "mode" "<MODE>")])
+ [
+ (set_attr "type" "vfred<order>")
+ (set_attr "mode" "<VDF:MODE>")
+ ]
+)
(define_insn "@pred_widen_reduc_plus<order><mode><vwlmul1>"
[(set (match_operand:<VWLMUL1> 0 "register_operand" "=&vr, &vr")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.c
new file mode 100644
index 00000000000..24a4ba3b45f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zve32f_zvfh -mabi=ilp32f -O3 -Wno-psabi" } */
+
+#include "pr110277-1.h"
+
+/* { dg-final { scan-assembler-times {vfredmax\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vfredmin\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vfredusum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.h b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.h
new file mode 100644
index 00000000000..67c296c2213
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.h
@@ -0,0 +1,33 @@
+#include "riscv_vector.h"
+
+vfloat16m1_t test_vfredmax_vs_f16mf2_f16m1(vfloat16mf2_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredmax_vs_f16mf2_f16m1(vector, scalar, vl);
+}
+
+vfloat32m1_t test_vfredmax_vs_f32m8_f32m1(vfloat32m8_t vector, vfloat32m1_t scalar, size_t vl) {
+ return __riscv_vfredmax_vs_f32m8_f32m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredmin_vs_f16mf2_f16m1(vfloat16mf2_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredmin_vs_f16mf2_f16m1(vector, scalar, vl);
+}
+
+vfloat32m1_t test_vfredmin_vs_f32m8_f32m1(vfloat32m8_t vector, vfloat32m1_t scalar, size_t vl) {
+ return __riscv_vfredmin_vs_f32m8_f32m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredosum_vs_f16mf2_f16m1(vfloat16mf2_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredosum_vs_f16mf2_f16m1(vector, scalar, vl);
+}
+
+vfloat32m1_t test_vfredosum_vs_f32m8_f32m1(vfloat32m8_t vector, vfloat32m1_t scalar, size_t vl) {
+ return __riscv_vfredosum_vs_f32m8_f32m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredusum_vs_f16mf2_f16m1(vfloat16mf2_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredusum_vs_f16mf2_f16m1(vector, scalar, vl);
+}
+
+vfloat32m1_t test_vfredusum_vs_f32m8_f32m1(vfloat32m8_t vector, vfloat32m1_t scalar, size_t vl) {
+ return __riscv_vfredusum_vs_f32m8_f32m1(vector, scalar, vl);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.c
new file mode 100644
index 00000000000..23d7361488a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zve64d_zvfh -mabi=ilp32d -O3 -Wno-psabi" } */
+
+#include "pr110277-1.h"
+#include "pr110277-2.h"
+
+/* { dg-final { scan-assembler-times {vfredmax\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 4 } } */
+/* { dg-final { scan-assembler-times {vfredmin\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 4 } } */
+/* { dg-final { scan-assembler-times {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 4 } } */
+/* { dg-final { scan-assembler-times {vfredusum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 4 } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.h b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.h
new file mode 100644
index 00000000000..7e5c81aa213
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.h
@@ -0,0 +1,33 @@
+#include "riscv_vector.h"
+
+vfloat16m1_t test_vfredmax_vs_f16mf4_f16m1(vfloat16mf4_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredmax_vs_f16mf4_f16m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredmin_vs_f16mf4_f16m1(vfloat16mf4_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredmin_vs_f16mf4_f16m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredosum_vs_f16mf4_f16m1(vfloat16mf4_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredosum_vs_f16mf4_f16m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredusum_vs_f16mf4_f16m1(vfloat16mf4_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredusum_vs_f16mf4_f16m1(vector, scalar, vl);
+}
+
+vfloat64m1_t test_vfredmax_vs_f64m8_f64m1(vfloat64m8_t vector, vfloat64m1_t scalar, size_t vl) {
+ return __riscv_vfredmax_vs_f64m8_f64m1(vector, scalar, vl);
+}
+
+vfloat64m1_t test_vfredmin_vs_f64m8_f64m1(vfloat64m8_t vector, vfloat64m1_t scalar, size_t vl) {
+ return __riscv_vfredmin_vs_f64m8_f64m1(vector, scalar, vl);
+}
+
+vfloat64m1_t test_vfredosum_vs_f64m8_f64m1(vfloat64m8_t vector, vfloat64m1_t scalar, size_t vl) {
+ return __riscv_vfredosum_vs_f64m8_f64m1(vector, scalar, vl);
+}
+
+vfloat64m1_t test_vfredusum_vs_f64m8_f64m1(vfloat64m8_t vector, vfloat64m1_t scalar, size_t vl) {
+ return __riscv_vfredusum_vs_f64m8_f64m1(vector, scalar, vl);
+}
--
2.34.1
On 6/18/23 07:14, 钟居哲 wrote:
> Thanks for fixing it for me.
> LGTM now.
OK for the trunk.
jeff
Committed, thanks Jeff.
Pan
-----Original Message-----
From: Jeff Law <jeffreyalaw@gmail.com>
Sent: Monday, June 19, 2023 9:51 PM
To: 钟居哲 <juzhe.zhong@rivai.ai>; Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: rdapp.gcc <rdapp.gcc@gmail.com>; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng <kito.cheng@gmail.com>
Subject: Re: [PATCH v2] RISC-V: Bugfix for RVV float reduction in ZVE32/64
On 6/18/23 07:14, 钟居哲 wrote:
> Thanks for fixing it for me.
> LGTM now.
OK for the trunk.
jeff
@@ -1400,8 +1400,7 @@ public:
machine_mode ret_mode = e.ret_mode ();
/* TODO: we will use ret_mode after all types of PR110265 are addressed. */
- if ((GET_MODE_CLASS (MODE) == MODE_VECTOR_FLOAT)
- || GET_MODE_INNER (mode) != GET_MODE_INNER (ret_mode))
+ if (GET_MODE_INNER (mode) != GET_MODE_INNER (ret_mode))
return e.use_exact_insn (
code_for_pred_reduc (CODE, e.vector_mode (), e.vector_mode ()));
else
@@ -1435,7 +1434,7 @@ public:
rtx expand (function_expander &e) const override
{
return e.use_exact_insn (
- code_for_pred_reduc_plus (UNSPEC, e.vector_mode (), e.vector_mode ()));
+ code_for_pred_reduc_plus (UNSPEC, e.vector_mode (), e.ret_mode ()));
}
};
@@ -967,6 +967,33 @@ (define_mode_iterator VDI [
(VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128")
])
+(define_mode_iterator VHF [
+ (VNx1HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128")
+ (VNx2HF "TARGET_VECTOR_ELEN_FP_16")
+ (VNx4HF "TARGET_VECTOR_ELEN_FP_16")
+ (VNx8HF "TARGET_VECTOR_ELEN_FP_16")
+ (VNx16HF "TARGET_VECTOR_ELEN_FP_16")
+ (VNx32HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32")
+ (VNx64HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
+])
+
+(define_mode_iterator VSF [
+ (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128")
+ (VNx2SF "TARGET_VECTOR_ELEN_FP_32")
+ (VNx4SF "TARGET_VECTOR_ELEN_FP_32")
+ (VNx8SF "TARGET_VECTOR_ELEN_FP_32")
+ (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
+ (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
+])
+
+(define_mode_iterator VDF [
+ (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128")
+ (VNx2DF "TARGET_VECTOR_ELEN_FP_64")
+ (VNx4DF "TARGET_VECTOR_ELEN_FP_64")
+ (VNx8DF "TARGET_VECTOR_ELEN_FP_64")
+ (VNx16DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128")
+])
+
(define_mode_iterator VQI_LMUL1 [
(VNx16QI "TARGET_MIN_VLEN >= 128")
(VNx8QI "TARGET_MIN_VLEN == 64")
@@ -990,6 +1017,23 @@ (define_mode_iterator VDI_LMUL1 [
(VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN == 64")
])
+(define_mode_iterator VHF_LMUL1 [
+ (VNx8HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
+ (VNx4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN == 64")
+ (VNx2HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN == 32")
+])
+
+(define_mode_iterator VSF_LMUL1 [
+ (VNx4SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
+ (VNx2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN == 64")
+ (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN == 32")
+])
+
+(define_mode_iterator VDF_LMUL1 [
+ (VNx2DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128")
+ (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN == 64")
+])
+
(define_mode_attr VLMULX2 [
(VNx1QI "VNx2QI") (VNx2QI "VNx4QI") (VNx4QI "VNx8QI") (VNx8QI "VNx16QI") (VNx16QI "VNx32QI") (VNx32QI "VNx64QI") (VNx64QI "VNx128QI")
(VNx1HI "VNx2HI") (VNx2HI "VNx4HI") (VNx4HI "VNx8HI") (VNx8HI "VNx16HI") (VNx16HI "VNx32HI") (VNx32HI "VNx64HI")
@@ -1348,48 +1392,6 @@ (define_mode_attr VNCONVERT [
(VNx1DF "VNx1SI") (VNx2DF "VNx2SI") (VNx4DF "VNx4SI") (VNx8DF "VNx8SI") (VNx16DF "VNx16SI")
])
-(define_mode_attr VLMUL1 [
- (VNx1QI "VNx16QI") (VNx2QI "VNx16QI") (VNx4QI "VNx16QI")
- (VNx8QI "VNx16QI") (VNx16QI "VNx16QI") (VNx32QI "VNx16QI") (VNx64QI "VNx16QI") (VNx128QI "VNx16QI")
- (VNx1HI "VNx8HI") (VNx2HI "VNx8HI") (VNx4HI "VNx8HI")
- (VNx8HI "VNx8HI") (VNx16HI "VNx8HI") (VNx32HI "VNx8HI") (VNx64HI "VNx8HI")
- (VNx1SI "VNx4SI") (VNx2SI "VNx4SI") (VNx4SI "VNx4SI")
- (VNx8SI "VNx4SI") (VNx16SI "VNx4SI") (VNx32SI "VNx4SI")
- (VNx1DI "VNx2DI") (VNx2DI "VNx2DI")
- (VNx4DI "VNx2DI") (VNx8DI "VNx2DI") (VNx16DI "VNx2DI")
- (VNx1HF "VNx8HF") (VNx2HF "VNx8HF") (VNx4HF "VNx8HF") (VNx8HF "VNx8HF") (VNx16HF "VNx8HF") (VNx32HF "VNx8HF") (VNx64HF "VNx8HF")
- (VNx1SF "VNx4SF") (VNx2SF "VNx4SF")
- (VNx4SF "VNx4SF") (VNx8SF "VNx4SF") (VNx16SF "VNx4SF") (VNx32SF "VNx4SF")
- (VNx1DF "VNx2DF") (VNx2DF "VNx2DF")
- (VNx4DF "VNx2DF") (VNx8DF "VNx2DF") (VNx16DF "VNx2DF")
-])
-
-(define_mode_attr VLMUL1_ZVE64 [
- (VNx1QI "VNx8QI") (VNx2QI "VNx8QI") (VNx4QI "VNx8QI")
- (VNx8QI "VNx8QI") (VNx16QI "VNx8QI") (VNx32QI "VNx8QI") (VNx64QI "VNx8QI")
- (VNx1HI "VNx4HI") (VNx2HI "VNx4HI") (VNx4HI "VNx4HI")
- (VNx8HI "VNx4HI") (VNx16HI "VNx4HI") (VNx32HI "VNx4HI")
- (VNx1SI "VNx2SI") (VNx2SI "VNx2SI") (VNx4SI "VNx2SI")
- (VNx8SI "VNx2SI") (VNx16SI "VNx2SI")
- (VNx1DI "VNx1DI") (VNx2DI "VNx1DI")
- (VNx4DI "VNx1DI") (VNx8DI "VNx1DI")
- (VNx1SF "VNx2SF") (VNx2SF "VNx2SF")
- (VNx4SF "VNx2SF") (VNx8SF "VNx2SF") (VNx16SF "VNx2SF")
- (VNx1DF "VNx1DF") (VNx2DF "VNx1DF")
- (VNx4DF "VNx1DF") (VNx8DF "VNx1DF")
-])
-
-(define_mode_attr VLMUL1_ZVE32 [
- (VNx1QI "VNx4QI") (VNx2QI "VNx4QI") (VNx4QI "VNx4QI")
- (VNx8QI "VNx4QI") (VNx16QI "VNx4QI") (VNx32QI "VNx4QI")
- (VNx1HI "VNx2HI") (VNx2HI "VNx2HI") (VNx4HI "VNx2HI")
- (VNx8HI "VNx2HI") (VNx16HI "VNx2HI")
- (VNx1SI "VNx1SI") (VNx2SI "VNx1SI") (VNx4SI "VNx1SI")
- (VNx8SI "VNx1SI")
- (VNx1SF "VNx2SF") (VNx2SF "VNx2SF")
- (VNx4SF "VNx2SF") (VNx8SF "VNx2SF")
-])
-
(define_mode_attr VWLMUL1 [
(VNx1QI "VNx8HI") (VNx2QI "VNx8HI") (VNx4QI "VNx8HI")
(VNx8QI "VNx8HI") (VNx16QI "VNx8HI") (VNx32QI "VNx8HI") (VNx64QI "VNx8HI") (VNx128QI "VNx8HI")
@@ -1421,48 +1423,6 @@ (define_mode_attr VWLMUL1_ZVE32 [
(VNx8HI "VNx1SI") (VNx16HI "VNx1SI")
])
-(define_mode_attr vlmul1 [
- (VNx1QI "vnx16qi") (VNx2QI "vnx16qi") (VNx4QI "vnx16qi")
- (VNx8QI "vnx16qi") (VNx16QI "vnx16qi") (VNx32QI "vnx16qi") (VNx64QI "vnx16qi") (VNx128QI "vnx16qi")
- (VNx1HI "vnx8hi") (VNx2HI "vnx8hi") (VNx4HI "vnx8hi")
- (VNx8HI "vnx8hi") (VNx16HI "vnx8hi") (VNx32HI "vnx8hi") (VNx64HI "vnx8hi")
- (VNx1SI "vnx4si") (VNx2SI "vnx4si") (VNx4SI "vnx4si")
- (VNx8SI "vnx4si") (VNx16SI "vnx4si") (VNx32SI "vnx4si")
- (VNx1DI "vnx2di") (VNx2DI "vnx2di")
- (VNx4DI "vnx2di") (VNx8DI "vnx2di") (VNx16DI "vnx2di")
- (VNx1HF "vnx8hf") (VNx2HF "vnx8hf") (VNx4HF "vnx8hf") (VNx8HF "vnx8hf") (VNx16HF "vnx8hf") (VNx32HF "vnx8hf") (VNx64HF "vnx8hf")
- (VNx1SF "vnx4sf") (VNx2SF "vnx4sf")
- (VNx4SF "vnx4sf") (VNx8SF "vnx4sf") (VNx16SF "vnx4sf") (VNx32SF "vnx4sf")
- (VNx1DF "vnx2df") (VNx2DF "vnx2df")
- (VNx4DF "vnx2df") (VNx8DF "vnx2df") (VNx16DF "vnx2df")
-])
-
-(define_mode_attr vlmul1_zve64 [
- (VNx1QI "vnx8qi") (VNx2QI "vnx8qi") (VNx4QI "vnx8qi")
- (VNx8QI "vnx8qi") (VNx16QI "vnx8qi") (VNx32QI "vnx8qi") (VNx64QI "vnx8qi")
- (VNx1HI "vnx4hi") (VNx2HI "vnx4hi") (VNx4HI "vnx4hi")
- (VNx8HI "vnx4hi") (VNx16HI "vnx4hi") (VNx32HI "vnx4hi")
- (VNx1SI "vnx2si") (VNx2SI "vnx2si") (VNx4SI "vnx2si")
- (VNx8SI "vnx2si") (VNx16SI "vnx2si")
- (VNx1DI "vnx1di") (VNx2DI "vnx1di")
- (VNx4DI "vnx1di") (VNx8DI "vnx1di")
- (VNx1SF "vnx2sf") (VNx2SF "vnx2sf")
- (VNx4SF "vnx2sf") (VNx8SF "vnx2sf") (VNx16SF "vnx2sf")
- (VNx1DF "vnx1df") (VNx2DF "vnx1df")
- (VNx4DF "vnx1df") (VNx8DF "vnx1df")
-])
-
-(define_mode_attr vlmul1_zve32 [
- (VNx1QI "vnx4qi") (VNx2QI "vnx4qi") (VNx4QI "vnx4qi")
- (VNx8QI "vnx4qi") (VNx16QI "vnx4qi") (VNx32QI "vnx4qi")
- (VNx1HI "vnx2hi") (VNx2HI "vnx2hi") (VNx4HI "vnx2hi")
- (VNx8HI "vnx2hi") (VNx16HI "vnx2hi")
- (VNx1SI "vnx1si") (VNx2SI "vnx1si") (VNx4SI "vnx1si")
- (VNx8SI "vnx1si")
- (VNx1SF "vnx1sf") (VNx2SF "vnx1sf")
- (VNx4SF "vnx1sf") (VNx8SF "vnx1sf")
-])
-
(define_mode_attr vwlmul1 [
(VNx1QI "vnx8hi") (VNx2QI "vnx8hi") (VNx4QI "vnx8hi")
(VNx8QI "vnx8hi") (VNx16QI "vnx8hi") (VNx32QI "vnx8hi") (VNx64QI "vnx8hi") (VNx128QI "vnx8hi")
@@ -7462,152 +7462,257 @@ (define_insn "@pred_widen_reduc_plus<v_su><mode><vwlmul1_zve32>"
[(set_attr "type" "viwred")
(set_attr "mode" "<MODE>")])
-(define_insn "@pred_reduc_<reduc><mode><vlmul1>"
- [(set (match_operand:<VLMUL1> 0 "register_operand" "=vr, vr")
- (unspec:<VLMUL1>
- [(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 5 "vector_length_operand" " rK, rK")
- (match_operand 6 "const_int_operand" " i, i")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+;; Float Reduction for HF
+(define_insn "@pred_reduc_<reduc><VHF:mode><VHF_LMUL1:mode>"
+ [
+ (set
+ (match_operand:VHF_LMUL1 0 "register_operand" "=vr, vr")
+ (unspec:VHF_LMUL1
+ [
+ (unspec:<VHF:VM>
+ [
+ (match_operand:<VHF:VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
- (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (any_freduc:VF
- (vec_duplicate:VF
- (vec_select:<VEL>
- (match_operand:<VLMUL1> 4 "register_operand" " vr, vr")
- (parallel [(const_int 0)])))
- (match_operand:VF 3 "register_operand" " vr, vr"))
- (match_operand:<VLMUL1> 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC))]
- "TARGET_VECTOR && TARGET_MIN_VLEN >= 128"
+ ] UNSPEC_VPREDICATE
+ )
+ (any_reduc:VHF
+ (vec_duplicate:VHF
+ (vec_select:<VEL>
+ (match_operand:VHF_LMUL1 4 "register_operand" " vr, vr")
+ (parallel [(const_int 0)])
+ )
+ )
+ (match_operand:VHF 3 "register_operand" " vr, vr")
+ )
+ (match_operand:VHF_LMUL1 2 "vector_merge_operand" " vu, 0")
+ ] UNSPEC_REDUC
+ )
+ )
+ ]
+ "TARGET_VECTOR"
"vfred<reduc>.vs\t%0,%3,%4%p1"
- [(set_attr "type" "vfredu")
- (set_attr "mode" "<MODE>")])
+ [
+ (set_attr "type" "vfredu")
+ (set_attr "mode" "<VHF:MODE>")
+ ]
+)
-(define_insn "@pred_reduc_<reduc><mode><vlmul1_zve64>"
- [(set (match_operand:<VLMUL1_ZVE64> 0 "register_operand" "=vr, vr")
- (unspec:<VLMUL1_ZVE64>
- [(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 5 "vector_length_operand" " rK, rK")
- (match_operand 6 "const_int_operand" " i, i")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
+;; Float Reduction for SF
+(define_insn "@pred_reduc_<reduc><VSF:mode><VSF_LMUL1:mode>"
+ [
+ (set
+ (match_operand:VSF_LMUL1 0 "register_operand" "=vr, vr")
+ (unspec:VSF_LMUL1
+ [
+ (unspec:<VSF:VM>
+ [
+ (match_operand:<VSF:VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
- (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (any_freduc:VF_ZVE64
- (vec_duplicate:VF_ZVE64
- (vec_select:<VEL>
- (match_operand:<VLMUL1_ZVE64> 4 "register_operand" " vr, vr")
- (parallel [(const_int 0)])))
- (match_operand:VF_ZVE64 3 "register_operand" " vr, vr"))
- (match_operand:<VLMUL1_ZVE64> 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC))]
- "TARGET_VECTOR && TARGET_MIN_VLEN == 64"
+ ] UNSPEC_VPREDICATE
+ )
+ (any_reduc:VSF
+ (vec_duplicate:VSF
+ (vec_select:<VEL>
+ (match_operand:VSF_LMUL1 4 "register_operand" " vr, vr")
+ (parallel [(const_int 0)])
+ )
+ )
+ (match_operand:VSF 3 "register_operand" " vr, vr")
+ )
+ (match_operand:VSF_LMUL1 2 "vector_merge_operand" " vu, 0")
+ ] UNSPEC_REDUC
+ )
+ )
+ ]
+ "TARGET_VECTOR"
"vfred<reduc>.vs\t%0,%3,%4%p1"
- [(set_attr "type" "vfredu")
- (set_attr "mode" "<MODE>")])
+ [
+ (set_attr "type" "vfredu")
+ (set_attr "mode" "<VSF:MODE>")
+ ]
+)
-(define_insn "@pred_reduc_<reduc><mode><vlmul1_zve32>"
- [(set (match_operand:<VLMUL1_ZVE32> 0 "register_operand" "=vd, vd, vr, vr")
- (unspec:<VLMUL1_ZVE32>
- [(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
- (match_operand 5 "vector_length_operand" " rK, rK, rK, rK")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
- (match_operand 8 "const_int_operand" " i, i, i, i")
+;; Float Reduction for DF
+(define_insn "@pred_reduc_<reduc><VDF:mode><VDF_LMUL1:mode>"
+ [
+ (set
+ (match_operand:VDF_LMUL1 0 "register_operand" "=vr, vr")
+ (unspec:VDF_LMUL1
+ [
+ (unspec:<VDF:VM>
+ [
+ (match_operand:<VDF:VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
- (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (any_freduc:VF_ZVE32
- (vec_duplicate:VF_ZVE32
- (vec_select:<VEL>
- (match_operand:<VLMUL1_ZVE32> 4 "register_operand" " vr, vr, vr, vr")
- (parallel [(const_int 0)])))
- (match_operand:VF_ZVE32 3 "register_operand" " vr, vr, vr, vr"))
- (match_operand:<VLMUL1_ZVE32> 2 "vector_merge_operand" " vu, 0, vu, 0")] UNSPEC_REDUC))]
- "TARGET_VECTOR && TARGET_MIN_VLEN == 32"
+ ] UNSPEC_VPREDICATE
+ )
+ (any_reduc:VDF
+ (vec_duplicate:VDF
+ (vec_select:<VEL>
+ (match_operand:VDF_LMUL1 4 "register_operand" " vr, vr")
+ (parallel [(const_int 0)])
+ )
+ )
+ (match_operand:VDF 3 "register_operand" " vr, vr")
+ )
+ (match_operand:VDF_LMUL1 2 "vector_merge_operand" " vu, 0")
+ ] UNSPEC_REDUC
+ )
+ )
+ ]
+ "TARGET_VECTOR"
"vfred<reduc>.vs\t%0,%3,%4%p1"
- [(set_attr "type" "vfredu")
- (set_attr "mode" "<MODE>")])
+ [
+ (set_attr "type" "vfredu")
+ (set_attr "mode" "<VDF:MODE>")
+ ]
+)
-(define_insn "@pred_reduc_plus<order><mode><vlmul1>"
- [(set (match_operand:<VLMUL1> 0 "register_operand" "=vr, vr")
- (unspec:<VLMUL1>
- [(unspec:<VLMUL1>
- [(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 5 "vector_length_operand" " rK, rK")
- (match_operand 6 "const_int_operand" " i, i")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
- (reg:SI VL_REGNUM)
- (reg:SI VTYPE_REGNUM)
- (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (plus:VF
- (vec_duplicate:VF
- (vec_select:<VEL>
- (match_operand:<VLMUL1> 4 "register_operand" " vr, vr")
- (parallel [(const_int 0)])))
- (match_operand:VF 3 "register_operand" " vr, vr"))
- (match_operand:<VLMUL1> 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC)] ORDER))]
- "TARGET_VECTOR && TARGET_MIN_VLEN >= 128"
+;; Float Ordered Reduction Sum for HF
+(define_insn "@pred_reduc_plus<order><VHF:mode><VHF_LMUL1:mode>"
+ [
+ (set
+ (match_operand:VHF_LMUL1 0 "register_operand" "=vr,vr")
+ (unspec:VHF_LMUL1
+ [
+ (unspec:VHF_LMUL1
+ [
+ (unspec:<VHF:VM>
+ [
+ (match_operand:<VHF:VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)
+ (reg:SI FRM_REGNUM)
+ ] UNSPEC_VPREDICATE
+ )
+ (plus:VHF
+ (vec_duplicate:VHF
+ (vec_select:<VEL>
+ (match_operand:VHF_LMUL1 4 "register_operand" " vr, vr")
+ (parallel [(const_int 0)])
+ )
+ )
+ (match_operand:VHF 3 "register_operand" " vr, vr")
+ )
+ (match_operand:VHF_LMUL1 2 "vector_merge_operand" " vu, 0")
+ ] UNSPEC_REDUC
+ )
+ ] ORDER
+ )
+ )
+ ]
+ "TARGET_VECTOR"
"vfred<order>sum.vs\t%0,%3,%4%p1"
- [(set_attr "type" "vfred<order>")
- (set_attr "mode" "<MODE>")])
+ [
+ (set_attr "type" "vfred<order>")
+ (set_attr "mode" "<VHF:MODE>")
+ ]
+)
-(define_insn "@pred_reduc_plus<order><mode><vlmul1_zve64>"
- [(set (match_operand:<VLMUL1_ZVE64> 0 "register_operand" "=vr, vr")
- (unspec:<VLMUL1_ZVE64>
- [(unspec:<VLMUL1_ZVE64>
- [(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
- (match_operand 5 "vector_length_operand" " rK, rK")
- (match_operand 6 "const_int_operand" " i, i")
- (match_operand 7 "const_int_operand" " i, i")
- (match_operand 8 "const_int_operand" " i, i")
- (reg:SI VL_REGNUM)
- (reg:SI VTYPE_REGNUM)
- (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (plus:VF_ZVE64
- (vec_duplicate:VF_ZVE64
- (vec_select:<VEL>
- (match_operand:<VLMUL1_ZVE64> 4 "register_operand" " vr, vr")
- (parallel [(const_int 0)])))
- (match_operand:VF_ZVE64 3 "register_operand" " vr, vr"))
- (match_operand:<VLMUL1_ZVE64> 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC)] ORDER))]
- "TARGET_VECTOR && TARGET_MIN_VLEN == 64"
+;; Float Ordered Reduction Sum for SF
+(define_insn "@pred_reduc_plus<order><VSF:mode><VSF_LMUL1:mode>"
+ [
+ (set
+ (match_operand:VSF_LMUL1 0 "register_operand" "=vr,vr")
+ (unspec:VSF_LMUL1
+ [
+ (unspec:VSF_LMUL1
+ [
+ (unspec:<VSF:VM>
+ [
+ (match_operand:<VSF:VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)
+ (reg:SI FRM_REGNUM)
+ ] UNSPEC_VPREDICATE
+ )
+ (plus:VSF
+ (vec_duplicate:VSF
+ (vec_select:<VEL>
+ (match_operand:VSF_LMUL1 4 "register_operand" " vr, vr")
+ (parallel [(const_int 0)])
+ )
+ )
+ (match_operand:VSF 3 "register_operand" " vr, vr")
+ )
+ (match_operand:VSF_LMUL1 2 "vector_merge_operand" " vu, 0")
+ ] UNSPEC_REDUC
+ )
+ ] ORDER
+ )
+ )
+ ]
+ "TARGET_VECTOR"
"vfred<order>sum.vs\t%0,%3,%4%p1"
- [(set_attr "type" "vfred<order>")
- (set_attr "mode" "<MODE>")])
+ [
+ (set_attr "type" "vfred<order>")
+ (set_attr "mode" "<VSF:MODE>")
+ ]
+)
-(define_insn "@pred_reduc_plus<order><mode><vlmul1_zve32>"
- [(set (match_operand:<VLMUL1_ZVE32> 0 "register_operand" "=vd, vd, vr, vr")
- (unspec:<VLMUL1_ZVE32>
- [(unspec:<VLMUL1_ZVE32>
- [(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
- (match_operand 5 "vector_length_operand" " rK, rK, rK, rK")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
- (match_operand 8 "const_int_operand" " i, i, i, i")
- (reg:SI VL_REGNUM)
- (reg:SI VTYPE_REGNUM)
- (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (plus:VF_ZVE32
- (vec_duplicate:VF_ZVE32
- (vec_select:<VEL>
- (match_operand:<VLMUL1_ZVE32> 4 "register_operand" " vr, vr, vr, vr")
- (parallel [(const_int 0)])))
- (match_operand:VF_ZVE32 3 "register_operand" " vr, vr, vr, vr"))
- (match_operand:<VLMUL1_ZVE32> 2 "vector_merge_operand" " vu, 0, vu, 0")] UNSPEC_REDUC)] ORDER))]
- "TARGET_VECTOR && TARGET_MIN_VLEN == 32"
+;; Float Ordered Reduction Sum for DF
+(define_insn "@pred_reduc_plus<order><VDF:mode><VDF_LMUL1:mode>"
+ [
+ (set
+ (match_operand:VDF_LMUL1 0 "register_operand" "=vr,vr")
+ (unspec:VDF_LMUL1
+ [
+ (unspec:VDF_LMUL1
+ [
+ (unspec:<VDF:VM>
+ [
+ (match_operand:<VDF:VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)
+ (reg:SI FRM_REGNUM)
+ ] UNSPEC_VPREDICATE
+ )
+ (plus:VDF
+ (vec_duplicate:VDF
+ (vec_select:<VEL>
+ (match_operand:VDF_LMUL1 4 "register_operand" " vr, vr")
+ (parallel [(const_int 0)])
+ )
+ )
+ (match_operand:VDF 3 "register_operand" " vr, vr")
+ )
+ (match_operand:VDF_LMUL1 2 "vector_merge_operand" " vu, 0")
+ ] UNSPEC_REDUC
+ )
+ ] ORDER
+ )
+ )
+ ]
+ "TARGET_VECTOR"
"vfred<order>sum.vs\t%0,%3,%4%p1"
- [(set_attr "type" "vfred<order>")
- (set_attr "mode" "<MODE>")])
+ [
+ (set_attr "type" "vfred<order>")
+ (set_attr "mode" "<VDF:MODE>")
+ ]
+)
(define_insn "@pred_widen_reduc_plus<order><mode><vwlmul1>"
[(set (match_operand:<VWLMUL1> 0 "register_operand" "=&vr, &vr")
new file mode 100644
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zve32f_zvfh -mabi=ilp32f -O3 -Wno-psabi" } */
+
+#include "pr110277-1.h"
+
+/* { dg-final { scan-assembler-times {vfredmax\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vfredmin\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vfredusum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
new file mode 100644
@@ -0,0 +1,33 @@
+#include "riscv_vector.h"
+
+vfloat16m1_t test_vfredmax_vs_f16mf2_f16m1(vfloat16mf2_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredmax_vs_f16mf2_f16m1(vector, scalar, vl);
+}
+
+vfloat32m1_t test_vfredmax_vs_f32m8_f32m1(vfloat32m8_t vector, vfloat32m1_t scalar, size_t vl) {
+ return __riscv_vfredmax_vs_f32m8_f32m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredmin_vs_f16mf2_f16m1(vfloat16mf2_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredmin_vs_f16mf2_f16m1(vector, scalar, vl);
+}
+
+vfloat32m1_t test_vfredmin_vs_f32m8_f32m1(vfloat32m8_t vector, vfloat32m1_t scalar, size_t vl) {
+ return __riscv_vfredmin_vs_f32m8_f32m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredosum_vs_f16mf2_f16m1(vfloat16mf2_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredosum_vs_f16mf2_f16m1(vector, scalar, vl);
+}
+
+vfloat32m1_t test_vfredosum_vs_f32m8_f32m1(vfloat32m8_t vector, vfloat32m1_t scalar, size_t vl) {
+ return __riscv_vfredosum_vs_f32m8_f32m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredusum_vs_f16mf2_f16m1(vfloat16mf2_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredusum_vs_f16mf2_f16m1(vector, scalar, vl);
+}
+
+vfloat32m1_t test_vfredusum_vs_f32m8_f32m1(vfloat32m8_t vector, vfloat32m1_t scalar, size_t vl) {
+ return __riscv_vfredusum_vs_f32m8_f32m1(vector, scalar, vl);
+}
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zve64d_zvfh -mabi=ilp32d -O3 -Wno-psabi" } */
+
+#include "pr110277-1.h"
+#include "pr110277-2.h"
+
+/* { dg-final { scan-assembler-times {vfredmax\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 4 } } */
+/* { dg-final { scan-assembler-times {vfredmin\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 4 } } */
+/* { dg-final { scan-assembler-times {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 4 } } */
+/* { dg-final { scan-assembler-times {vfredusum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 4 } } */
+
new file mode 100644
@@ -0,0 +1,33 @@
+#include "riscv_vector.h"
+
+vfloat16m1_t test_vfredmax_vs_f16mf4_f16m1(vfloat16mf4_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredmax_vs_f16mf4_f16m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredmin_vs_f16mf4_f16m1(vfloat16mf4_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredmin_vs_f16mf4_f16m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredosum_vs_f16mf4_f16m1(vfloat16mf4_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredosum_vs_f16mf4_f16m1(vector, scalar, vl);
+}
+
+vfloat16m1_t test_vfredusum_vs_f16mf4_f16m1(vfloat16mf4_t vector, vfloat16m1_t scalar, size_t vl) {
+ return __riscv_vfredusum_vs_f16mf4_f16m1(vector, scalar, vl);
+}
+
+vfloat64m1_t test_vfredmax_vs_f64m8_f64m1(vfloat64m8_t vector, vfloat64m1_t scalar, size_t vl) {
+ return __riscv_vfredmax_vs_f64m8_f64m1(vector, scalar, vl);
+}
+
+vfloat64m1_t test_vfredmin_vs_f64m8_f64m1(vfloat64m8_t vector, vfloat64m1_t scalar, size_t vl) {
+ return __riscv_vfredmin_vs_f64m8_f64m1(vector, scalar, vl);
+}
+
+vfloat64m1_t test_vfredosum_vs_f64m8_f64m1(vfloat64m8_t vector, vfloat64m1_t scalar, size_t vl) {
+ return __riscv_vfredosum_vs_f64m8_f64m1(vector, scalar, vl);
+}
+
+vfloat64m1_t test_vfredusum_vs_f64m8_f64m1(vfloat64m8_t vector, vfloat64m1_t scalar, size_t vl) {
+ return __riscv_vfredusum_vs_f64m8_f64m1(vector, scalar, vl);
+}