[3/5] RISC-V: Support vmfxx.vf for autovec comparison of vec and imm
Checks
Commit Message
Currently, the following instructions are generated by auto-vectorization:
flw
vsetvli
vfmv.v.f
...
vmfxx.vv
Two issues:
1. Additional vsetvli and vfmv.v.f instructions are emitted
2. An extra vector register is occupied, which may result in a smaller LMUL
We expect:
flw
...
vmfxx.vf
Tested on RV32 and RV64
gcc/ChangeLog:
* config/riscv/autovec.md: Accept imm
* config/riscv/riscv-v.cc (get_cmp_insn_code): Select scalar pattern
(expand_vec_cmp): Ditto
* config/riscv/riscv.cc (riscv_const_insns): Exclude float mode
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/cmp/vcond-1.c: Add new tests
Signed-off-by: demin.han <demin.han@starfivetech.com>
---
gcc/config/riscv/autovec.md | 2 +-
gcc/config/riscv/riscv-v.cc | 23 +++++++++----
gcc/config/riscv/riscv.cc | 2 +-
.../riscv/rvv/autovec/cmp/vcond-1.c | 34 +++++++++++++++++++
4 files changed, 52 insertions(+), 9 deletions(-)
Comments
Hi, Han. I understand you are trying to optimize a vector/splat-vector operation into a vector/scalar operation in the "expand" stage, that is,
vv -> vx or vv -> vf.
This is a known issue that we have been aware of for a long time.
This patch transforms vv -> vf when the splat vector is duplicated from a constant (by recognizing that it is a CONST_VECTOR in the expand stage),
but it can't transform vv -> vf when the splat vector is duplicated from a register.
For example, in a[i] = b[i] > x ? c[i] : d[i], x is a register, so this case cannot be optimized by your patch.
Actually, we have a solution that performs all possible transformations (including the case I mentioned above) from vv to vx or vf: the late-combine pass, which
was contributed by Richard Sandiford of Arm: https://patchwork.ozlabs.org/project/gcc/patch/mptr0ljn9eh.fsf@arm.com/
You can try applying that patch and experimenting with it locally.
I believe it will land in GCC 15, so I don't think we need this patch to do the optimization.
Thanks.
------------------ Original ------------------
From: "demin.han"<demin.han@starfivetech.com>;
Date: Fri, Mar 1, 2024 02:27 PM
To: "gcc-patches"<gcc-patches@gcc.gnu.org>;
Cc: "juzhe.zhong"<juzhe.zhong@rivai.ai>; "kito.cheng"<kito.cheng@gmail.com>; "Li, Pan2"<pan2.li@intel.com>; "jeffreyalaw"<jeffreyalaw@gmail.com>;
Subject: [PATCH 3/5] RISC-V: Support vmfxx.vf for autovec comparison of vec and imm
Currently, the following instructions are generated by auto-vectorization:
flw
vsetvli
vfmv.v.f
...
vmfxx.vv
Two issues:
1. Additional vsetvli and vfmv.v.f instructions are emitted
2. An extra vector register is occupied, which may result in a smaller LMUL
We expect:
flw
...
vmfxx.vf
Tested on RV32 and RV64
gcc/ChangeLog:
* config/riscv/autovec.md: Accept imm
* config/riscv/riscv-v.cc (get_cmp_insn_code): Select scalar pattern
(expand_vec_cmp): Ditto
* config/riscv/riscv.cc (riscv_const_insns): Exclude float mode
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/cmp/vcond-1.c: Add new tests
Signed-off-by: demin.han <demin.han@starfivetech.com>
---
gcc/config/riscv/autovec.md | 2 +-
gcc/config/riscv/riscv-v.cc | 23 +++++++++----
gcc/config/riscv/riscv.cc | 2 +-
.../riscv/rvv/autovec/cmp/vcond-1.c | 34 +++++++++++++++++++
4 files changed, 52 insertions(+), 9 deletions(-)
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 3b32369f68c..6cfb0800c45 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -690,7 +690,7 @@ (define_expand "vec_cmp<mode><vm>"
[(set (match_operand:<VM> 0 "register_operand")
(match_operator:<VM> 1 "comparison_operator"
[(match_operand:V_VLSF 2 "register_operand")
- (match_operand:V_VLSF 3 "register_operand")]))]
+ (match_operand:V_VLSF 3 "nonmemory_operand")]))]
"TARGET_VECTOR"
{
riscv_vector::expand_vec_cmp_float (operands[0], GET_CODE (operands[1]),
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 14e75b9a117..2a188ac78e0 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2610,9 +2610,15 @@ expand_vec_init (rtx target, rtx vals)
/* Get insn code for corresponding comparison. */
static insn_code
-get_cmp_insn_code (rtx_code code, machine_mode mode)
+get_cmp_insn_code (rtx_code code, machine_mode mode, bool scalar_p)
{
insn_code icode;
+ if (FLOAT_MODE_P (mode))
+ {
+ icode = !scalar_p ? code_for_pred_cmp (mode)
+ : code_for_pred_cmp_scalar (mode);
+ return icode;
+ }
switch (code)
{
case EQ:
@@ -2628,10 +2634,7 @@ get_cmp_insn_code (rtx_code code, machine_mode mode)
case LTU:
case GE:
case GEU:
- if (FLOAT_MODE_P (mode))
- icode = code_for_pred_cmp (mode);
- else
- icode = code_for_pred_ltge (mode);
+ icode = code_for_pred_ltge (mode);
break;
default:
gcc_unreachable ();
@@ -2757,7 +2760,6 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1, rtx mask,
{
machine_mode mask_mode = GET_MODE (target);
machine_mode data_mode = GET_MODE (op0);
- insn_code icode = get_cmp_insn_code (code, data_mode);
if (code == LTGT)
{
@@ -2765,12 +2767,19 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1, rtx mask,
rtx gt = gen_reg_rtx (mask_mode);
expand_vec_cmp (lt, LT, op0, op1, mask, maskoff);
expand_vec_cmp (gt, GT, op0, op1, mask, maskoff);
- icode = code_for_pred (IOR, mask_mode);
+ insn_code icode = code_for_pred (IOR, mask_mode);
rtx ops[] = {target, lt, gt};
emit_vlmax_insn (icode, BINARY_MASK_OP, ops);
return;
}
+ rtx elt;
+ machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (op1));
+ bool scalar_p = const_vec_duplicate_p (op1, &elt) && FLOAT_MODE_P (data_mode);
+ if (scalar_p)
+ op1 = force_reg (scalar_mode, elt);
+ insn_code icode = get_cmp_insn_code (code, data_mode, scalar_p);
+
rtx cmp = gen_rtx_fmt_ee (code, mask_mode, op0, op1);
if (!mask && !maskoff)
{
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 4100abc9dd1..1ffe4865c19 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1760,7 +1760,7 @@ riscv_const_insns (rtx x)
register vec_duplicate into vmv.v.x. */
scalar_mode smode = GET_MODE_INNER (GET_MODE (x));
if (maybe_gt (GET_MODE_SIZE (smode), UNITS_PER_WORD)
- && !immediate_operand (elt, Pmode))
+ && !FLOAT_MODE_P (smode) && !immediate_operand (elt, Pmode))
return 0;
/* Constants from -16 to 15 can be loaded with vmv.v.i.
The Wc0, Wc1 constraints are already covered by the
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
index 99a230d1c8a..7f6738518ee 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
@@ -141,6 +141,34 @@
TEST_VAR_ALL (DEF_VCOND_VAR)
TEST_IMM_ALL (DEF_VCOND_IMM)
+#define TEST_COND_IMM_FLOAT(T, COND, IMM, SUFFIX) \
+ T (float, float, COND, IMM, SUFFIX##_float_float) \
+ T (double, double, COND, IMM, SUFFIX##_double_double)
+
+#define TEST_IMM_FLOAT_ALL(T) \
+ TEST_COND_IMM_FLOAT (T, >, 0.0, _gt) \
+ TEST_COND_IMM_FLOAT (T, <, 0.0, _lt) \
+ TEST_COND_IMM_FLOAT (T, >=, 0.0, _ge) \
+ TEST_COND_IMM_FLOAT (T, <=, 0.0, _le) \
+ TEST_COND_IMM_FLOAT (T, ==, 0.0, _eq) \
+ TEST_COND_IMM_FLOAT (T, !=, 0.0, _ne) \
+ \
+ TEST_COND_IMM_FLOAT (T, >, 1.0, _gt1) \
+ TEST_COND_IMM_FLOAT (T, <, 1.0, _lt1) \
+ TEST_COND_IMM_FLOAT (T, >=, 1.0, _ge1) \
+ TEST_COND_IMM_FLOAT (T, <=, 1.0, _le1) \
+ TEST_COND_IMM_FLOAT (T, ==, 1.0, _eq1) \
+ TEST_COND_IMM_FLOAT (T, !=, 1.0, _ne1) \
+ \
+ TEST_COND_IMM_FLOAT (T, >, -1.0, _gt2) \
+ TEST_COND_IMM_FLOAT (T, <, -1.0, _lt2) \
+ TEST_COND_IMM_FLOAT (T, >=, -1.0, _ge2) \
+ TEST_COND_IMM_FLOAT (T, <=, -1.0, _le2) \
+ TEST_COND_IMM_FLOAT (T, ==, -1.0, _eq2) \
+ TEST_COND_IMM_FLOAT (T, !=, -1.0, _ne2)
+
+TEST_IMM_FLOAT_ALL (DEF_VCOND_IMM)
+
/* { dg-final { scan-assembler-times {\tvmseq\.vi} 42 } } */
/* { dg-final { scan-assembler-times {\tvmsne\.vi} 42 } } */
/* { dg-final { scan-assembler-times {\tvmsgt\.vi} 30 } } */
@@ -155,3 +183,9 @@ TEST_IMM_ALL (DEF_VCOND_IMM)
/* { dg-final { scan-assembler-times {\tvmslt} 38 } } */
/* { dg-final { scan-assembler-times {\tvmsge} 38 } } */
/* { dg-final { scan-assembler-times {\tvmsle} 82 } } */
+/* { dg-final { scan-assembler-times {\tvmfgt.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmflt.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfge.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfle.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfeq.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfne.vf} 6 } } */
--
2.43.2
Hi juzhe,
Yes, this patch does not work for comparisons between a vector and a scalar variable, because the scalar is duplicated in the loop vectorization pass.
I have not found a solution for that situation yet, so I addressed the vector-immediate comparison first.
Thanks for the reminder; I will try that patch.
Thanks.
From: 钟居哲 <juzhe.zhong@rivai.ai>
Sent: 2024年3月1日 15:49
To: Demin Han <demin.han@starfivetech.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: kito.cheng <kito.cheng@gmail.com>; Li, Pan2 <pan2.li@intel.com>; jeffreyalaw <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>; richard.sandiford <richard.sandiford@arm.com>
Subject: Re:[PATCH 3/5] RISC-V: Support vmfxx.vf for autovec comparison of vec and imm
Hi, Han. I understand you are trying to optimize a vector/splat-vector operation into a vector/scalar operation in the "expand" stage, that is,
vv -> vx or vv -> vf.
This is a known issue that we have been aware of for a long time.
This patch transforms vv -> vf when the splat vector is duplicated from a constant (by recognizing that it is a CONST_VECTOR in the expand stage),
but it can't transform vv -> vf when the splat vector is duplicated from a register.
For example, in a[i] = b[i] > x ? c[i] : d[i], x is a register, so this case cannot be optimized by your patch.
Actually, we have a solution that performs all possible transformations (including the case I mentioned above) from vv to vx or vf: the late-combine pass, which
was contributed by Richard Sandiford of Arm: https://patchwork.ozlabs.org/project/gcc/patch/mptr0ljn9eh.fsf@arm.com/
You can try applying that patch and experimenting with it locally.
I believe it will land in GCC 15, so I don't think we need this patch to do the optimization.
Thanks.
------------------ Original ------------------
From: "demin.han"<demin.han@starfivetech.com<mailto:demin.han@starfivetech.com>>;
Date: Fri, Mar 1, 2024 02:27 PM
To: "gcc-patches"<gcc-patches@gcc.gnu.org<mailto:gcc-patches@gcc.gnu.org>>;
Cc: "juzhe.zhong"<juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>>; "kito.cheng"<kito.cheng@gmail.com<mailto:kito.cheng@gmail.com>>; "Li, Pan2"<pan2.li@intel.com<mailto:pan2.li@intel.com>>; "jeffreyalaw"<jeffreyalaw@gmail.com<mailto:jeffreyalaw@gmail.com>>;
Subject: [PATCH 3/5] RISC-V: Support vmfxx.vf for autovec comparison of vec and imm
Currently, the following instructions are generated by auto-vectorization:
flw
vsetvli
vfmv.v.f
...
vmfxx.vv
Two issues:
1. Additional vsetvli and vfmv.v.f instructions are emitted
2. An extra vector register is occupied, which may result in a smaller LMUL
We expect:
flw
...
vmfxx.vf
Tested on RV32 and RV64
gcc/ChangeLog:
* config/riscv/autovec.md: Accept imm
* config/riscv/riscv-v.cc (get_cmp_insn_code): Select scalar pattern
(expand_vec_cmp): Ditto
* config/riscv/riscv.cc (riscv_const_insns): Exclude float mode
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/cmp/vcond-1.c: Add new tests
Signed-off-by: demin.han <demin.han@starfivetech.com<mailto:demin.han@starfivetech.com>>
---
gcc/config/riscv/autovec.md | 2 +-
gcc/config/riscv/riscv-v.cc | 23 +++++++++----
gcc/config/riscv/riscv.cc | 2 +-
.../riscv/rvv/autovec/cmp/vcond-1.c | 34 +++++++++++++++++++
4 files changed, 52 insertions(+), 9 deletions(-)
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 3b32369f68c..6cfb0800c45 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -690,7 +690,7 @@ (define_expand "vec_cmp<mode><vm>"
[(set (match_operand:<VM> 0 "register_operand")
(match_operator:<VM> 1 "comparison_operator"
[(match_operand:V_VLSF 2 "register_operand")
- (match_operand:V_VLSF 3 "register_operand")]))]
+ (match_operand:V_VLSF 3 "nonmemory_operand")]))]
"TARGET_VECTOR"
{
riscv_vector::expand_vec_cmp_float (operands[0], GET_CODE (operands[1]),
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 14e75b9a117..2a188ac78e0 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2610,9 +2610,15 @@ expand_vec_init (rtx target, rtx vals)
/* Get insn code for corresponding comparison. */
static insn_code
-get_cmp_insn_code (rtx_code code, machine_mode mode)
+get_cmp_insn_code (rtx_code code, machine_mode mode, bool scalar_p)
{
insn_code icode;
+ if (FLOAT_MODE_P (mode))
+ {
+ icode = !scalar_p ? code_for_pred_cmp (mode)
+ : code_for_pred_cmp_scalar (mode);
+ return icode;
+ }
switch (code)
{
case EQ:
@@ -2628,10 +2634,7 @@ get_cmp_insn_code (rtx_code code, machine_mode mode)
case LTU:
case GE:
case GEU:
- if (FLOAT_MODE_P (mode))
- icode = code_for_pred_cmp (mode);
- else
- icode = code_for_pred_ltge (mode);
+ icode = code_for_pred_ltge (mode);
break;
default:
gcc_unreachable ();
@@ -2757,7 +2760,6 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1, rtx mask,
{
machine_mode mask_mode = GET_MODE (target);
machine_mode data_mode = GET_MODE (op0);
- insn_code icode = get_cmp_insn_code (code, data_mode);
if (code == LTGT)
{
@@ -2765,12 +2767,19 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1, rtx mask,
rtx gt = gen_reg_rtx (mask_mode);
expand_vec_cmp (lt, LT, op0, op1, mask, maskoff);
expand_vec_cmp (gt, GT, op0, op1, mask, maskoff);
- icode = code_for_pred (IOR, mask_mode);
+ insn_code icode = code_for_pred (IOR, mask_mode);
rtx ops[] = {target, lt, gt};
emit_vlmax_insn (icode, BINARY_MASK_OP, ops);
return;
}
+ rtx elt;
+ machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (op1));
+ bool scalar_p = const_vec_duplicate_p (op1, &elt) && FLOAT_MODE_P (data_mode);
+ if (scalar_p)
+ op1 = force_reg (scalar_mode, elt);
+ insn_code icode = get_cmp_insn_code (code, data_mode, scalar_p);
+
rtx cmp = gen_rtx_fmt_ee (code, mask_mode, op0, op1);
if (!mask && !maskoff)
{
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 4100abc9dd1..1ffe4865c19 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1760,7 +1760,7 @@ riscv_const_insns (rtx x)
register vec_duplicate into vmv.v.x. */
scalar_mode smode = GET_MODE_INNER (GET_MODE (x));
if (maybe_gt (GET_MODE_SIZE (smode), UNITS_PER_WORD)
- && !immediate_operand (elt, Pmode))
+ && !FLOAT_MODE_P (smode) && !immediate_operand (elt, Pmode))
return 0;
/* Constants from -16 to 15 can be loaded with vmv.v.i.
The Wc0, Wc1 constraints are already covered by the
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
index 99a230d1c8a..7f6738518ee 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
@@ -141,6 +141,34 @@
TEST_VAR_ALL (DEF_VCOND_VAR)
TEST_IMM_ALL (DEF_VCOND_IMM)
+#define TEST_COND_IMM_FLOAT(T, COND, IMM, SUFFIX) \
+ T (float, float, COND, IMM, SUFFIX##_float_float) \
+ T (double, double, COND, IMM, SUFFIX##_double_double)
+
+#define TEST_IMM_FLOAT_ALL(T) \
+ TEST_COND_IMM_FLOAT (T, >, 0.0, _gt) \
+ TEST_COND_IMM_FLOAT (T, <, 0.0, _lt) \
+ TEST_COND_IMM_FLOAT (T, >=, 0.0, _ge) \
+ TEST_COND_IMM_FLOAT (T, <=, 0.0, _le) \
+ TEST_COND_IMM_FLOAT (T, ==, 0.0, _eq) \
+ TEST_COND_IMM_FLOAT (T, !=, 0.0, _ne) \
+ \
+ TEST_COND_IMM_FLOAT (T, >, 1.0, _gt1) \
+ TEST_COND_IMM_FLOAT (T, <, 1.0, _lt1) \
+ TEST_COND_IMM_FLOAT (T, >=, 1.0, _ge1) \
+ TEST_COND_IMM_FLOAT (T, <=, 1.0, _le1) \
+ TEST_COND_IMM_FLOAT (T, ==, 1.0, _eq1) \
+ TEST_COND_IMM_FLOAT (T, !=, 1.0, _ne1) \
+ \
+ TEST_COND_IMM_FLOAT (T, >, -1.0, _gt2) \
+ TEST_COND_IMM_FLOAT (T, <, -1.0, _lt2) \
+ TEST_COND_IMM_FLOAT (T, >=, -1.0, _ge2) \
+ TEST_COND_IMM_FLOAT (T, <=, -1.0, _le2) \
+ TEST_COND_IMM_FLOAT (T, ==, -1.0, _eq2) \
+ TEST_COND_IMM_FLOAT (T, !=, -1.0, _ne2)
+
+TEST_IMM_FLOAT_ALL (DEF_VCOND_IMM)
+
/* { dg-final { scan-assembler-times {\tvmseq\.vi} 42 } } */
/* { dg-final { scan-assembler-times {\tvmsne\.vi} 42 } } */
/* { dg-final { scan-assembler-times {\tvmsgt\.vi} 30 } } */
@@ -155,3 +183,9 @@ TEST_IMM_ALL (DEF_VCOND_IMM)
/* { dg-final { scan-assembler-times {\tvmslt} 38 } } */
/* { dg-final { scan-assembler-times {\tvmsge} 38 } } */
/* { dg-final { scan-assembler-times {\tvmsle} 82 } } */
+/* { dg-final { scan-assembler-times {\tvmfgt.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmflt.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfge.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfle.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfeq.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfne.vf} 6 } } */
--
2.43.2
@@ -690,7 +690,7 @@ (define_expand "vec_cmp<mode><vm>"
[(set (match_operand:<VM> 0 "register_operand")
(match_operator:<VM> 1 "comparison_operator"
[(match_operand:V_VLSF 2 "register_operand")
- (match_operand:V_VLSF 3 "register_operand")]))]
+ (match_operand:V_VLSF 3 "nonmemory_operand")]))]
"TARGET_VECTOR"
{
riscv_vector::expand_vec_cmp_float (operands[0], GET_CODE (operands[1]),
@@ -2610,9 +2610,15 @@ expand_vec_init (rtx target, rtx vals)
/* Get insn code for corresponding comparison. */
static insn_code
-get_cmp_insn_code (rtx_code code, machine_mode mode)
+get_cmp_insn_code (rtx_code code, machine_mode mode, bool scalar_p)
{
insn_code icode;
+ if (FLOAT_MODE_P (mode))
+ {
+ icode = !scalar_p ? code_for_pred_cmp (mode)
+ : code_for_pred_cmp_scalar (mode);
+ return icode;
+ }
switch (code)
{
case EQ:
@@ -2628,10 +2634,7 @@ get_cmp_insn_code (rtx_code code, machine_mode mode)
case LTU:
case GE:
case GEU:
- if (FLOAT_MODE_P (mode))
- icode = code_for_pred_cmp (mode);
- else
- icode = code_for_pred_ltge (mode);
+ icode = code_for_pred_ltge (mode);
break;
default:
gcc_unreachable ();
@@ -2757,7 +2760,6 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1, rtx mask,
{
machine_mode mask_mode = GET_MODE (target);
machine_mode data_mode = GET_MODE (op0);
- insn_code icode = get_cmp_insn_code (code, data_mode);
if (code == LTGT)
{
@@ -2765,12 +2767,19 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1, rtx mask,
rtx gt = gen_reg_rtx (mask_mode);
expand_vec_cmp (lt, LT, op0, op1, mask, maskoff);
expand_vec_cmp (gt, GT, op0, op1, mask, maskoff);
- icode = code_for_pred (IOR, mask_mode);
+ insn_code icode = code_for_pred (IOR, mask_mode);
rtx ops[] = {target, lt, gt};
emit_vlmax_insn (icode, BINARY_MASK_OP, ops);
return;
}
+ rtx elt;
+ machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (op1));
+ bool scalar_p = const_vec_duplicate_p (op1, &elt) && FLOAT_MODE_P (data_mode);
+ if (scalar_p)
+ op1 = force_reg (scalar_mode, elt);
+ insn_code icode = get_cmp_insn_code (code, data_mode, scalar_p);
+
rtx cmp = gen_rtx_fmt_ee (code, mask_mode, op0, op1);
if (!mask && !maskoff)
{
@@ -1760,7 +1760,7 @@ riscv_const_insns (rtx x)
register vec_duplicate into vmv.v.x. */
scalar_mode smode = GET_MODE_INNER (GET_MODE (x));
if (maybe_gt (GET_MODE_SIZE (smode), UNITS_PER_WORD)
- && !immediate_operand (elt, Pmode))
+ && !FLOAT_MODE_P (smode) && !immediate_operand (elt, Pmode))
return 0;
/* Constants from -16 to 15 can be loaded with vmv.v.i.
The Wc0, Wc1 constraints are already covered by the
@@ -141,6 +141,34 @@
TEST_VAR_ALL (DEF_VCOND_VAR)
TEST_IMM_ALL (DEF_VCOND_IMM)
+#define TEST_COND_IMM_FLOAT(T, COND, IMM, SUFFIX) \
+ T (float, float, COND, IMM, SUFFIX##_float_float) \
+ T (double, double, COND, IMM, SUFFIX##_double_double)
+
+#define TEST_IMM_FLOAT_ALL(T) \
+ TEST_COND_IMM_FLOAT (T, >, 0.0, _gt) \
+ TEST_COND_IMM_FLOAT (T, <, 0.0, _lt) \
+ TEST_COND_IMM_FLOAT (T, >=, 0.0, _ge) \
+ TEST_COND_IMM_FLOAT (T, <=, 0.0, _le) \
+ TEST_COND_IMM_FLOAT (T, ==, 0.0, _eq) \
+ TEST_COND_IMM_FLOAT (T, !=, 0.0, _ne) \
+ \
+ TEST_COND_IMM_FLOAT (T, >, 1.0, _gt1) \
+ TEST_COND_IMM_FLOAT (T, <, 1.0, _lt1) \
+ TEST_COND_IMM_FLOAT (T, >=, 1.0, _ge1) \
+ TEST_COND_IMM_FLOAT (T, <=, 1.0, _le1) \
+ TEST_COND_IMM_FLOAT (T, ==, 1.0, _eq1) \
+ TEST_COND_IMM_FLOAT (T, !=, 1.0, _ne1) \
+ \
+ TEST_COND_IMM_FLOAT (T, >, -1.0, _gt2) \
+ TEST_COND_IMM_FLOAT (T, <, -1.0, _lt2) \
+ TEST_COND_IMM_FLOAT (T, >=, -1.0, _ge2) \
+ TEST_COND_IMM_FLOAT (T, <=, -1.0, _le2) \
+ TEST_COND_IMM_FLOAT (T, ==, -1.0, _eq2) \
+ TEST_COND_IMM_FLOAT (T, !=, -1.0, _ne2)
+
+TEST_IMM_FLOAT_ALL (DEF_VCOND_IMM)
+
/* { dg-final { scan-assembler-times {\tvmseq\.vi} 42 } } */
/* { dg-final { scan-assembler-times {\tvmsne\.vi} 42 } } */
/* { dg-final { scan-assembler-times {\tvmsgt\.vi} 30 } } */
@@ -155,3 +183,9 @@ TEST_IMM_ALL (DEF_VCOND_IMM)
/* { dg-final { scan-assembler-times {\tvmslt} 38 } } */
/* { dg-final { scan-assembler-times {\tvmsge} 38 } } */
/* { dg-final { scan-assembler-times {\tvmsle} 82 } } */
+/* { dg-final { scan-assembler-times {\tvmfgt.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmflt.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfge.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfle.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfeq.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfne.vf} 6 } } */