[v2] RISC-V: Fix fortran ICE/PR111546 when RV32 vec_init
Checks
Commit Message
From: Pan Li <pan2.li@intel.com>
When broadcast the reperated element, we take the mask_int_mode
by mistake. This patch would like to fix it by leveraging the machine
mode of the element.
The below test case in RV32 will be fixed.
* gcc/testsuite/gfortran.dg/overload_5.f90
PR target/111546
gcc/ChangeLog:
* config/riscv/riscv-v.cc
(expand_vector_init_merge_repeating_sequence): Bugfix
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/riscv-v.cc | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)
Comments
LGTM
juzhe.zhong@rivai.ai
From: pan2.li
Date: 2023-09-24 13:50
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng; patrick
Subject: [PATCH v2] RISC-V: Fix fortran ICE/PR111546 when RV32 vec_init
From: Pan Li <pan2.li@intel.com>
When broadcast the reperated element, we take the mask_int_mode
by mistake. This patch would like to fix it by leveraging the machine
mode of the element.
The below test case in RV32 will be fixed.
* gcc/testsuite/gfortran.dg/overload_5.f90
PR target/111546
gcc/ChangeLog:
* config/riscv/riscv-v.cc
(expand_vector_init_merge_repeating_sequence): Bugfix
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/riscv-v.cc | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index c2466b1354f..a1ffefb23f3 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2054,12 +2054,17 @@ static void
expand_vector_init_merge_repeating_sequence (rtx target,
const rvv_builder &builder)
{
- machine_mode dup_mode = get_repeating_sequence_dup_machine_mode (builder);
- machine_mode mask_mode = get_mask_mode (builder.mode ());
+ /* We can't use BIT mode (BI) directly to generate mask = 0b01010...
+ since we don't have such instruction in RVV.
+ Instead, we should use INT mode (QI/HI/SI/DI) with integer move
+ instruction to generate the mask data we want. */
+ machine_mode mask_int_mode
+ = get_repeating_sequence_dup_machine_mode (builder);
+ machine_mode mask_bit_mode = get_mask_mode (builder.mode ());
uint64_t full_nelts = builder.full_nelts ().to_constant ();
/* Step 1: Broadcast the first pattern. */
- rtx ops[] = {target, force_reg (GET_MODE_INNER (dup_mode), builder.elt (0))};
+ rtx ops[] = {target, force_reg (builder.inner_mode (), builder.elt (0))};
emit_vlmax_insn (code_for_pred_broadcast (builder.mode ()),
UNARY_OP, ops);
/* Step 2: Merge the rest iteration of pattern. */
@@ -2067,8 +2072,8 @@ expand_vector_init_merge_repeating_sequence (rtx target,
{
/* Step 2-1: Generate mask register v0 for each merge. */
rtx merge_mask = builder.get_merge_scalar_mask (i);
- rtx mask = gen_reg_rtx (mask_mode);
- rtx dup = gen_reg_rtx (dup_mode);
+ rtx mask = gen_reg_rtx (mask_bit_mode);
+ rtx dup = gen_reg_rtx (mask_int_mode);
if (full_nelts <= builder.inner_bits_size ()) /* vmv.s.x. */
{
@@ -2078,14 +2083,15 @@ expand_vector_init_merge_repeating_sequence (rtx target,
}
else /* vmv.v.x. */
{
- rtx ops[] = {dup, force_reg (GET_MODE_INNER (dup_mode), merge_mask)};
+ rtx ops[] = {dup,
+ force_reg (GET_MODE_INNER (mask_int_mode), merge_mask)};
rtx vl = gen_int_mode (CEIL (full_nelts, builder.inner_bits_size ()),
Pmode);
- emit_nonvlmax_insn (code_for_pred_broadcast (dup_mode), UNARY_OP,
+ emit_nonvlmax_insn (code_for_pred_broadcast (mask_int_mode), UNARY_OP,
ops, vl);
}
- emit_move_insn (mask, gen_lowpart (mask_mode, dup));
+ emit_move_insn (mask, gen_lowpart (mask_bit_mode, dup));
/* Step 2-2: Merge pattern according to the mask. */
rtx ops[] = {target, target, builder.elt (i), mask};
--
2.34.1
Committed, thanks Juzhe.
Pan
From: 钟居哲 <juzhe.zhong@rivai.ai>
Sent: Sunday, September 24, 2023 2:06 PM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Li, Pan2 <pan2.li@intel.com>; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng <kito.cheng@gmail.com>; patrick <patrick@rivosinc.com>
Subject: Re: [PATCH v2] RISC-V: Fix fortran ICE/PR111546 when RV32 vec_init
LGTM
________________________________
juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>
From: pan2.li<mailto:pan2.li@intel.com>
Date: 2023-09-24 13:50
To: gcc-patches<mailto:gcc-patches@gcc.gnu.org>
CC: juzhe.zhong<mailto:juzhe.zhong@rivai.ai>; pan2.li<mailto:pan2.li@intel.com>; yanzhang.wang<mailto:yanzhang.wang@intel.com>; kito.cheng<mailto:kito.cheng@gmail.com>; patrick<mailto:patrick@rivosinc.com>
Subject: [PATCH v2] RISC-V: Fix fortran ICE/PR111546 when RV32 vec_init
From: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>
When broadcast the reperated element, we take the mask_int_mode
by mistake. This patch would like to fix it by leveraging the machine
mode of the element.
The below test case in RV32 will be fixed.
* gcc/testsuite/gfortran.dg/overload_5.f90
PR target/111546
gcc/ChangeLog:
* config/riscv/riscv-v.cc
(expand_vector_init_merge_repeating_sequence): Bugfix
Signed-off-by: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>
---
gcc/config/riscv/riscv-v.cc | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index c2466b1354f..a1ffefb23f3 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2054,12 +2054,17 @@ static void
expand_vector_init_merge_repeating_sequence (rtx target,
const rvv_builder &builder)
{
- machine_mode dup_mode = get_repeating_sequence_dup_machine_mode (builder);
- machine_mode mask_mode = get_mask_mode (builder.mode ());
+ /* We can't use BIT mode (BI) directly to generate mask = 0b01010...
+ since we don't have such instruction in RVV.
+ Instead, we should use INT mode (QI/HI/SI/DI) with integer move
+ instruction to generate the mask data we want. */
+ machine_mode mask_int_mode
+ = get_repeating_sequence_dup_machine_mode (builder);
+ machine_mode mask_bit_mode = get_mask_mode (builder.mode ());
uint64_t full_nelts = builder.full_nelts ().to_constant ();
/* Step 1: Broadcast the first pattern. */
- rtx ops[] = {target, force_reg (GET_MODE_INNER (dup_mode), builder.elt (0))};
+ rtx ops[] = {target, force_reg (builder.inner_mode (), builder.elt (0))};
emit_vlmax_insn (code_for_pred_broadcast (builder.mode ()),
UNARY_OP, ops);
/* Step 2: Merge the rest iteration of pattern. */
@@ -2067,8 +2072,8 @@ expand_vector_init_merge_repeating_sequence (rtx target,
{
/* Step 2-1: Generate mask register v0 for each merge. */
rtx merge_mask = builder.get_merge_scalar_mask (i);
- rtx mask = gen_reg_rtx (mask_mode);
- rtx dup = gen_reg_rtx (dup_mode);
+ rtx mask = gen_reg_rtx (mask_bit_mode);
+ rtx dup = gen_reg_rtx (mask_int_mode);
if (full_nelts <= builder.inner_bits_size ()) /* vmv.s.x. */
{
@@ -2078,14 +2083,15 @@ expand_vector_init_merge_repeating_sequence (rtx target,
}
else /* vmv.v.x. */
{
- rtx ops[] = {dup, force_reg (GET_MODE_INNER (dup_mode), merge_mask)};
+ rtx ops[] = {dup,
+ force_reg (GET_MODE_INNER (mask_int_mode), merge_mask)};
rtx vl = gen_int_mode (CEIL (full_nelts, builder.inner_bits_size ()),
Pmode);
- emit_nonvlmax_insn (code_for_pred_broadcast (dup_mode), UNARY_OP,
+ emit_nonvlmax_insn (code_for_pred_broadcast (mask_int_mode), UNARY_OP,
ops, vl);
}
- emit_move_insn (mask, gen_lowpart (mask_mode, dup));
+ emit_move_insn (mask, gen_lowpart (mask_bit_mode, dup));
/* Step 2-2: Merge pattern according to the mask. */
rtx ops[] = {target, target, builder.elt (i), mask};
--
2.34.1
@@ -2054,12 +2054,17 @@ static void
expand_vector_init_merge_repeating_sequence (rtx target,
const rvv_builder &builder)
{
- machine_mode dup_mode = get_repeating_sequence_dup_machine_mode (builder);
- machine_mode mask_mode = get_mask_mode (builder.mode ());
+ /* We can't use BIT mode (BI) directly to generate mask = 0b01010...
+ since we don't have such instruction in RVV.
+ Instead, we should use INT mode (QI/HI/SI/DI) with integer move
+ instruction to generate the mask data we want. */
+ machine_mode mask_int_mode
+ = get_repeating_sequence_dup_machine_mode (builder);
+ machine_mode mask_bit_mode = get_mask_mode (builder.mode ());
uint64_t full_nelts = builder.full_nelts ().to_constant ();
/* Step 1: Broadcast the first pattern. */
- rtx ops[] = {target, force_reg (GET_MODE_INNER (dup_mode), builder.elt (0))};
+ rtx ops[] = {target, force_reg (builder.inner_mode (), builder.elt (0))};
emit_vlmax_insn (code_for_pred_broadcast (builder.mode ()),
UNARY_OP, ops);
/* Step 2: Merge the rest iteration of pattern. */
@@ -2067,8 +2072,8 @@ expand_vector_init_merge_repeating_sequence (rtx target,
{
/* Step 2-1: Generate mask register v0 for each merge. */
rtx merge_mask = builder.get_merge_scalar_mask (i);
- rtx mask = gen_reg_rtx (mask_mode);
- rtx dup = gen_reg_rtx (dup_mode);
+ rtx mask = gen_reg_rtx (mask_bit_mode);
+ rtx dup = gen_reg_rtx (mask_int_mode);
if (full_nelts <= builder.inner_bits_size ()) /* vmv.s.x. */
{
@@ -2078,14 +2083,15 @@ expand_vector_init_merge_repeating_sequence (rtx target,
}
else /* vmv.v.x. */
{
- rtx ops[] = {dup, force_reg (GET_MODE_INNER (dup_mode), merge_mask)};
+ rtx ops[] = {dup,
+ force_reg (GET_MODE_INNER (mask_int_mode), merge_mask)};
rtx vl = gen_int_mode (CEIL (full_nelts, builder.inner_bits_size ()),
Pmode);
- emit_nonvlmax_insn (code_for_pred_broadcast (dup_mode), UNARY_OP,
+ emit_nonvlmax_insn (code_for_pred_broadcast (mask_int_mode), UNARY_OP,
ops, vl);
}
- emit_move_insn (mask, gen_lowpart (mask_mode, dup));
+ emit_move_insn (mask, gen_lowpart (mask_bit_mode, dup));
/* Step 2-2: Merge pattern according to the mask. */
rtx ops[] = {target, target, builder.elt (i), mask};