RISC-V: Support in-order floating-point reduction
Checks
Commit Message
This patch depends on:
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/624995.html
Consider the following case:
float foo (float *__restrict a, int n)
{
float result = 1.0;
for (int i = 0; i < n; i++)
result += a[i];
return result;
}
Compile **without** -ffast-math:
Before this patch:
<source>:4:21: missed: couldn't vectorize loop
<source>:1:7: missed: not vectorized: relevant phi not supported: result_14 = PHI <result_11(6), 1.0e+0(5)>
After this patch:
foo:
lui a5,%hi(.LC0)
flw fa0,%lo(.LC0)(a5)
ble a1,zero,.L4
.L3:
vsetvli a5,a1,e32,m1,ta,ma
vle32.v v1,0(a0)
slli a4,a5,2
vsetivli zero,1,e32,m1,ta,ma
sub a1,a1,a5
vfmv.s.f v2,fa0
add a0,a0,a4
vsetvli zero,a5,e32,m1,ta,ma
vfredosum.vs v1,v1,v2 ----------> FOLD_LEFT_PLUS
vfmv.f.s fa0,v1
bne a1,zero,.L3
ret
.L4:
ret
gcc/ChangeLog:
* config/riscv/autovec.md (fold_left_plus_<mode>): New pattern.
(mask_len_fold_left_plus_<mode>): Ditto.
* config/riscv/riscv-protos.h (enum insn_type): New enum.
(enum reduction_type): Ditto.
(expand_reduction): Add in-order reduction.
* config/riscv/riscv-v.cc (emit_nonvlmax_fp_reduction_insn): New function.
(expand_reduction): Add in-order reduction.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/reduc/reduc_strict-1.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc_strict-2.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc_strict-3.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc_strict-4.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc_strict-5.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc_strict-6.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc_strict-7.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-2.c: New test.
---
gcc/config/riscv/autovec.md | 39 ++++++++++++++++
gcc/config/riscv/riscv-protos.h | 11 ++++-
gcc/config/riscv/riscv-v.cc | 45 ++++++++++++++++---
.../riscv/rvv/autovec/reduc/reduc_strict-1.c | 28 ++++++++++++
.../riscv/rvv/autovec/reduc/reduc_strict-2.c | 26 +++++++++++
.../riscv/rvv/autovec/reduc/reduc_strict-3.c | 18 ++++++++
.../riscv/rvv/autovec/reduc/reduc_strict-4.c | 24 ++++++++++
.../riscv/rvv/autovec/reduc/reduc_strict-5.c | 28 ++++++++++++
.../riscv/rvv/autovec/reduc/reduc_strict-6.c | 18 ++++++++
.../riscv/rvv/autovec/reduc/reduc_strict-7.c | 21 +++++++++
.../rvv/autovec/reduc/reduc_strict_run-1.c | 29 ++++++++++++
.../rvv/autovec/reduc/reduc_strict_run-2.c | 31 +++++++++++++
12 files changed, 311 insertions(+), 7 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-5.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-6.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-7.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-2.c
Comments
> +enum reduction_type
> +{
> + UNORDERED_REDUDUCTION,
> + FOLD_LEFT_REDUDUCTION,
> + MASK_LEN_FOLD_LEFT_REDUDUCTION,
> +};
There are redundant 'DU's here ;)
Wouldn't it be sufficient to have an enum
enum reduction_type
{
UNORDERED,
FOLD_LEFT,
MASK_LEN_FOLD_LEFT,
};
?
Regards
Robin
The UNORDERED enumerator will cause an ICE since we already have UNORDERED in rtx_code.
Could you suggest another enumerator name?
juzhe.zhong@rivai.ai
From: Robin Dapp
Date: 2023-07-20 15:41
To: Juzhe-Zhong; gcc-patches
CC: rdapp.gcc; kito.cheng; kito.cheng; jeffreyalaw
Subject: Re: [PATCH] RISC-V: Support in-order floating-point reduction
> +enum reduction_type
> +{
> + UNORDERED_REDUDUCTION,
> + FOLD_LEFT_REDUDUCTION,
> + MASK_LEN_FOLD_LEFT_REDUDUCTION,
> +};
There are redundant 'DU's here ;)
Wouldn't it be sufficient to have an enum
enum reduction_type
{
UNORDERED,
FOLD_LEFT,
MASK_LEN_FOLD_LEFT,
};
?
Regards
Robin
It seems there is a potential vsetvli optimization opportunity in the example?
> After this patch:
> foo:
> lui a5,%hi(.LC0)
> flw fa0,%lo(.LC0)(a5)
> ble a1,zero,.L4
> .L3:
> vsetvli a5,a1,e32,m1,ta,ma
> vle32.v v1,0(a0)
> slli a4,a5,2
> vsetivli zero,1,e32,m1,ta,ma
This could just use "vsetvli a5,a1,e32,m1,ta,ma"
> sub a1,a1,a5
> vfmv.s.f v2,fa0
> add a0,a0,a4
> vsetvli zero,a5,e32,m1,ta,ma
And then this can be removed too.
> vfredosum.vs v1,v1,v2
> vfmv.f.s fa0,v1
> bne a1,zero,.L3
> ret
> .L4:
> ret
Oh, yes.
It can be easily addressed at this call:
emit_scalar_move_insn (code_for_pred_broadcast (m1_mode), scalar_move_ops);
This patch emits the scalar move insn with AVL = 1 for all reductions. It can easily be addressed: when we recognize that it is a mask_len_fold_left_plus reduction,
we assign the AVL to the scalar move insn.
juzhe.zhong@rivai.ai
From: Kito Cheng
Date: 2023-07-20 15:42
To: Juzhe-Zhong
CC: gcc-patches; kito.cheng; jeffreyalaw; rdapp.gcc
Subject: Re: [PATCH] RISC-V: Support in-order floating-point reduction
Seems like there is a potential vsetvli optimization chance in the example?
> After this patch:
> foo:
> lui a5,%hi(.LC0)
> flw fa0,%lo(.LC0)(a5)
> ble a1,zero,.L4
> .L3:
> vsetvli a5,a1,e32,m1,ta,ma
> vle32.v v1,0(a0)
> slli a4,a5,2
> vsetivli zero,1,e32,m1,ta,ma
This could just use "vsetvli a5,a1,e32,m1,ta,ma"
> sub a1,a1,a5
> vfmv.s.f v2,fa0
> add a0,a0,a4
> vsetvli zero,a5,e32,m1,ta,ma
And then this can be removed too.
> vfredosum.vs v1,v1,v2
> vfmv.f.s fa0,v1
> bne a1,zero,.L3
> ret
> .L4:
> ret
> The UNORDERED enum will cause ICE since we have UNORDERED in rtx_code.
>
> Could you give me another enum name?
I would have expected it to work when it's namespaced.
Regards
Robin
I have no idea; an ICE just occurs when running the regression tests:
during RTL pass: expand
auto.c: In function 'test_int32_t_float_unordered_var':
auto.c:24:3: internal compiler error: in expand_vec_cmp_float, at config/riscv/riscv-v.cc:2564
24 | test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest, \
| ^~~~~
auto.c:41:3: note: in expansion of macro 'TEST_LOOP'
41 | TEST_LOOP (int32_t, float, CMP) \
| ^~~~~~~~~
auto.c:55:1: note: in expansion of macro 'TEST_CMP'
55 | TEST_CMP (unordered)
| ^~~~~~~~
0x1c8af0d riscv_vector::expand_vec_cmp_float(rtx_def*, rtx_code, rtx_def*, rtx_def*, bool)
../../../riscv-gcc/gcc/config/riscv/riscv-v.cc:2564
0x233d200 gen_vec_cmprvvm1sfrvvmf32bi(rtx_def*, rtx_def*, rtx_def*, rtx_def*)
../../../riscv-gcc/gcc/config/riscv/autovec.md:559
0x14c4582 rtx_insn* insn_gen_fn::operator()<rtx_def*, rtx_def*, rtx_def*, rtx_def*>(rtx_def*, rtx_def*, rtx_def*, rtx_def*) const
../../../riscv-gcc/gcc/recog.h:407
0x14c3c02 maybe_gen_insn(insn_code, unsigned int, expand_operand*)
../../../riscv-gcc/gcc/optabs.cc:8197
0x14c4097 maybe_expand_insn(insn_code, unsigned int, expand_operand*)
../../../riscv-gcc/gcc/optabs.cc:8237
0x14c412b expand_insn(insn_code, unsigned int, expand_operand*)
../../../riscv-gcc/gcc/optabs.cc:8268
0x14bfc3e expand_vec_cmp_expr(tree_node*, tree_node*, rtx_def*)
../../../riscv-gcc/gcc/optabs.cc:6692
0x1124e4a do_store_flag
../../../riscv-gcc/gcc/expr.cc:13060
0x1116b10 expand_expr_real_2(separate_ops*, rtx_def*, machine_mode, expand_modifier)
../../../riscv-gcc/gcc/expr.cc:10265
0x1119405 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
../../../riscv-gcc/gcc/expr.cc:10810
0x1110fb0 expand_expr_real(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
../../../riscv-gcc/gcc/expr.cc:9015
0xf2e973 expand_normal(tree_node*)
../../../riscv-gcc/gcc/expr.h:316
0x12bb060 expand_vec_cond_mask_optab_fn
../../../riscv-gcc/gcc/internal-fn.cc:3059
0x12c27ca expand_VCOND_MASK
../../../riscv-gcc/gcc/internal-fn.def:184
0x12c52a5 expand_internal_call(internal_fn, gcall*)
../../../riscv-gcc/gcc/internal-fn.cc:4792
0x12c52d0 expand_internal_call(gcall*)
../../../riscv-gcc/gcc/internal-fn.cc:4800
0xf5e4c1 expand_call_stmt
../../../riscv-gcc/gcc/cfgexpand.cc:2737
0xf62871 expand_gimple_stmt_1
../../../riscv-gcc/gcc/cfgexpand.cc:3880
0xf62f0f expand_gimple_stmt
../../../riscv-gcc/gcc/cfgexpand.cc:4044
0xf6b8a9 expand_gimple_basic_block
../../../riscv-gcc/gcc/cfgexpand.cc:6096
This ICE happens when compiling vcond.cc tests
juzhe.zhong@rivai.ai
From: Robin Dapp
Date: 2023-07-20 15:57
To: juzhe.zhong@rivai.ai; gcc-patches
CC: rdapp.gcc; kito.cheng; Kito.cheng; jeffreyalaw
Subject: Re: [PATCH] RISC-V: Support in-order floating-point reduction
> The UNORDERED enum will cause ICE since we have UNORDERED in rtx_code.
>
> Could you give me another enum name?
I would have expected it to work when it's namespaced.
Regards
Robin
It seems that, because you have `using namespace riscv_vector;`, the
UNORDERED in expand_vec_cmp_float resolves to reduction_type::UNORDERED.
Hmmm, maybe enum class?
enum class reduction_type
{
UNORDERED,
FOLD_LEFT,
MASK_LEN_FOLD_LEFT,
};
and then you need to refer to it like this: reduction_type::UNORDERED
On Thu, Jul 20, 2023 at 3:59 PM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> I have no ideal, just ICE comes when running regression:
>
> during RTL pass: expand
> auto.c: In function 'test_int32_t_float_unordered_var':
> auto.c:24:3: internal compiler error: in expand_vec_cmp_float, at config/riscv/riscv-v.cc:2564
> 24 | test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest, \
> | ^~~~~
> auto.c:41:3: note: in expansion of macro 'TEST_LOOP'
> 41 | TEST_LOOP (int32_t, float, CMP) \
> | ^~~~~~~~~
> auto.c:55:1: note: in expansion of macro 'TEST_CMP'
> 55 | TEST_CMP (unordered)
> | ^~~~~~~~
> 0x1c8af0d riscv_vector::expand_vec_cmp_float(rtx_def*, rtx_code, rtx_def*, rtx_def*, bool)
> ../../../riscv-gcc/gcc/config/riscv/riscv-v.cc:2564
> 0x233d200 gen_vec_cmprvvm1sfrvvmf32bi(rtx_def*, rtx_def*, rtx_def*, rtx_def*)
> ../../../riscv-gcc/gcc/config/riscv/autovec.md:559
> 0x14c4582 rtx_insn* insn_gen_fn::operator()<rtx_def*, rtx_def*, rtx_def*, rtx_def*>(rtx_def*, rtx_def*, rtx_def*, rtx_def*) const
> ../../../riscv-gcc/gcc/recog.h:407
> 0x14c3c02 maybe_gen_insn(insn_code, unsigned int, expand_operand*)
> ../../../riscv-gcc/gcc/optabs.cc:8197
> 0x14c4097 maybe_expand_insn(insn_code, unsigned int, expand_operand*)
> ../../../riscv-gcc/gcc/optabs.cc:8237
> 0x14c412b expand_insn(insn_code, unsigned int, expand_operand*)
> ../../../riscv-gcc/gcc/optabs.cc:8268
> 0x14bfc3e expand_vec_cmp_expr(tree_node*, tree_node*, rtx_def*)
> ../../../riscv-gcc/gcc/optabs.cc:6692
> 0x1124e4a do_store_flag
> ../../../riscv-gcc/gcc/expr.cc:13060
> 0x1116b10 expand_expr_real_2(separate_ops*, rtx_def*, machine_mode, expand_modifier)
> ../../../riscv-gcc/gcc/expr.cc:10265
> 0x1119405 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
> ../../../riscv-gcc/gcc/expr.cc:10810
> 0x1110fb0 expand_expr_real(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
> ../../../riscv-gcc/gcc/expr.cc:9015
> 0xf2e973 expand_normal(tree_node*)
> ../../../riscv-gcc/gcc/expr.h:316
> 0x12bb060 expand_vec_cond_mask_optab_fn
> ../../../riscv-gcc/gcc/internal-fn.cc:3059
> 0x12c27ca expand_VCOND_MASK
> ../../../riscv-gcc/gcc/internal-fn.def:184
> 0x12c52a5 expand_internal_call(internal_fn, gcall*)
> ../../../riscv-gcc/gcc/internal-fn.cc:4792
> 0x12c52d0 expand_internal_call(gcall*)
> ../../../riscv-gcc/gcc/internal-fn.cc:4800
> 0xf5e4c1 expand_call_stmt
> ../../../riscv-gcc/gcc/cfgexpand.cc:2737
> 0xf62871 expand_gimple_stmt_1
> ../../../riscv-gcc/gcc/cfgexpand.cc:3880
> 0xf62f0f expand_gimple_stmt
> ../../../riscv-gcc/gcc/cfgexpand.cc:4044
> 0xf6b8a9 expand_gimple_basic_block
> ../../../riscv-gcc/gcc/cfgexpand.cc:6096
>
> This ICE happens when compiling vcond.cc tests
> ________________________________
> juzhe.zhong@rivai.ai
>
>
> From: Robin Dapp
> Date: 2023-07-20 15:57
> To: juzhe.zhong@rivai.ai; gcc-patches
> CC: rdapp.gcc; kito.cheng; Kito.cheng; jeffreyalaw
> Subject: Re: [PATCH] RISC-V: Support in-order floating-point reduction
> > The UNORDERED enum will cause ICE since we have UNORDERED in rtx_code.
> >
> > Could you give me another enum name?
>
> I would have expected it to work when it's namespaced.
>
> Regards
> Robin
>
>
I have tried this:
enum class reduction_type
{
UNORDERED,
FOLD_LEFT,
MASK_LEN_FOLD_LEFT,
};
But it fails to build:
/gcc/build -I../../../riscv-gcc/gcc/../include -I../../../riscv-gcc/gcc/../libcpp/include -g -O0 \
-o build/gencondmd.o build/gencondmd.cc
In file included from ./tm_p.h:4:0,
from build/gencondmd.cc:29:
../../../riscv-gcc/gcc/config/riscv/riscv-protos.h:294:36: error: could not convert ‘UNORDERED’ from ‘rtx_code’ to ‘riscv_vector::reduction_type’
reduction_type = UNORDERED);
juzhe.zhong@rivai.ai
From: Kito Cheng
Date: 2023-07-20 16:03
To: juzhe.zhong@rivai.ai
CC: Robin Dapp; gcc-patches; kito.cheng; jeffreyalaw
Subject: Re: Re: [PATCH] RISC-V: Support in-order floating-point reduction
Seems like because you ` using namespace riscv_vector;` so the
UNORDERED in expand_vec_cmp_float used reduction_type::UNORDERED
Hmmm, maybe enum class?
enum class reduction_type
{
UNORDERED,
FOLD_LEFT,
MASK_LEN_FOLD_LEFT,
};
and need use like this reduction_type::UNORDERED
On Thu, Jul 20, 2023 at 3:59 PM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> I have no ideal, just ICE comes when running regression:
>
> during RTL pass: expand
> auto.c: In function 'test_int32_t_float_unordered_var':
> auto.c:24:3: internal compiler error: in expand_vec_cmp_float, at config/riscv/riscv-v.cc:2564
> 24 | test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest, \
> | ^~~~~
> auto.c:41:3: note: in expansion of macro 'TEST_LOOP'
> 41 | TEST_LOOP (int32_t, float, CMP) \
> | ^~~~~~~~~
> auto.c:55:1: note: in expansion of macro 'TEST_CMP'
> 55 | TEST_CMP (unordered)
> | ^~~~~~~~
> 0x1c8af0d riscv_vector::expand_vec_cmp_float(rtx_def*, rtx_code, rtx_def*, rtx_def*, bool)
> ../../../riscv-gcc/gcc/config/riscv/riscv-v.cc:2564
> 0x233d200 gen_vec_cmprvvm1sfrvvmf32bi(rtx_def*, rtx_def*, rtx_def*, rtx_def*)
> ../../../riscv-gcc/gcc/config/riscv/autovec.md:559
> 0x14c4582 rtx_insn* insn_gen_fn::operator()<rtx_def*, rtx_def*, rtx_def*, rtx_def*>(rtx_def*, rtx_def*, rtx_def*, rtx_def*) const
> ../../../riscv-gcc/gcc/recog.h:407
> 0x14c3c02 maybe_gen_insn(insn_code, unsigned int, expand_operand*)
> ../../../riscv-gcc/gcc/optabs.cc:8197
> 0x14c4097 maybe_expand_insn(insn_code, unsigned int, expand_operand*)
> ../../../riscv-gcc/gcc/optabs.cc:8237
> 0x14c412b expand_insn(insn_code, unsigned int, expand_operand*)
> ../../../riscv-gcc/gcc/optabs.cc:8268
> 0x14bfc3e expand_vec_cmp_expr(tree_node*, tree_node*, rtx_def*)
> ../../../riscv-gcc/gcc/optabs.cc:6692
> 0x1124e4a do_store_flag
> ../../../riscv-gcc/gcc/expr.cc:13060
> 0x1116b10 expand_expr_real_2(separate_ops*, rtx_def*, machine_mode, expand_modifier)
> ../../../riscv-gcc/gcc/expr.cc:10265
> 0x1119405 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
> ../../../riscv-gcc/gcc/expr.cc:10810
> 0x1110fb0 expand_expr_real(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
> ../../../riscv-gcc/gcc/expr.cc:9015
> 0xf2e973 expand_normal(tree_node*)
> ../../../riscv-gcc/gcc/expr.h:316
> 0x12bb060 expand_vec_cond_mask_optab_fn
> ../../../riscv-gcc/gcc/internal-fn.cc:3059
> 0x12c27ca expand_VCOND_MASK
> ../../../riscv-gcc/gcc/internal-fn.def:184
> 0x12c52a5 expand_internal_call(internal_fn, gcall*)
> ../../../riscv-gcc/gcc/internal-fn.cc:4792
> 0x12c52d0 expand_internal_call(gcall*)
> ../../../riscv-gcc/gcc/internal-fn.cc:4800
> 0xf5e4c1 expand_call_stmt
> ../../../riscv-gcc/gcc/cfgexpand.cc:2737
> 0xf62871 expand_gimple_stmt_1
> ../../../riscv-gcc/gcc/cfgexpand.cc:3880
> 0xf62f0f expand_gimple_stmt
> ../../../riscv-gcc/gcc/cfgexpand.cc:4044
> 0xf6b8a9 expand_gimple_basic_block
> ../../../riscv-gcc/gcc/cfgexpand.cc:6096
>
> This ICE happens when compiling vcond.cc tests
> ________________________________
> juzhe.zhong@rivai.ai
>
>
> From: Robin Dapp
> Date: 2023-07-20 15:57
> To: juzhe.zhong@rivai.ai; gcc-patches
> CC: rdapp.gcc; kito.cheng; Kito.cheng; jeffreyalaw
> Subject: Re: [PATCH] RISC-V: Support in-order floating-point reduction
> > The UNORDERED enum will cause ICE since we have UNORDERED in rtx_code.
> >
> > Could you give me another enum name?
>
> I would have expected it to work when it's namespaced.
>
> Regards
> Robin
>
>
reduction_type = reduction_type::UNORDERED
On Thu, Jul 20, 2023 at 4:16 PM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> I have tried this:
> enum class reduction_type
> {
> UNORDERED,
> FOLD_LEFT,
> MASK_LEN_FOLD_LEFT,
> };
>
> But fail to build.....
>
> /gcc/build -I../../../riscv-gcc/gcc/../include -I../../../riscv-gcc/gcc/../libcpp/include -g -O0 \
> -o build/gencondmd.o build/gencondmd.cc
> In file included from ./tm_p.h:4:0,
> from build/gencondmd.cc:29:
> ../../../riscv-gcc/gcc/config/riscv/riscv-protos.h:294:36: error: could not convert ‘UNORDERED’ from ‘rtx_code’ to ‘riscv_vector::reduction_type’
> reduction_type = UNORDERED);
>
> ________________________________
> juzhe.zhong@rivai.ai
>
>
> From: Kito Cheng
> Date: 2023-07-20 16:03
> To: juzhe.zhong@rivai.ai
> CC: Robin Dapp; gcc-patches; kito.cheng; jeffreyalaw
> Subject: Re: Re: [PATCH] RISC-V: Support in-order floating-point reduction
> Seems like because you ` using namespace riscv_vector;` so the
> UNORDERED in expand_vec_cmp_float used reduction_type::UNORDERED
>
> Hmmm, maybe enum class?
>
> enum class reduction_type
> {
> UNORDERED,
> FOLD_LEFT,
> MASK_LEN_FOLD_LEFT,
> };
>
> and need use like this reduction_type::UNORDERED
>
> On Thu, Jul 20, 2023 at 3:59 PM juzhe.zhong@rivai.ai
> <juzhe.zhong@rivai.ai> wrote:
> >
> > I have no ideal, just ICE comes when running regression:
> >
> > during RTL pass: expand
> > auto.c: In function 'test_int32_t_float_unordered_var':
> > auto.c:24:3: internal compiler error: in expand_vec_cmp_float, at config/riscv/riscv-v.cc:2564
> > 24 | test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest, \
> > | ^~~~~
> > auto.c:41:3: note: in expansion of macro 'TEST_LOOP'
> > 41 | TEST_LOOP (int32_t, float, CMP) \
> > | ^~~~~~~~~
> > auto.c:55:1: note: in expansion of macro 'TEST_CMP'
> > 55 | TEST_CMP (unordered)
> > | ^~~~~~~~
> > 0x1c8af0d riscv_vector::expand_vec_cmp_float(rtx_def*, rtx_code, rtx_def*, rtx_def*, bool)
> > ../../../riscv-gcc/gcc/config/riscv/riscv-v.cc:2564
> > 0x233d200 gen_vec_cmprvvm1sfrvvmf32bi(rtx_def*, rtx_def*, rtx_def*, rtx_def*)
> > ../../../riscv-gcc/gcc/config/riscv/autovec.md:559
> > 0x14c4582 rtx_insn* insn_gen_fn::operator()<rtx_def*, rtx_def*, rtx_def*, rtx_def*>(rtx_def*, rtx_def*, rtx_def*, rtx_def*) const
> > ../../../riscv-gcc/gcc/recog.h:407
> > 0x14c3c02 maybe_gen_insn(insn_code, unsigned int, expand_operand*)
> > ../../../riscv-gcc/gcc/optabs.cc:8197
> > 0x14c4097 maybe_expand_insn(insn_code, unsigned int, expand_operand*)
> > ../../../riscv-gcc/gcc/optabs.cc:8237
> > 0x14c412b expand_insn(insn_code, unsigned int, expand_operand*)
> > ../../../riscv-gcc/gcc/optabs.cc:8268
> > 0x14bfc3e expand_vec_cmp_expr(tree_node*, tree_node*, rtx_def*)
> > ../../../riscv-gcc/gcc/optabs.cc:6692
> > 0x1124e4a do_store_flag
> > ../../../riscv-gcc/gcc/expr.cc:13060
> > 0x1116b10 expand_expr_real_2(separate_ops*, rtx_def*, machine_mode, expand_modifier)
> > ../../../riscv-gcc/gcc/expr.cc:10265
> > 0x1119405 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
> > ../../../riscv-gcc/gcc/expr.cc:10810
> > 0x1110fb0 expand_expr_real(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
> > ../../../riscv-gcc/gcc/expr.cc:9015
> > 0xf2e973 expand_normal(tree_node*)
> > ../../../riscv-gcc/gcc/expr.h:316
> > 0x12bb060 expand_vec_cond_mask_optab_fn
> > ../../../riscv-gcc/gcc/internal-fn.cc:3059
> > 0x12c27ca expand_VCOND_MASK
> > ../../../riscv-gcc/gcc/internal-fn.def:184
> > 0x12c52a5 expand_internal_call(internal_fn, gcall*)
> > ../../../riscv-gcc/gcc/internal-fn.cc:4792
> > 0x12c52d0 expand_internal_call(gcall*)
> > ../../../riscv-gcc/gcc/internal-fn.cc:4800
> > 0xf5e4c1 expand_call_stmt
> > ../../../riscv-gcc/gcc/cfgexpand.cc:2737
> > 0xf62871 expand_gimple_stmt_1
> > ../../../riscv-gcc/gcc/cfgexpand.cc:3880
> > 0xf62f0f expand_gimple_stmt
> > ../../../riscv-gcc/gcc/cfgexpand.cc:4044
> > 0xf6b8a9 expand_gimple_basic_block
> > ../../../riscv-gcc/gcc/cfgexpand.cc:6096
> >
> > This ICE happens when compiling vcond.cc tests
> > ________________________________
> > juzhe.zhong@rivai.ai
> >
> >
> > From: Robin Dapp
> > Date: 2023-07-20 15:57
> > To: juzhe.zhong@rivai.ai; gcc-patches
> > CC: rdapp.gcc; kito.cheng; Kito.cheng; jeffreyalaw
> > Subject: Re: [PATCH] RISC-V: Support in-order floating-point reduction
> > > The UNORDERED enum will cause ICE since we have UNORDERED in rtx_code.
> > >
> > > Could you give me another enum name?
> >
> > I would have expected it to work when it's namespaced.
> >
> > Regards
> > Robin
> >
> >
>
All comments are addressed in the V2 patch:
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/625038.html
The redundant vsetvli instructions are elided by this code:
rtx len = type == reduction_type::MASK_LEN_FOLD_LEFT ? ops[4] : NULL_RTX;
emit_scalar_move_insn (code_for_pred_broadcast (m1_mode), scalar_move_ops,
len);
That is, the len operand is passed through for MASK_LEN_FOLD_LEFT.
The codegen is now:
foo:
lui a5,%hi(.LC0)
flw fa0,%lo(.LC0)(a5)
ble a1,zero,.L4
.L3:
vsetvli a5,a1,e32,m1,ta,ma
slli a4,a5,2
sub a1,a1,a5
vle32.v v1,0(a0)
vfmv.s.f v2,fa0
add a0,a0,a4
vfredosum.vs v1,v1,v2
vfmv.f.s fa0,v1
bne a1,zero,.L3
ret
juzhe.zhong@rivai.ai
From: Kito Cheng
Date: 2023-07-20 16:24
To: juzhe.zhong@rivai.ai
CC: Robin Dapp; gcc-patches; kito.cheng; jeffreyalaw
Subject: Re: Re: [PATCH] RISC-V: Support in-order floating-point reduction
reduction_type = reduction_type::UNORDERED
On Thu, Jul 20, 2023 at 4:16 PM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> I have tried this:
> enum class reduction_type
> {
> UNORDERED,
> FOLD_LEFT,
> MASK_LEN_FOLD_LEFT,
> };
>
> But fail to build.....
>
> /gcc/build -I../../../riscv-gcc/gcc/../include -I../../../riscv-gcc/gcc/../libcpp/include -g -O0 \
> -o build/gencondmd.o build/gencondmd.cc
> In file included from ./tm_p.h:4:0,
> from build/gencondmd.cc:29:
> ../../../riscv-gcc/gcc/config/riscv/riscv-protos.h:294:36: error: could not convert ‘UNORDERED’ from ‘rtx_code’ to ‘riscv_vector::reduction_type’
> reduction_type = UNORDERED);
>
> ________________________________
> juzhe.zhong@rivai.ai
>
>
> From: Kito Cheng
> Date: 2023-07-20 16:03
> To: juzhe.zhong@rivai.ai
> CC: Robin Dapp; gcc-patches; kito.cheng; jeffreyalaw
> Subject: Re: Re: [PATCH] RISC-V: Support in-order floating-point reduction
> Seems like because you ` using namespace riscv_vector;` so the
> UNORDERED in expand_vec_cmp_float used reduction_type::UNORDERED
>
> Hmmm, maybe enum class?
>
> enum class reduction_type
> {
> UNORDERED,
> FOLD_LEFT,
> MASK_LEN_FOLD_LEFT,
> };
>
> and need use like this reduction_type::UNORDERED
>
> On Thu, Jul 20, 2023 at 3:59 PM juzhe.zhong@rivai.ai
> <juzhe.zhong@rivai.ai> wrote:
> >
> > I have no ideal, just ICE comes when running regression:
> >
> > during RTL pass: expand
> > auto.c: In function 'test_int32_t_float_unordered_var':
> > auto.c:24:3: internal compiler error: in expand_vec_cmp_float, at config/riscv/riscv-v.cc:2564
> > 24 | test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest, \
> > | ^~~~~
> > auto.c:41:3: note: in expansion of macro 'TEST_LOOP'
> > 41 | TEST_LOOP (int32_t, float, CMP) \
> > | ^~~~~~~~~
> > auto.c:55:1: note: in expansion of macro 'TEST_CMP'
> > 55 | TEST_CMP (unordered)
> > | ^~~~~~~~
> > 0x1c8af0d riscv_vector::expand_vec_cmp_float(rtx_def*, rtx_code, rtx_def*, rtx_def*, bool)
> > ../../../riscv-gcc/gcc/config/riscv/riscv-v.cc:2564
> > 0x233d200 gen_vec_cmprvvm1sfrvvmf32bi(rtx_def*, rtx_def*, rtx_def*, rtx_def*)
> > ../../../riscv-gcc/gcc/config/riscv/autovec.md:559
> > 0x14c4582 rtx_insn* insn_gen_fn::operator()<rtx_def*, rtx_def*, rtx_def*, rtx_def*>(rtx_def*, rtx_def*, rtx_def*, rtx_def*) const
> > ../../../riscv-gcc/gcc/recog.h:407
> > 0x14c3c02 maybe_gen_insn(insn_code, unsigned int, expand_operand*)
> > ../../../riscv-gcc/gcc/optabs.cc:8197
> > 0x14c4097 maybe_expand_insn(insn_code, unsigned int, expand_operand*)
> > ../../../riscv-gcc/gcc/optabs.cc:8237
> > 0x14c412b expand_insn(insn_code, unsigned int, expand_operand*)
> > ../../../riscv-gcc/gcc/optabs.cc:8268
> > 0x14bfc3e expand_vec_cmp_expr(tree_node*, tree_node*, rtx_def*)
> > ../../../riscv-gcc/gcc/optabs.cc:6692
> > 0x1124e4a do_store_flag
> > ../../../riscv-gcc/gcc/expr.cc:13060
> > 0x1116b10 expand_expr_real_2(separate_ops*, rtx_def*, machine_mode, expand_modifier)
> > ../../../riscv-gcc/gcc/expr.cc:10265
> > 0x1119405 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
> > ../../../riscv-gcc/gcc/expr.cc:10810
> > 0x1110fb0 expand_expr_real(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
> > ../../../riscv-gcc/gcc/expr.cc:9015
> > 0xf2e973 expand_normal(tree_node*)
> > ../../../riscv-gcc/gcc/expr.h:316
> > 0x12bb060 expand_vec_cond_mask_optab_fn
> > ../../../riscv-gcc/gcc/internal-fn.cc:3059
> > 0x12c27ca expand_VCOND_MASK
> > ../../../riscv-gcc/gcc/internal-fn.def:184
> > 0x12c52a5 expand_internal_call(internal_fn, gcall*)
> > ../../../riscv-gcc/gcc/internal-fn.cc:4792
> > 0x12c52d0 expand_internal_call(gcall*)
> > ../../../riscv-gcc/gcc/internal-fn.cc:4800
> > 0xf5e4c1 expand_call_stmt
> > ../../../riscv-gcc/gcc/cfgexpand.cc:2737
> > 0xf62871 expand_gimple_stmt_1
> > ../../../riscv-gcc/gcc/cfgexpand.cc:3880
> > 0xf62f0f expand_gimple_stmt
> > ../../../riscv-gcc/gcc/cfgexpand.cc:4044
> > 0xf6b8a9 expand_gimple_basic_block
> > ../../../riscv-gcc/gcc/cfgexpand.cc:6096
> >
> > This ICE happens when compiling vcond.cc tests
> > ________________________________
> > juzhe.zhong@rivai.ai
> >
> >
> > From: Robin Dapp
> > Date: 2023-07-20 15:57
> > To: juzhe.zhong@rivai.ai; gcc-patches
> > CC: rdapp.gcc; kito.cheng; Kito.cheng; jeffreyalaw
> > Subject: Re: [PATCH] RISC-V: Support in-order floating-point reduction
> > > The UNORDERED enum will cause ICE since we have UNORDERED in rtx_code.
> > >
> > > Could you give me another enum name?
> >
> > I would have expected it to work when it's namespaced.
> >
> > Regards
> > Robin
> >
> >
>
@@ -1687,3 +1687,42 @@
riscv_vector::expand_reduction (SMIN, operands, f);
DONE;
})
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Left-to-right reductions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - vfredosum.vs
+;; -------------------------------------------------------------------------
+
+;; Unpredicated in-order FP reductions.
+(define_expand "fold_left_plus_<mode>"
+ [(match_operand:<VEL> 0 "register_operand")
+ (match_operand:<VEL> 1 "register_operand")
+ (match_operand:VF 2 "register_operand")]
+ "TARGET_VECTOR"
+{
+ riscv_vector::expand_reduction (PLUS, operands,
+ operands[1],
+ riscv_vector::FOLD_LEFT_REDUDUCTION);
+ DONE;
+})
+
+;; Predicated in-order FP reductions.
+(define_expand "mask_len_fold_left_plus_<mode>"
+ [(match_operand:<VEL> 0 "register_operand")
+ (match_operand:<VEL> 1 "register_operand")
+ (match_operand:VF 2 "register_operand")
+ (match_operand:<VM> 3 "vector_mask_operand")
+ (match_operand 4 "autovec_length_operand")
+ (match_operand 5 "const_0_operand")]
+ "TARGET_VECTOR"
+{
+ if (rtx_equal_p (operands[4], const0_rtx))
+ emit_move_insn (operands[0], operands[1]);
+ else
+ riscv_vector::expand_reduction (PLUS, operands,
+ operands[1],
+ riscv_vector::MASK_LEN_FOLD_LEFT_REDUDUCTION);
+ DONE;
+})
@@ -199,6 +199,7 @@ enum insn_type
RVV_GATHER_M_OP = 5,
RVV_SCATTER_M_OP = 4,
RVV_REDUCTION_OP = 3,
+ RVV_REDUCTION_TU_OP = RVV_REDUCTION_OP + 2,
};
enum vlmul_type
{
@@ -270,6 +271,13 @@ enum mask_policy
MASK_AGNOSTIC = 1,
MASK_ANY = 2,
};
+
+enum reduction_type
+{
+ UNORDERED_REDUDUCTION,
+ FOLD_LEFT_REDUDUCTION,
+ MASK_LEN_FOLD_LEFT_REDUDUCTION,
+};
enum tail_policy get_prefer_tail_policy ();
enum mask_policy get_prefer_mask_policy ();
rtx get_avl_type_rtx (enum avl_type);
@@ -282,7 +290,8 @@ bool has_vi_variant_p (rtx_code, rtx);
void expand_vec_cmp (rtx, rtx_code, rtx, rtx);
bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
void expand_cond_len_binop (rtx_code, rtx *);
-void expand_reduction (rtx_code, rtx *, rtx);
+void expand_reduction (rtx_code, rtx *, rtx,
+ reduction_type = UNORDERED_REDUDUCTION);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
bool, void (*)(rtx *, rtx));
@@ -1196,6 +1196,26 @@ emit_vlmax_fp_reduction_insn (unsigned icode, int op_num, rtx *ops)
e.emit_insn ((enum insn_code) icode, ops);
}
+/* Emit reduction instruction. */
+static void
+emit_nonvlmax_fp_reduction_insn (unsigned icode, int op_num, rtx *ops, rtx vl)
+{
+ machine_mode dest_mode = GET_MODE (ops[0]);
+ machine_mode mask_mode = get_mask_mode (GET_MODE (ops[1])).require ();
+ insn_expander<RVV_INSN_OPERANDS_MAX> e (op_num,
+ /* HAS_DEST_P */ true,
+ /* FULLY_UNMASKED_P */ false,
+ /* USE_REAL_MERGE_P */ true,
+ /* HAS_AVL_P */ true,
+ /* VLMAX_P */ false, dest_mode,
+ mask_mode);
+
+ e.set_policy (TAIL_ANY);
+ e.set_rounding_mode (FRM_DYN);
+ e.set_vl (vl);
+ e.emit_insn ((enum insn_code) icode, ops);
+}
+
/* Emit merge instruction. */
static machine_mode
@@ -3343,9 +3363,10 @@ expand_cond_len_ternop (unsigned icode, rtx *ops)
/* Expand reduction operations. */
void
-expand_reduction (rtx_code code, rtx *ops, rtx init)
+expand_reduction (rtx_code code, rtx *ops, rtx init, reduction_type type)
{
- machine_mode vmode = GET_MODE (ops[1]);
+ rtx vector = type == UNORDERED_REDUDUCTION ? ops[1] : ops[2];
+ machine_mode vmode = GET_MODE (vector);
machine_mode m1_mode = get_m1_mode (vmode).require ();
machine_mode m1_mmode = get_mask_mode (m1_mode).require ();
@@ -3356,13 +3377,25 @@ expand_reduction (rtx_code code, rtx *ops, rtx init)
emit_scalar_move_insn (code_for_pred_broadcast (m1_mode), scalar_move_ops);
rtx m1_tmp2 = gen_reg_rtx (m1_mode);
- rtx reduc_ops[] = {m1_tmp2, ops[1], m1_tmp};
+ rtx reduc_ops[] = {m1_tmp2, vector, m1_tmp};
if (FLOAT_MODE_P (vmode) && code == PLUS)
{
- insn_code icode
- = code_for_pred_reduc_plus (UNSPEC_UNORDERED, vmode, m1_mode);
- emit_vlmax_fp_reduction_insn (icode, RVV_REDUCTION_OP, reduc_ops);
+ insn_code icode = code_for_pred_reduc_plus (type == UNORDERED_REDUDUCTION
+ ? UNSPEC_UNORDERED
+ : UNSPEC_ORDERED,
+ vmode, m1_mode);
+ if (type == MASK_LEN_FOLD_LEFT_REDUDUCTION)
+ {
+ rtx mask = ops[3];
+ rtx len = ops[4];
+ rtx mask_len_reduc_ops[]
+ = {m1_tmp2, mask, RVV_VUNDEF (m1_mode), vector, m1_tmp};
+ emit_nonvlmax_fp_reduction_insn (icode, RVV_REDUCTION_TU_OP,
+ mask_len_reduc_ops, len);
+ }
+ else
+ emit_vlmax_fp_reduction_insn (icode, RVV_REDUCTION_OP, reduc_ops);
}
else
{
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+#define NUM_ELEMS(TYPE) ((int)(5 * (256 / sizeof (TYPE)) + 3))
+
+#define DEF_REDUC_PLUS(TYPE) \
+ TYPE __attribute__ ((noinline, noclone)) \
+ reduc_plus_##TYPE (TYPE *a, TYPE *b) \
+ { \
+ TYPE r = 0, q = 3; \
+ for (int i = 0; i < NUM_ELEMS (TYPE); i++) \
+ { \
+ r += a[i]; \
+ q -= b[i]; \
+ } \
+ return r * q; \
+ }
+
+#define TEST_ALL(T) \
+ T (_Float16) \
+ T (float) \
+ T (double)
+
+TEST_ALL (DEF_REDUC_PLUS)
+
+/* { dg-final { scan-assembler-times {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 6 } } */
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+/* Element count per TYPE: 5 * (number of TYPEs in 256 bytes) + 3, as int.  */
+#define NUM_ELEMS(TYPE) ((int) (5 * (256 / sizeof (TYPE)) + 3))
+/* reduc_plus_TYPE: for each i < n, zero r[i] then add a[i][j] in j order.  */
+#define DEF_REDUC_PLUS(TYPE) \
+void __attribute__ ((noinline, noclone)) \
+reduc_plus_##TYPE (TYPE (*restrict a)[NUM_ELEMS (TYPE)], \
+ TYPE *restrict r, int n) \
+{ \
+ for (int i = 0; i < n; i++) \
+ { \
+ r[i] = 0; \
+ for (int j = 0; j < NUM_ELEMS (TYPE); j++) \
+ r[i] += a[i][j]; \
+ } \
+}
+/* Expands T once each for _Float16, float and double.  */
+#define TEST_ALL(T) \
+ T (_Float16) \
+ T (float) \
+ T (double)
+
+TEST_ALL (DEF_REDUC_PLUS)
+
+/* { dg-final { scan-assembler-times {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 3 } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+/* Source data: 100 rows of 2 doubles.  */
+double mat[100][2];
+/* Adds mat[i][0] then mat[i][1] to tmp for each i in [0, n); returns tmp.  */
+double
+slp_reduc_plus (int n)
+{
+ double tmp = 0.0;
+ for (int i = 0; i < n; i++)
+ {
+ tmp = tmp + mat[i][0];
+ tmp = tmp + mat[i][1];
+ }
+ return tmp;
+}
+
+/* { dg-final { scan-assembler-times {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 1 } } */
new file mode 100644
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+/* Source data: 100 rows of 8 doubles.  */
+double mat[100][8];
+/* Adds mat[i][0] .. mat[i][7], in that order, to tmp for each i in [0, n).  */
+double
+slp_reduc_plus (int n)
+{
+ double tmp = 0.0;
+ for (int i = 0; i < n; i++)
+ {
+ tmp = tmp + mat[i][0];
+ tmp = tmp + mat[i][1];
+ tmp = tmp + mat[i][2];
+ tmp = tmp + mat[i][3];
+ tmp = tmp + mat[i][4];
+ tmp = tmp + mat[i][5];
+ tmp = tmp + mat[i][6];
+ tmp = tmp + mat[i][7];
+ }
+ return tmp;
+}
+
+/* { dg-final { scan-assembler {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} } } */
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+/* Source data: 100 rows of 12 doubles.  */
+double mat[100][12];
+/* Adds mat[i][0] .. mat[i][11], in that order, to tmp for each i in [0, n).  */
+double
+slp_reduc_plus (int n)
+{
+ double tmp = 0.0;
+ for (int i = 0; i < n; i++)
+ {
+ tmp = tmp + mat[i][0];
+ tmp = tmp + mat[i][1];
+ tmp = tmp + mat[i][2];
+ tmp = tmp + mat[i][3];
+ tmp = tmp + mat[i][4];
+ tmp = tmp + mat[i][5];
+ tmp = tmp + mat[i][6];
+ tmp = tmp + mat[i][7];
+ tmp = tmp + mat[i][8];
+ tmp = tmp + mat[i][9];
+ tmp = tmp + mat[i][10];
+ tmp = tmp + mat[i][11];
+ }
+ return tmp;
+}
+
+/* { dg-final { scan-assembler {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model -fdump-tree-vect-details" } */
+/* Adds i[b][a] to l with a (0..7) as the outer loop and b (0..99) inner.  */
+float
+double_reduc (float (*i)[16])
+{
+ float l = 0;
+
+#pragma GCC unroll 0
+ for (int a = 0; a < 8; a++)
+ for (int b = 0; b < 100; b++)
+ l += i[b][a];
+ return l;
+}
+
+/* { dg-final { scan-assembler-times {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 1 } } */
+/* { dg-final { scan-tree-dump "Detected double reduction" "vect" } } */
+/* { dg-final { scan-tree-dump-not "OUTER LOOP VECTORIZED" "vect" } } */
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model -fdump-tree-vect-details" } */
+/* Eight passes over b in 0..99 adding i[b] to k and j[b] to l; returns l * k.  */
+float
+double_reduc (float *i, float *j)
+{
+ float k = 0, l = 0;
+
+ for (int a = 0; a < 8; a++)
+ for (int b = 0; b < 100; b++)
+ {
+ k += i[b];
+ l += j[b];
+ }
+ return l * k;
+}
+
+/* { dg-final { scan-assembler-times {vle32\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-tree-dump "Detected double reduction" "vect" } } */
+/* { dg-final { scan-tree-dump-not "OUTER LOOP VECTORIZED" "vect" } } */
new file mode 100644
@@ -0,0 +1,29 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "reduc_strict-1.c"
+/* Fills a and b, tracks r and q alongside; aborts if reduc_plus_TYPE != r * q.  */
+#define TEST_REDUC_PLUS(TYPE) \
+ { \
+ TYPE a[NUM_ELEMS (TYPE)]; \
+ TYPE b[NUM_ELEMS (TYPE)]; \
+ TYPE r = 0, q = 3; \
+ for (int i = 0; i < NUM_ELEMS (TYPE); i++) \
+ { \
+ a[i] = (i * 0.1) * (i & 1 ? 1 : -1); \
+ b[i] = (i * 0.3) * (i & 1 ? 1 : -1); \
+ r += a[i]; \
+ q -= b[i]; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ TYPE res = reduc_plus_##TYPE (a, b); \
+ if (res != r * q) \
+ __builtin_abort (); \
+ }
+/* Runs TEST_REDUC_PLUS for each TEST_ALL type; returns 0 if nothing aborts.  */
+int __attribute__ ((optimize (1)))
+main ()
+{
+ TEST_ALL (TEST_REDUC_PLUS);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "reduc_strict-2.c"
+/* Row count used for a, r and expected, and passed as n to reduc_plus_TYPE.  */
+#define NROWS 5
+/* Fills a, sums each row into expected[i]; aborts if any r[i] != expected[i].  */
+#define TEST_REDUC_PLUS(TYPE) \
+ { \
+ TYPE a[NROWS][NUM_ELEMS (TYPE)]; \
+ TYPE r[NROWS]; \
+ TYPE expected[NROWS] = {}; \
+ for (int i = 0; i < NROWS; ++i) \
+ for (int j = 0; j < NUM_ELEMS (TYPE); ++j) \
+ { \
+ a[i][j] = (i * 0.1 + j * 0.6) * (j & 1 ? 1 : -1); \
+ expected[i] += a[i][j]; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ reduc_plus_##TYPE (a, r, NROWS); \
+ for (int i = 0; i < NROWS; ++i) \
+ if (r[i] != expected[i]) \
+ __builtin_abort (); \
+ }
+/* Runs TEST_REDUC_PLUS for each TEST_ALL type; returns 0 if nothing aborts.  */
+int __attribute__ ((optimize (1)))
+main ()
+{
+ TEST_ALL (TEST_REDUC_PLUS);
+ return 0;
+}