RISC-V: Add RVV builtin vectorization cost model
Checks
Commit Message
This patch fixes PR111153:
ble a1,zero,.L8
addiw a5,a1,-1
li a4,4
addi sp,sp,-16
mv a2,a0
sext.w a3,a1
bleu a5,a4,.L9
srliw a4,a3,2
slli a4,a4,4
mv a5,a0
add a4,a4,a0
vsetivli zero,4,e32,m1,ta,ma
vmv.v.i v1,0
vse32.v v1,0(sp)
.L4:
vle32.v v1,0(a5) ---> This loop always processes 4 elements, which is OK for VLEN = 128 bits but wastes a huge number of computation units when VLEN > 128 bits
vle32.v v2,0(sp)
addi a5,a5,16
vadd.vv v1,v2,v1
vse32.v v1,0(sp)
bne a4,a5,.L4
ld a5,0(sp)
lw a4,0(sp)
andi a1,a1,-4
srai a5,a5,32
addw a5,a4,a5
lw a4,8(sp)
addw a5,a5,a4
ld a4,8(sp)
srai a4,a4,32
addw a0,a5,a4
beq a3,a1,.L15
.L3:
subw a3,a3,a1
slli a5,a1,32
slli a3,a3,32
srli a3,a3,32
srli a5,a5,30
add a2,a2,a5
vsetvli a5,a3,e8,mf4,tu,mu
vsetvli a4,zero,e32,m1,ta,ma
sub a1,a3,a5
vmv.v.i v1,0
vsetvli zero,a3,e32,m1,tu,ma
vle32.v v2,0(a2)
vmv.v.v v1,v2
bne a3,a5,.L21
.L7:
vsetvli a4,zero,e32,m1,ta,ma
vmv.s.x v2,zero
vredsum.vs v1,v1,v2
vmv.x.s a5,v1
addw a0,a0,a5
.L15:
addi sp,sp,16
jr ra
.L21:
slli a5,a5,2
add a2,a2,a5
vsetvli zero,a1,e32,m1,tu,ma
vle32.v v2,0(a2)
vadd.vv v1,v1,v2
j .L7
.L8:
li a0,0
ret
.L9:
li a1,0
li a0,0
j .L3
The root cause is that we were missing an RVV builtin vectorization cost model.
After this patch:
ble a1,zero,.L4
vsetvli a5,zero,e32,m1,ta,ma
vmv.v.i v1,0
.L3:
vsetvli a5,a1,e32,m1,tu,ma
vle32.v v2,0(a0)
slli a4,a5,2
sub a1,a1,a5
add a0,a0,a4
vadd.vv v1,v2,v1
bne a1,zero,.L3
li a5,0
vsetivli zero,1,e32,m1,ta,ma
vmv.s.x v2,a5
vsetvli a5,zero,e32,m1,ta,ma
vredsum.vs v1,v1,v2
vmv.x.s a0,v1
ret
.L4:
li a0,0
ret
PR target/111153
gcc/ChangeLog:
* config/riscv/riscv-protos.h (struct common_vector_cost): New struct.
(struct scalable_vector_cost): Ditto.
(struct cpu_vector_cost): Ditto.
* config/riscv/riscv-vector-costs.cc (costs::add_stmt_cost): Add RVV builtin vectorization cost.
* config/riscv/riscv.cc (struct riscv_tune_param): Ditto.
(get_common_costs): New function.
(riscv_builtin_vectorization_cost): Ditto.
(TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New target hook.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/costmodel/riscv/rvv/pr111153.c: New test.
---
gcc/config/riscv/riscv-protos.h | 76 ++++++++++
gcc/config/riscv/riscv-vector-costs.cc | 5 +-
gcc/config/riscv/riscv.cc | 143 ++++++++++++++++++
.../vect/costmodel/riscv/rvv/pr111153.c | 18 +++
4 files changed, 239 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c
Comments
LGTM
Juzhe-Zhong <juzhe.zhong@rivai.ai> 於 2023年12月14日 週四 11:24 寫道:
> This patch fixes PR111153:
>
> ble a1,zero,.L8
> addiw a5,a1,-1
> li a4,4
> addi sp,sp,-16
> mv a2,a0
> sext.w a3,a1
> bleu a5,a4,.L9
> srliw a4,a3,2
> slli a4,a4,4
> mv a5,a0
> add a4,a4,a0
> vsetivli zero,4,e32,m1,ta,ma
> vmv.v.i v1,0
> vse32.v v1,0(sp)
> .L4:
> vle32.v v1,0(a5) ---> This loop always processes 4 elements, which
> is OK for VLEN = 128 bits but wastes a huge number of computation units when
> VLEN > 128 bits
> vle32.v v2,0(sp)
> addi a5,a5,16
> vadd.vv v1,v2,v1
> vse32.v v1,0(sp)
> bne a4,a5,.L4
> ld a5,0(sp)
> lw a4,0(sp)
> andi a1,a1,-4
> srai a5,a5,32
> addw a5,a4,a5
> lw a4,8(sp)
> addw a5,a5,a4
> ld a4,8(sp)
> srai a4,a4,32
> addw a0,a5,a4
> beq a3,a1,.L15
> .L3:
> subw a3,a3,a1
> slli a5,a1,32
> slli a3,a3,32
> srli a3,a3,32
> srli a5,a5,30
> add a2,a2,a5
> vsetvli a5,a3,e8,mf4,tu,mu
> vsetvli a4,zero,e32,m1,ta,ma
> sub a1,a3,a5
> vmv.v.i v1,0
> vsetvli zero,a3,e32,m1,tu,ma
> vle32.v v2,0(a2)
> vmv.v.v v1,v2
> bne a3,a5,.L21
> .L7:
> vsetvli a4,zero,e32,m1,ta,ma
> vmv.s.x v2,zero
> vredsum.vs v1,v1,v2
> vmv.x.s a5,v1
> addw a0,a0,a5
> .L15:
> addi sp,sp,16
> jr ra
> .L21:
> slli a5,a5,2
> add a2,a2,a5
> vsetvli zero,a1,e32,m1,tu,ma
> vle32.v v2,0(a2)
> vadd.vv v1,v1,v2
> j .L7
> .L8:
> li a0,0
> ret
> .L9:
> li a1,0
> li a0,0
> j .L3
>
> The root cause is that we were missing an RVV builtin vectorization cost model.
>
> After this patch:
>
> ble a1,zero,.L4
> vsetvli a5,zero,e32,m1,ta,ma
> vmv.v.i v1,0
> .L3:
> vsetvli a5,a1,e32,m1,tu,ma
> vle32.v v2,0(a0)
> slli a4,a5,2
> sub a1,a1,a5
> add a0,a0,a4
> vadd.vv v1,v2,v1
> bne a1,zero,.L3
> li a5,0
> vsetivli zero,1,e32,m1,ta,ma
> vmv.s.x v2,a5
> vsetvli a5,zero,e32,m1,ta,ma
> vredsum.vs v1,v1,v2
> vmv.x.s a0,v1
> ret
> .L4:
> li a0,0
> ret
>
> PR target/111153
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-protos.h (struct common_vector_cost): New
> struct.
> (struct scalable_vector_cost): Ditto.
> (struct cpu_vector_cost): Ditto.
> * config/riscv/riscv-vector-costs.cc (costs::add_stmt_cost): Add
> RVV builtin vectorization cost.
> * config/riscv/riscv.cc (struct riscv_tune_param): Ditto.
> (get_common_costs): New function.
> (riscv_builtin_vectorization_cost): Ditto.
> (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New target hook.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/vect/costmodel/riscv/rvv/pr111153.c: New test.
>
> ---
> gcc/config/riscv/riscv-protos.h | 76 ++++++++++
> gcc/config/riscv/riscv-vector-costs.cc | 5 +-
> gcc/config/riscv/riscv.cc | 143 ++++++++++++++++++
> .../vect/costmodel/riscv/rvv/pr111153.c | 18 +++
> 4 files changed, 239 insertions(+), 3 deletions(-)
> create mode 100644
> gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c
>
> diff --git a/gcc/config/riscv/riscv-protos.h
> b/gcc/config/riscv/riscv-protos.h
> index 85ab1db2088..7de0b031001 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -200,6 +200,82 @@ struct riscv_cpu_info {
>
> extern const riscv_cpu_info *riscv_find_cpu (const char *);
>
> +/* Common vector costs in any kind of vectorization (e.g. VLA and VLS). */
> +struct common_vector_cost
> +{
> + /* Cost of any integer vector operation, excluding the ones handled
> + specially below. */
> + const int int_stmt_cost;
> +
> + /* Cost of any fp vector operation, excluding the ones handled
> + specially below. */
> + const int fp_stmt_cost;
> +
> + /* Gather/scatter vectorization cost. */
> + const int gather_load_cost;
> + const int scatter_store_cost;
> +
> + /* Cost of a vector-to-scalar operation. */
> + const int vec_to_scalar_cost;
> +
> + /* Cost of a scalar-to-vector operation. */
> + const int scalar_to_vec_cost;
> +
> + /* Cost of a permute operation. */
> + const int permute_cost;
> +
> + /* Cost of an aligned vector load. */
> + const int align_load_cost;
> +
> + /* Cost of an aligned vector store. */
> + const int align_store_cost;
> +
> + /* Cost of an unaligned vector load. */
> + const int unalign_load_cost;
> +
> + /* Cost of an unaligned vector store. */
> + const int unalign_store_cost;
> +};
> +
> +/* Scalable vectorization (VLA) specific cost. */
> +struct scalable_vector_cost : common_vector_cost
> +{
> + CONSTEXPR scalable_vector_cost (const common_vector_cost &base)
> + : common_vector_cost (base)
> + {}
> +
> + /* TODO: We will need more other kinds of vector cost for VLA.
> + E.g. fold_left reduction cost, lanes load/store cost, ..., etc. */
> +};
> +
> +/* Cost for vector insn classes. */
> +struct cpu_vector_cost
> +{
> + /* Cost of any integer scalar operation, excluding load and store. */
> + const int scalar_int_stmt_cost;
> +
> + /* Cost of any fp scalar operation, excluding load and store. */
> + const int scalar_fp_stmt_cost;
> +
> + /* Cost of a scalar load. */
> + const int scalar_load_cost;
> +
> + /* Cost of a scalar store. */
> + const int scalar_store_cost;
> +
> + /* Cost of a taken branch. */
> + const int cond_taken_branch_cost;
> +
> + /* Cost of a not-taken branch. */
> + const int cond_not_taken_branch_cost;
> +
> + /* Cost of VLS mode operations. */
> + const common_vector_cost *vls;
> +
> + /* Cost of VLA mode operations. */
> + const scalable_vector_cost *vla;
> +};
> +
> /* Routines implemented in riscv-selftests.cc. */
> #if CHECKING_P
> namespace selftest {
> diff --git a/gcc/config/riscv/riscv-vector-costs.cc
> b/gcc/config/riscv/riscv-vector-costs.cc
> index 7888cef58fe..e7bc9ed5233 100644
> --- a/gcc/config/riscv/riscv-vector-costs.cc
> +++ b/gcc/config/riscv/riscv-vector-costs.cc
> @@ -750,9 +750,8 @@ costs::add_stmt_cost (int count, vect_cost_for_stmt
> kind,
> stmt_vec_info stmt_info, slp_tree, tree vectype,
> int misalign, vect_cost_model_location where)
> {
> - /* TODO: Use default STMT cost model.
> - We will support more accurate STMT cost model later. */
> - int stmt_cost = default_builtin_vectorization_cost (kind, vectype,
> misalign);
> + int stmt_cost
> + = targetm.vectorize.builtin_vectorization_cost (kind, vectype,
> misalign);
>
> /* Do one-time initialization based on the vinfo. */
> loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 69a8a503f30..2dc44244309 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -281,6 +281,7 @@ struct riscv_tune_param
> bool slow_unaligned_access;
> bool use_divmod_expansion;
> unsigned int fusible_ops;
> + const struct cpu_vector_cost *vec_costs;
> };
>
>
> @@ -348,6 +349,50 @@ const enum reg_class
> riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
> VD_REGS, VD_REGS, VD_REGS, VD_REGS,
> };
>
> +/* Generic costs for VLS vector operations. */
> +static const common_vector_cost generic_vls_vector_cost = {
> + 1, /* int_stmt_cost */
> + 1, /* fp_stmt_cost */
> + 1, /* gather_load_cost */
> + 1, /* scatter_store_cost */
> + 1, /* vec_to_scalar_cost */
> + 1, /* scalar_to_vec_cost */
> + 1, /* permute_cost */
> + 3, /* align_load_cost */
> + 3, /* align_store_cost */
> + 3, /* unalign_load_cost */
> + 3, /* unalign_store_cost */
> +};
> +
> +/* Generic costs for VLA vector operations. */
> +static const scalable_vector_cost generic_vla_vector_cost = {
> + {
> + 1, /* int_stmt_cost */
> + 1, /* fp_stmt_cost */
> + 1, /* gather_load_cost */
> + 1, /* scatter_store_cost */
> + 1, /* vec_to_scalar_cost */
> + 1, /* scalar_to_vec_cost */
> + 1, /* permute_cost */
> + 3, /* align_load_cost */
> + 3, /* align_store_cost */
> + 3, /* unalign_load_cost */
> + 3, /* unalign_store_cost */
> + },
> +};
> +
> +/* Generic costs for vector insn classes. */
> +static const struct cpu_vector_cost generic_vector_cost = {
> + 1, /* scalar_int_stmt_cost */
> + 1, /* scalar_fp_stmt_cost */
> + 1, /* scalar_load_cost */
> + 1, /* scalar_store_cost */
> + 3, /* cond_taken_branch_cost */
> + 1, /* cond_not_taken_branch_cost */
> + &generic_vls_vector_cost, /* vls */
> + &generic_vla_vector_cost, /* vla */
> +};
> +
> /* Costs to use when optimizing for rocket. */
> static const struct riscv_tune_param rocket_tune_info = {
> {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
> @@ -362,6 +407,7 @@ static const struct riscv_tune_param rocket_tune_info
> = {
> true, /*
> slow_unaligned_access */
> false, /* use_divmod_expansion */
> RISCV_FUSE_NOTHING, /* fusible_ops */
> + NULL, /* vector cost */
> };
>
> /* Costs to use when optimizing for Sifive 7 Series. */
> @@ -378,6 +424,7 @@ static const struct riscv_tune_param
> sifive_7_tune_info = {
> true, /*
> slow_unaligned_access */
> false, /* use_divmod_expansion */
> RISCV_FUSE_NOTHING, /* fusible_ops */
> + NULL, /* vector cost */
> };
>
> /* Costs to use when optimizing for T-HEAD c906. */
> @@ -394,6 +441,7 @@ static const struct riscv_tune_param
> thead_c906_tune_info = {
> false, /* slow_unaligned_access */
> false, /* use_divmod_expansion */
> RISCV_FUSE_NOTHING, /* fusible_ops */
> + NULL, /* vector cost */
> };
>
> /* Costs to use when optimizing for a generic ooo profile. */
> @@ -410,6 +458,7 @@ static const struct riscv_tune_param
> generic_ooo_tune_info = {
> false, /* slow_unaligned_access */
> false, /* use_divmod_expansion */
> RISCV_FUSE_NOTHING, /* fusible_ops */
> + &generic_vector_cost, /* vector cost */
> };
>
> /* Costs to use when optimizing for size. */
> @@ -426,6 +475,7 @@ static const struct riscv_tune_param
> optimize_size_tune_info = {
> false, /* slow_unaligned_access */
> false, /* use_divmod_expansion */
> RISCV_FUSE_NOTHING, /* fusible_ops */
> + NULL, /* vector cost */
> };
>
> static bool riscv_avoid_shrink_wrapping_separate ();
> @@ -10192,6 +10242,95 @@ riscv_frame_pointer_required (void)
> return riscv_save_frame_pointer && !crtl->is_leaf;
> }
>
> +/* Return the appropriate common costs for vectors of type VECTYPE. */
> +static const common_vector_cost *
> +get_common_costs (tree vectype)
> +{
> + const cpu_vector_cost *costs = tune_param->vec_costs;
> + gcc_assert (costs);
> +
> + if (vectype && riscv_v_ext_vls_mode_p (TYPE_MODE (vectype)))
> + return costs->vls;
> + return costs->vla;
> +}
> +
> +/* Implement targetm.vectorize.builtin_vectorization_cost. */
> +
> +static int
> +riscv_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
> + tree vectype, int misalign
> ATTRIBUTE_UNUSED)
> +{
> + unsigned elements;
> + const cpu_vector_cost *costs = tune_param->vec_costs;
> + bool fp = false;
> +
> + if (vectype != NULL)
> + fp = FLOAT_TYPE_P (vectype);
> +
> + if (costs != NULL)
> + {
> + const common_vector_cost *common_costs = get_common_costs (vectype);
> + gcc_assert (common_costs != NULL);
> + switch (type_of_cost)
> + {
> + case scalar_stmt:
> + return fp ? costs->scalar_fp_stmt_cost :
> costs->scalar_int_stmt_cost;
> +
> + case scalar_load:
> + return costs->scalar_load_cost;
> +
> + case scalar_store:
> + return costs->scalar_store_cost;
> +
> + case vector_stmt:
> + return fp ? common_costs->fp_stmt_cost :
> common_costs->int_stmt_cost;
> +
> + case vector_load:
> + return common_costs->align_load_cost;
> +
> + case vector_store:
> + return common_costs->align_store_cost;
> +
> + case vec_to_scalar:
> + return common_costs->vec_to_scalar_cost;
> +
> + case scalar_to_vec:
> + return common_costs->scalar_to_vec_cost;
> +
> + case unaligned_load:
> + return common_costs->unalign_load_cost;
> + case vector_gather_load:
> + return common_costs->gather_load_cost;
> +
> + case unaligned_store:
> + return common_costs->unalign_store_cost;
> + case vector_scatter_store:
> + return common_costs->scatter_store_cost;
> +
> + case cond_branch_taken:
> + return costs->cond_taken_branch_cost;
> +
> + case cond_branch_not_taken:
> + return costs->cond_not_taken_branch_cost;
> +
> + case vec_perm:
> + return common_costs->permute_cost;
> +
> + case vec_promote_demote:
> + return fp ? common_costs->fp_stmt_cost :
> common_costs->int_stmt_cost;
> +
> + case vec_construct:
> + elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
> + return elements / 2 + 1;
> +
> + default:
> + gcc_unreachable ();
> + }
> + }
> +
> + return default_builtin_vectorization_cost (type_of_cost, vectype,
> misalign);
> +}
> +
> /* Implement targetm.vectorize.create_costs. */
>
> static vector_costs *
> @@ -10582,6 +10721,10 @@ extract_base_offset_in_addr (rtx mem, rtx *base,
> rtx *offset)
> #undef TARGET_FRAME_POINTER_REQUIRED
> #define TARGET_FRAME_POINTER_REQUIRED riscv_frame_pointer_required
>
> +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
> +#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
> + riscv_builtin_vectorization_cost
> +
> #undef TARGET_VECTORIZE_CREATE_COSTS
> #define TARGET_VECTORIZE_CREATE_COSTS riscv_vectorize_create_costs
>
> diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c
> b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c
> new file mode 100644
> index 00000000000..06e08ec5f2e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize
> -mtune=generic-ooo" } */
> +
> +#define DEF_REDUC_PLUS(TYPE)
> \
> + TYPE __attribute__ ((noinline, noclone))
> \
> + reduc_plus_##TYPE (TYPE *__restrict a, int n)
> \
> + {
> \
> + TYPE r = 0;
> \
> + for (int i = 0; i < n; ++i)
> \
> + r += a[i];
> \
> + return r;
> \
> + }
> +
> +#define TEST_PLUS(T) T (int)
> +
> +TEST_PLUS (DEF_REDUC_PLUS)
> +
> +/* { dg-final { scan-assembler-not {vsetivli\s+zero,\s*4} } } */
> --
> 2.36.3
>
>
Committed, thanks Kito.
Pan
From: Kito Cheng <kito.cheng@gmail.com>
Sent: Thursday, December 14, 2023 2:45 PM
To: Juzhe-Zhong <juzhe.zhong@rivai.ai>
Cc: GCC Patches <gcc-patches@gcc.gnu.org>; Kito Cheng <kito.cheng@sifive.com>; Jeff Law <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>
Subject: Re: [PATCH] RISC-V: Add RVV builtin vectorization cost model
LGTM
Juzhe-Zhong <juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>> 於 2023年12月14日 週四 11:24 寫道:
This patch fixes PR111153:
ble a1,zero,.L8
addiw a5,a1,-1
li a4,4
addi sp,sp,-16
mv a2,a0
sext.w a3,a1
bleu a5,a4,.L9
srliw a4,a3,2
slli a4,a4,4
mv a5,a0
add a4,a4,a0
vsetivli zero,4,e32,m1,ta,ma
vmv.v.i v1,0
vse32.v v1,0(sp)
.L4:
vle32.v v1,0(a5) ---> This loop always processes 4 elements, which is OK for VLEN = 128 bits but wastes a huge number of computation units when VLEN > 128 bits
vle32.v v2,0(sp)
addi a5,a5,16
vadd.vv v1,v2,v1
vse32.v v1,0(sp)
bne a4,a5,.L4
ld a5,0(sp)
lw a4,0(sp)
andi a1,a1,-4
srai a5,a5,32
addw a5,a4,a5
lw a4,8(sp)
addw a5,a5,a4
ld a4,8(sp)
srai a4,a4,32
addw a0,a5,a4
beq a3,a1,.L15
.L3:
subw a3,a3,a1
slli a5,a1,32
slli a3,a3,32
srli a3,a3,32
srli a5,a5,30
add a2,a2,a5
vsetvli a5,a3,e8,mf4,tu,mu
vsetvli a4,zero,e32,m1,ta,ma
sub a1,a3,a5
vmv.v.i v1,0
vsetvli zero,a3,e32,m1,tu,ma
vle32.v v2,0(a2)
vmv.v.v v1,v2
bne a3,a5,.L21
.L7:
vsetvli a4,zero,e32,m1,ta,ma
vmv.s.x v2,zero
vredsum.vs v1,v1,v2
vmv.x.s a5,v1
addw a0,a0,a5
.L15:
addi sp,sp,16
jr ra
.L21:
slli a5,a5,2
add a2,a2,a5
vsetvli zero,a1,e32,m1,tu,ma
vle32.v v2,0(a2)
vadd.vv v1,v1,v2
j .L7
.L8:
li a0,0
ret
.L9:
li a1,0
li a0,0
j .L3
The root cause is that we were missing an RVV builtin vectorization cost model.
After this patch:
ble a1,zero,.L4
vsetvli a5,zero,e32,m1,ta,ma
vmv.v.i v1,0
.L3:
vsetvli a5,a1,e32,m1,tu,ma
vle32.v v2,0(a0)
slli a4,a5,2
sub a1,a1,a5
add a0,a0,a4
vadd.vv v1,v2,v1
bne a1,zero,.L3
li a5,0
vsetivli zero,1,e32,m1,ta,ma
vmv.s.x v2,a5
vsetvli a5,zero,e32,m1,ta,ma
vredsum.vs v1,v1,v2
vmv.x.s a0,v1
ret
.L4:
li a0,0
ret
PR target/111153
gcc/ChangeLog:
* config/riscv/riscv-protos.h (struct common_vector_cost): New struct.
(struct scalable_vector_cost): Ditto.
(struct cpu_vector_cost): Ditto.
* config/riscv/riscv-vector-costs.cc (costs::add_stmt_cost): Add RVV builtin vectorization cost.
* config/riscv/riscv.cc (struct riscv_tune_param): Ditto.
(get_common_costs): New function.
(riscv_builtin_vectorization_cost): Ditto.
(TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New target hook.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/costmodel/riscv/rvv/pr111153.c: New test.
---
gcc/config/riscv/riscv-protos.h | 76 ++++++++++
gcc/config/riscv/riscv-vector-costs.cc | 5 +-
gcc/config/riscv/riscv.cc | 143 ++++++++++++++++++
.../vect/costmodel/riscv/rvv/pr111153.c | 18 +++
4 files changed, 239 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 85ab1db2088..7de0b031001 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -200,6 +200,82 @@ struct riscv_cpu_info {
extern const riscv_cpu_info *riscv_find_cpu (const char *);
+/* Common vector costs in any kind of vectorization (e.g. VLA and VLS). */
+struct common_vector_cost
+{
+ /* Cost of any integer vector operation, excluding the ones handled
+ specially below. */
+ const int int_stmt_cost;
+
+ /* Cost of any fp vector operation, excluding the ones handled
+ specially below. */
+ const int fp_stmt_cost;
+
+ /* Gather/scatter vectorization cost. */
+ const int gather_load_cost;
+ const int scatter_store_cost;
+
+ /* Cost of a vector-to-scalar operation. */
+ const int vec_to_scalar_cost;
+
+ /* Cost of a scalar-to-vector operation. */
+ const int scalar_to_vec_cost;
+
+ /* Cost of a permute operation. */
+ const int permute_cost;
+
+ /* Cost of an aligned vector load. */
+ const int align_load_cost;
+
+ /* Cost of an aligned vector store. */
+ const int align_store_cost;
+
+ /* Cost of an unaligned vector load. */
+ const int unalign_load_cost;
+
+ /* Cost of an unaligned vector store. */
+ const int unalign_store_cost;
+};
+
+/* Scalable vectorization (VLA) specific cost. */
+struct scalable_vector_cost : common_vector_cost
+{
+ CONSTEXPR scalable_vector_cost (const common_vector_cost &base)
+ : common_vector_cost (base)
+ {}
+
+ /* TODO: We will need more other kinds of vector cost for VLA.
+ E.g. fold_left reduction cost, lanes load/store cost, ..., etc. */
+};
+
+/* Cost for vector insn classes. */
+struct cpu_vector_cost
+{
+ /* Cost of any integer scalar operation, excluding load and store. */
+ const int scalar_int_stmt_cost;
+
+ /* Cost of any fp scalar operation, excluding load and store. */
+ const int scalar_fp_stmt_cost;
+
+ /* Cost of a scalar load. */
+ const int scalar_load_cost;
+
+ /* Cost of a scalar store. */
+ const int scalar_store_cost;
+
+ /* Cost of a taken branch. */
+ const int cond_taken_branch_cost;
+
+ /* Cost of a not-taken branch. */
+ const int cond_not_taken_branch_cost;
+
+ /* Cost of VLS mode operations. */
+ const common_vector_cost *vls;
+
+ /* Cost of VLA mode operations. */
+ const scalable_vector_cost *vla;
+};
+
/* Routines implemented in riscv-selftests.cc. */
#if CHECKING_P
namespace selftest {
diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index 7888cef58fe..e7bc9ed5233 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -750,9 +750,8 @@ costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
stmt_vec_info stmt_info, slp_tree, tree vectype,
int misalign, vect_cost_model_location where)
{
- /* TODO: Use default STMT cost model.
- We will support more accurate STMT cost model later. */
- int stmt_cost = default_builtin_vectorization_cost (kind, vectype, misalign);
+ int stmt_cost
+ = targetm.vectorize.builtin_vectorization_cost (kind, vectype, misalign);
/* Do one-time initialization based on the vinfo. */
loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 69a8a503f30..2dc44244309 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -281,6 +281,7 @@ struct riscv_tune_param
bool slow_unaligned_access;
bool use_divmod_expansion;
unsigned int fusible_ops;
+ const struct cpu_vector_cost *vec_costs;
};
@@ -348,6 +349,50 @@ const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
VD_REGS, VD_REGS, VD_REGS, VD_REGS,
};
+/* Generic costs for VLS vector operations. */
+static const common_vector_cost generic_vls_vector_cost = {
+ 1, /* int_stmt_cost */
+ 1, /* fp_stmt_cost */
+ 1, /* gather_load_cost */
+ 1, /* scatter_store_cost */
+ 1, /* vec_to_scalar_cost */
+ 1, /* scalar_to_vec_cost */
+ 1, /* permute_cost */
+ 3, /* align_load_cost */
+ 3, /* align_store_cost */
+ 3, /* unalign_load_cost */
+ 3, /* unalign_store_cost */
+};
+
+/* Generic costs for VLA vector operations. */
+static const scalable_vector_cost generic_vla_vector_cost = {
+ {
+ 1, /* int_stmt_cost */
+ 1, /* fp_stmt_cost */
+ 1, /* gather_load_cost */
+ 1, /* scatter_store_cost */
+ 1, /* vec_to_scalar_cost */
+ 1, /* scalar_to_vec_cost */
+ 1, /* permute_cost */
+ 3, /* align_load_cost */
+ 3, /* align_store_cost */
+ 3, /* unalign_load_cost */
+ 3, /* unalign_store_cost */
+ },
+};
+
+/* Generic costs for vector insn classes. */
+static const struct cpu_vector_cost generic_vector_cost = {
+ 1, /* scalar_int_stmt_cost */
+ 1, /* scalar_fp_stmt_cost */
+ 1, /* scalar_load_cost */
+ 1, /* scalar_store_cost */
+ 3, /* cond_taken_branch_cost */
+ 1, /* cond_not_taken_branch_cost */
+ &generic_vls_vector_cost, /* vls */
+ &generic_vla_vector_cost, /* vla */
+};
+
/* Costs to use when optimizing for rocket. */
static const struct riscv_tune_param rocket_tune_info = {
{COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
@@ -362,6 +407,7 @@ static const struct riscv_tune_param rocket_tune_info = {
true, /* slow_unaligned_access */
false, /* use_divmod_expansion */
RISCV_FUSE_NOTHING, /* fusible_ops */
+ NULL, /* vector cost */
};
/* Costs to use when optimizing for Sifive 7 Series. */
@@ -378,6 +424,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
true, /* slow_unaligned_access */
false, /* use_divmod_expansion */
RISCV_FUSE_NOTHING, /* fusible_ops */
+ NULL, /* vector cost */
};
/* Costs to use when optimizing for T-HEAD c906. */
@@ -394,6 +441,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
false, /* slow_unaligned_access */
false, /* use_divmod_expansion */
RISCV_FUSE_NOTHING, /* fusible_ops */
+ NULL, /* vector cost */
};
/* Costs to use when optimizing for a generic ooo profile. */
@@ -410,6 +458,7 @@ static const struct riscv_tune_param generic_ooo_tune_info = {
false, /* slow_unaligned_access */
false, /* use_divmod_expansion */
RISCV_FUSE_NOTHING, /* fusible_ops */
+ &generic_vector_cost, /* vector cost */
};
/* Costs to use when optimizing for size. */
@@ -426,6 +475,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
false, /* slow_unaligned_access */
false, /* use_divmod_expansion */
RISCV_FUSE_NOTHING, /* fusible_ops */
+ NULL, /* vector cost */
};
static bool riscv_avoid_shrink_wrapping_separate ();
@@ -10192,6 +10242,95 @@ riscv_frame_pointer_required (void)
return riscv_save_frame_pointer && !crtl->is_leaf;
}
+/* Return the appropriate common costs for vectors of type VECTYPE. */
+static const common_vector_cost *
+get_common_costs (tree vectype)
+{
+ const cpu_vector_cost *costs = tune_param->vec_costs;
+ gcc_assert (costs);
+
+ if (vectype && riscv_v_ext_vls_mode_p (TYPE_MODE (vectype)))
+ return costs->vls;
+ return costs->vla;
+}
+
+/* Implement targetm.vectorize.builtin_vectorization_cost. */
+
+static int
+riscv_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype, int misalign ATTRIBUTE_UNUSED)
+{
+ unsigned elements;
+ const cpu_vector_cost *costs = tune_param->vec_costs;
+ bool fp = false;
+
+ if (vectype != NULL)
+ fp = FLOAT_TYPE_P (vectype);
+
+ if (costs != NULL)
+ {
+ const common_vector_cost *common_costs = get_common_costs (vectype);
+ gcc_assert (common_costs != NULL);
+ switch (type_of_cost)
+ {
+ case scalar_stmt:
+ return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;
+
+ case scalar_load:
+ return costs->scalar_load_cost;
+
+ case scalar_store:
+ return costs->scalar_store_cost;
+
+ case vector_stmt:
+ return fp ? common_costs->fp_stmt_cost : common_costs->int_stmt_cost;
+
+ case vector_load:
+ return common_costs->align_load_cost;
+
+ case vector_store:
+ return common_costs->align_store_cost;
+
+ case vec_to_scalar:
+ return common_costs->vec_to_scalar_cost;
+
+ case scalar_to_vec:
+ return common_costs->scalar_to_vec_cost;
+
+ case unaligned_load:
+ return common_costs->unalign_load_cost;
+ case vector_gather_load:
+ return common_costs->gather_load_cost;
+
+ case unaligned_store:
+ return common_costs->unalign_store_cost;
+ case vector_scatter_store:
+ return common_costs->scatter_store_cost;
+
+ case cond_branch_taken:
+ return costs->cond_taken_branch_cost;
+
+ case cond_branch_not_taken:
+ return costs->cond_not_taken_branch_cost;
+
+ case vec_perm:
+ return common_costs->permute_cost;
+
+ case vec_promote_demote:
+ return fp ? common_costs->fp_stmt_cost : common_costs->int_stmt_cost;
+
+ case vec_construct:
+ elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
+ return elements / 2 + 1;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ return default_builtin_vectorization_cost (type_of_cost, vectype, misalign);
+}
+
/* Implement targetm.vectorize.create_costs. */
static vector_costs *
@@ -10582,6 +10721,10 @@ extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED riscv_frame_pointer_required
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
+ riscv_builtin_vectorization_cost
+
#undef TARGET_VECTORIZE_CREATE_COSTS
#define TARGET_VECTORIZE_CREATE_COSTS riscv_vectorize_create_costs
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c
new file mode 100644
index 00000000000..06e08ec5f2e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mtune=generic-ooo" } */
+
+#define DEF_REDUC_PLUS(TYPE) \
+ TYPE __attribute__ ((noinline, noclone)) \
+ reduc_plus_##TYPE (TYPE *__restrict a, int n) \
+ { \
+ TYPE r = 0; \
+ for (int i = 0; i < n; ++i) \
+ r += a[i]; \
+ return r; \
+ }
+
+#define TEST_PLUS(T) T (int)
+
+TEST_PLUS (DEF_REDUC_PLUS)
+
+/* { dg-final { scan-assembler-not {vsetivli\s+zero,\s*4} } } */
--
2.36.3
@@ -200,6 +200,82 @@ struct riscv_cpu_info {
extern const riscv_cpu_info *riscv_find_cpu (const char *);
+/* Common vector costs in any kind of vectorization (e.g. VLA and VLS). */
+struct common_vector_cost
+{
+ /* Cost of any integer vector operation, excluding the ones handled
+ specially below. */
+ const int int_stmt_cost;
+
+ /* Cost of any fp vector operation, excluding the ones handled
+ specially below. */
+ const int fp_stmt_cost;
+
+ /* Gather/scatter vectorization cost. */
+ const int gather_load_cost;
+ const int scatter_store_cost;
+
+ /* Cost of a vector-to-scalar operation. */
+ const int vec_to_scalar_cost;
+
+ /* Cost of a scalar-to-vector operation. */
+ const int scalar_to_vec_cost;
+
+ /* Cost of a permute operation. */
+ const int permute_cost;
+
+ /* Cost of an aligned vector load. */
+ const int align_load_cost;
+
+ /* Cost of an aligned vector store. */
+ const int align_store_cost;
+
+ /* Cost of an unaligned vector load. */
+ const int unalign_load_cost;
+
+ /* Cost of an unaligned vector store. */
+ const int unalign_store_cost;
+};
+
+/* Scalable vectorization (VLA) specific cost. */
+struct scalable_vector_cost : common_vector_cost
+{
+ CONSTEXPR scalable_vector_cost (const common_vector_cost &base)
+ : common_vector_cost (base)
+ {}
+
+ /* TODO: We will need more other kinds of vector cost for VLA.
+ E.g. fold_left reduction cost, lanes load/store cost, ..., etc. */
+};
+
+/* Cost for vector insn classes. */
+struct cpu_vector_cost
+{
+ /* Cost of any integer scalar operation, excluding load and store. */
+ const int scalar_int_stmt_cost;
+
+ /* Cost of any fp scalar operation, excluding load and store. */
+ const int scalar_fp_stmt_cost;
+
+ /* Cost of a scalar load. */
+ const int scalar_load_cost;
+
+ /* Cost of a scalar store. */
+ const int scalar_store_cost;
+
+ /* Cost of a taken branch. */
+ const int cond_taken_branch_cost;
+
+ /* Cost of a not-taken branch. */
+ const int cond_not_taken_branch_cost;
+
+ /* Cost of VLS mode operations. */
+ const common_vector_cost *vls;
+
+ /* Cost of VLA mode operations. */
+ const scalable_vector_cost *vla;
+};
+
/* Routines implemented in riscv-selftests.cc. */
#if CHECKING_P
namespace selftest {
@@ -750,9 +750,8 @@ costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
stmt_vec_info stmt_info, slp_tree, tree vectype,
int misalign, vect_cost_model_location where)
{
- /* TODO: Use default STMT cost model.
- We will support more accurate STMT cost model later. */
- int stmt_cost = default_builtin_vectorization_cost (kind, vectype, misalign);
+ int stmt_cost
+ = targetm.vectorize.builtin_vectorization_cost (kind, vectype, misalign);
/* Do one-time initialization based on the vinfo. */
loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
@@ -281,6 +281,7 @@ struct riscv_tune_param
bool slow_unaligned_access;
bool use_divmod_expansion;
unsigned int fusible_ops;
+ const struct cpu_vector_cost *vec_costs;
};
@@ -348,6 +349,50 @@ const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
VD_REGS, VD_REGS, VD_REGS, VD_REGS,
};
+/* Generic costs for VLS vector operations.  The initializer is positional;
+   each value is annotated with the common_vector_cost field it sets.  */
+static const common_vector_cost generic_vls_vector_cost = {
+ 1, /* int_stmt_cost */
+ 1, /* fp_stmt_cost */
+ 1, /* gather_load_cost */
+ 1, /* scatter_store_cost */
+ 1, /* vec_to_scalar_cost */
+ 1, /* scalar_to_vec_cost */
+ 1, /* permute_cost */
+ 3, /* align_load_cost */
+ 3, /* align_store_cost */
+ 3, /* unalign_load_cost */
+ 3, /* unalign_store_cost */
+};
+
+/* Generic costs for VLA vector operations.  The inner braces give the
+   common_vector_cost values, each annotated with the field it sets; they
+   are currently identical to those in generic_vls_vector_cost.  */
+static const scalable_vector_cost generic_vla_vector_cost = {
+ {
+ 1, /* int_stmt_cost */
+ 1, /* fp_stmt_cost */
+ 1, /* gather_load_cost */
+ 1, /* scatter_store_cost */
+ 1, /* vec_to_scalar_cost */
+ 1, /* scalar_to_vec_cost */
+ 1, /* permute_cost */
+ 3, /* align_load_cost */
+ 3, /* align_store_cost */
+ 3, /* unalign_load_cost */
+ 3, /* unalign_store_cost */
+ },
+};
+
+/* Generic costs for vector insn classes: the scalar and branch costs,
+   followed by pointers to the generic VLS and VLA cost tables.  */
+static const struct cpu_vector_cost generic_vector_cost = {
+ 1, /* scalar_int_stmt_cost */
+ 1, /* scalar_fp_stmt_cost */
+ 1, /* scalar_load_cost */
+ 1, /* scalar_store_cost */
+ 3, /* cond_taken_branch_cost */
+ 1, /* cond_not_taken_branch_cost */
+ &generic_vls_vector_cost, /* vls */
+ &generic_vla_vector_cost, /* vla */
+};
+
/* Costs to use when optimizing for rocket. */
static const struct riscv_tune_param rocket_tune_info = {
{COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
@@ -362,6 +407,7 @@ static const struct riscv_tune_param rocket_tune_info = {
true, /* slow_unaligned_access */
false, /* use_divmod_expansion */
RISCV_FUSE_NOTHING, /* fusible_ops */
+ NULL, /* vector cost */
};
/* Costs to use when optimizing for Sifive 7 Series. */
@@ -378,6 +424,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
true, /* slow_unaligned_access */
false, /* use_divmod_expansion */
RISCV_FUSE_NOTHING, /* fusible_ops */
+ NULL, /* vector cost */
};
/* Costs to use when optimizing for T-HEAD c906. */
@@ -394,6 +441,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
false, /* slow_unaligned_access */
false, /* use_divmod_expansion */
RISCV_FUSE_NOTHING, /* fusible_ops */
+ NULL, /* vector cost */
};
/* Costs to use when optimizing for a generic ooo profile. */
@@ -410,6 +458,7 @@ static const struct riscv_tune_param generic_ooo_tune_info = {
false, /* slow_unaligned_access */
false, /* use_divmod_expansion */
RISCV_FUSE_NOTHING, /* fusible_ops */
+ &generic_vector_cost, /* vector cost */
};
/* Costs to use when optimizing for size. */
@@ -426,6 +475,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
false, /* slow_unaligned_access */
false, /* use_divmod_expansion */
RISCV_FUSE_NOTHING, /* fusible_ops */
+ NULL, /* vector cost */
};
static bool riscv_avoid_shrink_wrapping_separate ();
@@ -10192,6 +10242,95 @@ riscv_frame_pointer_required (void)
return riscv_save_frame_pointer && !crtl->is_leaf;
}
+/* Select the common cost table that applies to VECTYPE: the VLS table when
+   VECTYPE is non-null and its mode is a VLS mode, the VLA table otherwise.
+   The active tuning must provide a vector cost table.  */
+static const common_vector_cost *
+get_common_costs (tree vectype)
+{
+  const cpu_vector_cost *table = tune_param->vec_costs;
+  gcc_assert (table);
+
+  /* No type, or a type whose mode is not a VLS mode: use the VLA costs.  */
+  if (!vectype || !riscv_v_ext_vls_mode_p (TYPE_MODE (vectype)))
+    return table->vla;
+
+  return table->vls;
+}
+
+/* Implement targetm.vectorize.builtin_vectorization_cost.
+
+   Return the cost of a statement of kind TYPE_OF_COST operating on VECTYPE,
+   taken from the vector cost table of the active tuning.  Tunings without
+   such a table defer to default_builtin_vectorization_cost; MISALIGN is
+   only forwarded to that fallback.  */
+
+static int
+riscv_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+                                  tree vectype, int misalign ATTRIBUTE_UNUSED)
+{
+  const cpu_vector_cost *cpu_costs = tune_param->vec_costs;
+
+  /* Without a vector cost table, keep the target-independent defaults.  */
+  if (cpu_costs == NULL)
+    return default_builtin_vectorization_cost (type_of_cost, vectype,
+                                               misalign);
+
+  /* A missing VECTYPE is costed as an integer operation.  */
+  const bool is_float = vectype != NULL && FLOAT_TYPE_P (vectype);
+
+  const common_vector_cost *vec_costs = get_common_costs (vectype);
+  gcc_assert (vec_costs != NULL);
+
+  switch (type_of_cost)
+    {
+    /* Scalar statements and branches come from the per-CPU table.  */
+    case scalar_stmt:
+      if (is_float)
+        return cpu_costs->scalar_fp_stmt_cost;
+      return cpu_costs->scalar_int_stmt_cost;
+
+    case scalar_load:
+      return cpu_costs->scalar_load_cost;
+
+    case scalar_store:
+      return cpu_costs->scalar_store_cost;
+
+    case cond_branch_taken:
+      return cpu_costs->cond_taken_branch_cost;
+
+    case cond_branch_not_taken:
+      return cpu_costs->cond_not_taken_branch_cost;
+
+    /* Vector statements come from the VLS/VLA table selected above.
+       Promotions and demotions are costed like ordinary vector statements.  */
+    case vector_stmt:
+    case vec_promote_demote:
+      if (is_float)
+        return vec_costs->fp_stmt_cost;
+      return vec_costs->int_stmt_cost;
+
+    case vector_load:
+      return vec_costs->align_load_cost;
+
+    case vector_store:
+      return vec_costs->align_store_cost;
+
+    case unaligned_load:
+      return vec_costs->unalign_load_cost;
+
+    case unaligned_store:
+      return vec_costs->unalign_store_cost;
+
+    case vector_gather_load:
+      return vec_costs->gather_load_cost;
+
+    case vector_scatter_store:
+      return vec_costs->scatter_store_cost;
+
+    case vec_to_scalar:
+      return vec_costs->vec_to_scalar_cost;
+
+    case scalar_to_vec:
+      return vec_costs->scalar_to_vec_cost;
+
+    case vec_perm:
+      return vec_costs->permute_cost;
+
+    /* Building a vector is costed from the estimated element count.  */
+    case vec_construct:
+      {
+        unsigned nelts = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
+        return nelts / 2 + 1;
+      }
+
+    default:
+      gcc_unreachable ();
+    }
+}
+
+
/* Implement targetm.vectorize.create_costs. */
static vector_costs *
@@ -10582,6 +10721,10 @@ extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED riscv_frame_pointer_required
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
+ riscv_builtin_vectorization_cost
+
#undef TARGET_VECTORIZE_CREATE_COSTS
#define TARGET_VECTORIZE_CREATE_COSTS riscv_vectorize_create_costs
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mtune=generic-ooo" } */
+/* Define reduc_plus_TYPE, which returns the sum of A[0] .. A[N-1].  */
+#define DEF_REDUC_PLUS(TYPE) \
+ TYPE __attribute__ ((noinline, noclone)) \
+ reduc_plus_##TYPE (TYPE *__restrict a, int n) \
+ { \
+ TYPE r = 0; \
+ for (int i = 0; i < n; ++i) \
+ r += a[i]; \
+ return r; \
+ }
+
+#define TEST_PLUS(T) T (int)
+
+TEST_PLUS (DEF_REDUC_PLUS)
+/* PR target/111153: the reduction must not be compiled to a loop that
+   fixes the vector length at 4 elements with "vsetivli zero,4".  */
+/* { dg-final { scan-assembler-not {vsetivli\s+zero,\s*4} } } */