1. Introduce new tune info for vector register moves (regmove).
2. Adjust scalar_to_vec cost in add_stmt_cost.
We will get optimal codegen after this patch with -march=rv64gcv_zvl256b:
lui a5,%hi(a)
li a4,19
sb a4,%lo(a)(a5)
li a0,0
ret
Tested on both RV32 and RV64 with no regressions. OK for trunk?
PR target/113281
gcc/ChangeLog:
* config/riscv/riscv-protos.h (struct regmove_vector_cost): New struct.
(struct cpu_vector_cost): Add regmove struct.
(get_vector_costs): Export as global.
* config/riscv/riscv-vector-costs.cc (adjust_stmt_cost): New function. Adjust scalar_to_vec cost.
(costs::add_stmt_cost): Call adjust_stmt_cost.
* config/riscv/riscv.cc (rvv_regmove_vector_cost): New cost table.
(generic_vector_cost): Add regmove costs.
(get_vector_costs): Export global function.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/pr113209.c: Adapt test.
* gcc.dg/vect/costmodel/riscv/rvv/pr113281-1.c: New test.
* gcc.dg/vect/costmodel/riscv/rvv/pr113281-2.c: New test.
---
gcc/config/riscv/riscv-protos.h | 11 ++++++++
gcc/config/riscv/riscv-vector-costs.cc | 23 +++++++++++++++++
gcc/config/riscv/riscv.cc | 25 ++++++++++++-------
.../vect/costmodel/riscv/rvv/pr113281-1.c | 18 +++++++++++++
.../vect/costmodel/riscv/rvv/pr113281-2.c | 18 +++++++++++++
.../gcc.target/riscv/rvv/autovec/pr113209.c | 2 +-
6 files changed, 87 insertions(+), 10 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-1.c
create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-2.c
@@ -250,6 +250,13 @@ struct scalable_vector_cost : common_vector_cost
E.g. fold_left reduction cost, lanes load/store cost, ..., etc. */
};
+/* Additional costs for register copies. Cost is for one register. */
+struct regmove_vector_cost
+{
+ const int GR2VR;
+ const int FR2VR;
+};
+
/* Cost for vector insn classes. */
struct cpu_vector_cost
{
@@ -276,6 +283,9 @@ struct cpu_vector_cost
/* Cost of an VLA modes operations. */
const scalable_vector_cost *vla;
+
+ /* Cost of vector register move operations. */
+ const regmove_vector_cost *regmove;
};
/* Routines implemented in riscv-selftests.cc. */
@@ -764,5 +774,6 @@ struct riscv_tune_info {
const struct riscv_tune_info *
riscv_parse_tune (const char *, bool);
+const cpu_vector_cost *get_vector_costs ();
#endif /* ! GCC_RISCV_PROTOS_H */
@@ -1055,6 +1055,26 @@ costs::better_main_loop_than_p (const vector_costs *uncast_other) const
return vector_costs::better_main_loop_than_p (other);
}
+/* Adjust vectorization cost after calling riscv_builtin_vectorization_cost.
+ For some statements, we would like to fine-tune the cost on top of
+ riscv_builtin_vectorization_cost, which has no information about
+ statement operation codes, etc. */
+
+static unsigned
+adjust_stmt_cost (enum vect_cost_for_stmt kind, tree vectype, int stmt_cost)
+{
+ const cpu_vector_cost *costs = get_vector_costs ();
+ switch (kind)
+ {
+ case scalar_to_vec:
+ return stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->FR2VR
+ : costs->regmove->GR2VR);
+ default:
+ break;
+ }
+ return stmt_cost;
+}
+
unsigned
costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
stmt_vec_info stmt_info, slp_tree, tree vectype,
@@ -1082,6 +1102,9 @@ costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
as one iteration of the VLA loop. */
if (where == vect_body && m_unrolled_vls_niters)
m_unrolled_vls_stmts += count * m_unrolled_vls_niters;
+
+ if (vectype)
+ stmt_cost = adjust_stmt_cost (kind, vectype, stmt_cost);
}
return record_stmt_cost (stmt_info, where, count * stmt_cost);
@@ -391,17 +391,24 @@ static const scalable_vector_cost rvv_vla_vector_cost = {
},
};
+/* RVV register move cost. */
+static const regmove_vector_cost rvv_regmove_vector_cost = {
+ 2, /* GR2VR */
+ 2, /* FR2VR */
+};
+
/* Generic costs for vector insn classes. It is supposed to be the vector cost
models used by default if no other cost model was specified. */
static const struct cpu_vector_cost generic_vector_cost = {
- 1, /* scalar_int_stmt_cost */
- 1, /* scalar_fp_stmt_cost */
- 1, /* scalar_load_cost */
- 1, /* scalar_store_cost */
- 3, /* cond_taken_branch_cost */
- 1, /* cond_not_taken_branch_cost */
- &rvv_vls_vector_cost, /* vls */
- &rvv_vla_vector_cost, /* vla */
+ 1, /* scalar_int_stmt_cost */
+ 1, /* scalar_fp_stmt_cost */
+ 1, /* scalar_load_cost */
+ 1, /* scalar_store_cost */
+ 3, /* cond_taken_branch_cost */
+ 1, /* cond_not_taken_branch_cost */
+ &rvv_vls_vector_cost, /* vls */
+ &rvv_vla_vector_cost, /* vla */
+ &rvv_regmove_vector_cost, /* regmove */
};
/* Costs to use when optimizing for rocket. */
@@ -10443,7 +10450,7 @@ get_common_costs (const cpu_vector_cost *costs, tree vectype)
/* Return the CPU vector costs according to -mtune if tune info has non-NULL
vector cost. Otherwide, return the default generic vector costs. */
-static const cpu_vector_cost *
+const cpu_vector_cost *
get_vector_costs ()
{
const cpu_vector_cost *costs = tune_param->vec_costs;
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3 -ftree-vectorize" } */
+
+unsigned char a;
+
+int main() {
+ short b = a = 0;
+ for (; a != 19; a++)
+ if (a)
+ b = 32872 >> a;
+
+ if (b == 0)
+ return 0;
+ else
+ return 1;
+}
+
+/* { dg-final { scan-assembler-not {vset} } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3 -ftree-vectorize --param=riscv-autovec-preference=fixed-vlmax" } */
+
+unsigned char a;
+
+int main() {
+ short b = a = 0;
+ for (; a != 19; a++)
+ if (a)
+ b = 32872 >> a;
+
+ if (b == 0)
+ return 0;
+ else
+ return 1;
+}
+
+/* { dg-final { scan-assembler-not {vset} } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3" } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3 -fno-vect-cost-model" } */
int b, c, d, f, i, a;
int e[1] = {0};