[V3] RISC-V: Adjust scalar_to_vec cost

Message ID 20240112092844.260890-1-juzhe.zhong@rivai.ai
State Unresolved
Headers
Series [V3] RISC-V: Adjust scalar_to_vec cost |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

juzhe.zhong@rivai.ai Jan. 12, 2024, 9:28 a.m. UTC
1. Introduce new vector regmove tune info.
2. Adjust scalar_to_vec cost in add_stmt_cost.

We will get optimal codegen after this patch with -march=rv64gcv_zvl256b:

	lui	a5,%hi(a)
	li	a4,19
	sb	a4,%lo(a)(a5)
	li	a0,0
	ret

Tested on both RV32/RV64 no regression, Ok for trunk ?

	PR target/113281

gcc/ChangeLog:

	* config/riscv/riscv-protos.h (struct regmove_vector_cost): New struct.
	(struct cpu_vector_cost): Add regmove struct.
	(get_vector_costs): Export as global.
	* config/riscv/riscv-vector-costs.cc (adjust_stmt_cost): Adjust scalar_to_vec cost.
	(costs::add_stmt_cost): Ditto.
	* config/riscv/riscv.cc (get_vector_costs): Export global function.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/pr113209.c: Adapt test.
	* gcc.dg/vect/costmodel/riscv/rvv/pr113281-1.c: New test.
	* gcc.dg/vect/costmodel/riscv/rvv/pr113281-2.c: New test.

---
 gcc/config/riscv/riscv-protos.h               | 11 ++++++++
 gcc/config/riscv/riscv-vector-costs.cc        | 23 +++++++++++++++++
 gcc/config/riscv/riscv.cc                     | 25 ++++++++++++-------
 .../vect/costmodel/riscv/rvv/pr113281-1.c     | 18 +++++++++++++
 .../vect/costmodel/riscv/rvv/pr113281-2.c     | 18 +++++++++++++
 .../gcc.target/riscv/rvv/autovec/pr113209.c   |  2 +-
 6 files changed, 87 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-2.c
  

Comments

Robin Dapp Jan. 12, 2024, 10:10 a.m. UTC | #1
> Tested on both RV32/RV64 no regression, Ok for trunk ?

Yes, thanks!

Btw out of curiosity, did you see why we actually fail to
optimize away the VLA loop?  We should open a bug for that
I suppose.

Regards
 Robin
  

Patch

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index e8c54c5be50..4f3b677f4f9 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -250,6 +250,13 @@  struct scalable_vector_cost : common_vector_cost
      E.g. fold_left reduction cost, lanes load/store cost, ..., etc.  */
 };
 
+/* Additional costs for register copies.  Cost is for one register.  */
+struct regmove_vector_cost
+{
+  const int GR2VR;
+  const int FR2VR;
+};
+
 /* Cost for vector insn classes.  */
 struct cpu_vector_cost
 {
@@ -276,6 +283,9 @@  struct cpu_vector_cost
 
   /* Cost of an VLA modes operations.  */
   const scalable_vector_cost *vla;
+
+  /* Cost of vector register move operations.  */
+  const regmove_vector_cost *regmove;
 };
 
 /* Routines implemented in riscv-selftests.cc.  */
@@ -764,5 +774,6 @@  struct riscv_tune_info {
 
 const struct riscv_tune_info *
 riscv_parse_tune (const char *, bool);
+const cpu_vector_cost *get_vector_costs ();
 
 #endif /* ! GCC_RISCV_PROTOS_H */
diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index 58ec0b9b503..1c3708f23a0 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -1055,6 +1055,26 @@  costs::better_main_loop_than_p (const vector_costs *uncast_other) const
   return vector_costs::better_main_loop_than_p (other);
 }
 
+/* Adjust vectorization cost after calling riscv_builtin_vectorization_cost.
+   For some statements, we would like to fine-tune the cost further on top
+   of riscv_builtin_vectorization_cost, which doesn't have any information
+   about statement operation codes etc.  */
+
+static unsigned
+adjust_stmt_cost (enum vect_cost_for_stmt kind, tree vectype, int stmt_cost)
+{
+  const cpu_vector_cost *costs = get_vector_costs ();
+  switch (kind)
+    {
+    case scalar_to_vec:
+      return stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->FR2VR
+						  : costs->regmove->GR2VR);
+    default:
+      break;
+    }
+  return stmt_cost;
+}
+
 unsigned
 costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
 		      stmt_vec_info stmt_info, slp_tree, tree vectype,
@@ -1082,6 +1102,9 @@  costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
 	 as one iteration of the VLA loop.  */
       if (where == vect_body && m_unrolled_vls_niters)
 	m_unrolled_vls_stmts += count * m_unrolled_vls_niters;
+
+      if (vectype)
+	stmt_cost = adjust_stmt_cost (kind, vectype, stmt_cost);
     }
 
   return record_stmt_cost (stmt_info, where, count * stmt_cost);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index f829014a589..ee1a57b321d 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -391,17 +391,24 @@  static const scalable_vector_cost rvv_vla_vector_cost = {
   },
 };
 
+/* RVV register move cost.   */
+static const regmove_vector_cost rvv_regmove_vector_cost = {
+  2, /* GR2VR  */
+  2, /* FR2VR  */
+};
+
 /* Generic costs for vector insn classes.  It is supposed to be the vector cost
    models used by default if no other cost model was specified.  */
 static const struct cpu_vector_cost generic_vector_cost = {
-  1,			/* scalar_int_stmt_cost  */
-  1,			/* scalar_fp_stmt_cost  */
-  1,			/* scalar_load_cost  */
-  1,			/* scalar_store_cost  */
-  3,			/* cond_taken_branch_cost  */
-  1,			/* cond_not_taken_branch_cost  */
-  &rvv_vls_vector_cost, /* vls  */
-  &rvv_vla_vector_cost, /* vla */
+  1,			    /* scalar_int_stmt_cost  */
+  1,			    /* scalar_fp_stmt_cost  */
+  1,			    /* scalar_load_cost  */
+  1,			    /* scalar_store_cost  */
+  3,			    /* cond_taken_branch_cost  */
+  1,			    /* cond_not_taken_branch_cost  */
+  &rvv_vls_vector_cost,	    /* vls  */
+  &rvv_vla_vector_cost,	    /* vla  */
+  &rvv_regmove_vector_cost, /* regmove  */
 };
 
 /* Costs to use when optimizing for rocket.  */
@@ -10443,7 +10450,7 @@  get_common_costs (const cpu_vector_cost *costs, tree vectype)
 
 /* Return the CPU vector costs according to -mtune if tune info has non-NULL
    vector cost.  Otherwide, return the default generic vector costs.  */
-static const cpu_vector_cost *
+const cpu_vector_cost *
 get_vector_costs ()
 {
   const cpu_vector_cost *costs = tune_param->vec_costs;
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-1.c
new file mode 100644
index 00000000000..fdf6ed0334b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-1.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3 -ftree-vectorize" } */
+
+unsigned char a;
+
+int main() {
+  short b = a = 0;
+  for (; a != 19; a++)
+    if (a)
+      b = 32872 >> a;
+
+  if (b == 0)
+    return 0;
+  else
+    return 1;
+}
+
+/* { dg-final { scan-assembler-not {vset} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-2.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-2.c
new file mode 100644
index 00000000000..31cecec036f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-2.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3 -ftree-vectorize --param=riscv-autovec-preference=fixed-vlmax" } */
+
+unsigned char a;
+
+int main() {
+  short b = a = 0;
+  for (; a != 19; a++)
+    if (a)
+      b = 32872 >> a;
+
+  if (b == 0)
+    return 0;
+  else
+    return 1;
+}
+
+/* { dg-final { scan-assembler-not {vset} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113209.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113209.c
index 081ee369394..70aae151000 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113209.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113209.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3" } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3 -fno-vect-cost-model" } */
 
 int b, c, d, f, i, a;
 int e[1] = {0};