RISC-V: Adjust loop len by costing 1 when NITER < VF [GCC 14 regression]
Checks
Commit Message
This patch fixes the regression between GCC 13.2.0 and trunk GCC (GCC-14)
GCC 13.2.0:
lui a5,%hi(a)
li a4,19
sb a4,%lo(a)(a5)
li a0,0
ret
Trunk GCC:
vsetvli a5,zero,e8,mf2,ta,ma
li a4,-32768
vid.v v1
vsetvli zero,zero,e16,m1,ta,ma
addiw a4,a4,104
vmv.v.i v3,15
lui a1,%hi(a)
li a0,19
vsetvli zero,zero,e8,mf2,ta,ma
vadd.vi v1,v1,1
sb a0,%lo(a)(a1)
vsetvli zero,zero,e16,m1,ta,ma
vzext.vf2 v2,v1
vmv.v.x v1,a4
vminu.vv v2,v2,v3
vsrl.vv v1,v1,v2
vslidedown.vi v1,v1,17
vmv.x.s a0,v1
snez a0,a0
ret
The root cause is that we vectorize the code inefficiently because we don't cost the loop len when NITERS < VF.
Costing the loop len the same way mask-based loop control targets (or rs6000) do fixes the regression.
Tested with no regressions. OK for trunk?
PR target/113281
gcc/ChangeLog:
* config/riscv/riscv-vector-costs.cc (costs::adjust_vect_cost_per_loop): New function.
(costs::finish_cost): Adjust cost.
* config/riscv/riscv-vector-costs.h: New function.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/costmodel/riscv/rvv/pr113281-3.c: New test.
* gcc.dg/vect/costmodel/riscv/rvv/pr113281-4.c: New test.
---
gcc/config/riscv/riscv-vector-costs.cc | 61 +++++++++++++++++++
gcc/config/riscv/riscv-vector-costs.h | 2 +
.../vect/costmodel/riscv/rvv/pr113281-3.c | 18 ++++++
.../vect/costmodel/riscv/rvv/pr113281-4.c | 18 ++++++
4 files changed, 99 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-3.c
create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-4.c
@@ -1110,9 +1110,70 @@ costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
return record_stmt_cost (stmt_info, where, count * stmt_cost);
}
+/* For target-specific vectorization costs which can't be handled per stmt,
+ we check the requisite conditions and adjust the vectorization cost
+ accordingly if satisfied. One typical example is to model and adjust the
+ loop_len cost for known_le (NITERS, VF), which is what is checked below. */
+
+void
+costs::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo)
+{
+ if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
+ && !LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)
+ && m_num_vector_iterations == 1
+ && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && known_le (LOOP_VINFO_INT_NITERS (loop_vinfo),
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
+ {
+ /* In the middle-end loop vectorizer, we don't count the loop_len cost in
+ vect_estimate_min_profitable_iters when NITERS < VF, that is, we only
+ count the cost of len when we need to iterate the loop more than once
+ with VF (m_num_vector_iterations > 1). It's correct for most cases:
+
+ E.g. VF = [4, 4]
+ for (int i = 0; i < 3; i ++)
+ a[i] += b[i];
+
+ We don't need to cost MIN_EXPR or SELECT_VL for the case above.
+
+ However, for some inefficiently vectorized cases, it does use MIN_EXPR
+ to generate len.
+
+ E.g. VF = [256, 256]
+
+ Loop body:
+ # loop_len_110 = PHI <18(2), _119(11)>
+ ...
+ _117 = MIN_EXPR <ivtmp_114, 18>;
+ _118 = 18 - _117;
+ _119 = MIN_EXPR <_118, POLY_INT_CST [256, 256]>;
+ ...
+
+ Epilogue:
+ ...
+ _112 = .VEC_EXTRACT (vect_patt_27.14_109, _111);
+
+ For this situation we unconditionally add scalar stmt cost to the loop
+ body (num_vectors_m1 + 1 stmts per rgroup that has a type), like other
+ targets which apply mask as the loop control. */
+ rgroup_controls *rgc;
+ unsigned int num_vectors_m1;
+ unsigned int body_stmts = 0;
+ FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
+ if (rgc->type)
+ body_stmts += num_vectors_m1 + 1;
+
+ add_stmt_cost (body_stmts, scalar_stmt, NULL, NULL, NULL_TREE, 0,
+ vect_body);
+ }
+}
+
void
costs::finish_cost (const vector_costs *scalar_costs)
{
+ if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo))
+ { /* Only when m_vinfo is a loop_vec_info, i.e. loop vectorization. */
+ adjust_vect_cost_per_loop (loop_vinfo); /* Done before the base-class finish_cost call below. */
+ }
vector_costs::finish_cost (scalar_costs);
}
@@ -101,6 +101,8 @@ private:
V_REGS spills according to the analysis. */
bool m_has_unexpected_spills_p = false;
void record_potential_unexpected_spills (loop_vec_info);
+
+ void adjust_vect_cost_per_loop (loop_vec_info);
};
} // namespace riscv_vector
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl4096b -mabi=lp64d -O3 -ftree-vectorize --param=riscv-autovec-lmul=m8" } */
+
+unsigned char a;
+
+int main() {
+ short b = a = 0;
+ for (; a != 19; a++) /* 19 iterations: a runs from 0 to 18 and is left at 19. */
+ if (a)
+ b = 32872 >> a; /* Last iteration: 32872 >> 18 == 0, so b ends up 0. */
+
+ if (b == 0)
+ return 0;
+ else
+ return 1;
+}
+
+/* { dg-final { scan-assembler-not {vset} } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl4096b -mabi=lp64d -O3 -ftree-vectorize --param=riscv-autovec-lmul=m8 --param=riscv-autovec-preference=fixed-vlmax" } */
+
+unsigned char a;
+
+int main() {
+ short b = a = 0;
+ for (; a != 19; a++) /* 19 iterations: a runs from 0 to 18 and is left at 19. */
+ if (a)
+ b = 32872 >> a; /* Last iteration: 32872 >> 18 == 0, so b ends up 0. */
+
+ if (b == 0)
+ return 0;
+ else
+ return 1;
+}
+
+/* { dg-final { scan-assembler-not {vset} } } */