RISC-V: Minor tweak to the dynamic cost model
Checks
Commit Message
While working on the cost model, I noticed one case where the dynamic LMUL cost model doesn't work well.
Before this patch:
foo:
lui a4,%hi(.LANCHOR0)
li a0,1953
li a1,63
addi a4,a4,%lo(.LANCHOR0)
li a3,64
vsetvli a2,zero,e32,mf2,ta,ma
vmv.v.x v5,a0
vmv.v.x v4,a1
vid.v v3
.L2:
vsetvli a5,a3,e32,mf2,ta,ma
vadd.vi v2,v3,1
vadd.vv v1,v3,v5
mv a2,a5
vmacc.vv v1,v2,v4
slli a1,a5,2
vse32.v v1,0(a4)
sub a3,a3,a5
add a4,a4,a1
vsetvli a5,zero,e32,mf2,ta,ma
vmv.v.x v1,a2
vadd.vv v3,v3,v1
bne a3,zero,.L2
li a0,0
ret
Unexpected: this uses scalable vectors with LMUL = MF2, which wastes computation resources.
Ideally, we should use LMUL = M8 VLS modes.
The root cause is that the dynamic LMUL heuristic dominates the VLS heuristic.
Adapt the cost model heuristic.
After this patch:
foo:
lui a4,%hi(.LANCHOR0)
addi a4,a4,%lo(.LANCHOR0)
li a3,4096
li a5,32
li a1,2016
addi a2,a4,128
addiw a3,a3,-32
vsetvli zero,a5,e32,m8,ta,ma
li a0,0
vid.v v8
vsll.vi v8,v8,6
vadd.vx v16,v8,a1
vadd.vx v8,v8,a3
vse32.v v16,0(a4)
vse32.v v8,0(a2)
ret
Tested on both RV32 and RV64 with no regressions.
Ok for trunk?
gcc/ChangeLog:
* config/riscv/riscv-vector-costs.cc (costs::better_main_loop_than_p): Minor tweak.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c: Fix test.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c: Ditto.
---
gcc/config/riscv/riscv-vector-costs.cc | 3 ++-
.../gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c | 5 ++---
.../gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c | 5 ++---
.../gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c | 2 +-
4 files changed, 7 insertions(+), 8 deletions(-)
@@ -994,7 +994,8 @@ costs::better_main_loop_than_p (const vector_costs *uncast_other) const
vect_vf_for_cost (other_loop_vinfo));
/* Apply the unrolling heuristic described above m_unrolled_vls_niters. */
- if (bool (m_unrolled_vls_stmts) != bool (other->m_unrolled_vls_stmts))
+ if (bool (m_unrolled_vls_stmts) != bool (other->m_unrolled_vls_stmts)
+ && m_cost_type != other->m_cost_type)
{
bool this_prefer_unrolled = this->prefer_unrolled_loop ();
bool other_prefer_unrolled = other->prefer_unrolled_loop ();
@@ -3,7 +3,7 @@
#include <stdint-gcc.h>
-#define N 40
+#define N 48
int a[N];
@@ -22,7 +22,6 @@ foo (){
return 0;
}
-/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*8,\s*e32,\s*m2,\s*t[au],\s*m[au]} 1 } } */
/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*16,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 } } */
-/* { dg-final { scan-assembler-times {vsetivli} 2 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
/* { dg-final { scan-assembler-not {vsetvli} } } */
@@ -3,7 +3,7 @@
#include <stdint-gcc.h>
-#define N 40
+#define N 64
int a[N];
@@ -22,7 +22,6 @@ foo (){
return 0;
}
-/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*8,\s*e32,\s*m2,\s*t[au],\s*m[au]} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]} 1 } } */
-/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=dynamic -fno-schedule-insns -fno-schedule-insns2" } */
#include <stdint-gcc.h>