[Committed,V2] RISC-V: Allow simplification non-vlmax with len = NUNITS reg to reg move
Checks
Commit Message
V2: Address comments from Robin.
While working on fixing a bug, I notice this following code has redundant move:
#include "riscv_vector.h"
void
f (float x, float y, void *out)
{
float f[4] = { x, x, x, y };
vfloat32m1_t v = __riscv_vle32_v_f32m1 (f, 4);
__riscv_vse32_v_f32m1 (out, v, 4);
}
Before this patch:
f:
vsetivli zero,4,e32,m1,ta,ma
addi sp,sp,-16
vfmv.v.f v1,fa0
vfslide1down.vf v1,v1,fa1
vmv.v.v v1,v1 ----> redundant move.
vse32.v v1,0(a0)
addi sp,sp,16
jr ra
The rootcause is that the complicate vmv.v.v pattern doesn't simplify it
into simple (set (reg) (reg)) reg-to-reg move pattern.
Currently, we support such simplification for VLMAX.
However, the case I found is non-VLMAX but with LEN = NUNITS which should be
considered as equivalent to VLMAX.
Add a simple fix for such situation.
Tested on both RV32/RV64 no regressions.
gcc/ChangeLog:
* config/riscv/riscv-protos.h (whole_reg_to_reg_move_p): New function.
* config/riscv/riscv-v.cc (whole_reg_to_reg_move_p): Ditto.
* config/riscv/vector.md: Allow non-vlmax with len = NUNITS simplification.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/vf_avl-4.c: New test.
---
gcc/config/riscv/riscv-protos.h | 1 +
gcc/config/riscv/riscv-v.cc | 23 +++++++++++++++++++
gcc/config/riscv/vector.md | 9 ++------
.../gcc.target/riscv/rvv/base/vf_avl-4.c | 13 +++++++++++
4 files changed, 39 insertions(+), 7 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-4.c
@@ -687,6 +687,7 @@ bool imm_avl_p (machine_mode);
bool can_be_broadcasted_p (rtx);
bool gather_scatter_valid_offset_p (machine_mode);
HOST_WIDE_INT estimated_poly_value (poly_int64, unsigned int);
+bool whole_reg_to_reg_move_p (rtx *, machine_mode, int);
}
/* We classify builtin types into two classes:
@@ -5117,4 +5117,27 @@ estimated_poly_value (poly_int64 val, unsigned int kind)
return val.coeffs[0] + val.coeffs[1] * over_min_vlen / TARGET_MIN_VLEN;
}
+/* Return true it is whole register-register move. */
+bool
+whole_reg_to_reg_move_p (rtx *ops, machine_mode mode, int avl_type_index)
+{
+ /* An operation is a whole-register move if either
+ (1) Its vlmax operand equals VLMAX
+ (2) Its vl operand equals the number of units of its mode. */
+ if (register_operand (ops[0], mode)
+ && register_operand (ops[3], mode)
+ && satisfies_constraint_vu (ops[2])
+ && satisfies_constraint_Wc1 (ops[1]))
+ {
+ if (INTVAL (ops[avl_type_index]) == VLMAX)
+ return true;
+ /* AVL propagation PASS will transform FIXED-VLMAX with NUNITS < 32
+ into NON-VLMAX with LEN = NUNITS. */
+ else if (CONST_INT_P (ops[4])
+ && known_eq (INTVAL (ops[4]), GET_MODE_NUNITS (mode)))
+ return true;
+ }
+ return false;
+}
+
} // namespace riscv_vector
@@ -1724,10 +1724,7 @@
vse<sew>.v\t%3,%0%p1
vmv.v.v\t%0,%3
vmv.v.v\t%0,%3"
- "&& register_operand (operands[0], <MODE>mode)
- && register_operand (operands[3], <MODE>mode)
- && satisfies_constraint_vu (operands[2])
- && INTVAL (operands[7]) == riscv_vector::VLMAX"
+ "&& riscv_vector::whole_reg_to_reg_move_p (operands, <MODE>mode, 7)"
[(set (match_dup 0) (match_dup 3))]
""
[(set_attr "type" "vlde,vlde,vlde,vste,vimov,vimov")
@@ -1776,9 +1773,7 @@
vmmv.m\t%0,%3
vmclr.m\t%0
vmset.m\t%0"
- "&& register_operand (operands[0], <MODE>mode)
- && register_operand (operands[3], <MODE>mode)
- && INTVAL (operands[5]) == riscv_vector::VLMAX"
+ "&& riscv_vector::whole_reg_to_reg_move_p (operands, <MODE>mode, 5)"
[(set (match_dup 0) (match_dup 3))]
""
[(set_attr "type" "vldm,vstm,vmalu,vmalu,vmalu")
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "riscv_vector.h"
+void
+f (float x, float y, void *out)
+{
+ float f[4] = { x, x, x, y };
+ vfloat32m1_t v = __riscv_vle32_v_f32m1 (f, 4);
+ __riscv_vse32_v_f32m1 (out, v, 4);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */