RISC-V: Allow simplification non-vlmax with len = NUNITS reg to reg move
Checks
Commit Message
While working on fixing a bug, I notice this following code has redundant move:
#include "riscv_vector.h"
void
f (float x, float y, void *out)
{
float f[4] = { x, x, x, y };
vfloat32m1_t v = __riscv_vle32_v_f32m1 (f, 4);
__riscv_vse32_v_f32m1 (out, v, 4);
}
Before this patch:
f:
vsetivli zero,4,e32,m1,ta,ma
addi sp,sp,-16
vfmv.v.f v1,fa0
vfslide1down.vf v1,v1,fa1
vmv.v.v v1,v1 ----> redundant move.
vse32.v v1,0(a0)
addi sp,sp,16
jr ra
The rootcause is that the complicate vmv.v.v pattern doesn't simplify it
into simple (set (reg) (reg)) reg-to-reg move pattern.
Currently, we support such simplification for VLMAX.
However, the case I found is non-VLMAX but with LEN = NUNITS which should be
considered as equivalent to VLMAX.
Add a simple fix for such situation.
Tested on both RV32/RV64 no regressions. Ok for trunk ?
gcc/ChangeLog:
* config/riscv/riscv-protos.h (whole_reg_to_reg_move_p): New function.
* config/riscv/riscv-v.cc (whole_reg_to_reg_move_p): Ditto.
* config/riscv/vector.md: Allow non-vlmax with len = NUNITS simplification.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/vf_avl-4.c: New test.
---
gcc/config/riscv/riscv-protos.h | 1 +
gcc/config/riscv/riscv-v.cc | 21 +++++++++++++++++++
gcc/config/riscv/vector.md | 9 ++------
.../gcc.target/riscv/rvv/base/vf_avl-4.c | 13 ++++++++++++
4 files changed, 37 insertions(+), 7 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-4.c
Comments
> +/* Return true it is whole register-register move. */
> +bool
> +whole_reg_to_reg_move_p (rtx *ops, machine_mode mode)
> +{
> + if (register_operand (ops[0], mode)
> + && register_operand (ops[3], mode)
> + && satisfies_constraint_vu (ops[2])
> + && satisfies_constraint_Wc1 (ops[1]))
> + {
> + int vlmax_index = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL ? 5 : 7;
> + if (INTVAL (ops[vlmax_index]) == VLMAX)
> + return true;
Is that indent correct? Looks odd on my screen but I didn't verify.
> + /* AVL propagation PASS will transform FIXED-VLMAX with NUNITS < 32
> + into NON-VLMAX with LEN = NUNITS. */
> + else if (CONST_INT_P (ops[4])
> + && known_eq (INTVAL (ops[4]), GET_MODE_NUNITS (mode)))
> + return true;
> + }
> + return false;
> +}
I would prefer having the vlmax_index as a parameter. Even though
it's clear that a mask set operation has two operands less I don't
find it particularly intuitive to check that in the function.
Also explain both cases in the function-level comment and mention
the preconditions for calling the function. Something like:
"An operation is a whole-register move if either
(1) Its vlmax operand equals VLMAX
(2) Its vl operand equals the number of units of its mode."
Maybe some more asserts or checks wouldn't hurt either so the function
can't accidentally be called on other operations than vlde/vste/vimov.
Regards
Robin
@@ -687,6 +687,7 @@ bool imm_avl_p (machine_mode);
bool can_be_broadcasted_p (rtx);
bool gather_scatter_valid_offset_p (machine_mode);
HOST_WIDE_INT estimated_poly_value (poly_int64, unsigned int);
+bool whole_reg_to_reg_move_p (rtx *, machine_mode);
}
/* We classify builtin types into two classes:
@@ -5122,4 +5122,25 @@ estimated_poly_value (poly_int64 val, unsigned int kind)
return val.coeffs[0] + val.coeffs[1] * over_min_vlen / TARGET_MIN_VLEN;
}
+/* Return true it is whole register-register move. */
+bool
+whole_reg_to_reg_move_p (rtx *ops, machine_mode mode)
+{
+ if (register_operand (ops[0], mode)
+ && register_operand (ops[3], mode)
+ && satisfies_constraint_vu (ops[2])
+ && satisfies_constraint_Wc1 (ops[1]))
+ {
+ int vlmax_index = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL ? 5 : 7;
+ if (INTVAL (ops[vlmax_index]) == VLMAX)
+ return true;
+ /* AVL propagation PASS will transform FIXED-VLMAX with NUNITS < 32
+ into NON-VLMAX with LEN = NUNITS. */
+ else if (CONST_INT_P (ops[4])
+ && known_eq (INTVAL (ops[4]), GET_MODE_NUNITS (mode)))
+ return true;
+ }
+ return false;
+}
+
} // namespace riscv_vector
@@ -1724,10 +1724,7 @@
vse<sew>.v\t%3,%0%p1
vmv.v.v\t%0,%3
vmv.v.v\t%0,%3"
- "&& register_operand (operands[0], <MODE>mode)
- && register_operand (operands[3], <MODE>mode)
- && satisfies_constraint_vu (operands[2])
- && INTVAL (operands[7]) == riscv_vector::VLMAX"
+ "&& riscv_vector::whole_reg_to_reg_move_p (operands, <MODE>mode)"
[(set (match_dup 0) (match_dup 3))]
""
[(set_attr "type" "vlde,vlde,vlde,vste,vimov,vimov")
@@ -1776,9 +1773,7 @@
vmmv.m\t%0,%3
vmclr.m\t%0
vmset.m\t%0"
- "&& register_operand (operands[0], <MODE>mode)
- && register_operand (operands[3], <MODE>mode)
- && INTVAL (operands[5]) == riscv_vector::VLMAX"
+ "&& riscv_vector::whole_reg_to_reg_move_p (operands, <MODE>mode)"
[(set (match_dup 0) (match_dup 3))]
""
[(set_attr "type" "vldm,vstm,vmalu,vmalu,vmalu")
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d --param riscv-autovec-preference=fixed-vlmax" } */
+
+#include "riscv_vector.h"
+void
+f (float x, float y, void *out)
+{
+ float f[4] = { x, x, x, y };
+ vfloat32m1_t v = __riscv_vle32_v_f32m1 (f, 4);
+ __riscv_vse32_v_f32m1 (out, v, 4);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */