@@ -296,6 +296,11 @@
(and (match_code "const_vector")
(match_test "satisfies_constraint_Wc0 (op)")))
+(define_predicate "vector_const_int_or_double_0_operand"
+ (and (match_code "const_vector")
+ (match_test "satisfies_constraint_vi (op)
+ || satisfies_constraint_Wc0 (op)")))
+
(define_predicate "vector_move_operand"
(ior (match_operand 0 "nonimmediate_operand")
(and (match_code "const_vector")
@@ -3471,7 +3471,14 @@ function_expander::function_expander (const function_instance &instance,
exp (exp_in), target (target_in), opno (0)
{
if (!function_returns_void_p ())
- create_output_operand (&m_ops[opno++], target, TYPE_MODE (TREE_TYPE (exp)));
+ {
+ if (target != NULL_RTX && MEM_P (target))
+ /* Since there is no intrinsic where target is a mem operand, it
+ should be converted to reg if it is a mem operand. */
+ target = force_reg (GET_MODE (target), target);
+ create_output_operand (&m_ops[opno++], target,
+ TYPE_MODE (TREE_TYPE (exp)));
+ }
}
/* Take argument ARGNO from EXP's argument list and convert it into
@@ -1450,69 +1450,61 @@
;; - 15.1 Vector Mask-Register Logical Instructions
;; -------------------------------------------------------------------------------
-;; vle.v/vse.v/vmv.v.v/vmv.v.x/vmv.v.i/vfmv.v.f.
-;; For vle.v/vmv.v.v/vmv.v.x/vmv.v.i/vfmv.v.f, we may need merge and mask operand.
+;; vle.v/vse.v/vmv.v.v.
+;; For vle.v/vmv.v.v, we may need merge and mask operand.
;; For vse.v, we don't need merge operand, so it should always match "vu".
;; constraint alternative 0 ~ 1 match vle.v.
;; constraint alternative 2 match vse.v.
;; constraint alternative 3 match vmv.v.v.
-;; constraint alternative 4 match vmv.v.i.
-;; For vmv.v.i, we allow 2 following cases:
-;; 1. (const_vector:RVVMF8QI repeat [
-;; (const_int:QI N)]), -15 <= N < 16.
-;; 2. (const_vector:RVVMF2SF repeat [
-;; (const_double:SF 0.0 [0x0.0p+0])]).
-
-;; We add "MEM_P (operands[0]) || MEM_P (operands[3]) || CONST_VECTOR_P (operands[1])" here to
-;; make sure we don't want CSE to generate the following pattern:
-;; (insn 17 8 19 2 (set (reg:RVVMF4HI 134 [ _1 ])
-;; (if_then_else:RVVMF4HI (unspec:RVVM1BI [
-;; (reg/v:RVVM1BI 137 [ mask ])
-;; (reg:DI 151)
-;; (const_int 0 [0]) repeated x3
-;; (reg:SI 66 vl)
-;; (reg:SI 67 vtype)
-;; ] UNSPEC_VPREDICATE)
-;; (const_vector:RVVMF4HI repeat [
-;; (const_int 0 [0])
-;; ])
-;; (reg/v:RVVMF4HI 140 [ merge ]))) "rvv.c":8:12 608 {pred_movvnx1hi}
-;; (expr_list:REG_DEAD (reg:DI 151)
-;; (expr_list:REG_DEAD (reg/v:RVVMF4HI 140 [ merge ])
-;; (expr_list:REG_DEAD (reg/v:RVVM1BI 137 [ mask ])
-;; (nil)))))
-;; Since both vmv.v.v and vmv.v.i doesn't have mask operand.
-(define_insn_and_split "@pred_mov<mode>"
- [(set (match_operand:V_VLS 0 "nonimmediate_operand" "=vr, vr, vd, m, vr, vr, vr, vr")
+
+;; If operand 3 is a const_vector, then it is left to pred_braordcast patterns.
+(define_expand "@pred_mov<mode>"
+ [(set (match_operand:V_VLS 0 "nonimmediate_operand")
(if_then_else:V_VLS
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1, Wc1, vm, vmWc1, Wc1, Wc1, Wc1, Wc1")
- (match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK")
- (match_operand 5 "const_int_operand" " i, i, i, i, i, i, i, i")
- (match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 4 "vector_length_operand")
+ (match_operand 5 "const_int_operand")
+ (match_operand 6 "const_int_operand")
+ (match_operand 7 "const_int_operand")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (match_operand:V_VLS 3 "vector_move_operand" " m, m, m, vr, vr, vr, viWc0, viWc0")
- (match_operand:V_VLS 2 "vector_merge_operand" " 0, vu, vu, vu, vu, 0, vu, 0")))]
- "TARGET_VECTOR && (MEM_P (operands[0]) || MEM_P (operands[3])
- || CONST_VECTOR_P (operands[1]))"
+ (match_operand:V_VLS 3 "vector_move_operand")
+ (match_operand:V_VLS 2 "vector_merge_operand")))]
+ "TARGET_VECTOR"
+ {})
+
+;; vle.v/vse.v,vmv.v.v
+(define_insn_and_split "*pred_mov<mode>"
+ [(set (match_operand:V_VLS 0 "nonimmediate_operand" "=vr, vr, vd, m, vr, vr")
+ (if_then_else:V_VLS
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1, Wc1, vm, vmWc1, Wc1, Wc1")
+ (match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK, rK")
+ (match_operand 5 "const_int_operand" " i, i, i, i, i, i")
+ (match_operand 6 "const_int_operand" " i, i, i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i, i, i, i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operand:V_VLS 3 "reg_or_mem_operand" " m, m, m, vr, vr, vr")
+ (match_operand:V_VLS 2 "vector_merge_operand" " 0, vu, vu, vu, vu, 0")))]
+ "(TARGET_VECTOR
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[3], <MODE>mode)))"
"@
vle<sew>.v\t%0,%3%p1
vle<sew>.v\t%0,%3
vle<sew>.v\t%0,%3,%1.t
vse<sew>.v\t%3,%0%p1
vmv.v.v\t%0,%3
- vmv.v.v\t%0,%3
- vmv.v.i\t%0,%v3
- vmv.v.i\t%0,%v3"
+ vmv.v.v\t%0,%3"
"&& register_operand (operands[0], <MODE>mode)
&& register_operand (operands[3], <MODE>mode)
&& satisfies_constraint_vu (operands[2])
&& INTVAL (operands[7]) == riscv_vector::VLMAX"
[(set (match_dup 0) (match_dup 3))]
""
- [(set_attr "type" "vlde,vlde,vlde,vste,vimov,vimov,vimov,vimov")
+ [(set_attr "type" "vlde,vlde,vlde,vste,vimov,vimov")
(set_attr "mode" "<MODE>")])
;; Dedicated pattern for vse.v instruction since we can't reuse pred_mov pattern to include
@@ -1897,6 +1889,26 @@
[(set_attr "type" "vimovxv,vimovxv")
(set_attr "mode" "<MODE>")])
+;; Because (vec_duplicate imm) will be converted to (const_vector imm),
+;; This pattern is used to handle this case.
+(define_insn "*pred_broadcast<mode>_imm"
+ [(set (match_operand:V_VLS 0 "register_operand" "=vr, vr")
+ (if_then_else:V_VLS
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_all_trues_mask_operand" " Wc1, Wc1")
+ (match_operand 4 "vector_length_operand" " rK, rK")
+ (match_operand 5 "const_int_operand" " i, i")
+ (match_operand 6 "const_int_operand" " i, i")
+ (match_operand 7 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (match_operand:V_VLS 3 "vector_const_int_or_double_0_operand" "viWc0, viWc0")
+ (match_operand:V_VLS 2 "vector_merge_operand" " vu, 0")))]
+ "TARGET_VECTOR"
+ "vmv.v.i\t%0,%v3"
+ [(set_attr "type" "vimov,vimov")
+ (set_attr "mode" "<MODE>")])
+
;; -------------------------------------------------------------------------------
;; ---- Predicated Strided loads/stores
;; -------------------------------------------------------------------------------
new file mode 100644
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <riscv_vector.h>
+
+/*
+** foo9:
+** vsetivli\tzero,1,e64,m2,t[au],m[au]
+** ...
+** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void foo9 (void *base, void *out, size_t vl)
+{
+ int64_t scalar = *(int64_t*)(base + 100);
+ vint64m2_t v = __riscv_vmv_v_x_i64m2 (0, 1);
+ *(vint64m2_t*)out = v;
+}
+
+/*
+** foo10:
+** vsetivli\tzero,1,e64,m2,t[au],m[au]
+** ...
+** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
+** ret
+*/
+void foo10 (void *base, void *out, size_t vl)
+{
+ int64_t scalar = *(int64_t*)(base + 100);
+ vint64m2_t v = __riscv_vmv_s_x_i64m2 (0, 1);
+ *(vint64m2_t*)out = v;
+}
@@ -194,12 +194,12 @@ vfloat16m4_t test_vget_v_f16m8_f16m4(vfloat16m8_t src, size_t index) {
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*m8,\s*t[au],\s*m[au]} 5 } } */
/* { dg-final { scan-assembler-times {vfwcvt\.f\.f\.v\s+v[0-9]+,\s*v[0-9]+} 5 } } */
/* { dg-final { scan-assembler-times {vfncvt\.f\.f\.w\s+v[0-9]+,\s*v[0-9]+} 5 } } */
-/* { dg-final { scan-assembler-times {vle16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 20 } } */
+/* { dg-final { scan-assembler-times {vle16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 15 } } */
/* { dg-final { scan-assembler-times {vse16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 15 } } */
-/* { dg-final { scan-assembler-times {vl1re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 5 } } */
-/* { dg-final { scan-assembler-times {vl2re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 4 } } */
-/* { dg-final { scan-assembler-times {vl8re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 5 } } */
-/* { dg-final { scan-assembler-times {vl4re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 5 } } */
+/* { dg-final { scan-assembler-times {vl1re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 7 } } */
+/* { dg-final { scan-assembler-times {vl2re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 5 } } */
+/* { dg-final { scan-assembler-times {vl8re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 6 } } */
+/* { dg-final { scan-assembler-times {vl4re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 6 } } */
/* { dg-final { scan-assembler-times {vs1r\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 5 } } */
/* { dg-final { scan-assembler-times {vs2r\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 5 } } */
/* { dg-final { scan-assembler-times {vs4r\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 5 } } */